ref: eac2df886379b488c0d7ad3762128f610f6997ec
parent: 71e906d032b5d0fa35577d8b3add539b22b8226f
author: cinap_lenrek <cinap_lenrek@centraldogma>
date: Tue Sep 20 00:14:29 EDT 2011
uhtml: remove leading utf BOM marker, html2ms: fix underline handling and escaping
--- a/sys/src/cmd/html2ms.c
+++ b/sys/src/cmd/html2ms.c
@@ -142,8 +142,24 @@
}
void
-ona(Text *text, Tag *)
+restoreunderline(Text *text, Tag *tag)
{+ text->underline = tag->restore;
+ emit(text, "");
+}
+
+void
+ona(Text *text, Tag *tag)
+{+ int i;
+
+ for(i=0; i<tag->nattr; i++)
+ if(cistrcmp(tag->attr[i].attr, "href") == 0)
+ break;
+ if(i == tag->nattr)
+ return;
+ tag->restore = text->underline;
+ tag->close = restoreunderline;
text->underline = 1;
}
@@ -207,10 +223,13 @@
if(n != 7 || cistrncmp(buf, "[CDATA[", 7))
continue;
while((c = Bgetc(&in)) > 0){- if(c == ']')
- if(Bgetc(&in) == ']')
- if(Bgetc(&in) == '>')
- return;
+ if(c == ']'){+ if(Bgetc(&in) == ']'){+ if(Bgetc(&in) != '>')
+ Bungetc(&in);
+ return;
+ }
+ }
}
}
}
@@ -425,10 +444,9 @@
case '\r':
case ' ':
case '\t':
- if(text->pre == 0){- text->space = 1;
+ text->space = 1;
+ if(text->pre == 0)
continue;
- }
default:
if(r == '\n' || r == '\r')
text->pos = 0;
@@ -435,8 +453,8 @@
if(text->space){text->space = 0;
if(text->underline){- emit(text, "");
- text->pos = Bprint(&out, ".UL ");
+ emit(text, ".UL ");
+ text->pos = 1;
} else if(text->pos >= 70){text->pos = 0;
Bputc(&out, '\n');
@@ -445,16 +463,15 @@
Bputc(&out, ' ');
}
}
- if(text->pos == 0 && r == '.'){- text->pos++;
- Bputc(&out, ' ');
- }
- text->pos++;
- if(r == 0xA0){+ if(text->pos == 0 && r == '.')
+ text->pos += Bprint(&out, "\\&");
+ else if(r == '\\')
+ text->pos += Bprint(&out, "\\&\\");
+ else if(r == 0xA0){r = ' ';
- Bputc(&out, '\\');
+ text->pos += Bprint(&out, "\\");
}
- Bprint(&out, "%C", r);
+ text->pos += Bprint(&out, "%C", r);
}
}
}
@@ -473,7 +490,10 @@
Binit(&out, 1, OWRITE);
memset(&text, 0, sizeof(text));
+
+ text.font = "R";
text.output = 1;
+
parsetext(&text, nil);
emit(&text, "\n");
}
--- a/sys/src/cmd/page.c
+++ b/sys/src/cmd/page.c
@@ -623,9 +623,9 @@
p->data = "lp -dstdout";
p->open = popengs;
}
- else if(cistrncmp(buf, "<?xml", 5) == 0 ||
- cistrncmp(buf, "<!DOCTYPE", 9) == 0 ||
- cistrncmp(buf, "<HTML", 5) == 0){+ else if(cistrstr(buf, "<?xml") ||
+ cistrstr(buf, "<!DOCTYPE") ||
+ cistrstr(buf, "<HTML")){p->data = "uhtml | html2ms | troff -ms | lp -dstdout";
p->open = popengs;
}
--- a/sys/src/cmd/uhtml.c
+++ b/sys/src/cmd/uhtml.c
@@ -62,6 +62,11 @@
if((nbuf = read(0, buf, sizeof(buf)-1)) < 0)
sysfatal("read: %r");buf[nbuf] = 0;
+
+ /* useless BOM marker */
+ if(memcmp(buf, "\xEF\xBB\xBF", 3)==0)
+ memmove(buf, buf+3, nbuf-3);
+
for(;;){if(s = cistrstr(buf, "encoding="))
if(s = strval(s+9)){--
⑨