ref: c05dd199c1e5250f443d44a41144b79d8a98994c
dir: /sys/src/cmd/aux/mswordstrings.c/
#include <u.h>
#include <libc.h>
#include <bio.h>

/*
 * Print the text stream of a Word document, replacing Word's
 * control characters with readable ASCII.  The fixed-size header
 * at the start of the stream says where the text begins (fcMin)
 * and ends (fcMac).
 */

/*
 * automatically generated; do not edit.
 * NOTE(review): the flag masks in readFibhdr were corrected by hand
 * (they had been emitted as decimal digits with a 0x prefix); if the
 * generator still exists it needs the same fix.
 */
typedef struct Fibhdr Fibhdr;
struct Fibhdr {
	ushort wIdent;
	ushort nFib;
	ushort nProduct;
	ushort lid;
	short pnNext;
	uchar fDot;
	uchar fGlsy;
	uchar fComplex;
	uchar fHasPic;
	uchar cQuickSaves;
	uchar fEncrypted;
	uchar fWhichTblStm;
	uchar fReadOnlyRecommended;
	uchar fWriteReservation;
	uchar fExtChar;
	uchar fLoadOverride;
	uchar fFarEast;
	uchar fCrypto;
	ushort nFibBack;
	ulong lKey;
	uchar envr;
	uchar fMac;
	uchar fEmptySpecial;
	uchar fLoadOverridePage;
	uchar fFutureSavedUndo;
	uchar fWord97Saved;
	ushort chs;
	ushort chsTables;
	long fcMin;
	long fcMac;
	ushort csw;
};

enum {
	bcFibhdr = 0x22	/* bytes consumed by readFibhdr */
};

/* 16-bit little-endian value at p */
static uint
le16(uchar *p)
{
	return p[0] | (p[1] << 8);
}

/*
 * 32-bit little-endian value at p.  Each byte is widened to an
 * unsigned type before shifting so that a top byte >= 0x80 does
 * not overflow a signed int.
 */
static uint
le32(uchar *p)
{
	return (uint)p[0] | ((uint)p[1] << 8) | ((uint)p[2] << 16) | ((uint)p[3] << 24);
}

/* single bit number `bit' (0 = least significant) of byte b */
static int
bitof(uchar b, int bit)
{
	return (b >> bit) & 1;
}

/*
 * Decode the first bcFibhdr bytes of v into *s.
 * Multi-byte fields are little-endian; the bytes at 0xA, 0xB and
 * 0x13 are packed flags.  Calls sysfatal if nv < bcFibhdr.
 */
void
readFibhdr(Fibhdr *s, uchar *v, int nv)
{
	if(nv < bcFibhdr)
		sysfatal("not enough data for Fibhdr");

	s->wIdent = le16(v+0x0);
	s->nFib = le16(v+0x2);
	s->nProduct = le16(v+0x4);
	s->lid = le16(v+0x6);
	s->pnNext = le16(v+0x8);

	/* byte 0xA: four flags in bits 0-3, a 4-bit count in bits 4-7 */
	s->fDot = bitof(v[0xA], 0);
	s->fGlsy = bitof(v[0xA], 1);
	s->fComplex = bitof(v[0xA], 2);
	s->fHasPic = bitof(v[0xA], 3);
	s->cQuickSaves = (v[0xA] & 0xF0) >> 4;

	/* byte 0xB: eight single-bit flags */
	s->fEncrypted = bitof(v[0xB], 0);
	s->fWhichTblStm = bitof(v[0xB], 1);
	s->fReadOnlyRecommended = bitof(v[0xB], 2);
	s->fWriteReservation = bitof(v[0xB], 3);
	s->fExtChar = bitof(v[0xB], 4);
	s->fLoadOverride = bitof(v[0xB], 5);
	s->fFarEast = bitof(v[0xB], 6);
	s->fCrypto = bitof(v[0xB], 7);

	s->nFibBack = le16(v+0xC);
	s->lKey = le32(v+0xE);
	s->envr = v[0x12];

	/* byte 0x13: five single-bit flags in bits 0-4 */
	s->fMac = bitof(v[0x13], 0);
	s->fEmptySpecial = bitof(v[0x13], 1);
	s->fLoadOverridePage = bitof(v[0x13], 2);
	s->fFutureSavedUndo = bitof(v[0x13], 3);
	s->fWord97Saved = bitof(v[0x13], 4);

	s->chs = le16(v+0x14);
	s->chsTables = le16(v+0x16);
	/* go through int so a set top bit still yields a negative offset */
	s->fcMin = (int)le32(v+0x18);
	s->fcMac = (int)le32(v+0x1C);
	s->csw = le16(v+0x20);
}

/* Print the usage message on fd 2 and exit with status "usage". */
void
usage(void)
{
	fprint(2, "usage: wordtext /mnt/doc/WordDocument\n");
	exits("usage");
}

/*
 * Read the header from the file named by the single argument,
 * seek to fcMin and copy fcMac-fcMin bytes to standard output,
 * translating Word control characters on the way.
 */
void
main(int argc, char **argv)
{
	Biobuf *b;
	Biobuf bout;
	uchar buf[512];
	Fibhdr f;
	int i, c, n;

	ARGBEGIN{
	default:
		usage();
	}ARGEND

	if(argc != 1)
		usage();

	Binit(&bout, 1, OWRITE);
	b = Bopen(argv[0], OREAD);
	if(b == nil) {
		fprint(2, "couldn't open file: %r\n");
		exits("word");
	}

	n = Bread(b, buf, sizeof buf);
	/* signed comparison, so that an error return (-1) is caught too */
	if(n < (int)sizeof buf) {
		fprint(2, "short read: %r\n");
		exits("read");
	}

	readFibhdr(&f, buf, n);
	// printFibhdr(&f);

	if(Bseek(b, f.fcMin, 0) < 0) {
		fprint(2, "seek to text failed: %r\n");
		exits("seek");
	}

	n = f.fcMac - f.fcMin;
	for(i=0; i<n; i++) {
		c = Bgetc(b);
		if(c < 0)
			break;

		switch(c) {
		default:
			Bputc(&bout, c);
			break;

		case '\\':	/* field escape */
			Bprint(&bout, "\\");
			break;
		case 7:		/* cell, row mark */
			Bprint(&bout, "\n");
			break;
		case 9:		/* tab */
			Bprint(&bout, "\t");
			break;
		case 11:	/* hard line break */
			Bprint(&bout, "\n");
			break;
		case 12:	/* page break */
			Bprint(&bout, "\n\n\n\n");
			break;
		case 13:	/* paragraph end */
			Bprint(&bout, "\n\n");
			break;
		case 14:	/* column break */
			break;
		case 19:	/* field begin */
			Bprint(&bout, "<");
			break;
		case 20:	/* field sep */
			Bprint(&bout, ":");
			break;
		case 21:	/* field end */
			Bprint(&bout, ">");
			break;
		case 30:	/* non-breaking hyphen */
			Bprint(&bout, "-");
			break;
		case 31:	/* non-required hyphen */
			break;
		/* 45 (breaking hyphen) is an ordinary '-' and takes the default path */
		case 160:	/* non-breaking space */
			Bprint(&bout, " ");
			break;

		/*
		 * these are only supposed to get used when special is set, but we
		 * never see these ascii values otherwise anyway.
		 */

		/*
		 * Empirically, some documents have sections of text where
		 * every character is followed by a zero byte.  Some have sections
		 * of text where there are no zero bytes.  Still others have both
		 * types and alternate between them.  Until we parse which
		 * characters are ``special'', page numbers lose out.
		 */
		case 0:		/* would be <pageno>; dropped, see above */
			break;
		case 1:
			Bprint(&bout, "<picture>");
			break;
		case 2:
			Bprint(&bout, "<footnote>");
			break;
		case 3:
			Bprint(&bout, "<footnote sep>");
			break;
		case 4:
			Bprint(&bout, "<footnote cont>");
			break;
		case 5:
			Bprint(&bout, "<animation>");
			break;
		case 6:
			Bprint(&bout, "<lineno>");
			break;
		case 8:
			Bprint(&bout, "<drawn object>");
			break;
		case 10:
			Bprint(&bout, "<abbrev date>");
			break;
		case 15:
			Bprint(&bout, "<Thursday>");
			break;
		case 16:
			Bprint(&bout, "<day of month>");
			break;
		case 22:
			Bprint(&bout, "<hour>");
			break;
		case 23:
			Bprint(&bout, "<hour hh>");
			break;
		case 24:
			Bprint(&bout, "<minute>");
			break;
		case 25:
			Bprint(&bout, "<minute mm>");
			break;
		case 26:
			Bprint(&bout, "<seconds>");
			break;
		case 27:
			Bprint(&bout, "<AM/PM>");
			break;
		case 28:
			Bprint(&bout, "<hh:mm:ss>");
			break;
		case 29:
			Bprint(&bout, "<date>");
			break;

		/*
		 * Special meanings that are deliberately not handled, because
		 * the code is already taken by a control character above or
		 * falls in printable ascii:
		 *	7 <hand picture>	11 <hh:mm:ss>	12 <section no>
		 *	14 <Thu>		30 <mm/dd/yy>	33 <mm>
		 *	34 <yyyy>		35 <yy>		36 <Feb>
		 *	37 <February>		38 <hh:mm>	39 <long date>
		 *	41 (no output)
		 */
		}
	}
	Bprint(&bout, "\n");

	/* flush the output, release the input, and exit with an explicit success status */
	Bterm(&bout);
	Bterm(b);
	exits(nil);
}