shithub: plan9front

ref: 5a807265a819206f8342ab3a23b940a0c75049fc

View raw version
#pragma lib "libhtml.a"
#pragma src "/sys/src/libhtml"

/* UTILS */
/*
 * Conversions between Rune strings and byte buffers.
 * NOTE(review): chset is presumably one of the charset codes
 * (US_Ascii, UTF_8, ...) and n the element count of buf; whether
 * the result is freshly allocated is not visible here -- confirm
 * against the implementation.
 */
extern uchar*	fromStr(Rune* buf, int n, int chset);
extern Rune*	toStr(uchar* buf, int n, int chset);

/* Common LEX and BUILD enums */

/*
 * Media types.
 * Values are dense and start at 0, so NMEDIATYPES (kept last)
 * equals the number of media types listed before it.
 * Do not reorder: the numeric values follow list position.
 */
enum
{
	/* Appl* */
	ApplMsword = 0,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,

	UnknownType,

	/* Audio* */
	Audio32kadpcm,
	AudioBasic,

	/* Image* */
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,

	/* Model*, Multi* */
	ModelVrml,
	MultiDigest,
	MultiMixed,

	/* Text* */
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,

	/* Video* */
	VideoMpeg,
	VideoQuicktime,

	NMEDIATYPES	/* count of the entries above */
};

/* HTTP methods (stored in Form.method) */
enum
{
	HGet = 0,
	HPost = 1
};

/* Charsets; NCHARSETS is the number of codes defined before it */
enum
{
	UnknownCharset = 0,
	US_Ascii = 1,
	ISO_8859_1 = 2,
	UTF_8 = 3,
	Unicode = 4,
	NCHARSETS = 5
};

/* Frame Target IDs */
enum {
	FTtop = 0,
	FTself = 1,
	FTparent = 2,
	FTblank = 3
};

/* LEX */
/* Lexer-side types, forward-declared so later declarations can name them. */
typedef struct Token Token;
typedef struct Attr Attr;

/* Token is given no definition in this header; tell the compiler that is intended. */
#pragma incomplete Token

/* BUILD */

/*
 * Forward declarations: every structure used below gets a bare
 * typedef name up front, so the definitions can refer to each
 * other regardless of the order in which they appear.
 */
typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;		/* defined in Layout module */

/* Lay is only ever used through pointers here; its definition lives elsewhere. */
#pragma incomplete Lay


/*
 * Alignment types.
 * Horizontal and vertical codes share one numbering; ALnone is 0.
 */
enum {
	ALnone = 0,
	ALleft = 1,
	ALcenter = 2,
	ALright = 3,
	ALjustify = 4,
	ALchar = 5,
	ALtop = 6,
	ALmiddle = 7,
	ALbottom = 8,
	ALbaseline = 9
};

/* A horizontal and a vertical alignment code, one byte each. */
struct Align
{
	uchar	halign;		/* one of ALnone, ALleft, etc. */
	uchar	valign;		/* one of ALnone, ALtop, etc. */
};

/*
 * A Dimen holds a dimension specification, in particular for the
 * cases where a number may be followed by % or * to indicate a
 * percentage of the total or a relative weight.
 * Dnone means that no dimension was specified.
 */

/*
 * Kind and value share a single word: the kind occupies
 * bits 29-30 (Dkindmask) and every other bit holds the value
 * (Dspecmask).
 */
enum {
	Dnone =		0,
	Dpixels =	0x20000000,
	Dpercent =	0x40000000,
	Drelative =	0x60000000,
	Dkindmask =	0x60000000,
	Dspecmask =	(~Dkindmask)
};

/*
 * One packed dimension: a D* kind code in the Dkindmask bits,
 * combined with the numeric value in the remaining bits.
 */
struct Dimen
{
	int	kindspec;	/* kind | spec */
};

/*
 * Background is either an image or a color.
 * If both are set, the image has precedence.
 */
struct Background
{
	Rune*	image;		/* url */
	int	color;		/* background color value */
};


/*
 * There are about a half dozen Item variants.
 * They all look like this at the start (using Plan 9 C's
 * anonymous structure member mechanism),
 * and then the tag field dictates what extra fields there are.
 */
struct Item
{
	Item*	next;		/* successor in list of items */
	int	width;		/* width in pixels (0 for floating items) */
	int	height;		/* height in pixels */
	int	ascent;		/* ascent (from top to baseline) in pixels */
	int	anchorid;	/* if nonzero, which anchor we're in */
	int	state;		/* flags and values (the IF* constants) */
	Genattr*genattr;	/* generic attributes and events */
	int	tag;		/* variant discriminator: Itexttag, etc. */
};

/* Item variant tags: the value of Item.tag, one per I* structure */
enum {
	Itexttag = 0,
	Iruletag = 1,
	Iimagetag = 2,
	Iformfieldtag = 3,
	Itabletag = 4,
	Ifloattag = 5,
	Ispacertag = 6
};

/* Text item: a run of characters with one font, color and line style. */
struct Itext
{
	Item;			/* (with tag ==Itexttag) */
	Rune*	s;		/* the characters */
	int	fnt;		/* style*NumSize+size (see font stuff, below) */
	int	fg;		/* Pixel (color) for text */
	uchar	voff; /* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
	uchar	ul;		/* ULnone, ULunder, or ULmid */
};

/* Rule item: carries the alignment, shading, size, color and width of a rule. */
struct Irule
{
	Item;			/* (with tag ==Iruletag) */
	uchar	align;		/* alignment spec */
	uchar	noshade;	/* if true, don't shade */
	int	size;		/* size attr (rule height) */
	int	color;		/* color attr */
	Dimen	wspec;		/* width spec */
};


/*
 * Image item.  Besides the usual Item.next chain, image items are
 * also threaded through nextimage (see Docinfo.images).
 */
struct Iimage
{
	Item;			/* (with tag ==Iimagetag) */
	Rune*	imsrc;		/* image src url */
	int	imwidth;	/* spec width (actual, if no spec) */
	int	imheight;	/* spec height (actual, if no spec) */
	Rune*	altrep;		/* alternate representation, in absence of image */
	Map*	map;		/* if non-nil, client side map */
	int	ctlid;		/* if animated */
	uchar	align;		/* vertical alignment */
	uchar	hspace;		/* in pixels; buffer space on each side */
	uchar	vspace;		/* in pixels; buffer space on top and bottom */
	uchar	border;		/* in pixels: border width to draw around image */
	Iimage*	nextimage;	/* next in list of document's images */
	void*	aux;		/* NOTE(review): presumably free for the caller's use -- confirm */
};


/* Form-field item: points at the Formfield that describes it. */
struct Iformfield
{
	Item;			/* (with tag ==Iformfieldtag) */
	Formfield*formfield;	/* description of the field */
	void*	aux;		/* NOTE(review): presumably free for the caller's use -- confirm */
};


/* Table item: points at the Table that describes it. */
struct Itable
{
	Item;			/* (with tag ==Itabletag) */
	Table*	table;		/* description of the table */
};


/* Floating item: wraps a table or image item that floats to a margin. */
struct Ifloat
{
	Item;			/* (with tag ==Ifloattag) */
	Item*	item;		/* table or image item that floats */
	int	x;		/* x coord of top (from right, if ALright) */
	int	y;		/* y coord of top */
	uchar	side;		/* margin it floats to: ALleft or ALright */
	uchar	infloats;	/* true if this has been added to a lay.floats */
};


/* Spacer item: its only extra field is the ISP* kind of spacing. */
struct Ispacer
{
	Item;			/* (with tag ==Ispacertag) */
	int	spkind;		/* ISPnull, etc. */
};

/*
 * Item state (Item.state): single-bit flags in the high bits,
 * plus two 8-bit value fields in the low 16 bits.
 */
enum {
	/* flags */
	IFbrk	= 0x80000000,	/* forced break before this item */
	IFbrksp	= 0x40000000,	/* add 1 line space to break (IFbrk set too) */
	IFnobrk	= 0x20000000,	/* break not allowed before this item */
	IFcleft	= 0x10000000,	/* clear left floats (IFbrk set too) */
	IFcright= 0x08000000,	/* clear right floats (IFbrk set too) */
	IFwrap	= 0x04000000,	/* in a wrapping (non-pre) line */
	IFhang	= 0x02000000,	/* in a hanging (into left indent) item */
	IFrjust	= 0x01000000,	/* right justify current line */
	IFcjust	= 0x00800000,	/* center justify current line */
	IFsmap	= 0x00400000,	/* image is server-side map */

	/* value fields */
	IFindentshift	= 8,
	IFindentmask	= (0xFF<<IFindentshift),	/* current indent, in tab stops */
	IFhangmask	= 0xFF	/* current hang into left indent, in 1/10th tabstops */
};

/* Bias added to Itext's voff field */
enum {
	Voffbias = 128
};

/* Spacer kinds (Ispacer.spkind) */
enum {
	ISPnull = 0,	/* 0 height and width */
	ISPvline = 1,	/* height and ascent of current font */
	ISPhspace = 2,	/* width of space in current font */
	ISPgeneral = 3	/* other purposes (e.g., between markers and list) */
};

/* Generic attributes and events (not many elements will have any of these set) */
struct Genattr
{
	Rune*	id;		/* NOTE(review): presumably the element's id attr -- confirm */
	Rune*	class;		/* NOTE(review): presumably the class attr -- confirm */
	Rune*	style;		/* NOTE(review): presumably the style attr -- confirm */
	Rune*	title;		/* NOTE(review): presumably the title attr -- confirm */
	SEvent*	events;		/* list of script events, linked through SEvent.next */
};

/* One script event: an SE* event type paired with a script string. */
struct SEvent
{
	SEvent*	next;		/* in list of events */
	int	type;		/* SEonblur, etc. */
	Rune*	script;		/* NOTE(review): presumably the script text for the event -- confirm */
};

/*
 * Script event types (SEvent.type).
 * Numscriptev, kept last, is the number of event types.
 */
enum {
	SEonblur = 0,
	SEonchange = 1,
	SEonclick = 2,
	SEondblclick = 3,
	SEonfocus = 4,
	SEonkeypress = 5,
	SEonkeyup = 6,
	SEonload = 7,
	SEonmousedown = 8,
	SEonmousemove = 9,
	SEonmouseout = 10,
	SEonmouseover = 11,
	SEonmouseup = 12,
	SEonreset = 13,
	SEonselect = 14,
	SEonsubmit = 15,
	SEonunload = 16,
	Numscriptev = 17
};

/* Form field types (Formfield.ftype) */
enum {
	Ftext = 0,
	Fpassword = 1,
	Fcheckbox = 2,
	Fradio = 3,
	Fsubmit = 4,
	Fhidden = 5,
	Fimage = 6,
	Freset = 7,
	Ffile = 8,
	Fbutton = 9,
	Fselect = 10,
	Ftextarea = 11
};

/*
 * Information about a field in a form.
 * Fields of one form are chained through next; options and image
 * are only meaningful for Fselect and Fimage fields respectively.
 */
struct Formfield
{
	Formfield*next;		/* in list of fields for a form */
	int	ftype;		/* Ftext, Fpassword, etc. */
	int	fieldid;	/* serial no. of field within its form */
	Form*	form;		/* containing form */
	Rune*	name;		/* name attr */
	Rune*	value;		/* value attr */
	int	size;		/* size attr */
	int	maxlength;	/* maxlength attr */
	int	rows;		/* rows attr */
	int	cols;		/* cols attr */
	uchar	flags;		/* FFchecked, etc. */
	Option*	options;	/* for Fselect fields */
	Item*	image;		/* image item, for Fimage fields */
	int	ctlid;		/* identifies control for this field in layout */
	SEvent*	events;		/* same as genattr->events of containing item */
};

/* Formfield.flags bits; both fit in the uchar that holds them */
enum {
	FFchecked =	0x80,
	FFmultiple =	0x40
};

/*
 * Option holds info about an option in a "select" form field.
 * Options of one field are chained through next.
 */
struct Option
{
	Option*	next;		/* next in list of options for a field */
	int	selected;	/* true if selected initially */
	Rune*	value;		/* value attr */
	Rune*	display;	/* display string */
};

/*
 * Form holds info about a form.
 * Forms of one document are chained through next.
 */
struct Form
{
	Form*	next;		/* in list of forms for document */
	int	formid;		/* serial no. of form within its doc */
	Rune*	name;	/* name or id attr (netscape uses name, HTML 4.0 uses id) */
	Rune*	action;		/* action attr */
	int	target;		/* target attr as targetid */
	int	method;		/* HGet or HPost */
	int	nfields;	/* number of fields */
	Formfield*fields;	/* form's fields, in input order */
};

/*
 * Flags used in various table structures
 * (the uchar flags fields of Table, Tablerow and Tablecell)
 */
enum {
	TFparsing =	0x80,
	TFnowrap =	0x40,
	TFisth =	0x20
};


/*
 * Information about a table.
 * Tables of one document are chained through next.
 * rows starts out as a linked list while parsing and is an array
 * afterwards; grid gives 2-D access to the cells.
 */
struct Table
{
	Table*	next;		/* next in list of document's tables */
	int	tableid;	/* serial no. of table within its doc */
	Tablerow*rows;		/* array of row specs (list during parsing) */
	int	nrow;		/* total number of rows */
	Tablecol*cols;		/* array of column specs */
	int	ncol;		/* total number of columns */
	Tablecell*cells;	/* list of unique cells */
	int	ncell;		/* total number of cells */
	Tablecell***grid;	/* 2-D array of cells */
	Align	align;		/* alignment spec for whole table */
	Dimen	width;		/* width spec for whole table */
	int	border;		/* border attr */
	int	cellspacing;	/* cellspacing attr */
	int	cellpadding;	/* cellpadding attr */
	Background background;	/* table background */
	Item*	caption;	/* linked list of Items, giving caption */
	uchar	caption_place;	/* ALtop or ALbottom */
	Lay*	caption_lay;	/* layout of caption */
	int	totw;		/* total width */
	int	toth;		/* total height */
	int	caph;		/* caption height */
	int	availw;		/* used for previous 3 sizes */
	Token*	tabletok;	/* token that started the table */
	uchar	flags;		/* Lchanged, perhaps */
};


/* One column of a table (element of the Table.cols array). */
struct Tablecol
{
	int	width;		/* column width */
	Align	align;		/* alignment spec for the column */
	Point	pos;		/* NOTE(review): presumably the column's position within the table -- confirm */
};


/* One row of a table (element of Table.rows). */
struct Tablerow
{
	Tablerow*next;		/* Next in list of rows, during parsing */
	Tablecell*cells;	/* Cells in row, linked through nextinrow */
	int	height;		/* row height */
	int	ascent;		/* row ascent */
	Align	align;		/* alignment spec for the row */
	Background background;	/* row background */
	Point	pos;		/* NOTE(review): presumably the row's position within the table -- confirm */
	uchar	flags;		/* 0 or TFparsing */
};

/*
 * A Tablecell is one cell of a table.
 * It may span multiple rows and multiple columns.
 * Cells are linked on two lists: the list of all the cells of
 * a table (the next pointers), and the list of all the
 * cells that start in a given row (the nextinrow pointers).
 */
struct Tablecell
{
	Tablecell*next;		/* next in list of table's cells */
	Tablecell*nextinrow;	/* next in list of row's cells */
	int	cellid;		/* serial no. of cell within table */
	Item*	content;	/* contents before layout */
	Lay*	lay;		/* layout of cell */
	int	rowspan;	/* number of rows spanned by this cell */
	int	colspan;	/* number of cols spanned by this cell */
	Align	align;		/* alignment spec */
	uchar	flags;		/* TFparsing, TFnowrap, TFisth */
	Dimen	wspec;		/* suggested width */
	int	hspec;		/* suggested height */
	Background background;	/* cell background */
	int	minw;		/* minimum possible width */
	int	maxw;		/* maximum width */
	int	ascent;		/* cell's ascent */
	int	row;		/* row of upper left corner */
	int	col;		/* col of upper left corner */
	Point	pos;		/* nw corner of cell contents, in cell */
};

/*
 * Anchor is for info about hyperlinks that go somewhere.
 * Anchors of one document are chained through next.
 */
struct Anchor
{
	Anchor*	next;		/* next in list of document's anchors */
	int	index;		/* serial no. of anchor within its doc */
	Rune*	name;		/* name attr */
	Rune*	href;		/* href attr */
	int	target;		/* target attr as targetid */
};


/*
 * DestAnchor is for info about hyperlinks that are destinations.
 * It ties an anchor name to the Item that is the destination.
 */
struct DestAnchor
{
	DestAnchor*next;	/* next in list of document's destanchors */
	int	index;		/* serial no. of anchor within its doc */
	Rune*	name;		/* name attr */
	Item*	item;		/* the destination */
};


/*
 * Maps (client side).
 * A Map is a named list of Areas; maps of one document are
 * chained through next.
 */
struct Map
{
	Map*	next;		/* next in list of document's maps */
	Rune*	name;		/* map name */
	Area*	areas;		/* list of map areas */
};


/* One area of a client-side map: a shape, its coordinates and the link it leads to. */
struct Area
{
	Area*	next;		/* next in list of a map's areas */
	int	shape;		/* SHrect, etc. */
	Rune*	href;		/* associated hypertext link */
	int	target;		/* associated target frame */
	Dimen*	coords;		/* array of coords for shape */
	int	ncoords;	/* size of coords array */
};

/* Area shapes (Area.shape) */
enum {
	SHrect = 0,
	SHcircle = 1,
	SHpoly = 2
};

/* Fonts are represented by integers: style*NumSize + size */

/* Font styles; NumStyle, kept last, is the number of styles */
enum {
	FntR = 0,	/* roman */
	FntI = 1,	/* italic */
	FntB = 2,	/* bold */
	FntT = 3,	/* typewriter */
	NumStyle = 4
};

/* Font sizes; NumSize, kept last, is the number of sizes */
enum {
	Tiny = 0,
	Small = 1,
	Normal = 2,
	Large = 3,
	Verylarge = 4,
	NumSize = 5
};

/*
 * NumFnt: number of distinct font integers (one per style/size pair).
 * DefFnt: the font integer for roman style at Normal size.
 */
enum {
	NumFnt = (NumStyle * NumSize),
	DefFnt = (FntR * NumSize) + Normal
};

/* Lines are needed through some text items, for underlining or strikethrough (Itext.ul) */
enum {
	ULnone = 0,
	ULunder = 1,
	ULmid = 2
};

/* Kidinfo flags (bits of Kidinfo.flags) */
enum {
	FRnoresize =	0x01,
	FRnoscroll =	0x02,
	FRhscroll =	0x04,
	FRvscroll =	0x08,
	FRhscrollauto =	0x10,
	FRvscrollauto =	0x20
};

/*
 * Information about child frame or frameset.
 * isframeset selects which of the two field groups below applies.
 */
struct Kidinfo
{
	Kidinfo*next;		/* in list of kidinfos for a frameset */
	int	isframeset;	/* true for a frameset, false for a frame */

	/* fields for "frame" */
	Rune*	src;		/* only nil if a "dummy" frame or this is frameset */
	Rune*	name;		/* always non-empty if this isn't frameset */
	int	marginw;	/* NOTE(review): presumably the marginwidth attr -- confirm */
	int	marginh;	/* NOTE(review): presumably the marginheight attr -- confirm */
	int	framebd;	/* NOTE(review): presumably the frameborder attr -- confirm */
	int	flags;		/* FRnoresize, etc. */

	/* fields for "frameset" */
	Dimen*	rows;		/* array of row dimensions */
	int	nrows;		/* length of rows */
	Dimen*	cols;		/* array of col dimensions */
	int	ncols;		/* length of cols */
	Kidinfo*kidinfos;	/* children, linked through next */
	Kidinfo*nextframeset;	/* parsing stack */
};


/*
 * Document info (global information about HTML page).
 * The first group of fields describes the document itself; the
 * second holds the heads of the per-document lists (anchors,
 * forms, tables, maps, images).
 */
struct Docinfo
{
	/* stuff from HTTP headers, doc head, and body tag */
	Rune*	src;		/* original source of doc */
	Rune*	base;		/* base URL of doc */
	Rune*	doctitle;	/* from <title> element */
	Background background;	/* background specification */
	Iimage*	backgrounditem;	/* Image Item for doc background image, or nil */
	int	text;		/* doc foreground (text) color */
	int	link;		/* unvisited hyperlink color */
	int	vlink;		/* visited hyperlink color */
	int	alink;		/* highlighting hyperlink color */
	int	target;		/* target frame default */
	int	chset;		/* ISO_8859_1, etc. */
	int	mediatype;	/* TextHtml, etc. */
	int	scripttype;	/* TextJavascript, etc. */
	int	hasscripts;	/* true if scripts used */
	Rune*	refresh;	/* content of <http-equiv=Refresh ...> */
	Kidinfo*kidinfo;	/* if a frameset */
	int	frameid;	/* id of document frame */

	/* info needed to respond to user actions */
	Anchor*	anchors;	/* list of href anchors */
	DestAnchor*dests;	/* list of destination anchors */
	Form*	forms;		/* list of forms */
	Table*	tables;		/* list of tables */
	Map*	maps;		/* list of maps */
	Iimage*	images;		/* list of image items (through nextimage links) */
};

/*
 * Library entry points.
 * NOTE(review): the per-function notes below are inferred from names
 * and signatures only; confirm each against the implementation.
 */
extern int	dimenkind(Dimen d);	/* presumably the D* kind part of d.kindspec */
extern int	dimenspec(Dimen d);	/* presumably the value part of d.kindspec */
extern void	freedocinfo(Docinfo* d);	/* presumably releases d and what it owns */
extern void	freeitems(Item* ithead);	/* presumably releases the item list headed by ithead */
/* presumably parses datalen bytes at data into an Item list and stores a Docinfo through pdi */
extern Item*	parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
extern void	printitems(Item* items, char* msg);	/* presumably a debugging dump of an item list */
extern int	targetid(Rune* s);	/* presumably maps a target name to its integer id */
extern Rune*	targetname(int targid);	/* presumably the inverse of targetid */
extern int	validitems(Item* i);	/* presumably a consistency check of an item list */

/* Declare to the compiler's format checking that the print verb "I" takes an Item*. */
#pragma varargck	type "I"	Item*

/*
 * Control print output.
 * These are declared here and defined elsewhere.
 * NOTE(review): presumably nonzero enables warnings, lexer tracing
 * and builder tracing respectively -- confirm.
 */
extern int	warn;
extern int	dbglex;
extern int	dbgbuild;

/*
 * To be provided by caller: the library declares these allocators
 * but does not define them.
 * emalloc and erealloc should not return if can't get memory.
 * emalloc should zero its memory.
 */
extern void*	emalloc(ulong);
extern void*	erealloc(void* p, ulong size);