#pragma lib "libhtml.a" #pragma src "/sys/src/libhtml" // UTILS extern uchar* fromStr(Rune* buf, int n, int chset); extern Rune* toStr(uchar* buf, int n, int chset); // Common LEX and BUILD enums // Media types enum { ApplMsword, ApplOctets, ApplPdf, ApplPostscript, ApplRtf, ApplFramemaker, ApplMsexcel, ApplMspowerpoint, UnknownType, Audio32kadpcm, AudioBasic, ImageCgm, ImageG3fax, ImageGif, ImageIef, ImageJpeg, ImagePng, ImageTiff, ImageXBit, ImageXBit2, ImageXBitmulti, ImageXXBitmap, ModelVrml, MultiDigest, MultiMixed, TextCss, TextEnriched, TextHtml, TextJavascript, TextPlain, TextRichtext, TextSgml, TextTabSeparatedValues, TextXml, VideoMpeg, VideoQuicktime, NMEDIATYPES }; // HTTP methods enum { HGet, HPost }; // Charsets enum { UnknownCharset, US_Ascii, ISO_8859_1, UTF_8, Unicode, WIN_1251, KOI8, NCHARSETS }; // Frame Target IDs enum { FTtop, FTself, FTparent, FTblank }; // LEX typedef struct Token Token; typedef struct Attr Attr; #pragma incomplete Token // BUILD typedef struct Item Item; typedef struct Itext Itext; typedef struct Irule Irule; typedef struct Iimage Iimage; typedef struct Iformfield Iformfield; typedef struct Itable Itable; typedef struct Ifloat Ifloat; typedef struct Ispacer Ispacer; typedef struct Genattr Genattr; typedef struct SEvent SEvent; typedef struct Formfield Formfield; typedef struct Option Option; typedef struct Form Form; typedef struct Table Table; typedef struct Tablecol Tablecol; typedef struct Tablerow Tablerow; typedef struct Tablecell Tablecell; typedef struct Align Align; typedef struct Dimen Dimen; typedef struct Anchor Anchor; typedef struct DestAnchor DestAnchor; typedef struct Map Map; typedef struct Area Area; typedef struct Background Background; typedef struct Kidinfo Kidinfo; typedef struct Docinfo Docinfo; typedef struct Stack Stack; typedef struct Pstate Pstate; typedef struct ItemSource ItemSource; typedef struct Lay Lay; // defined in Layout module #pragma incomplete Lay // Alignment types enum { ALnone = 0, ALleft, ALcenter, ALright, ALjustify, ALchar, ALtop, ALmiddle, ALbottom, ALbaseline }; struct Align { uchar halign; // one of ALnone, ALleft, etc. uchar valign; // one of ALnone, ALtop, etc. }; // A Dimen holds a dimension specification, especially for those // cases when a number can be followed by a % or a * to indicate // percentage of total or relative weight. // Dnone means no dimension was specified // To fit in a word, use top bits to identify kind, rest for value enum { Dnone = 0, Dpixels = (1<<29), Dpercent = (2<<29), Drelative = (3<<29), Dkindmask = (3<<29), Dspecmask = (~Dkindmask) }; struct Dimen { int kindspec; // kind | spec }; // Background is either an image or a color. // If both are set, the image has precedence. struct Background { Rune* image; // url int color; }; // There are about a half dozen Item variants. // The all look like this at the start (using Plan 9 C's // anonymous structure member mechanism), // and then the tag field dictates what extra fields there are. struct Item { Item* next; // successor in list of items int width; // width in pixels (0 for floating items) int height; // height in pixels Rectangle r; int ascent; // ascent (from top to baseline) in pixels int anchorid; // if nonzero, which anchor we're in int state; // flags and values (see below) Genattr* genattr; // generic attributes and events int tag; // variant discriminator: Itexttag, etc. }; // Item variant tags enum { Itexttag, Iruletag, Iimagetag, Iformfieldtag, Itabletag, Ifloattag, Ispacertag }; struct Itext { Item; // (with tag ==Itexttag) Rune* s; // the characters int fnt; // style*NumSize+size (see font stuff, below) int fg; // Pixel (color) for text uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down) uchar ul; // ULnone, ULunder, or ULmid }; struct Irule { Item; // (with tag ==Iruletag) uchar align; // alignment spec uchar noshade; // if true, don't shade int size; // size attr (rule height) Dimen wspec; // width spec }; struct Iimage { Item; // (with tag ==Iimagetag) Rune* imsrc; // image src url int imwidth; // spec width (actual, if no spec) int imheight; // spec height (actual, if no spec) Rune* altrep; // alternate representation, in absence of image Map* map; // if non-nil, client side map int ctlid; // if animated uchar align; // vertical alignment uchar hspace; // in pixels; buffer space on each side uchar vspace; // in pixels; buffer space on top and bottom uchar border; // in pixels: border width to draw around image Iimage* nextimage; // next in list of document's images void* aux; }; struct Iformfield { Item; // (with tag ==Iformfieldtag) Formfield* formfield; }; struct Itable { Item; // (with tag ==Itabletag) Table* table; }; struct Ifloat { Item; // (with tag ==Ifloattag) Item* item; // table or image item that floats int x; // x coord of top (from right, if ALright) int y; // y coord of top uchar side; // margin it floats to: ALleft or ALright uchar infloats; // true if this has been added to a lay.floats Ifloat* nextfloat; // in list of floats }; struct Ispacer { Item; // (with tag ==Ispacertag) int spkind; // ISPnull, etc. }; // Item state flags and value fields enum { IFbrk = 0x80000000, // forced break before this item IFbrksp = 0x40000000, // add 1 line space to break (IFbrk set too) IFnobrk = 0x20000000, // break not allowed before this item IFcleft = 0x10000000, // clear left floats (IFbrk set too) IFcright = 0x08000000, // clear right floats (IFbrk set too) IFwrap = 0x04000000, // in a wrapping (non-pre) line IFhang = 0x02000000, // in a hanging (into left indent) item IFrjust = 0x01000000, // right justify current line IFcjust = 0x00800000, // center justify current line IFsmap = 0x00400000, // image is server-side map IFindentshift = 8, IFindentmask = (255<events of containing item void* aux; }; enum { FFchecked = (1<<7), FFmultiple = (1<<6) }; // Option holds info about an option in a "select" form field struct Option { Option* next; // next in list of options for a field int selected; // true if selected initially Rune* value; // value attr Rune* display; // display string }; // Form holds info about a form struct Form { Form* next; // in list of forms for document int formid; // serial no. of form within its doc Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id) Rune* action; // action attr int target; // target attr as targetid int method; // HGet or HPost int nfields; // number of fields Formfield* fields; // field's forms, in input order }; // Flags used in various table structures enum { TFparsing = (1<<7), TFnowrap = (1<<6), TFisth = (1<<5) }; // Information about a table struct Table { Table* next; // next in list of document's tables int tableid; // serial no. of table within its doc Tablerow* rows; // array of row specs (list during parsing) int nrow; // total number of rows Tablecol* cols; // array of column specs int ncol; // total number of columns Tablecell* cells; // list of unique cells int ncell; // total number of cells Tablecell*** grid; // 2-D array of cells Align align; // alignment spec for whole table Dimen width; // width spec for whole table int border; // border attr int cellspacing; // cellspacing attr int cellpadding; // cellpadding attr Background background; // table background Item* caption; // linked list of Items, giving caption uchar caption_place; // ALtop or ALbottom Lay* caption_lay; // layout of caption int totw; // total width int toth; // total height int caph; // caption height int availw; // used for previous 3 sizes Token* tabletok; // token that started the table uchar flags; // Lchanged, perhaps }; struct Tablecol { int width; Align align; Point pos; }; struct Tablerow { Tablerow* next; // Next in list of rows, during parsing Tablecell* cells; // Cells in row, linked through nextinrow int height; int ascent; Align align; Background background; Point pos; uchar flags; // 0 or TFparsing }; // A Tablecell is one cell of a table. // It may span multiple rows and multiple columns. // Cells are linked on two lists: the list for all the cells of // a document (the next pointers), and the list of all the // cells that start in a given row (the nextinrow pointers) struct Tablecell { Tablecell* next; // next in list of table's cells Tablecell* nextinrow; // next in list of row's cells int cellid; // serial no. of cell within table Item* content; // contents before layout Lay* lay; // layout of cell int rowspan; // number of rows spanned by this cell int colspan; // number of cols spanned by this cell Align align; // alignment spec uchar flags; // TFparsing, TFnowrap, TFisth Dimen wspec; // suggested width int hspec; // suggested height Background background; // cell background int minw; // minimum possible width int maxw; // maximum width int ascent; // cell's ascent int row; // row of upper left corner int col; // col of upper left corner Point pos; // nw corner of cell contents, in cell Rectangle r; }; // Anchor is for info about hyperlinks that go somewhere struct Anchor { Anchor* next; // next in list of document's anchors int index; // serial no. of anchor within its doc Rune* name; // name attr Rune* href; // href attr int target; // target attr as targetid }; // DestAnchor is for info about hyperlinks that are destinations struct DestAnchor { DestAnchor* next; // next in list of document's destanchors int index; // serial no. of anchor within its doc Rune* name; // name attr Item* item; // the destination }; // Maps (client side) struct Map { Map* next; // next in list of document's maps Rune* name; // map name Area* areas; // list of map areas }; struct Area { Area* next; // next in list of a map's areas int shape; // SHrect, etc. Rune* href; // associated hypertext link int target; // associated target frame Dimen* coords; // array of coords for shape int ncoords; // size of coords array }; // Area shapes enum { SHrect, SHcircle, SHpoly }; // Fonts are represented by integers: style*NumSize + size // Font styles enum { FntR, // roman FntI, // italic FntB, // bold FntT, // typewriter NumStyle }; // Font sizes enum { Tiny, Small, Normal, Large, Verylarge, NumSize }; enum { NumFnt = (NumStyle*NumSize), DefFnt = (FntR*NumSize+Normal) }; // Lines are needed through some text items, for underlining or strikethrough enum { ULnone, ULunder, ULmid }; // Kidinfo flags enum { FRnoresize = (1<<0), FRnoscroll = (1<<1), FRhscroll = (1<<2), FRvscroll = (1<<3), FRhscrollauto = (1<<4), FRvscrollauto = (1<<5) }; // Information about child frame or frameset struct Kidinfo { Kidinfo* next; // in list of kidinfos for a frameset int isframeset; // fields for "frame" Rune* src; // only nil if a "dummy" frame or this is frameset Rune* name; // always non-empty if this isn't frameset int marginw; int marginh; int framebd; int flags; // fields for "frameset" Dimen* rows; // array of row dimensions int nrows; // length of rows Dimen* cols; // array of col dimensions int ncols; // length of cols Kidinfo* kidinfos; Kidinfo* nextframeset; // parsing stack }; // Document info (global information about HTML page) struct Docinfo { // stuff from HTTP headers, doc head, and body tag Rune* src; // original source of doc Rune* base; // base URL of doc Rune* doctitle; // from element Background background; // background specification Iimage* backgrounditem; // Image Item for doc background image, or nil int text; // doc foreground (text) color int link; // unvisited hyperlink color int vlink; // visited hyperlink color int alink; // highlighting hyperlink color int target; // target frame default int chset; // ISO_8859, etc. int mediatype; // TextHtml, etc. int scripttype; // TextJavascript, etc. int hasscripts; // true if scripts used Rune* refresh; // content of <http-equiv=Refresh ...> Kidinfo* kidinfo; // if a frameset int frameid; // id of document frame // info needed to respond to user actions Anchor* anchors; // list of href anchors DestAnchor* dests; // list of destination anchors Form* forms; // list of forms Table* tables; // list of tables Map* maps; // list of maps Iimage* images; // list of image items (through nextimage links) }; extern int dimenkind(Dimen d); extern int dimenspec(Dimen d); extern void freedocinfo(Docinfo* d); extern void freeitems(Item* ithead); extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi); extern void printitems(Item* items, char* msg); extern int targetid(Rune* s); extern Rune* targetname(int targid); extern int validitems(Item* i); #pragma varargck type "I" Item* // Control print output extern int warn; extern int dbglex; extern int dbgbuild; // To be provided by caller // emalloc and erealloc should not return if can't get memory. // emalloc should zero its memory. extern void* emalloc(ulong); extern void* erealloc(void* p, ulong size);