/* html.h (c) 1998-2001 (W3C) MIT, INRIA, Keio University See tidy.c for the copyright notice. CVS Info : $Author: creitzel $ $Date: 2001/10/26 13:54:45 $ $Revision: 1.60 $ */ /* indentation modes */ #define NO_INDENT 0 #define BLOCK_INDENT 1 #define SMART_INDENT 2 /* character encodings */ #define RAW 0 #define ASCII 1 #define LATIN1 2 #define UTF8 3 #define ISO2022 4 #define MACROMAN 5 #if SUPPORT_UTF16_ENCODINGS #define UTF16LE 6 #define UTF16BE 7 #define UTF16 8 #endif #define WIN1252 9 #if SUPPORT_ASIAN_ENCODINGS #define BIG5 10 /* #431953 - RJ */ #define SHIFTJIS 11 /* #431953 - RJ */ /* RJ. Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints (i.e., to Unicode) before being recoded into UTF-8. This may be confusing: usually UTF-8 implies ISO10646 codepoints. */ #endif typedef struct { int encoding; int state; /* for ISO 2022 */ FILE *fp; } Out; void outc(uint c, Out *out); /* states for ISO 2022 A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants "ESC" + "$" + ? and "ESC" + "$" + "(" + ? 
for multibyte character sets */ #define FSM_ASCII 0 #define FSM_ESC 1 #define FSM_ESCD 2 #define FSM_ESCDP 3 #define FSM_ESCP 4 #define FSM_NONASCII 5 /* lexer char types */ #define digit 1 #define letter 2 #define namechar 4 #define white 8 #define newline 16 #define lowercase 32 #define uppercase 64 /* lexer GetToken states */ #define LEX_CONTENT 0 #define LEX_GT 1 #define LEX_ENDTAG 2 #define LEX_STARTTAG 3 #define LEX_COMMENT 4 #define LEX_DOCTYPE 5 #define LEX_PROCINSTR 6 #define LEX_ENDCOMMENT 7 #define LEX_CDATA 8 #define LEX_SECTION 9 #define LEX_ASP 10 #define LEX_JSTE 11 #define LEX_PHP 12 #define LEX_XMLDECL 13 /* content model shortcut encoding */ #define CM_UNKNOWN 0 #define CM_EMPTY (1 << 0) #define CM_HTML (1 << 1) #define CM_HEAD (1 << 2) #define CM_BLOCK (1 << 3) #define CM_INLINE (1 << 4) #define CM_LIST (1 << 5) #define CM_DEFLIST (1 << 6) #define CM_TABLE (1 << 7) #define CM_ROWGRP (1 << 8) #define CM_ROW (1 << 9) #define CM_FIELD (1 << 10) #define CM_OBJECT (1 << 11) #define CM_PARAM (1 << 12) #define CM_FRAMES (1 << 13) #define CM_HEADING (1 << 14) #define CM_OPT (1 << 15) #define CM_IMG (1 << 16) #define CM_MIXED (1 << 17) #define CM_NO_INDENT (1 << 18) #define CM_OBSOLETE (1 << 19) #define CM_NEW (1 << 20) #define CM_OMITST (1 << 21) /* Linked list of class names and styles */ struct _style { char *tag; char *tag_class; char *properties; struct _style *next; }; typedef struct _style Style; /* Linked list of style properties */ struct _styleprop { char *name; char *value; struct _styleprop *next; }; typedef struct _styleprop StyleProp; /* mode controlling treatment of doctype */ typedef enum { doctype_omit, doctype_auto, doctype_strict, doctype_loose, doctype_user } DocTypeMode; /* mode controlling treatment of duplicate Attributes */ typedef enum { keep_first, keep_last } DupAttrMode; /* Attribute/Value linked list node */ struct _attval { struct _attval *next; struct _attribute *dict; struct _node *asp; struct _node *php; int delim; char 
*attribute; char *value; }; typedef struct _attval AttVal; /* node->type is one of these values */ #define RootNode 0 #define DocTypeTag 1 #define CommentTag 2 #define ProcInsTag 3 #define TextNode 4 #define StartTag 5 #define EndTag 6 #define StartEndTag 7 #define CDATATag 8 #define SectionTag 9 #define AspTag 10 #define JsteTag 11 #define PhpTag 12 #define XmlDecl 13 struct _node { struct _node *parent; struct _node *prev; struct _node *next; struct _node *content; struct _node *last; struct _attval *attributes; char *element; /* name (null for text nodes) */ uint start; /* start of span onto text array */ uint end; /* end of span onto text array */ uint type; /* TextNode, StartTag, EndTag etc. */ Bool closed; /* true if closed by explicit end tag */ Bool implicit; /* true if inferred */ Bool linebreak; /* true if followed by a line break */ struct _tagdict *was; /* old tag when it was changed */ struct _tagdict *tag; /* tag's dictionary definition */ }; typedef struct _node Node; /* Anchor/Node linked list */ struct _anchor { struct _anchor *next; Node *node; char *name; }; typedef struct _anchor Anchor; /* If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the flavors of Voyager (strict, loose or frameset). 
*/ #define VERS_UNKNOWN 0 #define VERS_HTML20 1 #define VERS_HTML32 2 #define VERS_HTML40_STRICT 4 #define VERS_HTML40_LOOSE 8 #define VERS_FRAMESET 16 #define VERS_XML 32 /* special flag */ #define VERS_NETSCAPE 64 #define VERS_MICROSOFT 128 #define VERS_SUN 256 #define VERS_MALFORMED 512 #define VERS_XHTML11 1024 #define VERS_BASIC 2048 /* all tags and attributes are ok in proprietary version of HTML */ #define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) /* tags/attrs in HTML4 but not in earlier version*/ #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) /* tags/attrs in HTML 4 loose and frameset */ #define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET) /* tags/attrs which are in all versions of HTML except strict */ #define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME) /* tags/attrs in all versions from HTML 3.2 onwards */ #define VERS_FROM32 (VERS_HTML32|VERS_HTML40) /* versions with on... attributes */ #define VERS_EVENTS (VERS_HTML40|VERS_XHTML11) #define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_HTML40|VERS_XHTML11|VERS_BASIC) /* Mosaic handles inlines via a separate stack from other elements We duplicate this to recover from inline markup errors such as: italic text
more italic text normal text which for compatibility with Mosaic is mapped to: italic text
more italic text normal text
Note that any inline end tag pop's the effect of the current
inline start tag, so that pop's in the above example.
*/
/* One entry of the inline stack described in the Mosaic note above. */
typedef struct _inode
{
    struct _inode *next;          /* link to another entry */
    struct _tagdict *tag;         /* tag's dictionary definition */
    char *element;                /* name (null for text nodes) */
    struct _attval *attributes;   /* attribute list kept with the entry */
} IStack;
/* Forward declaration: StreamIn needs a Lexer pointer before struct _lexer is defined. */
typedef struct _lexer Lexer;

/* Number of slots in StreamIn.charbuf. */
#define CHARBUF_SIZE 5

/* Input stream state; non-raw input is cleaned up. */
typedef struct
{
    int state;                   /* FSM for ISO2022 */
    Bool pushed;
    int charbuf[CHARBUF_SIZE];
    int bufpos;
    int tabs;
    int lastcol;
    int curcol;
    int curline;
    int encoding;
    FILE *file;
    Lexer *lexer;                /* needed for error reporting */
} StreamIn;
/* Input stream entry points; all three operate on a StreamIn. */
/* Takes a stdio stream and returns a StreamIn pointer (who frees it is not shown here - confirm in the implementation). */
StreamIn *OpenInput(FILE *fp);
/* Returns an int; presumably the next character or EndOfStream - TODO confirm. */
int ReadChar(StreamIn *in);
/* Hands c back to the stream; presumably via StreamIn.charbuf - TODO confirm. */
void UngetChar(int c, StreamIn *in);
/*
  The members below are private to the lexer.
  Use NewLexer(in) to create a lexer from a StreamIn,
  and FreeLexer(lexer) to free it.
*/
struct _lexer
{
    StreamIn *in;         /* file stream */
    FILE *errout;         /* error output stream */

    uint badAccess;       /* for accessibility errors */
    uint badLayout;       /* for bad style errors */
    uint badChars;        /* for bad char encodings */
    uint badForm;         /* for mismatched/mispositioned form tags */
    uint warnings;        /* count of warnings in this document */
    uint errors;          /* count of errors */

    uint lines;           /* lines seen */
    uint columns;         /* at start of current token */

    Bool waswhite;        /* used to collapse contiguous white space */
    Bool pushed;          /* true after token has been pushed back */
    Bool insertspace;     /* when space is moved after end tag */
    Bool excludeBlocks;   /* Netscape compatibility */
    Bool exiled;          /* true if moved out of table */
    Bool isvoyager;       /* true if xmlns attribute on html element */

    uint versions;        /* bit vector of HTML versions */
    int doctype;          /* version as given by doctype (if any) */
    Bool bad_doctype;     /* e.g. if html or PUBLIC is missing */

    uint txtstart;        /* start of current node */
    uint txtend;          /* end of current node */
    uint state;           /* state of lexer's finite state machine */
    struct _node *token;

    /*
      Lexer character buffer.

      Parse tree nodes span onto this buffer, which
      contains the concatenated text contents of all
      of the elements.

      lexsize must be reset for each file.
    */
    char *lexbuf;         /* char buffer */
    uint lexlength;       /* allocated */
    uint lexsize;         /* used */

    /* Inline stack for compatibility with Mosaic */
    Node *inode;          /* for deferring text node */
    IStack *insert;       /* for inferring inline tags */
    IStack *istack;
    uint istacklength;    /* allocated */
    uint istacksize;      /* used */
    uint istackbase;      /* start of frame */

    Style *styles;        /* used for cleaning up presentation markup */
};
/* Function type (not a pointer type) for tag parsers; the Parse* declarations later in this header use it. */
typedef void (Parser)(Lexer *lexer, Node *node, uint mode);
/* Function type for per-tag attribute checkers; the Check* declarations later in this header use it. */
typedef void (CheckAttribs)(Lexer *lexer, Node *node);
/* declaration for methods that check attribute values */
typedef void (AttrCheck)(Lexer *lexer, Node *node, AttVal *attval);
/* each tag/attribute helps to constrain the version of HTML */
/* vers is presumably a combination of the VERS_* bits - TODO confirm. */
void ConstrainVersion(Lexer *lexer, uint vers);
/* Attribute dictionary entry; AttVal.dict points at one of these. */
typedef struct _attribute
{
    struct _attribute *next;   /* link to another entry */
    char *name;
    Bool nowrap;
    Bool literal;
    unsigned versions;
    AttrCheck *attrchk;        /* attribute value checker (see AttrCheck) */
} Attribute;
/* well known attributes */
/* Declared extern only; the definitions are in another translation unit not visible from this header. */
extern Attribute *attr_href;
extern Attribute *attr_src;
extern Attribute *attr_id;
extern Attribute *attr_name;
extern Attribute *attr_summary;
extern Attribute *attr_alt;
extern Attribute *attr_longdesc;
extern Attribute *attr_title;
/*
  Tag dictionary node
*/

/* types of tags that the user can define */
#define tagtype_empty    1
#define tagtype_inline   2
#define tagtype_block    4
#define tagtype_pre      8

typedef struct _tagdict
{
    struct _tagdict *next;    /* link to another entry */
    char *name;
    uint versions;
    uint model;
    Parser *parser;           /* parser function for this tag (see Parser) */
    CheckAttribs *chkattrs;   /* attribute checker for this tag (see CheckAttribs) */
} Dict;
/* modes for GetToken() (values passed as its mode argument) */
#define IgnoreWhitespace   0
#define MixedContent       1
#define Preformatted       2
#define IgnoreMarkup       3
/* Lexer-level entry points. Definitions are not visible in this header. */
void FatalError(char *msg);
void FileError(FILE *fp, const char *file);
void AddByte(Lexer *lexer, uint c);
/* mode is one of the "modes for GetToken()" constants defined above. */
Node *GetToken(Lexer *lexer, uint mode);
/* one level unget only */
void UngetToken(Lexer *lexer);
/* create lexer for a file stream */
Lexer *NewLexer(StreamIn *in);
/* delete lexer */
void FreeLexer(Lexer *lexer);
Bool EndOfInput(Lexer *lexer);
/* used for script or style */
Node *GetCDATA(Lexer *lexer, Node *container);
/* use this to create node for inferred start tag */
Node *InferredTag(Lexer *lexer, char *name);
/* used to create line break in preformatted text
when cleaning the augean stables (Word2000) */
Node *NewLineNode(Lexer *lexer);
/* used for adding a &nbsp; (non-breaking space) for Word2000 */
Node *NewLiteralTextNode(Lexer *lexer, char* txt );
/* Parser calls this to create RootNode */
Node *NewNode(void);
/* Attribute list helpers operating on AttVal nodes. */
AttVal *NewAttribute(void);
AttVal *NewAttributeEx(char *name, char *value);
void FreeAttrs(Node *node);
void FreeAttribute(AttVal *av);
void RemoveAttribute(Node *node, AttVal *attr);
/* use this to free parse tree node and all its children */
void FreeNode(Node *node);
/* used to clone heading nodes when split by an <hr>
*/
Node *CloneNode(Lexer *lexer, Node *element);
/* lexer char map - must be initialized */
void InitMap(void);
void AddCharToLexer(Lexer *lexer, uint c);
void AddStringLiteral(Lexer *lexer, char *str);
Node *TextToken(Lexer *lexer);
/* used by pretty printer for tag names */
char FoldCase(char c, Bool tocaps);
/* Character classification; presumably backed by the lexer char map and the
"lexer char types" bits (digit, letter, ...) - TODO confirm. */
Bool IsLetter(uint c);
Bool IsDigit(uint c);
Bool IsWhite(uint c);
Bool IsNamechar(uint c);
Bool IsLower(uint c);
Bool IsUpper(uint c);
/* Case conversion on a single character code. */
uint ToLower(uint c);
uint ToUpper(uint c);
/* used to fixup doctype to match contents */
/* The Find* functions take the root node and return a Node pointer;
what they return when nothing matches is not shown here - TODO confirm. */
Node *FindDocType(Node *root);
Node *FindHTML(Node *root);
Node *FindHEAD(Node *root);
Node *FindBody(Node *root);
Bool AddGenerator(Lexer *lexer, Node *root);
void DiscardDocType(Node *root);
Bool FixDocType(Lexer *lexer, Node *node);
char *HTMLVersionName(Lexer *lexer);
/* Returns an int; presumably one of the VERS_* values - TODO confirm. */
int ApparentVersion(Lexer *lexer);
Bool FixXmlDecl(Lexer *lexer, Node *root);
Bool SetXHTMLDocType(Lexer *lexer, Node *root);
void FixId(Lexer *lexer, Node *node);
Bool CheckDocTypeKeyWords(Lexer *lexer, Node *doctype);
/* used to detect faulty attributes */
Bool IsValidAttrName( char *attr);
/* parser.c */
/* Entry points: each takes a lexer and returns a Node pointer (presumably the document root - TODO confirm). */
Node *ParseDocument(Lexer *lexer);
Node *ParseXMLDocument(Lexer *lexer);
Bool XMLPreserveWhiteSpace(Node *element);
void CoerceNode(Lexer *lexer, Node *node, Dict *tag);
Bool CheckNodeIntegrity(Node *node);
Bool IsNewNode(Node *node);
/* Tree editing helpers; they take Node pointers only, no lexer. */
void RemoveNode(Node *node);
Node *DiscardElement(Node *element);
void InsertNodeAtStart(Node *element, Node *node);
void InsertNodeAtEnd(Node *element, Node *node);
void InsertNodeBeforeElement(Node *element, Node *node);
void InsertNodeAfterElement(Node *element, Node *node);
/* Node predicates. */
Bool IsJavaScript(Node *node);
Bool IsBlank(Lexer *lexer, Node *node);
Bool PreContent(Node *node);
/* attrs.c */
/* Attribute table setup and teardown. */
void InitAttrs(void);
void FreeAttrTable(void);
/* Lookup and editing of attributes on a node. */
Attribute *FindAttribute(AttVal *attval);
AttVal *GetAttrByName(Node *node, char *name);
void AddAttribute(Node *node, char *name, char *value);
void RepairDuplicateAttributes(Lexer *lexer, Node *node);
void CheckAttributes(Lexer *lexer, Node *node);
Attribute *CheckAttribute(Lexer *lexer, Node *node, AttVal *attval);
/* Predicates keyed by attribute name. */
Bool IsUrl(char *attrname);
Bool IsScript(char *attrname);
Bool IsBool(char *attrname);
void DeclareLiteralAttrib(char *name);
Bool IsLiteralAttribute(char *attrname);
/* Anchor list maintenance (see the Anchor struct: next, node, name). */
Bool IsAnchorElement(Node *node);
void FreeAnchor(Anchor *a);
void RemoveAnchorByNode(Node *node);
Anchor *NewAnchor(void);
Anchor *AddAnchor(char *name, Node *node);
Node *GetNodeByAnchor(char *name);
void FreeAnchors(void);
/* istack.c */
/* Inline stack operations; presumably these act on the lexer's istack/insert
fields described in struct _lexer - TODO confirm against istack.c. */
void PushInline(Lexer *lexer, Node *node);
void PopInline(Lexer *lexer, Node *node);
Bool IsPushed(Lexer *lexer, Node *node);
int InlineDup(Lexer *lexer, Node *node);
Node *InsertedToken(Lexer *lexer);
/* Takes an attribute list and returns an AttVal pointer (presumably a copy - TODO confirm ownership). */
AttVal *DupAttrs(AttVal *attrs);
void DeferDup(Lexer *lexer);
void InsertNode(Node *element, Node *node);
/* clean.c */
/* Markup clean-up passes; definitions are not visible in this header. */
void FreeStyles(Lexer *lexer);
void AddClass(Node *node, char *classname);
void CleanTree(Lexer *lexer, Node *node);
void NestedEmphasis(Node *node);
void EmFromI(Node *node);
void CleanWord2000(Lexer *lexer, Node *node);
void DropSections(Lexer *lexer, Node *node);
void List2BQ(Node *node);
void BQ2Div(Node *node);
/* Takes the root node; presumably detects Word 2000 generated markup - TODO confirm. */
Bool IsWord2000(Node *root);
void BumpObject(Lexer *lexer, Node *html);
/* entities.c */
/* Entity table setup and teardown. */
void InitEntities(void);
void FreeEntities(void);
/* Conversions between an entity name and a uint code; the result for an
unknown name or code is not shown here - TODO confirm. */
uint EntityCode(char *name);
char *EntityName(uint n);
/* tags.c */
/* tagType is presumably one of the tagtype_* values ("types of tags that the user can define") - TODO confirm. */
void DefineTag(int tagType, char *name);
void ResetDefinedTagSearch(void);
char *FindNextDefinedTag(int tagType);
Bool FindTag(Node *node);
/* Tag table setup and teardown. */
void InitTags(void);
void FreeTags(void);
/* Returns a pointer to a Parser function for the node. */
Parser *FindParser(Node *node);
int HTMLVersion(Lexer *lexer);
/* localize.c -- used for all message text */
/* Each function takes the stdio stream to write to, except the Report*Option/Argument and SetFilename calls. */
void ShowVersion(FILE *fp);
void ReportUnknownOption(char *option);
void ReportBadArgument(char *option);
void NeedsAuthorIntervention(FILE *errout);
void MissingBody(FILE *errout);
void ReportNumberOfSlides(FILE *errout, int count);
void GeneralInfo(FILE *errout);
void SetFilename (char *filename); /* #431895 - fix by Dave Bryan 04 Jan 01 */
void HelloMessage(FILE *errout, char *date, char *filename);
void ReportVersion(FILE *errout, Lexer *lexer,
char *filename, Node *doctype);
void ReportNumWarnings(FILE *errout, Lexer *lexer);
/* pprint.c */
/* UTF-8 helpers working on caller-supplied buffers; the meaning of the
return values is not shown here - TODO confirm in pprint.c. */
uint GetUTF8(unsigned char *str, uint *ch);
char *PutUTF8(char *buf, uint c);
void FreePrintBuf(void);
/* Pretty printers: write the tree rooted at node to the Out sink. */
void PPrintTree(Out *out, uint mode, uint indent,
Lexer *lexer, Node *node);
void PPrintXMLTree(Out *fout, uint mode, uint indent,
Lexer *lexer, Node *node);
void PFlushLine(Out *out, uint indent);
void PCondFlushLine(Out *out, uint indent);
void PrintBody(Out *fout, Lexer *lexer, Node *root); /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */
/* effect is presumably one of the EFFECT_* page transition codes - TODO confirm. */
void AddTransitionEffect(Lexer *lexer, Node *root, int effect, float duration);
/* tidy.c */
/* End-of-input marker; expands to stdio's EOF. */
#define EndOfStream EOF
/* UTF-8 encoding/decoding functions */
/* The Getter/Putter callbacks are called to retrieve/store 0 or more additional UTF-8 bytes. */
/* The Getter callback can also Unget if necessary to re-synchronize the input stream. */
/* "count" is the number of bytes actually stored in external buffer "buf"; <= 0 if error or EOF */
typedef void (GetBytes)(StreamIn *in, unsigned char *buf, int *count, Bool unget);
typedef void (PutBytes)(Out *out, unsigned char *buf, int *count);
/* Pass in null for the buf, in, out, getter and putter parameters respectively if not appropriate */
/* Return < 0 if error or EOF */
int DecodeUTF8BytesToChar(uint *c, uint firstByte, unsigned char *successorBytes,
StreamIn *in, GetBytes getter, int *count);
int EncodeCharToUTF8Bytes(uint c, unsigned char *encodebuf,
Out *out, PutBytes putter, int *count);
/* char encoding used when replacing illegal SGML chars, regardless of specified encoding */
extern int ReplacementCharEncoding;
/* Function for conversion from Windows-1252 to Unicode */
uint DecodeWin1252(uint c);
/* Function to convert from MacRoman to Unicode */
uint DecodeMacRoman(uint c);
/* The memory helpers are defined in platform.h - TRT. */
/* Their prototypes are listed here for reference only and are not compiled:
void *MemAlloc(uint size);
void *MemRealloc(void *mem, uint newsize);
void MemFree(void *mem);
void ClearMemory(void *, uint size);
*/
/* string functions */
/* Tidy's own string helpers; all take plain char pointers and int lengths.
NULL handling and buffer-size requirements are not shown here - check the
implementations before relying on them. */
char *wstrdup(char *str);
char *wstrndup(char *str, int len);
void wstrncpy(char *s1, char *s2, int size);
void wstrcat(char *s1, char *s2);
void wstrcpy(char *s1, char *s2);
int wstrcmp(char *s1, char *s2);
int wstrcasecmp(char *s1, char *s2);
int wstrncmp(char *s1, char *s2, int n);
int wstrncasecmp(char *s1, char *s2, int n);
int wstrlen(char *str);
Bool wsubstr(char *s1, char *s2);
Bool wsubstrn(char *s1, int len1, char *s2 );
Bool wsubstrncase(char *s1, int len1, char *s2 );
int wstrnchr( char *s1, int len1, char cc );
char *wstrtolower(char *s);
/* Variadic output to a stdio stream; msg is presumably a printf-style format - TODO confirm. */
void tidy_out(FILE *fp, const char* msg, ...);
/* error codes for entities/numeric character references
   (the code argument of ReportEntityError) */
#define MISSING_SEMICOLON       1
#define MISSING_SEMICOLON_NCR   2
#define UNKNOWN_ENTITY          3
#define UNESCAPED_AMPERSAND     4
#define APOS_UNDEFINED          5
/* error codes for element messages (numbered consecutively from 1) */
#define MISSING_ENDTAG_FOR        1
#define MISSING_ENDTAG_BEFORE     2
#define DISCARDING_UNEXPECTED     3
#define NESTED_EMPHASIS           4
#define NON_MATCHING_ENDTAG       5
#define TAG_NOT_ALLOWED_IN        6
#define MISSING_STARTTAG          7
#define UNEXPECTED_ENDTAG         8
#define USING_BR_INPLACE_OF       9
#define INSERTING_TAG             10
#define SUSPECTED_MISSING_QUOTE   11
#define MISSING_TITLE_ELEMENT     12
#define DUPLICATE_FRAMESET        13
#define CANT_BE_NESTED            14
#define OBSOLETE_ELEMENT          15
#define PROPRIETARY_ELEMENT       16
#define UNKNOWN_ELEMENT           17
#define TRIM_EMPTY_ELEMENT        18
#define COERCE_TO_ENDTAG          19
#define ILLEGAL_NESTING           20
#define NOFRAMES_CONTENT          21
#define CONTENT_AFTER_BODY        22
#define INCONSISTENT_VERSION      23
#define MALFORMED_COMMENT         24
#define BAD_COMMENT_CHARS         25
#define BAD_XML_COMMENT           26
#define BAD_CDATA_CONTENT         27
#define INCONSISTENT_NAMESPACE    28
#define DOCTYPE_AFTER_TAGS        29
#define MALFORMED_DOCTYPE         30
#define UNEXPECTED_END_OF_FILE    31
#define DTYPE_NOT_UPPER_CASE      32
#define TOO_MANY_ELEMENTS         33
#define UNESCAPED_ELEMENT         34
#define NESTED_QUOTATION          35
#define ELEMENT_NOT_EMPTY         36
/* error codes used for attribute messages (numbered consecutively from 1) */
#define UNKNOWN_ATTRIBUTE        1
#define MISSING_ATTRIBUTE        2
#define MISSING_ATTR_VALUE       3
#define BAD_ATTRIBUTE_VALUE      4
#define UNEXPECTED_GT            5
#define PROPRIETARY_ATTRIBUTE    6
#define PROPRIETARY_ATTR_VALUE   7
#define REPEATED_ATTRIBUTE       8
#define MISSING_IMAGEMAP         9
#define XML_ATTRIBUTE_VALUE      10
#define UNEXPECTED_QUOTEMARK     11
#define MISSING_QUOTEMARK        12
#define ID_NAME_MISMATCH         13
#define BACKSLASH_IN_URI         14
#define FIXED_BACKSLASH          15
#define ILLEGAL_URI_REFERENCE    16
#define ESCAPED_ILLEGAL_URI      17
#define NEWLINE_IN_URI           18
#define ANCHOR_NOT_UNIQUE        19
#define ENTITY_IN_ID             20
#define JOINING_ATTRIBUTE        21
#define UNEXPECTED_EQUALSIGN     22
/* page transition effects */
/* EFFECT_BLEND is the only negative code; it is parenthesized so the macro
   always expands to a single operand wherever it is used. */
#define EFFECT_BLEND               (-1)
#define EFFECT_BOX_IN              0
#define EFFECT_BOX_OUT             1
#define EFFECT_CIRCLE_IN           2
#define EFFECT_CIRCLE_OUT          3
#define EFFECT_WIPE_UP             4
#define EFFECT_WIPE_DOWN           5
#define EFFECT_WIPE_RIGHT          6
#define EFFECT_WIPE_LEFT           7
#define EFFECT_VERT_BLINDS         8
#define EFFECT_HORZ_BLINDS         9
#define EFFECT_CHK_ACROSS          10
#define EFFECT_CHK_DOWN            11
#define EFFECT_RND_DISSOLVE        12
#define EFFECT_SPLIT_VIRT_IN       13
#define EFFECT_SPLIT_VIRT_OUT      14
#define EFFECT_SPLIT_HORZ_IN       15
#define EFFECT_SPLIT_HORZ_OUT      16
#define EFFECT_STRIPS_LEFT_DOWN    17
#define EFFECT_STRIPS_LEFT_UP      18
#define EFFECT_STRIPS_RIGHT_DOWN   19
#define EFFECT_STRIPS_RIGHT_UP     20
#define EFFECT_RND_BARS_HORZ       21
#define EFFECT_RND_BARS_VERT       22
#define EFFECT_RANDOM              23
/* accessibility flaws (one bit per flaw) */
#define MISSING_IMAGE_ALT   (1 << 0)
#define MISSING_LINK_ALT    (1 << 1)
#define MISSING_SUMMARY     (1 << 2)
#define MISSING_IMAGE_MAP   (1 << 3)
#define USING_FRAMES        (1 << 4)
#define USING_NOFRAMES      (1 << 5)
/* presentation flaws (one bit per flaw) */
#define USING_SPACER   (1 << 0)
#define USING_LAYER    (1 << 1)
#define USING_NOBR     (1 << 2)
#define USING_FONT     (1 << 3)
#define USING_BODY     (1 << 4)
/* character encoding errors */
/* "or" DISCARDED_CHAR with the other errors if discarding char; otherwise default is replacing */
#define REPLACED_CHAR           0
#define DISCARDED_CHAR          (1 << 0)
#define VENDOR_SPECIFIC_CHARS   (1 << 1)
#define INVALID_SGML_CHARS      (1 << 2)
#define INVALID_UTF8            (1 << 3)
#define INVALID_UTF16           (1 << 4)
#define ENCODING_MISMATCH       (1 << 5)  /* fatal error */
#define INVALID_URI             (1 << 6)
#define INVALID_NCR             (1 << 7)
/* Command-line help and diagnostic reporting entry points. */
void HelpText(FILE *errout, char *prog);
/* GeneralInfo() is declared once, with the localize.c prototypes earlier in
   this header, so it is not repeated here. */
void UnknownOption(FILE *errout, char c);
void UnknownFile(FILE *errout, char *program, char *file);
void ErrorSummary(Lexer *lexer);
/* The code arguments below take the error code constants defined in this
   header (character encoding, entity, attribute and element codes). */
void ReportEncodingError(Lexer *lexer, uint code, uint c);
void ReportEntityError(Lexer *lexer, uint code, char *entity, int c);
void ReportAttrError(Lexer *lexer, Node *node, AttVal *av, uint code);
void ReportMissingAttr(Lexer* lexer, Node* node, char* name);
void ReportWarning(Lexer *lexer, Node *element, Node *node, uint code);
void ReportError(Lexer *lexer, Node *element, Node *node, uint code);
/* slide maker functions */
/* FindBody() is also used here; it is declared once, with the doctype fix-up
   prototypes earlier in this header, so it is not repeated. */
/* counts number of h2 children belonging to node */
int CountSlides(Node *node);
void PPrintSlide(Out *fout, uint mode, uint indent, Lexer *lexer);
void CreateSlides(Lexer *lexer, Node *root);
/* config parameters, see config.c for defaults */
/* Configuration setup and teardown. */
void InitConfig(void);
void FreeConfig(void);
void ParseConfigFile(char *file);
/* Takes an option name and its parameter as strings; the meaning of the Bool result is not shown here - TODO confirm. */
Bool ParseConfig(char *option, char *parameter);
/* encoding is presumably one of the character encoding codes (RAW, ASCII, ...) - TODO confirm. */
void AdjustCharEncoding(int encoding);
void AdjustConfig(void);
char *CharEncodingName(int encoding);
void PrintConfigOptions(FILE *errout, Bool showCurrent);
/* Configuration variables. They are only declared here; per the
   "config parameters" note above, config.c holds the defaults. */
extern uint spaces; /* default indentation */
extern uint wraplen; /* default wrap margin */
extern int tabsize;
extern int CharEncoding;
extern int inCharEncoding;
extern int outCharEncoding;
/* ReplacementCharEncoding is declared once, next to the UTF-8
   encoding/decoding functions earlier in this header, so it is not repeated here. */
extern DocTypeMode doctype_mode; /* see doctype property */
extern char *doctype_str; /* user specified doctype */
extern char *slide_style; /* style sheet for slides */
extern char *Language; /* #431953 - RJ language for line breaking */
extern char *errfile; /* file name to write errors to */
extern Bool writeback; /* if true then output tidied markup */
extern Bool KeepFileTimes; /* if true keeps last modified time */
extern Bool TidyMark; /* add meta element indicating tidied doc */
extern Bool OnlyErrors; /* if true normal output is suppressed */
extern Bool ShowWarnings; /* errors are always shown */
extern Bool Quiet;
extern Bool IndentContent;
extern Bool SmartIndent;
extern Bool HideEndTags;
extern Bool XmlTags;
extern Bool XmlOut;
extern Bool xHTML;
extern Bool HtmlOut; /* Yes means set explicitly. */
extern Bool XmlPi; /* add <?xml?> */
extern Bool XmlPIs; /* assume PIs end with ?> as per XML */
extern Bool XmlSpace;
extern Bool RawOut;
extern Bool UpperCaseTags;
extern Bool UpperCaseAttrs;
extern Bool MakeBare;
extern Bool MakeClean;
extern Bool LogicalEmphasis;
extern Bool DropEmptyParas;
extern Bool DropPropAttrs;
extern Bool FixComments;
extern Bool DropFontTags;
extern Bool EncloseBodyText;
extern Bool EncloseBlockText;
extern Bool BurstSlides;
extern Bool BreakBeforeBR;
extern Bool NumEntities;
extern Bool QuoteMarks;
extern Bool QuoteNbsp;
extern Bool QuoteAmpersand;
extern Bool WrapAttVals;
extern Bool WrapScriptlets;
extern Bool WrapSection;
extern Bool WrapAsp;
extern Bool WrapJste;
extern Bool WrapPhp;
extern Bool FixBackslash;
extern Bool IndentAttributes;
extern Bool Word2000;
extern Bool Emacs; /* sasdjb 01May00 GNU Emacs error output format */
extern Bool LiteralAttribs;
extern Bool BodyOnly; /* #434940 - output BODY content only */
extern Bool FixUri;
extern Bool LowerLiterals;
extern Bool HideComments;
extern Bool IndentCdata;
extern Bool ForceOutput;
extern uint ShowErrors;
extern Bool AsciiChars;
extern Bool JoinClasses;
extern Bool JoinStyles;
extern DupAttrMode DuplicateAttrs;
extern Bool EscapeCdata;
extern Bool NCR; /* #431953 - RJ */
extern Bool OutputBOM;
extern Bool SmartBOM;
/* Parser methods for tags */
/* Each line declares a function of type Parser, i.e.
void f(Lexer *lexer, Node *node, uint mode). */
Parser ParseHTML;
Parser ParseHead;
Parser ParseTitle;
Parser ParseScript;
Parser ParseFrameSet;
Parser ParseNoFrames;
Parser ParseBody;
Parser ParsePre;
Parser ParseList;
Parser ParseLI;
Parser ParseDefList;
Parser ParseBlock;
Parser ParseInline;
Parser ParseEmpty;
Parser ParseTableTag;
Parser ParseColGroup;
Parser ParseRowGroup;
Parser ParseRow;
Parser ParseSelect;
Parser ParseOptGroup;
Parser ParseText;
Parser ParseObject;
Parser ParseMap;
/* Attribute checking methods */
/* Each line declares a function of type CheckAttribs, i.e.
void f(Lexer *lexer, Node *node). */
CheckAttribs CheckHR;
CheckAttribs CheckIMG;
CheckAttribs CheckAnchor;
CheckAttribs CheckLINK;
CheckAttribs CheckMap;
CheckAttribs CheckAREA;
CheckAttribs CheckTABLE;
CheckAttribs CheckTableCell;
CheckAttribs CheckCaption;
CheckAttribs CheckSCRIPT;
CheckAttribs CheckSTYLE;
CheckAttribs CheckHTML;
CheckAttribs CheckFORM;
CheckAttribs CheckMETA;
/* used to control printing of null attributes */
/* Takes an attribute/value node and returns a Bool; the definition is not visible in this header. */
Bool IsBoolAttribute(AttVal *attval);
/* Pointers to tag dictionary entries (Dict), one per well-known tag.
Declared extern only; presumably defined and initialized in tags.c - TODO confirm. */
extern Dict *tag_html;
extern Dict *tag_head;
extern Dict *tag_body;
extern Dict *tag_frameset;
extern Dict *tag_frame;
extern Dict *tag_iframe; /* #433359 - fix by Randy Waki 12 Mar 01 */
extern Dict *tag_noframes;
extern Dict *tag_title;
extern Dict *tag_base;
extern Dict *tag_hr;
extern Dict *tag_meta;
extern Dict *tag_pre;
extern Dict *tag_listing;
extern Dict *tag_h1;
extern Dict *tag_h2;
extern Dict *tag_p;
extern Dict *tag_ul;
extern Dict *tag_ol;
extern Dict *tag_dir;
extern Dict *tag_li;
extern Dict *tag_dt;
extern Dict *tag_dd;
extern Dict *tag_dl;
extern Dict *tag_td;
extern Dict *tag_th;
extern Dict *tag_tr;
extern Dict *tag_col;
extern Dict *tag_br;
extern Dict *tag_a;
extern Dict *tag_link;
extern Dict *tag_b;
extern Dict *tag_i;
extern Dict *tag_strong;
extern Dict *tag_em;
extern Dict *tag_big;
extern Dict *tag_small;
extern Dict *tag_param;
extern Dict *tag_option;
extern Dict *tag_optgroup;
extern Dict *tag_img;
extern Dict *tag_map;
extern Dict *tag_area;
extern Dict *tag_nobr;
extern Dict *tag_wbr;
extern Dict *tag_layer;
extern Dict *tag_center;
extern Dict *tag_spacer;
extern Dict *tag_font;
extern Dict *tag_style;
extern Dict *tag_script;
extern Dict *tag_noscript;
extern Dict *tag_table;
extern Dict *tag_caption;
extern Dict *tag_form;
extern Dict *tag_textarea;
extern Dict *tag_blockquote;
extern Dict *tag_applet;
extern Dict *tag_object;
extern Dict *tag_div;
extern Dict *tag_span;
extern Dict *tag_input;
extern Dict *tag_q;