/* attrs.c -- recognize HTML attributes
(c) 1998-2001 (W3C) MIT, INRIA, Keio University
See tidy.c for the copyright notice.
CVS Info :
$Author: creitzel $
$Date: 2001/10/26 13:57:03 $
$Revision: 1.39 $
*/
#include "platform.h" /* platform independent stuff */
#include "html.h" /* to pull in definition of nodes */
/*
 Cached dictionary entries for frequently used attributes.
 Each pointer is assigned by InitAttrs() from a lookup() of the
 attribute's name, so it is only meaningful after InitAttrs()
 has run.
*/
Attribute *attr_href;
Attribute *attr_src;
Attribute *attr_id;
Attribute *attr_name;
Attribute *attr_summary;
Attribute *attr_alt;
Attribute *attr_longdesc;
Attribute *attr_usemap;
Attribute *attr_ismap;
Attribute *attr_language;
Attribute *attr_type;
Attribute *attr_value;
Attribute *attr_content;
Attribute *attr_title;
Attribute *attr_xmlns;
Attribute *attr_datafld;
Attribute *attr_width;
Attribute *attr_height;
/*
 Declarations of the value-checking routines that the attribute
 table refers to through the URL, SCRIPT, ALIGN, ... macros.
 They are only declared here; their definitions live elsewhere.
*/
AttrCheck CheckUrl;
AttrCheck CheckScript;
AttrCheck CheckName;
AttrCheck CheckId;
AttrCheck CheckAlign;
AttrCheck CheckValign;
AttrCheck CheckBool;
AttrCheck CheckLength;
AttrCheck CheckTarget;
AttrCheck CheckFsubmit;
AttrCheck CheckClear;
AttrCheck CheckShape;
AttrCheck CheckNumber;
AttrCheck CheckScope;
AttrCheck CheckColor;
AttrCheck CheckVType;
AttrCheck CheckScroll;
AttrCheck CheckTextDir;
AttrCheck CheckLang;
/* settings owned by other parts of the program (defined elsewhere) */
extern Bool XmlTags;
extern Bool XmlOut;
extern char *alt_text;
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
/* number of buckets in the attribute dictionary */
#define HASHSIZE 357
/*
 The attribute dictionary: hash(name) selects a bucket and the
 entries in a bucket are chained through Attribute.next.
*/
static Attribute *hashtab[HASHSIZE];
/*
Bind attribute types to procedures to check values.
You can add new procedures for better validation
and each procedure has access to the node in which
the attribute occurred as well as the attribute name
and its value.
By default, attributes are checked without regard
to the element they are found on. You have the choice
of making the procedure test which element is involved
or in writing methods for each element which controls
exactly how the attributes of that element are checked.
This latter approach is best for detecting the absence
of required attributes.

Each macro below names an attribute value type as used in the
third column of attrlist[]. A type defined as null has no
checking procedure bound to it. IsUrl(), IsBool() and IsScript()
compare a dictionary entry's checker against URL, BOOL and SCRIPT
respectively, so rebinding those three macros also changes what
these predicates report.
*/
#define TEXT null
#define CHARSET null
#define TYPE null
#define CHARACTER null
#define URLS null
#define URL CheckUrl
#define SCRIPT CheckScript
#define ALIGN CheckAlign
#define VALIGN CheckValign
#define COLOR CheckColor
#define CLEAR CheckClear
#define BORDER CheckBool /* kludge */
#define LANG CheckLang
#define BOOL CheckBool
#define COLS null
#define NUMBER CheckNumber
#define LENGTH CheckLength
#define COORDS null
#define DATE null
#define TEXTDIR CheckTextDir
#define IDREFS null
#define IDREF null
#define IDDEF CheckId
#define NAME CheckName
#define TFRAME null
#define FBORDER null
#define MEDIA null
#define FSUBMIT CheckFsubmit
#define LINKTYPES null
#define TRULES null
#define SCOPE CheckScope
#define SHAPE CheckShape
#define SCROLL CheckScroll
#define TARGET CheckTarget
#define VTYPE CheckVType
/*
 Static description of every attribute known at start-up.
 InitAttrs() walks this table and installs each row into the
 hash table.

   name     - attribute name as looked up in the dictionary
   versions - VERS_* value recording which HTML versions
              define the attribute
   attrchk  - value checking procedure, or null when no check
              is bound to the attribute's type

 The table is terminated by an entry whose name is null.

 Note: "usemap" is bound to URL rather than BOOL because its
 value is a URI (HTML 4.01 declares it as %URI;), so IsUrl()
 now reports it and IsBool() no longer does.
*/
static struct _attrlist
{
    char *name;
    unsigned versions;
    AttrCheck *attrchk;
} attrlist[] =
{
    {"abbr", VERS_HTML40, TEXT},
    {"accept-charset", VERS_HTML40, CHARSET},
    {"accept", VERS_ALL, TYPE},
    {"accesskey", VERS_HTML40, CHARACTER},
    {"action", VERS_ALL, URL},
    {"add_date", VERS_NETSCAPE, TEXT}, /* A */
    {"align", VERS_ALL, ALIGN}, /* set varies with element */
    {"alink", VERS_LOOSE, COLOR},
    {"alt", VERS_ALL, TEXT},
    {"archive", VERS_HTML40, URLS}, /* space or comma separated list */
    {"axis", VERS_HTML40, TEXT},
    {"background", VERS_LOOSE, URL},
    {"bgcolor", VERS_LOOSE, COLOR},
    {"bgproperties", VERS_PROPRIETARY, TEXT}, /* BODY "fixed" fixes background */
    {"border", VERS_ALL, BORDER}, /* like LENGTH + "border" */
    {"bordercolor", VERS_MICROSOFT, COLOR}, /* used on TABLE */
    {"bottommargin", VERS_MICROSOFT, NUMBER}, /* used on BODY */
    {"cellpadding", VERS_FROM32, LENGTH}, /* % or pixel values */
    {"cellspacing", VERS_FROM32, LENGTH},
    {"char", VERS_HTML40, CHARACTER},
    {"charoff", VERS_HTML40, LENGTH},
    {"charset", VERS_HTML40, CHARSET},
    {"checked", VERS_ALL, BOOL}, /* i.e. "checked" or absent */
    {"cite", VERS_HTML40, URL},
    {"class", VERS_HTML40, TEXT},
    {"classid", VERS_HTML40, URL},
    {"clear", VERS_LOOSE, CLEAR}, /* BR: left, right, all */
    {"code", VERS_LOOSE, TEXT}, /* APPLET */
    {"codebase", VERS_HTML40, URL}, /* OBJECT */
    {"codetype", VERS_HTML40, TYPE}, /* OBJECT */
    {"color", VERS_LOOSE, COLOR}, /* BASEFONT, FONT */
    {"cols", VERS_IFRAME, COLS}, /* TABLE & FRAMESET */
    {"colspan", VERS_FROM32, NUMBER},
    {"compact", VERS_ALL, BOOL}, /* lists */
    {"content", VERS_ALL, TEXT}, /* META */
    {"coords", VERS_FROM32, COORDS}, /* AREA, A */
    {"data", VERS_HTML40, URL}, /* OBJECT */
    {"datafld", VERS_MICROSOFT, TEXT}, /* used on DIV, IMG */
    {"dataformatas", VERS_MICROSOFT, TEXT}, /* used on DIV, IMG */
    {"datapagesize", VERS_MICROSOFT, NUMBER}, /* used on DIV, IMG */
    {"datasrc", VERS_MICROSOFT, URL}, /* used on TABLE */
    {"datetime", VERS_HTML40, DATE}, /* INS, DEL */
    {"declare", VERS_HTML40, BOOL}, /* OBJECT */
    {"defer", VERS_HTML40, BOOL}, /* SCRIPT */
    {"dir", VERS_HTML40, TEXTDIR}, /* ltr or rtl */
    {"disabled", VERS_HTML40, BOOL}, /* form fields */
    {"enctype", VERS_ALL, TYPE}, /* FORM */
    {"face", VERS_LOOSE, TEXT}, /* BASEFONT, FONT */
    {"for", VERS_HTML40, IDREF}, /* LABEL */
    {"frame", VERS_HTML40, TFRAME}, /* TABLE */
    {"frameborder", VERS_FRAMESET, FBORDER}, /* 0 or 1 */
    {"framespacing", VERS_PROPRIETARY, NUMBER}, /* pixel value */
    {"gridx", VERS_PROPRIETARY, NUMBER}, /* TABLE Adobe golive*/
    {"gridy", VERS_PROPRIETARY, NUMBER}, /* TABLE Adobe golive */
    {"headers", VERS_HTML40, IDREFS}, /* table cells */
    {"height", VERS_ALL, LENGTH}, /* pixels only for TH/TD */
    {"href", VERS_ALL, URL}, /* A, AREA, LINK and BASE */
    {"hreflang", VERS_HTML40, LANG}, /* A, LINK */
    {"hspace", VERS_ALL, NUMBER}, /* APPLET, IMG, OBJECT */
    {"http-equiv", VERS_ALL, TEXT}, /* META */
    {"id", VERS_HTML40, IDDEF},
    {"ismap", VERS_ALL, BOOL}, /* IMG */
    {"label", VERS_HTML40, TEXT}, /* OPT, OPTGROUP */
    {"lang", VERS_HTML40, LANG},
    {"language", VERS_LOOSE, TEXT}, /* SCRIPT */
    {"last_modified", VERS_NETSCAPE, TEXT}, /* A */
    {"last_visit", VERS_NETSCAPE, TEXT}, /* A */
    {"leftmargin", VERS_MICROSOFT, NUMBER}, /* used on BODY */
    {"link", VERS_LOOSE, COLOR}, /* BODY */
    {"longdesc", VERS_HTML40, URL}, /* IMG */
    {"lowsrc", VERS_PROPRIETARY, URL}, /* IMG */
    {"marginheight", VERS_IFRAME, NUMBER}, /* FRAME, IFRAME, BODY */
    {"marginwidth", VERS_IFRAME, NUMBER}, /* ditto */
    {"maxlength", VERS_ALL, NUMBER}, /* INPUT */
    {"media", VERS_HTML40, MEDIA}, /* STYLE, LINK */
    {"method", VERS_ALL, FSUBMIT}, /* FORM: get or post */
    {"multiple", VERS_ALL, BOOL}, /* SELECT */
    {"name", VERS_ALL, NAME},
    {"nohref", VERS_FROM32, BOOL}, /* AREA */
    {"noresize", VERS_FRAMESET, BOOL}, /* FRAME */
    {"noshade", VERS_LOOSE, BOOL}, /* HR */
    {"nowrap", VERS_LOOSE, BOOL}, /* table cells */
    {"object", VERS_HTML40_LOOSE, TEXT}, /* APPLET */
    {"onblur", VERS_EVENTS, SCRIPT}, /* event */
    {"onchange", VERS_EVENTS, SCRIPT}, /* event */
    {"onclick", VERS_EVENTS, SCRIPT}, /* event */
    {"ondblclick", VERS_EVENTS, SCRIPT}, /* event */
    {"onkeydown", VERS_EVENTS, SCRIPT}, /* event */
    {"onkeypress", VERS_EVENTS, SCRIPT}, /* event */
    {"onkeyup", VERS_EVENTS, SCRIPT}, /* event */
    {"onload", VERS_EVENTS, SCRIPT}, /* event */
    {"onmousedown", VERS_EVENTS, SCRIPT}, /* event */
    {"onmousemove", VERS_EVENTS, SCRIPT}, /* event */
    {"onmouseout", VERS_EVENTS, SCRIPT}, /* event */
    {"onmouseover", VERS_EVENTS, SCRIPT}, /* event */
    {"onmouseup", VERS_EVENTS, SCRIPT}, /* event */
    {"onsubmit", VERS_EVENTS, SCRIPT}, /* event */
    {"onreset", VERS_EVENTS, SCRIPT}, /* event */
    {"onselect", VERS_EVENTS, SCRIPT}, /* event */
    {"onunload", VERS_EVENTS, SCRIPT}, /* event */
    {"onfocus", VERS_EVENTS, SCRIPT}, /* event */
    {"onafterupdate", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"onbeforeupdate", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"onerrorupdate", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"onrowenter", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"onrowexit", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"onbeforeunload", VERS_MICROSOFT, SCRIPT}, /* form fields */
    {"ondatasetchanged", VERS_MICROSOFT, SCRIPT}, /* object, applet */
    {"ondataavailable", VERS_MICROSOFT, SCRIPT}, /* object, applet */
    {"ondatasetcomplete",VERS_MICROSOFT, SCRIPT}, /* object, applet */
    {"profile", VERS_HTML40, URL}, /* HEAD */
    {"prompt", VERS_LOOSE, TEXT}, /* ISINDEX */
    {"readonly", VERS_HTML40, BOOL}, /* form fields */
    {"rel", VERS_ALL, LINKTYPES}, /* A, LINK */
    {"rev", VERS_ALL, LINKTYPES}, /* A, LINK */
    {"rightmargin", VERS_MICROSOFT, NUMBER}, /* used on BODY */
    {"rows", VERS_ALL, NUMBER}, /* TEXTAREA */
    {"rowspan", VERS_ALL, NUMBER}, /* table cells */
    {"rules", VERS_HTML40, TRULES}, /* TABLE */
    {"scheme", VERS_HTML40, TEXT}, /* META */
    {"scope", VERS_HTML40, SCOPE}, /* table cells */
    {"scrolling", VERS_IFRAME, SCROLL}, /* yes, no or auto */
    {"selected", VERS_ALL, BOOL}, /* OPTION */
    {"shape", VERS_FROM32, SHAPE}, /* AREA, A */
    {"showgrid", VERS_PROPRIETARY, BOOL}, /* TABLE Adobe golive */
    {"showgridx", VERS_PROPRIETARY, BOOL}, /* TABLE Adobe golive*/
    {"showgridy", VERS_PROPRIETARY, BOOL}, /* TABLE Adobe golive*/
    {"size", VERS_LOOSE, NUMBER}, /* HR, FONT, BASEFONT, SELECT */
    {"span", VERS_HTML40, NUMBER}, /* COL, COLGROUP */
    {"src", VERS_ALL, URL}, /* IMG, FRAME, IFRAME */
    {"standby", VERS_HTML40, TEXT}, /* OBJECT */
    {"start", VERS_ALL, NUMBER}, /* OL */
    {"style", VERS_HTML40, TEXT},
    {"summary", VERS_HTML40, TEXT}, /* TABLE */
    {"tabindex", VERS_HTML40, NUMBER}, /* fields, OBJECT and A */
    {"target", VERS_HTML40, TARGET}, /* names a frame/window */
    {"text", VERS_LOOSE, COLOR}, /* BODY */
    {"title", VERS_HTML40, TEXT}, /* text tool tip */
    {"topmargin", VERS_MICROSOFT, NUMBER}, /* used on BODY */
    {"type", VERS_FROM32, TYPE}, /* also used by SPACER */
    {"usemap", VERS_ALL, URL}, /* things with images; value is a URI */
    {"valign", VERS_FROM32, VALIGN},
    {"value", VERS_ALL, TEXT}, /* OPTION, PARAM */
    {"valuetype", VERS_HTML40, VTYPE}, /* PARAM: data, ref, object */
    {"version", VERS_ALL, TEXT}, /* HTML */
    {"vlink", VERS_LOOSE, COLOR}, /* BODY */
    {"vspace", VERS_LOOSE, NUMBER}, /* IMG, OBJECT, APPLET */
    {"width", VERS_ALL, LENGTH}, /* pixels only for TD/TH */
    {"wrap", VERS_NETSCAPE, TEXT}, /* textarea */
    {"xml:lang", VERS_XML, TEXT}, /* XML language */
    {"xml:space", VERS_XML, TEXT}, /* XML language */
    {"xmlns", VERS_ALL, TEXT}, /* name space */
    {"rbspan", VERS_XHTML11, NUMBER}, /* ruby markup */
    /* this must be the final entry */
    {null, 0, 0}
};
/*
 Named colours and their "#RRGGBB" equivalents, used by
 CheckColor(). The list ends with an entry whose fields
 are both null.
*/
static struct _colors
{
    char *name;
    char *hex;
} colors[] =
{
    {"black", "#000000"},
    {"green", "#008000"},
    {"silver", "#C0C0C0"},
    {"lime", "#00FF00"},
    {"gray", "#808080"},
    {"olive", "#808000"},
    {"white", "#FFFFFF"},
    {"yellow", "#FFFF00"},
    {"maroon", "#800000"},
    {"navy", "#000080"},
    {"red", "#FF0000"},
    {"blue", "#0000FF"},
    {"purple", "#800080"},
    {"teal", "#008080"},
    {"fuchsia", "#FF00FF"},
    {"aqua", "#00FFFF"},
    {null, null}
};
/*
 Map a NUL-terminated string to a bucket index in the range
 0 .. HASHSIZE-1 using a multiply-by-31 rolling sum.
*/
static unsigned hash(char *s)
{
    unsigned acc = 0;
    char *p = s;

    while (*p != '\0')
    {
        acc = 31*acc + *p;
        p++;
    }

    return acc % HASHSIZE;
}
/*
 Find the dictionary entry whose name equals s (compared with
 wstrcmp). Returns null when the name is not in the table.
*/
static Attribute *lookup(char *s)
{
    Attribute *entry = hashtab[hash(s)];

    /* walk the bucket's chain until a match or the end */
    while (entry != null && wstrcmp(s, entry->name) != 0)
        entry = entry->next;

    return entry;
}
/*
 Add name to the attribute dictionary, or update the existing
 entry of that name.

   name     - attribute name; a private copy is stored
   versions - VERS_* value recorded on the entry
   attrchk  - value checking procedure, may be null

 In both cases the entry's nowrap and literal flags are reset
 to no. Returns the entry, or null if a new entry could not be
 created (in which case nothing is added to the table).
*/
static Attribute *install(char *name, uint versions, AttrCheck *attrchk)
{
    Attribute *np = lookup(name);

    if (np == null)
    {
        unsigned hashval;

        np = (Attribute *)MemAlloc(sizeof(*np));

        if (np == null)
            return null;

        np->name = wstrdup(name);

        if (np->name == null)
        {
            /* don't leak the half-built entry */
            MemFree(np);
            return null;
        }

        /* push the new entry onto the front of its bucket */
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    }

    np->versions = versions;
    np->attrchk = attrchk;
    np->nowrap = no;
    np->literal = no;
    return np;
}
/*
 Flag a dictionary entry as "nowrap". install() leaves the
 flag set to no, so this is the only place it is turned on.
 attr must not be null.
*/
static void SetNoWrap(Attribute *attr)
{
    attr->nowrap = yes;
}
/*
 Public method for finding an attribute definition by name.
 Returns the dictionary entry for attval->attribute, or null
 when the attribute has no name or the name is unknown.
*/
Attribute *FindAttribute(AttVal *attval)
{
    if (attval->attribute == null)
        return null;

    return lookup(attval->attribute);
}
/*
 Return the first attribute of node whose name equals name
 (compared with wstrcmp), or null if there is none.

 Attributes without a name are skipped rather than handed to
 wstrcmp: FindAttribute() in this file already treats a null
 attval->attribute as a possibility.
*/
AttVal *GetAttrByName(Node *node, char *name)
{
    AttVal *attr;

    for (attr = node->attributes; attr != null; attr = attr->next)
    {
        if (attr->attribute != null && wstrcmp(attr->attribute, name) == 0)
            break;
    }

    return attr;
}
/*
 Create a new attribute name="value" (double-quote delimited,
 with private copies of both strings), bind it to its
 dictionary entry if there is one, and append it to the end
 of node's attribute list.
*/
void AddAttribute(Node *node, char *name, char *value)
{
    AttVal **link;
    AttVal *av = NewAttribute();

    av->delim = '"';
    av->attribute = wstrdup(name);
    av->value = wstrdup(value);
    av->dict = FindAttribute(av);

    /* find the null link that terminates the list */
    link = &node->attributes;

    while (*link != null)
        link = &(*link)->next;

    *link = av;
}
Bool IsUrl(char *attrname)
{
Attribute *np;
return (Bool)((np = lookup(attrname)) && np->attrchk == URL);
}
Bool IsBool(char *attrname)
{
Attribute *np;
return (Bool)((np = lookup(attrname)) && np->attrchk == BOOL);
}
Bool IsScript(char *attrname)
{
Attribute *np;
return (Bool)((np = lookup(attrname)) && np->attrchk == SCRIPT);
}
Bool IsLiteralAttribute(char *attrname)
{
Attribute *np;
return (Bool)((np = lookup(attrname)) && np->literal);
}
/*
 May id or name serve as an anchor on this node?
 Yes for A, APPLET, FORM, FRAME, IFRAME, IMG and MAP.
*/
Bool IsAnchorElement(Node *node)
{
    if (node->tag != tag_a &&
        node->tag != tag_applet &&
        node->tag != tag_form &&
        node->tag != tag_frame &&
        node->tag != tag_iframe &&
        node->tag != tag_img &&
        node->tag != tag_map)
        return no;

    return yes;
}
/*
 anchor/node map: head of a singly linked list of Anchor
 records (chained through Anchor.next), each pairing an
 anchor name with the node that declared it
*/
Anchor *anchor_list = null;
/*
 Release one anchor record together with its name string,
 if it has one. The record must already be unlinked from
 anchor_list; a must not be null.
*/
void FreeAnchor(Anchor *a)
{
    if (a->name != null)
    {
        MemFree(a->name);
    }

    MemFree(a);
}
/*
 Remove every anchor that refers to node from anchor_list and
 free it.

 More than one record may point at the same node, since nothing
 here prevents AddAnchor() from being called repeatedly for one
 node. Each match is therefore freed as soon as it is unlinked;
 freeing only the last match would leak the others.
*/
void RemoveAnchorByNode(Node *node)
{
    /* link is the pointer that currently leads to the record under test */
    Anchor **link = &anchor_list;

    while (*link != null)
    {
        Anchor *found = *link;

        if (found->node == node)
        {
            *link = found->next;   /* unlink; link itself stays put */
            FreeAnchor(found);
        }
        else
            link = &found->next;
    }
}
/*
 Allocate an anchor record with all three fields
 (name, node, next) cleared to null.
*/
Anchor *NewAnchor(void)
{
    Anchor *fresh = (Anchor *)MemAlloc(sizeof(*fresh));

    fresh->node = null;
    fresh->next = null;
    fresh->name = null;

    return fresh;
}
/*
 Add a new anchor to the namespace: a record holding a copy of
 name and the given node is appended to the end of anchor_list.
 Returns the head of the list, not the new record.
*/
Anchor *AddAnchor(char *name, Node *node)
{
    Anchor **tail = &anchor_list;
    Anchor *a = NewAnchor();

    a->name = wstrdup(name);
    a->node = node;

    /* advance to the null link that ends the list */
    while (*tail != null)
        tail = &(*tail)->next;

    *tail = a;

    return anchor_list;
}
/*
 Return the node of the first anchor whose name matches name
 (compared with wstrcasecmp), or null if no anchor matches.
*/
Node *GetNodeByAnchor(char *name)
{
    Anchor *a = anchor_list;

    while (a != null)
    {
        if (wstrcasecmp(a->name, name) == 0)
            return a->node;

        a = a->next;
    }

    return null;
}
/* free all anchors */
void FreeAnchors(void)
{
Anchor *a;
while (anchor_list)
{
a = anchor_list;
if (a->name)
MemFree(a->name);
anchor_list = a->next;
MemFree(a);
}
}
/* public method for inititializing attribute dictionary */
void InitAttrs(void)
{
struct _attrlist *ap;
for(ap = attrlist; ap->name != null; ++ap)
install(ap->name, ap->versions, ap->attrchk);
attr_href = lookup("href");
attr_src = lookup("src");
attr_id = lookup("id");
attr_name = lookup("name");
attr_summary = lookup("summary");
attr_alt = lookup("alt");
attr_longdesc = lookup("longdesc");
attr_usemap = lookup("usemap");
attr_ismap = lookup("ismap");
attr_language = lookup("language");
attr_type = lookup("type");
attr_title = lookup("title");
attr_xmlns = lookup("xmlns");
attr_datafld = lookup("datafld");
attr_value = lookup("value");
attr_content = lookup("content");
attr_width = lookup("width");
attr_height = lookup("height");
SetNoWrap(attr_alt);
SetNoWrap(attr_value);
SetNoWrap(attr_content);
}
/*
 Henry Zrepa reports that some folk are
 using embed with script attributes where
 newlines are significant. These need to be
 declared and handled specially!

 Marks the named attribute as literal. An attribute that is
 not yet in the dictionary is first installed as a proprietary
 attribute with no value check. If that installation fails
 (install() returns null) the request is dropped instead of
 dereferencing a null pointer.
*/
void DeclareLiteralAttrib(char *name)
{
    Attribute *attrib = lookup(name);

    if (attrib == null) /* #431337 - fix by Terry Teague 07 Jun 01 */
        attrib = install(name, VERS_PROPRIETARY, null);

    if (attrib != null)
        attrib->literal = yes;
}
/*
 Release the whole attribute dictionary (every entry and its
 name string), reset all buckets to null, then free the
 anchor list as well.
*/
void FreeAttrTable(void)
{
    int bucket = 0;

    while (bucket < HASHSIZE)
    {
        Attribute *entry = hashtab[bucket];

        while (entry != null)
        {
            /* remember the successor before the entry goes away */
            Attribute *following = entry->next;

            MemFree(entry->name);
            MemFree(entry);
            entry = following;
        }

        hashtab[bucket] = null;
        bucket++;
    }

    FreeAnchors();
}
/*
the same attribute name can't be used
more than once in each element
*/
void RepairDuplicateAttributes(Lexer *lexer, Node *node)
{
AttVal *attval;
for (attval = node->attributes; attval != null;)
{
if (attval->asp == null && attval->php == null)
{
AttVal *current;
for (current = attval->next; current != null;)
{
if (current->asp == null && current->php == null &&
wstrcasecmp(attval->attribute, current->attribute) == 0)
{
AttVal *temp;
if (wstrcasecmp(current->attribute, "class") == 0 && JoinClasses)
{
/* concatenate classes */
current->value = (char *)MemRealloc(current->value, wstrlen(current->value) +
wstrlen(attval->value) + 2);
wstrcat(current->value, " ");
wstrcat(current->value, attval->value);
temp = attval->next;
if (temp->next == null)
current = null;
else
current = current->next;
ReportAttrError(lexer, node, attval, JOINING_ATTRIBUTE);
RemoveAttribute(node, attval);
attval = temp;
}
else if (wstrcasecmp(current->attribute, "style") == 0 && JoinStyles)
{
/* concatenate styles */
/*
this doesn't handle CSS comments and
leading/trailing white-space very well
see http://www.w3.org/TR/css-style-attr
*/
size_t end = strlen(current->value);
if (current->value[end] == ';')
{
/* attribute ends with declaration seperator */
current->value = (char *)MemRealloc(current->value,
end + wstrlen(attval->value) + 2);
wstrcat(current->value, " ");
wstrcat(current->value, attval->value);
}
else if (current->value[end] == '}')
{
/* attribute ends with rule set */
current->value = (char *)MemRealloc(current->value,
end + wstrlen(attval->value) + 6);
wstrcat(current->value, " { ");
wstrcat(current->value, attval->value);
wstrcat(current->value, " }");
}
else
{
/* attribute ends with property value */
current->value = (char *)MemRealloc(current->value,
end + wstrlen(attval->value) + 3);
wstrcat(current->value, "; ");
wstrcat(current->value, attval->value);
}
temp = attval->next;
if (temp->next == null)
current = null;
else
current = current->next;
ReportAttrError(lexer, node, attval, JOINING_ATTRIBUTE);
RemoveAttribute(node, attval);
attval = temp;
}
else if (DuplicateAttrs == keep_last)
{
temp = current->next;
ReportAttrError(lexer, node, current, REPEATED_ATTRIBUTE);
RemoveAttribute(node, current);
current = temp;
}
else
{
temp = attval->next;
if (attval->next == null)
current = null;
else
current = current->next;
ReportAttrError(lexer, node, attval, REPEATED_ATTRIBUTE);
RemoveAttribute(node, attval);
attval = temp;
}
}
else
current = current->next;
}
attval = attval->next;
}
else
attval = attval->next;
}
}
/* ignore unknown attributes for proprietary elements */
Attribute *CheckAttribute(Lexer *lexer, Node *node, AttVal *attval)
{
Attribute *attribute;
if ((attribute = attval->dict) != null)
{
/* if attribute looks like