00001
00002
00003 #include <string.h>
00004 #include "gHtmlParser.h"
00005 #include "gHtmlCtrl.h"
00006 #include "gHtmlSeq.h"
00007 #include "gstringext.h"
00008
00009
00010
00011 const char* gHParsed::tblStateStr[e_HS_Last]={
00012 "START",
00013 "START_",
00014 "HTML",
00015 "HEAD",
00016 "HEAD_",
00017 "BODY",
00018 "BODY_",
00019 "HTML_"};
00020
00021 t_int16 gHtmlParser::nElems=-1;
00022 sHtmlElement gHtmlParser::lElems[]={
00023 {0, "A", "anchor", '@', '@', '@', '@', '@'},
00024 {0, "ABBR", "abbreviated form", '@', '@', '@', '@', e_HtmlElementPhrase},
00025 {0, "ACRONYM", "acronym", '@', '@', '@', '@', e_HtmlElementPhrase},
00026 {0, "ADDRESS", "information on author", '@', '@', '@', '@', '@'},
00027 {0, "APPLET", "Java applet", '@', '@', 'D', 'L', '@'},
00028 {0, "AREA", "client-side image map", '@', 'F', '@', '@', '@'},
00029 {0, "B", "bold text style", '@', '@', '@', '@', '@'},
00030 {0, "BASE", "document base URI", '@', 'F', '@', '@', '@'},
00031 {0, "BASEFONT", "base font size", '@', 'F', 'D', 'L', '@'},
00032 {0, "BDO", "I18N BiDi over-ride", '@', '@', '@', '@', '@'},
00033 {0, "BIG", "large text style", '@', '@', '@', '@', '@'},
00034 {0, "BLOCKQUOTE", "long quotation", '@', '@', '@', '@', '@'},
00035 {0, "BODY", "document body", 'O', 'O', '@', '@', '@'},
00036 {0, "BR", "forced line break", '@', 'F', '@', '@', '@'},
00037 {0, "BUTTON", "push button", '@', '@', '@', '@', '@'},
00038 {0, "CAPTION", "table caption", '@', '@', '@', '@', '@'},
00039 {0, "CENTER", "center line (deprecated)", '@', '@', 'D', 'L', '@'},
00040 {0, "CITE", "citation", '@', '@', '@', '@', e_HtmlElementPhrase},
00041 {0, "CODE", "computer code fragment", '@', '@', '@', '@', e_HtmlElementPhrase},
00042 {0, "COL", "table column", '@', 'F', '@', '@', '@'},
00043 {0, "COLGROUP", "table column group", '@', 'O', '@', '@', '@'},
00044 {0, "DD", "definition description", '@', 'O', '@', '@', '@'},
00045 {0, "DEL", "deleted text", '@', '@', '@', '@', '@'},
00046 {0, "DFN", "instance definition", '@', '@', '@', '@', e_HtmlElementPhrase},
00047 {0, "DIR", "directory list", '@', '@', 'D', 'L', '@'},
00048 {0, "DIV", "generic language/style", '@', '@', '@', '@', '@'},
00049 {0, "DL", "definition list", '@', '@', '@', '@', '@'},
00050 {0, "DT", "definition term", '@', 'O', '@', '@', '@'},
00051 {0, "EM", "emphasis", '@', '@', '@', '@', e_HtmlElementPhrase},
00052 {0, "FIELDSET", "form control group", '@', '@', '@', '@', '@'},
00053 {0, "FONT", "local change to font", '@', '@', 'D', 'L', '@'},
00054 {0, "FORM", "interactive form", '@', '@', '@', '@', '@'},
00055 {0, "FRAME", "subwindow", '@', 'F', '@', 'F', '@'},
00056 {0, "FRAMESET", "window subdivision", '@', '@', '@', 'F', '@'},
00057 {0, "H1", "heading", '@', '@', '@', '@', '@'},
00058 {0, "H2", "heading", '@', '@', '@', '@', '@'},
00059 {0, "H3", "heading", '@', '@', '@', '@', '@'},
00060 {0, "H4", "heading", '@', '@', '@', '@', '@'},
00061 {0, "H5", "heading", '@', '@', '@', '@', '@'},
00062 {0, "H6", "heading", '@', '@', '@', '@', '@'},
00063 {0, "HEAD", "document head", 'O', 'O', '@', '@', '@'},
00064 {0, "HR", "horizontal rule", '@', 'F', '@', '@', '@'},
00065 {0, "HTML", "document root element", 'O', 'O', '@', '@', '@'},
00066 {0, "I", "italic text style", '@', '@', '@', '@', '@'},
00067 {0, "IFRAME", "inline subwindow", '@', '@', '@', 'L', '@'},
00068 {0, "IMG", "Embedded image", '@', 'F', '@', '@', '@'},
00069 {0, "INPUT", "form control", '@', 'F', '@', '@', '@'},
00070 {0, "INS", "inserted text", '@', '@', '@', '@', '@'},
00071 {0, "ISINDEX", "single line prompt", '@', 'F', 'D', 'L', '@'},
00072 {0, "KBD", "text to be entered by user", '@', '@', '@', '@', e_HtmlElementPhrase},
00073 {0, "LABEL", "form field label text", '@', '@', '@', '@', '@'},
00074 {0, "LEGEND", "fieldset legend", '@', '@', '@', '@', '@'},
00075 {0, "LI", "list item", '@', 'O', '@', '@', '@'},
00076 {0, "LINK", "media-independent link", '@', 'F', '@', '@', '@'},
00077 {0, "MAP", "client-side image map", '@', '@', '@', '@', '@'},
00078 {0, "MENU", "menu list", '@', '@', 'D', 'L', '@'},
00079 {0, "META", "generic meta info", '@', 'F', '@', '@', '@'},
00080 {0, "NOFRAMES", "alternate content", '@', '@', '@', 'F', '@'},
00081 {0, "NOSCRIPT", "alternate content", '@', '@', '@', '@', '@'},
00082 {0, "OBJECT", "generic embedded object", '@', '@', '@', '@', '@'},
00083 {0, "OL", "ordered list", '@', '@', '@', '@', '@'},
00084 {0, "OPTGROUP", "option group", '@', '@', '@', '@', '@'},
00085 {0, "OPTION", "selectable choice", '@', 'O', '@', '@', '@'},
00086 {0, "P", "paragraph", '@', 'O', '@', '@', '@'},
00087 {0, "PARAM", "named property value", '@', 'F', '@', '@', '@'},
00088 {0, "PRE", "preformatted text", '@', '@', '@', '@', '@'},
00089 {0, "Q", "short inline quotation", '@', '@', '@', '@', '@'},
00090 {0, "S", "strike-through text style", '@', '@', 'D', 'L', '@'},
00091 {0, "SAMP", "sample program output", '@', '@', '@', '@', e_HtmlElementPhrase},
00092 {0, "SCRIPT", "script statements", '@', '@', '@', '@', '@'},
00093 {0, "SELECT", "option selector", '@', '@', '@', '@', '@'},
00094 {0, "SMALL", "small text style", '@', '@', '@', '@', '@'},
00095 {0, "SPAN", "generic language/style", '@', '@', '@', '@', '@'},
00096 {0, "STRIKE", "strike-through text", '@', '@', 'D', 'L', '@'},
00097 {0, "STRONG", "strong emphasis", '@', '@', '@', '@', e_HtmlElementPhrase},
00098 {0, "STYLE", "style info", '@', '@', '@', '@', '@'},
00099 {0, "SUB", "subscript", '@', '@', '@', '@', '@'},
00100 {0, "SUP", "superscript", '@', '@', '@', '@', '@'},
00101 {0, "TABLE", "table", '@', '@', '@', '@', '@'},
00102 {0, "TBODY", "table body", 'O', 'O', '@', '@', '@'},
00103 {0, "TD", "table data cell", '@', 'O', '@', '@', '@'},
00104 {0, "TEXTAREA", "multi-line text field", '@', '@', '@', '@', '@'},
00105 {0, "TFOOT", "table footer", '@', 'O', '@', '@', '@'},
00106 {0, "TH", "table header cell", '@', 'O', '@', '@', '@'},
00107 {0, "THEAD", "table header", '@', 'O', '@', '@', '@'},
00108 {0, "TITLE", "document title", '@', '@', '@', '@', '@'},
00109 {0, "TR", "table row", '@', 'O', '@', '@', '@'},
00110 {0, "TT", "teletype or monospaced", '@', '@', '@', '@', '@'},
00111 {0, "U", "underlined text style", '@', '@', 'D', 'L', '@'},
00112 {0, "UL", "unordered list", '@', '@', '@', '@', '@'},
00113 {0, "VAR", "instance of a variable", '@', '@', '@', '@', e_HtmlElementPhrase},
00114 {0, NULL, NULL, '@', '@', '@', '@', '\0'}};
00115
00116
00117 sAttrDef gHtmlParser::lAttrs[]={
00118 {0, "abbr", "TD, TH", "%Text;", "#I", 'n', "", "abbreviation for header cell"},
00119 {0, "accept-charset", "FORM", "%Charsets;", "#I", 'n', "", "list of supported charsets"},
00120 {0, "accept", "FORM, INPUT", "%ContentTypes;", "#I", 'n', "", "list of MIME types for file upload"},
00121 {0, "accesskey", "A, AREA, BUTTON, INPUT, LABEL, LEGEND, TEXTAREA", "%Character;", "#I", 'n', "", "accessibility key character"},
00122 {0, "action", "FORM", "%URI;", "#R", 'n', "", "server-side form handler"},
00123 {0, "align", "CAPTION", "%CAlign;", "#I", 'D', "L", "relative to table"},
00124 {0, "align", "APPLET, IFRAME, IMG, INPUT, OBJECT", "%IAlign;", "#I", 'D', "L", "vertical or horizontal alignment"},
00125 {0, "align", "LEGEND", "%LAlign;", "#I", 'D', "L", "relative to fieldset"},
00126 {0, "align", "TABLE", "%TAlign;", "#I", 'D', "L", "table position relative to window"},
00127 {0, "align", "HR", "(left|center|right)", "#I", 'D', "L", ""},
00128 {0, "align", "DIV, H1, H2, H3, H4, H5, H6, P", "(left|center|right|justify)", "#I", 'D', "L", "align, text alignment"},
00129 {0, "align", "COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR", "(left|center|right|justify|char)", "#I", 'n', "", ""},
00130 {0, "alink", "BODY", "%Color;", "#I", 'D', "L", "color of selected links"},
00131 {0, "alt", "APPLET", "%Text;", "#I", 'D', "L", "short description"},
00132 {0, "alt", "AREA, IMG", "%Text;", "#R", 'n', "", "short description"},
00133 {0, "alt", "INPUT", "CDATA", "#I", 'n', "", "short description"},
00134 {0, "archive", "APPLET", "CDATA", "#I", 'D', "L", "comma-separated archive list"},
00135 {0, "archive", "OBJECT", "CDATA", "#I", 'n', "", "space-separated list of URIs"},
00136 {0, "axis", "TD, TH", "CDATA", "#I", 'n', "", "comma-separated list of related headers"},
00137 {0, "background", "BODY", "%URI;", "#I", 'D', "L", "texture tile for document background"},
00138 {0, "bgcolor", "TABLE", "%Color;", "#I", 'D', "L", "background color for cells"},
00139 {0, "bgcolor", "TR", "%Color;", "#I", 'D', "L", "background color for row"},
00140 {0, "bgcolor", "TD, TH", "%Color;", "#I", 'D', "L", "cell background color"},
00141 {0, "bgcolor", "BODY", "%Color;", "#I", 'D', "L", "document background color"},
00142 {0, "border", "TABLE", "%Pixels;", "#I", 'n', "", "controls frame width around table"},
00143 {0, "border", "IMG, OBJECT", "%Pixels;", "#I", 'D', "L", "link border width"},
00144 {0, "cellpadding", "TABLE", "%Length;", "#I", 'n', "", "spacing within cells"},
00145 {0, "cellspacing", "TABLE", "%Length;", "#I", 'n', "", "spacing between cells"},
00146 {0, "char", "COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR", "%Character;", "#I", 'n', "", "alignment char, e.g. char :"},
00147 {0, "charoff", "COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR", "%Length;", "#I", 'n', "", "offset for alignment char"},
00148 {0, "charset", "A, LINK, SCRIPT", "%Charset;", "#I", 'n', "", "char encoding of linked resource"},
00149 {0, "checked", "INPUT", "(checked)", "#I", 'n', "", "for radio buttons and check boxes"},
00150 {0, "cite", "BLOCKQUOTE, Q", "%URI;", "#I", 'n', "", "URI for source document or msg"},
00151 {0, "cite", "DEL, INS", "%URI;", "#I", 'n', "", "info on reason for change"},
00152 {0, "class", "@BASE, BASEFONT, HEAD, HTML, META, PARAM, SCRIPT, STYLE, TITLE", "CDATA", "#I", 'n', "", "space-separated list of classes"},
00153 {0, "classid", "OBJECT", "%URI;", "#I", 'n', "", "identifies an implementation"},
00154 {0, "clear", "BR", "(left|all|right|none)", "none", 'D', "L", "control of text flow"},
00155 {0, "code", "APPLET", "CDATA", "#I", 'D', "L", "applet class file"},
00156 {0, "codebase", "OBJECT", "%URI;", "#I", 'n', "", "base URI for classid, data, archive"},
00157 {0, "codebase", "APPLET", "%URI;", "#I", 'D', "L", "optional base URI for applet"},
00158 {0, "codetype", "OBJECT", "%ContentType;", "#I", 'n', "", "content type for code"},
00159 {0, "color", "BASEFONT, FONT", "%Color;", "#I", 'D', "L", "text color"},
00160 {0, "cols", "FRAMESET", "%MultiLengths;", "#I", 'n', "F", "list of lengths, default: 100% (1 col)"},
00161 {0, "cols", "TEXTAREA", "NUMBER", "#R", 'n', "", ""},
00162 {0, "colspan", "TD, TH", "NUMBER", "1", 'n', "", "number of cols spanned by cell"},
00163 {0, "compact", "DIR, DL, MENU, OL, UL", "(compact)", "#I", 'D', "L", "reduced interitem spacing"},
00164 {0, "content", "META", "CDATA", "#R", 'n', "", "associated information"},
00165 {0, "coords", "AREA", "%Coords;", "#I", 'n', "", "comma-separated list of lengths"},
00166 {0, "coords", "A", "%Coords;", "#I", 'n', "", "for use with client-side image maps"},
00167 {0, "data", "OBJECT", "%URI;", "#I", 'n', "", "reference to object$s data"},
00168 {0, "datetime", "DEL, INS", "%Datetime;", "#I", 'n', "", "date and time of change"},
00169 {0, "declare", "OBJECT", "(declare)", "#I", 'n', "", "declare but don$t instantiate flag"},
00170 {0, "defer", "SCRIPT", "(defer)", "#I", 'n', "", "UA may defer execution of script"},
00171 {0, "dir", "@APPLET, BASE, BASEFONT, BDO, BR, FRAME, FRAMESET, IFRAME, PARAM, SCRIPT", "(ltr|rtl)", "#I", 'n', "", "direction for weak/neutral text"},
00172 {0, "dir", "BDO", "(ltr|rtl)", "#R", 'n', "", "directionality"},
00173 {0, "disabled", "BUTTON, INPUT, OPTGROUP, OPTION, SELECT, TEXTAREA", "(disabled)", "#I", 'n', "", "unavailable in this context"},
00174 {0, "enctype", "FORM", "%ContentType;", "application/x-www- form-urlencoded", 'n', "", ""},
00175 {0, "face", "BASEFONT, FONT", "CDATA", "#I", 'D', "L", "comma-separated list of font names"},
00176 {0, "for", "LABEL", "IDREF", "#I", 'n', "", "matches field ID value"},
00177 {0, "frame", "TABLE", "%TFrame;", "#I", 'n', "", "which parts of frame to render"},
00178 {0, "frameborder", "FRAME, IFRAME", "(1|0)", "1", 'n', "F", "request frame borders?"},
00179 {0, "headers", "TD, TH", "IDREFS", "#I", 'n', "", "list of id$s for header cells"},
00180 {0, "height", "IFRAME", "%Length;", "#I", 'f', "L", "frame height"},
00181 {0, "height", "TD, TH", "%Length;", "#I", 'D', "L", "height for cell"},
00182 {0, "height", "IMG, OBJECT", "%Length;", "#I", 'n', "", "override height"},
00183 {0, "height", "APPLET", "%Length;", "#R", 'D', "L", "initial height"},
00184 {0, "href", "A, AREA, LINK", "%URI;", "#I", 'n', "", "URI for linked resource"},
00185 {0, "href", "BASE", "%URI;", "#I", 'n', "", "URI that acts as base URI"},
00186 {0, "hreflang", "A, LINK", "%LanguageCode;", "#I", 'n', "", "language code"},
00187 {0, "hspace", "APPLET, IMG, OBJECT", "%Pixels;", "#I", 'D', "L", "horizontal gutter"},
00188 {0, "http-equiv", "META", "NAME", "#I", 'n', "", "HTTP response header name"},
00189 {0, "id", "@BASE, HEAD, HTML, META, SCRIPT, STYLE, TITLE", "ID", "#I", 'n', "", "document-wide unique id"},
00190 {0, "ismap", "IMG, INPUT", "(ismap)", "#I", 'n', "", "use server-side image map"},
00191 {0, "label", "OPTION", "%Text;", "#I", 'n', "", "for use in hierarchical menus"},
00192 {0, "label", "OPTGROUP", "%Text;", "#R", 'n', "", "for use in hierarchical menus"},
00193 {0, "lang", "@APPLET, BASE, BASEFONT, BR, FRAME, FRAMESET, IFRAME, PARAM, SCRIPT", "%LanguageCode;", "#I", 'n', "", "language code"},
00194 {0, "language", "SCRIPT", "CDATA", "#I", 'D', "L", "predefined script language name"},
00195 {0, "link", "BODY", "%Color;", "#I", 'D', "L", "color of links"},
00196 {0, "longdesc", "IMG", "%URI;", "#I", 'n', "", "link to long description (complements alt)"},
00197 {0, "longdesc", "FRAME, IFRAME", "%URI;", "#I", 'n', "F", "link to long description (complements title)"},
00198 {0, "marginheight", "FRAME, IFRAME", "%Pixels;", "#I", 'n', "F", "margin height in pixels"},
00199 {0, "marginwidth", "FRAME, IFRAME", "%Pixels;", "#I", 'n', "F", "margin widths in pixels"},
00200 {0, "maxlength", "INPUT", "NUMBER", "#I", 'n', "", "max chars for text fields"},
00201 {0, "media", "STYLE", "%MediaDesc;", "#I", 'n', "", "designed for use with these media"},
00202 {0, "media", "LINK", "%MediaDesc;", "#I", 'n', "", "for rendering on these media"},
00203 {0, "method", "FORM", "(GET|POST)", "GET", 'n', "", "HTTP method used to submit the form"},
00204 {0, "multiple", "SELECT", "(multiple)", "#I", 'n', "", "default is single selection"},
00205 {0, "name", "BUTTON, TEXTAREA", "CDATA", "#I", 'n', "", ""},
00206 {0, "name", "APPLET", "CDATA", "#I", 'D', "L", "allows applets to find each other"},
00207 {0, "name", "SELECT", "CDATA", "#I", 'n', "", "field name"},
00208 {0, "name", "FORM", "CDATA", "#I", 'n', "", "name of form for scripting"},
00209 {0, "name", "FRAME, IFRAME", "CDATA", "#I", 'n', "F", "name of frame for targetting"},
00210 {0, "name", "IMG", "CDATA", "#I", 'n', "", "name of image for scripting"},
00211 {0, "name", "A", "CDATA", "#I", 'n', "", "named link end"},
00212 {0, "name", "INPUT, OBJECT", "CDATA", "#I", 'n', "", "submit as part of form"},
00213 {0, "name", "MAP", "CDATA", "#R", 'n', "", "for reference by usemap"},
00214 {0, "name", "PARAM", "CDATA", "#R", 'n', "", "property name"},
00215 {0, "name", "META", "NAME", "#I", 'n', "", "metainformation name"},
00216 {0, "nohref", "AREA", "(nohref)", "#I", 'n', "", "this region has no action"},
00217 {0, "noresize", "FRAME", "(noresize)", "#I", 'n', "F", "allow users to resize frames?"},
00218 {0, "noshade", "HR", "(noshade)", "#I", 'D', "L", ""},
00219 {0, "nowrap", "TD, TH", "(nowrap)", "#I", 'D', "L", "suppress word wrap"},
00220 {0, "object", "APPLET", "CDATA", "#I", 'D', "L", "serialized applet file"},
00221 {0, "onblur", "A, AREA, BUTTON, INPUT, LABEL, SELECT, TEXTAREA", "%Script;", "#I", 'n', "", "the element lost the focus"},
00222 {0, "onchange", "INPUT, SELECT, TEXTAREA", "%Script;", "#I", 'n', "", "the element value was changed"},
00223 {0, "onclick", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer button was clicked"},
00224 {0, "ondblclick", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer button was double clicked"},
00225 {0, "onfocus", "A, AREA, BUTTON, INPUT, LABEL, SELECT, TEXTAREA", "%Script;", "#I", 'n', "", "the element got the focus"},
00226 {0, "onkeydown", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a key was pressed down"},
00227 {0, "onkeypress", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a key was pressed and released"},
00228 {0, "onkeyup", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a key was released"},
00229 {0, "onload", "FRAMESET", "%Script;", "#I", 'n', "F", "all the frames have been loaded"},
00230 {0, "onload", "BODY", "%Script;", "#I", 'n', "", "the document has been loaded"},
00231 {0, "onmousedown", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer button was pressed down"},
00232 {0, "onmousemove", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer was moved within"},
00233 {0, "onmouseout", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer was moved away"},
00234 {0, "onmouseover", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer was moved onto"},
00235 {0, "onmouseup", "@APPLET, BASE, BASEFONT, BDO, BR, FONT, FRAME, FRAMESET, HEAD, HTML, IFRAME, ISINDEX, META, PARAM, SCRIPT, STYLE, TITLE", "%Script;", "#I", 'n', "", "a pointer button was released"},
00236 {0, "onreset", "FORM", "%Script;", "#I", 'n', "", "the form was reset"},
00237 {0, "onselect", "INPUT, TEXTAREA", "%Script;", "#I", 'n', "", "some text was selected"},
00238 {0, "onsubmit", "FORM", "%Script;", "#I", 'n', "", "the form was submitted"},
00239 {0, "onunload", "FRAMESET", "%Script;", "#I", 'n', "F", "all the frames have been removed"},
00240 {0, "onunload", "BODY", "%Script;", "#I", 'n', "", "the document has been removed"},
00241 {0, "profile", "HEAD", "%URI;", "#I", 'n', "", "named dictionary of meta info"},
00242 {0, "prompt", "ISINDEX", "%Text;", "#I", 'D', "L", "prompt message"},
00243 {0, "readonly", "TEXTAREA", "(readonly)", "#I", 'n', "", ""},
00244 {0, "readonly", "INPUT", "(readonly)", "#I", 'n', "", "for text and passwd"},
00245 {0, "rel", "A, LINK", "%LinkTypes;", "#I", 'n', "", "forward link types"},
00246 {0, "rev", "A, LINK", "%LinkTypes;", "#I", 'n', "", "reverse link types"},
00247 {0, "rows", "FRAMESET", "%MultiLengths;", "#I", 'n', "F", "list of lengths, default: 100% (1 row)"},
00248 {0, "rows", "TEXTAREA", "NUMBER", "#R", 'n', "", ""},
00249 {0, "rowspan", "TD, TH", "NUMBER", "1", 'n', "", "number of rows spanned by cell"},
00250 {0, "rules", "TABLE", "%TRules;", "#I", 'n', "", "rulings between rows and cols"},
00251 {0, "scheme", "META", "CDATA", "#I", 'n', "", "select form of content"},
00252 {0, "scope", "TD, TH", "%Scope;", "#I", 'n', "", "scope covered by header cells"},
00253 {0, "scrolling", "FRAME, IFRAME", "(yes|no|auto)", "auto", 'n', "F", "scrollbar or none"},
00254 {0, "selected", "OPTION", "(selected)", "#I", 'n', "", ""},
00255 {0, "shape", "AREA", "%Shape;", "rect", 'n', "", "controls interpretation of coords"},
00256 {0, "shape", "A", "%Shape;", "rect", 'n', "", "for use with client-side image maps"},
00257 {0, "size", "HR", "%Pixels;", "#I", 'D', "L", ""},
00258 {0, "size", "FONT", "CDATA", "#I", 'D', "L", "[+|-]nn e.g. size=+1, size=4"},
00259 {0, "size", "INPUT", "CDATA", "#I", 'n', "", "specific to each type of field"},
00260 {0, "size", "BASEFONT", "CDATA", "#R", 'D', "L", "base font size for FONT elements"},
00261 {0, "size", "SELECT", "NUMBER", "#I", 'n', "", "rows visible"},
00262 {0, "span", "COL", "NUMBER", "1", 'n', "", "COL attributes affect N columns"},
00263 {0, "span", "COLGROUP", "NUMBER", "1", 'n', "", "default number of columns in group"},
00264 {0, "src", "SCRIPT", "%URI;", "#I", 'n', "", "URI for an external script"},
00265 {0, "src", "INPUT", "%URI;", "#I", 'n', "", "for fields with images"},
00266 {0, "src", "FRAME, IFRAME", "%URI;", "#I", 'n', "F", "source of frame content"},
00267 {0, "src", "IMG", "%URI;", "#R", 'n', "", "URI of image to embed"},
00268 {0, "standby", "OBJECT", "%Text;", "#I", 'n', "", "message to show while loading"},
00269 {0, "start", "OL", "NUMBER", "#I", 'D', "L", "starting sequence number"},
00270 {0, "style", "@BASE, BASEFONT, HEAD, HTML, META, PARAM, SCRIPT, STYLE, TITLE", "%StyleSheet;", "#I", 'n', "", "associated style info"},
00271 {0, "summary", "TABLE", "%Text;", "#I", 'n', "", "purpose/structure for speech output"},
00272 {0, "tabindex", "A, AREA, BUTTON, INPUT, OBJECT, SELECT, TEXTAREA", "NUMBER", "#I", 'n', "", "position in tabbing order"},
00273 {0, "target", "A, AREA, BASE, FORM, LINK", "%FrameTarget;", "#I", 'f', "L", "render in this frame"},
00274 {0, "text", "BODY", "%Color;", "#I", 'D', "L", "document text color"},
00275 {0, "title", "@BASE, BASEFONT, HEAD, HTML, META, PARAM, SCRIPT, TITLE", "%Text;", "#I", 'n', "", "advisory title"},
00276 {0, "type", "A, LINK", "%ContentType;", "#I", 'n', "", "advisory content type"},
00277 {0, "type", "OBJECT", "%ContentType;", "#I", 'n', "", "content type for data"},
00278 {0, "type", "PARAM", "%ContentType;", "#I", 'n', "", "content type for value when valuetype=ref"},
00279 {0, "type", "SCRIPT", "%ContentType;", "#R", 'n', "", "content type of script language"},
00280 {0, "type", "STYLE", "%ContentType;", "#R", 'n', "", "content type of style language"},
00281 {0, "type", "INPUT", "%InputType;", "TEXT", 'n', "", "what kind of widget is needed"},
00282 {0, "type", "LI", "%LIStyle;", "#I", 'D', "L", "list item style"},
00283 {0, "type", "OL", "%OLStyle;", "#I", 'D', "L", "numbering style"},
00284 {0, "type", "UL", "%ULStyle;", "#I", 'D', "L", "bullet style"},
00285 {0, "type", "BUTTON", "(button|submit|reset)", "submit", 'n', "", "for use as form button"},
00286 {0, "usemap", "IMG, INPUT, OBJECT", "%URI;", "#I", 'n', "", "use client-side image map"},
00287 {0, "valign", "COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR", "(top|middle|bottom|baseline)", "#I", 'n', "", "vertical alignment in cells"},
00288 {0, "value", "INPUT", "CDATA", "#I", 'n', "", "Specify for radio buttons and checkboxes"},
00289 {0, "value", "OPTION", "CDATA", "#I", 'n', "", "defaults to element content"},
00290 {0, "value", "PARAM", "CDATA", "#I", 'n', "", "property value"},
00291 {0, "value", "BUTTON", "CDATA", "#I", 'n', "", "sent to server when submitted"},
00292 {0, "value", "LI", "NUMBER", "#I", 'D', "L", "reset sequence number"},
00293 {0, "valuetype", "PARAM", "(DATA|REF|OBJECT)", "DATA", 'n', "", "How to interpret value"},
00294 {0, "version", "HTML", "CDATA", "%HTML.Version;", 'D', "L", "Constant"},
00295 {0, "vlink", "BODY", "%Color;", "#I", 'D', "L", "color of visited links"},
00296 {0, "vspace", "APPLET, IMG, OBJECT", "%Pixels;", "#I", 'D', "L", "vertical gutter"},
00297 {0, "width", "HR", "%Length;", "#I", 'D', "L", ""},
00298 {0, "width", "IFRAME", "%Length;", "#I", 'f', "L", "frame width"},
00299 {0, "width", "IMG, OBJECT", "%Length;", "#I", 'n', "", "override width"},
00300 {0, "width", "TABLE", "%Length;", "#I", 'n', "", "table width"},
00301 {0, "width", "TD, TH", "%Length;", "#I", 'D', "L", "width for cell"},
00302 {0, "width", "APPLET", "%Length;", "#R", 'D', "L", "initial width"},
00303 {0, "width", "COL", "%MultiLength;", "#I", 'n', "", "column width specification"},
00304 {0, "width", "COLGROUP", "%MultiLength;", "#I", 'n', "", "default width for enclosed COLs"},
00305 {0, "width", "PRE", "NUMBER", "#I", 'D', "L", "predefined characters width"},
00306 {0, NULL, NULL, NULL, NULL, '\0', NULL, NULL}};
00307 sAttrRefer* gHtmlParser::lAttrRef=nil;
00308
00309 t_int16 gHtmlParser::nDefAttrNorms=-1;
00310 sAttrNorm gHtmlParser::lDefAttrNorms[]={
00311 { -2, "\0", NULL },
00312 { XH_IDTAG_ANCHOR, "HREF", NULL },
00313 { XH_IDTAG_IMG, "SRC, ALT", NULL },
00314 { -1, NULL, NULL }};
00315
00316
00317
00318
00319 sHAttrType gHtmlParser::lHAttrTypes[]={
00320 { -2, "%HTML-Attribute-types", e_HAT_None, e_HAT_Single },
00321 { 1, "CDATA", e_HAT_CDATA, e_HAT_Single },
00322 { 1, "ID", e_HAT_ID, e_HAT_Single },
00323 { 1, "IDREF", e_HAT_IDREF, e_HAT_Single },
00324 { 1, "IDREFS", e_HAT_IDREFS, e_HAT_IDREF },
00325 { 1, "NAME", e_HAT_NAME, e_HAT_Single },
00326 { 1, "NUMBER", e_HAT_NUMBER, e_HAT_Single },
00327 { 0, "CAlign", e_HATp_CAlign, e_HAT_Single },
00328 { 0, "Character", e_HATp_Character, e_HAT_Single },
00329 { 0, "Charset", e_HATp_Charset, e_HAT_Single },
00330 { 0, "Charsets", e_HATp_Charsets, e_HATp_Charset },
00331 { 0, "Color", e_HATp_Color, e_HAT_Single },
00332 { 0, "ContentType", e_HATp_ContentType, e_HAT_Single },
00333 { 0, "ContentTypes", e_HATp_ContentTypes, e_HATp_ContentType },
00334 { 0, "Coords", e_HATp_Coords, e_HAT_Single },
00335 { 0, "Datetime", e_HATp_DateTime, e_HAT_Single },
00336 { 0, "FrameTarget", e_HATp_FrameTarget, e_HAT_Single },
00337 { 0, "HTML.Version", e_HATp_HtmlVersion, e_HAT_Single },
00338 { 0, "IAlign", e_HATp_IAlign, e_HAT_Single },
00339 { 0, "InputType", e_HATp_InputType, e_HAT_Single },
00340 { 0, "LAlign", e_HATp_LAlign, e_HAT_Single },
00341 { 0, "LanguageCode", e_HATp_LanguageCode, e_HAT_Single },
00342 { 0, "Length", e_HATp_Length, e_HAT_Single },
00343 { 0, "LinkTypes", e_HATp_LinkTypes, e_HAT_Single },
00344 { 0, "LIStyle", e_HATp_LIStyle, e_HAT_Single },
00345 { 0, "MediaDesc", e_HATp_MediaDesc, e_HAT_Single },
00346 { 0, "MultiLength", e_HATp_MultiLength, e_HAT_Single },
00347 { 0, "MultiLengths", e_HATp_MultiLengths, e_HATp_MultiLength },
00348 { 0, "OLStyle", e_HATp_OLStyle, e_HAT_Single },
00349 { 0, "Pixels", e_HATp_Pixels, e_HAT_Single },
00350 { 0, "Scope", e_HATp_Scope, e_HAT_Single },
00351 { 0, "Script", e_HATp_Script, e_HAT_Single },
00352 { 0, "Shape", e_HATp_Shape, e_HAT_Single },
00353 { 0, "StyleSheet", e_HATp_StyleSheet, e_HAT_Single },
00354 { 0, "TAlign", e_HATp_TAlign, e_HAT_Single },
00355 { 0, "Text", e_HATp_Text, e_HAT_Single },
00356 { 0, "TFrame", e_HATp_TFrame, e_HAT_Single },
00357 { 0, "TRules", e_HATp_TRules, e_HAT_Single },
00358 { 0, "ULStyle", e_HATp_ULStyle, e_HAT_Single },
00359 { 0, "URI", e_HATp_URI, e_HAT_Single },
00360 { 10, "1|0", e_HATp_EnumChoice, -1 },
00361 { 10, "button|submit|reset", e_HATp_EnumChoice, -1 },
00362 { 10, "checked", e_HATp_EnumChoice, -1 },
00363 { 10, "compact", e_HATp_EnumChoice, -1 },
00364 { 10, "DATA|REF|OBJECT", e_HATp_EnumChoice, -1 },
00365 { 10, "declare", e_HATp_EnumChoice, -1 },
00366 { 10, "defer", e_HATp_EnumChoice, -1 },
00367 { 10, "disabled", e_HATp_EnumChoice, -1 },
00368 { 10, "GET|POST", e_HATp_EnumChoice, -1 },
00369 { 10, "ismap", e_HATp_EnumChoice, -1 },
00370 { 10, "left|all|right|none", e_HATp_EnumChoice, -1 },
00371 { 10, "left|center|right|justify|char", e_HATp_EnumChoice, -1 },
00372 { 10, "left|center|right|justify", e_HATp_EnumChoice, -1 },
00373 { 10, "left|center|right", e_HATp_EnumChoice, -1 },
00374 { 10, "ltr|rtl", e_HATp_EnumChoice, -1 },
00375 { 10, "multiple", e_HATp_EnumChoice, -1 },
00376 { 10, "nohref", e_HATp_EnumChoice, -1 },
00377 { 10, "noresize", e_HATp_EnumChoice, -1 },
00378 { 10, "noshade", e_HATp_EnumChoice, -1 },
00379 { 10, "nowrap", e_HATp_EnumChoice, -1 },
00380 { 10, "readonly", e_HATp_EnumChoice, -1 },
00381 { 10, "selected", e_HATp_EnumChoice, -1 },
00382 { 10, "top|middle|bottom|baseline", e_HATp_EnumChoice, -1 },
00383 { 10, "yes|no|auto", e_HATp_EnumChoice, -1 },
00384 { -2, "4.01", e_HAT_None, -1 },
00385 { -2, "v0.0", e_HAT_None, -1 }};
00386
00387
00388 gHashTriple* gHtmlParser::hElems=nil;
00389
00390 sAttrRefer::sAttrRefer (t_int16 nAttrs)
00391 : nlAttrs( nAttrs ),
00392 sAttrs( nil ),
00393 lstRelated( nil ),
00394 idxUniqMax( -1 ),
00395 sLUniqs( nil ),
00396 idxLUniqs( nil )
00397 {
00398 sAttrs = new gString[ nlAttrs ];
00399 ASSERTION(sAttrs!=nil,"sAttrs!=nil");
00400 lstRelated = new gSmartList[ nlAttrs ];
00401 ASSERTION(lstRelated!=nil,"lstRelated!=nil");
00402 lstIsAllButEtc = new gSwitch[ nlAttrs ];
00403 ASSERTION(lstIsAllButEtc!=nil,"lstIsAllButEtc!=nil");
00404 sLUniqs = new gString[ nlAttrs ];
00405 ASSERTION(sLUniqs!=nil,"sLUniqs!=nil");
00406 idxLUniqs = new t_int16[ nlAttrs ];
00407 ASSERTION(idxLUniqs!=nil,"idxLUniqs!=nil");
00408 for (t_int16 idx=0; idx<nlAttrs; idx++) {
00409 idxLUniqs[ idx ] = -1;
00410 }
00411 }
00412
00413 sAttrRefer::~sAttrRefer ()
00414 {
00415 delete[] sAttrs;
00416 delete[] lstRelated;
00417 delete[] lstIsAllButEtc;
00418 delete[] sLUniqs;
00419 delete[] idxLUniqs;
00420 }
00421
00422 t_int16 sAttrRefer::FindAttr (char* attrName, t_int16& uniqIdx)
00423 {
00424
00425 for (uniqIdx=0; uniqIdx<=idxUniqMax; uniqIdx++) {
00426 if ( sLUniqs[ uniqIdx ].Match( attrName ) ) {
00427 return idxLUniqs[ uniqIdx ];
00428 }
00429 }
00430 uniqIdx = -1;
00431 return -1;
00432 }
00433
00434 t_int16 sAttrRefer::FindAttr (char* attrName, char* strTag, t_int16& uniqIdx)
00435 {
00436 bool isAllBut;
00437 t_int16 idxAbs, firstIdx = FindAttr( attrName, uniqIdx );
00438
00439 sOutHelper.SetEmpty();
00440
00441 if ( firstIdx<0 ) return -1;
00442 ASSERTION(uniqIdx>=0,"uniqIdx>=0");
00443
00444
00445
00446 for (idxAbs=firstIdx; idxAbs<nlAttrs; idxAbs++) {
00447 isAllBut = lstIsAllButEtc[ idxAbs ].IsOn();
00448 if ( isAllBut==false ) {
00449
00450 if ( lstRelated[ idxAbs ].Match( strTag )>0 )
00451 return idxAbs;
00452 }
00453 else {
00454 if ( lstRelated[ idxAbs ].Match( strTag )==0 )
00455 return idxAbs;
00456 sOutHelper.Set( lstRelated[ idxAbs ].Str(1) );
00457
00458 }
00459
00460
00461
00462 if ( sAttrs[ idxAbs ].Match( attrName )==false ) break;
00463 }
00464
00465 return -1;
00466 }
00467
00468 gHtmlCouple::gHtmlCouple (unsigned lineNr, char* sText)
00469 : iLine( lineNr ),
00470 idTag( XH_NOTAG ),
00471 idEndTag( -1 ),
00472 pHStr( nil ),
00473 pElem( nil ),
00474 synError( 0 ),
00475 coupleId( -1 ),
00476 theDocType( 0 ),
00477 oCouple( nil )
00478 {
00479 if ( sText!=nil ) AddText( sText );
00480 }
00481
00482 gHtmlCouple::gHtmlCouple (unsigned lineNr, char* strTag, char* sAttrLst, bool doAddSkippedTags)
00483 : iLine( lineNr ),
00484 idTag( -1 ),
00485 idEndTag( -1 ),
00486 pHStr( nil ),
00487 pElem( nil ),
00488 synError( 0 ),
00489 coupleId( -1 ),
00490 theDocType( 0 ),
00491 oCouple( nil )
00492 {
00493 DBGPRINT_MIN("DBG: Couple:%s, sAttrLst:%s\n",strTag,sAttrLst);
00494 AddTag( strTag );
00495 if ( synError==0 || (synError==XH_SKIP_TAG && doAddSkippedTags==true) ) {
00496 if ( sAttrLst!=nil ) {
00497 Add( sAttrLst );
00498
00499 attrL.Set( sAttrLst );
00500 }
00501 }
00502 }
00503
00504 gHtmlCouple::~gHtmlCouple ()
00505 {
00506 delete pHStr;
00507 delete oCouple;
00508 }
00509
00510 bool gHtmlCouple::IsOk ()
00511 {
00512 ASSERTION(IsText() || (IsText()==false && ((idTag>=0 && pElem!=nil) || (idTag==-1 && synError==-1 && pElem==nil))),"gHtmlCouple::IsOk");
00513 return synError==0;
00514 }
00515
00516 char* gHtmlCouple::GetStr ()
00517 {
00518 char* str;
00519 sWholeTag.SetEmpty();
00520 if ( IsText() ) {
00521 str = Str( 1 );
00522 if ( pHStr==nil ) return str;
00523 char* strX = pHStr->Str();
00524 if ( strX==nil ) return nil;
00525 if ( strX[0]==0 ) return str;
00526 return strX;
00527 }
00528 sWholeTag.Add( '<' );
00529 if ( IsTagEnd() ) sWholeTag.Add( '/' );
00530 sWholeTag.AddString( sTag );
00531 if ( N()==2 ) {
00532 sWholeTag.Add( ' ' );
00533
00534 sWholeTag.Add( attrL.Str() );
00535 }
00536 sWholeTag.Add( '>' );
00537 return sWholeTag.Str();
00538 }
00539
00540 char* gHtmlCouple::GetStrForTree ()
00541 {
00542 char* str;
00543
00544 sKeepStr = "TODO:::TODO:::";
00545 sWholeTag.SetEmpty();
00546 if ( IsText() ) {
00547 str = Str( 1 );
00548 if ( pHStr==nil ) return str;
00549 char* strX = pHStr->Str();
00550 if ( strX==nil ) return nil;
00551 if ( strX[0]==0 ) return str;
00552 return strX;
00553 }
00554 sWholeTag.Add( '<' );
00555 if ( IsTagEnd() ) sWholeTag.Add( '/' );
00556 sWholeTag.AddString( sTag );
00557 if ( N()==2 ) {
00558 sWholeTag.Add( ' ' );
00559
00560 sWholeTag.Add( attrL.Str() );
00561 }
00562 sWholeTag.Add( '>' );
00563 sprintf(sKeepStr.Str(),"[%u]",N());
00564 sWholeTag.AddString( sKeepStr );
00565 return sWholeTag.Str();
00566 }
00567
00568 gString& gHtmlCouple::TagString (bool forceEnd)
00569 {
00570 sTagStr.SetEmpty();
00571 if ( IsText() ) return sTagStr;
00572 sTagStr.Add( '<' );
00573 if ( forceEnd || IsTagEnd() ) sTagStr.Add( '/' );
00574 sTagStr.AddString( sTag );
00575 sTagStr.Add( '>' );
00576 return sTagStr;
00577 }
00578
00579 char* gHtmlCouple::GetHRef ()
00580 {
00581 if ( IsText()==true ) return nil;
00582 if ( IsAnchor()==false || IsTagEnd()==true ) return nil;
00583 return attrL.Find( "HREF", true );
00584 }
00585
00586 unsigned gHtmlCouple::Add (char* s)
00587 {
00588 gString sTrim( s );
00589 sTrim.Trim();
00590 return gList::Add( sTrim );
00591 }
00592
00593 unsigned gHtmlCouple::Add (gString& copy)
00594 {
00595 return Add( copy.Str() );
00596 }
00597
00598 unsigned gHtmlCouple::AddText (char* s)
00599 {
00600
00601 Add( s );
00602 ASSERTION(N()==1,"N()==1");
00603 s = Str( 1 );
00604 pHStr = new gHtmlString( s );
00605 ASSERTION(pHStr!=nil,"pHStr!=nil");
00606 return (unsigned)pHStr->IsOk();
00607 }
00608
00609 unsigned gHtmlCouple::AddTag (char* strTag)
00610 {
00611 t_uchar uChr;
00612
00613 ASSERTION(strTag!=nil,"strTag!=nil");
00614 gString s( strTag );
00615 s.Trim();
00616 if ( s.IsEmpty() ) return 0;
00617 gString sUp( s );
00618 uChr = s[1];
00619 if ( uChr=='!' || uChr=='?' ) {
00620 synError = XH_SKIP_TAG;
00621 theDocType = (t_int16)sUp.Match("!DOCTYPE");
00622 }
00623
00624 sUp.UpString();
00625 gList::Add( sUp );
00626 sTag = sUp;
00627 return 1;
00628 }
00629
00630 gHtmlContent::gHtmlContent ()
00631 : nLines( 0 ),
00632 theHtmlOpt( nil )
00633 {
00634 }
00635
00636 gHtmlContent::~gHtmlContent ()
00637 {
00638 }
00639
00640 gHtmlCouple* gHtmlContent::GetCouple (unsigned idx)
00641 {
00642 ASSERTION(idx>0,"idx>0");
00643 if ( IsValidIndex(idx)==false ) return nil;
00644 gStorage* pObjx = GetObjectPtr( idx );
00645 ASSERTION(pObjx!=nil,"pObjx!=nil");
00646 return (gHtmlCouple*)pObjx;
00647 }
00648
00649 char* gHtmlContent::Str (unsigned idx)
00650 {
00651 gHtmlCouple* pObjC = GetCouple( idx );
00652 if ( pObjC==nil ) return nil;
00653
00654 return pObjC->GetStr();
00655 }
00656
00657 unsigned gHtmlContent::Add (char* s)
00658 {
00659 nLines++;
00660 if ( s==nil || s[0]==0 ) return 0;
00661 thisAddHmtlLine( nLines, s );
00662 return nLines;
00663 }
00664
00665 int gHtmlContent::UpCaseAttributes (char* strTag, gString& sRes)
00666 {
00667
00668 unsigned i, n;
00669 unsigned idx;
00670 t_uchar uChr;
00671 short quoteCount=0;
00672 bool tidyApplied=false;
00673
00674 ASSERTION(strTag!=nil,"strTag!=nil");
00675
00676
00677 if ( GetHtmlOpt().outOpt.HasTidy( strTag, idx ) ) {
00678
00679 gString s;
00680
00681 for (i=1, n=sRes.Length(); i<=n; i++) {
00682 uChr = sRes[i];
00683 if ( uChr=='"' ) quoteCount = quoteCount==0;
00684 if ( uChr=='=' && quoteCount==0 ) {
00685 s.TrimRight();
00686 }
00687 if ( uChr==' ' ) {
00688 if ( sRes[i-1]=='=' ) continue;
00689 }
00690 s.Add( uChr );
00691 }
00692 sRes = s;
00693 DBGPRINT("DBG: Apply tidy for [%s|%s] (idx=%u)\n",strTag,sRes.Str(),idx);
00694 s.SetEmpty();
00695 gParam aParam( sRes, " ", gParam::e_NormalQuoted );
00696
00697 for (i=1, n=aParam.N(); i<=n; i++) {
00698 gString sAssign;
00699 gString sTemp( aParam.Str(i) );
00700 sTemp.Trim();
00701 gParam paramVal( sTemp, "=", gParam::e_StopSplitOnFirst );
00702 unsigned nVal = paramVal.N();
00703 ASSERTION(nVal==1 || nVal==2,"nVal...");
00704 sAssign.Set( paramVal.Str(1) );
00705 if ( nVal>=2 ) {
00706 sAssign.UpString();
00707
00708 bool doTidyAttr = GetHtmlOpt().outOpt.HasTidyAttr( strTag, sAssign );
00709 gString sVal( paramVal.Str(2) );
00710 sVal.Trim();
00711 if ( doTidyAttr ) {
00712 if ( sVal[1]!='"' && sVal[1]!='\'' ) {
00713 gString sTempVal( "\"" );
00714 sTempVal.AddString( sVal );
00715 sTempVal.Add( "\"" );
00716 sVal = sTempVal;
00717 }
00718 else {
00719 switch ( sVal[1] ) {
00720 case '"':
00721 if ( sVal[sVal.Length()]=='\'' ) {
00722 sVal[sVal.Length()] = '"';
00723 }
00724 break;
00725 case '\'':
00726
00727 if ( sVal.Find( '"' )==0 && sVal[sVal.Length()]=='\'' ) {
00728 sVal[1] = '"';
00729 sVal[sVal.Length()] = '"';
00730 }
00731 default:
00732 break;
00733 }
00734 }
00735 }
00736 sAssign.Add( "=" );
00737 sAssign.AddString( sVal );
00738 }
00739 DBGPRINT("DBG: param: [%s]\n",sAssign.Str());
00740 if ( i>1 ) s.Add( " " );
00741 s.AddString( sAssign );
00742 }
00743 tidyApplied = sRes.Match( s )==false;
00744
00745 DBGPRINT_WEB3("DBG: Applied tidy(%c): [%s|%s]\n",ISyORn(tidyApplied),sRes.Str(),s.Str());
00746
00747
00748 sRes = s;
00749 }
00750
00751 for (i=1, n=sRes.Length(), quoteCount=0; i<=n; i++) {
00752 uChr = sRes[i];
00753 if ( uChr=='"' ) quoteCount = quoteCount==0;
00754 if ( quoteCount ) continue;
00755 if ( uChr>='a' && uChr<='z' ) sRes[i] = uChr-32;
00756 }
00757
00758 return quoteCount!=0;
00759 }
00760
00761 gHtmlOpt& gHtmlContent::GetHtmlOpt ()
00762 {
00763 ASSERTION(theHtmlOpt!=nil,"theHtmlOpt!=nil");
00764 return *theHtmlOpt;
00765 }
00766
00767 bool gHtmlContent::SetHtmlOpt (gHtmlOpt* pHtmlOpt)
00768 {
00769 return (theHtmlOpt = pHtmlOpt)!=nil;
00770 }
00771
00772 int gHtmlContent::TagError (unsigned lineNr, int error, char* sLine, char* sShortMsg)
00773 {
00774 FILE* fRepErr = stderr;
00775
00776 if ( error==0 ) return 0;
00777 HTML_LOG(fRepErr,LOG_ERROR,"Line %u: Invalid syntax (int-ref %d): '%s'. %s\n",
00778 lineNr,
00779 error,
00780 sLine,
00781 sShortMsg);
00782 return error;
00783 }
00784
00785 void gHtmlContent::Show (bool doShowAll)
00786 {
00787 unsigned i, n=N();
00788
00789 for (i=1; i<=n; i++) {
00790 if ( doShowAll ) printf("C%u/%u: ",i,n);
00791 printf("%s\n",Str(i));
00792 }
00793 }
00794
00795 int gHtmlContent::thisAddHmtlLine (unsigned lineNr, char* s)
00796 {
00797 gString sLine( s ), sRes;
00798 unsigned pos, posEnd, posTagEnd, len=sLine.Length();
00799
00800 ASSERTION(len>0,"len>0");
00801 sLine.Trim();
00802 pos = sLine.Find('<');
00803 posEnd = sLine.Find('>');
00804 if ( pos>0 ) {
00805 if ( posEnd>0 && posEnd+1>pos ) {
00806 sRes.CopyFromTo( sLine, pos+1, posEnd-1 );
00807 sRes.Trim();
00808 posTagEnd = sRes[1]=='/';
00809 return thisAddHtmlTag( lineNr, sRes.Str()+posTagEnd, posTagEnd>0 );
00810 }
00811 }
00812 return thisAddHtmlText( lineNr, sLine.Str() );
00813 }
00814
00815 int gHtmlContent::thisAddHtmlText (unsigned lineNr, char* sText)
00816 {
00817
00818 gHtmlCouple* pCouple;
00819
00820 pCouple = new gHtmlCouple( lineNr, sText );
00821 thisAddCouple( pCouple, *this );
00822 return 0;
00823 }
00824
00825 int gHtmlContent::thisAddHtmlTag (unsigned lineNr, char* strTag, bool isEndTag)
00826 {
00827
00828 int error=0;
00829 unsigned pos;
00830 gHtmlCouple* pCouple;
00831
00832 ASSERTION(strTag!=nil,"strTag!=nil");
00833
00834
00835
00836 pos = gStrControl::Self().Find( strTag, " " );
00837 if ( pos==0 ) {
00838 pCouple = new gHtmlCouple( lineNr, strTag, nil );
00839 }
00840 else {
00841 gString sMainTag( strTag );
00842 sMainTag[ pos ] = 0;
00843 DBGPRINT("DBG: thisAddHtmlTag: %s [%s|%s]\n",strTag,sMainTag.Str(),strTag);
00844 strTag += pos;
00845 gString sAttr( strTag );
00846 sAttr.Trim();
00847 UpCaseAttributes( sMainTag.Str(), sAttr );
00848 pCouple = new gHtmlCouple( lineNr, sMainTag.Str(), sAttr.Str() );
00849 error = pCouple->attrL.lastOpError;
00850 TagError( lineNr, error, strTag, "Invalid attribute" );
00851 }
00852 pCouple->idEndTag = isEndTag ? XH_ENDTAG : -1;
00853 thisAddCouple( pCouple, *this );
00854 return error;
00855 }
00856
00857 int gHtmlContent::thisAddCouple (gHtmlCouple* pCouple, gList& oL)
00858 {
00859 ASSERTION(pCouple!=nil,"pCouple!=nil");
00860 oL.AppendObject( pCouple );
00861 return 0;
00862 }
00863
00864 gHtmlCouple* gHParsed::CurrentCouple ()
00865 {
00866 gHtmlCouple* pCouple;
00867 ASSERTION(Depth()>0,"Depth()>0");
00868 pCouple = (gHtmlCouple*)GetCurrent();
00869 ASSERTION(pCouple!=nil,"pCouple!=nil");
00870 return pCouple;
00871 }
00872
00873 char* gHParsed::Str (unsigned idx)
00874 {
00875 return GetCouple( idx )->GetStr();
00876 }
00877
00878 char* gHParsed::StrMust (unsigned idx)
00879 {
00880 gHtmlCouple* pCouple;
00881 pCouple = (gHtmlCouple*)kMust.GetObjectPtr( idx );
00882 ASSERTION(pCouple!=nil,"pCouple!=nil");
00883 return pCouple->GetStr();
00884 }
00885
00886 gHtmlCouple* gHParsed::GetCouple (unsigned idx)
00887 {
00888 ASSERTION(IsValidIndex(idx),"IsValidIndex(idx)");
00889 gStorage* pObx = GetObjectPtr( idx );
00890 ASSERTION(pObx!=nil,"pObx!=nil");
00891 return (gHtmlCouple*)pObx;
00892 }
00893
00894 gHtmlCouple* gHParsed::FindCouple (t_int16 idTag)
00895 {
00896 unsigned idx, n=N();
00897 gHtmlCouple* pCouple;
00898
00899 if ( idTag<0 ) return nil;
00900 for (idx=1; idx<=n; idx++) {
00901 pCouple = GetCouple( idx );
00902 if ( pCouple->idTag==idTag ) return pCouple;
00903 }
00904 return nil;
00905 }
00906
00907 void gHParsed::PushTagOptEnd (gHtmlCouple& couple)
00908 {
00909 thisPushCouple( couple, true );
00910 }
00911
00912 int gHParsed::PushTag (gHtmlCouple& couple, bool doCheckOnly)
00913 {
00914 t_int16 id = couple.idTag;
00915 int error=0;
00916
00917 if ( doCheckOnly==false ) thisPushCouple( couple, false );
00918
00919
00920
00921 switch ( id ) {
00922 case XH_IDTAG_BASE:
00923
00924
00925
00926
00927 hasBaseHRef = true;
00928 if ( state!=e_HS_Head ) return 102;
00929 break;
00930 case XH_IDTAG_BODY:
00931 error = state==e_HS_HeadAfter ? 0 : -103;
00932 state = e_HS_Body;
00933 break;
00934 case XH_IDTAG_HEAD:
00935 error = state==e_HS_Html || state==e_HS_StartAfter ? 0 : -104;
00936 state = e_HS_Head;
00937 break;
00938 case XH_IDTAG_HTML:
00939 error = state==e_HS_Start ? 0 : -105;
00940 state = e_HS_Html;
00941 break;
00942 default:
00943 if ( state==e_HS_Start ) {
00944 state = e_HS_StartAfter;
00945 return -106;
00946 }
00947 if ( state==e_HS_End ) return 107;
00948 if ( state==e_HS_HeadAfter ) return -108;
00949 if ( state==e_HS_BodyAfter ) return -109;
00950 return 0;
00951 }
00952
00953 return error;
00954 }
00955
00956 int gHParsed::PopTag (bool hasOptEnd)
00957 {
00958 t_int16 id;
00959 int error=0;
00960 gHtmlCouple* pCouple = CurrentCouple();
00961
00962
00963
00964 id = pCouple->idTag;
00965
00966 Pop();
00967 if ( hasOptEnd ) return 0;
00968
00969 kMust.Pop();
00970
00971 switch ( id ) {
00972 case XH_IDTAG_BODY:
00973 error = state==e_HS_Body ? 0 : 121;
00974 state = e_HS_BodyAfter;
00975 break;
00976 case XH_IDTAG_HEAD:
00977 error = state==e_HS_Head ? 0 : 122;
00978 state = e_HS_HeadAfter;
00979 break;
00980 case XH_IDTAG_HTML:
00981 error = state==e_HS_BodyAfter ? 0 : 123;
00982 state = e_HS_End;
00983 break;
00984 default:
00985 break;
00986 }
00987
00988 return 0;
00989 }
00990
00991 void gHParsed::TrashLast ()
00992 {
00993 DBGPRINT("DBG: trash: %s (N=%u kMustN=%u)\n",CurrentCouple()->sTag.Str(),N(),kMust.N());
00994 Pop();
00995 }
00996
00997 int gHParsed::thisPush (gHtmlCouple& couple, gStack& aStack)
00998 {
00999 gHtmlCouple* newCouple;
01000 newCouple = new gHtmlCouple( couple.iLine, couple.sTag.Str(), nil );
01001 ASSERTION(newCouple!=nil,"newCouple!=nil");
01002 newCouple->CopyTag( couple );
01003 aStack.Push( newCouple );
01004 return 0;
01005 }
01006
01007 int gHParsed::thisPushCouple (gHtmlCouple& couple, bool hasOptEnd)
01008 {
01009 thisPush( couple, *this );
01010 if ( hasOptEnd==false ) thisPush( couple, kMust );
01011
01012 return 0;
01013 }
01014
01015 gHtmlCouple* gHList::GetCouple (unsigned idx)
01016 {
01017 if ( idx>0 ) return gHtmlContent::GetCouple( idx );
01018 ASSERTION(coupleFakeBody!=nil,"coupleFakeBody!=nil");
01019 return coupleFakeBody;
01020 }
01021
01022 void gHList::AppendCouple (gHtmlCouple& couple)
01023 {
01024 gHtmlCouple* newCouple;
01025 newCouple = new gHtmlCouple( couple.iLine, couple.sTag.Str(), nil );
01026 ASSERTION(newCouple!=nil,"newCouple!=nil");
01027
01028 if ( couple.attrL.IsEmpty()==false ) {
01029 newCouple->Add( couple.attrL.Str() );
01030 }
01031 newCouple->CopyTag( couple );
01032 AppendObject( newCouple );
01033 }
01034
01035 gHtmlParser::gHtmlParser (gUnweb* ptrUnweb)
01036 : docType( 0 ),
01037 nErrorsSyntax( 0 ),
01038 nErrorsOther( 0 ),
01039 nWarnings( 0 ),
01040 lastWarnOpCode( 0 ),
01041 pUnweb( ptrUnweb )
01042 {
01043 lastOpError = thisInitTbl( nElems );
01044 ASSERTION(lastOpError==0,"lastOpError==0");
01045 }
01046
01047 gHtmlParser::~gHtmlParser ()
01048 {
01049 }
01050
01051 sHtmlElement* gHtmlParser::GetTagElement (t_int16 idxTag)
01052 {
01053 if ( idxTag<0 || idxTag>nElems ) return nil;
01054 return &lElems[ idxTag ];
01055 }
01056
01057 sAttrDef* gHtmlParser::GetAttrDef (t_int16 idxAttr)
01058 {
01059 ASSERTION(lAttrRef!=nil,"lAttrRef!=nil");
01060 ASSERTION(idxAttr>=0 && idxAttr<lAttrRef->nlAttrs,"GetAttrDef(3)");
01061 return &lAttrs[ idxAttr ];
01062 }
01063
01064 sAttrRefer* gHtmlParser::GetAttrRef ()
01065 {
01066 ASSERTION(lAttrRef!=nil,"lAttrRef!=nil");
01067 return lAttrRef;
01068 }
01069
01070 sAttrNorm* gHtmlParser::GetDefaultAttrNorm (t_int16 idxNorm)
01071 {
01072 if ( idxNorm<0 || idxNorm>nDefAttrNorms ) return nil;
01073 return &lDefAttrNorms[ idxNorm ];
01074 }
01075
01076 sHAttrType* gHtmlParser::GetAttrType (t_int16 idxType)
01077 {
01078 t_int16 n = GetNAttrType();
01079 if ( idxType<1 || idxType>n ) return nil;
01080 return &lHAttrTypes[ idxType ];
01081 }
01082
01083 void gHtmlParser::ReleaseHash ()
01084 {
01085 t_int16 idx;
01086
01087 delete hElems; hElems = nil;
01088 nElems = -1;
01089
01090 delete lAttrRef; lAttrRef = nil;
01091
01092 for (idx=0; idx<=nDefAttrNorms; idx++) {
01093 delete lDefAttrNorms[ idx ].pAttrSeq;
01094 lDefAttrNorms[ idx ].pAttrSeq = nil;
01095 }
01096 }
01097
01098 bool gHtmlParser::SetOptions (gHtmlOpt& copy)
01099 {
01100 htmlOpt.CopyOptions( copy );
01101
01102 t_uint16 i, n=htmlOpt.lIdTagOptEnd.N();
01103 t_int16 id, nMax = nElems;
01104 for (i=1; i<=n; i++) {
01105 id = htmlOpt.lIdTagOptEnd.GetInt( i );
01106 if ( id<0 || id>=nMax ) return false;
01107 char cOpt = lElems[id].optEnd;
01108 DBGPRINT("DBG: CFG_OPTEND: id=%d: %s (%c)\n",id,lElems[id].elemName,cOpt);
01109 if ( cOpt!='@' && cOpt!=XH_TBL_TAG_OPTEND ) return false;
01110 lElems[id].optEnd = XH_TBL_TAG_OPTEND_CFG;
01111 }
01112
01113 htmlInput.SetHtmlOpt( &htmlOpt );
01114
01115 return true;
01116 }
01117
01118 int gHtmlParser::Parse (FILE* fRepErr)
01119 {
01120 int error = thisParse( fRepErr );
01121
01122 return error;
01123 }
01124
01125 int gHtmlParser::SetError (int opError)
01126 {
01127 lastOpError = opError;
01128 gControl::SetError( 0 );
01129 if ( opError==0 ) return 0;
01130 return lastOpError = opError;
01131 }
01132
01133 int gHtmlParser::SetWarn (int opError)
01134 {
01135 lastWarnOpCode = opError;
01136 if ( opError==0 ) return 0;
01137
01138 nWarnings++;
01139 return opError;
01140 }
01141
01142 int gHtmlParser::ShowTree (FILE* fRepErr)
01143 {
01144
01145 short p, nFilled;
01146 unsigned n;
01147 gHSeq hSeq( fRepErr );
01148 eHState aState, showState=e_HS_Body;
01149
01150 hSeq.SetBaseHRef( myBaseHRef );
01151
01152 #ifdef DEBUG
01153 for (p=0; p<(short)e_HS_Last; p++) {
01154 n = lParts[p].N();
01155 if ( n==0 ) continue;
01156 printf("ShowPart(%d)--->START<---\n",p);
01157 hSeq.ShowPart( lParts[p], (eHState)p, htmlOpt );
01158 printf("ShowPart(%d)--->END<---\n",p);
01159 }
01160 #endif //DEBUG(_...)
01161
01162 for (p=0, nFilled=0, aState=e_HS_Body; p<(short)e_HS_Last; p++) {
01163 n = lParts[p].N();
01164 if ( n==0 ) continue;
01165 if ( nFilled==0 ) {
01166 aState = (eHState)p;
01167 }
01168 nFilled++;
01169 DBGPRINT("DBG: nFilled=%d, aState=%d, showState=%d\n",
01170 (int)nFilled,
01171 aState,
01172 showState);
01173 }
01174
01175 if ( nFilled<=0 ) return -1;
01176
01177 if ( nFilled==1 ) {
01178
01179 showState = e_HS_Sp_FakeBody;
01180 }
01181 else {
01182
01183
01184
01185 aState = showState;
01186 }
01187
01188 hSeq.Build( lParts[aState], showState );
01189 hSeq.hTree.Show( true );
01190
01191 return 0;
01192 }
01193
01194
#ifdef DEBUG
// Debug dump of the raw input list: one line per couple with its line
// number, position and text; tag couples also show the tag name and the id
// resolved by thisFindTag().
//   doShowAll - not used by this implementation
// Always returns 0.
int gHtmlParser::Show_dbg (bool doShowAll)
{
    const int total = (int)htmlInput.N();

    for (int pos=1; pos<=total; pos++) {
        gHtmlCouple* pCouple = (gHtmlCouple*)htmlInput.GetObjectPtr(pos);

        t_int16 id;
        thisFindTag( pCouple->sTag.Str(), id );

        printf("DBG: line %u: %d/%d [%s]",
               pCouple->iLine,
               pos, (short)htmlInput.N(),
               htmlInput.Str(pos));
        if ( pCouple->IsText() ) {
            printf("\n");
        }
        else {
            printf(" TAG:%s {id:%d}\n",pCouple->sTag.Str(),id);
        }
    }
    return 0;
}
#endif //DEBUG
01219
01220 int gHtmlParser::thisFillFromUnweb (gUnweb& unweb, gHtmlContent& hInput)
01221 {
01222 unsigned i, nPost = unweb.coordSerial.N();
01223
01224 for (i=1; i<=nPost; i++) {
01225 hInput.Add( unweb.Str( i ) );
01226 }
01227 return 0;
01228 }
01229
01230 sHtmlElement* gHtmlParser::thisFindTag (char* strTag, t_int16& idxTag)
01231 {
01232
01233 sHtmlElement* pElem;
01234 gHashElemTriple* pHashElem;
01235
01236 idxTag = -1;
01237 if ( strTag==nil || strTag[0]==0 ) return nil;
01238
01239
01240
01241
01242
01243
01244
01245 gKey hKey( strTag );
01246 unsigned idx;
01247 int x;
01248 pHashElem = hElems->Find( hKey, idx, x );
01249 if ( pHashElem==nil ) return nil;
01250
01251
01252 idxTag = (t_int16)pHashElem->iVal;
01253 ASSERTION(idxTag<nElems,"idxTag<nElems");
01254 pElem = &lElems[idxTag];
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266 return pElem;
01267 }
01268
01269 int gHtmlParser::thisInitTbl (t_int16& size)
01270 {
01271 t_int16 idx, iN;
01272 sHtmlElement* pElem;
01273 sAttrDef* pAttrDef;
01274 char* elemName;
01275 char* attrName, *lastName;
01276 char* str;
01277
01278 if ( size>0 ) return 0;
01279
01280 if ( hElems==nil ) {
01281 hElems = new gHashTriple( 7591 );
01282 ASSERTION(hElems!=nil,"hElems!=nil");
01283 }
01284 if ( hElems->IsEmpty()==false ) return 0;
01285
01286 for (idx=0, size=0;
01287 (pElem = &(lElems[idx]))!=nil &&
01288 (elemName = pElem->elemName)!=nil;
01289 idx++) {
01290 size++;
01291 gKey hKey( elemName );
01292 hElems->AddTriple( hKey, (int)idx );
01293 switch ( pElem->family ) {
01294 case '@':
01295 pElem->family = 0;
01296 case (char)e_HtmlElementPhrase:
01297 break;
01298 default:
01299 return -1;
01300 }
01301
01302 switch ( pElem->optStart ) {
01303 case 'O':
01304 pElem->optStart = 1;
01305 break;
01306 case '@':
01307 pElem->optStart = 0;
01308 break;
01309 default:
01310 ASSERTION_FALSE("optStart(1)");
01311 break;
01312 }
01313
01314 char isDeprecated = pElem->isDeprecated;
01315 if ( isDeprecated=='@' ) isDeprecated = pElem->isDeprecated = 0;
01316
01317
01318 if ( pElem->kindDTD=='@' ) pElem->kindDTD = 0;
01319 if ( isDeprecated!=0 && pElem->kindDTD!='L' ) return -1;
01320 }
01321
01322 for (idx=0; (attrName = lAttrs[idx].attrName)!=NULL; ) idx++;
01323
01324 ASSERTION(lAttrRef==nil,"lAttrRef==nil");
01325 lAttrRef = new sAttrRefer( iN = idx );
01326 ASSERTION(lAttrRef!=nil,"lAttrRef!=nil");
01327
01328 for (idx=0, lastName="@"; idx<iN; idx++) {
01329 pAttrDef = &lAttrs[ idx ];
01330 attrName = pAttrDef->attrName;
01331 gString s( attrName );
01332 s.UpString();
01333 lAttrRef->sAttrs[ idx ] = s;
01334 if ( strcmp( attrName, lastName ) ) {
01335
01336 lAttrRef->idxUniqMax++;
01337 lAttrRef->sLUniqs[ lAttrRef->idxUniqMax ] = s;
01338 lAttrRef->idxLUniqs[ lAttrRef->idxUniqMax ] = idx;
01339 }
01340 elemName = pAttrDef->strRelatedLst;
01341 bool isAllBut = elemName[0]=='@';
01342 elemName += isAllBut;
01343 lAttrRef->lstIsAllButEtc[ idx ].SetOn( isAllBut );
01344 lAttrRef->lstRelated[ idx ].AddFromStr( elemName );
01345
01346
01347 lastName = attrName;
01348 }
01349
01350 sAttrNorm* pNormAttr;
01351
01352 ASSERTION(nDefAttrNorms==-1,"nDefAttrNorms==-1");
01353 for (idx=0, nDefAttrNorms=0;
01354 (pNormAttr = &lDefAttrNorms[idx])!=nil && pNormAttr->idTag!=-1;
01355 idx++) {
01356 str = pNormAttr->strAttrSeq;
01357 if ( str!=nil && str[0]!=0 ) {
01358 pNormAttr->pAttrSeq = new gSmartList;
01359 ASSERTION(pNormAttr->pAttrSeq!=nil,"pNormAttr->pAttrSeq!=nil");
01360 pNormAttr->pAttrSeq->AddFromStr( str );
01361 }
01362 nDefAttrNorms++;
01363 }
01364
01365
01366 for (idx=1;
01367 lHAttrTypes[idx].typeFamily>=0;
01368 ) idx++;
01369
01370 lHAttrTypes[ 0 ].typeFamily = idx;
01371
01372 return 0;
01373 }
01374
01375 int gHtmlParser::thisParse (FILE* fRepErr)
01376 {
01377 int error;
01378 unsigned i, n;
01379 unsigned currLine = 0;
01380 t_int16 id;
01381 gHtmlCouple* pObjC;
01382 sHtmlElement* pElem;
01383 char* str;
01384 char* msgError;
01385 bool isSkippedTag;
01386 bool isInvalidTag;
01387
01388 if ( pUnweb==nil ) return -1;
01389 if ( thisFillFromUnweb( *pUnweb, htmlInput )!=0 ) return -1;
01390
01391
01392 for (i=1, n=htmlInput.N(); i<=n; i++) {
01393 pObjC = htmlInput.GetCouple( i );
01394 ASSERTION(pObjC!=nil,"pObjC!=nil");
01395 currLine = pObjC->iLine;
01396 if ( pObjC->IsText() ) {
01397
01398 DBGPRINT_MIN("DBG: [%s]: pHStr:%c OK-pHStr:%c\n",pObjC->Str(1),ISyORn(pObjC->pHStr!=nil),ISyORn(pObjC->pHStr==nil?0:pObjC->pHStr->IsOk()));
01399 continue;
01400 }
01401 str = pObjC->sTag.Str();
01402
01403 if ( pObjC->theDocType!=0 ) {
01404
01405
01406 DBGPRINT("DBG: DocType: %s [%s]\n",pObjC->attrL.Str(),pObjC->attrL.GetAttrValue(3));
01407 if ( docType==0 ) {
01408 docType = pObjC->theDocType;
01409 }
01410 else {
01411 SetWarn( 20 );
01412 HTML_LOG(fRepErr,LOG_WARNING,"Line %u: %s\n",currLine,"Duplicated DOCTYPE declaration");
01413 SetWarn( 0 );
01414 }
01415 }
01416 pElem = thisFindTag( str, pObjC->idTag );
01417 pObjC->pElem = pElem;
01418 if ( pElem==nil ) {
01419
01420 if ( htmlOpt.outOpt.IsTagSkipped( str ) )
01421 pObjC->synError = XH_SKIP_TAG_FORCE;
01422 }
01423 else {
01424 if ( pElem->ctrl==-1 )
01425 pObjC->synError = XH_SKIP_TAG_FORCE;
01426 }
01427 }
01428
01429 gHParsed kParsed;
01430
01431
01432
01433 for (i=1; i<=n; i++) {
01434 pObjC = htmlInput.GetCouple( i );
01435 id = pObjC->idTag;
01436 str = htmlInput.Str( i );
01437 ASSERTION(str!=nil,"str!=nil");
01438 currLine = pObjC->iLine;
01439 if ( pObjC->IsText() ) {
01440
01441
01442 if ( kParsed.state==e_HS_Start ) kParsed.state = e_HS_StartAfter;
01443 if ( pObjC->pHStr!=nil && pObjC->pHStr->Str()!=nil && pObjC->pHStr->Str()[0]==0 ) {
01444 HTML_LOG(fRepErr,LOG_NOTICE,"Line %u: invalid string, but used: '%s'\n",currLine,str);
01445 }
01446 thisAddedParsedLine( str, *pObjC, kParsed.state, true );
01447 }
01448 else {
01449 error = thisParseLine( *pObjC, currLine, str, kParsed );
01450 msgError = GetErrorStr();
01451 isInvalidTag = lastOpError<0;
01452 isSkippedTag = pObjC->IsSkippedTag();
01453 if ( isSkippedTag ) {
01454 ;
01455 if ( pObjC->synError!=XH_SKIP_TAG_FORCE )
01456 thisAddedParsedLine( str, *pObjC, kParsed.state, true );
01457 continue;
01458 }
01459 if ( isInvalidTag ) {
01460 nErrorsSyntax++;
01461 HTML_LOG(fRepErr,LOG_ERROR,"Line %u: Invalid tag: %s\n",currLine,str);
01462 if ( htmlOpt.oErrInvTagWrite.IsOn()==false ) continue;
01463 }
01464
01465 char* strBaseHRef;
01466
01467 if ( id==XH_IDTAG_HEAD && kParsed.state==e_HS_HeadAfter && htmlOpt.DoBaseHRef()==true ) {
01468
01469 strBaseHRef = htmlOpt.GetBaseHRef();
01470 myBaseHRef.Set( strBaseHRef );
01471 gHtmlCouple newObjC( currLine, "BASE", strBaseHRef );
01472 thisAddedParsedLine( newObjC.GetStr(), newObjC, kParsed.state, true );
01473 }
01474 thisAddedParsedLine( str, *pObjC, kParsed.state, lastOpError>=0 );
01475 if ( lastOpError==0 ) {
01476 if ( lastWarnOpCode==0 ) {
01477 if ( id==XH_IDTAG_BASE ) {
01478 strBaseHRef = pObjC->attrL.Find("HREF");
01479 if ( strBaseHRef!=nil ) myBaseHRef.Set( strBaseHRef );
01480 }
01481 }
01482 else {
01483 HTML_LOG(fRepErr,LOG_WARNING,"Line %u: %s\n",currLine,msgError);
01484 SetWarn( 0 );
01485 }
01486 continue;
01487 }
01488 if ( isInvalidTag==false ) {
01489 nErrorsOther++;
01490 HTML_LOG(fRepErr,LOG_ERROR,"Line %u: bad sequence (int-reg %d): %s\n%s%s%s",
01491 currLine,
01492 lastOpError,
01493 str,
01494 msgError[0]==0 ? "\0" : "ERROR: ", msgError, msgError[0]==0 ? "\0" : "\n");
01495 }
01496 }
01497 }
01498
01499 error = nErrorsSyntax!=0 || nErrorsOther!=0;
01500 if ( error!=0 ) return 1;
01501
01502 n = kParsed.kMust.N();
01503 for (i=1; i<=n; i++) {
01504 pObjC = (gHtmlCouple*)kParsed.kMust.GetObjectPtr( i );
01505 ASSERTION(pObjC!=nil,"pObjC");
01506 nErrorsOther++;
01507 HTML_LOG(fRepErr,LOG_ERROR,"Line %u: unclosed tag (int-ref %d): %s, opened at line %u\n",
01508 currLine,
01509 10,
01510 pObjC->TagString().Str(),
01511 pObjC->iLine);
01512 DBGPRINT("DBG: ATTRS: %s (%s)\n",pObjC->attrL.Str(),pObjC->attrL.Find("HREF"));
01513 }
01514
01515 return 0;
01516 }
01517
01518 int gHtmlParser::thisParseLine (gHtmlCouple& inCouple, unsigned lineNr, char* s, gHParsed& kParsed)
01519 {
01520
01521
01522
01523 int error;
01524 t_int16 id;
01525 sHtmlElement* pInElem;
01526 gHtmlCouple* pCouple;
01527 bool inRefEnd, inCannotEnd, inOptEnd;
01528
01529 ASSERTION(s!=nil,"s!=nil");
01530 SetError( 0 );
01531 id = inCouple.idTag;
01532 if ( inCouple.IsSkippedTag() ) return inCouple.synError;
01533 if ( id<0 ) return SetError( -8 );
01534 pInElem = inCouple.pElem;
01535
01536 inRefEnd = inCouple.IsTagEnd();
01537 inCannotEnd = pInElem->CannotEndTag();
01538 inOptEnd = pInElem->MayEndTag();
01539
01540 DBGPRINT_MIN("DBG: thisParseLine:%u [%s] inCouple=%s\n",lineNr,s,inCouple.sTag.Str());
01541
01542 if ( inRefEnd ) {
01543
01544 if ( inCannotEnd ) return SetError( 12 );
01545
01546 if ( kParsed.Depth()<=0 ) return SetError( 13 );
01547 pCouple = kParsed.CurrentCouple();
01548 if ( id==pCouple->idTag ) {
01549 error = kParsed.PopTag( inOptEnd );
01550 ASSERTION(error==0,"kParsed.PopTag(1)");
01551 return 0;
01552 }
01553 if ( inOptEnd ) {
01554
01555 if ( id!=pCouple->idTag ) {
01556 int countTrash=0;
01557 int iOptIdx, iOptN=(int)kParsed.N();
01558 for (iOptIdx=iOptN; iOptIdx>0 && countTrash==0; iOptIdx--) {
01559 gHtmlCouple* pObjC = kParsed.GetCouple( (unsigned)iOptIdx );
01560
01561
01562
01563 if ( id==pObjC->idTag ) {
01564 kParsed.Delete( iOptIdx, iOptIdx );
01565 countTrash++;
01566 }
01567 }
01568 }
01569 }
01570 else {
01571
01572 while ( kParsed.N()>kParsed.kMust.N() ) {
01573 pCouple = kParsed.CurrentCouple();
01574 if ( id==pCouple->idTag ) break;
01575 kParsed.TrashLast();
01576 }
01577 }
01578
01579 pCouple = kParsed.CurrentCouple();
01580
01581 if ( id!=pCouple->idTag ) {
01582
01583 SetError( 14 );
01584 snprintf(sStrError,200,
01585 "Expected %s [line %u] but got %s",
01586 pCouple->TagString(true).Str(),
01587 pCouple->iLine,
01588 s);
01589 return 14;
01590 }
01591
01592 error = kParsed.PopTag( inOptEnd );
01593 ASSERTION(error==0,"kParsed.PopTag(2)");
01594 }
01595 else {
01596
01597
01598
01599
01600 if ( inOptEnd==true && inCannotEnd==false ) {
01601
01602 kParsed.PushTagOptEnd( inCouple );
01603 return 0;
01604 }
01605
01606
01607
01608
01609
01610 if ( id==XH_IDTAG_ANCHOR ) {
01611 gHtmlCouple* pCoupleAnchor = kParsed.FindCouple( XH_IDTAG_ANCHOR );
01612 if ( pCoupleAnchor!=nil ) {
01613 error = 129;
01614 SetError( error );
01615 snprintf(sStrError,200,
01616 "Doubled anchor on line %u: already started on line %u (int-seq:%d)",
01617 inCouple.iLine,
01618 pCoupleAnchor->iLine,
01619 error);
01620 return error;
01621 }
01622 }
01623
01624 error = kParsed.PushTag( inCouple, inCannotEnd );
01625
01626 if ( error==0 ) return 0;
01627 if ( error<0 ) {
01628 if ( htmlOpt.oErrStateAllSuppress.IsOn() )
01629 return 0;
01630 switch ( error ) {
01631 case -103:
01632 if ( htmlOpt.oErrStateSuppress[e_HS_HeadAfter].IsOn() )
01633 return 0;
01634 break;
01635 case -104:
01636 if ( htmlOpt.oErrStateSuppress[e_HS_Html].IsOn() )
01637 return 0;
01638 break;
01639 case -105:
01640 if ( htmlOpt.oErrStateSuppress[e_HS_Start].IsOn() )
01641 return 0;
01642 break;
01643 case -106:
01644 if ( htmlOpt.oErrStateSuppress[e_HS_Body].IsOn() )
01645 return 0;
01646 default:
01647 break;
01648 }
01649 SetWarn( 21 );
01650 snprintf(sStrError,200,
01651 "Not in LTD sequence: %s (int-seq:%d)",
01652 s,
01653 -error);
01654 }
01655 else {
01656
01657 return SetError( error );
01658 }
01659 }
01660 return 0;
01661 }
01662
01663 int gHtmlParser::thisAddedParsedLine (char* s, gHtmlCouple& inCouple, eHState state, bool doAccept)
01664 {
01665 ASSERTION(s!=nil,"s!=nil");
01666 if ( s[0]==0 ) return 0;
01667 if ( fOutAll.f!=nil ) fprintf(fOutAll.f,"%s%s",s,fOutAll.NewLine());
01668 lOut.Add( s );
01669 lParts[state].AppendCouple( inCouple );
01670 return 0;
01671 }
01672
01673