00001
00002
00003 #include <string.h>
00004 #include "gdNetStrings.h"
00005 #include "gstringext.h"
00006
00007
00008
00009 unsigned gHtmlString::nTblSymUsu=0;
00010 t_uint32 gHtmlString::vTblSymUsuMin=0;
00011 unsigned gHtmlString::nTblSymIsoLatin1=0;
00012 t_uint32 gHtmlString::vTblSymIsoL1Min=0;
00013 t_uint32 gHtmlString::vTblSymIsoL1Max=0;
00014
00015 sHtmlEntSymbol gHtmlString::tblSymUsu[191-160+10]={
00016 {"nbsp", 160, 'U', "no-break space"},
00017 {"iexcl", 161, '@', "inverted exclamation mark"},
00018 {"cent", 162, '@', "cent sign"},
00019 {"pound", 163, 'U', "pound sterling sign"},
00020 {"curren", 164, '@', "general currency sign"},
00021 {"yen" , 165, '@', "yen sign"},
00022 {"brvbar", 166, '@', "broken (vertical) bar"},
00023 {"sect", 167, '@', "section sign"},
00024 {"uml" , 168, '@', "umlaut (dieresis)"},
00025 {"copy", 169, 'U', "copyright sign"},
00026 {"ordf", 170, '@', "ordinal indicator, feminine"},
00027 {"laquo", 171, '@', "angle quotation mark, left"},
00028 {"not" , 172, '@', "not sign"},
00029 {"shy" , 173, '@', "soft hyphen"},
00030 {"reg" , 174, 'U', "registered sign"},
00031 {"macr", 175, '@', "macron"},
00032 {"deg" , 176, '@', "degree sign"},
00033 {"plusmn", 177, '@', "plus-or-minus sign"},
00034 {"sup2", 178, 'n', "superscript two"},
00035 {"sup3", 179, 'n', "superscript three"},
00036 {"acute", 180, 'U', "acute accent"},
00037 {"micro", 181, '@', "micro sign"},
00038 {"para", 182, '@', "pilcrow (paragraph sign)"},
00039 {"middot", 183, '@', "middle dot"},
00040 {"cedil", 184, '@', "cedilla"},
00041 {"sup1", 185, 'n', "superscript one"},
00042 {"ordm", 186, '@', "ordinal indicator, masculine"},
00043 {"raquo", 187, '@', "angle quotation mark, right"},
00044 {"frac14", 188, 'n', "fraction one-quarter"},
00045 {"frac12", 189, 'n', "fraction one-half"},
00046 {"frac34", 190, 'n', "fraction three-quarters"},
00047 {"iquest", 191, '@', "inverted question mark"},
00048 {nil, 0, '\0', nil}};
00049
00050 sHtmlEntSymbol gHtmlString::tblSymIsoLatin1[255-198+30]={
00051 {"AElig", 198, 'C', "AE diphthong (ligature)"},
00052 {"Aacute", 193, 'C', "A, acute accent"},
00053 {"Acirc", 194, 'C', "A, circumflex accent"},
00054 {"Agrave", 192, 'C', "A, grave accent"},
00055 {"Aring", 197, 'C', "A, ring"},
00056 {"Atilde", 195, 'C', "A, tilde"},
00057 {"Auml", 196, 'C', "A, dieresis or umlaut mark"},
00058 {"Ccedil", 199, 'C', "C, cedilla"},
00059 {"ETH" , 208, 'C', "Eth, Icelandic"},
00060 {"Eacute", 201, 'C', "E, acute accent"},
00061 {"Ecirc", 202, 'C', "E, circumflex accent"},
00062 {"Egrave", 200, 'C', "E, grave accent"},
00063 {"Euml", 203, 'C', "E, dieresis or umlaut mark"},
00064 {"Iacute", 205, 'C', "I, acute accent"},
00065 {"Icirc", 206, 'C', "I, circumflex accent"},
00066 {"Igrave", 204, 'C', "I, grave accent"},
00067 {"Iuml", 207, 'C', "I, dieresis or umlaut mark"},
00068 {"Ntilde", 209, 'C', "N, tilde"},
00069 {"Oacute", 211, 'C', "O, acute accent"},
00070 {"Ocirc", 212, 'C', "O, circumflex accent"},
00071 {"Ograve", 210, 'C', "O, grave accent"},
00072 {"Oslash", 216, 'C', "O, slash"},
00073 {"Otilde", 213, 'C', "O, tilde"},
00074 {"Ouml", 214, 'C', "O, dieresis or umlaut mark"},
00075 {"THORN", 222, 'C', "THORN, Icelandic"},
00076 {"Uacute", 218, 'C', "U, acute accent"},
00077 {"Ucirc", 219, 'C', "U, circumflex accent"},
00078 {"Ugrave", 217, 'C', "U, grave accent"},
00079 {"Uuml", 220, 'C', "U, dieresis or umlaut mark"},
00080 {"Yacute", 221, 'C', "Y, acute accent"},
00081 {"aacute", 225, 's', "a, acute accent"},
00082 {"acirc", 226, 's', "a, circumflex accent"},
00083 {"aelig", 230, 's', "ae diphthong (ligature)"},
00084 {"agrave", 224, 's', "a, grave accent"},
00085 {"aring", 229, 's', "a, ring"},
00086 {"atilde", 227, 's', "a, tilde"},
00087 {"auml", 228, 's', "a, dieresis or umlaut mark"},
00088 {"ccedil", 231, 's', "c, cedilla"},
00089 {"eacute", 233, 's', "e, acute accent"},
00090 {"ecirc", 234, 's', "e, circumflex accent"},
00091 {"egrave", 232, 's', "e, grave accent"},
00092 {"eth" , 240, 's', "eth, Icelandic"},
00093 {"euml", 235, 's', "e, dieresis or umlaut mark"},
00094 {"iacute", 237, 's', "i, acute accent"},
00095 {"icirc", 238, 's', "i, circumflex accent"},
00096 {"igrave", 236, 's', "i, grave accent"},
00097 {"iuml", 239, 's', "i, dieresis or umlaut mark"},
00098 {"ntilde", 241, 's', "n, tilde"},
00099 {"oacute", 243, 's', "o, acute accent"},
00100 {"ocirc", 244, 's', "o, circumflex accent"},
00101 {"ograve", 242, 's', "o, grave accent"},
00102 {"oslash", 248, 's', "o, slash"},
00103 {"otilde", 245, 's', "o, tilde"},
00104 {"ouml", 246, 's', "o, dieresis or umlaut mark"},
00105 {"szlig", 223, 's', "@sharp s, German (sz ligature)"},
00106 {"thorn", 254, 's', "thorn, Icelandic"},
00107 {"uacute", 250, 's', "u, acute accent"},
00108 {"ucirc", 251, 's', "u, circumflex accent"},
00109 {"ugrave", 249, 's', "u, grave accent"},
00110 {"uuml", 252, 's', "u, dieresis or umlaut mark"},
00111 {"yacute", 253, 's', "y, acute"},
00112 {"yuml", 255, 's', "y, dieresis or umlaut mark"},
00113 {nil, 0, '\0', nil}};
00114
00115 gHtmlString::gHtmlString (char* s, bool aIgnoreAmp)
00116 : lastOpError( 0 ),
00117 doIgnoreAmp( aIgnoreAmp )
00118 {
00119 thisFillUpTables();
00120 ConvertToHString( s );
00121 }
00122
00123 gHtmlString::~gHtmlString ()
00124 {
00125 }
00126
00127 char* gHtmlString::AsciiToString (t_uchar v)
00128 {
00129 switch ( v ) {
00130 case '\n':
00131 return "<BR>\n";
00132 case '<':
00133 return thisStrSymbolAmp("lt");
00134 case '>':
00135 return thisStrSymbolAmp("gt");
00136 case '&':
00137 return thisStrSymbolAmp("amp");
00138 case '"':
00139 return thisStrSymbolAmp("quot");
00140 case '\t':
00141 return nil;
00142 case 127:
00143 lastOpError = 32;
00144 return nil;
00145 default:
00146 break;
00147 }
00148 if ( v<' ' ) return nil;
00149 if ( v<127 ) {
00150 sEntStr.SetEmpty(); sEntStr.Add( v );
00151 return sEntStr.Str();
00152 }
00153 return thisSymbolString( (t_uint32)v );
00154 }
00155
00156 int gHtmlString::ConvertToHString (char* s)
00157 {
00158
00159
00160
00161
00162
00163
00164 lastOpError = 0;
00165 sOriStr.SetEmpty();
00166 Delete();
00167 if ( s==nil ) return 0;
00168
00169 sOriStr.Set( s );
00170
00171 if ( doIgnoreAmp )
00172 return thisConvertToHStringIgnoreAmp( s );
00173
00174 gString sResult;
00175 lastOpError = ConvertAmpToAscii( s, sResult );
00176 DBGPRINT_MIN("DBG: gHtmlString:[%s|%s] OpError=%d\n",s,sResult.Str(),lastOpError);
00177 if ( lastOpError!=0 ) sResult.Set( s );
00178
00179 return thisConvertToHStringIgnoreAmp( sResult.Str() );
00180 }
00181
00182 int gHtmlString::ConvertAmpToAscii (char* s, gString& sResult)
00183 {
00184 int error, resError=0;
00185 unsigned i, pos, len;
00186 t_uchar uChr;
00187
00188 ASSERTION(s!=nil,"s!=nil");
00189 sResult.SetEmpty();
00190
00191 gString sIn( s );
00192 for (i=1, len=sIn.Length(); i<=len; i++) {
00193 uChr = sIn[i];
00194 if ( uChr=='&' ) {
00195 pos = gStrControl::Self().Find( s+i, ";" );
00196 if ( pos==0 ) return 64;
00197 gString sTemp;
00198 sTemp.CopyFromTo( sIn, i, i+pos );
00199 i += pos;
00200 error = ConvertHToAscii( sTemp.Str(), uChr );
00201 ASSERTION(error>=0,"error>=0");
00202 if ( error!=0 ) {
00203 resError = 65;
00204 continue;
00205 }
00206 }
00207 sResult.Add( uChr );
00208 }
00209 return resError;
00210 }
00211
00212 int gHtmlString::ConvertHToAscii (char* sAmp, t_uchar& cRes)
00213 {
00214
00215
00216 unsigned len;
00217 ASSERTION(sAmp!=nil,"sAmp!=nil");
00218 cRes = 0;
00219 if ( sAmp[0]!='&' ) return -1;
00220 gString s( sAmp+1 );
00221 if ( s[len = s.Length()]!=';' ) return -1;
00222 if ( len<=2 ) return 1;
00223 gString sIn;
00224 sIn.CopyFromTo( s, 1, len-1 );
00225 short tblIdx;
00226 unsigned idx;
00227 return thisConvertHToAscii( sIn, tblIdx, idx, cRes )!=0;
00228 }
00229
00230 int gHtmlString::thisConvertToHStringIgnoreAmp (char* s)
00231 {
00232 char* str;
00233 t_uchar v;
00234 int error=0, resError=0;
00235
00236 for ( ; (v = (t_uchar)s[0])!=0; s++) {
00237 str = AsciiToString( v );
00238 error = lastOpError!=0 || str==nil;
00239 if ( error==0 )
00240 Add( str );
00241 else
00242 resError = error;
00243 }
00244 return resError;
00245 }
00246
00247 char* gHtmlString::thisSymbolString (t_uint32 val)
00248 {
00249 unsigned idx;
00250 if ( val<vTblSymUsuMin && val<vTblSymIsoL1Min ) {
00251 lastOpError = 8;
00252 return nil;
00253 }
00254 if ( val>vTblSymIsoL1Max ) {
00255 lastOpError = 9;
00256 return nil;
00257 }
00258 if ( val>=vTblSymIsoL1Min ) {
00259 return thisStrSymbolAmp( thisFindSymbolISO( 1, val, nTblSymIsoLatin1, idx ) );
00260 }
00261 if ( val>=vTblSymUsuMin && val<vTblSymUsuMin+(t_uint32)nTblSymUsu ) {
00262 val -= vTblSymUsuMin;
00263 return thisStrSymbolAmp( tblSymUsu[val].sName );
00264 }
00265 return nil;
00266 }
00267
00268 char* gHtmlString::thisFindSymbolISO (short tblIdx,
00269 t_uint32 val,
00270 unsigned nEntries,
00271 unsigned& idx)
00272 {
00273 t_uint32 vAscii;
00274 for (idx=0; idx<nEntries; idx++) {
00275
00276
00277 vAscii = tblSymIsoLatin1[idx].vAscii;
00278 if ( vAscii==val ) return tblSymIsoLatin1[idx].sName;
00279 }
00280 idx = MAX_UINT16_U;
00281 return nil;
00282 }
00283
00284 int gHtmlString::thisConvertHToAscii (gString& s,
00285 short& tblIdx,
00286 unsigned& idx,
00287 t_uchar& cRes)
00288 {
00289
00290
00291 char* str = s.Str();
00292 t_uint32 vAscii;
00293
00294 cRes = 0;
00295
00296
00297 if ( strcmp(str,"amp")==0 ) {
00298 cRes = '&';
00299 return 0;
00300 }
00301 if ( strcmp(str,"lt")==0 ) {
00302 cRes = '<';
00303 return 0;
00304 }
00305 if ( strcmp(str,"gt")==0 ) {
00306 cRes = '>';
00307 return 0;
00308 }
00309 if ( strcmp(str,"quot")==0 ) {
00310 cRes = '"';
00311 return 0;
00312 }
00313
00314 for (idx=0, tblIdx=0; idx<nTblSymUsu; idx++) {
00315 if ( strcmp(str,tblSymUsu[idx].sName)==0 ) {
00316 cRes = (t_uchar)(vAscii = tblSymUsu[idx].vAscii);
00317 return vAscii>=256;
00318 }
00319 }
00320 for (idx=0, tblIdx=1; idx<nTblSymIsoLatin1; idx++) {
00321 if ( strcmp(str,tblSymIsoLatin1[idx].sName)==0 ) {
00322 cRes = (t_uchar)(vAscii = tblSymIsoLatin1[idx].vAscii);
00323 return vAscii>=256;
00324 }
00325 }
00326 tblIdx = -1;
00327 return 2;
00328 }
00329
00330 char* gHtmlString::thisStrSymbolAmp (char* s)
00331 {
00332 sEntStr.SetEmpty();
00333 if ( s==nil ) {
00334 lastOpError = 16;
00335 return nil;
00336 }
00337 sEntStr.Add( '&' );
00338 sEntStr.Add( s );
00339 sEntStr.Add( ';' );
00340 return sEntStr.Str();
00341 }
00342
00343 int gHtmlString::thisFillUpTables ()
00344 {
00345 unsigned idx;
00346 t_uint32 v, vMin=(t_uint32)MAX_UINT16_U, vMax=0;
00347
00348 if ( nTblSymUsu>0 ) return 0;
00349
00350
00351 for (idx=0; (v = tblSymUsu[idx].vAscii)!=0; idx++) {
00352 if ( idx==0 ) vTblSymUsuMin = v;
00353 nTblSymUsu++;
00354 }
00355 for (idx=0; (v = tblSymIsoLatin1[idx].vAscii)!=0; idx++) {
00356 nTblSymIsoLatin1++;
00357 if ( v<vMin ) vMin = v;
00358 if ( v>vMax ) vMax = v;
00359 }
00360 vTblSymIsoL1Min = vMin;
00361 vTblSymIsoL1Max = vMax;
00362 return 0;
00363 }
00364
00365