gdNetStrings.cpp

Go to the documentation of this file.
00001 // gdNetStrings.cpp -- Version 0.0
00002 
00003 #include <string.h>
00004 #include "gdNetStrings.h"
00005 #include "gstringext.h"
00006 ////////////////////////////////////////////////////////////
00007 // Static members
00008 // ---------------------------------------------------------
00009 unsigned gHtmlString::nTblSymUsu=0;
00010 t_uint32 gHtmlString::vTblSymUsuMin=0;
00011 unsigned gHtmlString::nTblSymIsoLatin1=0;
00012 t_uint32 gHtmlString::vTblSymIsoL1Min=0;
00013 t_uint32 gHtmlString::vTblSymIsoL1Max=0;
00014 
// Entity table for the character codes 160..191, one row per code, in ascending code order.
// Row layout: entity name (sName), character code (vAscii), a one-character tag, a description.
// thisSymbolString() indexes this table directly with (code - vTblSymUsuMin), so the rows
// must stay contiguous and in this order.
// The final row (nil name, code 0) is the terminator that thisFillUpTables() counts up to.
// The declared size (191-160+10 = 41) is larger than the 33 rows actually initialised.
// NOTE(review): the meaning of the tags 'U', '@' and 'n' is not visible in this file
// and nothing here reads that column -- confirm against gdNetStrings.h.
00015 sHtmlEntSymbol gHtmlString::tblSymUsu[191-160+10]={
00016         {"nbsp",                160,    'U', "no-break space"},
00017         {"iexcl",               161,    '@', "inverted exclamation mark"},
00018         {"cent",                162,    '@', "cent sign"},
00019         {"pound",               163,    'U', "pound sterling sign"},
00020         {"curren",              164,    '@', "general currency sign"},
00021         {"yen" ,                165,    '@', "yen sign"},
00022         {"brvbar",              166,    '@', "broken (vertical) bar"},
00023         {"sect",                167,    '@', "section sign"},
00024         {"uml" ,                168,    '@', "umlaut (dieresis)"},
00025         {"copy",                169,    'U', "copyright sign"},
00026         {"ordf",                170,    '@', "ordinal indicator, feminine"},
00027         {"laquo",               171,    '@', "angle quotation mark, left"},
00028         {"not" ,                172,    '@', "not sign"},
00029         {"shy" ,                173,    '@', "soft hyphen"},
00030         {"reg" ,                174,    'U', "registered sign"},
00031         {"macr",                175,    '@', "macron"},
00032         {"deg" ,                176,    '@', "degree sign"},
00033         {"plusmn",              177,    '@', "plus-or-minus sign"},
00034         {"sup2",                178,    'n', "superscript two"},
00035         {"sup3",                179,    'n', "superscript three"},
00036         {"acute",               180,    'U', "acute accent"},
00037         {"micro",               181,    '@', "micro sign"},
00038         {"para",                182,    '@', "pilcrow (paragraph sign)"},
00039         {"middot",              183,    '@', "middle dot"},
00040         {"cedil",               184,    '@', "cedilla"},
00041         {"sup1",                185,    'n', "superscript one"},
00042         {"ordm",                186,    '@', "ordinal indicator, masculine"},
00043         {"raquo",               187,    '@', "angle quotation mark, right"},
00044         {"frac14",              188,    'n', "fraction one-quarter"},
00045         {"frac12",              189,    'n', "fraction one-half"},
00046         {"frac34",              190,    'n', "fraction three-quarters"},
00047         {"iquest",              191,    '@', "inverted question mark"},
00048         {nil, 0, '\0', nil}};  // terminator: code 0 ends the table
00049 
// Entity table for the accented letters with character codes 192..255.
// Row layout: entity name (sName), character code (vAscii), a one-character tag, a description.
// Rows are ordered by entity name, not by code, so lookups by code (thisFindSymbolISO)
// and by name (thisConvertHToAscii) both scan the table linearly.
// Codes 215 and 247 have no row here, so a lookup for either of them finds nothing.
// The final row (nil name, code 0) is the terminator that thisFillUpTables() counts up to.
// The declared size (255-198+30 = 87) is larger than the 63 rows actually initialised.
// NOTE(review): the tag is 'C' on every capital-letter row and 's' on every small-letter
// row; nothing in this file reads it -- confirm its purpose against gdNetStrings.h.
00050 sHtmlEntSymbol gHtmlString::tblSymIsoLatin1[255-198+30]={
00051         {"AElig",       198,    'C', "AE diphthong (ligature)"},
00052         {"Aacute",      193,    'C', "A, acute accent"},
00053         {"Acirc",       194,    'C', "A, circumflex accent"},
00054         {"Agrave",      192,    'C', "A, grave accent"},
00055         {"Aring",       197,    'C', "A, ring"},
00056         {"Atilde",      195,    'C', "A, tilde"},
00057         {"Auml",        196,    'C', "A, dieresis or umlaut mark"},
00058         {"Ccedil",      199,    'C', "C, cedilla"},
00059         {"ETH" ,        208,    'C', "Eth, Icelandic"},
00060         {"Eacute",      201,    'C', "E, acute accent"},
00061         {"Ecirc",       202,    'C', "E, circumflex accent"},
00062         {"Egrave",      200,    'C', "E, grave accent"},
00063         {"Euml",        203,    'C', "E, dieresis or umlaut mark"},
00064         {"Iacute",      205,    'C', "I, acute accent"},
00065         {"Icirc",       206,    'C', "I, circumflex accent"},
00066         {"Igrave",      204,    'C', "I, grave accent"},
00067         {"Iuml",        207,    'C', "I, dieresis or umlaut mark"},
00068         {"Ntilde",      209,    'C', "N, tilde"},
00069         {"Oacute",      211,    'C', "O, acute accent"},
00070         {"Ocirc",       212,    'C', "O, circumflex accent"},
00071         {"Ograve",      210,    'C', "O, grave accent"},
00072         {"Oslash",      216,    'C', "O, slash"},
00073         {"Otilde",      213,    'C', "O, tilde"},
00074         {"Ouml",        214,    'C', "O, dieresis or umlaut mark"},
00075         {"THORN",       222,    'C', "THORN, Icelandic"},
00076         {"Uacute",      218,    'C', "U, acute accent"},
00077         {"Ucirc",       219,    'C', "U, circumflex accent"},
00078         {"Ugrave",      217,    'C', "U, grave accent"},
00079         {"Uuml",        220,    'C', "U, dieresis or umlaut mark"},
00080         {"Yacute",      221,    'C', "Y, acute accent"},
00081         {"aacute",      225,    's', "a, acute accent"},
00082         {"acirc",       226,    's', "a, circumflex accent"},
00083         {"aelig",       230,    's', "ae diphthong (ligature)"},
00084         {"agrave",      224,    's', "a, grave accent"},
00085         {"aring",       229,    's', "a, ring"},
00086         {"atilde",      227,    's', "a, tilde"},
00087         {"auml",        228,    's', "a, dieresis or umlaut mark"},
00088         {"ccedil",      231,    's', "c, cedilla"},
00089         {"eacute",      233,    's', "e, acute accent"},
00090         {"ecirc",       234,    's', "e, circumflex accent"},
00091         {"egrave",      232,    's', "e, grave accent"},
00092         {"eth" ,        240,    's', "eth, Icelandic"},
00093         {"euml",        235,    's', "e, dieresis or umlaut mark"},
00094         {"iacute",      237,    's', "i, acute accent"},
00095         {"icirc",       238,    's', "i, circumflex accent"},
00096         {"igrave",      236,    's', "i, grave accent"},
00097         {"iuml",        239,    's', "i, dieresis or umlaut mark"},
00098         {"ntilde",      241,    's', "n, tilde"},
00099         {"oacute",      243,    's', "o, acute accent"},
00100         {"ocirc",       244,    's', "o, circumflex accent"},
00101         {"ograve",      242,    's', "o, grave accent"},
00102         {"oslash",      248,    's', "o, slash"},
00103         {"otilde",      245,    's', "o, tilde"},
00104         {"ouml",        246,    's', "o, dieresis or umlaut mark"},
00105         {"szlig",       223,    's', "@sharp s, German (sz ligature)"},
00106         {"thorn",       254,    's', "thorn, Icelandic"},
00107         {"uacute",      250,    's', "u, acute accent"},
00108         {"ucirc",       251,    's', "u, circumflex accent"},
00109         {"ugrave",      249,    's', "u, grave accent"},
00110         {"uuml",        252,    's', "u, dieresis or umlaut mark"},
00111         {"yacute",      253,    's', "y, acute"},
00112         {"yuml",        255,    's', "y, dieresis or umlaut mark"},
00113         {nil, 0, '\0', nil}};  // terminator: code 0 ends the table
00114 ////////////////////////////////////////////////////////////
00115 gHtmlString::gHtmlString (char* s, bool aIgnoreAmp)
00116     : lastOpError( 0 ),
00117       doIgnoreAmp( aIgnoreAmp )
00118 {
00119  thisFillUpTables();
00120  ConvertToHString( s );
00121 }
00122 
00123 gHtmlString::~gHtmlString ()
00124 {
00125 }
00126 
00127 char* gHtmlString::AsciiToString (t_uchar v)
00128 {
00129  switch ( v ) {
00130  case '\n':
00131      return "<BR>\n";
00132  case '<':
00133      return thisStrSymbolAmp("lt");
00134  case '>':
00135      return thisStrSymbolAmp("gt");
00136  case '&':
00137      return thisStrSymbolAmp("amp");
00138  case '"':
00139      return thisStrSymbolAmp("quot");
00140  case '\t':
00141      return nil;
00142  case 127:
00143      lastOpError = 32;
00144      return nil;
00145  default:
00146      break;
00147  }
00148  if ( v<' ' ) return nil;
00149  if ( v<127 ) {
00150      sEntStr.SetEmpty(); sEntStr.Add( v );
00151      return sEntStr.Str();
00152  }
00153  return thisSymbolString( (t_uint32)v );
00154 }
00155 
00156 int gHtmlString::ConvertToHString (char* s)
00157 {
00158  // E.g.: "CA~O"(C,A-tilde,O) into "C&Atilde;O"
00159  // (this word: dog in portuguese)
00160  // If doIgnoreAmp=false (default), then
00161  // upon receiving a string like "C&Atilde;O",
00162  // it will be expanded exactly as is.
00163 
00164  lastOpError = 0;
00165  sOriStr.SetEmpty();
00166  Delete();  // Completely deletes the list
00167  if ( s==nil ) return 0;
00168 
00169  sOriStr.Set( s );
00170 
00171  if ( doIgnoreAmp )
00172      return thisConvertToHStringIgnoreAmp( s );
00173 
00174  gString sResult;
00175  lastOpError = ConvertAmpToAscii( s, sResult );
00176  DBGPRINT_MIN("DBG: gHtmlString:[%s|%s] OpError=%d\n",s,sResult.Str(),lastOpError);
00177  if ( lastOpError!=0 ) sResult.Set( s );
00178  // Check: not kept lastOpError
00179  return thisConvertToHStringIgnoreAmp( sResult.Str() );
00180 }
00181 
00182 int gHtmlString::ConvertAmpToAscii (char* s, gString& sResult)
00183 {
00184  int error, resError=0;
00185  unsigned i, pos, len;
00186  t_uchar uChr;
00187 
00188  ASSERTION(s!=nil,"s!=nil");
00189  sResult.SetEmpty();
00190 
00191  gString sIn( s );
00192  for (i=1, len=sIn.Length(); i<=len; i++) {
00193      uChr = sIn[i];
00194      if ( uChr=='&' ) {
00195          pos = gStrControl::Self().Find( s+i, ";" );
00196          if ( pos==0 ) return 64;  // &XYZ, but no ';'
00197          gString sTemp;
00198          sTemp.CopyFromTo( sIn, i, i+pos );
00199          i += pos;
00200          error = ConvertHToAscii( sTemp.Str(), uChr );
00201          ASSERTION(error>=0,"error>=0");
00202          if ( error!=0 ) {
00203              resError = 65;  // &XYZ; but XYZ not known!
00204              continue;
00205          }
00206      }
00207      sResult.Add( uChr );
00208  }
00209  return resError;
00210 }
00211 
00212 int gHtmlString::ConvertHToAscii (char* sAmp, t_uchar& cRes)
00213 {
00214  // Return -1 if "&XYZ;" format is not present;
00215  // 0 if all ok; or other >0 error when not found "XYZ"
00216  unsigned len;
00217  ASSERTION(sAmp!=nil,"sAmp!=nil");
00218  cRes = 0;
00219  if ( sAmp[0]!='&' ) return -1;
00220  gString s( sAmp+1 );
00221  if ( s[len = s.Length()]!=';' ) return -1;
00222  if ( len<=2 ) return 1;
00223  gString sIn;
00224  sIn.CopyFromTo( s, 1, len-1 );
00225  short tblIdx;
00226  unsigned idx;
00227  return thisConvertHToAscii( sIn, tblIdx, idx, cRes )!=0;
00228 }
00229 
00230 int gHtmlString::thisConvertToHStringIgnoreAmp (char* s)
00231 {
00232  char* str;
00233  t_uchar v;
00234  int error=0, resError=0;
00235 
00236  for ( ; (v = (t_uchar)s[0])!=0; s++) {
00237      str = AsciiToString( v );
00238      error = lastOpError!=0 || str==nil;
00239      if ( error==0 )
00240          Add( str );
00241      else
00242          resError = error;
00243  }
00244  return resError;
00245 }
00246 
00247 char* gHtmlString::thisSymbolString (t_uint32 val)
00248 {
00249  unsigned idx;
00250  if ( val<vTblSymUsuMin && val<vTblSymIsoL1Min ) {
00251      lastOpError = 8;  // No usual symbol, nor within ISO chars
00252      return nil;
00253  }
00254  if ( val>vTblSymIsoL1Max ) {
00255      lastOpError = 9;  // No usual symbol, nor within ISO chars, ASCII too high
00256      return nil;
00257  }
00258  if ( val>=vTblSymIsoL1Min ) {
00259      return thisStrSymbolAmp( thisFindSymbolISO( 1/*As in 8859-1*/, val, nTblSymIsoLatin1, idx ) );
00260  }
00261  if ( val>=vTblSymUsuMin && val<vTblSymUsuMin+(t_uint32)nTblSymUsu ) {
00262      val -= vTblSymUsuMin;
00263      return thisStrSymbolAmp( tblSymUsu[val].sName );
00264  }
00265  return nil;
00266 }
00267 
00268 char* gHtmlString::thisFindSymbolISO (short tblIdx,
00269                                       t_uint32 val,
00270                                       unsigned nEntries,
00271                                       unsigned& idx)
00272 {
00273  t_uint32 vAscii;
00274  for (idx=0; idx<nEntries; idx++) {
00275      // switch ( tblIdx ) -<- optimized, removed
00276      // sHtmlEntSymbol* pSym = &tblSymIsoLatin1[idx] ...
00277      vAscii = tblSymIsoLatin1[idx].vAscii;
00278      if ( vAscii==val ) return tblSymIsoLatin1[idx].sName;
00279  }
00280  idx = MAX_UINT16_U;  // Or zero
00281  return nil;
00282 }
00283 
00284 int gHtmlString::thisConvertHToAscii (gString& s,
00285                                       short& tblIdx,
00286                                       unsigned& idx,
00287                                       t_uchar& cRes)
00288 {
00289  // Input 's' is not "&XYZ;" but "XYZ" only; return 0 iff ok
00290  // Return 1 if out-of-bounds (ASCII>=256), 2 on other error
00291  char* str = s.Str();
00292  t_uint32 vAscii;
00293 
00294  cRes = 0;
00295 
00296  // Check first whether is a regular "amp", "lt", ...
00297  if ( strcmp(str,"amp")==0 ) {
00298      cRes = '&';
00299      return 0;
00300  }
00301  if ( strcmp(str,"lt")==0 ) {
00302      cRes = '<';
00303      return 0;
00304  }
00305  if ( strcmp(str,"gt")==0 ) {
00306      cRes = '>';
00307      return 0;
00308  }
00309  if ( strcmp(str,"quot")==0 ) {
00310      cRes = '"';
00311      return 0;
00312  }
00313  // Maybe in TblSymUsu or in Latin1
00314  for (idx=0, tblIdx=0; idx<nTblSymUsu; idx++) {
00315      if ( strcmp(str,tblSymUsu[idx].sName)==0 ) {
00316          cRes = (t_uchar)(vAscii = tblSymUsu[idx].vAscii);
00317          return vAscii>=256;
00318      }
00319  }
00320  for (idx=0, tblIdx=1; idx<nTblSymIsoLatin1; idx++) {
00321      if ( strcmp(str,tblSymIsoLatin1[idx].sName)==0 ) {
00322          cRes = (t_uchar)(vAscii = tblSymIsoLatin1[idx].vAscii);
00323          return vAscii>=256;
00324      }
00325  }
00326  tblIdx = -1;
00327  return 2;
00328 }
00329 
00330 char* gHtmlString::thisStrSymbolAmp (char* s)
00331 {
00332  sEntStr.SetEmpty();
00333  if ( s==nil ) {
00334      lastOpError = 16;
00335      return nil;
00336  }
00337  sEntStr.Add( '&' );
00338  sEntStr.Add( s );
00339  sEntStr.Add( ';' );
00340  return sEntStr.Str();
00341 }
00342 
00343 int gHtmlString::thisFillUpTables ()
00344 {
00345  unsigned idx;
00346  t_uint32 v, vMin=(t_uint32)MAX_UINT16_U, vMax=0;
00347 
00348  if ( nTblSymUsu>0 ) return 0; // Already calculate
00349 
00350  // Usual symbols
00351  for (idx=0; (v = tblSymUsu[idx].vAscii)!=0; idx++) {
00352      if ( idx==0 ) vTblSymUsuMin = v;
00353      nTblSymUsu++;
00354  }
00355  for (idx=0; (v = tblSymIsoLatin1[idx].vAscii)!=0; idx++) {
00356      nTblSymIsoLatin1++;
00357      if ( v<vMin ) vMin = v;
00358      if ( v>vMax ) vMax = v;
00359  }
00360  vTblSymIsoL1Min = vMin;
00361  vTblSymIsoL1Max = vMax;
00362  return 0;
00363 }
00364 ////////////////////////////////////////////////////////////
00365 

Generated on Sat Aug 18 02:40:52 2007 for xpfweb_v2x lib by  doxygen 1.4.2