00001
00002
00003 #include <string.h>
00004 #include "gweb.h"
00005 #include "gstringext.h"
00006
00007
00008
00009
00010
00011
00012
00013 gIntCoord::gIntCoord ()
00014 : iAux( 0 ),
00015 y( 0 ),
00016 z( 0 )
00017 {
00018 }
00019
00020 gIntCoord::gIntCoord (int ax, int ay)
00021 : gInt( ax ),
00022 iAux( 0 ),
00023 y( ay ),
00024 z( 0 )
00025 {
00026 }
00027
00028 gIntCoord::gIntCoord (int ax, int ay, int az)
00029 : gInt( ax ),
00030 iAux( 0 ),
00031 y( ay ),
00032 z( az )
00033 {
00034 }
00035
00036 gStorage* gIntCoord::NewObject ()
00037 {
00038 gIntCoord* a = new gIntCoord( c, y, z );
00039 return a;
00040 }
00041
00042 t_uchar* gIntCoord::ToString (t_uchar* uBuf)
00043 {
00044 if ( uBuf==nil ) return nil;
00045 if ( z!=0 )
00046 sprintf( (char*)uBuf, "[%d,%d,%d]", c, y, z );
00047 else
00048 sprintf( (char*)uBuf, "[%d,%d]", c, y );
00049 return uBuf;
00050 }
00051
00052 gIntCoord& gIntCoord::operator= (gIntCoord& copy)
00053 {
00054 iAux = copy.iAux;
00055 c = copy.GetX();
00056 y = copy.GetY();
00057 z = copy.GetZ();
00058 return *this;
00059 }
00060
00061 void gIntCoord::Show (bool doShowAll)
00062 {
00063 printf("[%d,%d]%s",c,y,doShowAll?"\n":"\0");
00064 }
00065
00066 gTagCoord::gTagCoord ()
00067 : opId( 0 ),
00068 depth( 0 )
00069 {
00070 }
00071
00072 gTagCoord::~gTagCoord ()
00073 {
00074 }
00075
00076 void gTagCoord::Reset ()
00077 {
00078 opId = 0;
00079 depth = 0;
00080 gIntCoord::Reset();
00081 }
00082
00083 bool gTagCoord::SetZ (int az)
00084 {
00085 bool assignOk = az>GetY();
00086 ASSERTION(assignOk,"assignOk");
00087 return gIntCoord::SetZ( az ) && assignOk;
00088 }
00089
00090 bool sCoordText::IsOk ()
00091 {
00092 bool isOk = coordL.N()==textL.N();
00093 ASSERTION(isOk,"coordL.N()==textL.N()");
00094 return isOk;
00095 }
00096
00097 unsigned sCoordText::N ()
00098 {
00099 if ( IsOk()==false ) return 0;
00100 return coordL.N();
00101 }
00102
00103 char* sCoordText::Str (unsigned idx)
00104 {
00105 return textL.Str( idx );
00106 }
00107
00108 gIntCoord& sCoordText::GetCoord (unsigned idx)
00109 {
00110 gStorage* inObj = coordL.GetObjectPtr( idx );
00111 gIntCoord* pairPtr = (gIntCoord*)inObj;
00112 ASSERTION(pairPtr!=nil,"pairPtr!=nil");
00113 return *pairPtr;
00114 }
00115
00116 unsigned sCoordText::FindCoordX (int x)
00117 {
00118 gList foundL, foundTxtL;
00119 return FindCoordX( x, foundL, foundTxtL );
00120 }
00121
00122 unsigned sCoordText::FindCoordX (int x, gList& foundL, gList& foundTxtL)
00123 {
00124 unsigned idx, firstIdx=0, n = N();
00125 int thisX, y, z;
00126
00127 for (idx=1; idx<=n; idx++) {
00128 thisX = GetCoord( idx ).GetX();
00129 if ( thisX==x ) {
00130 if ( firstIdx==0 ) firstIdx = idx;
00131 y = GetCoord( idx ).GetY();
00132 z = GetCoord( idx ).GetZ();
00133 AddCoordToList( x, y, z, foundL );
00134 foundTxtL.Add( textL.Str( idx ) );
00135 }
00136 }
00137 return firstIdx;
00138 }
00139
00140 bool sCoordText::Add (int x, int y, char* s)
00141 {
00142 ASSERTION(s!=nil,"s!=nil");
00143 gString aS( s );
00144 return Add( x, y, 0, aS );
00145 }
00146
00147 bool sCoordText::Add (int x, int y, int z, gString& s)
00148 {
00149 gString sTrim( s );
00150 sTrim.Trim();
00151 if ( sTrim.IsEmpty() ) return false;
00152
00153 #ifdef DEBUG
00154 if ( z==0 ) {
00155 printf("DBG:AddTxt(%d,%d):'%s' as '%s'\n",x,y,s.Str(),sTrim.Str());
00156 }
00157 else {
00158 printf("DBG:AddCmd(%d,%d):%s!\n",x,y,s.Str());
00159 }
00160 #endif //DEBUG
00161
00162 AddCoord( x, y, z );
00163 AddText( sTrim.Str() );
00164 return true;
00165 }
00166
00167 bool sCoordText::Add (int x, int y, int z, char* s)
00168 {
00169 ASSERTION(s!=nil,"s!=nil");
00170 gString aS( s );
00171 return Add( x, y, z, aS );
00172 }
00173
00174 bool sCoordText::AddCoord (int x, int y, int z)
00175 {
00176 return AddCoordToList( x, y, z, coordL );
00177 }
00178
00179 bool sCoordText::AddCoordToList (int x, int y, int z, gList& resL)
00180 {
00181 bool isOk;
00182 gIntCoord* newObj = new gIntCoord( x, y, z );
00183 ASSERTION(newObj!=nil,"newObj!=nil");
00184 isOk = IsOk();
00185 resL.AppendObject( newObj );
00186 return isOk;
00187 }
00188
00189 bool sCoordText::AddText (gString& s)
00190 {
00191 return AddText( s.Str() );
00192 }
00193
00194 bool sCoordText::AddText (char* s)
00195 {
00196 if ( s==nil ) return false;
00197 textL.Add( s );
00198 return true;
00199 }
00200
00201 void sCoordText::Delete ()
00202 {
00203 coordL.Delete();
00204 textL.Delete();
00205 }
00206
00207 void sCoordText::Report (FILE* fRep, bool doShowAll)
00208 {
00209 unsigned i, n = N();
00210
00211 if ( fRep==nil ) fRep = stdout;
00212 for (i=1; i<=n; i++) {
00213 fprintf(fRep,"[%s%d,%s%d]:%s\n",
00214 doShowAll?"Line:":"\0",
00215 GetCoord( i ).GetX(),
00216 doShowAll?"Col:":":",
00217 GetCoord( i ).GetY(),
00218 Str( i ));
00219 }
00220 }
00221
00222 gUnweb::gUnweb (char* fName, bool doVerbose)
00223 : gFileFetch( fName, -1, doVerbose ),
00224 firstHtmlError( 0 ),
00225 firstHtmlErrLNr( 0 ),
00226 lastHtmlError( 0 ),
00227 lastHtmlErrLNr( 0 ),
00228 nHtmlErrors( 0 ),
00229 showKind( e_ShowAll ),
00230 isVerbose( doVerbose ),
00231 tagCaseCh( e_CaseUnchange ),
00232 scriptLevel( -9 ),
00233 cCHR_HTM_PARSED( cCHR_HTM_PARSED_DEFAULT ),
00234 cCHR_HTM_INFO( cCHR_HTM_INFO_DEFAULT )
00235 {
00236
00237 SetChrHtmParsed( cCHR_HTM_PARSED );
00238 }
00239
00240 gUnweb::gUnweb (gString& sInput, bool doVerbose)
00241 : gFileFetch( sInput, doVerbose ),
00242 firstHtmlError( 0 ),
00243 firstHtmlErrLNr( 0 ),
00244 lastHtmlError( 0 ),
00245 lastHtmlErrLNr( 0 ),
00246 nHtmlErrors( 0 ),
00247 showKind( e_ShowAll ),
00248 isVerbose( doVerbose ),
00249 tagCaseCh( e_CaseUnchange ),
00250 scriptLevel( -9 ),
00251 cCHR_HTM_PARSED( cCHR_HTM_PARSED_DEFAULT ),
00252 cCHR_HTM_INFO( cCHR_HTM_INFO_DEFAULT )
00253 {
00254 SetChrHtmParsed( cCHR_HTM_PARSED );
00255 }
00256
00257 gUnweb::~gUnweb ()
00258 {
00259 }
00260
00261 FILE* gUnweb::VerboseStream ()
00262 {
00263 if ( isVerbose==false ) return nil;
00264 ASSERTION(fVRepErr!=nil,"fVRepErr!=nil");
00265 return fVRepErr;
00266 }
00267
00268 bool gUnweb::IsBufferOk ()
00269 {
00270 return IsOpened() && gFileFetch::IsBufferOk();
00271 }
00272
00273 char* gUnweb::Str (unsigned idx)
00274 {
00275 return
00276 HasProcessed() ? coordSerial.Str( idx ) : aL.Str( idx );
00277 }
00278
00279 void gUnweb::SetChrHtmParsed (t_uchar c)
00280 {
00281 cCHR_HTM_PARSED = c;
00282 sCHR_HTM_PARSED[0] = (char)c;
00283 sCHR_HTM_PARSED[1] = 0;
00284 }
00285
00286 unsigned gUnweb::FindStringKey (gString& s, char* sub, unsigned startPos, unsigned endPos)
00287 {
00288 unsigned k, uLen = s.Length();
00289 short quoting = 0;
00290 t_uchar uChr;
00291 char* str;
00292
00293 ASSERTION(sub!=nil,"sub!=nil");
00294 if ( uLen==0 ) return 0;
00295
00296 ASSERTION(startPos>0,"startPos>0");
00297 ASSERTION(endPos>0,"endPos>0");
00298 ASSERTION(endPos<=uLen,"endPos<=uLen");
00299
00300 if ( sub[0]==0 ) return 0;
00301
00302 for (k=startPos, str=s.Str()+k-1; k<=endPos; k++, str++) {
00303 uChr = s[k];
00304 if ( uChr=='"' ) {
00305 quoting = quoting==0;
00306 }
00307 if ( quoting!=0 ) continue;
00308 if ( gStrControl::Self().Find( str, sub )==1 ) {
00309 return k;
00310 }
00311 }
00312 return 0;
00313 }
00314
00315 unsigned gUnweb::FindStringKey (gString& s, char* sub)
00316 {
00317 return FindStringKey( s, sub, 1, s.Length() );
00318 }
00319
00320 unsigned gUnweb::FindStringKey (char* s, char* sub, unsigned startPos, unsigned endPos)
00321 {
00322 ASSERTION(s!=nil,"s!=nil");
00323 gString aS( s );
00324 return FindStringKey( aS, sub, startPos, endPos );
00325 }
00326
00327 unsigned gUnweb::FindStringKey (char* s, char* sub)
00328 {
00329 ASSERTION(s!=nil,"s!=nil");
00330 gString aS( s );
00331 return FindStringKey( aS, sub );
00332 }
00333
00334 int gUnweb::Dump (FILE* fRepErr)
00335 {
00336 int error;
00337 gList headL;
00338
00339 error = thisPreProcess( fRepErr, aL, headL );
00340 if ( error<0 ) return 0;
00341 if ( error!=0 ) {
00342 if ( isVerbose ) fprintf(VerboseStream(),"Error pre-processing HTML: error-code: %d.\n",error);
00343 return error;
00344 }
00345
00346 gList oL;
00347 error = thisPostProcess( fRepErr, headL, stripL, oL, coordSerial );
00348 if ( error!=0 ) return error;
00349
00350 DBGPRINT_WEB("thisPostProcess: DONE\n");
00351
00352 #ifdef DEBUG_WEB
00353 unsigned ix;
00354 for (ix=1; ix<=coordSerial.N(); ix++) {
00355 unsigned eX=coordSerial.GetCoord( ix ).GetX();
00356 printf("WEB: ix=%u (%u): %s\n",ix,eX,Str(ix));
00357 }
00358
00359
00360 #endif //DEBUG_WEB
00361
00362
00363
00364
00365 thisConsolidateData( stripL, 1, stripL.N(), 2 );
00366
00367 if ( isVerbose ) fprintf(VerboseStream(),"gUnweb::Post-Consolidate (%u lines)\n",coordSerial.N());
00368
00369 thisConsolidateDataCoord( coordParseErr, coordSerial );
00370
00371 if ( isVerbose ) fprintf(VerboseStream(),"gUnweb::Post-Consolidate-check (%u error(s))\n",coordParseErr.N());
00372
00373 #ifdef DEBUG
00374 if ( 1 ) {
00375 printf("DBG:{\n");
00376 for (short dbgIter=1, dbgAll=1; dbgIter<=(short)coordSerial.N(); dbgIter++) {
00377 printf("[%d,%d:%3d]:%s%s%s\n",
00378 coordSerial.GetCoord( dbgIter ).GetX(),
00379 coordSerial.GetCoord( dbgIter ).GetY(),
00380 coordSerial.GetCoord( dbgIter ).GetZ(),
00381 dbgAll?"'":"\0",
00382 coordSerial.Str( dbgIter ),
00383 dbgAll?"'":"\0");
00384 }
00385 printf("DBG:}\n");
00386 if ( coordParseErr.N()>0 ) {
00387 printf("DBG:TXT-ERRORS:START:");
00388 coordParseErr.Report(stdout,true);
00389 printf("DBG:TXT-ERRORS:END.\n");
00390 }
00391 }
00392 #endif //DEBUG
00393
00394 thisCutCommentsInTxt( coordComments, coordSerial );
00395
00396 return nHtmlErrors>0;
00397 }
00398
00399 bool gUnweb::SetVerboseStream (FILE* aFRepErr)
00400 {
00401 fVRepErr = aFRepErr;
00402 isVerbose = fVRepErr!=nil;
00403 return isVerbose;
00404 }
00405
00406 int gUnweb::thisConsolidateData (gList& ioL, unsigned lowL, unsigned highL, short step)
00407 {
00408 unsigned i;
00409 unsigned kIter, kCode;
00410 unsigned kLen;
00411 char* str;
00412
00413 for (i=lowL; i<=highL; i++) {
00414 str = ioL.Str( i );
00415 kLen = (unsigned)strlen( str );
00416 for (kIter=0; kIter<kLen; kIter++) {
00417 kCode = (t_uchar)str[kIter];
00418 switch ( step ) {
00419 case 1:
00420 case 2:
00421 if ( kCode==cCHR_HTM_PARSED )
00422 str[kIter] = ' ';
00423 break;
00424 default:
00425 break;
00426 }
00427 }
00428 }
00429
00430 switch ( step ) {
00431 case 2:
00432
00433 #ifdef DEBUG
00434 for (i=lowL; i<=highL; i++) {
00435 gString sTemp( ioL.Str( i ) );
00436 sTemp.TrimRight();
00437 if ( sTemp.Length()>0 ) printf("DBG %3d:@@ %s\n",i,sTemp.Str());
00438 }
00439 #endif //DEBUG
00440 break;
00441 default:
00442 break;
00443 }
00444 return 0;
00445 }
00446
00447 int gUnweb::thisConsolidateDataCoord (sCoordText& cErr, sCoordText& coord)
00448 {
00449 unsigned i, k, n=coord.N();
00450 unsigned kLen;
00451 unsigned pos, posEnd;
00452 unsigned x, y;
00453 char* str;
00454 short levelScript=0;
00455 bool hasSomeScript;
00456
00457 for (i=1; i<=n; i++) {
00458 short quoting = 0, quoting2 = 0;
00459 short notOkCount = 0, doesEnd = 0;
00460 t_uchar uChr;
00461 gString sCmd( str = coord.Str( i ) );
00462 gString sCmdUp( str );
00463 sCmdUp.UpString();
00464 x = coord.GetCoord(i).GetX();
00465 y = coord.GetCoord(i).GetY();
00466
00467 DBGPRINT_WEB("WEB: DataCoord(%u,%u): %s\n",x,y,str);
00468
00469 hasSomeScript = false;
00470 if ( (pos = sCmdUp.Find("<SCRIPT"))>0 ) {
00471 if ( levelScript!=0 ) {
00472 cErr.Add( x, y+pos-1, 126, "Script-Script tag, invalid nesting" );
00473 }
00474 levelScript = 1;
00475 hasSomeScript = true;
00476 }
00477 posEnd = sCmdUp.Find("</SCRIPT");
00478 if ( posEnd>pos ) {
00479 levelScript--;
00480 hasSomeScript = true;
00481 }
00482 if ( levelScript<0 ) {
00483 cErr.Add( x, y+posEnd-1, 127, "EndScript-EndScript tag, invalid nesting" );
00484 levelScript = 0;
00485 }
00486 hasSomeScript = hasSomeScript==true || levelScript>0;
00487
00488 if ( hasSomeScript ) continue;
00489
00490 if ( sCmd[1]!='<' ) continue;
00491
00492 kLen = sCmd.Length();
00493 uChr = sCmdUp[2];
00494 doesEnd = uChr=='/';
00495 if ( ( (uChr>='A' && uChr<='Z') || uChr=='!' || uChr=='?' || doesEnd!=0 )==false ) {
00496 if ( kLen>2 )
00497 cErr.Add( x, y+1, 123, "Strictly not a tag." );
00498 else
00499 cErr.Add( x, y+1, 133, "Strictly not a tag (empty)." );
00500 notOkCount++;
00501 }
00502
00503 if ( sCmd[ kLen ]!='>' ) {
00504
00505 cErr.Add( x, y, 116, "Unterminated tag." );
00506 continue;
00507 }
00508
00509 if ( notOkCount==0 ) {
00510
00511 gString sCmdRem;
00512 sCmdRem.CopyFromTo( sCmdUp, 2+doesEnd, kLen );
00513 k = sCmdRem.FindExcept("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!?");
00514 ASSERTION(k>0,"k>0");
00515 uChr = sCmdRem[k];
00516 k += (unsigned)doesEnd;
00517 if ( k+1<kLen ) {
00518 if ( uChr!=' ' && uChr!='=' ) {
00519 cErr.Add( x, y+k, 124, "Not strictly valid character(s) in tag." );
00520 notOkCount++;
00521 }
00522 else {
00523 if ( doesEnd!=0 ) {
00524 cErr.Add( x, y+k, 125, "Not strictly valid character(s) in end-tag." );
00525 notOkCount++;
00526 }
00527 }
00528 }
00529
00530 }
00531
00532 for (k=2; k<=kLen && (uChr = sCmd[ k ])!=0; k++) {
00533 switch ( uChr ) {
00534 case '"':
00535 if ( quoting2==0 ) quoting = (quoting==0)*((short)k);
00536 break;
00537 case '\'':
00538 if ( quoting==0 ) quoting2 = (quoting2==0)*((short)k);
00539 break;
00540 default:
00541 if ( quoting!=0 || quoting2!=0 ) continue;
00542
00543 if ( uChr>=127 ) {
00544
00545
00546
00547 cErr.Add( x, y+k-1, 115, "Non 7bit symbol" );
00548 continue;
00549 }
00550 ;
00551 if ( uChr=='<' ) {
00552 cErr.Add( x, y+k-1, 121, "Too many '<'" );
00553 continue;
00554 }
00555 if ( k<kLen && uChr=='>' ) {
00556 cErr.Add( x, y+k-1, 122, "Too many '>'" );
00557 continue;
00558 }
00559
00560
00561
00562 switch ( tagCaseCh ) {
00563 case e_CaseUnchange:
00564 break;
00565 case e_CaseConvUpper:
00566 if ( uChr>='a' && uChr<='z' ) {
00567 uChr -= 32;
00568 }
00569 str[ k-1 ] = uChr;
00570 break;
00571 case e_CaseConvDown:
00572 break;
00573 default:
00574 break;
00575 }
00576 break;
00577 }
00578 }
00579 if ( quoting!=0 ) {
00580 cErr.Add( x, y+quoting-1, 117, "Unterminated double-quote" );
00581 }
00582 if ( quoting2!=0 ) {
00583 cErr.Add( x, y+quoting-1, 118, "Unterminated single-quote" );
00584 }
00585 }
00586
00587
00588 n = cErr.N();
00589 nHtmlErrors += n;
00590 if ( n>0 ) {
00591 firstHtmlErrLNr = cErr.GetCoord( 1 ).GetX();
00592 lastHtmlErrLNr = cErr.GetCoord( n ).GetX();
00593 }
00594 return 0;
00595 }
00596
00597 int gUnweb::thisCutCommentsInTxt (sCoordText& coordCmt, sCoordText& zRes)
00598 {
00599 unsigned i, nPost = zRes.N();
00600 unsigned idx;
00601 int iLen;
00602 int x, y, z;
00603 int y1;
00604 char* str;
00605
00606 if ( coordCmt.N()==0 ) return 0;
00607
00608 for (i=1; i<=nPost; i++) {
00609 z = zRes.GetCoord( i ).GetZ();
00610 if ( z!=0 ) continue;
00611 gList foundL, foundTxtL;
00612 x = zRes.GetCoord( i ).GetX();
00613 idx = coordCmt.FindCoordX( x, foundL, foundTxtL );
00614 if ( idx==0 ) continue;
00615 y = zRes.GetCoord( i ).GetY();
00616 unsigned ilIdx, ilN;
00617 int strIter, cmtIter, cmtIterMax;
00618 str = zRes.Str( i );
00619 iLen = strlen(str);
00620 for (ilIdx=1, ilN=foundL.N(); ilIdx<=ilN; ilIdx++) {
00621 gIntCoord* pCoord;
00622 pCoord = (gIntCoord*)foundL.GetObjectPtr( ilIdx );
00623 y1 = pCoord->GetY();
00624 if ( y1 < y+iLen ) {
00625 cmtIterMax = strlen( foundTxtL.Str( ilIdx ) );
00626 strIter = y1-y;
00627 if ( strIter<0 ) continue;
00628 for (cmtIter=0; cmtIter<cmtIterMax; cmtIter++) {
00629 ASSERTION(strIter<iLen,"strIter...");
00630 str[ strIter++ ] = cCHR_HTM_PARSED;;
00631
00632 }
00633 }
00634 }
00635 gString sTemp;
00636 for (strIter=0; strIter<iLen; strIter++) {
00637 t_uchar uChr( (t_uchar)str[ strIter ] );
00638 if ( uChr!=cCHR_HTM_PARSED ) sTemp.Add( uChr );
00639 }
00640 strcpy( str, sTemp.Str() );
00641 }
00642 return 0;
00643 }
00644
00645 int gUnweb::thisPreProcess (FILE* fRepErr, gList& iL, gList& oL)
00646 {
00647 ;
00648
00649
00650
00651
00652
00653 int error;
00654 int len;
00655 unsigned i, n;
00656 char* str;
00657
00658 if ( HasProcessed()==false ) return -1;
00659
00660
00661 firstHtmlError = firstHtmlErrLNr = 0;
00662 lastHtmlError = lastHtmlErrLNr = 0;
00663 nHtmlErrors = 0;
00664
00665
00666 oL.Delete();
00667
00668
00669 error = 0;
00670 for (i=1, n=iL.N(); i<=n; i++) {
00671 str = iL.Str( i );
00672
00673 while ( (len = strlen(str))>0 && str[--len]=='\r' ) str[len] = 0;
00674
00675 if ( gStrControl::Self().Find( str, cCHR_HTM_PARSED )>0 ) {
00676 fprintf(fRepErr,"Found chr 0x%02X on line: %u\n",cCHR_HTM_PARSED,i);
00677 error++;
00678 }
00679 if ( gStrControl::Self().Find( str, '\r' )>0 ) {
00680 fprintf(fRepErr,"Found chr 0x%02X on line: %u\n",'\r',i);
00681 error++;
00682 }
00683 gString sTrim( str );
00684 sTrim.TrimRight();
00685 stripL.Add( sTrim );
00686 }
00687
00688 if ( error!=0 ) return 1;
00689
00690 if ( isVerbose ) fprintf(VerboseStream(),"gUnweb:PreJoin (%u lines)\n",n);
00691
00692 error = thisPreJoin( fRepErr, iL, stripL, oL );
00693 if ( error!=0 ) return error;
00694
00695
00696 ASSERTION(iL.N()==stripL.N(),"iL.N()==stripL.N()");
00697
00698
00699
00700 error = thisPreStripComments( fRepErr, stripL, coordComments );
00701 if ( error!=0 ) return error;
00702
00703 if ( isVerbose ) fprintf(VerboseStream(),"gUnweb:StripComments (%u lines)\n",coordComments.N());
00704
00705
00706
00707
00708 thisConsolidateData( stripL, 1, n, 1 );
00709
00710 if ( isVerbose ) fprintf(VerboseStream(),"gUnweb:Post\n");
00711
00712 return error;
00713 }
00714
// Moves "<!DOCTYPE ...>" declarations out of the stripped lines sL into oL.
// The moved characters are overwritten with the "parsed" marker in the line
// buffer (via thisStrMove + thisStripReplace). A declaration that does not
// close on its own line is gathered from the following lines until a '>' is
// found. fRepErr is not used in this function.
// Returns 0 on success, 101 when a declaration is never closed.
00715 int gUnweb::thisPreJoin (FILE* fRepErr, gList& iL, gList& sL, gList& oL)
00716 {
00717 unsigned i, n;
00718 unsigned uLen, pos, pos2;
00719 unsigned nOcc = 0;
00720 char* stripStr;
00721
00722 // The loop bound comes from iL, the text comes from sL: both lists are
00723 // indexed with the same i.
00724
00725 for (i=1, n=iL.N(); i<=n; i++) {
00726 stripStr = sL.Str( i );
00727 gString s( stripStr );
00728 uLen = s.Length();
00729
00730 while ( (pos = s.Find("<!DOCTYPE ",nOcc,false))>0 ) {
00731 // NOTE(review): the meaning of the 2nd/3rd Find() arguments is not
00732 // visible in this file; nOcc is always 0 here.
00733 // Look for the closing '>' in the rest of this line.
00734 gString sEnd;
00735 sEnd.CopyFromTo( s, pos, uLen );
00736 pos2 = sEnd.Find( ">" );
00737 if ( pos2>0 ) {
00738 pos2 += pos - 1;
00739
00740 // pos2 is now a position within s. The whole declaration sits on
00741 // this line: move it to oL (thisStrMove overwrites it in s with
00742 // the marker character).
00743 thisStrMove( s, pos, pos2, oL );
00744
00745
00746
00747
00748
00749 // Write the modified s back into the line buffer, then search the
00750 // same line again.
00751 thisStripReplace( s, stripStr );
00752 continue;
00753 }
00754
00755 // Not closed on this line: the start (pos..end of line) goes into
00756 // sCat and is marked as parsed in the line buffer.
00757
00758
00759 gString sCat, sLimp;
00760 thisStrSetParse( s, pos, uLen, sCat );
00761 thisStripReplace( s, stripStr );
      // This loop advances the OUTER line index i; pos becomes non-zero on the
      // line that holds the closing '>'. Because of the loop's own i++, i ends
      // up one past that line while stripStr and s still refer to it.
00762 for (i=i+1, pos=0; i<=n && pos==0; i++) {
00763 stripStr = sL.Str( i );
00764 s = stripStr;
00765 uLen = s.Length();
00766 if ( uLen==0 ) continue;
00767 sCat.Add( " " );
00768 pos = s.Find( ">" );
00769 if ( pos==0 ) {
00770 gString sTrim( s );
00771 sTrim.Trim();
00772 sCat.AddString( sTrim );
      // NOTE(review): pos is 0 in this branch, so thisStrSetParse is called
      // with startPos 0 while other call sites pass 1-based positions — confirm.
00773 thisStrSetParse( s, pos, uLen, sLimp );
00774 thisStripReplace( s, stripStr );
00775 }
00776 }
00777 if ( pos==0 ) return 101;
00778 // Closing line: take characters 1..pos into sLimp and mark them parsed.
00779 thisStrSetParse( s, 1, pos, sLimp );
00780
00781 thisStripReplace( s, stripStr );
00782
00783 sCat.AddString( sLimp );
00784 // The joined declaration goes to oL.
00785 thisStrMove( sCat, 1, sCat.Length(), oL );
00786 }
00787 }
00788
00789 return 0;
00790 }
00791
00792 int gUnweb::thisPreStripComments (FILE* fRepErr, gList& iL, sCoordText& zRes)
00793 {
00794 unsigned i, n;
00795 unsigned k;
00796 unsigned uLen, pos, pos2;
00797 char* stripStr;
00798
00799
00800
00801 for (i=1, n=iL.N(); i<=n; ) {
00802 stripStr = iL.Str( i );
00803 uLen = (unsigned)strlen( stripStr );
00804 pos = gStrControl::Self().Find( stripStr, "<!--" );
00805 if ( pos==0 ) {
00806 i++;
00807 continue;
00808 }
00809 zRes.AddCoord( i, pos );
00810
00811 gString sCom;
00812
00813 pos--;
00814
00815
00816 pos2 = gStrControl::Self().Find( stripStr+pos, "-->" );
00817 if ( pos2>0 ) {
00818 pos2--;
00819 pos2 += pos;
00820
00821 for (k=pos; k<pos2+3; k++) {
00822
00823 sCom.Add( stripStr[k] );
00824 stripStr[k] = cCHR_HTM_PARSED;
00825 }
00826 zRes.AddText( sCom );
00827
00828 continue;
00829 }
00830
00831
00832 for (k=pos; k<uLen; k++) {
00833 sCom.Add( stripStr[k] );
00834 stripStr[k] = cCHR_HTM_PARSED;
00835 }
00836 sCom.Add( '\n' );
00837
00838 i++;
00839 for (pos2=0; i<=n && pos2==0; i++) {
00840 stripStr = iL.Str( i );
00841 uLen = (unsigned)strlen( stripStr );
00842 pos2 = gStrControl::Self().Find( stripStr, "-->" );
00843 if ( pos2==0 ) {
00844 sCom.Add( stripStr );
00845 sCom.Add( '\n' );
00846 for (k=0; k<uLen; k++) stripStr[k] = cCHR_HTM_PARSED;
00847 continue;
00848 }
00849 for (k=0; k+1<pos2+3; k++) {
00850
00851 sCom.Add( stripStr[k] );
00852 stripStr[k] = cCHR_HTM_PARSED;
00853 }
00854 }
00855 zRes.AddText( sCom );
00856 if ( pos2==0 ) return 102;
00857 }
00858 return 0;
00859 }
00860
00861 int gUnweb::thisStrMove (gString& s, unsigned startPos, unsigned endPos, gList& oL)
00862 {
00863 unsigned x;
00864 int count=0;
00865 gString sOut;
00866 bool anyScript=false;
00867
00868 sOut.CopyFromTo( s, startPos, endPos );
00869 sOut.Trim();
00870 gString sUp( sOut );
00871 sUp.UpString();
00872
00873 unsigned pos = sUp.Find("<SCRIPT");
00874 if ( (anyScript = (pos>0))==true ) {
00875 scriptLevel++;
00876 }
00877 else {
00878 pos = sUp.Find("</SCRIPT");
00879 scriptLevel -= pos>0;
00880 if ( scriptLevel<0 ) scriptLevel = 0;
00881 anyScript = scriptLevel>0;
00882 }
00883
00884
00885
00886
00887 for (x=startPos; x<=endPos; x++, count++) s[x] = cCHR_HTM_PARSED;
00888
00889
00890
00891
00892 if ( sOut.Length()==0 ) return 0;
00893 oL.Add( sOut );
00894
00895 return count;
00896 }
00897
00898 int gUnweb::thisStripReplace (gString& s, char* resStr)
00899 {
00900 unsigned uLen = s.Length();
00901 unsigned rLen;
00902
00903 if ( resStr==nil ) return -1;
00904 rLen = (unsigned)strlen( resStr );
00905 ASSERTION(uLen==rLen,"uLen==rLen");
00906 strcpy( resStr, s.Str() );
00907 return 0;
00908 }
00909
00910 int gUnweb::thisStrSetParse (gString& s, unsigned startPos, unsigned endPos, gString& sResult)
00911 {
00912 gList tempL;
00913 ASSERTION(endPos>0,"endPos>0");
00914 if ( thisStrMove( s, startPos, endPos, tempL )==0 ) return 0;
00915 sResult = tempL.Str( 1 );
00916 return 1;
00917 }
00918
00919
00920
00921
// Second pass: splits the stripped lines iL into text pieces and tags.
// Every piece is appended to oL and recorded in coord together with its
// (line, column) position: text through thisAddTxt, tags through thisAddCmd.
// Consumed characters are overwritten with the "parsed" marker in the line
// buffers (thisStrMove + thisStripReplace). The entries of headL are emitted
// first. coord must be empty on entry (asserted); scriptLevel is reset to 0.
// fRepErr is not used in this function.
// Returns 0 on success, 111 when a tag opened with '<' is never closed.
00922 int gUnweb::thisPostProcess (FILE* fRepErr, gList& headL, gList& iL, gList& oL, sCoordText& coord)
00923 {
00924 unsigned i, n;
00925 unsigned uLen, pos, pos2;
00926 unsigned posStart;
00927 char* stripStr;
00928 gString sLimp;
00929
00930 ASSERTION(coord.N()==0,"coord.N()==0");
00931 scriptLevel = 0;
00932
00933 // Every headL entry is copied to oL and recorded as a command at (1,1).
00934 for (i=1, n=headL.N(); i<=n; i++) {
00935 oL.Add( stripStr = headL.Str( i ) );
00936 thisAddCmd( 1, 1, stripStr, coord );
00937 }
00938
00939 // Main scan. i is the current line (1-based); posStart is the 1-based
00940 // position in that line where scanning resumes. i is only advanced
00941 // explicitly inside the loop body.
00942 for (i=1, n=iL.N(), posStart=1; i<=n; ) {
00943 stripStr = iL.Str( i );
00944 gString s( stripStr );
00945
00946 uLen = s.Length();
00947 if ( posStart>uLen ) {
00948 i++; posStart = 1;
00949 continue;
00950 }
00951
00952 ASSERTION(posStart>0,"posStart>0");
00953 pos = FindStringKey( s, "<", posStart, uLen );
00954 if ( pos==0 ) {
      // No (unquoted) '<' left: the rest of the line is text.
00955 thisAddTxt( i, posStart, stripStr+posStart-1, coord );
00956 thisStrMove( s, posStart, uLen, oL );
00957 thisStripReplace( s, stripStr );
00958 i++; posStart = 1;
00959 continue;
00960 }
00961
00962 ASSERTION(pos>=posStart,"pos>=posStart");
00963
00964 // Something precedes the '<' found at pos.
00965 if ( pos>posStart ) {
00966 gString sTemp;
00967 unsigned aKeepPos;
00968 if ( scriptLevel>0 ) {
00969 DBGPRINT_WEB("WEB: Post: XXX<tag>YYY [%s] scriptLevel=%d pos=%u,posStart=%u,len=%d\n",stripStr,scriptLevel,pos,posStart,strlen(stripStr));
00970 // Inside a script: only a "</SCRIPT" starting exactly at pos is acted
00971 // on; otherwise the rest of this line is skipped.
00972 char* strTagEtc = stripStr+pos-1;
00973 if ( gStrControl::Self().Find( strTagEtc, "</SCRIPT", true )==1 ) {
00974 sTemp.CopyFromTo( s, posStart, pos+gStrControl::Self().Find( strTagEtc, ">" ) );
00975 DBGPRINT_WEB("WEB: Post: scriptLevel_now=%d [strTagEtc=%s] [%s]\n",scriptLevel,strTagEtc,sTemp.Str());
00976 pos--;
00977 thisAddTxt( i, posStart, sTemp, coord );
00978 thisStrMove( s, posStart, pos, oL );
00979 thisStripReplace( s, stripStr );
00980 DBGPRINT_WEB("WEB: Post: after1 <tag>YYY [%s] scriptLevel=%d pos=%u,posStart=%u\n",stripStr,scriptLevel,pos,posStart);
00981 posStart = pos+1;
00982 continue;
00983 }
00984 i++; posStart = 1;
00985 continue;
00986 }
      // Outside a script: emit posStart..pos-1 as text and resume at the '<'.
00987 aKeepPos = pos;
00988 pos--;
00989 sTemp.CopyFromTo( s, posStart, pos );
00990 thisAddTxt( i, posStart, sTemp, coord );
00991 thisStrMove( s, posStart, pos, oL );
00992 thisStripReplace( s, stripStr );
00993 posStart = aKeepPos;
00994 continue;
00995 }
00996
00997
00998 // Here the '<' sits exactly at posStart. Look for the closing '>' on
00999 // the same line (FindStringKey ignores characters inside double
01000 // quotes).
01001
01002 pos2 = FindStringKey( s, ">", pos+1, uLen );
01003 DBGPRINT("FIND(%s):posStart=%d,uLen=%d:POS=%d, POS2=%d\n",stripStr,posStart,uLen,pos,pos2);
01004 if ( pos2>0 ) {
01005 gString sTemp;
01006 ASSERTION(pos2>pos,"pos2>pos");
01007 sTemp.CopyFromTo( s, pos, pos2 );
01008 if ( gStrControl::Self().Find( sTemp.Str(), "</SCRIPT", true ) ||
01009 scriptLevel>0 ) {
      // Note: in this branch the modified s is NOT written back to the line
      // buffer (no thisStripReplace call).
01010 DBGPRINT_WEB("DBG:/SCRIPT:sTemp=%s!\n",sTemp.Str());
01011 thisAddCmd( i, pos, sTemp, coord );
01012 thisStrMove( s, pos, pos2, oL );
01013 }
01014 else {
01015 thisAddCmd( i, pos, sTemp, coord );
01016 thisStrMove( s, pos, pos2, oL );
01017 thisStripReplace( s, stripStr );
01018 }
01019 posStart = pos2+1;
01020 continue;
01021 }
01022
01023 // No '>' on this line: the tag continues on the following lines.
01024 ASSERTION(pos2==0,"pos2==0");
01025 gString sCmd;
01026 sCmd.CopyFromTo( s, pos, uLen );
01027
01028 DBGPRINT("DBG: sCmd=[%s]\n",sCmd.Str());
01029 // iKeep remembers the line on which the tag started.
01030 int iKeep = i;
01031
01032 i++; posStart = 1;
01033 for (pos2=0; i<=n && pos2==0; ) {
      // stripStr and s declared in this loop shadow the outer variables of
      // the same names; the outer ones still refer to line iKeep.
01034 char* stripStr = iL.Str( i );
01035 pos2 = FindStringKey( stripStr, ">" );
01036 gString s( stripStr );
01037 s.Trim();
01038 if ( s.IsEmpty() ) {
01039 i++;
01040 continue;
01041 }
01042 if ( pos2==0 ) {
01043 sCmd.Add( ' ' );
01044 sCmd.AddString( s );
01045 s.Set( stripStr );
01046 thisStrSetParse( s, 1, s.Length(), sLimp );
01047 thisStripReplace( s, stripStr );
01048 i++;
01049 }
01050 }
01051 if ( pos2==0 ) return 111;
01052
01053 // Mark the tail of the line where the tag started (from pos) as parsed.
01054 thisStrSetParse( s, pos, uLen, sLimp );
01055 thisStripReplace( s, stripStr );
01056 // Load the line that holds the closing '>' and take its part 1..pos2.
01057 s.Set( stripStr = iL.Str( i ) );
01058 sLimp.CopyFromTo( s, 1, pos2 );
01059 sLimp.Trim();
01060
01061 if ( FindStringKey( sLimp, ">" )>1 ) sCmd.Add(' ');
01062 sCmd.AddString( sLimp );
01063 // The joined tag is recorded at the position where it started.
01064 thisAddCmd( iKeep, pos, sCmd, coord );
01065 oL.Add( sCmd );
01066
01067 thisStrSetParse( s, 1, pos2, sLimp );
01068 thisStripReplace( s, stripStr );
01069 // Continue on the closing line, right after the '>'.
01070 posStart = pos2+1;
01071 }
01072 return 0;
01073 }
01074
01075 int gUnweb::thisAddTxt (int x, int y, char* s, sCoordText& coord)
01076 {
01077 if ( s==nil ) return -1;
01078 gString aS( s );
01079 return thisAddTxt( x, y, aS, coord );
01080 }
01081
01082 int gUnweb::thisAddTxt (int x, int y, gString& sTxt, sCoordText& coord)
01083 {
01084
01085 bool isOk;
01086 int iPos = (int)sTxt.FindExcept( " \t" ), iDelta;
01087
01088 iDelta = ( iPos>0 ? iPos-1 : 0 );
01089 y += iDelta;
01090
01091 if ( scriptLevel>0 && (showKind & e_ShowScript)==0 ) return 0;
01092 if ( scriptLevel<=0 && (showKind & e_ShowNonscript)==0 ) return 0;
01093
01094 isOk = coord.Add( x, y, sTxt );
01095 if ( isOk==false ) return -1;
01096 isOk = thisTxtCheck( x, y, sTxt, coordParseErr )==0;
01097 if ( isOk==false ) return -1;
01098
01099 unsigned n = coord.N();
01100 coord.GetCoord(n).iAux = iDelta;
01101
01102 return 0;
01103 }
01104
01105 int gUnweb::thisAddCmd (int x, int y, char* s, sCoordText& coord)
01106 {
01107 if ( s==nil ) return -1;
01108 gString aS( s );
01109 return thisAddCmd( x, y, aS, coord );
01110 }
01111
01112 int gUnweb::thisAddCmd (int x, int y, gString& sCmd, sCoordText& coord)
01113 {
01114 int z=-1;
01115 coord.Add( x, y, z, sCmd );
01116
01117 return z;
01118 }
01119
01120 int gUnweb::thisTxtCheck (int x, int y, gString& s, sCoordText& cErr)
01121 {
01122
01123 unsigned pos, posCount = 0;
01124
01125 if ( scriptLevel>0 ) return 0;
01126 pos = FindStringKey( s, "<" );
01127 posCount += pos;
01128 if ( pos>0 ) {
01129
01130 cErr.Add( x, y+pos-1, 105, "Invalid symbol: '<'" );
01131 }
01132 pos = FindStringKey( s, ">" );
01133 posCount += pos;
01134 if ( pos>0 ) {
01135
01136 cErr.Add( x, y+pos-1, 106, "Invalid symbol: '>'" );
01137 }
01138 return posCount==0 ? 0 : 1;
01139 }
01140
01141