// Copyright 2009, Gigablast Inc.
// . runs a series of tests on a gigablast instance
// . right now just performs injections to test parsing and indexing

#undef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500

#include "gb-include.h"
#include "Test.h"
#include "Rdb.h"
#include "Spider.h"
#include "Msg1.h"
#include "Datedb.h"
#include "Pages.h"
#include "PingServer.h"
#include "Process.h"
#include "Placedb.h"
#include "Threads.h"
#include "Msge1.h"
#include "Parms.h"

//static void testWrapper ( int fd , void *state ) ;
static void injectedWrapper ( void *state ) ;

// the global class
Test g_test;

Test::Test() {
	m_urlBuf = NULL;
	m_isRunning = false;
	m_isAdding = false;
	m_urlsAdded = 0;
	m_urlsIndexed = 0;
	//m_spiderLinks = true;//false;
	m_bypassMenuElimination = false;
	// assume if they just turn spiders on we use this
	//m_testDir = "test-spider";
}

// main.cpp calls g_test.init()
bool Test::init ( ) {
	m_isRunning = false;
	m_isAdding = false;
	m_urlsAdded = 0;
	m_urlsIndexed = 0;
	//if( ! g_loop.registerSleepCallback( 1 , NULL , testWrapper ) )
	//	return log("repair: Failed register callback.");
	// record current value
	//m_testSpiderEnabledSaved = g_conf.m_testSpiderEnabled;
	//m_testParserEnabledSaved = g_conf.m_testParserEnabled;
	return true;
}

void Test::reset ( ) {
	if ( m_urlBuf )
		mfree ( m_urlBuf , m_urlEnd - m_urlBuf , "test999");
	//m_spiderLinks = true;//false;
	m_bypassMenuElimination = false;
}

// . call this once every second
// . this is responsible for advancing from one g_repairMode to the next
//void testWrapper ( int fd , void *state ) {
//	// call it from the class
//	g_test.loop();
//}

char *Test::getTestDir ( ) {
	// sanity. both test modes may not be enabled at once
	if ( g_conf.m_testSpiderEnabled && g_conf.m_testParserEnabled ) {
		char *xx=NULL;*xx=0; }
	if ( g_conf.m_testSpiderEnabled ) return "test-spider";
	if ( g_conf.m_testParserEnabled ) return "test-parser";
	// default if they just turn on spiders (spiders on cmd)
	//return "test-spider";
	//if ( ! m_testDir ) { char *xx=NULL;*xx=0; }
	char *xx=NULL;*xx=0;
	return NULL;
}

void Test::removeFiles ( ) {
	// reset
	m_errno = 0;
	if ( g_conf.m_testParserEnabled ) {
		// remove all old files for now to avoid system diffs
		log("test: removing old parse, critical and run files from "
		    "last run.");
		//system ("rm /home/mwells/gigablast/test/parse*.?.*" );
		//system ("rm /home/mwells/gigablast/test/critical*.?.*" );
		char sbuf[1024];
		char *testDir = getTestDir();
		sprintf(sbuf,"rm %s/%s/run.?.*" , g_hostdb.m_dir,testDir);
		system (sbuf);
		// use this one instead since rm doesn't always work
		sprintf(sbuf,"ls -1 %s/%s/ | grep parse | xargs --verbose "
			"-I xxx rm %s/%s/xxx" ,
			g_hostdb.m_dir, testDir ,
			g_hostdb.m_dir, testDir );
		log("test: %s",sbuf);
		system(sbuf);
		sprintf(sbuf,"ls -1 %s/%s/ | grep critical | xargs --verbose "
			"-I xxx rm %s/%s/xxx" ,
			g_hostdb.m_dir, testDir ,
			g_hostdb.m_dir, testDir );
		log("test: %s",sbuf);
		system(sbuf);
	}
	// do not crash for lack of quickpoll now
	long saved = g_conf.m_useQuickpoll;
	g_conf.m_useQuickpoll = false;
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	// . reset the qatest collection to zero docs
	// . TODO: implement this. only allow it for qatest coll.
	// . kinda like Collectiondb::deleteRec() i guess but we need to
	//   preserve the parms!!
	// . deletetagdb = false
	if ( cr )
		g_collectiondb.resetColl2 ( cr->m_collnum ,
					    cr->m_collnum ,
					    true );
	// reset event count
	//g_collectiondb.countEvents();
	// turn it back on
	g_conf.m_useQuickpoll = saved;
}
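// For reference: with a working dir of "/gb" (hypothetical) and the
// test-parser dir active, the second cleanup pipeline above expands to
//
//   ls -1 /gb/test-parser/ | grep parse | xargs --verbose -I xxx rm /gb/test-parser/xxx
//
// so every file whose name contains "parse" (and, in the next pass,
// "critical") is removed from the test dir before the new run starts.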
// come here once per second i guess
void Test::initTestRun ( ) {
	g_errno = 0;
	// . all hosts should have their g_conf.m_repairMode parm set
	// . it is global now, not collection based, since we need to
	//   lock down titledb for the scan and there could be recs from
	//   the collection we are repairing in titledb's rdbtree, which,
	//   when dumped, would mess up our scan.
	if ( ! g_conf.m_testSpiderEnabled && ! g_conf.m_testParserEnabled ) {
		char *xx=NULL;*xx=0; }
	// if both enabled, core
	if ( g_conf.m_testSpiderEnabled && g_conf.m_testParserEnabled ) {
		char *xx=NULL;*xx=0; }
	// if the power went off
	if ( ! g_process.m_powerIsOn ) return;
	// return if currently running
	// no, admin can re-init even if running now
	//if ( m_isRunning ) { char *xx=NULL;*xx=0; }//return;
	// must be host #0 only
	if ( g_hostdb.m_myHost->m_hostId != 0 ) return;
	// if was initially in this mode, don't do anything
	//if ( m_testSpiderEnabledSaved ) return;
	//if ( m_testParserEnabledSaved ) return;
	// you must have the "qatest123" coll already setup!
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	if ( ! cr ) {
		// note it
		log("test: please add a collection named \"qatest123\" "
		    "first.");
		// stop the test
		g_conf.m_testParserEnabled = false;
		g_conf.m_testSpiderEnabled = false;
		// all done
		return;
	}
	char *testDir = getTestDir();
	// scan for the first unused run id, i.e. the first i for which
	// "run.%li.collparms.txt", a dump of all the coll parms, is free
	char filename[100];
	File f;
	long i;
	for ( i = 0 ; i < 9999 ; i++ ) {
		// make filename. base it off working dir, g_hostdb.m_dir
		sprintf ( filename,"%s/%s/run.%li.collparms.txt",
			  g_hostdb.m_dir,testDir,i );
		// exist?
		f.set ( filename );
		long status = f.doesExist();
		// error?
		if ( status == -1 ) {
			// note it in the log
			log("test: doesExist() returned -1");
			// end the test
			g_conf.m_testParserEnabled = false;
			g_conf.m_testSpiderEnabled = false;
			// all done
			return;
		}
		// try next i if this one in use
		if ( status ) continue;
		// got one
		break;
	}
	// close it
	f.close();
	// create the run.%li.version.txt file
	char cmd[1000];
	char vfile[200];
	sprintf(vfile,"%s/%s/run.%li.version.txt",g_hostdb.m_dir,testDir,i);
	sprintf(cmd,
		"%s/gb -v >& %s ; "
		"echo -n \"RUN START TIME: \" >> %s ; "
		"date >> %s",
		g_hostdb.m_dir,vfile,
		vfile,
		vfile);
	system(cmd);
	// save it
	m_runId = i;
	cr = g_collectiondb.getRec ( "qatest123" );
	if ( ! cr ) {
		// and no more of this
		g_conf.m_testParserEnabled = false;
		g_conf.m_testSpiderEnabled = false;
		return;
	}
	// set these
	m_coll = cr->m_coll;
	// turn on spiders
	//cr->m_spideringEnabled = 1;
	// crap i guess this too!!!
	//g_conf.m_spideringEnabled = 1;
	//
	// log out the global parms
	//
	char fbuf[100];
	// print our global parms into a file called run.%li.confparms.txt
	sprintf(fbuf,"%s/%s/run.%li.confparms.txt",g_hostdb.m_dir,testDir,i);
	// this saves it as xml i think
	g_parms.saveToXml ( (char *)&g_conf , fbuf , OBJ_CONF);
	//
	// log out the coll specific parms
	//
	// update name
	sprintf(fbuf,"%s/%s/run.%li.collparms.txt",g_hostdb.m_dir,testDir,i);
	// save that
	g_parms.saveToXml ( (char *)cr , fbuf , OBJ_COLL);
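	// an example urls.txt (hypothetical urls, shown here only to
	// document the format the code below expects): '#' lines are
	// comments, urls are separated by whitespace, and everything
	// from a "\nSTOP\n" line to a "\nRESUME\n" line is skipped:
	//
	//   # event pages
	//   http://www.example.com/events.html
	//   STOP
	//   # temporarily disabled
	//   http://www.example.com/old.html
	//   RESUME
	//   http://www.example.com/index.html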
	// get the list of urls to download and inject in order
	sprintf(fbuf,"%s/%s/urls.txt",g_hostdb.m_dir,testDir);
	// set it
	f.set ( fbuf ) ;
	// read it in
	long fsize = f.getFileSize();
	// add one for \0 termination
	long need = fsize + 1;
	// alloc space for it
	char *buf = (char *)mmalloc ( need ,"qatest");
	// error?
	if ( ! buf ) {
		// note it
		log("test: failed to alloc %li bytes for url buf",fsize);
		// disable testing
		g_conf.m_testParserEnabled = false;
		g_conf.m_testSpiderEnabled = false;
		// all done
		return;
	}
	// open it
	f.open ( O_RDONLY );
	// read it in
	long rs = f.read ( buf , fsize , 0 ) ;
	// check it
	if ( rs != fsize ) {
		// note it
		log("test: failed to read %li bytes of urls.txt file",fsize);
		// disable testing
		g_conf.m_testParserEnabled = false;
		g_conf.m_testSpiderEnabled = false;
		// all done
		return;
	}
	// save it
	m_urlBuf = buf;
	// null term it just in case
	buf[need-1] = '\0';
	// end of it, including the terminating \0
	m_urlEnd = buf + need;
	// init url offset
	m_urlPtr = m_urlBuf;
	// reset just in case
	//m_spiderLinks = false;
	m_bypassMenuElimination = false;
	// first check for spiderlinks=1|true
	for ( char *p = m_urlBuf ; p < m_urlEnd ; p++ ) {
		//if ( p[0] != 's' ) continue;
		//if ( p[1] != 'p' ) continue;
		//if ( ! strncmp(p,"spiderlinks",11) )
		//	m_spiderLinks = true;
		//if ( ! strncmp(p,"bypassmenuelimination",21) )
		//	m_bypassMenuElimination = true;
	}
	// force max spiders to one because one page is often dependent
	// on the previous page!
	//if ( ! m_spiderLinks ) cr->m_maxNumSpiders = 1;
	// need to make it 6 since some priorities essentially lock the
	// ips up that have urls in higher priorities. i.e. once we dole
	// a url out for ip X, then if later we add a high priority url for
	// IP X it can't get spidered until the one that is doled does.
	//else cr->m_maxNumSpiders = 6;
	// . first space out all comments
	// . comments are nice because we know why the url is in urls.txt
	for ( char *p = m_urlBuf ; p < m_urlEnd ; p++ ) {
		// skip if not start of a comment line
		if ( *p != '#' ) continue;
		// if not preceded by a \n or start, skip
		if ( p > m_urlBuf && *(p-1) != '\n' ) continue;
		// ok, nuke it
		for ( ; *p && *p !='\n' ; p++ ) *p = ' ';
	}
	// if we hit "\nSTOP\n" then white out that and all past it
	for ( char *p = m_urlBuf ; p < m_urlEnd ; p++ ) {
		// skip if not a newline
		if ( *p != '\n' ) continue;
		// check it
		if ( strncmp(p,"\nSTOP\n",6) ) continue;
		// white out
		for ( ; *p ; p++ ) {
			// until we HIT RESUME
			if ( *p == '\n' && ! strncmp(p,"\nRESUME\n",8) ) {
				p[1] = ' ';
				p[2] = ' ';
				p[3] = ' ';
				p[4] = ' ';
				p[5] = ' ';
				p[6] = ' ';
				break;
			}
			*p = ' ';
		}
		// all done
		//break;
	}
	// then NULL terminate all urls by converting all white space to \0s
	for ( char *p = m_urlBuf ; p < m_urlEnd ; p++ )
		// all non url chars to \0
		if ( is_wspace_a(*p) ) *p = '\0';
	// flag this
	m_isRunning = true;
	// and this
	m_isAdding = true;
	m_testStartTime = gettimeofdayInMilliseconds();
	// set up dedup table
	m_dt.set ( 8,0,0,NULL,0,false,MAX_NICENESS,"testdedup");
	// note it
	log("test: beginning injection");
	// . now inject each url in order, one at a time using msg4
	// . returns true if all done
	if ( ! injectLoop() ) return;
	// close it up
	//stopIt();
}
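// . a test run with id N leaves these files in the test dir,
//   <workingdir>/test-spider/ or <workingdir>/test-parser/:
// .   run.N.version.txt          - gb version and run start time
// .   run.N.confparms.txt        - xml dump of g_conf (diffed between runs)
// .   run.N.collparms.txt        - xml dump of the "qatest123" coll rec
// .   doc.<urlhash64>.html       - stored http server reply for each url
// .   parse.<urlhash64>.N.html   - parser output for each url
// .   critical.<urlhash64>.N.txt - validateOutput() miss/hit/event counts
// .   qa.html                    - the summary page built by stopIt() below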
// this should be called when all docs have finished spidering
void Test::stopIt ( ) {
	// sanity
	if ( m_isAdding ) { char *xx=NULL;*xx=0; }
	// flag that we are done
	m_isRunning = false;
	// print time
	log("test: took %lli ms to complete injections.",
	    gettimeofdayInMilliseconds() - m_testStartTime );
	// get this before setting testParserEnabled to false
	char *testDir = g_test.getTestDir();
	// turn this off now too
	g_conf.m_testParserEnabled = false;
	g_conf.m_testSpiderEnabled = false;
	// save all!
	bool disabled = g_threads.m_disabled;
	g_threads.disableThreads();
	// save it blocking style
	g_process.save();
	if ( ! disabled ) g_threads.enableThreads();
	// save ips.txt
	saveTestBuf ( testDir );

	log("test: test completed. making qa.html");

	//
	//
	// NOW MAKE THE qa.html FILE
	//
	//

	// only analyze up to last 7 runs
	long start = m_runId - 7;
	if ( start < 0 ) start = 0;

	SafeBuf sb;
	sb.safePrintf("<table border=1>\n");
	sb.safePrintf("<tr>"
		      "<td><b>run id</b></td>"
		      "<td><b>conf diff</b></td>"
		      "<td><b>coll diff</b></td>"
		      "<td><b>run info</b></td>"
		      "</tr>\n");
	// take diffs between this run and the last run for confparms
	for ( long i = m_runId ; i > start ; i-- ) {
		// shortcut
		char *dir = g_hostdb.m_dir;
		// make diff filename
		char diff1[200];
		sprintf(diff1,"%s/%s/run.%li.confparms.txt.diff",dir,
			testDir,i);
		File f1;
		f1.set(diff1);
		if ( ! f1.doesExist() ) {
			char df1[200];
			char df2[200];
			sprintf(df1,"%s/%s/run.%li.confparms.txt",dir,
				testDir,i);
			sprintf(df2,"%s/%s/run.%li.confparms.txt",dir,
				testDir,i-1);
			// do the diff
			char cmd[600];
			sprintf(cmd,"diff %s %s > %s",df1,df2,diff1);
			log("test: system(\"%s\")",cmd);
			system (cmd);
		}
		long fs1 = f1.getFileSize();
		sb.safePrintf("<tr><td>%li</td><td>%li</td>", i,fs1);
		// make diff filename
		char diff2[200];
		sprintf(diff2,"%s/%s/run.%li.collparms.txt.diff",dir,
			testDir,i);
		File f2;
		f2.set(diff2);
		if ( ! f2.doesExist() ) {
			char df1[200];
			char df2[200];
			sprintf(df1,"%s/%s/run.%li.collparms.txt",dir,
				testDir,i);
			sprintf(df2,"%s/%s/run.%li.collparms.txt",dir,
				testDir,i-1);
			// do the diff
			char cmd[600];
			sprintf(cmd,"diff %s %s > %s",df1,df2,diff2);
			log("test: system(\"%s\")",cmd);
			system (cmd);
		}
		long fs2 = f2.getFileSize();
		sb.safePrintf("<td>%li</td>", fs2);
		// the version
		char vf[200];
		sprintf(vf,"%s/%s/run.%li.version.txt",dir,testDir,i);
		File f3;
		f3.set ( vf );
		long fs3 = f3.getFileSize();
		char vbuf[1000];
		vbuf[0] = 0;
		if ( fs3 > 0 ) {
			// don't overflow vbuf
			if ( fs3 > 999 ) fs3 = 999;
			f3.open(O_RDONLY);
			long rs = f3.read(vbuf,fs3,0);
			vbuf[fs3] = '\0';
			if ( rs <= 0 ) continue;
			f3.close();
		}
		// show it
		sb.safePrintf("<td><pre>%s</pre></td></tr>\n", vbuf);
	}
	sb.safePrintf("</table>\n");
	sb.safePrintf("<br>\n");

	//
	// now diff each parser output file for each url in urls.txt
	//

	//
	// loop over url buf first so we can print one table per url
	//
	char *next = NULL;
	// reset the url buf ptr
	m_urlPtr = m_urlBuf;
	// count em
	long count = 0;
	// ptrs to each url table
	long un = 0;
	long uptr [5000]; // offsets now, not char ptr since buf gets reallocd
	char udiff[5000];
	long ulen [5000];
	long uhits[5000];
	long uunchecked[5000]; // events/addresses found but not validated
	long umiss[5000]; // critical errors! validateOutput() choked!
	long usort[5000];
	long uevents[5000];
	SafeBuf tmp;
	long niceness = MAX_NICENESS;
	// advance to next url
	for ( ; m_urlPtr < m_urlEnd ; m_urlPtr = next ) {
		// breathe
		QUICKPOLL(niceness);
		// we converted all non-url chars into \0's so skip those!
		for ( ; m_urlPtr < m_urlEnd && ! *m_urlPtr ; m_urlPtr++ );
		// all done?
		if ( m_urlPtr >= m_urlEnd ) break;
		// set this up
		next = m_urlPtr;
		// compute next url ptr
		for ( ; next < m_urlEnd && *next ; next++ );
		// point to this url
		char *u = m_urlPtr;
		// get hash
		long long h = hash64 ( u , gbstrlen(u) );
		// shortcut
		char *dir = g_hostdb.m_dir;
		// print into a secondary safe buf with a ptr to
		// it so we can sort that and transfer into the
		// primary safebuf later
		uptr[un] = tmp.length();
		// assume no diff
		udiff[un] = 0;
		// print number
		tmp.safePrintf("%li) ",count++);
		// . link to our stored http server reply
		// . TODO: link it to our [cached] copy in the test coll!!!
		char local[1200];
		sprintf(local,"/%s/doc.%llu.html",testDir,h);
		tmp.safePrintf("<a href=\"%s\">%s</a> ",local,u);
		// link to live page
		tmp.safePrintf(" <a href=\"%s\">live</a> ",u);
		// link to page parser
		char ubuf[2000];
		urlEncode(ubuf,2000,u,gbstrlen(u),true);
		tmp.safePrintf(" <a href=\"/master?page=parser&u=%s\">"
			       "parser</a> ",ubuf);
		//tmp.safePrintf(" (%llu)",h);
		tmp.safePrintf("<br>\n");
		//tmp.safePrintf("<hr>\n");
		tmp.safePrintf("<table border=1>\n");
		tmp.safePrintf("<tr>"
			       "<td><b>run id</b></td>"
			       "<td><b>crit hits</b></td>"
			       "<td><b>crit errors</b></td>"
			       "<td><b># e</b></td>"
			       "<td><b>unchecked</b></td>"
			       "<td><b>diff chars</b></td>"
			       "<td><b>diff file</b></td>"
			       "<td><b>full output</b></td>"
			       "</tr>\n");
		//SafeBuf sd;
		// loop over all the runs now, starting with latest run first
		for ( long ri = m_runId ; ri >= start ; ri-- ) {
			QUICKPOLL(niceness);
			// the diff filename
			char pdiff[200];
			sprintf(pdiff,"%s/%s/parse.%llu.%li.html.diff",dir,
				testDir,h,ri);
			File f;
			f.set(pdiff);
			long fs = f.getFileSize();
			if ( ! f.doesExist() && ri > 0 ) {
				// make the parse filename
				char pbuf1[200];
				char pbuf2[200];
				sprintf(pbuf1,"%s/%s/parse.%llu.%li.html",
					dir,testDir,h,ri);
				sprintf(pbuf2,"%s/%s/parse.%llu.%li.html",
					dir,testDir,h,ri-1);
				// sanity check
				//File tf; tf.set(pbuf1);
				//if ( ! tf.doesExist()) {char *xx=NULL;*xx=0;}
				// tmp file name
				char tmp1[200];
				char tmp2[200];
				sprintf(tmp1,"%s/%s/t1.html",dir,testDir);
				sprintf(tmp2,"%s/%s/t2.html",dir,testDir);
				// filter first
				char cmd[600];
				sprintf(cmd,
					"cat %s | "
					"grep -v \"\" "
					" > %s",
					pbuf1,tmp1);
				system(cmd);
				sprintf(cmd,
					"cat %s | "
					"grep -v \"\" "
					" > %s",
					pbuf2,tmp2);
				system(cmd);
				// make the system cmd to do the diff
				sprintf(cmd,
					"echo \"<pre>\" > %s ; "
					"diff -w --text %s %s "
					// ignore this table header row
					//" | grep -v \"R#4\""
					" >> %s",
					pdiff,
					tmp1,tmp2,pdiff);
				log("test: system(\"%s\")",cmd);
				system(cmd);
				// try again
				f.set(pdiff);
				fs = f.getFileSize();
			}
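			// e.g. for run 5 of a url whose hash64 is 123
			// (hypothetical values) the command above is:
			//   echo "<pre>" > <dir>/parse.123.5.html.diff ;
			//   diff -w --text <dir>/t1.html <dir>/t2.html
			//       >> <dir>/parse.123.5.html.diff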

			QUICKPOLL(niceness);

			// this means 0 . it just has the <pre> tag in it!
			if ( fs < 0 || fs == 6 ) fs = 0;
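			// (6 bytes = strlen("<pre>") plus the newline that
			//  echo appends, i.e. an empty diff)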
			// . if no diff and NOT current run, do not print it
			// . but always print the run right before the
			//   current one as well
			if ( ri != m_runId && ri != m_runId-1 && fs == 0 ) 
				continue;
			// relative filename
			char rel[200];
			sprintf(rel,"/%s/parse.%llu.%li.html.diff",
				testDir,h,ri);
			char full[200];
			sprintf(full,"/%s/parse.%llu.%li.html",
				testDir,h,ri);
			char validate[200];
			sprintf(validate,
				"/%s/parse-shortdisplay.%llu.%li.html",
				testDir,h,ri);
			// use red font for current run that has a diff!
			char *t1 = "";
			char *t2 = "";
			if ( ri == m_runId && fs != 0 ) {
				t1 = "";
				t2 = "";
				// a diff
				udiff[un] = 1;
			}

			// . get critical errors
			// . i.e. XmlDoc::validateOutput() could not validate
			//   a particular event or address that was in the
			//   url's "validated.uh64.txt" file since the admin
			//   clicked on the checkbox in the page parser output
			// . if we do not find such a tag in the parser output
			//   any more then Spider.cpp creates this file!
			if ( ri == m_runId ) {
				char cfile[256];
				sprintf(cfile,"%s/%s/critical.%llu.%li.txt",
					g_hostdb.m_dir,testDir,h,ri);
				SafeBuf ttt;
				ttt.fillFromFile(cfile);
				// first long is misses, then hits then events
				umiss[un] = 0;
				uhits[un] = 0;
				uevents[un] = 0;
				uunchecked[un] = 0;
				if ( ttt.length() >= 3 )
					sscanf(ttt.getBufStart(),
					       "%li %li %li %li",
					       &umiss[un],
					       &uhits[un],
					       &uevents[un],
					       &uunchecked[un]);
				usort[un] = umiss[un] + uunchecked[un];
				//File cf;
				//cf.set(cfile);
				//if ( cf.doesExist()) ucrit[un] = 1;
				//else                 ucrit[un] = 0;
			}

			// more critical?
			if ( ri == m_runId && umiss[un] != 0 ) {
				t1 = "";
				t2 = "";
			}

			// . these are good to have
			// . if you don't have 1+ critical hits then you
			//   probably need to be validated by the qa guy
			char *uhb1 = "";
			char *uhb2 = "";
			if ( ri == m_runId && uhits[un] != 0 ) {
				uhb1 = "**";
				uhb2 = "**";
			}

			QUICKPOLL(niceness);

			char *e1 = "
"; long ne = uevents[un]; if ( ne ) { e1=""; } char *u1 = ""; if ( uunchecked[un] ) { u1=""; } // print the row! tmp.safePrintf("" "" "" // critical hits "" // critical misses "%s%li%s" // # events "%s%li%s" // unchecked "" // filesize of diff // diff filename "" // full parser output "" "\n", t1,ri,t2, uhb1,uhits[un],uhb2, t1,umiss[un],t2, e1,ne,e2, u1,uunchecked[un],u2, t1,fs,t2, rel,t1,rel,t2, full, validate); // only fill "sd" for the most recent guy if ( ri != m_runId ) continue; // now concatenate the parse-shortdisplay file // to this little table so qa admin can check/uncheck // validation checkboxes for addresses and events //sprintf(cfile, // "%s/test/parse-shortdisplay.%llu.%li.html", // g_hostdb.m_dir,h,ri); //sd.fillFromFile ( cfile ); } // end table tmp.safePrintf("
run idcrit hitscrit errors# euncheckeddiff charsdiff filefull output
"; char *e2 = ""; e2=""; char *u2 = ""; u2="
%s%li%s%s%li%s%s%li%s%s%li%s%s%s%s" "full | " "validate " "
\n"); // . and a separate little section for the checkboxes // . should already be in tables, etc. // . each checkbox should provide its own uh64 when it // calls senddiv() when clicked now //tmp.cat ( sd ); tmp.safePrintf("
\n"); tmp.safePrintf("
\n"); // set this ulen[un] = tmp.length() - uptr[un] ; // sanity check if ( ulen[un] > 10000000 ) { char *xx=NULL;*xx=0; } // inc it un++; // increase the 5000!! if ( un >= 5000 ) { char *xx=NULL; *xx=0; } } char flag ; bubble: flag = 0; // sort the url tables for ( long i = 0 ; i < un - 1 ; i++ ) { QUICKPOLL(niceness); if ( usort[i] > usort[i+1] ) continue; if ( usort[i] == usort[i+1] ) if ( udiff[i] >= udiff[i+1] ) continue; // swap em long tp = uptr[i]; long td = udiff[i]; long um = umiss[i]; long us = usort[i]; long uh = uhits[i]; long tl = ulen [i]; uptr[i] = uptr[i+1]; umiss[i] = umiss[i+1]; usort[i] = usort[i+1]; uhits[i] = uhits[i+1]; udiff[i] = udiff[i+1]; ulen[i] = ulen[i+1]; uptr[i+1] = tp; umiss[i+1] = um; usort[i+1] = us; uhits[i+1] = uh; udiff[i+1] = td; ulen [i+1] = tl; flag = 1; } if ( flag ) goto bubble; // transfer into primary safe buf now for ( long i = 0 ; i < un ; i++ ) sb.safeMemcpy(tmp.getBufStart() + uptr[i],ulen[i]); sb.safePrintf("\n"); char dfile[200]; sprintf(dfile,"%s/%s/qa.html",g_hostdb.m_dir,testDir); sb.dumpToFile ( dfile ); // free the buffer of urls reset(); // turn off spiders g_conf.m_spideringEnabled = 0; // all done return; } void injectedWrapper ( void *state ) { // wait for all msg4 buffers to flush //if ( ! flushMsg4Buffers ( state , injectedWrapper ) ) return; // this function is in Msge1.cpp. save ip file in test subdir //saveTestBuf(); if ( ! g_test.injectLoop() ) return; //g_test.stopIt(); } static long s_count = 0; // . returns true if all done! // . returns false if still doing stuff bool Test::injectLoop ( ) { long dlen ; char *dom ; long fakeIp ; loop: // advance to next url for ( ; m_urlPtr < m_urlEnd && ! *m_urlPtr ; m_urlPtr++ ) ; // all done? if ( m_urlPtr >= m_urlEnd ) { // flush em out if ( ! flushMsg4Buffers ( this , injectedWrapper ) ) return false; // note it m_isAdding = false; // all done return true; } // error means all done if ( m_errno ) { m_isAdding = false; return true; } // point to it char *u = m_urlPtr; // advance to point to the next url for the next loop! for ( ; m_urlPtr < m_urlEnd && *m_urlPtr ; m_urlPtr++ ) ; // hash it long long h = hash64b ( u ); // dedup it lest we freeze up and stopIt() never gets called because // m_urlsAdded is never decremented all the way to zero in Spider.cpp if ( m_dt.isInTable ( &h ) ) goto loop; // add it. return true with g_errno set on error if ( ! m_dt.addKey ( &h ) ) goto hadError; // make the SpiderRequest from it m_sreq.reset(); // url strcpy ( m_sreq.m_url , u ); // get domain of url dom = getDomFast ( m_sreq.m_url , &dlen ); // make a fake ip fakeIp = 0x123456; // use domain if we got that if ( dom && dlen ) fakeIp = hash32 ( dom , dlen ); // first ip is fake m_sreq.m_firstIp = fakeIp; // 0x123456; // these too m_sreq.m_domHash32 = fakeIp; m_sreq.m_hostHash32 = fakeIp; m_sreq.m_siteHash32 = fakeIp; m_sreq.m_probDocId = g_titledb.getProbableDocId( m_sreq.m_url ); // this crap is fake m_sreq.m_isInjecting = 1; // use test-spider subdir for storing pages and spider times? if ( g_conf.m_testSpiderEnabled ) m_sreq.m_useTestSpiderDir = 1; // use this later m_sreq.m_hasContent = 0; // injected requests use this as the spider time i guess // so we can sort them by this m_sreq.m_addedTime = ++s_count; // no, because to compute XmlDoc::m_min/maxPubDate we need this to // be valid for our test run.. no no we will fix it to be // basically 2 days before spider time in the code... 
// . returns true if all done!
// . returns false if still doing stuff
bool Test::injectLoop ( ) {
	long  dlen ;
	char *dom ;
	long  fakeIp ;
 loop:
	// advance to next url
	for ( ; m_urlPtr < m_urlEnd && ! *m_urlPtr ; m_urlPtr++ ) ;
	// all done?
	if ( m_urlPtr >= m_urlEnd ) {
		// flush em out
		if ( ! flushMsg4Buffers ( this , injectedWrapper ) )
			return false;
		// note it
		m_isAdding = false;
		// all done
		return true;
	}
	// error means all done
	if ( m_errno ) {
		m_isAdding = false;
		return true;
	}
	// point to it
	char *u = m_urlPtr;
	// advance to point to the next url for the next loop!
	for ( ; m_urlPtr < m_urlEnd && *m_urlPtr ; m_urlPtr++ ) ;
	// hash it
	long long h = hash64b ( u );
	// dedup it lest we freeze up and stopIt() never gets called because
	// m_urlsAdded is never decremented all the way to zero in Spider.cpp
	if ( m_dt.isInTable ( &h ) ) goto loop;
	// add it. return true with g_errno set on error
	if ( ! m_dt.addKey ( &h ) ) goto hadError;
	// make the SpiderRequest from it
	m_sreq.reset();
	// url
	strcpy ( m_sreq.m_url , u );
	// get domain of url
	dom = getDomFast ( m_sreq.m_url , &dlen );
	// make a fake ip
	fakeIp = 0x123456;
	// use domain if we got that
	if ( dom && dlen ) fakeIp = hash32 ( dom , dlen );
	// first ip is fake
	m_sreq.m_firstIp = fakeIp; // 0x123456;
	// these too
	m_sreq.m_domHash32  = fakeIp;
	m_sreq.m_hostHash32 = fakeIp;
	m_sreq.m_siteHash32 = fakeIp;
	m_sreq.m_probDocId  = g_titledb.getProbableDocId( m_sreq.m_url );
	// this crap is fake
	m_sreq.m_isInjecting = 1;
	// use test-spider subdir for storing pages and spider times?
	if ( g_conf.m_testSpiderEnabled ) m_sreq.m_useTestSpiderDir = 1;
	// use this later
	m_sreq.m_hasContent = 0;
	// injected requests use this as the spider time i guess
	// so we can sort them by this
	m_sreq.m_addedTime = ++s_count;
	// no, because to compute XmlDoc::m_min/maxPubDate we need this to
	// be valid for our test run.. no no we will fix it to be
	// basically 2 days before spider time in the code...
	//m_sreq.m_addedTime = spiderTime;
	m_sreq.m_fakeFirstIp = 1;
	// make the key (parentDocId=0)
	m_sreq.setKey ( fakeIp, 0LL , false );
	// test it
	if ( g_spiderdb.getFirstIp(&m_sreq.m_key) != fakeIp ) {
		char *xx=NULL;*xx=0;}
	// sanity check. check for http(s)://
	if ( m_sreq.m_url[0] != 'h' ) { char *xx=NULL;*xx=0; }
	// reset this
	g_errno = 0;
	// count it
	m_urlsAdded++;
	// note it
	//log("crazyout: %s",m_sreq.m_url );
	logf(LOG_DEBUG,"spider: injecting test url %s",m_sreq.m_url);
	// the receiving end will realize that we are injecting into the test
	// collection and use the "/test/" subdir to load the file
	// "ips.txt" to do our ip lookups, and search for any downloads in
	// that subdirectory as well.
	if ( ! m_msg4.addMetaList ( (char *)&m_sreq ,
				    m_sreq.getRecSize() ,
				    m_coll ,
				    NULL ,
				    injectedWrapper ,
				    MAX_NICENESS ,
				    RDB_SPIDERDB ) )
		// return false if blocked
		return false;
	// error?
	if ( g_errno ) {
		// jump down here from above on error
	hadError:
		// save it
		m_errno = g_errno;
		// flag it
		m_isAdding = false;
		// note it
		log("test: inject had error: %s",mstrerror(g_errno));
		// stop, we are all done!
		return true;
	}
	// add the next spider request
	goto loop;
}