mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
query scrape fixes
This commit is contained in:
parent
9f70d43a4b
commit
3f584ecdaa
@ -43,6 +43,7 @@ bool sendPageInject ( TcpSocket *sock , HttpRequest *hr ) {
|
||||
}
|
||||
mnew ( msg7, sizeof(Msg7) , "PageInject" );
|
||||
|
||||
msg7->m_socket = sock;
|
||||
|
||||
char format = hr->getReplyFormat();
|
||||
|
||||
@ -566,7 +567,7 @@ void doneInjectingLinksWrapper ( void *state ) {
|
||||
// return if it blocks
|
||||
if ( ! msg7->scrapeQuery() ) return;
|
||||
}
|
||||
TcpSocket *s = msg7->m_socket;
|
||||
|
||||
// otherwise, parse out the search results so steve can display them
|
||||
if ( g_errno )
|
||||
sb->safePrintf("<error><![CDATA[%s]]></error>\n",
|
||||
@ -580,7 +581,8 @@ void doneInjectingLinksWrapper ( void *state ) {
|
||||
//p += sprintf ( p , "scraping status ");
|
||||
// print error msg out, too or "Success"
|
||||
//p += sprintf ( p , "%s", mstrerror(g_errno));
|
||||
g_httpServer.sendDynamicPage ( s,
|
||||
TcpSocket *sock = msg7->m_socket;
|
||||
g_httpServer.sendDynamicPage ( sock,
|
||||
sb->getBufStart(),
|
||||
sb->length(),
|
||||
-1/*cachetime*/);
|
||||
@ -610,6 +612,7 @@ bool Msg7::scrapeQuery ( ) {
|
||||
// first encode the query
|
||||
SafeBuf ebuf;
|
||||
ebuf.urlEncode ( qts ); // queryUNEncoded );
|
||||
ebuf.nullTerm();
|
||||
|
||||
char *uf;
|
||||
if ( m_round == 1 )
|
||||
@ -672,7 +675,7 @@ bool Msg7::scrapeQuery ( ) {
|
||||
if ( m_useAhrefs )
|
||||
m_xd.m_useAhrefs = true;
|
||||
|
||||
m_xd.m_reallyInjectLinks = gr->m_injectLinks;
|
||||
m_xd.m_reallyInjectLinks = true;//gr->m_injectLinks;
|
||||
|
||||
//
|
||||
// rather than just add the links of the page to spiderdb,
|
||||
|
@ -14239,7 +14239,8 @@ void Parms::init ( ) {
|
||||
|
||||
m->m_title = "query to scrape";
|
||||
m->m_desc = "Scrape popular search engines for this query "
|
||||
"and inject their links.";
|
||||
"and inject their links. You are not required to supply "
|
||||
"the <i>url</i> parm if you supply this parm.";
|
||||
m->m_cgi = "qts";
|
||||
m->m_obj = OBJ_GBREQUEST;
|
||||
m->m_type = TYPE_CHARPTR;
|
||||
|
66
qa.cpp
66
qa.cpp
@ -1193,6 +1193,66 @@ bool qaspider2 ( ) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool qascrape ( ) {
|
||||
//
|
||||
// delete the 'qatest123' collection
|
||||
//
|
||||
//static bool s_x1 = false;
|
||||
if ( ! s_flags[0] ) {
|
||||
s_flags[0] = true;
|
||||
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// add the 'qatest123' collection
|
||||
//
|
||||
//static bool s_x2 = false;
|
||||
if ( ! s_flags[1] ) {
|
||||
s_flags[1] = true;
|
||||
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// scrape it
|
||||
if ( ! s_flags[3] ) {
|
||||
s_flags[3] = true;
|
||||
SafeBuf sb;
|
||||
sb.safePrintf( "/admin/inject?c=qatest123&"
|
||||
"format=xml&qts=test");
|
||||
if ( ! getUrl ( sb.getBufStart() , 0 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// verify no results for gbhopcount:2 query
|
||||
//static bool s_y4 = false;
|
||||
if ( ! s_flags[6] ) {
|
||||
s_flags[6] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||
"q=test",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//static bool s_fee2 = false;
|
||||
if ( ! s_flags[13] ) {
|
||||
s_flags[13] = true;
|
||||
log("qa: SUCCESSFULLY COMPLETED "
|
||||
"QA SCRAPE TEST");
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
bool qaspider ( ) {
|
||||
|
||||
@ -1229,7 +1289,11 @@ static QATest s_qatests[] = {
|
||||
|
||||
{qaspider2,
|
||||
"spiderHopCountTest",
|
||||
"Test spidering walmart.com and ibm.com using hopcount limit."}
|
||||
"Test spidering ibm.com using hopcount limit."},
|
||||
|
||||
{qascrape,
|
||||
"queryScrapeTest",
|
||||
"Scrape and inject results from google and bing."}
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user