use &format=0 1 or 2 for html/xml/json now.

use &icc=1 to get dump of json objects in serps.
This commit is contained in:
Matt Wells 2013-11-08 18:00:30 -08:00
parent aa9a77674f
commit e395628d5a
7 changed files with 162 additions and 107 deletions

View File

@ -1552,7 +1552,9 @@ bool HttpServer::sendErrorReply ( TcpSocket *s , long error , char *errmsg ,
*/
}
bool HttpServer::sendQueryErrorReply( TcpSocket *s , long error ,
char *errmsg, long rawFormat,
char *errmsg,
//long rawFormat,
char format ,
int errnum, char *content) {
// clear g_errno so the send goes through
g_errno = 0;
@ -1569,7 +1571,7 @@ bool HttpServer::sendQueryErrorReply( TcpSocket *s , long error ,
// sanity check
if ( strncasecmp(errmsg,"Success",7)==0 ) {char*xx=NULL;*xx=0;}
if (!rawFormat){
if ( format == FORMAT_HTML ) {
// Page content
char cbuf[1024];
sprintf (cbuf,

View File

@ -135,7 +135,8 @@ class HttpServer {
long *bytesSent = NULL );
// send a "prettier" error reply, formatted in XML if necessary
bool sendQueryErrorReply ( TcpSocket *s , long error , char *errmsg,
long rawFormat, int errnum,
// FORMAT_HTML=0,FORMAT_XML,FORMAT_JSON
char format, int errnum,
char *content=NULL);

View File

@ -1107,7 +1107,7 @@ bool Msg40::launchMsg20s ( bool recalled ) {
req.m_bigSampleMaxLen = bigSampleMaxLen;
req.m_titleMaxLen = 256;
req.m_titleMaxLen = cr->m_titleMaxLen;
if(m_si->m_isAdmin && m_si->m_xml == 0)
if(m_si->m_isAdmin && m_si->m_format == FORMAT_HTML )
req.m_getGigabitVector = true;
else req.m_getGigabitVector = false;
req.m_flags = 0;

View File

@ -78,13 +78,14 @@ bool sendReply ( State0 *st , char *reply ) {
if ( ! s ) { char *xx=NULL;*xx=0; }
SearchInput *si = &st->m_si;
char *ct = "text/html";
if ( si && si->m_xml ) ct = "text/xml";
if ( si && si->m_format == FORMAT_XML ) ct = "text/xml";
if ( si && si->m_format == FORMAT_JSON ) ct = "application/json";
char *charset = "utf-8";
// . filter anything < 0x20 to 0x20 to keep XML legal
// . except \t, \n and \r, they're ok
// . gotta set "f" down here in case it realloc'd the buf
if ( si->m_xml && reply ) {
if ( si->m_format == FORMAT_XML && reply ) {
unsigned char *f = (unsigned char *)reply;
for ( ; *f ; f++ )
if ( *f < 0x20 && *f!='\t' && *f!='\n' && *f!='\r' )
@ -99,7 +100,7 @@ bool sendReply ( State0 *st , char *reply ) {
// . use light brown if coming directly from an end user
// . use darker brown if xml feed
long color = 0x00b58869;
if ( si->m_xml )color = 0x00753d30 ;
if ( si->m_format != FORMAT_HTML )color = 0x00753d30 ;
long long nowms = gettimeofdayInMilliseconds();
long long took = nowms - st->m_startTime ;
g_stats.addStat_r ( took ,
@ -123,7 +124,7 @@ bool sendReply ( State0 *st , char *reply ) {
st->m_msg40.getNumResults());
}
bool xml = si->m_xml;
//bool xml = si->m_xml;
g_stats.logAvgQueryTime(st->m_startTime);
@ -159,7 +160,7 @@ bool sendReply ( State0 *st , char *reply ) {
if ( savedErr != ENOPERM )
g_stats.m_numFails++;
if ( xml ) {
if ( si->m_format == FORMAT_XML ) {
SafeBuf sb;
sb.safePrintf("<?xml version=\"1.0\" "
"encoding=\"UTF-8\" ?>\n"
@ -195,7 +196,7 @@ bool sendReply ( State0 *st , char *reply ) {
g_httpServer.sendQueryErrorReply(s,
status,
mstrerror(savedErr),
xml,
si->m_format,//xml,
savedErr,
"There was an error!");
return true;
@ -209,8 +210,14 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
// . need to pre-query the directory first to get the sites to search
// this will likely have just been cached so it should be quick
// . then need to construct a site search query
long rawFormat = hr->getLong("xml", 0); // was "raw"
long xml = hr->getLong("xml",0);
//long rawFormat = hr->getLong("xml", 0); // was "raw"
//long xml = hr->getLong("xml",0);
// what format should search results be in? default is html
long format = hr->getLong("format", FORMAT_HTML );
if ( hr->getLong("xml",0) ) format = FORMAT_XML;
if ( hr->getLong("json",0) ) format = FORMAT_JSON;
// get the dmoz catid if given
//long searchingDmoz = hr->getLong("dmoz",0);
@ -220,7 +227,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
// search results. do not do this if a "&dir=" (dmoz category)
// is given
//
if ( hr->getLong("id",0) == 0 && ! xml ) {
if ( hr->getLong("id",0) == 0 && format == FORMAT_HTML ) {
SafeBuf sb;
sb.safePrintf(
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML "
@ -427,7 +434,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
g_stats.m_numFails++;
return g_httpServer.sendQueryErrorReply
(s,500,mstrerror(g_errno),
rawFormat, g_errno, "Query failed. "
format, g_errno, "Query failed. "
"Could not allocate memory to execute a search. "
"Please try later." );
}
@ -681,20 +688,20 @@ bool gotResults ( void *state ) {
SearchInput *si = &st->m_si;
// xml
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("<?xml version=\"1.0\" "
"encoding=\"UTF-8\" ?>\n"
"<response>\n" );
// show current time
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
long long globalNowMS = localToGlobalTimeMilliseconds(nowMS);
sb.safePrintf("\t<currentTimeUTC>%lu</currentTimeUTC>\n",
(long)(globalNowMS/1000));
}
// show response time
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("\t<responseTimeMS>%lli</responseTimeMS>\n",
st->m_took);
@ -762,7 +769,7 @@ bool gotResults ( void *state ) {
//if ( base ) docsInColl = base->getNumGlobalRecs();
docsInColl = g_hostdb.getNumGlobalRecs ( );
// include number of docs in the collection corpus
if ( si->m_xml && docsInColl >= 0LL )
if ( si->m_format == FORMAT_XML && docsInColl >= 0LL )
sb.safePrintf ( "\t<docsInCollection>%lli"
"</docsInCollection>\n", docsInColl );
@ -773,7 +780,7 @@ bool gotResults ( void *state ) {
// only adjust upwards for first page now so it doesn't keep chaning
if ( totalHits < numResults ) totalHits = numResults;
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("\t<hits>%lli</hits>\n"
"\t<moreResultsFollow>%li</moreResultsFollow>\n"
,(long long)totalHits
@ -783,7 +790,7 @@ bool gotResults ( void *state ) {
// . did he get a spelling recommendation?
// . do not use htmlEncode() on this anymore since receiver
// of the XML feed usually does not want that.
if ( si->m_xml && st->m_spell[0] ) {
if ( si->m_format == FORMAT_XML && st->m_spell[0] ) {
sb.safePrintf ("\t<spell><![CDATA[");
sb.safeStrcpy(st->m_spell);
sb.safePrintf ("]]></spell>\n");
@ -795,7 +802,7 @@ bool gotResults ( void *state ) {
numResults);
// tell browser again
//if ( ! si->m_xml )
//if ( si->m_format == FORMAT_HTML )
// sb.safePrintf("<meta http-equiv=\"Content-Type\" "
// "content=\"text/html; charset=utf-8\">\n");
@ -818,7 +825,7 @@ bool gotResults ( void *state ) {
DocIdScore *dpx = NULL;
if ( numResults > 0 ) dpx = msg40->getScoreInfo(0);
if ( si->m_xml && dpx ) {
if ( si->m_format == FORMAT_XML && dpx ) {
// # query terms used!
//long nr = dpx->m_numRequiredTerms;
float max = 0.0;
@ -922,13 +929,13 @@ bool gotResults ( void *state ) {
Query *qq2;
bool firstIgnored;
bool isAdmin = si->m_isAdmin;
if ( si->m_xml ) isAdmin = false;
if ( si->m_format == FORMAT_XML ) isAdmin = false;
// otherwise, we had no error
if ( numResults == 0 && ! si->m_xml ) {
if ( numResults == 0 && si->m_format == FORMAT_HTML ) {
sb.safePrintf ( "No results found." );
}
else if ( moreFollow && ! si->m_xml ) {
else if ( moreFollow && si->m_format == FORMAT_HTML ) {
if ( isAdmin && si->m_docsToScanForReranking > 1 )
sb.safePrintf ( "PQR'd " );
sb.safePrintf ("Results <b>%li</b> to <b>%li</b> of "
@ -941,7 +948,7 @@ bool gotResults ( void *state ) {
);
}
// otherwise, we didn't get enough results to show this page
else if ( ! si->m_xml ) {
else if ( si->m_format == FORMAT_HTML ) {
if ( isAdmin && si->m_docsToScanForReranking > 1 )
sb.safePrintf ( "PQR'd " );
sb.safePrintf ("Results <b>%li</b> to <b>%li</b> of "
@ -965,7 +972,7 @@ bool gotResults ( void *state ) {
// find end of url
char *ue = url;
for ( ; ue && *ue && ! is_wspace_a(*ue) ; ue++ ) ;
if ( numResults == 0 && ! si->m_xml && url ) {
if ( numResults == 0 && si->m_format == FORMAT_HTML && url ) {
sb.safePrintf("<br><br>"
"Could not find that url in the "
"index. Try <a href=/addurl?u=");
@ -975,7 +982,7 @@ bool gotResults ( void *state ) {
// sometimes ppl search for "www.whatever.com" so ask them if they
// want to search for url:www.whatever.com
if ( numResults > 0 && ! si->m_xml && url && url == q ) {
if ( numResults > 0 && si->m_format == FORMAT_HTML && url && url ==q){
sb.safePrintf("<br><br>"
"Did you mean to "
"search for the url "
@ -1004,7 +1011,7 @@ bool gotResults ( void *state ) {
if ( ! pwd ) pwd = "";
/*
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf(" &nbsp; <u><b><font color=blue><a onclick=\""
"for (var i = 0; i < %li; i++) {"
"var nombre;"
@ -1138,7 +1145,7 @@ bool gotResults ( void *state ) {
if ( qw->m_ignoreWord != IGNORE_QSTOP ) continue;
// print header -- we got one
if ( firstIgnored ) {
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf ("\t<ignoredWords><![CDATA[");
else
sb.safePrintf (" &nbsp; <font "
@ -1157,7 +1164,7 @@ bool gotResults ( void *state ) {
// print tail if we had ignored terms
if ( ! firstIgnored ) {
sb.incrementLength(-1);
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("]]></ignoredWords>\n");
else
sb.safePrintf ("</b>. Preceed each with a '+' or "
@ -1165,26 +1172,26 @@ bool gotResults ( void *state ) {
"quotes to not ignore.</font>");
}
if ( ! si->m_xml ) sb.safePrintf("<br><br>");
if ( si->m_format == FORMAT_HTML ) sb.safePrintf("<br><br>");
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("<table cellpadding=0 cellspacing=0>"
"<tr><td valign=top>");
SafeBuf *gbuf = &msg40->m_gigabitBuf;
long numGigabits = gbuf->length()/sizeof(Gigabit);
if ( si->m_xml ) numGigabits = 0;
if ( si->m_format == FORMAT_XML ) numGigabits = 0;
// print gigabits
Gigabit *gigabits = (Gigabit *)gbuf->getBufStart();
//long numCols = 5;
//long perRow = numGigabits / numCols;
if ( numGigabits && ! si->m_xml )
if ( numGigabits && si->m_format == FORMAT_HTML )
sb.safePrintf("<table cellspacing=7 bgcolor=lightgray>"
"<tr><td width=200px; valign=top>");
for ( long i = 0 ; i < numGigabits ; i++ ) {
if ( i > 0 && ! si->m_xml )
if ( i > 0 && si->m_format == FORMAT_HTML )
sb.safePrintf("<hr>");
//if ( perRow && (i % perRow == 0) )
// sb.safePrintf("</td><td valign=top>");
@ -1193,14 +1200,15 @@ bool gotResults ( void *state ) {
printGigabit ( st,sb , msg40 , gi , si );
sb.safePrintf("<br><br>");
}
if ( numGigabits && ! si->m_xml )
if ( numGigabits && si->m_format == FORMAT_HTML )
sb.safePrintf("</td></tr></table>");
// two pane table
if ( ! si->m_xml ) sb.safePrintf("</td><td valign=top>");
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("</td><td valign=top>");
// did we get a spelling recommendation?
if ( ! si->m_xml && st->m_spell[0] ) {
if ( si->m_format == FORMAT_HTML && st->m_spell[0] ) {
// encode the spelling recommendation
long len = gbstrlen ( st->m_spell );
char qe2[MAX_FRAG_SIZE];
@ -1262,7 +1270,7 @@ bool gotResults ( void *state ) {
*/
/*
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("<a onclick=\""
"var e = "
"document.getElementsByTagName('html')[0];\n"
@ -1289,7 +1297,7 @@ bool gotResults ( void *state ) {
//
// end the two-pane table
if ( ! si->m_xml ) sb.safePrintf("</td></tr></table>");
if ( si->m_format == FORMAT_HTML ) sb.safePrintf("</td></tr></table>");
// for storing a list of all of the sites we displayed, now we print a
// link at the bottom of the page to ban all of the sites displayed
@ -1305,7 +1313,7 @@ bool gotResults ( void *state ) {
//
// center everything below here
if ( ! si->m_xml ) sb.safePrintf ( "<br><center>" );
if ( si->m_format == FORMAT_HTML ) sb.safePrintf ( "<br><center>" );
long remember = sb.length();
@ -1326,7 +1334,7 @@ bool gotResults ( void *state ) {
args.safePrintf("&sites=%s",si->m_whiteListBuf.getBufStart());
if ( firstNum > 0 && ! si->m_xml ) {
if ( firstNum > 0 && si->m_format == FORMAT_HTML ) {
long ss = firstNum - msg40->getDocsWanted();
sb.safePrintf("<a href=\"/search?s=%li&q=",ss);
// our current query parameters
@ -1341,7 +1349,7 @@ bool gotResults ( void *state ) {
}
// now print "Next X Results"
if ( moreFollow && ! si->m_xml ) {
if ( moreFollow && si->m_format == FORMAT_HTML ) {
long ss = firstNum + msg40->getDocsWanted();
// print a separator first if we had a prev results before us
if ( sb.length() > remember ) sb.safePrintf ( " &nbsp; " );
@ -1370,9 +1378,9 @@ bool gotResults ( void *state ) {
// end results table cell... and print calendar at top
//tail = cr->m_htmlTail;
//tailLen = gbstrlen (tail );
//if ( ! si->m_xml ) sb.safeMemcpy ( tail , tailLen );
//if ( si->m_format == FORMAT_HTML ) sb.safeMemcpy ( tail , tailLen );
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
/*
sb.safePrintf("<table cellpadding=2 cellspacing=0 border=0>"
"<tr><td></td>"
@ -1407,7 +1415,7 @@ bool gotResults ( void *state ) {
// TODO: print cache line in light gray here
// TODO: "these results were cached X minutes ago"
if ( msg40->getCachedTime() > 0 && ! si->m_xml ) {
if ( msg40->getCachedTime() > 0 && si->m_format == FORMAT_HTML ) {
sb.safePrintf("<br><br><font size=1 color=707070><b><center>");
sb.safePrintf ( " These results were cached " );
// this cached time is this local cpu's time
@ -1421,7 +1429,7 @@ bool gotResults ( void *state ) {
}
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("</response>\n");
return sendReply ( st , sb.getBufStart() );
@ -1499,7 +1507,7 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
// sort them
gbsort ( ptrs , numLinks , 4 , linkSiteRankCmp );
// print xml starter
if ( si->m_xml ) sb.safePrintf("\t\t<inlinks>\n");
if ( si->m_format == FORMAT_XML ) sb.safePrintf("\t\t<inlinks>\n");
// loop through the inlinks
bool printedInlinkText = false;
bool firstTime = true;
@ -1512,7 +1520,9 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
for ( long i = 0 ; i < numLinks ; i++ ) {
k = ptrs[i];
if ( ! k->ptr_linkText ) continue;
if ( ! si->m_doQueryHighlighting && ! si->m_xml ) continue;
if ( ! si->m_doQueryHighlighting &&
si->m_format == FORMAT_HTML )
continue;
char *str = k-> ptr_linkText;
long strLen = k->size_linkText;
//char tt[1024*3];
@ -1520,7 +1530,7 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
char *frontTag =
"<font style=\"color:black;background-color:yellow\">" ;
char *backTag = "</font>";
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
frontTag = "<b>";
backTag = "</b>";
}
@ -1543,7 +1553,7 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
// skip it if nothing highlighted
if ( hi.getNumMatches() == 0 ) continue;
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
sb.safePrintf("\t\t\t<inlink "
"docId=\"%lli\" "
"url=\"",
@ -1626,7 +1636,7 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
// closer for xml
if ( si->m_xml ) sb.safePrintf("\t\t</inlinks>\n");
if ( si->m_format == FORMAT_XML ) sb.safePrintf("\t\t</inlinks>\n");
//if ( printedInlinkText ) sb.safePrintf("<br>\n");
if ( printedInlinkText )
sb.safePrintf("</font>"
@ -1710,10 +1720,24 @@ static int printResult ( SafeBuf &sb,
Msg20 *m20 = msg40->m_msg20[ix];
Msg20Reply *mr = m20->m_r;
if ( si->m_xml ) sb.safePrintf("\t<result>\n" );
// just print cached web page?
if ( mr->ptr_content ) {
sb.safeStrcpy ( mr->ptr_content );
//mr->size_content );
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("\n\n<br><br>\n\n");
if ( si->m_format != FORMAT_HTML )
sb.safePrintf("\n\n");
// just in case
sb.nullTerm();
return true;
}
if ( si->m_format == FORMAT_XML ) sb.safePrintf("\t<result>\n" );
if ( si->m_docIdsOnly ) {
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("\t\t<docId>%lli</docId>\n"
"\t</result>\n",
mr->m_docId );
@ -1743,13 +1767,14 @@ static int printResult ( SafeBuf &sb,
bool indent = false;
bool isAdmin = si->m_isAdmin;
if ( si->m_xml ) isAdmin = false;
if ( si->m_format == FORMAT_XML ) isAdmin = false;
//unsigned long long lastSiteHash = siteHash;
if ( indent && ! si->m_xml ) sb.safePrintf("<blockquote>");
if ( indent && si->m_format == FORMAT_HTML )
sb.safePrintf("<blockquote>");
// print the rank. it starts at 0 so add 1
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("<table><tr><td valign=top>%li.</td><td>",
ix+1 + si->m_firstResultNum );
@ -1803,14 +1828,14 @@ static int printResult ( SafeBuf &sb,
// http://www.youtube.com/watch?v=auQbi_fkdGE
// http://img.youtube.com/vi/auQbi_fkdGE/2.jpg
// get the thumbnail url
if ( mr->ptr_imgUrl && ! si->m_xml )
if ( mr->ptr_imgUrl && si->m_format == FORMAT_HTML )
sb.safePrintf ("<a href=%s><image src=%s></a>",
url,mr->ptr_imgUrl);
// the a href tag
if ( ! si->m_xml ) sb.safePrintf ( "\n\n" );
if ( si->m_format == FORMAT_HTML ) sb.safePrintf ( "\n\n" );
// then if it is banned
if ( mr->m_isBanned && ! si->m_xml )
if ( mr->m_isBanned && si->m_format == FORMAT_HTML )
sb.safePrintf("<font color=red><b>BANNED</b></font> ");
///////
@ -1821,7 +1846,7 @@ static int printResult ( SafeBuf &sb,
// the a href tag
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
sb.safePrintf ( "<a href=" );
// print the url in the href tag
sb.safeMemcpy ( url , urlLen );
@ -1880,12 +1905,13 @@ static int printResult ( SafeBuf &sb,
char *frontTag =
"<font style=\"color:black;background-color:yellow\">" ;
char *backTag = "</font>";
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
frontTag = "<b>";
backTag = "</b>";
}
long cols = 80;
if ( si->m_xml ) sb.safePrintf("\t\t<title><![CDATA[");
if ( si->m_format == FORMAT_XML )
sb.safePrintf("\t\t<title><![CDATA[");
SafeBuf hb;
if ( str && strLen && si->m_doQueryHighlighting ) {
hlen = hi.set ( &hb,
@ -1921,10 +1947,10 @@ static int printResult ( SafeBuf &sb,
return false;
}
// close up the title tag
if ( si->m_xml ) sb.safePrintf("]]></title>\n");
if ( si->m_format == FORMAT_XML ) sb.safePrintf("]]></title>\n");
if ( ! si->m_xml ) sb.safePrintf ("</a><br>\n" ) ;
if ( si->m_format == FORMAT_HTML ) sb.safePrintf ("</a><br>\n" ) ;
/////
//
@ -1934,7 +1960,7 @@ static int printResult ( SafeBuf &sb,
unsigned char ctype = mr->m_contentType;
if ( ctype > 2 && ctype <= 13 ) {
char *cs = g_contentTypeStrings[ctype];
if ( si->m_xml )
if ( si->m_format == FORMAT_XML )
sb.safePrintf("\t\t<contentType>"
"<![CDATA["
"%s"
@ -1967,11 +1993,11 @@ static int printResult ( SafeBuf &sb,
strLen = gbstrlen(dmozSummary);
}
if ( si->m_xml ) sb.safePrintf("\t\t<sum><![CDATA[");
if ( si->m_format == FORMAT_XML ) sb.safePrintf("\t\t<sum><![CDATA[");
sb.brify ( str , strLen, 0 , cols ); // niceness = 0
// close xml tag
if ( si->m_xml ) sb.safePrintf("]]></sum>\n");
if ( si->m_format == FORMAT_XML ) sb.safePrintf("]]></sum>\n");
// new line if not xml
else if ( strLen ) sb.safePrintf("<br>\n");
@ -2026,7 +2052,7 @@ static int printResult ( SafeBuf &sb,
// so hack off the last slash
if ( j < 0 ) urlLen--;
}
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
sb.safePrintf ("<font color=gray>" );
//sb.htmlEncode ( url , gbstrlen(url) , false );
// 20 for the date after it
@ -2034,7 +2060,7 @@ static int printResult ( SafeBuf &sb,
// turn off the color
sb.safePrintf ( "</font>\n" );
}
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
sb.safePrintf("\t\t<url><![CDATA[");
sb.safeMemcpy ( url , urlLen );
sb.safePrintf("]]></url>\n");
@ -2043,16 +2069,18 @@ static int printResult ( SafeBuf &sb,
// now the last spidered date of the document
time_t ts = mr->m_lastSpidered;
if ( ! si->m_xml ) printTimeAgo ( sb , ts , "indexed" , si );
if ( si->m_format == FORMAT_HTML )
printTimeAgo ( sb , ts , "indexed" , si );
// the date it was last modified
ts = mr->m_lastModified;
if ( ! si->m_xml ) printTimeAgo ( sb , ts , "modified" , si );
if ( si->m_format == FORMAT_HTML )
printTimeAgo ( sb , ts , "modified" , si );
//
// more xml stuff
//
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
// doc size in Kilobytes
sb.safePrintf ( "\t\t<size><![CDATA[%4.0fk]]></size>\n",
(float)mr->m_contentLen/1024.0);
@ -2103,7 +2131,7 @@ static int printResult ( SafeBuf &sb,
// NULLify if empty
if ( mr->size_outlinks <= 0 ) outlinks = NULL;
// only for xml for now
if ( ! si->m_xml ) outlinks = NULL;
if ( si->m_format == FORMAT_HTML ) outlinks = NULL;
Inlink *k;
// do we need absScore2 for outlinks?
//k = NULL;
@ -2119,7 +2147,7 @@ static int printResult ( SafeBuf &sb,
k->m_ip, // hostHash, but use ip for now
(long)k->m_firstIndexedDate ,
(long)k->m_datedbDate );
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
// result
sb.safePrintf("\t\t<language><![CDATA[%s]]>"
"</language>\n",
@ -2146,7 +2174,7 @@ static int printResult ( SafeBuf &sb,
if ( charset ) sb.safePrintf(" - %s ", charset);
}
if ( ! si->m_xml ) sb.safePrintf("<br>\n");
if ( si->m_format == FORMAT_HTML ) sb.safePrintf("<br>\n");
char *coll = si->m_cr->m_coll;
@ -2155,7 +2183,7 @@ static int printResult ( SafeBuf &sb,
if ( mr->m_noArchive ) printCached = false;
if ( isAdmin ) printCached = true;
if ( mr->m_contentLen <= 0 ) printCached = false;
if ( si->m_xml ) printCached = false;
if ( si->m_format == FORMAT_XML ) printCached = false;
if ( printCached && cr->m_clickNScrollEnabled )
sb.safePrintf ( " - <a href=/scroll.html?page="
"get?"
@ -2177,7 +2205,7 @@ static int printResult ( SafeBuf &sb,
mr->m_docId );
// the new links
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
//sb.safePrintf(" - <a href=\"/scoring?"
// "c=%s&\">scoring</a>",
// coll );
@ -2199,7 +2227,7 @@ static int printResult ( SafeBuf &sb,
// g_conf.m_addUrlEnabled &&
// cr->m_addUrlEnabled ) {
/*
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
// the [respider] link
// save this for seo iframe!
sb.safePrintf (" - <a href=\"/admin/inject?u=" );
@ -2218,7 +2246,7 @@ static int printResult ( SafeBuf &sb,
// unhide the divs on click
long placeHolder = -1;
long placeHolderLen;
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
// place holder for backlink table link
placeHolder = sb.length();
sb.safePrintf (" - <a onclick="
@ -2274,7 +2302,7 @@ static int printResult ( SafeBuf &sb,
// this stuff is secret just for local guys!
if ( ! si->m_xml && ( isAdmin || cr->m_isCustomCrawl ) ) {
if ( si->m_format == FORMAT_HTML && ( isAdmin || cr->m_isCustomCrawl)){
// now the ip of url
//long urlip = msg40->getIp(i);
// don't combine this with the sprintf above cuz
@ -2323,7 +2351,7 @@ static int printResult ( SafeBuf &sb,
}
// admin always gets the site: option so he can ban
if ( ! si->m_xml && ( isAdmin || cr->m_isCustomCrawl ) ) {
if ( si->m_format == FORMAT_HTML && ( isAdmin || cr->m_isCustomCrawl)){
char dbuf [ MAX_URL_LEN ];
long dlen = uu.getDomainLen();
memcpy ( dbuf , uu.getDomain() , dlen );
@ -2482,18 +2510,20 @@ static int printResult ( SafeBuf &sb,
*/
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf ( "<br><br>\n");
// done?
DocIdScore *dp = msg40->getScoreInfo(ix);
if ( ! dp ) {
if ( si->m_xml ) sb.safePrintf ("\t</result>\n\n");
if ( si->m_format == FORMAT_XML )
sb.safePrintf ("\t</result>\n\n");
// wtf?
//char *xx=NULL;*xx=0;
// at least close up the table
if ( ! si->m_xml ) sb.safePrintf("</table>\n");
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("</table>\n");
return true;
}
@ -2512,7 +2542,7 @@ static int printResult ( SafeBuf &sb,
//Query *q = si->m_q;
// put in a hidden div so you can unhide it
if ( ! si->m_xml )
if ( si->m_format == FORMAT_HTML )
sb.safePrintf("<div id=bl%li style=display:none;>\n", ix );
// print xml and html inlinks
@ -2520,7 +2550,7 @@ static int printResult ( SafeBuf &sb,
printInlinkText ( sb , mr , si , &numInlinks );
if ( ! si->m_xml ) {
if ( si->m_format == FORMAT_HTML ) {
sb.safePrintf("</div>");
sb.safePrintf("<div id=sc%li style=display:none;>\n", ix );
}
@ -2555,7 +2585,7 @@ static int printResult ( SafeBuf &sb,
// skip if 0. neighborhood terms have weight of 0 now
if ( ps->m_finalScore == 0.0 ) continue;
// first time?
if ( firstTime && ! si->m_xml ) {
if ( firstTime && si->m_format == FORMAT_HTML ) {
Query *q = si->m_q;
printTermPairs ( sb , q , ps );
printScoresHeader ( sb );
@ -2574,7 +2604,7 @@ static int printResult ( SafeBuf &sb,
if ( minScore < 0.0 || totalPairScore < minScore )
minScore = totalPairScore;
// we need to set "ft" for xml stuff below
if ( si->m_xml ) continue;
if ( si->m_format == FORMAT_XML ) continue;
//sb.safePrintf("<table border=1><tr><td><center><b>");
// print pair text
//long qtn1 = fps->m_qtermNum1;
@ -2640,7 +2670,7 @@ static int printResult ( SafeBuf &sb,
// skip if 0. skip neighborhoods i guess
if ( ss->m_finalScore == 0.0 ) continue;
// first time?
if ( firstTime && ! si->m_xml ) {
if ( firstTime && si->m_format == FORMAT_HTML ) {
Query *q = si->m_q;
printSingleTerm ( sb , q , ss );
printScoresHeader ( sb );
@ -2657,7 +2687,7 @@ static int printResult ( SafeBuf &sb,
if ( minScore < 0.0 || totalSingleScore < minScore )
minScore = totalSingleScore;
// we need to set "ft" for xml stuff below
if ( si->m_xml ) continue;
if ( si->m_format == FORMAT_XML ) continue;
//sb.safePrintf("<table border=1><tr><td><center><b>");
// print pair text
//long qtn = fss->m_qtermNum;
@ -2680,7 +2710,7 @@ static int printResult ( SafeBuf &sb,
//if ( nr ) sb.safePrintf("</table>");
//sb.safePrintf("<br>");
// final score!!!
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
sb.safePrintf ("\t\t<siteRank>%li</siteRank>\n",
(long)dp->m_siteRank );
@ -2852,7 +2882,7 @@ static int printResult ( SafeBuf &sb,
/*
// UN-indent it if level is 1
if ( ! si->m_xml && si->m_doIpClustering ) {
if ( si->m_format == FORMAT_HTML && si->m_doIpClustering ) {
sb.safePrintf (" - [ <a href=\"/search?"
"q=%%2Bip%%3A%s+%s&sc=0&c=%s\">"
"More from this ip</a> ]",
@ -2861,7 +2891,7 @@ static int printResult ( SafeBuf &sb,
if ( indent ) sb.safePrintf ( "</blockquote><br>\n");
else sb.safePrintf ( "<br><br>\n");
}
else if ( ! si->m_xml && si->m_doSiteClustering ) {
else if ( si->m_format == FORMAT_HTML && si->m_doSiteClustering ) {
char hbuf [ MAX_URL_LEN ];
long hlen = uu.getHostLen();
memcpy ( hbuf , uu.getHost() , hlen );
@ -3006,7 +3036,7 @@ bool printPairScore ( SafeBuf &sb , SearchInput *si , PairScore *ps ,
bes = "+ <b>1.0</b>";
}
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
sb.safePrintf("\t\t<pairInfo>\n");
@ -3698,7 +3728,7 @@ bool printSingleScore ( SafeBuf &sb ,
long long tf = msg40->m_msg3a.m_termFreqs[qtn];
float tfw = ss->m_tfWeight;
if ( si->m_xml ) {
if ( si->m_format == FORMAT_XML ) {
sb.safePrintf("\t\t<termInfo>\n");
/*

View File

@ -14965,13 +14965,13 @@ void Parms::init ( ) {
m++;
m->m_title = "format of the returned search results";
m->m_desc = "X is 0 to get back results in regular html, and 8 to "
"get back results in XML.";
m->m_desc = "X is 0 to get back results in regular html, 1 to "
"get back results in XML, 2 for JSON.";
m->m_def = "0";
m->m_soff = (char *)&si.m_xml - y;
m->m_type = TYPE_LONG;
m->m_soff = (char *)&si.m_format - y;
m->m_type = TYPE_CHAR;
m->m_sparm = 1;
m->m_scgi = "xml";
m->m_scgi = "format";
m->m_smin = 0;
m->m_smax = 12;
m++;

View File

@ -342,7 +342,7 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
// we need to get some cgi values in order to correct the defaults
// based on if we're doing an xml feed, have a site: query, etc.
long xml = r->getLong ( "xml" , 0 ); // was "raw"
//long xml = r->getLong ( "xml" , 0 ); // was "raw"
long siteLen = 0; r->getString ("site",&siteLen);
long sitesLen = 0;
char *sites = r->getString ("sites",&sitesLen,NULL);
@ -353,8 +353,23 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
! m_whiteListBuf.nullTerm() ) )
return log("query: unable to strcpy whitelist");
// what format should search results be in? default is html
long format = r->getLong("format", FORMAT_HTML );
// support old api &xml=1 to mean &format=1
if ( r->getLong("xml",0) ) {
format = FORMAT_XML;
}
// also support &json=1
if ( r->getLong("json",0) ) {
format = FORMAT_JSON;
}
// now override automatic defaults for special cases
if ( xml > 0 ) {
if ( format != FORMAT_HTML ) {
m_familyFilter = 0;
// this is causing me a headache when on when i dont know it
m_restrictIndexdbForQuery = false;
@ -654,10 +669,12 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
// use "&dg=1" to debug gigabits
m_debugGigabits = r->getLong("dg",0);
// override
m_format = format;
// . omit scoring info from the xml feed for now
// . we have to roll this out to gk144 net i think
if ( xml > 0 )
if ( m_format != FORMAT_HTML )
m_getDocIdScoringInfo = 0;
// turn off by default!
@ -665,7 +682,6 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
m_numTopicGroups = 0;
}
//////////////////////////////////////
//
// transform input into classes

View File

@ -43,6 +43,10 @@ class TopicGroup {
long m_topicMaxPunctLen;
};
#define FORMAT_HTML 0
#define FORMAT_XML 1
#define FORMAT_JSON 2
class SearchInput {
public:
@ -211,7 +215,9 @@ class SearchInput {
// tier sizes can change with different "raw" values, therefore,
// so can search results
long m_xml; // msg40
//long m_xml; // msg40
// can be 0 for FORMAT_HTML, 1 = FORMAT_XML, 2=FORMAT_JSON
char m_format;
// this should be part of the key because it will affect the results!
char m_queryExpansion;