show gigabits in xml/json feeds. update optimizing section

in admin.html by adding a 'disable gigabits' section for
making queries faster.
This commit is contained in:
mwells 2014-08-26 08:46:59 -07:00
parent d0ccbdd455
commit 042ec4b5cd
3 changed files with 284 additions and 162 deletions

View File

@ -380,8 +380,8 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
long dg = hr->getLong("dg",-1);
if ( dg >= 0 ) sb.safePrintf("&dg=%li",dg);
// show gigabits?
long gb = hr->getLong("gigabits",1);
if ( gb >= 1 ) sb.safePrintf("&gigabits=%li",gb);
//long gb = hr->getLong("gigabits",1);
//if ( gb >= 1 ) sb.safePrintf("&gigabits=%li",gb);
// show banned results?
long showBanned = hr->getLong("sb",0);
if ( showBanned ) sb.safePrintf("&sb=1");
@ -743,53 +743,81 @@ static bool printGigabitContainingSentences ( State0 *st,
Msg40 *msg40 ,
Gigabit *gi ,
SearchInput *si ,
Query *gigabitQuery ) {
Query *gigabitQuery ,
long gigabitId ) {
static long s_gigabitCount = 0;
sb->safePrintf("<nobr><b>");
//"<img src=http://search.yippy.com/"
//"images/new/button-closed.gif><b>");
HttpRequest *hr = &st->m_hr;
CollectionRec *cr = si->m_cr;//g_collectiondb.getRec ( collnum );
// make a new query
sb->safePrintf("<a href=\"/search?gigabits=1&c=%s&q=",cr->m_coll);
sb->urlEncode(gi->m_term,gi->m_termLen);
sb->safeMemcpy("+|+",3);
char *q = hr->getString("q",NULL,"");
sb->urlEncode(q);
sb->safePrintf("\">");
sb->safeMemcpy(gi->m_term,gi->m_termLen);
sb->safePrintf("</a></b>");
sb->safePrintf(" <font color=gray size=-1>");
long numOff = sb->m_length;
sb->safePrintf(" ");//,gi->m_numPages);
sb->safePrintf("</font>");
sb->safePrintf("</b>");
if ( si->m_isAdmin && 1 == 2 )
sb->safePrintf("[%.0f]{%li}",
gi->m_gbscore,
gi->m_minPop);
long revert = sb->length();
sb->safePrintf("<font color=blue style=align:right;>"
"<a style=cursor:hand;cursor:pointer; onclick=ccc(%li);>"
, s_gigabitCount
);
long spaceOutOff = sb->length();
sb->safePrintf( "%c%c%c",
0xe2,
0x87,
0x93);
sb->safePrintf(//"[more]"
"</a></font>");
//static long s_gigabitCount = 0;
sb->safePrintf("</nobr>"); // <br>
char format = si->m_format;
HttpRequest *hr = &st->m_hr;
CollectionRec *cr = si->m_cr;//g_collectiondb.getRec(collnum );
long numOff;
long revert;
long spaceOutOff;
if ( format == FORMAT_HTML ) {
sb->safePrintf("<nobr><b>");
//"<img src=http://search.yippy.com/"
//"images/new/button-closed.gif><b>");
// make a new query
sb->safePrintf("<a href=\"/search?c=%s&q=",cr->m_coll);
sb->urlEncode(gi->m_term,gi->m_termLen);
sb->safeMemcpy("+|+",3);
char *q = hr->getString("q",NULL,"");
sb->urlEncode(q);
sb->safePrintf("\">");
sb->safeMemcpy(gi->m_term,gi->m_termLen);
sb->safePrintf("</a></b>");
sb->safePrintf(" <font color=gray size=-1>");
numOff = sb->m_length;
sb->safePrintf(" ");//,gi->m_numPages);
sb->safePrintf("</font>");
sb->safePrintf("</b>");
if ( si->m_isAdmin && 1 == 2 )
sb->safePrintf("[%.0f]{%li}",
gi->m_gbscore,
gi->m_minPop);
revert = sb->length();
sb->safePrintf("<font color=blue style=align:right;>"
"<a style=cursor:hand;cursor:pointer; "
"onclick=ccc(%li);>"
, gigabitId // s_gigabitCount
);
spaceOutOff = sb->length();
sb->safePrintf( "%c%c%c",
0xe2,
0x87,
0x93);
sb->safePrintf(//"[more]"
"</a></font>");
sb->safePrintf("</nobr>"); // <br>
}
if ( format == FORMAT_XML ) {
sb->safePrintf("\t\t<gigabit>\n");
sb->safePrintf("\t\t\t<term><![CDATA[");
sb->cdataEncode(gi->m_term,gi->m_termLen);
sb->safePrintf("]]></term>\n");
sb->safePrintf("\t\t\t<score>%f</score>\n",gi->m_gbscore);
sb->safePrintf("\t\t\t<minPop>%li</minPop>\n",gi->m_minPop);
}
if ( format == FORMAT_JSON ) {
sb->safePrintf("\t\"gigabit\":{\n");
sb->safePrintf("\t\t\"term\":\"");
sb->jsonEncode(gi->m_term,gi->m_termLen);
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\"score\":%f,\n",gi->m_gbscore);
sb->safePrintf("\t\t\"minPop\":%li,\n",gi->m_minPop);
}
// get facts
long numNuggets = 0;
@ -821,27 +849,27 @@ static bool printGigabitContainingSentences ( State0 *st,
again:
// first time, print in the single fact div
if ( first ) {
if ( first && format == FORMAT_HTML ) {
sb->safePrintf("<div "
//"style=\"border:1px lightgray solid;\"
"id=fd%li>",s_gigabitCount);
"id=fd%li>",gigabitId);//s_gigabitCount);
}
if ( second ) {
if ( second && format == FORMAT_HTML ) {
sb->safePrintf("<div style=\"max-height:300px;"
"display:none;"
"overflow-x:hidden;"
"overflow-y:auto;"//scroll;"
//"border:1px lightgray solid; "
"\" "
"id=sd%li>",s_gigabitCount);
"id=sd%li>",gigabitId);//s_gigabitCount);
printedSecond = true;
}
Msg20Reply *reply = fi->m_reply;
// ok, print it out
if ( ! first && ! second ) {
if ( ! first && ! second && format == FORMAT_HTML ) {
//if ( reply->m_docId != lastDocId )
sb->safePrintf("<br><br>\n");
//else {
@ -875,7 +903,8 @@ static bool printGigabitContainingSentences ( State0 *st,
0 , // fieldCode
0 ); // niceness
// now highlight the original query as well but in black bold
h.set ( sb , // print it out here
SafeBuf tmpBuf2;
h.set ( &tmpBuf2 , // print it out here
tmpBuf.getBufStart() , // content
tmpBuf.length() , // len
si->m_queryLangId , // from m_defaultSortLang
@ -889,16 +918,59 @@ static bool printGigabitContainingSentences ( State0 *st,
0 ); // niceness
long dlen; char *dom = getDomFast(reply->ptr_ubuf,&dlen);
// print the sentence
if ( format == FORMAT_HTML )
sb->safeStrcpy(tmpBuf2.getBufStart());
if ( format == FORMAT_XML ) {
sb->safePrintf("\t\t\t<instance>\n"
"\t\t\t\t<sentence><![CDATA[");
sb->cdataEncode(tmpBuf2.getBufStart());
sb->safePrintf("]]></sentence>\n");
sb->safePrintf("\t\t\t\t<url><![CDATA[");
sb->cdataEncode(reply->ptr_ubuf);
sb->safePrintf("]]></url>\n");
sb->safePrintf("\t\t\t\t<domain><![CDATA[");
sb->cdataEncode(dom,dlen);
sb->safePrintf("]]></domain>\n");
sb->safePrintf("\t\t\t</instance>\n");
}
if ( format == FORMAT_JSON ) {
sb->safePrintf("\t\t\"instance\":{\n"
"\t\t\t\"sentence\":\"");
sb->jsonEncode(tmpBuf2.getBufStart());
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\t\"url\":\"");
sb->jsonEncode(reply->ptr_ubuf);
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\t\"domain\":\"");
sb->jsonEncode(dom,dlen);
sb->safePrintf("\"\n");
sb->safePrintf("\t\t},\n");
}
fi->m_printed = 1;
saveOffset = sb->length();
sb->safePrintf(" <a href=/get?c=%s&cnsp=0&"
"strip=0&d=%lli>",cr->m_coll,reply->m_docId);
long dlen; char *dom = getDomFast(reply->ptr_ubuf,&dlen);
sb->safeMemcpy(dom,dlen);
sb->safePrintf("</a>\n");
if ( format == FORMAT_HTML )
sb->safePrintf(" <a href=/get?c=%s&cnsp=0&"
"strip=0&d=%lli>",
cr->m_coll,reply->m_docId);
if ( format == FORMAT_HTML )
sb->safeMemcpy(dom,dlen);
if ( format == FORMAT_HTML )
sb->safePrintf("</a>\n");
//lastDocId = reply->m_docId;
if ( first ) {
if ( first && format == FORMAT_HTML ) {
sb->safePrintf("</div>");
}
@ -914,6 +986,20 @@ static bool printGigabitContainingSentences ( State0 *st,
}
}
if ( format == FORMAT_XML )
sb->safePrintf("\t</gigabit>\n");
if ( format == FORMAT_JSON ) {
// remove last ,\n
sb->m_length -= 2;
// replace with just \n
sb->safePrintf("\n\t},\n");
}
// all done if not html
if ( format != FORMAT_HTML )
return true;
// we counted the first one twice since we had to throw it into
// the hidden div too!
if ( numNuggets > 1 ) numNuggets--;
@ -943,7 +1029,7 @@ static bool printGigabitContainingSentences ( State0 *st,
dst[k] = src[k];
}
s_gigabitCount++;
//s_gigabitCount++;
if ( printedSecond ) {
sb->safePrintf("</div>");
@ -1435,96 +1521,106 @@ bool expandHtml ( SafeBuf& sb,
bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
char *title = "Search Results";
sb.safePrintf("<title>Gigablast - %s</title>\n",title);
sb.safePrintf("<style><!--\n");
sb.safePrintf("body {\n");
sb.safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
sb.safePrintf("color: #000000;\n");
sb.safePrintf("font-size: 12px;\n");
sb.safePrintf("margin: 0px 0px;\n");
sb.safePrintf("letter-spacing: 0.04em;\n");
sb.safePrintf("}\n");
sb.safePrintf("a {text-decoration:none;}\n");
//sb.safePrintf("a:link {color:#00c}\n");
//sb.safePrintf("a:visited {color:#551a8b}\n");
//sb.safePrintf("a:active {color:#f00}\n");
sb.safePrintf(".bold {font-weight: bold;}\n");
sb.safePrintf(".bluetable {background:#d1e1ff;margin-bottom:15px;font-size:12px;}\n");
sb.safePrintf(".url {color:#008000;}\n");
sb.safePrintf(".cached, .cached a {font-size: 10px;color: #666666;\n");
sb.safePrintf("}\n");
sb.safePrintf("table {\n");
sb.safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
sb.safePrintf("color: #000000;\n");
sb.safePrintf("font-size: 12px;\n");
sb.safePrintf("}\n");
sb.safePrintf(".directory {font-size: 16px;}\n"
".nav {font-size:20px;align:right;}\n"
);
sb.safePrintf("-->\n");
sb.safePrintf("</style>\n");
sb.safePrintf("\n");
sb.safePrintf("</head>\n");
sb.safePrintf("<script>\n");
sb.safePrintf("<!--\n");
sb.safePrintf("var openmenu=''; var inmenuclick=0;");
sb.safePrintf("function x(){document.f.q.focus();}\n");
sb.safePrintf("// --></script>\n");
sb.safePrintf("<body "
SearchInput *si = &st->m_si;
Msg40 *msg40 = &st->m_msg40;
"onmousedown=\""
char format = si->m_format;
"if (openmenu != '' && inmenuclick==0) {"
"document.getElementById(openmenu)."
"style.display='none'; openmenu='';"
"}"
if ( format == FORMAT_HTML ) {
char *title = "Search Results";
sb.safePrintf("<title>Gigablast - %s</title>\n",title);
sb.safePrintf("<style><!--\n");
sb.safePrintf("body {\n");
sb.safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
sb.safePrintf("color: #000000;\n");
sb.safePrintf("font-size: 12px;\n");
sb.safePrintf("margin: 0px 0px;\n");
sb.safePrintf("letter-spacing: 0.04em;\n");
sb.safePrintf("}\n");
sb.safePrintf("a {text-decoration:none;}\n");
//sb.safePrintf("a:link {color:#00c}\n");
//sb.safePrintf("a:visited {color:#551a8b}\n");
//sb.safePrintf("a:active {color:#f00}\n");
sb.safePrintf(".bold {font-weight: bold;}\n");
sb.safePrintf(".bluetable {background:#d1e1ff;"
"margin-bottom:15px;font-size:12px;}\n");
sb.safePrintf(".url {color:#008000;}\n");
sb.safePrintf(".cached, .cached a {font-size: 10px;"
"color: #666666;\n");
sb.safePrintf("}\n");
sb.safePrintf("table {\n");
sb.safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
sb.safePrintf("color: #000000;\n");
sb.safePrintf("font-size: 12px;\n");
sb.safePrintf("}\n");
sb.safePrintf(".directory {font-size: 16px;}\n"
".nav {font-size:20px;align:right;}\n"
);
sb.safePrintf("-->\n");
sb.safePrintf("</style>\n");
sb.safePrintf("\n");
sb.safePrintf("</head>\n");
sb.safePrintf("<script>\n");
sb.safePrintf("<!--\n");
sb.safePrintf("var openmenu=''; var inmenuclick=0;");
sb.safePrintf("function x(){document.f.q.focus();}\n");
sb.safePrintf("// --></script>\n");
sb.safePrintf("<body "
"inmenuclick=0;"
"\" "
"onmousedown=\""
"onload=\"x()\">\n");
"if (openmenu != '' && inmenuclick==0) {"
"document.getElementById(openmenu)."
"style.display='none'; openmenu='';"
"}"
//
// DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN
//
sb.safePrintf("<TABLE border=0 height=100%% cellpadding=0 "
"cellspacing=0>"
"\n<TR>\n");
"inmenuclick=0;"
"\" "
//
// first the nav column
//
sb.safePrintf("<TD bgcolor=#f3c714 " // yellow/gold
"valign=top "
"style=\""
"width:210px;"
"border-right:3px solid blue;"
"\">"
"onload=\"x()\">\n");
"<br>"
//
// DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN
//
sb.safePrintf("<TABLE border=0 height=100%% cellpadding=0 "
"cellspacing=0>"
"\n<TR>\n");
"<center>"
"<a href=/>"
"<div style=\""
"background-color:white;"
"padding:10px;"
"border-radius:100px;"
"border-color:blue;"
"border-width:3px;"
"border-style:solid;"
"width:100px;"
"height:100px;"
"\">"
"<br style=line-height:10px;>"
"<img width=54 height=79 alt=HOME src=/rocket.jpg>"
"</div>"
"</a>"
"</center>"
//
// first the nav column
//
sb.safePrintf("<TD bgcolor=#f3c714 " // yellow/gold
"valign=top "
"style=\""
"width:210px;"
"border-right:3px solid blue;"
"\">"
"<br>"
"<br>"
);
"<br>"
"<center>"
"<a href=/>"
"<div style=\""
"background-color:white;"
"padding:10px;"
"border-radius:100px;"
"border-color:blue;"
"border-width:3px;"
"border-style:solid;"
"width:100px;"
"height:100px;"
"\">"
"<br style=line-height:10px;>"
"<img width=54 height=79 "
"alt=HOME src=/rocket.jpg>"
"</div>"
"</a>"
"</center>"
"<br>"
"<br>"
);
}
/*
@ -1562,11 +1658,6 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
*/
SearchInput *si = &st->m_si;
Msg40 *msg40 = &st->m_msg40;
char format = si->m_format;
//
// BEGIN FACET PRINTING
//
@ -1588,7 +1679,7 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
long numGigabits = gbuf->length()/sizeof(Gigabit);
// MDW: support gigabits in xml/json format again
if ( format != FORMAT_HTML ) numGigabits = 0;
//if ( format != FORMAT_HTML ) numGigabits = 0;
// print gigabits
@ -1596,6 +1687,13 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
//long numCols = 5;
//long perRow = numGigabits / numCols;
if ( numGigabits && format == FORMAT_XML )
sb.safePrintf("\t<gigabits>\n");
if ( numGigabits && format == FORMAT_JSON )
sb.safePrintf("\"gigabits\":{\n");
if ( numGigabits && format == FORMAT_HTML )
// gigabit unhide function
sb.safePrintf (
@ -1688,7 +1786,8 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
//printGigabit ( st,sb , msg40 , gi , si );
//sb.safePrintf("<br>");
printGigabitContainingSentences(st,&sb,msg40,gi,si,
&gigabitQuery);
&gigabitQuery,
i);
if ( format == FORMAT_HTML )
sb.safePrintf("<br><br>");
}
@ -1699,6 +1798,16 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
sb.safePrintf("</td></tr></table></div><br>");
if ( numGigabits && format == FORMAT_XML )
sb.safePrintf("\t</gigabits>\n");
if ( numGigabits && format == FORMAT_JSON ) {
// remove ,\n
sb.m_length -=2;
// add back just \n
sb.safePrintf("\n},\n");
}
//
// now print various knobs
//
@ -1787,7 +1896,7 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
//
// print date contraint functions now
// print date constraint functions now
//
if ( format == FORMAT_HTML && 1 == 2)
sb.safePrintf(
@ -1978,7 +2087,9 @@ bool printSearchResultsHeader ( State0 *st ) {
if ( header ) sb->safeStrcpy ( header );
}
// this also prints gigabits and nuggabits
// if we are xml/json we call this below otherwise we lose
// the header of <?xml...> or whatever
if ( ! g_conf.m_isMattWells && si->m_format == FORMAT_HTML ) {
printLeftNavColumn ( *sb,st );
}
@ -2241,6 +2352,12 @@ bool printSearchResultsHeader ( State0 *st ) {
if ( si->m_format != FORMAT_HTML && ! si->m_streamResults )
msg40->printFacetTables ( sb );
// now print gigabits if we are xml/json
if ( si->m_format != FORMAT_HTML ) {
// this will print gigabits
printLeftNavColumn ( *sb,st );
}
// for diffbot collections only...
if ( st->m_header &&
@ -2985,19 +3102,21 @@ bool printSearchResultsTail ( State0 *st ) {
// TODO: print cache line in light gray here
// TODO: "these results were cached X minutes ago"
if ( msg40->getCachedTime() > 0 && si->m_format == FORMAT_HTML ) {
sb->safePrintf("<br><br><font size=1 color=707070><b><center>");
sb->safePrintf("<br><br><font size=1 color=707070>"
"<b><center>");
sb->safePrintf ( " These results were cached " );
// this cached time is this local cpu's time
long diff = getTime() - msg40->getCachedTime();
if ( diff < 60 ) sb->safePrintf ( "%li seconds" , diff );
else if ( diff < 2*60 ) sb->safePrintf ( "1 minute");
else sb->safePrintf ( "%li minutes",diff/60);
if ( diff < 60 ) sb->safePrintf ("%li seconds", diff );
else if ( diff < 2*60 ) sb->safePrintf ("1 minute");
else sb->safePrintf ("%li minutes",diff/60);
sb->safePrintf ( " ago. [<a href=\"/pageCache.html\">"
"<font color=707070>Info</font></a>]");
sb->safePrintf ( "</center></font>");
}
if ( si->m_format == FORMAT_XML ) {
// when streaming results we lookup the facets last

View File

@ -28916,15 +28916,6 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
// if they provided a query with gbfacet*: terms then we have
// to get those facet values.
if ( ! m_gotFacets ) {
// need this for storeFacetValues() if we are json
if ( m_contentType == CT_JSON ) {
Json *jp = getParsedJson();
if ( ! jp || jp == (void *)-1)return (Msg20Reply *)jp;
}
if ( m_contentType == CT_HTML ) {
Xml *xml = getXml();
if ( ! xml || xml==(void *)-1)return (Msg20Reply *)xml;
}
// only do this once
m_gotFacets = true;
// get facet term
@ -28945,6 +28936,17 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
}
// if we had a facet, get the values it has in the doc
if ( qs && *qs ) {
// need this for storeFacetValues() if we are json
if ( m_contentType == CT_JSON ) {
Json *jp = getParsedJson();
if ( ! jp || jp == (void *)-1)
return (Msg20Reply *)jp;
}
if ( m_contentType == CT_HTML ) {
Xml *xml = getXml();
if ( ! xml || xml==(void *)-1)
return (Msg20Reply *)xml;
}
// find end of it
char *e = qs;
for ( ; *e && ! is_wspace_a(*e) ; e++ );

View File

@ -1141,6 +1141,7 @@ cluster records are loaded. You can also turn them off by default (site cluster
specifies <i>&sc=1</i> or <i>&dr=1</i> in their query URL then they will
override that switch.
<li> Disable gigabit generation. If accessing from the API, set &amp;dsrt=0; otherwise set <i>results to scan for gigabits generation by default</i> to 0 in the search controls page for your collection.
</ul>
<br>