Set the value length to 0 in Json.cpp for items that do not return a string value.
Stop replacing '-' with '_' when indexing generic fields.
Add a StackBuf macro: a SafeBuf initialized with a small stack buffer, for use in a local scope.
Zak Betz 2015-06-30 14:09:57 -06:00
parent f490847eb2
commit 7b507a70ef
10 changed files with 109 additions and 49 deletions

View File

@ -149,6 +149,7 @@ JsonItem *Json::parseJsonStringIntoJsonItems ( char *json , int32_t niceness ) {
// set the name
ji->m_name = NAME;
ji->m_nameLen = NAMELEN;
ji->m_valueLen = 0;
// this goes on the stack
if ( m_stackPtr >= MAXJSONPARENTS ) return NULL;
m_stack[m_stackPtr++] = ji;
@ -463,7 +464,7 @@ bool JsonItem::isInArray ( ) {
return false;
}
// convert nubers and bools to strings for this one
// convert numbers and bools to strings for this one
char *JsonItem::getValueAsString ( int32_t *valueLen ) {
// strings are the same
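
For context, a minimal sketch of why initializing m_valueLen matters (a hypothetical caller, not part of this commit): parent items pushed onto the parse stack carry no string value, so a caller trusting the reported length could otherwise copy from a stale field.

// sketch of a caller that relies on m_valueLen being initialized
int32_t valLen = 0;
char *val = ji->getValueAsString ( &valLen );
// with m_valueLen defaulting to 0 for non-string items this copy is
// simply skipped instead of reading an uninitialized length
if ( val && valLen > 0 ) sb.safeMemcpy ( val , valLen );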

Json.h
View File

@ -61,7 +61,7 @@ class JsonItem {
return (char *)this + sizeof(JsonItem);
};
// convert nubers and bools to strings for this one
// convert numbers and bools to strings for this one
char *getValueAsString ( int32_t *valueLen ) ;
// like acme.product.offerPrice if "acme:{product:{offerprice:1.23}}"

View File

@ -455,6 +455,7 @@ void handleRequest20 ( UdpSlot *slot , int32_t netnice ) {
bool gotReplyWrapperxd ( void *state ) {
// grab it
XmlDoc *xd = (XmlDoc *)state;
// get it
UdpSlot *slot = (UdpSlot *)xd->m_slot;
// parse the request
@ -587,12 +588,7 @@ bool Msg20Reply::sendReply ( XmlDoc *xd ) {
gettimeofdayInMilliseconds() ,
color );
// FIXME
// int32_t mdLen = 0;
// char* md = xd->getMetadata(&mdLen);
// if(mdLen) {
// log("this has metadata %s", md);
// }
// . del the list at this point, we've copied all the data into reply
// . this will free a non-null State20::m_ps (ParseState) for us
mdelete ( xd , sizeof(XmlDoc) , "xd20" );

View File

@ -481,13 +481,14 @@ public:
char *ptr_rssItem ; // set for m_getLinkText
char *ptr_categories ;
char *ptr_gigabitQuery ; // , separated list of gigabits
int32_t *ptr_gigabitScores ; // 1-1 with the terms in query
int32_t *ptr_gigabitScores ; // 1-1 with the terms in query
char *ptr_content ; // page content in utf8
char *ptr_sectionVotingInfo ; // in JSON
char *ptr_tr ; // like just using msg22
char *ptr_tlistBuf ;
char *ptr_tiBuf ; // terminfobuf
char *ptr_templateVector ;
char *ptr_metadataBuf;
// . for eventIds include the title and text of the event, and the addr
// serialized using Address::serialize(), and all the start dates
@ -579,6 +580,7 @@ public:
int32_t size_tlistBuf ;
int32_t size_tiBuf ;
int32_t size_templateVector ;
int32_t size_metadataBuf ;
//int32_t size_eventSummaryLines ;
//int32_t size_eventAddr ;
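
Pulling the metadata plumbing in this diff together: the new ptr_metadataBuf/size_metadataBuf pair follows the existing Msg20Reply convention of a pointer member with a matching size_ member. Roughly, XmlDoc fills the pair when building the reply and PageResults echoes it back out (condensed from the hunks elsewhere in this commit):

// XmlDoc::getMsg20Reply(): hand the document's injected metadata to the reply
reply->ptr_metadataBuf  = ptr_metadata;
reply->size_metadataBuf = size_metadata;

// PageResults.cpp: copy it straight into a JSON search result
if ( mr->size_metadataBuf && si->m_format == FORMAT_JSON ) {
	sb->safePrintf ( "\t\t\"metadata\":" );
	sb->safeMemcpy ( mr->ptr_metadataBuf , mr->size_metadataBuf );
	sb->pushChar ( ',' );
}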

View File

@ -1538,6 +1538,7 @@ bool gotResults ( void *state ) {
break;
}
// limit it
count--;
}
@ -1814,7 +1815,8 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
);
Query gigabitQuery;
SafeBuf ttt;
char tmp[1024];
SafeBuf ttt(tmp, 1024);
// limit it to 40 gigabits for now
for ( int32_t i = 0 ; i < numGigabits && i < 40 ; i++ ) {
Gigabit *gi = &gigabits[i];
@ -3282,26 +3284,26 @@ bool printSearchResultsTail ( State0 *st ) {
// . add our cgi to the original url
// . so if it has &qlang=de and they select &qlang=en
// we have to replace it... etc.
SafeBuf newUrl;
StackBuf(newUrl);
// show banned results
replaceParm2 ("sb=1",
&newUrl,
hr->m_origUrlRequest,
hr->m_origUrlRequestLen );
// no deduping by summary or content hash etc.
SafeBuf newUrl2;
StackBuf(newUrl2);
replaceParm2("dr=0",&newUrl2,newUrl.getBufStart(),
newUrl.length());
// and no site clustering
SafeBuf newUrl3;
StackBuf( newUrl3 );
replaceParm2 ( "sc=0", &newUrl3 , newUrl2.getBufStart(),
newUrl2.length());
// start at results #0 again
SafeBuf newUrl4;
StackBuf( newUrl4 );
replaceParm2 ( "s=0", &newUrl4 , newUrl3.getBufStart(),
newUrl3.length());
// show errors
SafeBuf newUrl5;
StackBuf( newUrl5 );
replaceParm2 ( "showerrors=1",
&newUrl5 ,
newUrl4.getBufStart(),
@ -3341,7 +3343,7 @@ bool printSearchResultsTail ( State0 *st ) {
char nsbuf[128];
sprintf(nsbuf,"s=%"INT32"",ss);
// get the original url and add/replace in &s=xxx
SafeBuf newUrl;
StackBuf ( newUrl );
replaceParm ( nsbuf , &newUrl , hr );
@ -3373,7 +3375,7 @@ bool printSearchResultsTail ( State0 *st ) {
char nsbuf[128];
sprintf(nsbuf,"s=%"INT32"",ss);
// get the original url and add/replace in &s=xxx
SafeBuf newUrl;
StackBuf(newUrl);
replaceParm ( nsbuf , &newUrl , hr );
// close it up
@ -3901,7 +3903,6 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
return true;
}
Msg20 *m20 ;
if ( si->m_streamResults )
m20 = msg40->getCompletedSummary(ix);
@ -4541,7 +4542,8 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
int32_t cols = 80;
cols = si->m_summaryMaxWidth;
SafeBuf hb;
char tmp3[1024];
SafeBuf hb(tmp3, 1024);
if ( str && strLen && si->m_doQueryHighlighting ) {
hlen = hi.set ( &hb,
//tt ,
@ -5436,7 +5438,8 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
qq.safeStrcpy(st->m_qe);
qq.nullTerm();
// get the original url and add/replace in query
SafeBuf newUrl;
char tmp2[512];
SafeBuf newUrl(tmp2, 512);
replaceParm ( qq.getBufStart() , &newUrl , hr );
// put show more results from this site link
sb->safePrintf (" - <nobr><a href=\"%s\">"
@ -5611,6 +5614,47 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
help.safePrintf("<br><br>"
*/
if ( mr->size_metadataBuf && si->m_format == FORMAT_JSON) {
sb->safePrintf("\t\t\"metadata\":");
sb->safeMemcpy(mr->ptr_metadataBuf, mr->size_metadataBuf);
sb->pushChar(',');
}
if ( mr->size_metadataBuf && si->m_format == FORMAT_HTML) {
Json md;
JsonItem *ji = md.parseJsonStringIntoJsonItems(mr->ptr_metadataBuf,
0);
char tmpBuf1[1024];
char tmpBuf2[1024];
SafeBuf nameBuf(tmpBuf1, 1024);
for ( ; ji ; ji = ji->m_next ) {
if(ji->isInArray()) continue;
ji->getCompoundName ( nameBuf ) ;
if(nameBuf.length() == 0) {
continue;
}
//nameBuf.replaceChar('-', '_');
nameBuf.nullTerm();
int32_t valLen;
char* valBuf = ji->getValueAsString(&valLen);
SafeBuf queryBuf(tmpBuf2, 1024);
// log("compound name is %s %d %d",nameBuf.getBufStart(),
// nameBuf.length(), valLen);
queryBuf.safePrintf("/search?q=%s:%%22",nameBuf.getBufStart());
queryBuf.urlEncode(valBuf, valLen);
queryBuf.safePrintf("%%22&c=%s",coll);
queryBuf.nullTerm();
sb->safePrintf(" - <a href=\"%s\">%s:\"", queryBuf.getBufStart(),
nameBuf.getBufStart());
sb->safeMemcpy(valBuf, valLen);
sb->safeStrcpy("\"</a>");
}
}
// end serp div
if ( si->m_format == FORMAT_WIDGET_IFRAME ||
@ -5663,7 +5707,8 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
// print breakout tables here for distance matrix
//SafeBuf bt;
// final score calc
SafeBuf ft;
char tmp[1024];
SafeBuf ft(tmp, 1024);
// int16_tcut
//Query *q = si->m_q;
@ -5758,7 +5803,6 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
}
// close the distance table
//if ( nr ) sb->safePrintf("</table>");
@ -5828,6 +5872,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
if ( ! firstTime ) sb->safePrintf("</table><br>");
}
char *ff = "";
if ( si->m_useMinAlgo ) ff = "MIN ";
@ -9123,6 +9168,7 @@ bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) {
};
*/
SafeBuf cu;
hr->getCurrentUrl ( cu );
@ -9586,7 +9632,8 @@ bool printMenu ( SafeBuf *sb , int32_t menuNum , HttpRequest *hr ) {
// . add our cgi to the original url
// . so if it has &qlang=de and they select &qlang=en
// we have to replace it... etc.
SafeBuf newUrl;
char tmp2[512];
SafeBuf newUrl(tmp2, 512);
replaceParm ( mi->m_cgi , &newUrl , hr );
// print each item in there
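
To make the new HTML metadata block above concrete, here is a worked example of the link it emits, assuming the metadata injected by qa.cpp later in this commit and the qatest123 collection: for the "a-hyphenated-name":5 field, getCompoundName() yields a-hyphenated-name, getValueAsString() yields "5", and the loop prints roughly

queryBuf:   /search?q=a-hyphenated-name:%225%22&c=qatest123
serp line:   - <a href="/search?q=a-hyphenated-name:%225%22&c=qatest123">a-hyphenated-name:"5"</a>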

View File

@ -388,4 +388,11 @@ public:
};
#define TOKENPASTE(x, y) x ## y
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
#define StackBuf(name) char TOKENPASTE2(tmpsafebuf, __LINE__)[1024]; \
SafeBuf name(TOKENPASTE2(tmpsafebuf, __LINE__), 1024)
#endif
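
As a usage note on the macro above: TOKENPASTE2 splices __LINE__ into the temporary's name so multiple StackBuf declarations in one scope get distinct temporaries, and the intent, per the commit message, is that the SafeBuf only falls back to heap allocation if its contents outgrow the 1024-byte stack array. For example, StackBuf(newUrl); on, say, line 3287 expands to roughly:

char tmpsafebuf3287[1024];
SafeBuf newUrl ( tmpsafebuf3287 , 1024 );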

View File

@ -1446,11 +1446,6 @@ bool XmlDoc::set4 ( SpiderRequest *sreq ,
if ( m_sreqValid )
m_recycleContent = m_sreq.m_recycleContent;
if(metadata) {
log("metadata is %s", metadata);
} else {
log("metadata is empty");
}
m_hasMetadata = (bool)metadata;
ptr_metadata = metadata;
size_metadata = metadataLen;
@ -29211,16 +29206,11 @@ bool XmlDoc::hashMetaTags ( HashTableX *tt ) {
if (jpMetadata.parseJsonStringIntoJsonItems (ptr_metadata, m_niceness)){
hashJSONFields2 ( tt , &hi , &jpMetadata , false );
log("we hashed the terms in %s", ptr_metadata);
} else {
log("had error parsing json in %s", ptr_metadata);
log("XmlDoc had error parsing json in metadata %s", ptr_metadata);
}
}
return true;
}
@ -31875,6 +31865,7 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
return reply;
}
// if they provided a query with gbfacet*: terms then we have
// to get those facet values.
if ( ! m_gotFacets ) {
@ -32176,10 +32167,10 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
ptr_utf8Content[size_utf8Content-1] = '\0';
}
*/
// does they want a summary?
// do they want a summary?
if ( m_req->m_numSummaryLines>0 && ! reply->ptr_displaySum ) {
char *hsum = getHighlightedSummary();
if ( ! hsum || hsum == (void *)-1 ) return (Msg20Reply *)hsum;
//Summary *s = getSummary();
//if ( ! s || s == (void *)-1 ) return (Msg20Reply *)s;
@ -32500,6 +32491,8 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
reply->ptr_dmozTitles = ptr_dmozTitles;
reply->ptr_dmozSumms = ptr_dmozSumms;
reply->ptr_dmozAnchors = ptr_dmozAnchors;
reply->ptr_metadataBuf = ptr_metadata;
reply->size_ubuf = getFirstUrl()->getUrlLen() + 1;
reply->size_rubuf = rulen;
@ -32508,6 +32501,8 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
reply->size_dmozTitles = size_dmozTitles;
reply->size_dmozSumms = size_dmozSumms;
reply->size_dmozAnchors = size_dmozAnchors;
reply->size_metadataBuf = size_metadata;
// breathe
QUICKPOLL( m_req->m_niceness );
@ -33519,6 +33514,7 @@ char *XmlDoc::getHighlightedSummary ( ) {
}
Summary *s = getSummary();
if ( ! s || s == (void *)-1 ) return (char *)s;
Query *q = getQuery();
@ -33546,7 +33542,7 @@ char *XmlDoc::getHighlightedSummary ( ) {
//char tt[5000];
Highlight hi;
SafeBuf hb;
StackBuf(hb);
// highlight the query in it
int32_t hlen = hi.set ( &hb,
//tt ,
@ -33563,6 +33559,7 @@ char *XmlDoc::getHighlightedSummary ( ) {
0,
m_niceness );
// highlight::set() returns 0 on error
if ( hlen < 0 ) {
log("build: highlight class error = %s",mstrerror(g_errno));
@ -33600,6 +33597,7 @@ char *XmlDoc::getHighlightedSummary ( ) {
//
SafeBuf *XmlDoc::getSampleForGigabits ( ) {
if ( m_gsbufValid ) return &m_gsbuf;
// assume empty
@ -35232,6 +35230,7 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
// the distance cursor, m_dist
dt->addTerm144 ( &k );
// . make the m_wordPosInfoBuf here because we need to set
// WordPosInfo::m_wordPtr/m_wordLen.
// . could also use instead of the "wts" buffer?
@ -51659,7 +51658,7 @@ char *XmlDoc::hashJSONFields2 ( HashTableX *table ,
// change all :'s in names to .'s since : is reserved!
char *px = nameBuf.getBufStart();
for ( ; *px ; px++ ) if ( *px == ':' ) *px = '.';
for ( px = nameBuf.getBufStart(); *px ; px++ ) if ( *px == '-' ) *px = '_';
//for ( px = nameBuf.getBufStart(); *px ; px++ ) if ( *px == '-' ) *px = '_';
//
// DIFFBOT special field hacks
//
@ -51858,7 +51857,7 @@ bool XmlDoc::storeFacetValues ( char *qs , SafeBuf *sb , FacetValHash_t fvh ) {
// sanity
if ( ! m_contentTypeValid ) { char *xx=NULL;*xx=0; }
storeFacetValuesSite ( qs, sb, fvh );
storeFacetValuesSite ( qs, sb, fvh );
// if "qa" is a gbxpathsitehash123456 type of beastie then we
// gotta scan the sections
@ -51868,7 +51867,7 @@ bool XmlDoc::storeFacetValues ( char *qs , SafeBuf *sb , FacetValHash_t fvh ) {
// if a json doc, get json field
// spider status docs are really json now
if ( m_contentType == CT_JSON || m_contentType == CT_STATUS )
return storeFacetValuesJSON ( qs , sb , fvh );
return storeFacetValuesJSON ( qs , sb , fvh, getParsedJson());
if ( m_contentType == CT_HTML )
return storeFacetValuesHtml ( qs , sb , fvh );
@ -51876,6 +51875,13 @@ bool XmlDoc::storeFacetValues ( char *qs , SafeBuf *sb , FacetValHash_t fvh ) {
if ( m_contentType == CT_XML )
return storeFacetValuesXml ( qs , sb , fvh );
if ( m_hasMetadata) {
Json jpMetadata;
if (jpMetadata.parseJsonStringIntoJsonItems (ptr_metadata, m_niceness)) {
storeFacetValuesJSON ( qs, sb, fvh, &jpMetadata );
}
}
return true;
}
@ -52126,11 +52132,11 @@ bool XmlDoc::storeFacetValuesXml(char *qs, SafeBuf *sb, FacetValHash_t fvh ) {
return true;
}
bool XmlDoc::storeFacetValuesJSON (char *qs, SafeBuf *sb,FacetValHash_t fvh ) {
bool XmlDoc::storeFacetValuesJSON (char *qs,
SafeBuf *sb,
FacetValHash_t fvh,
Json *jp ) {
// use new json parser
Json *jp = getParsedJson();
JsonItem *ji = jp->getFirstItem();
char nb[1024];
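
Summing up the storeFacetValues() change: the JSON variant now takes the parsed tree as an argument instead of always calling getParsedJson() itself, so the same walker can be pointed at either the document body or the injected metadata. Condensed from the two call sites in this hunk:

// JSON (and spider status) docs: walk the doc's own parsed tree
if ( m_contentType == CT_JSON || m_contentType == CT_STATUS )
	return storeFacetValuesJSON ( qs , sb , fvh , getParsedJson() );

// injected metadata: parse it on the fly and walk that tree as well
if ( m_hasMetadata ) {
	Json jpMetadata;
	if ( jpMetadata.parseJsonStringIntoJsonItems ( ptr_metadata , m_niceness ) )
		storeFacetValuesJSON ( qs , sb , fvh , &jpMetadata );
}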

View File

@ -959,7 +959,8 @@ class XmlDoc {
bool storeFacetValuesXml ( char *qs , class SafeBuf *sb ,
FacetValHash_t fvh ) ;
bool storeFacetValuesJSON ( char *qs , class SafeBuf *sb ,
FacetValHash_t fvh ) ;
FacetValHash_t fvh,
Json* jp ) ;
// print out for PageTitledb.cpp and PageParser.cpp
bool printDoc ( class SafeBuf *pbuf );

qa.cpp
View File

@ -1634,7 +1634,7 @@ bool qaInjectMetadata ( ) {
if ( s_flags[ADD_INITIAL_URLS] == 0) {
char* metadata = "{\"testtest\":42,\"a-hyphenated-name\":5, "
"\"a-string-value\":\"can we search for this\", "
"\"a field with spaces\":6}";
"\"a field with spaces\":6, \"compound\":{\"field\":7}}";
s_flags[ADD_INITIAL_URLS]++;
SafeBuf sb;
@ -1666,7 +1666,7 @@ bool qaInjectMetadata ( ) {
if ( s_flags[EXAMINE_RESULTS2] == 0) {
s_flags[EXAMINE_RESULTS2]++;
log("searching for metadata");
if ( ! getUrl ( "/search?c=qatest123&q=a_hyphenated_name%3A5"
if ( ! getUrl ( "/search?c=qatest123&q=a-hyphenated-name%3A5"
"&n=1000&sb=1&dr=0&sc=0&s=0&showerrors=1&format=json",
1,// Checksum
NULL,
@ -1678,7 +1678,7 @@ bool qaInjectMetadata ( ) {
if ( s_flags[EXAMINE_RESULTS3] == 0) {
s_flags[EXAMINE_RESULTS3]++;
log("searching for metadata");
if ( ! getUrl ( "/search?c=qatest123&q=a_string_value%3A\"can+we+search+for+this\""
if ( ! getUrl ( "/search?c=qatest123&q=a-string-value%3A\"can+we+search+for+this\""
"&n=1000&sb=1&dr=0&sc=0&s=0&showerrors=1&format=json",
1,// Checksum
NULL,

View File

@ -26,7 +26,7 @@ def injectItem(item):
'c':'ait'}
print "sending", postVars,' to gb'
rp = requests.post("http://localhost:8000/admin/inject", postVars)
print rp.content
print postVars['url'], rp.status_code