added gbfieldmatch: operator for exactly matching

full field names. case sensitive. uses gbfacetstr:
values that were hashed at index time. example:
gbfieldmatch:object.field:"Some Value"
See help.html for more examples.
This commit is contained in:
mwells 2014-08-25 13:57:55 -07:00
parent 2800ce0e04
commit 6607cc2cbe
5 changed files with 137 additions and 26 deletions

View File

@ -4324,6 +4324,8 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBFIELDMATCH )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBFACETSTR )
@ -4706,6 +4708,11 @@ inline bool isInRange( char *p , QueryTerm *qt ) {
return ( score2 == qt->m_qword->m_int );
}
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH ) {
long score2 = g_posdb.getInt ( p );
return ( score2 == qt->m_qword->m_int );
}
// how did this happen?
char *xx=NULL;*xx=0;
return true;
@ -4753,6 +4760,8 @@ void PosdbTable::addDocIdVotes ( QueryTermInfo *qti , long listGroupNum ) {
isRangeTerm = true;
if ( qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
isRangeTerm = true;
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH )
isRangeTerm = true;
// . just scan each sublist vs. the docid list
// . a sublist is a termlist for a particular query term, for instance
@ -7429,6 +7438,8 @@ bool PosdbTable::makeDocIdVoteBufForBoolQuery_r ( ) {
isRangeTerm = true;
if ( qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
isRangeTerm = true;
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH )
isRangeTerm = true;
// . make it consistent with Query::isTruth()
// . m_bitNum is set above to the QueryTermInfo #

View File

@ -2297,6 +2297,13 @@ bool Query::setQWords ( char boolFlag ,
ph = hash64 ("gbsortbyint", 11);
// really just like the gbfacetstr operator but we do not
// display the facets, instead we try to match the provided
// facet value exactly, case sensitively
if ( fieldCode == FIELD_GBFIELDMATCH )
ph = hash64 ("gbfacetstr", 10);
if ( fieldCode == FIELD_GBFACETFLOAT )
ph = hash64 ("gbsortby",8);
if ( fieldCode == FIELD_GBFACETINT )
@ -2339,6 +2346,7 @@ bool Query::setQWords ( char boolFlag ,
fieldCode == FIELD_GBFACETSTR ||
fieldCode == FIELD_GBFACETINT ||
fieldCode == FIELD_GBFACETFLOAT ||
fieldCode == FIELD_GBFIELDMATCH ||
fieldCode == FIELD_GBAD ) {
// . find 1st space -- that terminates the field value
@ -2395,7 +2403,8 @@ bool Query::setQWords ( char boolFlag ,
fieldCode == FIELD_GBNUMBEREQUALFLOAT ||
fieldCode == FIELD_GBNUMBEREQUALINT ||
fieldCode == FIELD_GBNUMBERMININT ||
fieldCode == FIELD_GBNUMBERMAXINT ) ) {
fieldCode == FIELD_GBNUMBERMAXINT ||
fieldCode == FIELD_GBFIELDMATCH ) ) {
// record the field
wid = hash64Lower_utf8(w,lastColonLen , 0LL );
// fix gbminint:gbfacetstr:gbxpath...:165004297
@ -2417,27 +2426,61 @@ bool Query::setQWords ( char boolFlag ,
qw->m_float = atof ( w + lastColonLen + 1 );
qw->m_int = (long)atoll( w + lastColonLen+1);
// if it is like
// gbequalint:tag.uri:"http://xyz.com/poo"
// gbfieldmatch:tag.uri:"http://xyz.com/poo"
// then we should hash the string into
// an int just like how the field value would
// be hashed when adding gbequalint: terms
// be hashed when adding gbfacetstr: terms
// in XmlDoc.cpp:hashFacet2(). the hash of
// the tag.uri field, for example, is set
// in hashFacet1() and set to "val32". so
// hash it just like that does here.
if ( colonCount >= 1 &&
fieldCode == FIELD_GBNUMBEREQUALINT &&
firstColonLen > 0 &&
wlen>3 && // something must be in the ""'s
w[wlen-1] == '\"' &&
w[firstColonLen] == ':' &&
w[firstColonLen+1] == '\"' ) {
fieldCode == FIELD_GBFIELDMATCH &&
firstColonLen > 0 ) {
char *a = w + firstColonLen + 1;
// . skip over colon at start
if ( a[0] == ':' ) a++;
// . skip over quotes at start/end
char *a = w + firstColonLen + 2;
char *b = w + wlen - 1;
bool inQuotes = false;
if ( a[0] == '\"' ) {
inQuotes = true;
a++;
}
// end of field
char *b = a;
// if not in quotes advance until
// we hit whitespace
char cs;
for ( ; ! inQuotes && *b ; b += cs ) {
cs = getUtf8CharSize(b);
if ( is_wspace_utf8(b) ) break;
}
// if in quotes, go until we hit the closing quote. also stop at the
// end of the string, like the unquoted scan above does, so that a
// value with an opening quote but no closing quote cannot walk "b"
// past the end of the buffer.
for ( ; inQuotes && *b && *b != '\"' ; b++ );
// now hash the value
qw->m_int = hash32 ( a , b - a );
qw->m_float = (float)qw->m_int;
//
// hash it like
// gbfacetstr:object.price
// even though it's
// gbfieldmatch:object.title:"some foo"
//
/*
long long wid1;
long long wid2;
a = w;
b = w + firstColonLen;
wid1 = hash64Lower_utf8(a,b-a);
a = w + firstColonLen+1;
b = w + lastColonLen;
wid2 = hash64Lower_utf8(a,b-a);
// keep prefix as 2nd arg to this
wid = hash64 ( wid2 , wid1 );
// we need this for it to work
ph = 0LL;
*/
}
}
@ -3753,8 +3796,15 @@ struct QueryField g_fields[] = {
"Similar to gbminint and gbmaxint but is an equality constraint.",
0},
// gbfieldmatch: exact, case sensitive match on a field's value.
// the example string must use this operator's own name; it used to
// say "gbfieldhash:", which is not the name registered here.
{"gbfieldmatch",
FIELD_GBFIELDMATCH,
true,
"gbfieldmatch:strings.vendor:\"My Vendor Inc.\"",
"Matches all the meta tag or JSON or XML fields that have "
"the name \"strings.vendor\" and contain the exactly provided "
"value, in this case, <i>My Vendor Inc.</i>. This is case "
"sensitive, so it's exact match.",
0 },
{"gbdocspiderdate",

View File

@ -127,6 +127,7 @@ typedef unsigned long long qvec_t;
#define FIELD_GBNUMBEREQUALINT 66
#define FIELD_GBNUMBEREQUALFLOAT 67
#define FIELD_SUBURL2 68
#define FIELD_GBFIELDMATCH 69
#define FIELD_GBOTHER 92

View File

@ -32381,10 +32381,12 @@ bool XmlDoc::hashFacet2 ( char *prefix,
// s_facetPrefixHash = hash64n ( "gbfacet" );
long long prefixHash = hash64n ( prefix );
// term is something like "object.price" or whatever.
// it is the json field itself, or the meta tag name, etc.
long long termId64 = hash64n ( term );
// combine with the "gbfacet" prefix. old prefix hash on right.
// like "price" on left and "gbfacetfloat" on left... see Query.cpp
// like "price" on right and "gbfacetfloat" on left... see Query.cpp.
long long ph2 = hash64 ( termId64, prefixHash );
// . now store it
@ -32458,13 +32460,16 @@ bool XmlDoc::hashFacet2 ( char *prefix,
// make a special hashinfo for this facet
HashInfo hi;
hi.m_tt = tt;
hi.m_prefix = prefix;//"gbfacet";
// the full prefix
char fullPrefix[64];
snprintf(fullPrefix,64,"%s:%s",prefix,term);
hi.m_prefix = fullPrefix;//"gbfacet";
// add to wts for PageParser.cpp display
// store it
if ( ! storeTerm ( buf,
bufLen,
prefixHash, // s_facetPrefixHash,
ph2, // prefixHash, // s_facetPrefixHash,
&hi,
0, // word#, i,
0, // wordPos

64
qa.cpp
View File

@ -1518,16 +1518,6 @@ bool qajson ( ) {
}
// test gbequalint:field:"quoted value" query to ensure it converts
// the quoted value into the right int32
if ( ! s_flags[11] ) {
s_flags[11] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbequalint%3AAstrings.key%3A\"samsung\"",
-1310551262 ) )
return false;
}
// other query tests...
if ( ! s_flags[12] ) {
s_flags[12] = true;
@ -1546,6 +1536,60 @@ bool qajson ( ) {
}
// test gbfieldmatch:field:"quoted value" query to ensure it converts
// the quoted value into the right int32
if ( ! s_flags[14] ) {
s_flags[14] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key"
"%3Ainvestigate-tweet",
-1310551262 ) )
return false;
}
if ( ! s_flags[15] ) {
s_flags[15] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key"
"%3A\"Maemo+Browser\"",
-1310551262 ) )
return false;
}
if ( ! s_flags[16] ) {
s_flags[16] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key"
"%3A\"Google+Wireless+Transcoder\"",
-1310551262 ) )
return false;
}
// this should have no results, not capitalized
if ( ! s_flags[17] ) {
s_flags[17] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key%3A\"samsung\"",
-1310551262 ) )
return false;
}
if ( ! s_flags[18] ) {
s_flags[18] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key%3ASamsung",
-1310551262 ) )
return false;
}
// same query as the previous test but with the value in quotes.
// this needs its own flag: it used to reuse s_flags[18], which the
// previous test has always set by the time we get here, so this
// request was never issued.
// NOTE(review): assumes s_flags has room for index 19 and that no
// later test already uses it -- confirm against the full function.
if ( ! s_flags[19] ) {
s_flags[19] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
"q=gbfieldmatch%3Astrings.key%3A\"Samsung\"",
-1310551262 ) )
return false;
}
//static bool s_fee2 = false;