mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Added the gbfieldmatch: operator for exactly matching
the full value of a named field. Case sensitive. Uses the gbfacetstr: values that were hashed at index time. Example: gbfieldmatch:object.field:"Some Value". See help.html for more examples.
This commit is contained in:
parent
2800ce0e04
commit
6607cc2cbe
11
Posdb.cpp
11
Posdb.cpp
@ -4324,6 +4324,8 @@ bool PosdbTable::setQueryTermInfo ( ) {
|
||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||
if (qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
|
||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||
if (qt->m_fieldCode == FIELD_GBFIELDMATCH )
|
||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||
|
||||
|
||||
if (qt->m_fieldCode == FIELD_GBFACETSTR )
|
||||
@ -4706,6 +4708,11 @@ inline bool isInRange( char *p , QueryTerm *qt ) {
|
||||
return ( score2 == qt->m_qword->m_int );
|
||||
}
|
||||
|
||||
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH ) {
|
||||
long score2 = g_posdb.getInt ( p );
|
||||
return ( score2 == qt->m_qword->m_int );
|
||||
}
|
||||
|
||||
// how did this happen?
|
||||
char *xx=NULL;*xx=0;
|
||||
return true;
|
||||
@ -4753,6 +4760,8 @@ void PosdbTable::addDocIdVotes ( QueryTermInfo *qti , long listGroupNum ) {
|
||||
isRangeTerm = true;
|
||||
if ( qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
|
||||
isRangeTerm = true;
|
||||
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH )
|
||||
isRangeTerm = true;
|
||||
|
||||
// . just scan each sublist vs. the docid list
|
||||
// . a sublist is a termlist for a particular query term, for instance
|
||||
@ -7429,6 +7438,8 @@ bool PosdbTable::makeDocIdVoteBufForBoolQuery_r ( ) {
|
||||
isRangeTerm = true;
|
||||
if ( qt->m_fieldCode == FIELD_GBNUMBEREQUALINT )
|
||||
isRangeTerm = true;
|
||||
if ( qt->m_fieldCode == FIELD_GBFIELDMATCH )
|
||||
isRangeTerm = true;
|
||||
|
||||
// . make it consistent with Query::isTruth()
|
||||
// . m_bitNum is set above to the QueryTermInfo #
|
||||
|
76
Query.cpp
76
Query.cpp
@ -2297,6 +2297,13 @@ bool Query::setQWords ( char boolFlag ,
|
||||
ph = hash64 ("gbsortbyint", 11);
|
||||
|
||||
|
||||
// really just like the gbfacetstr operator but we do not
|
||||
// display the facets, instead we try to match the provided
|
||||
// facet value exactly, case sensitively
|
||||
if ( fieldCode == FIELD_GBFIELDMATCH )
|
||||
ph = hash64 ("gbfacetstr", 10);
|
||||
|
||||
|
||||
if ( fieldCode == FIELD_GBFACETFLOAT )
|
||||
ph = hash64 ("gbsortby",8);
|
||||
if ( fieldCode == FIELD_GBFACETINT )
|
||||
@ -2339,6 +2346,7 @@ bool Query::setQWords ( char boolFlag ,
|
||||
fieldCode == FIELD_GBFACETSTR ||
|
||||
fieldCode == FIELD_GBFACETINT ||
|
||||
fieldCode == FIELD_GBFACETFLOAT ||
|
||||
fieldCode == FIELD_GBFIELDMATCH ||
|
||||
|
||||
fieldCode == FIELD_GBAD ) {
|
||||
// . find 1st space -- that terminates the field value
|
||||
@ -2395,7 +2403,8 @@ bool Query::setQWords ( char boolFlag ,
|
||||
fieldCode == FIELD_GBNUMBEREQUALFLOAT ||
|
||||
fieldCode == FIELD_GBNUMBEREQUALINT ||
|
||||
fieldCode == FIELD_GBNUMBERMININT ||
|
||||
fieldCode == FIELD_GBNUMBERMAXINT ) ) {
|
||||
fieldCode == FIELD_GBNUMBERMAXINT ||
|
||||
fieldCode == FIELD_GBFIELDMATCH ) ) {
|
||||
// record the field
|
||||
wid = hash64Lower_utf8(w,lastColonLen , 0LL );
|
||||
// fix gbminint:gbfacetstr:gbxpath...:165004297
|
||||
@ -2417,27 +2426,61 @@ bool Query::setQWords ( char boolFlag ,
|
||||
qw->m_float = atof ( w + lastColonLen + 1 );
|
||||
qw->m_int = (long)atoll( w + lastColonLen+1);
|
||||
// if it is like
|
||||
// gbequalint:tag.uri:"http://xyz.com/poo"
|
||||
// gbfieldmatch:tag.uri:"http://xyz.com/poo"
|
||||
// then we should hash the string into
|
||||
// an int just like how the field value would
|
||||
// be hashed when adding gbequalint: terms
|
||||
// be hashed when adding gbfacetstr: terms
|
||||
// in XmlDoc.cpp:hashFacet2(). the hash of
|
||||
// the tag.uri field, for example, is set
|
||||
// in hashFacet1() and set to "val32". so
|
||||
// hash it just like that does here.
|
||||
if ( colonCount >= 1 &&
|
||||
fieldCode == FIELD_GBNUMBEREQUALINT &&
|
||||
firstColonLen > 0 &&
|
||||
wlen>3 && // something must be in the ""'s
|
||||
w[wlen-1] == '\"' &&
|
||||
w[firstColonLen] == ':' &&
|
||||
w[firstColonLen+1] == '\"' ) {
|
||||
fieldCode == FIELD_GBFIELDMATCH &&
|
||||
firstColonLen > 0 ) {
|
||||
char *a = w + firstColonLen + 1;
|
||||
// . skip over colon at start
|
||||
if ( a[0] == ':' ) a++;
|
||||
// . skip over quotes at start/end
|
||||
char *a = w + firstColonLen + 2;
|
||||
char *b = w + wlen - 1;
|
||||
bool inQuotes = false;
|
||||
if ( a[0] == '\"' ) {
|
||||
inQuotes = true;
|
||||
a++;
|
||||
}
|
||||
// end of field
|
||||
char *b = a;
|
||||
// if not in quotes advance until
|
||||
// we hit whitespace
|
||||
char cs;
|
||||
for ( ; ! inQuotes && *b ; b += cs ) {
|
||||
cs = getUtf8CharSize(b);
|
||||
if ( is_wspace_utf8(b) ) break;
|
||||
}
|
||||
// if in quotes, go until we hit quote
|
||||
for ( ; inQuotes && *b != '\"';b++);
|
||||
|
||||
// now hash the value
|
||||
qw->m_int = hash32 ( a , b - a );
|
||||
qw->m_float = (float)qw->m_int;
|
||||
//
|
||||
// hash it like
|
||||
// gbfacetstr:object.price
|
||||
// even though its
|
||||
// gbfieldmatch:object.title:"some foo"
|
||||
//
|
||||
/*
|
||||
long long wid1;
|
||||
long long wid2;
|
||||
a = w;
|
||||
b = w + firstColonLen;
|
||||
wid1 = hash64Lower_utf8(a,b-a);
|
||||
a = w + firstColonLen+1;
|
||||
b = w + lastColonLen;
|
||||
wid2 = hash64Lower_utf8(a,b-a);
|
||||
// keep prefix as 2nd arg to this
|
||||
wid = hash64 ( wid2 , wid1 );
|
||||
// we need this for it to work
|
||||
ph = 0LL;
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@ -3753,8 +3796,15 @@ struct QueryField g_fields[] = {
|
||||
"Similar to gbminint and gbmaxint but is an equality constraint.",
|
||||
0},
|
||||
|
||||
|
||||
|
||||
{"gbfieldmatch",
|
||||
FIELD_GBFIELDMATCH,
|
||||
true,
|
||||
"gbfieldhash:strings.vendor:\"My Vendor Inc.\"",
|
||||
"Matches all the meta tag or JSON or XML fields that have "
|
||||
"the name \"strings.vendor\" and contain the exactly provided "
|
||||
"value, in this case, <i>My Vendor Inc.</i>. This is case "
|
||||
"sensitive, so it's exact match.",
|
||||
0 },
|
||||
|
||||
|
||||
{"gbdocspiderdate",
|
||||
|
1
Query.h
1
Query.h
@ -127,6 +127,7 @@ typedef unsigned long long qvec_t;
|
||||
#define FIELD_GBNUMBEREQUALINT 66
|
||||
#define FIELD_GBNUMBEREQUALFLOAT 67
|
||||
#define FIELD_SUBURL2 68
|
||||
#define FIELD_GBFIELDMATCH 69
|
||||
|
||||
#define FIELD_GBOTHER 92
|
||||
|
||||
|
11
XmlDoc.cpp
11
XmlDoc.cpp
@ -32381,10 +32381,12 @@ bool XmlDoc::hashFacet2 ( char *prefix,
|
||||
// s_facetPrefixHash = hash64n ( "gbfacet" );
|
||||
long long prefixHash = hash64n ( prefix );
|
||||
|
||||
// term is like something like "object.price" or whatever.
|
||||
// it is the json field itself, or the meta tag name, etc.
|
||||
long long termId64 = hash64n ( term );
|
||||
|
||||
// combine with the "gbfacet" prefix. old prefix hash on right.
|
||||
// like "price" on left and "gbfacetfloat" on left... see Query.cpp
|
||||
// like "price" on right and "gbfacetfloat" on left... see Query.cpp.
|
||||
long long ph2 = hash64 ( termId64, prefixHash );
|
||||
|
||||
// . now store it
|
||||
@ -32458,13 +32460,16 @@ bool XmlDoc::hashFacet2 ( char *prefix,
|
||||
// make a special hashinfo for this facet
|
||||
HashInfo hi;
|
||||
hi.m_tt = tt;
|
||||
hi.m_prefix = prefix;//"gbfacet";
|
||||
// the full prefix
|
||||
char fullPrefix[64];
|
||||
snprintf(fullPrefix,64,"%s:%s",prefix,term);
|
||||
hi.m_prefix = fullPrefix;//"gbfacet";
|
||||
|
||||
// add to wts for PageParser.cpp display
|
||||
// store it
|
||||
if ( ! storeTerm ( buf,
|
||||
bufLen,
|
||||
prefixHash, // s_facetPrefixHash,
|
||||
ph2, // prefixHash, // s_facetPrefixHash,
|
||||
&hi,
|
||||
0, // word#, i,
|
||||
0, // wordPos
|
||||
|
64
qa.cpp
64
qa.cpp
@ -1518,16 +1518,6 @@ bool qajson ( ) {
|
||||
}
|
||||
|
||||
|
||||
// test gbequalint:field:"quoted value" query to ensure it converts
|
||||
// the quoted value into the right int32
|
||||
if ( ! s_flags[11] ) {
|
||||
s_flags[11] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbequalint%3AAstrings.key%3A\"samsung\"",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// other query tests...
|
||||
if ( ! s_flags[12] ) {
|
||||
s_flags[12] = true;
|
||||
@ -1546,6 +1536,60 @@ bool qajson ( ) {
|
||||
}
|
||||
|
||||
|
||||
// test gbfieldmatch:field:"quoted value" query to ensure it converts
|
||||
// the quoted value into the right int32
|
||||
if ( ! s_flags[14] ) {
|
||||
s_flags[14] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key"
|
||||
"%3Ainvestigate-tweet",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( ! s_flags[15] ) {
|
||||
s_flags[15] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key"
|
||||
"%3A\"Maemo+Browser\"",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( ! s_flags[16] ) {
|
||||
s_flags[16] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key"
|
||||
"%3A\"Google+Wireless+Transcoder\"",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// this should have no results, not capitalized
|
||||
if ( ! s_flags[17] ) {
|
||||
s_flags[17] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key%3A\"samsung\"",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( ! s_flags[18] ) {
|
||||
s_flags[18] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key%3ASamsung",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( ! s_flags[18] ) {
|
||||
s_flags[18] = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||
"q=gbfieldmatch%3Astrings.key%3A\"Samsung\"",
|
||||
-1310551262 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//static bool s_fee2 = false;
|
||||
|
Loading…
Reference in New Issue
Block a user