mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 04:07:13 +03:00
get summary "ns" parm and collectionrec
knobs for summary gen working.
This commit is contained in:
parent
af014abdcd
commit
b0caf3eb00
@ -615,7 +615,6 @@ class CollectionRec {
|
||||
long m_summaryMaxLen;
|
||||
long m_summaryMaxNumLines;
|
||||
long m_summaryMaxNumCharsPerLine;
|
||||
long m_summaryDefaultNumLines;
|
||||
char m_useNewSummaries;
|
||||
|
||||
char m_getDocIdScoringInfo;
|
||||
|
6
Msg20.h
6
Msg20.h
@ -418,7 +418,8 @@ public:
|
||||
char *ptr_tbuf ; // title buffer
|
||||
char *ptr_ubuf ; // url buffer
|
||||
char *ptr_rubuf ; // redirect url buffer
|
||||
char *ptr_sum ; // summary
|
||||
char *ptr_displaySum ; // summary for displaying
|
||||
char *ptr_dedupSum ; // summary for deduping
|
||||
char *ptr_dbuf ; // display metas \0 separated
|
||||
//char *ptr_sbuf ; // big sample buf for gigabits
|
||||
char *ptr_gigabitSample ;
|
||||
@ -514,7 +515,8 @@ public:
|
||||
long size_tbuf ;
|
||||
long size_ubuf ;
|
||||
long size_rubuf ;
|
||||
long size_sum ;
|
||||
long size_displaySum ;
|
||||
long size_dedupSum ;
|
||||
long size_dbuf ;
|
||||
//long size_sbuf ;
|
||||
long size_gigabitSample ; // includes \0
|
||||
|
@ -1493,7 +1493,9 @@ bool Msg40::launchMsg20s ( bool recalled ) {
|
||||
req.m_titleMaxLen = 256;
|
||||
req.m_titleMaxLen = cr->m_titleMaxLen;
|
||||
req.m_summaryMaxLen = cr->m_summaryMaxLen;
|
||||
req.m_numSummaryLines = cr->m_summaryMaxNumLines;
|
||||
//req.m_numSummaryLines = cr->m_summaryMaxNumLines;
|
||||
// let "ns" parm override
|
||||
req.m_numSummaryLines = m_si->m_numLinesInSummary;
|
||||
if(m_si->m_isAdmin && m_si->m_format == FORMAT_HTML )
|
||||
req.m_getGigabitVector = true;
|
||||
else req.m_getGigabitVector = false;
|
||||
|
@ -3399,13 +3399,12 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
||||
// . "s" is a string of null terminated strings
|
||||
char *send;
|
||||
// do the normal summary
|
||||
str = mr->ptr_sum;
|
||||
str = mr->ptr_displaySum;
|
||||
// sometimes the summary is longer than requested because for
|
||||
// summary deduping purposes (see "pss" parm in Parms.cpp) we do not
|
||||
// get it as short as requested. so use mr->m_sumPrintSize here
|
||||
// not mr->size_sum
|
||||
strLen = mr->size_sum-1;
|
||||
//strLen = mr->m_sumPrintSize-1;
|
||||
strLen = mr->size_displaySum-1;
|
||||
|
||||
// this includes the terminating \0 or \0\0 so back up
|
||||
if ( strLen < 0 ) strLen = 0;
|
||||
|
11
Parms.cpp
11
Parms.cpp
@ -7749,12 +7749,11 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "default number of summary excerpts";
|
||||
m->m_desc = "What is the default number of "
|
||||
"summary excerpts displayed per search result?";
|
||||
m->m_title = "number of summary excerpts";
|
||||
m->m_desc = "How many summary excerpts to display per search result?";
|
||||
m->m_cgi = "ns";
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_defOff= (char *)&cr.m_summaryDefaultNumLines - x;
|
||||
m->m_defOff= (char *)&cr.m_summaryMaxNumLines - x;
|
||||
m->m_group = 0;
|
||||
m->m_off = (char *)&si.m_numLinesInSummary - y;
|
||||
m->m_flags = PF_API;
|
||||
@ -7763,7 +7762,6 @@ void Parms::init ( ) {
|
||||
m++;
|
||||
|
||||
|
||||
|
||||
m->m_title = "max summary line width";
|
||||
m->m_desc = "<br> tags are inserted to keep the number "
|
||||
"of chars in the summary per line at or below this width. "
|
||||
@ -14211,6 +14209,7 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "default number of summary excerpts by default";
|
||||
m->m_desc = "What is the default number of "
|
||||
"summary excerpts displayed per search result?";
|
||||
@ -14223,7 +14222,7 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
*/
|
||||
|
||||
m->m_title = "max summary line width by default";
|
||||
m->m_desc = "<br> tags are inserted to keep the number "
|
||||
|
35
Summary.cpp
35
Summary.cpp
@ -58,6 +58,7 @@ bool Summary::set2 ( Xml *xml ,
|
||||
bool doStemming ,
|
||||
long maxSummaryLen ,
|
||||
long maxNumLines ,
|
||||
long numDisplayLines ,
|
||||
long maxNumCharsPerLine ,
|
||||
//long bigSampleRadius ,
|
||||
//long bigSampleMaxLen ,
|
||||
@ -81,6 +82,9 @@ bool Summary::set2 ( Xml *xml ,
|
||||
// to see if it has all the query terms...
|
||||
//if ( maxNumLines <= 0 ) return true;
|
||||
|
||||
m_numDisplayLines = numDisplayLines;
|
||||
m_displayLen = 0;
|
||||
|
||||
//m_useDateLists = useDateLists;
|
||||
//m_exclDateList = exclDateList;
|
||||
//m_begPubDateList = begPubDateList;
|
||||
@ -232,7 +236,12 @@ bool Summary::set2 ( Xml *xml ,
|
||||
// highest scoring window around each term. And then find the highest
|
||||
// of those over all the matching terms.
|
||||
//
|
||||
for ( long numFinal = 0; numFinal < maxNumLines; numFinal++ ){
|
||||
long numFinal;
|
||||
for ( numFinal = 0; numFinal < maxNumLines; numFinal++ ){
|
||||
|
||||
if ( numFinal == m_numDisplayLines )
|
||||
m_displayLen = p - m_summary;
|
||||
|
||||
// reset these at the top of each loop
|
||||
Match *maxm;
|
||||
long long maxScore = 0;
|
||||
@ -508,6 +517,9 @@ bool Summary::set2 ( Xml *xml ,
|
||||
bb[j] |= D_USED;
|
||||
}
|
||||
|
||||
if ( numFinal <= m_numDisplayLines )
|
||||
m_displayLen = p - m_summary;
|
||||
|
||||
/*end = gettimeofdayInMilliseconds();
|
||||
if ( end - start > 10 )
|
||||
log ( LOG_WARN,"summary: took %llims to finish doing summary "
|
||||
@ -530,18 +542,25 @@ bool Summary::set2 ( Xml *xml ,
|
||||
m_summaryExcerptLen[0] = p - m_summary;
|
||||
m_numExcerpts = 1;
|
||||
}
|
||||
// in this case we only have one summary line
|
||||
if ( m_numDisplayLines > 0 )
|
||||
m_displayLen = p - m_summary;
|
||||
}
|
||||
|
||||
|
||||
// If we still didn't find a summary, get the default summary
|
||||
if ( p == m_summary )
|
||||
if ( p == m_summary ) {
|
||||
// then return the default summary
|
||||
return getDefaultSummary ( xml,
|
||||
words,
|
||||
sections,
|
||||
pos,
|
||||
//bigSampleRadius,
|
||||
maxSummaryLen );
|
||||
bool status = getDefaultSummary ( xml,
|
||||
words,
|
||||
sections,
|
||||
pos,
|
||||
//bigSampleRadius,
|
||||
maxSummaryLen );
|
||||
if ( m_numDisplayLines > 0 )
|
||||
m_displayLen = m_summaryLen;
|
||||
return status;
|
||||
}
|
||||
|
||||
// if we don't find a summary, there's no need to NULL terminate
|
||||
if ( p != m_summary ) *p++ = '\0';
|
||||
|
@ -78,6 +78,7 @@ class Summary {
|
||||
//long collLen ,
|
||||
bool doStemming ,
|
||||
long maxSummaryLen ,
|
||||
long numDisplayLines ,
|
||||
long maxNumLines ,
|
||||
long maxNumCharsPerLine ,
|
||||
//long bigSampleRadius ,
|
||||
@ -237,6 +238,12 @@ class Summary {
|
||||
//bool m_freeBuf;
|
||||
//char m_localBuf[10032];
|
||||
|
||||
// if getting more lines for deduping than we need for displaying,
|
||||
// how big is that part of the summary to display?
|
||||
long m_numDisplayLines;
|
||||
long m_displayLen;
|
||||
// accessor for m_displayLen, i.e. how much of the summary is
// meant for display (as opposed to the extra lines for deduping)
long getSummaryDisplayLen() {
	return m_displayLen;
}
|
||||
|
||||
long m_maxNumCharsPerLine;
|
||||
|
||||
long m_titleVersion;
|
||||
|
23
XmlDoc.cpp
23
XmlDoc.cpp
@ -28402,9 +28402,11 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
||||
*/
|
||||
|
||||
// do they want a summary?
|
||||
if ( m_req->m_numSummaryLines>0 && ! reply->ptr_sum ) {
|
||||
if ( m_req->m_numSummaryLines>0 && ! reply->ptr_displaySum ) {
|
||||
char *sum = getHighlightedSummary();
|
||||
if ( ! sum || sum == (void *)-1 ) return (Msg20Reply *)sum;
|
||||
Summary *s = getSummary();
|
||||
if ( ! s || s == (void *)-1 ) return (Msg20Reply *)s;
|
||||
//long sumLen = m_finalSummaryBuf.length();
|
||||
// is it size and not length?
|
||||
long sumLen = 0;
|
||||
@ -28422,8 +28424,14 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
||||
//long max = m_req->m_numSummaryLines;
|
||||
// grab stuff from it!
|
||||
//reply->m_proximityScore = s->getProximityScore();
|
||||
reply-> ptr_sum = sum;//s->getSummary();
|
||||
reply->size_sum = sumSize;//s->getSummaryLen(max)+1;
|
||||
reply-> ptr_displaySum = sum;//s->getSummary();
|
||||
reply->size_displaySum = sumSize;//s->getSummaryLen(max)+1;
|
||||
// this is unhighlighted for deduping, and it might be longer
|
||||
// . seems like we are not using this for deduping but using
|
||||
// the gigabit vector in Msg40.cpp, so take out for now
|
||||
//reply-> ptr_dedupSum = s->m_summary;
|
||||
//reply->size_dedupSum = s->m_summaryLen+1;
|
||||
//if ( s->m_summaryLen == 0 ) reply->size_dedupSum = 0;
|
||||
//reply->m_diversity = s->getDiversity();
|
||||
}
|
||||
|
||||
@ -29591,6 +29599,10 @@ Summary *XmlDoc::getSummary () {
|
||||
false , // doStemming
|
||||
m_req->m_summaryMaxLen ,
|
||||
numLines ,
|
||||
// . displayLines, # lines we are displaying
|
||||
// . Summary::getSummaryDisplayLen() will return the
|
||||
// length of the summary to display
|
||||
m_req->m_numSummaryLines ,
|
||||
cr->m_summaryMaxNumCharsPerLine,
|
||||
m_req->m_ratInSummary ,
|
||||
getFirstUrl() ,
|
||||
@ -29623,7 +29635,10 @@ char *XmlDoc::getHighlightedSummary ( ) {
|
||||
|
||||
// get the summary
|
||||
char *sum = s->getSummary();
|
||||
long sumLen = s->getSummaryLen();
|
||||
//long sumLen = s->getSummaryLen();
|
||||
long sumLen = s->getSummaryDisplayLen();
|
||||
|
||||
//sum[sumLen] = 0;
|
||||
|
||||
// assume no highlighting?
|
||||
if ( ! m_req->m_highlightQueryTerms || sumLen == 0 ) {
|
||||
|
Loading…
Reference in New Issue
Block a user