get summary "ns" parm and collectionrec

knobs for summary gen working.
This commit is contained in:
mwells 2014-07-03 07:29:44 -07:00
parent af014abdcd
commit b0caf3eb00
8 changed files with 67 additions and 25 deletions

View File

@ -615,7 +615,6 @@ class CollectionRec {
long m_summaryMaxLen;
long m_summaryMaxNumLines;
long m_summaryMaxNumCharsPerLine;
long m_summaryDefaultNumLines;
char m_useNewSummaries;
char m_getDocIdScoringInfo;

View File

@ -418,7 +418,8 @@ public:
char *ptr_tbuf ; // title buffer
char *ptr_ubuf ; // url buffer
char *ptr_rubuf ; // redirect url buffer
char *ptr_sum ; // summary
char *ptr_displaySum ; // summary for displaying
char *ptr_dedupSum ; // summary for deduping
char *ptr_dbuf ; // display metas \0 separated
//char *ptr_sbuf ; // big sample buf for gigabits
char *ptr_gigabitSample ;
@ -514,7 +515,8 @@ public:
long size_tbuf ;
long size_ubuf ;
long size_rubuf ;
long size_sum ;
long size_displaySum ;
long size_dedupSum ;
long size_dbuf ;
//long size_sbuf ;
long size_gigabitSample ; // includes \0

View File

@ -1493,7 +1493,9 @@ bool Msg40::launchMsg20s ( bool recalled ) {
req.m_titleMaxLen = 256;
req.m_titleMaxLen = cr->m_titleMaxLen;
req.m_summaryMaxLen = cr->m_summaryMaxLen;
req.m_numSummaryLines = cr->m_summaryMaxNumLines;
//req.m_numSummaryLines = cr->m_summaryMaxNumLines;
// let "ns" parm override
req.m_numSummaryLines = m_si->m_numLinesInSummary;
if(m_si->m_isAdmin && m_si->m_format == FORMAT_HTML )
req.m_getGigabitVector = true;
else req.m_getGigabitVector = false;

View File

@ -3399,13 +3399,12 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
// . "s" is a string of null terminated strings
char *send;
// do the normal summary
str = mr->ptr_sum;
str = mr->ptr_displaySum;
// sometimes more summary lines are generated than requested because
// for summary deduping purposes (see "pss" parm in Parms.cpp) we do
// not get it as short as requested. so use the size of the
// display portion, mr->size_displaySum, here
strLen = mr->size_sum-1;
//strLen = mr->m_sumPrintSize-1;
strLen = mr->size_displaySum-1;
// this includes the terminating \0 or \0\0 so back up
if ( strLen < 0 ) strLen = 0;

View File

@ -7749,12 +7749,11 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
m->m_title = "default number of summary excerpts";
m->m_desc = "What is the default number of "
"summary excerpts displayed per search result?";
m->m_title = "number of summary excerpts";
m->m_desc = "How many summary excerpts to display per search result?";
m->m_cgi = "ns";
m->m_type = TYPE_LONG;
m->m_defOff= (char *)&cr.m_summaryDefaultNumLines - x;
m->m_defOff= (char *)&cr.m_summaryMaxNumLines - x;
m->m_group = 0;
m->m_off = (char *)&si.m_numLinesInSummary - y;
m->m_flags = PF_API;
@ -7763,7 +7762,6 @@ void Parms::init ( ) {
m++;
m->m_title = "max summary line width";
m->m_desc = "&lt;br&gt; tags are inserted to keep the number "
"of chars in the summary per line at or below this width. "
@ -14211,6 +14209,7 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
/*
m->m_title = "default number of summary excerpts by default";
m->m_desc = "What is the default number of "
"summary excerpts displayed per search result?";
@ -14223,7 +14222,7 @@ void Parms::init ( ) {
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
*/
m->m_title = "max summary line width by default";
m->m_desc = "&lt;br&gt; tags are inserted to keep the number "

View File

@ -58,6 +58,7 @@ bool Summary::set2 ( Xml *xml ,
bool doStemming ,
long maxSummaryLen ,
long maxNumLines ,
long numDisplayLines ,
long maxNumCharsPerLine ,
//long bigSampleRadius ,
//long bigSampleMaxLen ,
@ -81,6 +82,9 @@ bool Summary::set2 ( Xml *xml ,
// to see if it has all the query terms...
//if ( maxNumLines <= 0 ) return true;
m_numDisplayLines = numDisplayLines;
m_displayLen = 0;
//m_useDateLists = useDateLists;
//m_exclDateList = exclDateList;
//m_begPubDateList = begPubDateList;
@ -232,7 +236,12 @@ bool Summary::set2 ( Xml *xml ,
// highest scoring window around each term. And then find the highest
// of those over all the matching terms.
//
for ( long numFinal = 0; numFinal < maxNumLines; numFinal++ ){
long numFinal;
for ( numFinal = 0; numFinal < maxNumLines; numFinal++ ){
if ( numFinal == m_numDisplayLines )
m_displayLen = p - m_summary;
// reset these at the top of each loop
Match *maxm;
long long maxScore = 0;
@ -508,6 +517,9 @@ bool Summary::set2 ( Xml *xml ,
bb[j] |= D_USED;
}
if ( numFinal <= m_numDisplayLines )
m_displayLen = p - m_summary;
/*end = gettimeofdayInMilliseconds();
if ( end - start > 10 )
log ( LOG_WARN,"summary: took %llims to finish doing summary "
@ -530,18 +542,25 @@ bool Summary::set2 ( Xml *xml ,
m_summaryExcerptLen[0] = p - m_summary;
m_numExcerpts = 1;
}
// in this case we only have one summary line
if ( m_numDisplayLines > 0 )
m_displayLen = p - m_summary;
}
// If we still didn't find a summary, get the default summary
if ( p == m_summary )
if ( p == m_summary ) {
// then return the default summary
return getDefaultSummary ( xml,
words,
sections,
pos,
//bigSampleRadius,
maxSummaryLen );
bool status = getDefaultSummary ( xml,
words,
sections,
pos,
//bigSampleRadius,
maxSummaryLen );
if ( m_numDisplayLines > 0 )
m_displayLen = m_summaryLen;
return status;
}
// if we don't find a summary, there's no need to NULL terminate
if ( p != m_summary ) *p++ = '\0';

View File

@ -78,6 +78,7 @@ class Summary {
//long collLen ,
bool doStemming ,
long maxSummaryLen ,
long numDisplayLines ,
long maxNumLines ,
long maxNumCharsPerLine ,
//long bigSampleRadius ,
@ -237,6 +238,12 @@ class Summary {
//bool m_freeBuf;
//char m_localBuf[10032];
// if getting more lines for deduping than we need for displaying,
// how big is that part of the summary to display?
long m_numDisplayLines;
long m_displayLen;
long getSummaryDisplayLen() { return m_displayLen; }
long m_maxNumCharsPerLine;
long m_titleVersion;

View File

@ -28402,9 +28402,11 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
*/
// do they want a summary?
if ( m_req->m_numSummaryLines>0 && ! reply->ptr_sum ) {
if ( m_req->m_numSummaryLines>0 && ! reply->ptr_displaySum ) {
char *sum = getHighlightedSummary();
if ( ! sum || sum == (void *)-1 ) return (Msg20Reply *)sum;
Summary *s = getSummary();
if ( ! s || s == (void *)-1 ) return (Msg20Reply *)s;
//long sumLen = m_finalSummaryBuf.length();
// is it size and not length?
long sumLen = 0;
@ -28422,8 +28424,14 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
//long max = m_req->m_numSummaryLines;
// grab stuff from it!
//reply->m_proximityScore = s->getProximityScore();
reply-> ptr_sum = sum;//s->getSummary();
reply->size_sum = sumSize;//s->getSummaryLen(max)+1;
reply-> ptr_displaySum = sum;//s->getSummary();
reply->size_displaySum = sumSize;//s->getSummaryLen(max)+1;
// this is unhighlighted for deduping, and it might be longer
// . seems like we are not using this for deduping but using
// the gigabit vector in Msg40.cpp, so take out for now
//reply-> ptr_dedupSum = s->m_summary;
//reply->size_dedupSum = s->m_summaryLen+1;
//if ( s->m_summaryLen == 0 ) reply->size_dedupSum = 0;
//reply->m_diversity = s->getDiversity();
}
@ -29591,6 +29599,10 @@ Summary *XmlDoc::getSummary () {
false , // doStemming
m_req->m_summaryMaxLen ,
numLines ,
// . displayLines, # lines we are displaying
// . Summary::getSummaryDisplayLen() will return the
// length of the summary to display
m_req->m_numSummaryLines ,
cr->m_summaryMaxNumCharsPerLine,
m_req->m_ratInSummary ,
getFirstUrl() ,
@ -29623,7 +29635,10 @@ char *XmlDoc::getHighlightedSummary ( ) {
// get the summary
char *sum = s->getSummary();
long sumLen = s->getSummaryLen();
//long sumLen = s->getSummaryLen();
long sumLen = s->getSummaryDisplayLen();
//sum[sumLen] = 0;
// assume no highlighting?
if ( ! m_req->m_highlightQueryTerms || sumLen == 0 ) {