fixes for query reindex/delete.

This commit is contained in:
Matt Wells 2014-02-25 18:12:45 -08:00
parent 33c8123288
commit 8bb5d106db
3 changed files with 37 additions and 2 deletions

View File

@ -2125,8 +2125,13 @@ bool CollectionRec::rebuildUrlFilters ( ) {
long i = 0;
// 1st one! for query reindex/ query delete
m_regExs[i].set("isdocidbased");
m_spiderIpMaxSpiders [i] = 10;
m_spiderPriorities [i] = 70;
i++;
// 1st default url filter
// 2nd default url filter
m_regExs[i].set("ismedia && !ismanualadd");
m_spiderPriorities [i] = SPIDER_PRIORITY_FILTERED;
i++;

View File

@ -507,6 +507,7 @@ bool Msg1c::reindexQuery ( char *query ,
m_req.m_queryExpansion = true; // so it's like regular rslts
// add language dropdown or take from [query reindex] link
m_req.m_language = langId;
//m_req.m_debug = 1;
// log for now
logf(LOG_DEBUG,"reindex: qlangid=%li q=%s",langId,query);

View File

@ -2154,6 +2154,18 @@ bool XmlDoc::indexDoc ( ) {
"error reply.",
m_docId,mstrerror(g_errno));
// if docid not found when trying to do a query reindex...
// this really shouldn't happen but i think we were adding
// additional SpiderRequests since we were using a fake first ip.
// but i have since fixed that code. so if the titlerec was not
// found when trying to do a force delete... it's not a temporary
// error and should not be retried. if we set indexCode to
// EINTERNALERROR it seems to be retried.
if ( g_errno == ENOTFOUND ) {
m_indexCode = g_errno;
m_indexCodeValid = true;
}
if ( ! m_indexCodeValid ) {
m_indexCode = EINTERNALERROR;//g_errno;
m_indexCodeValid = true;
@ -2219,6 +2231,12 @@ bool XmlDoc::indexDoc ( ) {
m_spideredTime = getTimeGlobal();//0; use now!
}
// don't let it get the diffbot reply either! it should be empty.
if ( ! m_diffbotReplyValid ) {
m_diffbotReplyValid = true;
}
// if error is EFAKEFIRSTIP, do not core
//if ( ! m_isIndexedValid ) {
// m_isIndexed = false;
@ -2313,6 +2331,10 @@ bool XmlDoc::indexDoc2 ( ) {
// is really just adding the spider request and returning
// to the browser without delay.
! m_sreq.m_isInjecting &&
// not for page reindexes either!
! m_sreq.m_isPageReindex &&
// just add url
m_sreq.m_isAddUrl &&
// diffbot requests are ok though!
! strstr(m_sreq.m_url,"-diffbotxyz") ) {
m_indexCodeValid = true;
@ -18551,6 +18573,9 @@ bool XmlDoc::logIt ( ) {
else
sb.safePrintf("urlinjected=0 ");
if ( m_sreqValid && m_sreq.m_isPageReindex )
sb.safePrintf("pagereindex=1 ");
if ( m_spiderLinksValid && m_spiderLinks )
sb.safePrintf("spiderlinks=1 ");
if ( m_spiderLinksValid && ! m_spiderLinks )
@ -22331,7 +22356,11 @@ SpiderReply *XmlDoc::getNewSpiderReply ( ) {
// it came from!! if it has m_sreq.m_isAddUrl and
// m_sreq.m_fakeFirstIp then we actually do add the reply with that
// fake ip so that they will exist in the same shard.
if ( m_sreqValid && ! m_sreq.m_isInjecting )
// BUT if it is docid pased from PageReindex.cpp (a query reindex)
// we set the injection bit and the pagereindex bit, we should let
// thise guys keep the firstip because the docid-based spider request
// is in spiderdb. it needs to match up.
if ( m_sreqValid && (!m_sreq.m_isInjecting||m_sreq.m_isPageReindex) )
firstIp = m_sreq.m_firstIp;
// sanity