fix indexArc()

This commit is contained in:
Matt 2015-05-01 23:24:40 -07:00
parent 5c89bde956
commit ecb6d081d5
2 changed files with 9 additions and 3 deletions

View File

@@ -535,8 +535,9 @@ bool Msg7::inject2 ( void *state ,
GigablastRequest *gr = &m_gr; GigablastRequest *gr = &m_gr;
char *coll2 = gr->m_coll; // char *coll2 = gr->m_coll;
CollectionRec *cr = g_collectiondb.getRec ( coll2 ); // CollectionRec *cr = g_collectiondb.getRec ( coll2 );
CollectionRec *cr = g_collectiondb.getRec ( gr->m_collnum );
if ( ! cr ) { if ( ! cr ) {
g_errno = ENOCOLLREC; g_errno = ENOCOLLREC;

View File

@@ -2135,7 +2135,8 @@ bool XmlDoc::injectDoc ( char *url ,
m_hopCountValid = true; m_hopCountValid = true;
} }
if ( charset != -1 && charset != csUnknown ) { // PageInject calls memset on gigablastrequest so add '!= 0' here
if ( charset != -1 && charset != csUnknown && charset != 0 ) {
m_charset = charset; m_charset = charset;
m_charsetValid = true; m_charsetValid = true;
} }
@@ -3264,6 +3265,7 @@ bool XmlDoc::indexArc ( ) {
// assume "start" has the http mime // assume "start" has the http mime
gr->m_hasMime = true; gr->m_hasMime = true;
gr->m_url = arcUrl;
// arcConType needs to be indexable // arcConType needs to be indexable
int32_t ct = getContentTypeFromStr ( arcConType ); int32_t ct = getContentTypeFromStr ( arcConType );
@@ -17880,6 +17882,9 @@ char **XmlDoc::getFilteredContent ( ) {
// we now support JSON for diffbot // we now support JSON for diffbot
if ( *ct == CT_JSON ) return &m_filteredContent; if ( *ct == CT_JSON ) return &m_filteredContent;
if ( *ct == CT_ARC ) return &m_filteredContent;
if ( *ct == CT_WARC ) return &m_filteredContent;
// unknown content types are 0 since it is probably binary... and // unknown content types are 0 since it is probably binary... and
// we do not want to parse it!! // we do not want to parse it!!
if ( *ct == CT_PDF ) filterable = true; if ( *ct == CT_PDF ) filterable = true;