mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
fix indexArc()
This commit is contained in:
parent
5c89bde956
commit
ecb6d081d5
@ -535,8 +535,9 @@ bool Msg7::inject2 ( void *state ,
|
|||||||
|
|
||||||
GigablastRequest *gr = &m_gr;
|
GigablastRequest *gr = &m_gr;
|
||||||
|
|
||||||
char *coll2 = gr->m_coll;
|
// char *coll2 = gr->m_coll;
|
||||||
CollectionRec *cr = g_collectiondb.getRec ( coll2 );
|
// CollectionRec *cr = g_collectiondb.getRec ( coll2 );
|
||||||
|
CollectionRec *cr = g_collectiondb.getRec ( gr->m_collnum );
|
||||||
|
|
||||||
if ( ! cr ) {
|
if ( ! cr ) {
|
||||||
g_errno = ENOCOLLREC;
|
g_errno = ENOCOLLREC;
|
||||||
|
@ -2135,7 +2135,8 @@ bool XmlDoc::injectDoc ( char *url ,
|
|||||||
m_hopCountValid = true;
|
m_hopCountValid = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( charset != -1 && charset != csUnknown ) {
|
// PageInject calls memset() on the GigablastRequest, so also check '!= 0' here
|
||||||
|
if ( charset != -1 && charset != csUnknown && charset != 0 ) {
|
||||||
m_charset = charset;
|
m_charset = charset;
|
||||||
m_charsetValid = true;
|
m_charsetValid = true;
|
||||||
}
|
}
|
||||||
@ -3264,6 +3265,7 @@ bool XmlDoc::indexArc ( ) {
|
|||||||
|
|
||||||
// assume "start" has the http mime
|
// assume "start" has the http mime
|
||||||
gr->m_hasMime = true;
|
gr->m_hasMime = true;
|
||||||
|
gr->m_url = arcUrl;
|
||||||
|
|
||||||
// arcConType needs to be indexable
|
// arcConType needs to be indexable
|
||||||
int32_t ct = getContentTypeFromStr ( arcConType );
|
int32_t ct = getContentTypeFromStr ( arcConType );
|
||||||
@ -17880,6 +17882,9 @@ char **XmlDoc::getFilteredContent ( ) {
|
|||||||
// we now support JSON for diffbot
|
// we now support JSON for diffbot
|
||||||
if ( *ct == CT_JSON ) return &m_filteredContent;
|
if ( *ct == CT_JSON ) return &m_filteredContent;
|
||||||
|
|
||||||
|
if ( *ct == CT_ARC ) return &m_filteredContent;
|
||||||
|
if ( *ct == CT_WARC ) return &m_filteredContent;
|
||||||
|
|
||||||
// unknown content types are 0 since it is probably binary... and
|
// unknown content types are 0 since it is probably binary... and
|
||||||
// we do not want to parse it!!
|
// we do not want to parse it!!
|
||||||
if ( *ct == CT_PDF ) filterable = true;
|
if ( *ct == CT_PDF ) filterable = true;
|
||||||
|
Loading…
Reference in New Issue
Block a user