mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
fix indexArc()
This commit is contained in:
parent
5c89bde956
commit
ecb6d081d5
@ -535,8 +535,9 @@ bool Msg7::inject2 ( void *state ,
|
||||
|
||||
GigablastRequest *gr = &m_gr;
|
||||
|
||||
char *coll2 = gr->m_coll;
|
||||
CollectionRec *cr = g_collectiondb.getRec ( coll2 );
|
||||
// char *coll2 = gr->m_coll;
|
||||
// CollectionRec *cr = g_collectiondb.getRec ( coll2 );
|
||||
CollectionRec *cr = g_collectiondb.getRec ( gr->m_collnum );
|
||||
|
||||
if ( ! cr ) {
|
||||
g_errno = ENOCOLLREC;
|
||||
|
@ -2135,7 +2135,8 @@ bool XmlDoc::injectDoc ( char *url ,
|
||||
m_hopCountValid = true;
|
||||
}
|
||||
|
||||
if ( charset != -1 && charset != csUnknown ) {
|
||||
// PageInject calls memset on the GigablastRequest, so add '!= 0' here
|
||||
if ( charset != -1 && charset != csUnknown && charset != 0 ) {
|
||||
m_charset = charset;
|
||||
m_charsetValid = true;
|
||||
}
|
||||
@ -3264,6 +3265,7 @@ bool XmlDoc::indexArc ( ) {
|
||||
|
||||
// assume "start" has the http mime
|
||||
gr->m_hasMime = true;
|
||||
gr->m_url = arcUrl;
|
||||
|
||||
// arcConType needs to be indexable
|
||||
int32_t ct = getContentTypeFromStr ( arcConType );
|
||||
@ -17880,6 +17882,9 @@ char **XmlDoc::getFilteredContent ( ) {
|
||||
// we now support JSON for diffbot
|
||||
if ( *ct == CT_JSON ) return &m_filteredContent;
|
||||
|
||||
if ( *ct == CT_ARC ) return &m_filteredContent;
|
||||
if ( *ct == CT_WARC ) return &m_filteredContent;
|
||||
|
||||
// unknown content types are 0 since it is probably binary... and
|
||||
// we do not want to parse it!!
|
||||
if ( *ct == CT_PDF ) filterable = true;
|
||||
|
Loading…
Reference in New Issue
Block a user