// open-source-search-engine/PageInject.h
//
// Declares the page-inject entry points plus the InjectionRequest and
// Msg7 classes.

#ifndef GBINJECT_H
#define GBINJECT_H
// Accessor for the list of injections currently being processed on this
// host. Used when printing the Spider Queue table in Spider.cpp.
// NOTE(review): presumably returns the first XmlDoc of that list (or NULL
// when nothing is being injected) -- confirm at the definition.
class XmlDoc *getInjectHead ( ) ;
// Request handler taking the UDP slot the request arrived on and the
// niceness of the network layer.
// NOTE(review): presumably the import-flavoured variant of handleRequest7
// -- confirm where the handler is registered.
void handleRequest7Import ( class UdpSlot *slot , int32_t netnice ) ;
// Request handler taking the UDP slot the request arrived on and the
// niceness of the network layer.
// NOTE(review): presumably services injection requests sent with
// Msg7::sendInjectionRequestToHost() -- confirm where it is registered.
void handleRequest7 ( class UdpSlot *slot , int32_t netnice ) ;
// Page handler for the inject page: receives the client socket and the
// parsed HTTP request.
// NOTE(review): the meaning of the bool result is not visible here;
// elsewhere in this codebase page handlers return false when they would
// block -- confirm at the definition.
bool sendPageInject ( class TcpSocket *s, class HttpRequest *hr );
// Resumes imports.
// NOTE(review): presumably restarts imports whose state was written by
// saveImportStates(); the meaning of the bool result is not visible here.
bool resumeImports ( ) ;

// Saves the state of the imports. Called by Process.cpp.
void saveImportStates ( ) ;
#include "XmlDoc.h"
#include "Users.h"
#include "Parms.h"
// Fills in the InjectionRequest pointed to by "ir".
// NOTE(review): judging by the name and arguments, the values come from
// the cgi parms of "hr" for collection "cr", with "sock" being the
// requesting client -- confirm at the definition.
void setInjectionRequestFromParms ( class TcpSocket *sock ,
				    class HttpRequest *hr ,
				    class CollectionRec *cr ,
				    class InjectionRequest *ir ) ;
// Describes one injection. The object is serialized: fixed-size members
// come first, then the ptr_* string pointers, then one size_* member per
// ptr_* member in the same order, and finally m_buf, which is the space
// the ptr_* strings are serialized into.
//
// Do NOT reorder, insert or remove members without updating every piece
// of code that serializes/deserializes this class; the ptr_*/size_*
// members must stay paired and in matching order.
class InjectionRequest {
 public:

	// fixed-size parameters
	int32_t   m_injectDocIp;
	char      m_injectLinks;
	char      m_spiderLinks;
	char      m_shortReply;
	char      m_newOnly;
	char      m_deleteUrl;
	char      m_recycle;
	char      m_dedup;
	char      m_hasMime;
	char      m_doConsistencyTesting;
	char      m_getSections;
	char      m_gotSections;
	int32_t   m_charset;
	int32_t   m_hopCount;
	collnum_t m_collnum; // more reliable than m_coll
	uint32_t  m_firstIndexed;
	uint32_t  m_lastSpidered;

	// variable-length strings; each ptr_X has a matching size_X below
	char *ptr_url;
	char *ptr_queryToScrape;
	char *ptr_contentDelim;
	char *ptr_contentFile;
	char *ptr_contentTypeStr;
	char *ptr_content;
	char *ptr_diffbotReply; // secret thing from dan
	char *ptr_metadata;

	// byte sizes of the ptr_* strings above, in the same order
	// NOTE(review): presumably includes the terminating \0 -- confirm
	// against the serialization code.
	int32_t size_url;
	int32_t size_queryToScrape;
	int32_t size_contentDelim;
	int32_t size_contentFile;
	int32_t size_contentTypeStr;
	int32_t size_content;
	int32_t size_diffbotReply; // secret thing from dan
	int32_t size_metadata;

	// serialized space for the ptr_* strings above. zero-length array
	// (a compiler extension), so it adds nothing to sizeof() and must
	// remain the very last member.
	char m_buf[0];
};
class Msg7 {
public:
2015-05-04 03:17:17 +03:00
//GigablastRequest m_gr;
2015-05-04 03:51:47 +03:00
InjectionRequest m_injectionRequest;
2015-05-04 03:17:17 +03:00
int64_t m_startTime;
int32_t m_replyIndexCode;
int64_t m_replyDocId;
//SafeBuf m_injectUrlBuf;
//bool m_firstTime;
//char *m_start;
//bool m_fixMe;
//char m_saved;
//int32_t m_injectCount;
//bool m_isDoneInjecting;
2014-06-12 04:24:33 +04:00
2015-05-04 03:51:47 +03:00
char *m_sir;
int32_t m_sirSize;
2013-08-03 00:12:24 +04:00
bool m_needsSet;
2015-05-04 03:17:17 +03:00
XmlDoc *m_xd;
2013-08-03 00:12:24 +04:00
TcpSocket *m_socket;
SafeBuf m_sb;
char m_round;
char m_useAhrefs;
HashTableX m_linkDedupTable;
2014-06-12 04:24:33 +04:00
2015-05-04 03:51:47 +03:00
// referenced by InjectionRequest::ptr_content
SafeBuf m_contentBuf;
SafeBuf m_sbuf; // for holding entire titlerec for importing
2014-06-12 04:24:33 +04:00
void *m_state;
void (* m_callback )(void *state);
2014-10-30 22:36:39 +03:00
//int64_t m_hackFileOff;
2014-11-11 01:45:11 +03:00
//int32_t m_hackFileId;
2014-11-11 01:45:11 +03:00
//int32_t m_crawlbotAPI;
2013-08-03 00:12:24 +04:00
class ImportState *m_importState;
//void constructor();
2013-08-03 00:12:24 +04:00
Msg7 ();
~Msg7 ();
2015-05-04 03:51:47 +03:00
bool m_inUse;
int32_t m_format;
HttpRequest m_hr;
2015-05-04 03:51:47 +03:00
class XmlDoc *m_stashxd;
void reset();
2013-08-03 00:12:24 +04:00
bool scrapeQuery ( );
void gotUdpReply ( class UdpSlot *slot ) ;
bool sendInjectionRequestToHost ( InjectionRequest *ir ,
void *state ,
void (* callback)(void *) );
2015-05-04 03:17:17 +03:00
// msg7request m_req7 must be valid
//bool inject ( char *coll,
// char *proxiedUrl,
// int32_t proxiedUrlLen,
// char *content,
// void *state ,
// void (*callback)(void *state) );
2015-05-04 03:17:17 +03:00
// msg7request m_req7 must be valid
// bool inject2 ( void *state , */
// void (*callback)(void *state) ); */
2013-08-03 00:12:24 +04:00
//bool injectTitleRec ( void *state ,
// void (*callback)(void *state) ,
// class CollectionRec *cr );
2015-05-04 03:17:17 +03:00
//void gotMsg7Reply ();
2013-08-03 00:12:24 +04:00
};
// Global flag; only declared here, defined in another translation unit.
// NOTE(review): the name suggests it is set while the inject page handler
// is running -- confirm at its definition.
extern bool g_inPageInject;
#endif