2013-09-17 07:42:48 +04:00
|
|
|
|
2013-09-27 08:41:05 +04:00
|
|
|
// Include guard: the declarations between here and the matching #endif are
// only processed the first time this header is included in a translation unit.
#ifndef CRAWLBOT_H
|
|
|
|
#define CRAWLBOT_H
|
2013-09-17 07:42:48 +04:00
|
|
|
|
2014-07-13 20:35:44 +04:00
|
|
|
// Declaration only; the definition is not part of this header.
// The parameters use elaborated type specifiers (`class SafeBuf`,
// `class CollectionRec`), so only pointers to those types are needed here.
// NOTE(review): presumably prints crawl details for collection `cx` into `sb`,
// with `format` selecting the output format -- confirm against the definition.
// The meaning of the bool return value is not visible from this header.
bool printCrawlDetails2(class SafeBuf *sb,class CollectionRec *cx,char format);
|
2014-07-11 19:00:30 +04:00
|
|
|
|
2014-05-28 21:41:32 +04:00
|
|
|
// Two-argument overload of printCrawlDetailsInJson; a second overload taking
// an extra `int version` is declared further down in this header. Having both
// relies on C++ function overloading, so this header is C++-only.
// NOTE(review): presumably writes the crawl details of `cx` as JSON into `sb`
// -- confirm against the definition. Return-value semantics are not visible
// from this header.
bool printCrawlDetailsInJson ( class SafeBuf *sb , class CollectionRec *cx ) ;
|
|
|
|
|
2014-05-28 07:11:12 +04:00
|
|
|
// Three-argument overload of printCrawlDetailsInJson; differs from the
// two-argument declaration earlier in this header only by the trailing
// `int version` parameter.
// NOTE(review): `version` presumably selects the JSON layout/API version --
// confirm against the definition. Return-value semantics are not visible
// from this header.
bool printCrawlDetailsInJson ( class SafeBuf *sb , class CollectionRec *cx, int version ) ;
|
2014-02-10 02:09:48 +04:00
|
|
|
|
2013-09-19 02:07:47 +04:00
|
|
|
// values for the diffbot dropdown
|
2013-09-30 19:18:22 +04:00
|
|
|
/*
|
2013-09-28 23:12:33 +04:00
|
|
|
#define DBA_NONE 0
|
|
|
|
#define DBA_ALL 1
|
|
|
|
#define DBA_ARTICLE_FORCE 2
|
|
|
|
#define DBA_ARTICLE_AUTO 3
|
|
|
|
#define DBA_PRODUCT_FORCE 4
|
|
|
|
#define DBA_PRODUCT_AUTO 5
|
|
|
|
#define DBA_IMAGE_FORCE 6
|
|
|
|
#define DBA_IMAGE_AUTO 7
|
|
|
|
#define DBA_FRONTPAGE_FORCE 8
|
|
|
|
#define DBA_FRONTPAGE_AUTO 9
|
2013-09-19 02:07:47 +04:00
|
|
|
|
|
|
|
// add new fields to the END of the list, since I think we store the
|
|
|
|
// field we use as a number in coll.conf, starting at 0
|
|
|
|
extern char *g_diffbotFields [];
|
2013-09-30 19:18:22 +04:00
|
|
|
*/
|
2013-09-19 02:07:47 +04:00
|
|
|
|
2013-10-02 01:14:39 +04:00
|
|
|
// Declaration only; the definition is not part of this header.
// Takes pointers to a TcpSocket and an HttpRequest (both named via elaborated
// type specifiers, so their full definitions are not required here).
// NOTE(review): presumably the handler that serves the crawlbot page for the
// request `hr` on socket `s` -- confirm against the definition. The meaning
// of the bool return value is not visible from this header.
bool sendPageCrawlbot ( class TcpSocket *s , class HttpRequest *hr );
|
2013-09-26 01:37:20 +04:00
|
|
|
|
2013-09-17 21:25:54 +04:00
|
|
|
//bool handleDiffbotRequest ( TcpSocket *s , HttpRequest *hr ) ;
|
2013-10-02 01:14:39 +04:00
|
|
|
// Declaration only; the definition is not part of this header.
// Same parameter list as sendPageCrawlbot: a TcpSocket pointer and an
// HttpRequest pointer.
// NOTE(review): presumably sends some kind of dump back over socket `s` in
// response to `hr` -- what is dumped and what the bool return value means
// cannot be determined from this header; confirm against the definition.
bool sendBackDump ( class TcpSocket *s, class HttpRequest *hr );
|
2013-09-17 07:42:48 +04:00
|
|
|
|
2013-10-02 01:14:39 +04:00
|
|
|
// Declaration only; the definition is not part of this header.
// Parameters:
//   doc         - non-const char pointer. NOTE(review): presumably the text
//                 to extract spider requests from; whether it must be
//                 NUL-terminated or may be modified is not visible here.
//   listBuf     - SafeBuf pointer. NOTE(review): presumably receives the
//                 resulting meta list -- confirm against the definition.
//   spiderLinks - boolean flag. NOTE(review): presumably controls whether
//                 links are spidered -- confirm.
//   cr          - CollectionRec pointer for the collection involved.
// The meaning of the bool return value is not visible from this header.
bool getSpiderRequestMetaList ( char *doc,
|
|
|
|
class SafeBuf *listBuf,
|
2013-10-22 04:35:14 +04:00
|
|
|
bool spiderLinks,
|
|
|
|
class CollectionRec *cr);
|
2013-09-26 02:04:16 +04:00
|
|
|
|
2013-09-17 07:42:48 +04:00
|
|
|
#endif // CRAWLBOT_H
|