// open-source-search-engine/Scraper.h
// Matt Wells, copyright Dec 2008
#ifndef _SCRAPER_H_
#define _SCRAPER_H_
#include "Url.h" // MAX_COLL_LEN
#include "XmlDoc.h"
// Within this header the constant is referenced only by commented-out
// array members of Scraper (m_urls, m_msg9a).
// NOTE(review): presumably the cap on simultaneously outstanding scrapes;
// confirm against the implementation file before changing it.
#define MAX_SCRAPES_OUT 1
// Scraper: interface declaration only; every member function below is
// defined outside this header. A single global instance is declared after
// the class as g_scraper.
//
// Members that are commented out are disabled legacy declarations kept for
// reference; they are not part of the compiled interface.
class Scraper {

 public:

	Scraper  ( );
	~Scraper ( );

	// NOTE(review): the meaning of the bool results below is not visible
	// in this header; confirm the convention in the implementation.
	bool init ( );

	void wakeUp    ( );
	void gotPhrase ( );

	//bool gotPages ( int32_t i, TcpSocket *s ) ;
	//bool addedScrapedSites ( int32_t i ) ;
	//bool gotUrlInfo ( int32_t i ) ;
	//bool wrapItUp ( );

	bool indexedDoc   ( );
	bool scrapeProCog ( );

	// Collection name buffer; MAX_COLL_LEN comes from Url.h.
	// NOTE(review): the +1 presumably reserves room for a terminating
	// NUL -- confirm.
	char m_coll[MAX_COLL_LEN+1];

	// NOTE(review): presumably counters of replies received and requests
	// sent -- confirm against the implementation.
	int32_t m_numReceived;
	int32_t m_numSent;

	// NOTE(review): query type selector; valid values are not visible here.
	int32_t m_qtype;

	//Url m_urls[MAX_SCRAPES_OUT];
	//int32_t m_numUrls;
	//Msg9a m_msg9a[MAX_SCRAPES_OUT];
	//char m_buf[50000];
	//char *m_bufPtr;
	//char *m_bufEnd;

	// NOTE(review): presumably a flag recording whether a callback has
	// been registered -- confirm.
	char m_registered;

	// Document object embedded by value in the scraper.
	XmlDoc m_xd;

	//int32_t m_numOutlinks;
	//Links m_links;
	//MsgE m_msge;
	//RdbList m_list;
	//Msg4 m_msg4;
};
// Declaration of the global Scraper instance; the object itself is defined
// outside this header.
extern class Scraper g_scraper;
#endif