// open-source-search-engine/Parms.h
//
// 561 lines
// 15 KiB
// C++

// Matt Wells, copyright Feb 2002
// Ideally, CollectionRec.h and SearchInput.h should be automatically generated
// from Parms.cpp. But Parms need to be marked if they contribute to
// SearchInput::makeKey() for caching the SERPS.
#ifndef _PARMS_H_
#define _PARMS_H_
#include "Rdb.h"
//#include "CollectionRec.h"
// Request handlers that receive the UdpSlot of the request and a niceness
// value. They are only declared here; the definitions live elsewhere.
// NOTE(review): the "3e"/"3f" suffixes presumably are the hex message types
// these handlers are registered for -- confirm against the registration code.
void handleRequest3e ( UdpSlot *slot , int32_t niceness ) ;
void handleRequest3f ( UdpSlot *slot , int32_t niceness ) ;
// "url filters profile" values. used to set default crawl rules
// in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults().
// for instance, UFP_NEWS spiders sites more frequently but less deep in
// order to get "news" pages and articles
//enum {
// UFP_CUSTOM = 0 ,
// UFP_NONE = 0 ,
// UFP_WEB = 1 ,
// UFP_NEWS = 2 ,
// UFP_LANG = 3,
// UFP_SHALLOW = 4
//};
// special priorities for the priority drop down
// in the url filters table
//enum {
// SPIDER_PRIORITY_FILTERED = -3 ,
// SPIDER_PRIORITY_BANNED = -2 ,
// SPIDER_PRIORITY_UNDEFINED = -1 };
// Kind of object a parm belongs to; this is the value kept in Parm::m_obj.
// Each enumerator carries its number explicitly so the numbering can be read
// off directly instead of being counted from the top.
enum {
	OBJ_CONF      = 1 ,
	OBJ_COLL      = 2 ,
	OBJ_SI        = 3 , // SearchInput class
	OBJ_GBREQUEST = 4 , // GigablastRequest class of parms
	OBJ_IR        = 5 , // InjectionRequest class from PageInject.h
	OBJ_NONE      = 6
};
// Value/control type of a parm; this is the value kept in Parm::m_type.
// Every enumerator is numbered explicitly (same values the implicit
// numbering produced) so inserting or removing an entry cannot silently
// renumber the ones after it.
enum {
	TYPE_BOOL             =  1 ,
	TYPE_BOOL2            =  2 ,
	TYPE_CHECKBOX         =  3 ,
	TYPE_CHAR             =  4 ,
	TYPE_CHAR2            =  5 , // needed to display char as a number (maxNumHops)
	TYPE_CMD              =  6 ,
	TYPE_FLOAT            =  7 ,
	TYPE_IP               =  8 ,
	TYPE_LONG             =  9 ,
	TYPE_LONG_LONG        = 10 ,
	TYPE_NONE             = 11 ,
	TYPE_PRIORITY         = 12 ,
	TYPE_PRIORITY2        = 13 ,
	TYPE_PRIORITY_BOXES   = 14 ,
	TYPE_RETRIES          = 15 ,
	TYPE_STRING           = 16 ,
	TYPE_STRINGBOX        = 17 ,
	TYPE_STRINGNONEMPTY   = 18 ,
	TYPE_TIME             = 19 ,
	TYPE_DATE2            = 20 ,
	TYPE_DATE             = 21 ,
	TYPE_RULESET          = 22 ,
	TYPE_FILTER           = 23 ,
	TYPE_COMMENT          = 24 ,
	TYPE_CONSTANT         = 25 ,
	TYPE_MONOD2           = 26 ,
	TYPE_MONOM2           = 27 ,
	TYPE_LONG_CONST       = 28 ,
	TYPE_SITERULE         = 29 ,
	TYPE_SAFEBUF          = 30 ,
	TYPE_UFP              = 31 ,
	TYPE_FILEUPLOADBUTTON = 32 ,
	TYPE_DOUBLE           = 33 ,
	TYPE_CHARPTR          = 34
};
// forward decls to make compiler happy. these are enough for the pointer
// parameters and pointer members declared in this header.
// NOTE(review): GigablastRequest below holds an HttpRequest BY VALUE, which
// needs the complete type; that definition must therefore arrive through one
// of the headers included before it (Rdb.h or Msg4.h, transitively) -- confirm.
class HttpRequest;
class TcpSocket;
// Display attributes of one page. Parms keeps an array of these in
// Parms::m_pages[] together with a count in Parms::m_numPages.
// The char pointers are plain, non-owning-looking pointers; this header does
// not show who allocates or frees the strings.
class Page {
public:
int32_t m_page; // a PAGE_* value (PAGE_MASTER, PAGE_SPIDER, ...), see Pages.h
char *m_bgcolor; // color of the cells in the table
char *m_topcolor; // color of the table's first row
char *m_title; // browser title bar
};
#include "Msg4.h"
// generic gigablast request. for all apis offered.
//
// One object carries the parsed inputs of every api listed below (inject,
// import, get, addurl, reindex); each api only uses its own group of members.
// Parms::setGigablastRequest() takes a socket, an HttpRequest and one of
// these, and is presumably what fills it in -- confirm in Parms.cpp.
class GigablastRequest {
public:
//
// make a copy of the http request because the original is
// on the stack. AND the "char *" types below will reference into
// this because they are listed as TYPE_CHARPTR in Parms.cpp.
// that saves us memory as opposed to making them all SafeBufs.
//
// NOTE(review): because the char pointers alias m_hr, copying a
// GigablastRequest would presumably leave the copy's pointers aimed at the
// source object's m_hr -- confirm that no caller copies these objects.
//
HttpRequest m_hr;
// ptr to socket to send reply back on
TcpSocket *m_socket;
// TYPE_CHARPTR
char *m_coll;
// pretty universal char ptr
char *m_formatStr;
////////////
//
// /admin/inject parms
//
////////////
// these all reference into m_hr or into the Parm::m_def string!
// (so they do not own the memory they point at)
char *m_url; // also for /get
// the members below are disabled; kept for reference only
//char *m_queryToScrape;
//char *m_contentDelim;
//char m_containerContentType; // CT_UNKNOWN, CT_WARC, CT_ARC
//int32_t m_injectDocIp;
//char *m_contentTypeStr;
//char *m_contentFile;
//char *m_content;
//char *m_diffbotReply; // secret thing from dan
//char m_injectLinks;
//char m_spiderLinks;
//char m_shortReply;
//char m_newOnly;
//char m_deleteUrl;
//char m_recycle;
//char m_dedup;
//char m_hasMime;
//char m_doConsistencyTesting;
//char m_getSections;
//char m_gotSections;
//int32_t m_charset;
//int32_t m_hopCount; // hopcount
//collnum_t m_collnum; // more reliable than m_coll
// older ones
//uint32_t m_firstIndexed; // firstindexed
//uint32_t m_lastSpidered; // lastspidered;
//SafeBuf m_contentBuf; // for holding a warc/arc file
///////////
//
// /admin/import parms
//
///////////
char *m_importDir; // TYPE_CHARPTR
int32_t m_importInjects;
///////////
//
// /get parms (for getting cached web pages)
//
///////////
int64_t m_docId;
int32_t m_strip;
char m_includeHeader;
char m_highlightQuery;
///////////
//
// /admin/addurl parms
//
///////////
char *m_urlsBuf;
char m_stripBox;
char m_harvestLinks;
// unlike the char pointers above, these two are full objects owned by this
// request
SafeBuf m_listBuf;
Msg4 m_msg4;
/////////////
//
// /admin/reindex parms
//
////////////
char *m_query;
// NOTE(review): m_srn/m_ern look like a start/end result number pair --
// confirm against the reindex page code
int32_t m_srn;
int32_t m_ern;
char *m_qlang;
bool m_forceDel;
char m_recycleContent;
// useful bufs to copy data over
SafeBuf m_tmpBuf1;
SafeBuf m_tmpBuf2;
SafeBuf m_tmpBuf3;
};
// values for Parm::m_subMenu
// the ids are small consecutive integers; SUBMENU_CHECKBOX is a separate
// high-bit flag.
// NOTE(review): Parm::m_subMenu is a plain char. where char is signed, 0x80
// does not fit as a positive value, so test the flag with & rather than
// comparing the whole field with == -- confirm how callers use it.
#define SUBMENU_DISPLAY       1
#define SUBMENU_MAP           2
#define SUBMENU_CALENDAR      3
#define SUBMENU_LOCATION      4
#define SUBMENU_SOCIAL        5
#define SUBMENU_TIME          6
#define SUBMENU_CATEGORIES    7
#define SUBMENU_LINKS         8
#define SUBMENU_WIDGET        9
#define SUBMENU_SUGGESTIONS   10
#define SUBMENU_SEARCH        11
#define SUBMENU_CHECKBOX      0x80     // flag

// values for Parm::m_flags
// one bit per flag, written at a uniform width so that the bit positions
// line up in a column and a duplicate or skipped bit is easy to spot.
// Parm::m_flags is an int32_t, which holds all 21 bits used here.
#define PF_COOKIE             0x000001 // store in cookie?
#define PF_REDBOX             0x000002 // redbox constraint on search results
#define PF_SUBMENU_HEADER     0x000004
#define PF_WIDGET_PARM        0x000008
#define PF_API                0x000010
#define PF_REBUILDURLFILTERS  0x000020
#define PF_NOSYNC             0x000040
#define PF_DIFFBOT            0x000080
#define PF_HIDDEN             0x000100
#define PF_NOSAVE             0x000200
#define PF_DUP                0x000400
#define PF_TEXTAREA           0x000800
#define PF_COLLDEFAULT        0x001000
#define PF_NOAPI              0x002000
#define PF_REQUIRED           0x004000
#define PF_REBUILDPROXYTABLE  0x008000
#define PF_NOHTML             0x010000
#define PF_CLONE              0x020000
#define PF_PRIVATE            0x040000 // for password to not show in api
#define PF_SMALLTEXTAREA      0x080000
#define PF_REBUILDACTIVELIST  0x100000
// Describes one configurable parameter: how it is named (title, cgi names,
// xml tag), what it holds (m_type, m_size, m_max), where its value lives
// (m_obj + m_off) and how it is shown and propagated.
// Parms owns the table of these in Parms::m_parms[].
//
// All "char *" members are raw pointers; this header does not show who owns
// the strings they point at.
class Parm {
public:
	char *m_title; // displayed above m_desc on admin gui page
	char *m_desc; // description of variable displayed on admin gui page
	char *m_cgi; // cgi name, contains %i if an array
	char *m_cgi2; // alias
	char *m_cgi3; // alias
	char *m_cgi4; // alias
	char *m_xml; // default to rendition of m_title if NULL
	int32_t m_off; // this variable's offset into the CollectionRec class
	char m_colspan;
	char m_type; // TYPE_BOOL, TYPE_LONG, ...
	int32_t m_page; // PAGE_MASTER, PAGE_SPIDER, ... see Pages.h
	char m_obj; // OBJ_CONF or OBJ_COLL

	// true if this parm is an array, i.e. allows more than one element.
	// const because it only reads m_max, so it is callable through a
	// "const Parm" as well.
	bool isArray() const { return m_max > 1; }

	int32_t getNumInArray() ;

	// the maximum number of elements supported in the array.
	// this is 1 if NOT an array (i.e. array of only one parm).
	// in such cases a "count" is NOT stored before the parm in
	// CollectionRec.h or Conf.h.
	int32_t m_max; // max elements in the array

	// if array is fixed size, how many elements in it?
	// this is 0 if not a FIXED size array.
	int32_t m_fixed;

	int32_t m_size; // max string size
	char *m_def; // default value of this variable if not in either conf
	int32_t m_defOff; // if default value points to a collectionrec parm!
	char m_cast; // true if we should broadcast to all hosts (default)
	char *m_units;
	char m_addin; // add "insert above" link to gui when displaying array
	char m_rowid; // id of row controls are in, if any
	char m_rdonly;// if in read-only mode, blank out this control?
	char m_hdrs; // print headers for row or print title/desc for single?
	char m_perms; // 0 means same as WebPages' m_perms
	char m_subMenu; // one of the SUBMENU_* values
	int32_t m_flags; // bitwise OR of PF_* flags
	char *m_class;
	char *m_icon;
	char *m_qterm;
	char *m_pstr; // for sorting by in sendPageAPI()
	int32_t m_parmNum; // index of this parm's slot in the m_parms[] array

	//bool (*m_func)(TcpSocket *s , HttpRequest *r,
	// bool (*cb)(TcpSocket *s , HttpRequest *r));
	bool (*m_func)(char *parmRec);

	// some functions can block, like when deleting a coll because
	// the tree might be saving, so they take a "we" ptr
	bool (*m_func2)(char *parmRec,class WaitEntry *we);

	int32_t m_plen; // offset of length for TYPE_STRINGS (m_htmlHeadLen...)
	char m_group; // start of a new group of controls?

	// m_priv = 1 means gigablast's software license clients cannot see
	// or change.
	// m_priv = 2 means gigablast's software license clients, including
	// even metalincs, cannot see or change.
	// m_priv = 3 means nobody can see in admin controls, but can be
	// in search input by anybody. really a hack for yaron
	// from quigo so he can set "t2" to something bigger.
	char m_priv; // privacy level, one of the values described above

	char m_save; // save to xml file? almost always true
	int32_t m_min;

	// these are used for search parms in PageResults.cpp
	//char m_sparm;// is this a search parm? for passing to PageResults.cpp
	//char *m_scgi; // parm in the search url
	char m_spriv; // is it private? only admins can see/use private parms
	//char *m_scmd; // the url path for this m_scgi variable
	//int32_t m_sdefo; // offset of default into CollectionRec (use m_off)
	int32_t m_sminc ;// offset of min in CollectionRec (-1 for none)
	int32_t m_smaxc ;// offset of max in CollectionRec (-1 for none)
	int32_t m_smin; // absolute min
	int32_t m_smax; // absolute max
	//int32_t m_soff; // offset into SearchInput to store value in
	char m_sprpg; // propagate the cgi variable to other pages via GET?
	char m_sprpp; // propagate the cgi variable to other pages via POST?
	bool m_sync; // this parm should be synced
	int32_t m_hash; // hash of "title"
	int32_t m_cgiHash; // hash of m_cgi

	// read this parm's value out of a SearchInput, converted to the
	// requested type
	bool getValueAsBool ( class SearchInput *si ) ;
	int32_t getValueAsLong ( class SearchInput *si ) ;
	char * getValueAsString ( class SearchInput *si ) ;

	// per-collection overload of getNumInArray() above
	int32_t getNumInArray ( collnum_t collnum ) ;

	// NOTE(review): presumably prints occurrence #occNum of this parm for
	// the given collection into sb -- confirm in Parms.cpp
	bool printVal ( class SafeBuf *sb , collnum_t collnum , int32_t occNum ) ;
};
// capacity of Parms::m_parms[] and Parms::m_searchParms[]
#define MAX_PARMS 940
// size in bytes (200 KiB) of Parms::m_buf, which holds the default.conf file
#define MAX_XML_CONF (200*1024)
#include "Xml.h"
#include "SafeBuf.h"
struct SerParm;
// Holds the table of all Parm descriptors (m_parms[]) and declares the
// operations on them: printing them into a SafeBuf, setting them from an
// HttpRequest or an xml file, saving them to xml, and building/broadcasting
// parm lists to other hosts.
// A single global instance is declared at the bottom of this header.
//
// NOTE(review): the "char *THIS" / "void *THIS" arguments look like the base
// address of the object (selected by objType/OBJ_*) that Parm::m_off is an
// offset into -- confirm in Parms.cpp.
class Parms {
public:
Parms();
void init();
//
// printing. these write into the supplied SafeBuf.
//
bool sendPageGeneric ( class TcpSocket *s, class HttpRequest *r );
bool printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r );
//char *printParms (char *p, char *pend, TcpSocket *s, HttpRequest *r);
bool printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r );
bool printParms2 (SafeBuf* sb,
int32_t page,
CollectionRec *cr,
int32_t nc ,
int32_t pd ,
bool isCrawlbot ,
char format, //bool isJSON,
TcpSocket *sock,
bool isMasterAdmin,
bool isCollAdmin
);
// old buffer-pointer based version of printParm(), disabled
/*
char *printParm ( char *p ,
char *pend ,
//int32_t user ,
char *username,
Parm *m ,
int32_t mm , // m = &m_parms[mm]
int32_t j ,
int32_t jend ,
char *THIS ,
char *coll ,
char *pwd ,
char *bg ,
int32_t nc ,
int32_t pd ) ;
*/
// "m" is the parm to print and "mm" its index, i.e. m = &m_parms[mm]
bool printParm ( SafeBuf* sb,
//int32_t user ,
char *username,
Parm *m ,
int32_t mm , // m = &m_parms[mm]
int32_t j ,
int32_t jend ,
char *THIS ,
char *coll ,
char *pwd ,
char *bg ,
int32_t nc ,
int32_t pd ,
bool lastRow ,
bool isCrawlbot ,//= false,
char format , //= FORMAT_HTML,
bool isMasterAdmin ,
bool isCollAdmin ,
class TcpSocket *sock );
char *getTHIS ( HttpRequest *r , int32_t page );
class Parm *getParmFromParmHash ( int32_t parmHash );
//
// setting parm values from a request, from defaults or from files
//
bool setFromRequest ( HttpRequest *r , //int32_t user,
TcpSocket* s,
class CollectionRec *newcr ,
char *THIS ,
int32_t objType );
bool insertParm ( int32_t i , int32_t an , char *THIS ) ;
bool removeParm ( int32_t i , int32_t an , char *THIS ) ;
void setParm ( char *THIS, Parm *m, int32_t mm, int32_t j, char *s,
bool isHtmlEncoded , bool fromRequest ) ;
void setToDefault ( char *THIS , char objType ,
CollectionRec *argcr );//= NULL ) ;
bool setFromFile ( void *THIS ,
char *filename ,
char *filenameDef ,
char objType ) ;
bool setParmsFromXml ( Xml &xml , void *THIS, char objType ) ;
bool setXmlFromFile(Xml *xml, char *filename, class SafeBuf *sb );
bool saveToXml ( char *THIS , char *f , char objType ) ;
bool convertToXml ( char *buf , char *THIS , char objType ) ;
// get the parm with the associated cgi name. must be NULL terminated.
Parm *getParm ( char *cgi ) ;
bool getParmHtmlEncoded ( SafeBuf *sb , Parm *m , char *s );
bool setGigablastRequest ( class TcpSocket *s ,
class HttpRequest *hr ,
class GigablastRequest *gr );
// . make it so a collectionrec can be copied in Collectiondb.cpp
// . so the rec can be copied and the old one deleted without
// freeing the safebufs now used by the new one.
void detachSafeBufs ( class CollectionRec *cr ) ;
// calc checksum of parms
uint32_t calcChecksum();
// get size of serialized parms
//int32_t getStoredSize();
// . serialized to buf
// . if buf is NULL, just calcs size
//bool serialize( char *buf, int32_t *bufSize );
//void deserialize( char *buf );
void overlapTest ( char step ) ;
/////
//
// parms now in parmdb
//
/////
// all parm recs need to be in the tree
//Rdb m_rdb;
//
// new functions
//
// the *1 variants identify the parm by name (or take the CollectionRec),
// the *2 variants take the Parm itself. each appends to "parmList".
bool addNewParmToList1 ( SafeBuf *parmList ,
collnum_t collnum ,
char *parmValString ,
int32_t occNum ,
char *parmName ) ;
bool addNewParmToList2 ( SafeBuf *parmList ,
collnum_t collnum ,
char *parmValString ,
int32_t occNum ,
Parm *m ) ;
bool addCurrentParmToList1 ( SafeBuf *parmList ,
CollectionRec *cr ,
char *parmName ) ;
bool addCurrentParmToList2 ( SafeBuf *parmList ,
collnum_t collnum ,
int32_t occNum ,
Parm *m ) ;
bool convertHttpRequestToParmList (HttpRequest *hr,SafeBuf *parmList,
int32_t page , TcpSocket *sock );
Parm *getParmFast2 ( int32_t cgiHash32 ) ;
Parm *getParmFast1 ( char *cgi , int32_t *occNum ) ;
// by default the list goes to the grunts but not to the proxies, and to
// all hosts rather than a single one (hostId = -1).
// NOTE(review): "callback" presumably is invoked with "state" once the
// broadcast completes -- confirm in Parms.cpp
bool broadcastParmList ( SafeBuf *parmList ,
void *state ,
void (* callback)(void *) ,
bool sendToGrunts = true ,
bool sendToProxies = false ,
// send to this single hostid? -1 means all
int32_t hostId = -1 ,
int32_t hostId2 = -1 ); // hostid range?
bool doParmSendingLoop ( ) ;
bool syncParmsWithHost0 ( ) ;
bool makeSyncHashList ( SafeBuf *hashList ) ;
int32_t getNumInArray ( collnum_t collnum ) ;
bool addAllParmsToList ( SafeBuf *parmList, collnum_t collnum ) ;
bool updateParm ( char *rec , class WaitEntry *we ) ;
bool cloneCollRec ( char *srcCR , char *dstCR ) ;
//
// end new functions
//
bool m_inSyncWithHost0;
bool m_isDefaultLoaded;
// page table; m_numPages is the count that goes with it (capacity is 50)
Page m_pages [ 50 ];
int32_t m_numPages;
// the parm table; m_numParms is the count that goes with it
Parm m_parms [ MAX_PARMS ];
int32_t m_numParms;
// just those Parms that have a m_sparm of 1
// NOTE(review): Parm::m_sparm is commented out in the Parm class, so the
// line above may be stale -- confirm what actually selects these entries
Parm *m_searchParms [ MAX_PARMS ];
int32_t m_numSearchParms;
// disabled serialization helpers; "struct SerParm" is only used here
/*
private:
// these return true if overflow
bool serializeConfParm( Parm *m, int32_t i, char **p, char *end,
int32_t size, int32_t cnt,
bool sizeChk, int32_t *bufSz );
bool serializeCollParm( class CollectionRec *cr,
Parm *m, int32_t i, char **p, char *end,
int32_t size, int32_t cnt,
bool sizeChk, int32_t *bufSz );
void deserializeConfParm( Parm *m, SerParm *sp, char **p,
bool *confChgd );
void deserializeCollParm( class CollectionRec *cr,
Parm *m, SerParm *sp, char **p );
*/
// for holding default.conf file for collection recs for OBJ_COLL
// (MAX_XML_CONF bytes, stored inline in this object)
char m_buf [ MAX_XML_CONF ];
// for parsing default.conf file for collection recs for OBJ_COLL
Xml m_xml2;
};
// the global Parms instance. this is only the declaration; the definition
// has to be provided by exactly one source file.
extern Parms g_parms;
#endif