open-source-search-engine/Syncdb.h
2013-08-02 13:12:24 -07:00

238 lines
8.3 KiB
C++

// Copyright Matt Wells, Apr 2009
// . for these descriptions, assume "you" are a host in the cluster
// ** STARTUP **
// . on startup, if you have your cleanshutdown.dat file then set
// your Host::m_cleanShutdown to true
// . if you had no cleanshutdown.dat file, reset your syncdb completely
// because nobody should sync from you at all!
// . on startup, if you have your insync.dat file, set
// your Host::m_inSync to true
// . on startup delete your cleanshutdown.dat file
// . on startup delete you insync.dat file
// . on exit touch cleanshutdown.dat
// . if you are shutdown in a shutdown request broadcast to all in the cluster
// then touch your insync.dat file iff your Host::m_inSync is true!
// . in PingServer.cpp send your m_cleanShutdown and m_inSync bits in your
// ping requests/replies so everyone knows your state
// . everyone should default everyone else to false for those bits
// . if a msg request is sent to a host that knows it is out of sync or
// did not have a clean shutdown, then it should send back an ENOSYNC error
// and multicast will re-route that to a twin. if all twins return ENOSYNC
// then the multicast just fails with ENOSYNC!!
// . Multicast should not send requests to such hosts, the same way it avoids
// dead hosts now.
// . if a host goes dead on you, sets its Host::m_inSync to false
// . display a red "X" in status column of hosts that did not shutdown cleanly
// . display a "Z" in the status column of out of sync hosts
// ** SYNCDB **
// . store all msg4 requests you receive into syncdb
// . every msg4 request has a unique, always increasing id, the "zid"
// . use the current timestamp in milliseconds as the zid (add a little
// to it to make sure they only increase)
// . ensure your clock is sync'ed with a host in group #0's clock, and ensure
// -they- are sync'ed with the official time server before doing any msg4s!
// . core dump if CLOCK not in sync and msg4 is called!
// . spiders are not allowed to start until our CLOCK is in sync because spider
// uses clock to determine what to spider.
// . sanity check zids to make sure they only strictly increase
// . every host in your group will add a "checkoff" record to your syncdb
// telling you what zids they have received.
// . once you have received all checkoff requests for a zid you can permanently
// delete that msg4 request from your syncdb. all your twins have it.
// ** SYNC LOOP **
// . tries to update the syncdb so that we can add the msg4 request lists.
// in order to add a list we must have received SUCCESSFUL checkoff replies
// from all *alive* twins. then we can add it. we remove the "need to add
// meta list" key from syncdb once we add it, to avoid re-adds.
// . keep a m_startKey that points to the first key in syncdb that is not
// referencing a dead twin! that will save A LOT OF TIME!!!
// . when scanning your keys in syncdb, if you find an incoming checkoff
// request that is 60+ seconds old and you have not received the
// corresponding msg4 request yourself yet, OR, you got a msg4 request zid
// from the same sid that occurs after that zid, then send a msg0 request
// for the msg4 request from that twin's syncdb. and set your Host::m_inSync
// to false. assume the zid is the timestamp as far as computing the 60
// second thing. add the msg0 list you get back to syncdb.
// . only actually add the msg4 request list after you have received
// replies for all your checkoff requests from all your *alive* twins. so
// msg4::handleRequest() should just add everything to syncdb... that
// prevents us from adding the list out of order!
// . launch those requests to all twins at once using Multicast. send out
// checkoff requests using msg1 for them to add to their syncdbs!
// Multicast will skip dead guys. if there was any other error Multicast
// will timeout or whatever and should set Multicast::m_errno! verify!!
// . if you receive a checkoff request from a host for zid #X but you
// are missing its checkoff request for zid #X-a then send back an
// ENOSYNC reply so it does not add the list yet!
// . if you complete a sync loop w/o finding such old checkoffs, then set
// your Host::m_inSync to true... UNLESS the MAJORITY of hosts in the
// network are DEAD as seen by you. (use Hostdb::m_numDeadHosts)
// . set your Host::m_inSync to false if the MAJORITY of hosts in your
// network are seen as "DEAD" by you. (use Hostdb::m_numDeadHosts)
// and only if you are not "shutting down!"
// . round robin over the *alive* twins to balance the load
// . when you get the msg4 request, then you can call handleRequest4()
// on it which will add stuff to the syncdb tree...
// . PROBLEM: how can we be sure that we are adding the zids in order? well
// every twin must get a reply to its checkoff request for moving on to
// the next checkoff request for a successive zid. it can also have multiple
// zids in one checkoff request, which is just a msg1 add request with a list
// of keys!
// . PROBLEM: let's say both eth0 and eth1 ports go down on you.
// you will still think you are in sync! i would say if the MAJORITY of
// hosts as seen by you are DEAD, then set your Host::m_inSync bit to
// false and do not allow it to be set to true until the MAJORITY of hosts
// are up again.
// . PROBLEM: the network is divided into two parts that can not communicate
// with each other. both pieces are then considered out of sync!! well
// when the network is re-enabled they will quickly realize they are all
// out of sync and set their Host::m_inSync values to false.
// . TODO: spider urls that a dead host should be spidering
// . TODO: use better routing when a host is dead. seems like right now we
// just reroute/bias towards its neighbor. i.e. Msg20, Msg39, etc.
// . keep all spares running at all times.
// . a spare should self-diagnose itself and generate then store its
// performance stats in performance.txt. it should certify itself! include
// its stats in its ping. but need to merge fff into gbmaster before doing
// this.
// . use Msg55 to sync a host whose twins are actively spidering. copies
// the rdb files over. suspends all merging operations on files.
// . get rid of gb replacehost ("replacehost=1&rhost=%li&rspare=%li")
// and have a "replace dead hosts" blue link in master controls.
// and have an 'automatically replace dead hosts' link in master controls.
#ifndef _SYNCDB_H_
#define _SYNCDB_H_
#include "Rdb.h"
#include "Msg0.h"
#define MAX_TO_ADD 500
#define MAX_CHECKOFF_KEYS 500
class Syncdb {
public:
// set up our private rdb
Rdb *getRdb ( ) { return &m_rdb; };
bool gotMetaListRequest ( class UdpSlot *slot ) ;
bool gotMetaListRequest ( char *req , long reqSize , uint32_t sid );
key128_t makeKey ( char a ,
char b ,
char c ,
char d ,
uint32_t tid ,
uint32_t sid ,
uint64_t zid ,
char delBit ) ;
uint64_t getZid ( key128_t *k ) { return k->n0 & 0xfffffffffffffffeLL;}
uint32_t getSid ( key128_t *k ) { return k->n1 & 0x00ffffff;}
uint32_t getTid ( key128_t *k ) { return (k->n1>>32) & 0x00ffffff;}
void loop1();
bool sentAllCheckoffRequests ( uint32_t sid , uint64_t zid ) ;
bool loop2 ( ) ;
bool gotList ( ) ;
void loop3 ( ) ;
bool canDeleteMetaList ( uint32_t sid , uint64_t zid ) ;
bool loop4 ( ) ;
bool gotList4 ( ) ;
void loop5 ( ) ;
bool addedList5 ( ) ;
void bigLoop ( ) ;
void loopReset() ;
void reset() ;
static bool registerHandlers ( ) ;
bool init ( ) ;
bool save ( ) ;
bool verify ( char *coll ) ;
bool syncHost ( long syncHostId ) ;
void rcpFiles ( ) ;
void syncStart_r ( bool amThread ) ;
void syncDone ( ) ;
bool addColl ( char *coll, bool doVerify = false );
// private:
// this rdb holds urls waiting to be spidered or being spidered
Rdb m_rdb;
RdbTree m_qt;
RdbMem m_stack;
uint32_t m_requestedSid;
bool m_doRcp;
bool m_rcpStarted;
bool m_calledLoop1;
bool m_calledLoop2;
bool m_calledLoop3;
bool m_calledLoop4;
bool m_calledLoop5;
// bookmarks
key128_t m_syncKey;
key128_t m_nextk;
bool m_outstanding;
long m_addCount;
key128_t m_addMe[MAX_TO_ADD];
long m_ia;
long m_na;
Msg5 m_msg5;
RdbList m_list;
// for syncing
Msg0 m_msg0;
// checkoff requests
key128_t m_keys [ MAX_CHECKOFF_KEYS ];
long m_nk;
};
extern class Syncdb g_syncdb;
#endif