#include "Facebook.h"
#include "HttpServer.h"
#include "sort.h"
#include "Repair.h"

// use this to get an access token for the event guru app
// https://graph.facebook.com/oauth/access_token?
// client_id=YOUR_APP_ID&client_secret=YOUR_APP_SECRET&
// grant_type=client_credentials

// use this to cache a facebook user's friends, and userid of themselves
Facebookdb g_facebookdb;
Likedb g_likedb;

static void queueSleepWrapper ( int fd, void *state );
bool base64Decode ( char *dst, char *src, int32_t dstSize ) ;

///////////////////////////
//
// FACEBOOKDB
//
///////////////////////////

// Resets the underlying rdb.
void Facebookdb::reset() {
	m_rdb.reset();
}

// . Initializes facebookdb. A no-op returning true unless
//   g_conf.m_indexEventsOnly is set.
// . Loads the saved query loop state, registers queueSleepWrapper as a
//   sleep callback and initializes the collectionless rdb.
// . Returns false if any of those steps fail.
bool Facebookdb::init ( ) {
	if ( ! g_conf.m_indexEventsOnly ) return true;
	// load this here so calling saveQueryLoopState() does not overwrite it
	loadQueryLoopState ();
	// hit the queue
	if ( ! g_loop.registerSleepCallback(500,NULL,queueSleepWrapper))
		return false;
	// (a commented-out base64Decode() debug snippet that embedded a
	//  captured signed_request payload used to live here; removed so no
	//  user token is kept in the source)
	// . what's max # of tree nodes?
	// . assume avg facebookdb rec size of about 1000 bytes
	// . NOTE: 32 bytes of the 1000 are overhead
	int32_t maxMem = 5000000;
	int32_t maxTreeNodes = maxMem / 1000;//82;
	// each entry in the cache is usually just a single record, no lists,
	// unless a hostname has multiple sites in it. has 24 bytes more
	// overhead in cache.
	//int32_t maxCacheNodes = g_conf.m_tagdbMaxCacheMem / 106;
	// we now use a page cache for the banned turks table which
	// gets hit all the time
	//if(! m_pc.init ("facebookdb",RDB_TAGDB,10000000,GB_TFNDB_PAGE_SIZE))
	//	return log("facebookdb: Tagdb init failed.");
	// initialize our own internal rdb
	if ( ! m_rdb.init ( g_hostdb.m_dir ,
			    "facebookdb" ,
			    true , // dedup same keys?
			    -1 , // fixed record size
			    2,//g_conf.m_tagdbMinFilesToMerge ,
			    maxMem, // 5MB g_conf.m_tagdbMaxTreeMem ,
			    maxTreeNodes ,
			    // now we balance so Sync.cpp can ordered huge list
			    true , // balance tree?
			    0 , //g_conf.m_tagdbMaxCacheMem ,
			    0 , //maxCacheNodes ,
			    false , // half keys?
			    false , //m_tagdbSaveCache
			    NULL, // &m_pc ,
			    false, // is titledb
			    false, // preload disk page cache
			    sizeof(key96_t), // key size
			    false , // bias disk page cache?
			    true )) // iscollectionless? syncdb,facebookdb,...
		return false;
	// add the base since it is a collectionless rdb
	return m_rdb.addColl ( NULL );
}

// Adds the collection to the rdb. "doVerify" is not used here.
bool Facebookdb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	return true;
}

///////////////////////
//
// MSGFB
//
///////////////////////

// The members that reset() inspects must be initialized before calling it.
Msgfb::Msgfb ( ) {
	m_facebookReply = NULL;
	//m_facebookReply2 = NULL;
	m_msg7 = NULL;
	m_fbId = 0LL;
	m_inProgress = false;
	reset();
}

#include "Process.h"

// Returns the object to its idle state and frees every buffer it owns.
void Msgfb::reset ( ) {
	// this can happen if we try to save and exit while in progress
	if ( g_process.m_mode != EXIT_MODE && m_inProgress ) {
		char *xx=NULL;*xx=0; }
	m_requests = 0;
	m_replies = 0;
	m_errno = 0;
	m_errorCount = 0;
	m_fbId = 0;
	m_state = NULL;
	m_callback = NULL;
	m_collnum = 0;
	m_niceness = MAX_NICENESS;
	m_socket = NULL;
	m_retryCount = 0;
	m_widgetId = 0;
	m_userToUserWidgetId = 0LL;
	//m_fbrec.reset();
	if ( m_facebookReply ) {
		mfree ( m_facebookReply , m_facebookAllocSize ,"msgfb");
		m_facebookReply = NULL;
	}
	//if ( m_facebookReply2 ) {
	//	mfree ( m_facebookReply2 , m_facebookAllocSize2 ,"msgfb");
	//	m_facebookReply2 = NULL;
	//}
	m_rbuf.purge();
	if ( m_msg7 ) {
		mdelete ( m_msg7, sizeof(Msg7) , "FBInject" );
		delete ( m_msg7);
		m_msg7 = NULL;
	}
	m_list1.freeList();
	m_list2.freeList();
	m_list3.freeList();
	m_hr.reset();// = NULL;
	m_fbrecPtr = NULL;
	m_likedbTable.reset();
	m_evPtrBuf.purge();
	m_evIdsBuf.purge();
	m_fidBuf.purge();
	m_eidBuf.purge();
	m_dedupEidBuf.reset();
	// free mem
	m_fullReply.purge();
}

Msgfb::~Msgfb ( ) {
	reset();
}

///////////////////////
//
// MSGFB PIPELINE #1
//
///////////////////////

// Callback for the facebookdb lookup; calls the user callback once the
// pipeline finishes without blocking again.
static void gotFBUserRecWrapper ( void *state ) {
	Msgfb *mfb = (Msgfb *)state;
	if ( ! mfb->gotFBUserRec ( ) ) return;
	mfb->m_callback ( mfb->m_state );
}

// Callback for the access token download.
static void gotFBAccessTokenWrapper ( void *state , TcpSocket *s ) {
	Msgfb *mfb = (Msgfb *)state;
	if ( ! mfb->gotFBAccessToken( s ) ) return;
	mfb->m_callback ( mfb->m_state );
}

// . format like strncpy()
// . decodes the base64 text in "src" into "dst" and \0 terminates it
// . accepts both the standard ('+','/') and the url-safe ('-','_')
//   alphabets; decoding stops at the end of "src" or at the first '='
// . characters outside the alphabet decode as 0, as they always have
// . never writes more than "dstSize" bytes including the \0
// . returns false (with "dst" still \0 terminated) if "dst" is too small
bool base64Decode ( char *dst, char *src, int32_t dstSize ) {
	// need room for at least the \0
	if ( ! dst || dstSize <= 0 ) return false;
	*dst = '\0';
	if ( ! src ) return true;
	// make the map
	static unsigned char s_bmap[256];
	static bool s_init = false;
	if ( ! s_init ) {
		memset ( s_bmap , 0 , 256 );
		unsigned char val = 0;
		for ( unsigned char c = 'A' ; c <= 'Z'; c++ ) s_bmap[c] = val++;
		for ( unsigned char c = 'a' ; c <= 'z'; c++ ) s_bmap[c] = val++;
		for ( unsigned char c = '0' ; c <= '9'; c++ ) s_bmap[c] = val++;
		if ( val != 62 ) { char *xx=NULL;*xx=0; }
		s_bmap[(unsigned char)'+'] = 62;
		s_bmap[(unsigned char)'/'] = 63;
		// url-safe variants, used by facebook's signed_request
		s_bmap[(unsigned char)'-'] = 62;
		s_bmap[(unsigned char)'_'] = 63;
		// only flag it once the map is fully built
		s_init = true;
	}
	// the last byte is reserved for the \0
	char *dstEnd = dst + dstSize - 1;
	unsigned char *p = (unsigned char *)src;
	while ( *p && *p != '=' ) {
		// gather up to 4 sextets. a short final group is zero padded.
		unsigned char v[4] = { 0 , 0 , 0 , 0 };
		int32_t n = 0;
		for ( ; n < 4 && *p && *p != '=' ; n++ ) v[n] = s_bmap[*p++];
		// 2 sextets make 1 byte, 3 make 2 and 4 make 3
		int32_t numBytes = n - 1;
		// a lone trailing sextet does not make a whole byte
		if ( numBytes <= 0 ) break;
		// check BEFORE writing so we never overrun dst
		if ( dst + numBytes > dstEnd ) {
			log("facebook: bas64decode breach");
			*dst = '\0';
			return false;
		}
		unsigned char out[3];
		out[0] = (unsigned char)((v[0] << 2) | (v[1] >> 4));
		out[1] = (unsigned char)((v[1] << 4) | (v[2] >> 2));
		out[2] = (unsigned char)((v[2] << 6) |  v[3]      );
		for ( int32_t i = 0 ; i < numBytes ; i++ )
			*dst++ = (char)out[i];
	}
	// null term right after the last decoded byte
	*dst = '\0';
	return true;
}

// . entry point of pipeline #1: figure out which facebook user, if any,
//   issued the http request "hr"
// . returns false if it blocked, in which case "callback(state)" is called
//   when done. returns true if it completed without blocking.
// . on return m_fbrecPtr may be left NULL if no user info is available
bool Msgfb::getFacebookUserInfo ( HttpRequest *hr,
				  TcpSocket *s,
				  char *coll,
				  void *state ,
				  char *redirPath,
				  void (* callback)(void *state) ,
				  int32_t niceness ) {
	reset();
	m_state = state;
	m_callback = callback;
	//m_hr = hr;
	m_oldFbrec = NULL;
	m_redirPath = redirPath;
	m_socket = s;
	m_collnum = g_collectiondb.getCollnum ( coll );
	m_retryCount = 0;
	// they just logged out
	if ( hr->getLong("logout",0 ) ) return true;
	// need to make a copy of it so we can access it
	m_hr.copy ( hr );
	// save this
	m_widgetId = hr->getLongLongFromCookie("widgetid",0LL);
	// make the facebook key url for tagdb lookup
	char *fbidStr = m_hr.getStringFromCookie("fbid",NULL);
	// check for "usefbid"
	char *useFbid = m_hr.getString("usefbid",NULL);
	// that overrides
	if ( useFbid ) {
		int64_t used = strtoull(useFbid,NULL,10);
		int32_t h32a = hash32 ( (char *)&used, 8 );
		// this has to be a int64_t because fh is printed as
		// an uint32_t and atol() will croak
		int32_t h32b = m_hr.getLongLong("fh",0);
		if ( h32a != h32b ) {
			useFbid = NULL;
			log("facebook: bad usefbid=%"INT64" h32=%"UINT32" fh=%"UINT32"",
			    used,h32a,h32b);
		}
		else
			fbidStr = useFbid;
	}
	// they've logged in before if we got this cookie, otherwise,
	// they got cookies off or they have not logged in ever yet, so
	// we gotta ask facebook for the access token.
	if ( ! fbidStr ) return downloadAccessToken ( );
	// . make the facebook key url for facebook user rec
	// . we are here because we are assuming they are already logged in
	//   at some point in time and we should try to get their list
	//   of friends from the facebookdb rec so if they click
	//   to show all the events their friends are going to we can do that
	int64_t fbId = strtoull ( fbidStr , NULL, 10 );
	// if they come into the canvas page we receive an encoded access
	// token and fbid from facebook. so get that and do the lookup.
	// if we do not have the username in the facebookdb rec or we do not
	// even have a facebook id rec, then we will have to use cmd1 fql
	// before they can be logged in.
	char *sr = m_hr.getString("signed_request",NULL);
	// the payload follows the first '.'
	if ( sr ) sr = strchr(sr,'.');
	if ( sr ) sr++;
	// decode that. keep dsr a valid empty string when there is nothing
	// to decode so it is never read uninitialized.
	char dsr[2048];
	dsr[0] = '\0';
	if ( sr ) base64Decode ( dsr , sr , 2040 );
	// parse that json for fb user id
	char *jsonfbid = NULL;
	if ( sr ) jsonfbid = strstr ( dsr , "user_id\":\"" );
	if ( jsonfbid ) jsonfbid += 10;
	if ( jsonfbid ) fbId = atoll(jsonfbid);
	// sometimes there's also an access token, if that is the case
	// we can just right to the cmd1 fql call downloadFBUserInfo().
	// only look when we actually decoded something ("dsr" is an array
	// so testing the array itself was always true).
	char *at = NULL;
	if ( sr ) at = strstr ( dsr , "oauth_token\":\"" );
	if ( at ) at += 14;
	if ( at && fbId ) {
		// copy into m_accessToken
		char *p = at;
		for ( ; *p && *p != '\"' && *p != ',' ; p++);
		*p = '\0';
		if ( p - at + 1 < MAX_TOKEN_LEN ) {
			strcpy ( m_accessToken , at );
			log("facebook: got access token from canvas app");
			if ( ! downloadUserToUserRequestInfo() ) return false;
			return downloadFBUserInfo();
		}
	}
	// for the facebookdb lookup to be legit this must be non-zero
	if ( fbId == 0LL ) return downloadAccessToken ( );
	// the key range covers both the low-bit-0 and low-bit-1 keys of fbId
	key96_t startKey;
	key96_t endKey;
	startKey.n1 = 0;
	startKey.n0 = fbId;
	endKey.n1 = 0;
	endKey.n0 = fbId;
	startKey.n0 <<= 1;
	endKey.n0 <<= 1;
	endKey.n0 |= 0x01;
	if ( ! m_msg0.getList ( -1, // hostid
				0 , // ip
				0 , // port
				0 , // maxcacheage
				false, // addtocache
				RDB_FACEBOOKDB,
				"",//coll,
				&m_list1,
				(char *)&startKey,
				(char *)&endKey,
				10, // minrecsizes
				this ,
				gotFBUserRecWrapper,
				niceness ) )
		return false;
	// i guess we got it without blocking
	return gotFBUserRec();
}

// . called once the facebookdb lookup into m_list1 is done
// . returns false if it blocked, true otherwise
bool Msgfb::gotFBUserRec ( ) {
	// . it can be empty if never got saved!
	// . or if they logged out then logged back in, fbid will be 0
	if ( m_list1.getListSize() <= 0 ) return downloadAccessToken();
	// cast the list
	FBRec *fbrec = (FBRec *)m_list1.getList();
	// save that ptr
	m_fbrecPtr = fbrec;
	// get timestamp on that
	//int32_t now = getTimeGlobal();
	//int32_t elapsed = now - fbrec->m_accessTokenCreated;
	// if stale, re-get it all from facebook.
	//if ( elapsed >= 60*60 ) return downloadAccessToken ( );
	// if still fresh, don't bother hitting facebook again, but we
	// need to deserialize
	deserializeMsg ( sizeof(FBRec) ,
			 &fbrec->size_accessToken ,
			 &fbrec->size_friendIds ,
			 &fbrec->ptr_accessToken ,
			 fbrec->m_buf );
	// need to set this for printBlackBar()
	m_fbId = fbrec->m_fbId;
	log("facebook: loaded fbrec for fbid=%"UINT64"",fbrec->m_fbId);
	log("facebook: loaded emailfreq=%"INT32"",(int32_t)fbrec->m_emailFrequency);
	log("facebook: loaded myradius=%"INT32"",(int32_t)fbrec->m_myRadius);
	// or if its an hour old i guess, get another token
	//int32_t expires = fbrec->m_accessTokenCreated + 3600;
	// debug
	//expires = 0;
	//if ( getTimeGlobal() > expires ) {
	//	// use this to save again with new access token
	//	m_oldFbrec = fbrec;
	//	return downloadAccessToken();
	//}
	// use the same access token
	return true;
}

// . exchanges the "code" cgi parm facebook gave us for an access token
// . returns true right away if the request has no "code" parm
// . returns false if the download blocked
bool Msgfb::downloadAccessToken ( ) {
	//
	// ok, no cookie, so see if user just pushed the login button
	//
	int32_t fbcodeLen = 0;
	char *fbcode = m_hr.getString("code", &fbcodeLen, NULL);
	// no they did not, no user info is available?
	if ( ! fbcode ) return true;
	// get current page url (minus the facebook code=)
	SafeBuf cup;
	m_hr.getCurrentUrl(cup);
	//m_hr->getCurrentUrlPath(cup);
	// but take out the &code=
	char *fix = strstr(cup.getBufStart(),"&code=");
	if ( fix ) *fix = 0;
	cup.urlEncode();
	// use code to get access token
	// that calls https://graph.facebook.com/oauth/access_token?
	// client_id=YOUR_APP_ID&redirect_uri=YOUR_URL&
	// client_secret=YOUR_APP_SECRET&code=THE_CODE_FROM_ABOVE
	// NOTE(review): "code" comes straight from the request and is put
	// into the url unencoded - confirm whether it needs url encoding.
	char fbuf[1024];
	SafeBuf fburl ( fbuf,1024 );
	fburl.safePrintf ("https://graph.facebook.com/oauth/access_token?"
			  "client_id=%s&"
			  "redirect_uri=%s&"
			  "client_secret=%s&"
			  "code=%s"
			  , APPID
			  //, APPHOSTENCODEDNOSLASH
			  //, m_redirPath
			  //
			  // KEEP It simple because this redirect_uri
			  // must EXACTLY match the one we used in
			  // PageEvents.cpp. and it can't have cgi crap in it
			  // because facebook strips them out...
			  //
			  //, "/"
			  , cup.getBufStart()
			  , APPSECRET
			  , fbcode );
	// this fburl must match the previous url i think... i think the
	// "page" in it changes up and mess wit hit
	log("facebook: getting access token code=%s url=%s",fbcode,
	    fburl.getBufStart());
	// reset
	g_errno = 0;
	if ( ! g_httpServer.getDoc ( fburl.getBufStart() ,
				     0 , // urlIp
				     0 , // offset
				     -1 ,
				     0 , // ifModifiedSince ,
				     this , // state
				     gotFBAccessTokenWrapper , // callback
				     10*1000 , // 10 sec timeout
				     0 , // proxyip
				     0 , // proxyport
				     10000 , // maxTextDocLen ,
				     10000 , // maxOtherDocLen ,
				     g_conf.m_spiderUserAgent ) )
		// return false if blocked
		return false;
	// error? it only returns true right away with g_errno set
	if ( ! g_errno ) { char *xx=NULL;*xx=0; }
	// let caller know we did not block
	return gotFBAccessToken ( NULL );
}

// . parses "access_token=...[&...]" out of the http reply in "s"
// . returns false if a follow-up download blocked
// . returns true with m_errno set and m_errorCount bumped on error
// . "s" is only NULL when g_errno is set (see downloadAccessToken())
bool Msgfb::gotFBAccessToken ( TcpSocket *s ) {
	// some kind of error?
	if ( g_errno ) {
		log("facebook: error launching read of access token: %s",
		    mstrerror(g_errno));
		m_errno = g_errno;
		m_errorCount++;
		return true;
	}
	// the access token should be in the reply
	char *reply = s->m_readBuf;
	int32_t replySize = s->m_readOffset;
	// mime error?
	HttpMime mime;
	// exclude the \0 i guess. use NULL for url.
	mime.set ( reply, replySize - 1, NULL );
	// not good?
	int32_t httpStatus = mime.getHttpStatus();
	if ( httpStatus != 200 ) {
		log("facebook: bad access request http status = %"INT32". "
		    "reply=%s",
		    httpStatus , reply );
		g_errno = EBADREPLY;
		m_errno = g_errno;
		m_errorCount++;
		return true;
	}
	// point to content
	char *content = reply + mime.getMimeLen();
	// assume no accesstoken provided
	//int32_t expires = 0;
	m_accessToken[0] = '\0';
	// look for access token
	//sscanf(content,"access_token=%s&expires=%"INT32"",m_accessToken,&expires);
	char *at = strstr(content,"access_token=");
	if ( at ) {
		char *p = at + 13;
		char *start = p;
		for ( ; *p && *p != '&' ;p++ );
		int32_t len = p - start;
		// the reply is remote input, so an oversized token is a bad
		// reply, not a reason to take the whole process down
		if ( len > MAX_TOKEN_LEN ) {
			log("facebook: access token too long");
		}
		else {
			gbmemcpy ( m_accessToken , start , len );
			m_accessToken [ len ] = '\0';
		}
	}
	// error?
	if ( ! m_accessToken[0] ) {
		log("facebook: could not find access token");
		g_errno = EBADREPLY;
		m_errno = g_errno;
		m_errorCount++;
		return true;
	}
	// set this timestamp
	//m_accessTokenCreated = getTimeGlobal();
	// sanity
	if ( gbstrlen(m_accessToken) > MAX_TOKEN_LEN ) { char *xx=NULL;*xx=0;}
	if ( ! downloadUserToUserRequestInfo() ) return false;
	return downloadFBUserInfo();
}

// Callback for the fql user info download. Retries the download up to
// 5 times if the reply handler left g_errno set.
static void gotFQLUserInfoWrapper ( void *state , TcpSocket *s ) {
	Msgfb *mfb = (Msgfb *)state;
	// . returns false if it blocks
	// . returns true with g_errno set on error
	if ( ! mfb->gotFQLUserInfo ( s ) ) return;
	// error?
	if ( g_errno && mfb->m_retryCount++ < 5 ) {
		// retry again. this returns false if blocks;
		if ( ! mfb->downloadFBUserInfo() ) return;
		// probably an error if it returns true!!
	}
	mfb->m_callback ( mfb->m_state );
}

////////////////
//
// BEGIN SPECIAL FACEBOOK APPREQUEST parsing for m_userToUserWidgetId
//
///////////////

// Callback for the apprequests download.
static void gotFBUserToUserRequestWrapper ( void *state , TcpSocket *s ) {
	Msgfb *mfb = (Msgfb *)state;
	if ( ! mfb->gotFBUserToUserRequest ( s ) ) return;
	mfb->m_callback ( mfb->m_state );
}

// before calling downloadFBUserInfo() call this to set the m_widgetId
// parameter correctly! so we can see which facebook user sent them
// this user_to_user apprequest.
we need to do this before downloading // the user info so we can store the m_originatingWidgetId into the // FBRec before we save it! bool Msgfb::downloadUserToUserRequestInfo ( ) { // hmmm. hr is invalid here. char *request_ids = m_hr.getString("request_ids",NULL); if ( ! request_ids ) return true; // if already got one, forget this... if ( m_widgetId ) return true; char fbuf[1024]; SafeBuf fburl ( fbuf,1024 ); fburl.safePrintf ("https://graph.facebook.com/me/apprequests/?" "request_ids=%s&access_token=%s" , request_ids , m_accessToken ); // this fburl must match the previous url i think... i think the // "page" in it changes up and mess wit hit log("facebook: getting referral fbid url=%s", fburl.getBufStart()); // reset g_errno = 0; if ( ! g_httpServer.getDoc ( fburl.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state gotFBUserToUserRequestWrapper , 10*1000 , // 10 sec timeout 0 , // proxyip 0 , // proxyport 10000 , // maxTextDocLen , 10000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return false; // error? if ( ! g_errno ) { char *xx=NULL;*xx=0; } // let caller know we did not block return gotFBUserToUserRequest ( NULL ); } bool Msgfb::gotFBUserToUserRequest ( TcpSocket *s ) { // some kind of error? if ( g_errno ) { log("facebook: error launching read of fb request: %s", mstrerror(g_errno)); m_errno = g_errno; m_errorCount++; return true; } // the access token should be in the reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( reply, replySize - 1, NULL ); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) { log("facebook: bad fb request reply http status = %"INT32". 
" "reply=%s", httpStatus , reply ); g_errno = EBADREPLY; m_errno = g_errno; m_errorCount++; return true; } // point to content char *content = reply + mime.getMimeLen(); // https://graph.facebook.com/me/apprequests/?request_ids=417422284951090&access_token=AAAFEg3QA5ygBALw0wqIMOZAD6zfbZBZBaH9mf7sw92kLtOqVtPjasQibZCo4P5R0HHztOnObBeoDoKbwM1ZChht04JJ7KrgkxcXwtNWcrngZDZD //{ // "data": [ // { // "id": "417422284951090_100003381767946", // "application": { // "name": "Event Guru", // "namespace": "eventguru", // "canvas_name": "eventguru", // "id": "356806354331432" // }, // "to": { // "name": "Jezebel Wells", // "id": "100003381767946" // }, // "from": { // "name": "Matt Wells", // "id": "100003532411011" // }, // "data": "100003532411011", // "message": "Hi my friend, I recommend EventGuru.com for discovering interesting local events. Plus, if you login to Event Guru I make a buck.", // "created_time": "2012-03-29T02:41:37+0000" // } // ], // "paging": { // "previous": "https://graph.facebook.com/me/apprequests?request_ids=417422284951090&access_token=AAAFEg3QA5ygBALw0wqIMOZAD6zfbZBZBaH9mf7sw92kLtOqVtPjasQibZCo4P5R0HHztOnObBeoDoKbwM1ZChht04JJ7KrgkxcXwtNWcrngZDZD&limit=50&since=1332988897&__paging_token=417422284951090_100003381767946&__previous=1", // "next": "https://graph.facebook.com/me/apprequests?request_ids=417422284951090&access_token=AAAFEg3QA5ygBALw0wqIMOZAD6zfbZBZBaH9mf7sw92kLtOqVtPjasQibZCo4P5R0HHztOnObBeoDoKbwM1ZChht04JJ7KrgkxcXwtNWcrngZDZD&limit=50&until=1332988897&__paging_token=417422284951090_100003381767946" // } //} // mine out who sent it char *from = strstr ( content , "\"from\":" ); int64_t id1 = 0LL; if ( from ) { char *ids = strstr ( from , "\"id\":" ); for ( ; ids && *ids && ! is_digit(*ids) ; ids++ ); if ( ids && *ids ) id1 = atoll(ids); } // mine out who sent it again... this is only present for paid invites char *data = NULL; // start mining AFTER "from" because there is a top "data" field!! 
if ( from ) data = strstr ( from , "\"data\":" ); int64_t id2 = 0LL; if ( data ) { char *ids = strstr ( data , "\"id\":" ); for ( ; ids && *ids && ! is_digit(*ids) ; ids++ ); if ( ids && *ids ) id2 = atoll(ids); } // must match to be an official paid invite. we only include // the "data" for the paid invites. if ( id1 == id2 ) // set that to our widgetid then m_widgetId = id1; // download facebook info now return downloadFBUserInfo(); } //////////////// // // END SPECIAL FACEBOOK APPREQUEST parsing for user_to_user m_widgetId // /////////////// //int64_t mdw = 100003532411011LL; // my uid for Matt Wells // 100003316058818 // uid for flurbit // http://graph.facebook.com/502303355/picture shows pics for a uid bool Msgfb::downloadFBUserInfo ( ) { log("facebook: downloading user info initial login"); SafeBuf cmd1; // get your facebook id cmd1.safePrintf ( "SELECT uid,username,first_name,last_name,name,pic_square,profile_update_time,timezone,religion,birthday,birthday_date,sex,hometown_location,current_location,activities,interests,is_app_user,music,tv,movies,books,about_me,status,online_presence,proxied_email,verified,website,is_blocked,contact_email,email,is_minor,work,education,sports,languages,likes_count,friend_count FROM user where uid=me()"); log("facebook: cmd1 = %s",cmd1.getBufStart()); cmd1.urlEncode(); // www.howtobe.pro/facebook-graph-api-graph-api-for-issuing-fql-queries // make a url SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd1.getBufStart() , m_accessToken ); log("facebook: getting url = %s",ubuf.getBufStart()); // reset g_errno = 0; // get the results if ( ! 
g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state gotFQLUserInfoWrapper , // callback 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return false; // otherwise, somehow got it without blocking... wtf? //return gotFQLReply(); if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); return true; } static bool queueFBId ( int64_t fbId , collnum_t collnum ); static void savedFBRecWrapper1 ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! g_errno ) queueFBId ( mfb->m_fbId , mfb->m_collnum ); mfb->m_callback ( mfb->m_state ); } static void doneRecheckingWrapper ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->doneRechecking ( ) ) return; mfb->m_callback ( mfb->m_state ); } // returns true with g_errno set on error bool Msgfb::gotFQLUserInfo ( TcpSocket *s ) { // bail on error if ( g_errno ) { log("fql: %s",mstrerror(g_errno)); m_errno = g_errno; m_errorCount++; return true; } // get reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; // we reference into this, so do not free it!! m_facebookReply = s->m_readBuf; m_facebookReplySize = s->m_readOffset; m_facebookAllocSize = s->m_readBufSize; // do not allow tcpsocket to free it. we free it in destructor. s->m_readBuf = NULL; // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( reply, replySize - 1, NULL ); // not good? 
int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) { log("facebook: bad fql request http status = %"INT32"", httpStatus ); g_errno = EBADREPLY; m_errno = g_errno; m_errorCount++; return true; } // point to content char *content = reply + mime.getMimeLen(); int32_t contentLen = reply + replySize - content; // check for error char *errMsg = strstr(content,""); if ( errMsg ) { log("facebook: error in fql reply: %s", content ); g_errno = EBADREPLY; m_errno = g_errno; m_errorCount++; return true; } // set it on stack i guess //FBRec fbrec; // all the FBRec::ptr_* things reference into "reply" // import friendids from existing rec if there, we just want to // save the new access token!!!! if ( ! setFBRecFromFQLReply ( content,contentLen, &m_fbrecGen) ){ log("fql: error setting fb rec from fql"); g_errno = EBADREPLY; return true; } // must be there! if ( ! m_fbrecGen.m_fbId ) { log("fql: failed to get facebook id from reply"); g_errno = EBADENGINEER; m_errno = g_errno; m_errorCount++; return true; } // sanity log("facebook: got initial facebook reply. fbid=%"INT64"",m_fbId); // point to it! m_fbrecPtr = &m_fbrecGen; // now that we got the fbid see if its in facebookdb again int64_t fbId = m_fbrecGen.m_fbId; key96_t startKey; key96_t endKey; startKey.n1 = 0; startKey.n0 = fbId; endKey.n1 = 0; endKey.n0 = fbId; startKey.n0 <<= 1; endKey.n0 <<= 1; endKey.n0 |= 0x01; //char *coll = g_collectiondb.getColl(m_collnum); if ( ! m_msg0.getList ( -1, // hostid 0 , // ip 0 , // port 0 , // maxcacheage false, // addtocache RDB_FACEBOOKDB, "",//coll, &m_list4, (char *)&startKey, (char *)&endKey, 12, // minrecsizes this , doneRecheckingWrapper, 0 ) )//niceness ) ) return false; // i guess we got it without blocking return doneRechecking(); } // // MERGE/UPDATE the old fbrec with the new fbrec // void mergeFBRec ( FBRec *dst , FBRec *src ) { // if we had a pre-existing fbrec in facebookdb, do not just // save m_fbrecGen because it does not have ptr_friendIds set! 
// so inherit or import the friendIds from the pre-existing // rec. otherwise we lose the friends until the cmd2 executes // and that could be a while or that could fail! if ( dst->size_friendIds <= 0 ) { dst->ptr_friendIds = src-> ptr_friendIds; dst->size_friendIds = src->size_friendIds; } // also this stuff too! dst->m_emailFrequency = src->m_emailFrequency; dst->m_myRadius = src->m_myRadius; dst->ptr_mergedInterests = src->ptr_mergedInterests; dst->size_mergedInterests = src->size_mergedInterests; dst->ptr_myLocation = src->ptr_myLocation; dst->size_myLocation = src->size_myLocation; // save this stuff now too for Emailer dst->m_nextRetry = src->m_nextRetry; dst->m_timeToEmail = src->m_timeToEmail; dst->m_lastEmailAttempt = src->m_lastEmailAttempt; // other stuff dst->m_eventsDownloaded = src->m_eventsDownloaded; dst->m_accessTokenCreated = src->m_accessTokenCreated; // new stuff dst->m_flags = src->m_flags; // FB_INQUEUE // for payment info: dst->m_originatingWidgetId = src->m_originatingWidgetId; // overwrite it if zero if ( src->m_firstFacebookLogin ) dst->m_firstFacebookLogin = src->m_firstFacebookLogin; // if it is zero in the old rec, overwrite it! if ( src->m_lastLoginIP ) dst->m_lastLoginIP = src->m_lastLoginIP; //dst-> = src->; } // returns false if blocked, true otherwise bool Msgfb::saveFBRec ( FBRec *fbrec ) { log("facebook: saving fbrec for fbid=%"INT64"",m_fbId); log("facebook: saving myradius=%"INT32"",fbrec->m_myRadius); log("facebook: saving mylocation=%s",fbrec->ptr_myLocation); log("facebook: saving emailfreq=%"INT32"",(int32_t)fbrec->m_emailFrequency); // . returns NULL and sets g_errno on error // . "true" means we should make mr.ptr_* reference into the newly // serialized buffer int32_t replySize; char *reply = serializeMsg ( sizeof(FBRec) , &fbrec->size_accessToken,//1stsizeparm &fbrec->size_friendIds,// lastsizeparm &fbrec->ptr_accessToken , // firststrptr fbrec , // thisptr &replySize , NULL , 0 , false ); // true ); if ( ! 
reply ) { log("facebook: could not save fbrec: %s",mstrerror(g_errno)); return true; } // make the binary tag then for this facebook user //char *rec = (char *)&m_key;//fbId; //int32_t recSize = (char *)&m_ids[m_numIds] - rec; FBRec *serializedRec = (FBRec *)reply; // set this after we know it serializedRec->m_dataSize = replySize - sizeof(key96_t) - 4; //char *coll = g_collectiondb.getColl ( m_collnum ); // use the list we got key128_t startKey; key128_t endKey; startKey.setMin(); endKey.setMax(); // use m_list2 and not m_list since m_fbrecPtr is referencing // m_list's content from our facebookdb lookup above m_list2.set ( reply , replySize , NULL , 0 , (char *)&startKey , (char *)&endKey , -1 , true , // own data? yeah, free it when done false , sizeof(key96_t) ); // . just use TagRec::m_msg1 now // . no, can't use that because tags are added using SafeBuf::addTag() // which first pushes the rdbid, so we gotta use msg4 // . if a host is down we have to fix msg1 (and msg4) so they both // just write to a file until that host comes back up. if ( ! m_msg1.addList ( &m_list2 , RDB_FACEBOOKDB , "none",//coll , this , m_afterSaveCallback, // savedFBRecWrapper , false , 0 ) ) // niceness return false; // no block? //return downloadEvents(); return true; } // returns false if blocked, true otherwise bool Msgfb::doneRechecking ( ) { if ( g_errno ) { log("facebook: recheck lookup failed: %s", mstrerror(g_errno)); return true; } // try to set the old fbrec if ( m_list4.getListSize() > 0 ) { // cast the list m_oldFbrec = (FBRec *)m_list4.getList(); deserializeMsg ( sizeof(FBRec) , &m_oldFbrec->size_accessToken , &m_oldFbrec->size_friendIds , &m_oldFbrec->ptr_accessToken , m_oldFbrec->m_buf ); // sanity if ( m_fbId != m_oldFbrec->m_fbId ) log("facebook: fbid mismatch 3 %"UINT64" != %"UINT64"", m_fbId,m_oldFbrec->m_fbId); // int16_tcuts FBRec *src = m_oldFbrec; FBRec *dst = &m_fbrecGen; // merge src into dst. merge the old into the new. 
mergeFBRec ( dst , src ); } // // if it did not exist, then this is the first time they logged // in i guess... // else { // . try to save this // . PageEvents.cpp issues a Set-Cookie: widgetid=xxxxxx; // if si->m_widget is '1'. it uses si->m_widgetId which // is set from the cgi parms of the iframe. // . use 1 if this is zero, they did not have one then... // . it's important we save the widgetid here correctly // so we can payout the $1 reward. // . if it was a facebook user_to_user app request we // should have called downloadUserToUserRequestInfo() to // set m_widgetId if it was not already set. int64_t widgetId = m_widgetId; if ( ! widgetId ) widgetId = 1; m_fbrecGen.m_originatingWidgetId = widgetId; m_fbrecGen.m_lastLoginIP = m_socket->m_ip; // avoid a core int32_t now ; if ( isClockInSync() ) now = getTimeGlobal(); else now = getTimeLocal(); m_fbrecGen.m_firstFacebookLogin = now; } // saveFBRec should queue it after it saves it. we want to make sure // its only after a successful save because the facebookdb rec // needs to have been saved so we can use its access token to get // get event info of the user and his friends. but since that stuff // is more involved we have a queue that handles the requests. // that 2nd pipeline will set the FBRec::ptr_* things we did not set // here and save them back to facebookdb. m_afterSaveCallback = savedFBRecWrapper1; log("facebook: saving rec for fbid=%"INT64"",m_fbId); // . queue it up. it returns true if added to the queue. // . it won't add it if no room or its already in the queue. if ( ! g_errno && queueFBId ( m_fbrecGen.m_fbId , m_collnum ) ) // . before saving it, mark it has being in the queue!! // . then PageEvents.cpp can tell you that it is still // downloading you and your friends event info so its // incomplete if you try to search for that stuff // . 
once we process it we clear this flag and re-save this rec m_fbrecGen.m_flags |= FB_INQUEUE; // this calls serializeMsg() which mallocs a new reply to add if ( ! saveFBRec( &m_fbrecGen ) ) return false; // all done return true; } class LikedbTableSlot { public: int64_t m_uid; int32_t m_start_time; int32_t m_rsvp; }; bool Msgfb::setFBRecFromFQLReply ( char *content, int32_t contentLen, FBRec *fbrec ) { // int16_tcut char *reply = content; // i've seen crappy facebook return essentially an empty // reply here, so make sure we have all the fields!! if ( ! strstr(reply,"") || ! strstr(reply," ! strstr(reply,"reset(); f->m_fbId = xml.getLongLong("uid",0LL); // need this too i guess m_fbId = f->m_fbId; // scan for friend ids first since we add \0s below char *p = strstr ( content , "" ); for ( ; p ; p = strstr (p+1,"") ) { int64_t uid2 = strtoull(p+6,NULL,10); if ( uid2 <= 0 ) continue; // add it if ( m_fidBuf.getAvail() < 8 && ! m_fidBuf.reserve(5000) ) return false; // push it m_fidBuf.pushLongLong ( uid2 ); } // point to those. might have to re-set if we grow m_fidBuf more! 
f->ptr_friendIds = m_fidBuf.getBufStart(); f->size_friendIds = m_fidBuf.length(); f->ptr_firstName = xml.getString("first_name",&f->size_firstName); f->ptr_lastName = xml.getString("last_name",&f->size_lastName); // get the tag right AFTER , otherwise we // get the multi query name tag "query1" int32_t n0 = xml.getNodeNum ("last_name"); if ( n0 >= 0 ) f->ptr_name = xml.getString(n0,n0+5,"name",&f->size_name); f->ptr_pic_square = xml.getString("pic_square",&f->size_pic_square); f->ptr_religion = xml.getString("religion",&f->size_religion); f->ptr_birthday = xml.getString("birthday",&f->size_birthday); f->ptr_birthday_date = xml.getString("birthday_date", &f->size_birthday_date); f->ptr_sex = xml.getString("sex",&f->size_sex); f->ptr_hometown_location =xml.getString("hometown_location", &f->size_hometown_location); f->ptr_current_location = xml.getString("current_location", &f->size_current_location); f->ptr_activities = xml.getString("activities",&f->size_activities); f->ptr_interests = xml.getString("interests",&f->size_interests); f->ptr_music = xml.getString("music",&f->size_music); f->ptr_tv = xml.getString("tv",&f->size_tv); f->ptr_movies = xml.getString("movies",&f->size_movies); f->ptr_books = xml.getString("books",&f->size_books); f->ptr_about_me = xml.getString("about_me",&f->size_about_me); f->ptr_status = xml.getString("status",&f->size_status); f->ptr_online_presence = xml.getString("online_presence", &f->size_online_presence); f->ptr_proxied_email = xml.getString("proxied_email", &f->size_proxied_email); f->ptr_website = xml.getString("website",&f->size_website); f->ptr_contact_email = xml.getString("contact_email", &f->size_contact_email); f->ptr_email = xml.getString("email",&f->size_email); f->ptr_sports = xml.getString("sports",&f->size_sports); // full nodes (lists) f->ptr_work = xml.getNode("work",&f->size_work); f->ptr_education = xml.getNode("education",&f->size_education); f->ptr_languages = xml.getNode("languages",&f->size_languages); 
f->m_timezone = xml.getLong("timezone",99); f->m_is_app_user = xml.getLong("is_app_user",0); f->m_is_blocked = xml.getLong("is_blocked",0); f->m_verified = xml.getLong("verified",0); f->m_is_minor = xml.getLong("is_minor",0); f->m_likes_count = xml.getLong("likes_count",0); f->m_friend_count = xml.getLong("friend_count",0); // // now that we got what we want, NULL term everything // if ( f->ptr_firstName) f->ptr_firstName[f->size_firstName++] = '\0'; if ( f->ptr_lastName) f->ptr_lastName[f->size_lastName++] = '\0'; if ( f->ptr_name) f->ptr_name[f->size_name++] = '\0'; if ( f->ptr_pic_square)f->ptr_pic_square[f->size_pic_square++] = '\0'; if ( f->ptr_religion) f->ptr_religion[f->size_religion++] = '\0'; if ( f->ptr_birthday)f->ptr_birthday[f->size_birthday++] = '\0'; if ( f->ptr_birthday_date) f->ptr_birthday_date[f->size_birthday_date++] = '\0'; if ( f->ptr_sex) f->ptr_sex[f->size_sex++] = '\0'; if ( f->ptr_hometown_location) f->ptr_hometown_location[f->size_hometown_location++] = '\0'; if ( f->ptr_current_location) f->ptr_current_location[f->size_current_location++] = '\0'; if ( f->ptr_activities) f->ptr_activities[f->size_activities++] = '\0'; if ( f->ptr_interests)f->ptr_interests[f->size_interests++] = '\0'; if ( f->ptr_music)f->ptr_music[f->size_music++] = '\0'; if ( f->ptr_tv)f->ptr_tv[f->size_tv++] = '\0'; if ( f->ptr_movies)f->ptr_movies[f->size_movies++] = '\0'; if ( f->ptr_books)f->ptr_books[f->size_books++] = '\0'; if ( f->ptr_about_me)f->ptr_about_me[f->size_about_me++] = '\0'; if ( f->ptr_status)f->ptr_status[f->size_status++] = '\0'; if ( f->ptr_online_presence) f->ptr_online_presence[f->size_online_presence++] = '\0'; if ( f->ptr_proxied_email) f->ptr_proxied_email[f->size_proxied_email++] = '\0'; if ( f->ptr_website) f->ptr_website[f->size_website++] = '\0'; if ( f->ptr_contact_email) f->ptr_contact_email[f->size_contact_email++] = '\0'; if ( f->ptr_email) f->ptr_email[f->size_email++] = '\0'; if ( f->ptr_sports)f->ptr_sports[f->size_sports++] = 
'\0'; if ( f->ptr_work) f->ptr_work[f->size_work++] = '\0'; if ( f->ptr_education)f->ptr_education[f->size_education++] = '\0'; if ( f->ptr_languages)f->ptr_languages[f->size_languages++] = '\0'; // allow dups! one eid can have many likes m_likedbTable.set(8,sizeof(LikedbTableSlot),0,NULL,0,true,0,"lkdbtab"); // // . add the eids facebook provided into m_eidBuf // . create a likedb record for each one to add to likedb // bool hadError = false; char *line = NULL; // scan the xmlnodes, looking for event_member int32_t i; for ( i = 0 ; i < xml.m_numNodes ; i++ ) { // get an tag XmlNode *node = &xml.m_nodes[i]; char *s = node->m_node; if ( strncmp(s," s++; // next tag for ( ; *s && *s != '<' ; s++ ) ; // must be afterwards if ( strncmp ( s, "" , 5 ) ) break; // skip that s += 5; // int64_t int64_t uid = strtoull ( s , NULL, 10 ); // skip til next tag for ( ; *s && *s != '<' ; s++ ) ; // must be /uid if ( strncmp ( s, "", 6 ) ) break; // skip that s += 6; // next tag for ( ; *s && *s != '<' ; s++ ) ; // must be if ( strncmp ( s, "" , 5 ) ) break; // skip that s += 5; // int64_t int64_t eid = strtoull ( s , NULL, 10 ); // skip til next tag for ( ; *s && *s != '<' ; s++ ) ; // must be /uid if ( strncmp ( s, "", 6 ) ) break; // skip that s += 6; // next tag for ( ; *s && *s != '<' ; s++ ) ; // must be if ( strncmp ( s, "" , 13 ) ) break; // skip that s += 13; // skip whitespace for ( ; *s && is_wspace_a(*s) ; s++ ); // int64_t char *rsvp_status = s; // skip til next tag for ( ; *s && *s != '<' ; s++ ) ; // must be /uid if ( strncmp ( s, "", 14 ) ) break; // null term so "status" is null terminated //*s = '\0'; // skip that s += 14; // next tag for ( ; *s && *s != '<' ; s++ ) ; // must be if ( strncmp ( s, "" , 12 ) ) break; // skip that s += 12; // int64_t int32_t start_time = (uint32_t)(atoll ( s )); // skip til next tag for ( ; *s && *s != '<' ; s++ ) ; // must be /uid if ( strncmp ( s, "", 13 ) ) break; // skip that s += 13; // next tag hadError = false; if ( 
g_conf.m_logDebugFacebook ) { char c = rsvp_status[6]; rsvp_status[6] = '\0'; log("facebook: got event " "eid=%"UINT64" " "uid=%"UINT64" " "rsvp_status=%s " "start_time=%"UINT32"" , eid , uid , rsvp_status , start_time ); rsvp_status[6] = c; } // ensure enough room if ( m_eidBuf.getAvail() < 8 && ! m_eidBuf.reserve(5000) ) return false; // dedup eidbuf if ( ! m_dedupEidBuf.isInTable ( &eid ) ) { // push it in m_eidBuf.pushLongLong (eid ); // add it now m_dedupEidBuf.addKey ( &eid ); } // TODO: what about uid/rsvp_status/start_time ??? // we need that info for adding to likedb so make a likedb // key here and we'll add that if the inject goes through //int32_t recSize; //char *rec = g_likedb.makeRec ( uid , // docId , // gbeventId , // rsvp_status , // start_time , // &recSize ); // a problem? //if ( ! rec ) // continue; // convert rsvpstatus to a value int32_t rsvp = 0; if ( ! strncmp(rsvp_status,"not_replied",11) ) rsvp = LF_INVITED; else if ( ! strncmp(rsvp_status,"attending",9 ) ) rsvp = LF_GOING; else if ( ! strncmp(rsvp_status,"declined",8 ) ) rsvp = LF_DECLINED; else if ( ! strncmp(rsvp_status,"unsure" ,6 ) ) rsvp = LF_MAYBE_FB; else log("facebook: unknown rsvp_status=%s",rsvp_status); // the data LikedbTableSlot lts; lts.m_uid = uid; lts.m_start_time = start_time; lts.m_rsvp = rsvp; // store that in table so if we end up injecting this // eid successfully, we'll scan the table and add all these // likedb recs for it. return false with g_errno set on error. if ( ! m_likedbTable.addKey ( &eid , <s ) ) return false; } // if we broke out when we should not have... 
if ( hadError ) { log("facebook: had error processing event_members reply %s", line); g_errno = EBADREPLY; return false; } // point to the access token from the old facebookdb rec so the // new one keeps it going fbrec->ptr_accessToken = m_accessToken; fbrec->size_accessToken = gbstrlen(m_accessToken)+1; fbrec->m_accessTokenCreated = getTimeGlobal(); // set key, just the fbid fbrec->m_key.n1 = 0; fbrec->m_key.n0 = fbrec->m_fbId; fbrec->m_key.n0 <<= 1; fbrec->m_key.n0 |= 0x01; return true; } ///////////////////////////////////// // // MSGFB PIPELINE #2 // ///////////////////////////////////// // high priority queue for ppl that login int64_t g_fbq1 [100]; collnum_t g_colls1 [100]; int32_t g_n1 = 0; // low priority queue for passive facebookdb scanning //int64_t g_fbq2 [100]; //collnum_t g_colls2 [100]; //int32_t g_n2 = 0; // used for queue Msgfb g_msgfb; bool isInQueue ( int64_t fbId , collnum_t collnum ) { for ( int32_t i = 0 ; i < g_n1 ; i++ ) if ( g_fbq1 [ i ] == fbId ) return true; return false; } bool queueFBId ( int64_t fbId , collnum_t collnum ) { // skip matt wells for now //if ( fbId == 100003532411011LL ) { // log("facebook: skipping matt wells in queue"); // return true; //} if ( g_n1 >= 100 ) return log("facebook: could not add fbid=%"INT64" to queue",fbId); // make sure not already in if ( isInQueue ( fbId , collnum ) ) return false; log("facebook: queueing fbid=%"INT64"",fbId); g_fbq1 [ g_n1 ] = fbId; g_colls1 [ g_n1 ] = collnum; g_n1++; return true; } /* static void doneProcessingWrapper ( void *state ) { // int16_tcut int32_t err = g_msgfb.m_errno; // or inherit this. we might have forgotten to set m_errno if ( ! err && g_errno ) err = g_errno; // no longer in progress g_msgfb.m_inProgress = false; g_msgfb.reset(); // note it log("facebook: done with queue for fbid=%"UINT64". 
error=%s", g_fbq1[0],mstrerror(err)); // save it for potential re-add //int64_t fsaved = g_fbq1 [0]; //collnum_t csaved = g_colls1[0]; // shift queue down for ( int32_t i = 1 ; i < g_n1 ; i++ ) { g_fbq1 [i-1] = g_fbq1 [i]; g_colls1[i-1] = g_colls1[i]; } // one less in queue g_n1--; // . on error, re-add to the end of the queue // . leave this out for now. we should have some kinda download // loop perhaps to download it. at least leave this out until we // have a backoff scheme in place. //if ( err ) queueFBId ( fsaved , csaved ); } */ #define NUM_MSGFBS 3 Msgfb g_msgfbs[NUM_MSGFBS]; int32_t g_numOut = 0; // evaluate events associated with the fbuserids in the queue void queueSleepWrapper ( int fd, void *state ) { // skip for now //return; // return if empty if ( g_n1 == 0 ) return; // sanity if ( g_n1 >= 100 ) { char *xx=NULL;*xx=0; } // wait for clock to be in sync if ( ! isClockInSync() ) return; // wait until done repairing... so we do not inject events! if ( g_repair.isRepairActive() ) return; // get an fbid if ( g_numOut >= (int32_t)NUM_MSGFBS ) return; // return if all out and no more to put out if ( g_n1 <= g_numOut ) return; // get the next fbid int64_t fbId = g_fbq1[g_numOut]; // get one not in use Msgfb *mfb = NULL; for ( int32_t i = 0 ; i < NUM_MSGFBS ; i++ ) { if ( g_msgfbs[i].m_inProgress ) continue; mfb = &g_msgfbs[i]; break; } // return if all in progress. how can this be? if ( ! mfb ) return; // get the fbid //int64_t fbId = g_fbq1[0]; //collnum_t collnum = g_colls1[0]; // inc this now! g_numOut++; // set it up mfb->m_fbId = fbId; mfb->m_phase = 0; // and launch it. will not re-launch since it sets m_inProgress = true mfb->queueLoop(); // set it up //if( ! g_msgfb.processFBId (fbid,collnum,NULL,doneProcessingWrapper) ) // return; // error? //log("fbqueue: error of some sort = %s",mstrerror(g_errno)); // wtf? 
//g_msgfb.m_inProgress = false; } //////////// // // the queue loop // //////////// static bool s_init = false; static int32_t s_flip = 0; static SafeBuf s_tbuf1; static SafeBuf s_tbuf2; static int32_t s_ptr1 = 0; // facebook dictionary query cursor static int32_t s_ptr2 = 0; // facebook location query cursor static int32_t s_ptr3 = 0; // stubhub cursor static int64_t s_ptr4 = 0; // eventbrite cursor static int64_t s_ptr5 = 0; // local facebookdb scanner cursor static int32_t s_holdOffStubHubTill = 0; static int64_t s_lastEventBriteEventId = 0; static int32_t s_eventBriteWaitUntil = 0; static int32_t s_localWaitUntil = 0; static char *getNextQuery (); static void queueLoopWrapper ( void *state ) { Msgfb *msgfb = (Msgfb *)state; msgfb->queueLoop(); } static void queueLoopWrapper2 ( void *state , TcpSocket *s ) { Msgfb *msgfb = (Msgfb *)state; msgfb->m_socket = s; msgfb->queueLoop(); } static void queueLoopWrapper5 ( void *state , RdbList *list, Msg5 *msg5 ) { Msgfb *msgfb = (Msgfb *)state; msgfb->queueLoop(); } void Msgfb::queueLoop ( ) { // skip for now //return; // assume in use //m_inProgress = true; if ( m_phase == 0 ) { // do not reset this! int64_t saved = m_fbId; // make sure this is empty and ready to go reset(); // re-establish this m_fbId = saved; // set this m_inProgress = true; } // if an fbid of -3 means to get scan facebookdb locally if ( m_phase == 0 && m_fbId == -3LL ) { // use msg5 then i guess key96_t startKey; key96_t endKey; startKey.n1 = 0; startKey.n0 = s_ptr5; endKey.n1 = 0; endKey.n0 = 0xffffffffffffffffLL; startKey.n0 <<= 1; endKey.n0 <<= 1; endKey.n0 |= 0x01; // advance phase to a special phase m_phase = 50; if ( ! m_msg5.getList ( RDB_FACEBOOKDB, "", // coll &m_list33, &startKey, &endKey, 13, // minrecsizes true , // includetree? false, // addtocache? 0, // maxcacheage 0, // startfilenum -1, // numfiles this , queueLoopWrapper5, m_niceness , true // doErrorCorrection? 
) ) return; } // handle special phase from msg5 call above if ( m_phase == 50 ) { // grab the rec if ( g_errno ) { log("fbqueue: error getting local facebookdb rec: %s", mstrerror(g_errno)); goto error; } // empty is means done if ( m_list33.getListSize() <= 0 ) { // set this int32_t now = getTimeGlobal(); // wait for a day then s_localWaitUntil = now + 86400; // reset the scan for the scan tomorrow s_ptr5 = 0; // all done m_phase = 16; // remove from queue goto skipdown; } // get the facebook rec... why? m_oldFbrec = (FBRec *)m_list33.getList(); // get is fbid and queue that if ( m_oldFbrec->m_fbId < 0 ) { log("fbqueue: bad fbid in local scan: %"INT64"", m_oldFbrec->m_fbId); g_errno = EBADREPLY; goto error; } // ok, queue that fbid now queueFBId ( m_oldFbrec->m_fbId , m_collnum ); // update our scan cursor s_ptr5 = m_oldFbrec->m_fbId + 1; // and remove ourselves from the queue m_phase = 16; goto skipdown; } // if an fbid of -2 means to get some eventbrite if ( m_fbId == -2LL && m_phase == 0 ) { // . loop over all events sorted by id // . use &since_id= to avoid duppage // . we assume they only increment that id, if that is // not the case we'll have to restart from 0 each // time, and maybe two a roll twice per day // . they say they limit to 1000 requests per day! SafeBuf cmd10; cmd10.safePrintf( "http://www.eventbrite.com/xml/event_search?" "app_key=QSMWWLSZUJT3D5ZGPF&" "page=1&" // ask for 100 results "max=100&" "sort_by=id&" "since_id=%"INT64"" // no need to add 1 to s_ptr4, just make // sure that it exactly equals the LAST // id we injected because it is EXCLUDED // from these results and eventbrite starts // with s_ptr4+1 ,s_ptr4 ); log("fbspider: getting eventbrite results: cmd10 = %s", cmd10.getBufStart() ); // advance to phase to parse the events out of the reply m_phase = 11; // this will be set in queueLoopWrapper2() m_socket = NULL; // use defaults i guess char *ua = g_conf.m_spiderUserAgent; char *proto = "HTTP/1.0"; // pillage from eventbrite if ( ! 
g_httpServer.getDoc ( cmd10.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2, 80*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 100000000 , // maxTextDocLen , 100000000 , // maxOtherDocLen , ua , proto )) // return false if blocked return; // this is not possible, must be error! log("fbspider: got error downloading eventbrite results = %s", mstrerror(g_errno)); goto error; } // if an fbid of -1 means to close our eyes and suck down some stubhub if ( m_fbId == -1LL && m_phase == 0 ) { int32_t now = getTimeGlobal(); // now subtract 12 so that gmtime() call below will // return current time in hawaii or right before that... now -= 12*3600; SafeBuf cmd9; cmd9.safePrintf("http://www.stubhub.com/" "listingCatalog/select/?q=" "event_date%%3A[" ); if ( s_ptr3 == 0 ) s_ptr3 = now; // . convert into canonical time format // . this should be basically close to the time in hawaii // which lags pretty much everyone else, that way // we will get events about to happen right now in hawaii struct tm *tt = gmtime(&s_ptr3); // . now print it out for the stubhub query // . 
it uses the SOLR format that lucene uses for queries // like "event_data:[X TO Y]" for doing ranges // http://www.stubhub.com/listingCatalog/select/?q=event_date%3A[2012-05-04T00:00:00.001Z%20TO%202012-05-04T13:59:00.001Z] // &start=0&rows=10 char tbuf[512]; strftime ( tbuf , 512, "%Y-%m-%dT%H:00:00Z" , tt ); cmd9.urlEncode ( tbuf ); cmd9.safePrintf("+TO+"); // add the half day here int32_t delta = 12*3600; // 3+ months out use a full day if ( s_ptr3 - now > 90*86400 ) delta = 24*3600; // subtract 1 to avoid dups for next call in case stubhub // uses a closed interval, and not half open s_ptr3 += delta - 1; // get the endpoint then tt = gmtime(&s_ptr3); strftime ( tbuf , 512, "%Y-%m-%dT%H:00:00Z" , tt ); cmd9.urlEncode ( tbuf ); cmd9.safePrintf("]"); cmd9.safePrintf("&fl=event_id," "act_primary,event_date_time_local," "keywords,lat_lon," "urlpath,venueDetailUrlPath,description," "ancestorGenreDescriptions" ); // add the second back s_ptr3++; log("fbspider: getting stubhub results: cmd9 = %s", cmd9.getBufStart() ); // advance to phase to parse the events out of the reply m_phase = 11; // this will be set in queueLoopWrapper2() m_socket = NULL; char *ua = g_conf.m_spiderUserAgent; char *proto = "HTTP/1.0"; //if ( m_fbId < 0 ) { // ua = "Mozilla/5.0 (X11; U; Linux i686; " // "en-US; rv:1.9.2.7) Gecko/20100715 " // "Ubuntu/10.04 (lucid) Firefox/3.6.7"; // proto = "HTTP/1.1"; //} // pillage from stubhub if ( ! g_httpServer.getDoc ( cmd9.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2, 80*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 100000000 , // maxTextDocLen , 100000000 , // maxOtherDocLen , ua , proto )) // return false if blocked return; // this is not possible, must be error! 
log("fbspider: got error downloading stubhub results = %s", mstrerror(g_errno)); goto error; } // if an fbid of 0 is used that means to do a query against // facebook's massively lame event search engine if ( m_fbId == 0LL && m_phase == 0 ) { // . ok, pick a query to send to facebook // . pick the next most popular word, // or the next most popular location name // . interleave between words and location names char *fbq = getNextQuery ( ); // don't core! if ( ! fbq ) { log("fbspider: getNextQuery() returned null. " "possibly a state without an official name " "and getStateName() returned NULL?"); g_errno = EBADENGINEER; goto error; } // . make the query // . &format=xml is pointless and will not work! // they made it 100% json now SafeBuf cmd8; cmd8.safePrintf("https://graph.facebook.com/" "search?" "fields=id,privacy,picture,name,location," "venue,description," "start_time,end_time&" "q=" ); // encode the query cmd8.urlEncode ( fbq ); // and put in the rest cmd8.safePrintf("&" "type=event&" //"access_token=AAAAAAITEghMBADXfRdrOzCBZAV7" //"2K1kQurpnnyPDBIYHCSoPpRLOvdJFBZBESUZA0BgZ" //"AWr8dGCpNCOZBMh58opiYAnAZAavKPGaMnoKq32v9" //"CiYxy2WtD&" "limit=600&" "offset=0" ); log("fbspider: getting fb results: cmd8 = %s", cmd8.getBufStart() ); // advance to phase to parse the events out of the reply m_phase = 11; // this will be set in queueLoopWrapper2() m_socket = NULL; // pillage from facebook! if ( ! g_httpServer.getDoc ( cmd8.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2, 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 100000000 , // maxTextDocLen , 100000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // this is not possible, must be error! 
log("fbspider: got error downloading fb results = %s", mstrerror(g_errno)); goto error; } // first phase is loading the facebook rec if ( m_phase == 0 ) { key96_t startKey; key96_t endKey; startKey.n1 = 0; startKey.n0 = m_fbId; endKey.n1 = 0; endKey.n0 = m_fbId; startKey.n0 <<= 1; endKey.n0 <<= 1; endKey.n0 |= 0x01; // now we use this m_dedupEidBuf.set (8,0,0,NULL,0,false,m_niceness,"deibuf"); // advance phase m_phase = 1; if ( ! m_msg0.getList ( -1, // hostid 0 , // ip 0 , // port 0 , // maxcacheage false, // addtocache RDB_FACEBOOKDB, "",//coll, &m_list1, (char *)&startKey, (char *)&endKey, 13, // minrecsizes this , queueLoopWrapper, m_niceness ) ) return; } // check for errors if ( m_phase == 1 ) { if ( g_errno ) { log("fbqueue: error getting facebookdb rec: %s", mstrerror(g_errno)); goto error; } // empty is bad if ( m_list1.getListSize() <= 0 ) { log("fbqueue: facebookdb rec is empty. wtf? fbid=%"INT64"", m_fbId); g_errno = EBADREPLY; goto error; } // get the facebook rec... why? m_oldFbrec = (FBRec *)m_list1.getList(); // sanity if ( m_oldFbrec->m_fbId != m_fbId ) { log("fbqueue: fbid mismatch. fbid=%"INT64"", m_fbId); g_errno = EBADREPLY; goto error; } // deserialize... 
deserializeMsg ( sizeof(FBRec) , &m_oldFbrec->size_accessToken , &m_oldFbrec->size_friendIds , &m_oldFbrec->ptr_accessToken , m_oldFbrec->m_buf ); // copy for calling fql strcpy ( m_accessToken , m_oldFbrec->ptr_accessToken ); retryDownload: // note it log("facebook: downloading user info again"); // get your facebook user info again in case it changed SafeBuf cmd; cmd.safePrintf ( "SELECT uid,username,first_name,last_name,name,pic_square,profile_update_time,timezone,religion,birthday,birthday_date,sex,hometown_location,current_location,activities,interests,is_app_user,music,tv,movies,books,about_me,status,online_presence,proxied_email,verified,website,is_blocked,contact_email,email,is_minor,work,education,sports,languages,likes_count,friend_count FROM user where uid=me()"); cmd.urlEncode(); // www.howtobe.pro/facebook-graph-api-graph-api-for-issuing- // fql-queries make a url SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd.getBufStart() , m_accessToken ); log("facebook: cmd = %s",ubuf.getBufStart()); // advance phase m_phase = 2; // reset g_errno = 0; // get the results if ( ! g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2 , 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // otherwise, somehow got it without blocking... wtf? //return gotFQLReply(); if ( ! 
g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); goto error; } // got reply if ( m_phase == 2 ) { // bail on http reply error if ( g_errno ) { log("fbqueue: error getting fb user rec: %s", mstrerror(g_errno)); goto error; } m_phase = 3; // a quick sanity check char *reply = m_socket->m_readBuf; int32_t replyLen = m_socket->m_readOffset; // i've seen crappy facebook return essentially an empty // reply here, so make sure we have all the fields!! if ( ! strstr(reply,"") || ! strstr(reply," ! strstr(reply,"m_readBuf, m_socket->m_readOffset ); } // download friend list if ( m_phase == 3 ) { // note it log("facebook: downloading friend list fbid=%"INT64"",m_fbId); // get your facebook user info again in case it changed SafeBuf cmd; cmd.safePrintf ( "SELECT uid2 from friend WHERE uid1=me()"); cmd.urlEncode(); SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd.getBufStart() , m_accessToken ); log("facebook: cmd = %s",ubuf.getBufStart()); // advance phase m_phase = 4; // reset start for phase 3 m_chunkStart = 0; // reset chunksize for phase 3 m_chunkSize = 300; // reset g_errno = 0; // get the results if ( ! g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2 , 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // otherwise, somehow got it without blocking... wtf? if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); goto error; } // got reply if ( m_phase == 4 ) { // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( m_socket->m_readBuf, m_socket->m_readOffset-1,NULL); // not good? 
int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) g_errno = EBADREPLY; // bail on http reply error if ( g_errno ) { log("fbqueue: error getting friend list fbid=%"INT64": " "%s", m_fbId, mstrerror(g_errno)); goto error; } m_phase = 5; // copy reply into our final SafeBuf buffer m_fullReply.safeMemcpy ( m_socket->m_readBuf, m_socket->m_readOffset ); } // download some event member rsvp statuses if ( m_phase == 5 ) { recallMembers: // note it log("facebook: downloading event members #%"INT32"-#%"INT32" " "fbid=%"INT64"", m_chunkStart,m_chunkStart+m_chunkSize,m_fbId); // get status of each friend attending an event & what eventid SafeBuf cmd; // if this fails then chunk down to 50 instead of 300 cmd.safePrintf ( "SELECT uid, eid, rsvp_status, start_time " "FROM event_member where uid IN " "( SELECT uid2 from friend WHERE uid1=me() " "LIMIT %"INT32",%"INT32") " , m_chunkStart , m_chunkSize // this start_time from the event_member table // is not accurate. it is often in the past! // wtf? //"AND start_time > now()" ); cmd.urlEncode(); SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd.getBufStart() , m_accessToken ); log("facebook: cmd = %s",ubuf.getBufStart()); // advance chunkstart m_chunkStart += m_chunkSize; // advance phase m_phase = 6; // reset g_errno = 0; // get the results if ( ! g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2 , 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // otherwise, somehow got it without blocking... wtf? if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); goto error; } // got a reply if ( m_phase == 6 ) { // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. 
mime.set ( m_socket->m_readBuf, m_socket->m_readOffset-1,NULL); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) g_errno = EBADREPLY; // bail on http reply error if ( g_errno ) { log("fbqueue: error getting event members fbid=%"INT64": " "%s", m_fbId, mstrerror(g_errno)); // count it m_errorCount++; // retry? if ( m_errorCount < 10 ) goto recallMembers; // final phase. wrap it up goto error; } // advance m_phase = 7; // copy reply into our final SafeBuf buffer m_fullReply.safeMemcpy ( m_socket->m_readBuf, m_socket->m_readOffset ); // if we had some repeat! if ( strstr ( m_socket->m_readBuf , "" ) ) { log("facebook: recalling event members"); goto recallMembers; } // reset for my events m_myChunkStart = 0; m_myChunkSize = 300; } if ( m_phase == 7 ) { // get MY events recallMyEvents: // note it log("facebook: downloading my event ids #%"INT32"-#%"INT32" " "fbid=%"INT64"", m_myChunkStart,m_myChunkStart+m_myChunkSize,m_fbId); // get status of each friend attending an event & what eventid SafeBuf cmd; // if this fails then chunk down to 50 instead of 300 cmd.safePrintf ( "SELECT uid, eid, rsvp_status, start_time " "FROM event_member where uid=me() " "LIMIT %"INT32",%"INT32"" , m_myChunkStart , m_myChunkSize // this start_time from the event_member table // is not accurate. it is often in the past! // wtf? //"AND start_time > now()" ); cmd.urlEncode(); SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd.getBufStart() , m_accessToken ); log("facebook: cmd = %s",ubuf.getBufStart()); // advance chunkstart m_myChunkStart += m_myChunkSize; // advance phase m_phase = 8; // reset g_errno = 0; // get the results if ( ! 
g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2 , 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // otherwise, somehow got it without blocking... wtf? if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); goto error; } // got a reply if ( m_phase == 8 ) { // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( m_socket->m_readBuf, m_socket->m_readOffset-1,NULL); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) g_errno = EBADREPLY; // bail on http reply error if ( g_errno ) { log("fbqueue: error getting my events fbid=%"INT64": " "%s", m_fbId, mstrerror(g_errno)); // count it m_errorCount++; // retry? if ( m_errorCount < 10 ) goto recallMembers; // final phase. wrap it up goto error; } // advance m_phase = 9; // copy reply into our final SafeBuf buffer m_fullReply.safeMemcpy ( m_socket->m_readBuf, m_socket->m_readOffset ); // if we had some repeat! if ( strstr ( m_socket->m_readBuf , "" ) ) { log("facebook: recalling my events"); goto recallMyEvents; } // init this for downloading events m_eventStartNum = 0; // start off high, reduce to 10 on error m_eventStep = 100; } // save the fbrec from m_fullReply if ( m_phase == 9 ) { // save to disk for debug char fname[64]; sprintf(fname,"%"INT64".txt",m_fbId); m_fullReply.save("/tmp/",fname); // compile into a new facebook rec char *content = m_fullReply.getBufStart(); int32_t contentLen = m_fullReply.length(); // sanity int64_t origFbId = m_fbId; // is this messing up our Msgfb::m_fbId??? if ( ! setFBRecFromFQLReply(content,contentLen,&m_fbrecGen)) { log("fql: error setting fb rec from fql"); g_errno = EBADREPLY; goto error; } // must be there! if ( ! 
m_fbrecGen.m_fbId ) { log("fql: failed to get facebook id from reply"); g_errno = EBADENGINEER; goto error; } // sanity if ( m_fbId != origFbId ) { char *xx=NULL;*xx=0; } // we loaded this in phase 0 FBRec *src = m_oldFbrec; // we just made this from facebook replies FBRec *dst = &m_fbrecGen; // merge src into dst. merge the old into the new. mergeFBRec ( dst , src ); // advance to downloading events now m_phase = 10; // come here when done saving m_afterSaveCallback = queueLoopWrapper; // save the merged rec now if ( ! saveFBRec( &m_fbrecGen ) ) return; } // get events now if ( m_phase == 10 ) { recallEvents: // reset these so we do not re-inject the same events m_evPtrBuf.reset(); m_evIdsBuf.reset(); // skip matt wells for now if ( m_fbId == MATTWELLS ) { // 100003532411011LL ) { log("facebook: skipping matt wells event members " "download"); // save it so we can turn off the in queue bit //m_phase = 15; //goto skipdown; } // note it log("facebook: downloading events #%"INT32"-#%"INT32" fbid=%"INT64"", m_eventStartNum,m_eventStartNum+m_eventStep,m_fbId); SafeBuf cmd; cmd.safePrintf ( "SELECT eid, " "name, " // deprecated as of july 5th 2012 //"tagline, " //"nid, " "pic_small, " "pic_big, " "pic_square, " "pic, " "host, " "description, " // deprecated as of july 5th 2012 //"event_type, " //"event_subtype, " "start_time, " "end_time, " "creator, " "update_time, " "location, " "venue, " "privacy, " "hide_guest_list, " "can_invite_friends " "FROM event WHERE " // this does not seem to work either! //"start_time > now() AND " "eid IN (" ); // from buf int32_t max = m_eidBuf.length() / 8; // did we add any? 
bool printed = false; // int16_tcut int64_t *eids = (int64_t *)m_eidBuf.getBufStart(); // list some eids here for ( int32_t i = m_eventStartNum ; i < max && i < m_eventStartNum + m_eventStep ; i++ ) { if ( printed ) cmd.pushChar(','); printed = true; cmd.safePrintf("%"INT64"",eids[i]); } // end it cmd.safePrintf(")"); cmd.urlEncode(); SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd.getBufStart() , m_accessToken ); log("facebook: cmd = %s",ubuf.getBufStart()); // advance chunkstart m_eventStartNum += m_eventStep; // advance phase m_phase = 11; // reset g_errno = 0; // get the results if ( ! g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state queueLoopWrapper2 , 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return; // otherwise, somehow got it without blocking... wtf? if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); goto error; } // got events reply if ( m_phase == 11 ) { // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. if ( m_socket && ! g_errno ) { mime.set ( m_socket->m_readBuf, m_socket->m_readOffset-1,NULL); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) g_errno = EBADREPLY; } // bail on http reply error if ( g_errno ) { char *reply = ""; if ( m_socket && m_socket->m_readBuf ) reply = m_socket->m_readBuf; log("fbqueue: error getting events fbid=%"INT64": " "%s : %s", m_fbId, mstrerror(g_errno),reply); // bail right away if doing spider if ( m_fbId <= 0 ) goto error; // final phase. wrap it up if ( ++m_errorCount >= 10 ) goto error; // first time? 
if ( m_eventStep > 10 ) m_eventStartNum -= m_eventStep; // reduce step m_eventStep = 10; // retry goto recallEvents; } m_rbuf.purge(); // convert json to xml m_rbuf.safeMemcpy ( m_socket->m_readBuf , m_socket->m_readOffset ); // nuke the read buf then from the http server mfree ( m_socket->m_readBuf , m_socket->m_readBufSize,"sss" ); // do not allow tcpsocket to free it again m_socket->m_readBuf = NULL; char *type = NULL; if ( m_fbId == 0LL ) type = "facebook"; if ( m_fbId == -1LL ) type = "stubhub"; if ( m_fbId == -2LL ) type = "eventbrite"; // show it if ( type ) log("facebook: got %s reply (%"INT32" bytes)" , type , m_rbuf.length() ); //fprintf(stderr,"full raw reply=%s",m_rbuf.getBufStart()); // do not do jsontoxml on the mime header! char *rb = m_rbuf.getBufStart(); char *rc = strstr(rb,"\r\n\r\n"); int32_t pos = 0; if ( rc ) pos = (rc+4) - rb; // convert json to xml if it is in json. only for facebook m_rbuf.convertJSONtoXML ( m_niceness , pos ); //fprintf(stderr, //"full converted reply=%s",m_rbuf.getBufStart()); // show it //if ( m_fbId == 0LL ) // log("facebook: got fb reply xml: %s", // m_rbuf.getBufStart() ); // did it have some events? //char *hadSome = strstr ( m_socket->m_readBuf , "" ) ; // save reply //m_facebookReply2 = m_socket->m_readBuf; //m_facebookReplySize2 = m_socket->m_readOffset; //m_facebookAllocSize2 = m_socket->m_readBufSize; // sanity. must be \0. httpserver should add this if not // already there! //if ( m_facebookReply2 [ m_facebookReplySize2 ] ) { // log("fbqueue: facebookreply2 does not end in \\0"); // g_errno = EBADREPLY; // goto error; //} // . set m_numEvents and m_eventPtrs from m_facebookReply // . they reference into the reply, so do not free // m_facebookReply until destructor is called m_numEvents = 0; // . get the event delimeter in the xml // . 
both facebook and eventbrite use this char *delim = ""; char *dend = ""; // stubhub uses "" to delineate events if ( m_fbId == -1 ) { delim = ""; dend = ""; } // parse the events into an array of ptrs for easier injection // scan for tags and set m_evptrs safebuf to // each ptr to those char *p = m_rbuf.getBufStart(); // facebookReply2; for ( ; *p ; ) { // scan to first tag p = strstr ( p , delim ); if ( ! p ) break; // store start for ptr char *start = p + gbstrlen(delim); // find end char *end = strstr ( p , dend ); if ( ! end ) break; // null term it *end = '\0'; // for next round p = end + gbstrlen(dend); // try to get event id char *ep = NULL; if ( m_fbId >= 0 ) { ep = strstr ( start, ""); if ( ep ) ep += 5; } // is it a stubhub event? // 2659223 if ( m_fbId == -1 ) { ep = strstr ( start, "=\"event_id\">"); if ( ep ) ep += 12; } // . is it an eventbrite event? // . the first after the is the event id if ( m_fbId == -2 ) { ep = strstr ( start, ""); if ( ep ) ep += 4; } if ( ! ep ) continue; int64_t eid = strtoull ( ep , NULL , 10 ); if ( eid == 0 ) continue; if ( eid < 0 ) log("facebook: wtf? eid is 0"); // for eventbrite record the last eventid because // we use that for paging through if ( m_fbId == -2 ) s_lastEventBriteEventId = eid; // store it if ( ! m_evPtrBuf.pushLong ( (int32_t)start)) goto error; if ( ! m_evIdsBuf.pushLongLong ( eid)) goto error; // count them m_numEvents++; } log("facebook: got %"INT32" events from %s",m_numEvents,type); // if we got 0 events from an eventbrite request, then // wait for an hour before retrying if ( m_numEvents == 0 && m_fbId == -2 ) { log("facebook: pausing eventbrite feed for 1 hour"); int32_t now = getTimeGlobal(); s_eventBriteWaitUntil = now + 3600; } // reset loop m_i = 0; // make a new state if ( m_numEvents > 0 ) { Msg7 *msg7; try { msg7 = new (Msg7); } catch ( ... 
) { g_errno = ENOMEM; log("facebook: inject msg7 new(%i): %s", sizeof(Msg7),mstrerror(g_errno)); goto error; } mnew ( msg7, sizeof(Msg7) , "PageInjct7" ); // save it for freeing in destructor m_msg7 = msg7; } // advance m_phase = 12; // if no events, skip injection and adding likes i guess... if ( m_numEvents == 0 ) m_phase = 15; } // injection loop if ( m_phase == 12 ) { recallInject: char *coll = g_collectiondb.getColl ( m_collnum ); char **eventPtrs = (char **)m_evPtrBuf.getBufStart(); int64_t *eventIds = (int64_t *)m_evIdsBuf.getBufStart(); // get ptr to it char *content = eventPtrs[m_i]; int32_t contentLen = gbstrlen(content); // debug thing //if ( eventIds[m_i] != 314901535212815LL ) {m_i++; continue;} // make a fake url char url[128]; sprintf(url,"http://www.facebook.com/events/%"UINT64"", eventIds[m_i]); // is it a stubhub event? 2659223 if ( m_fbId == -1 ) sprintf(url,"http://www.stubhub.com/%"UINT64"", eventIds[m_i]); if ( m_fbId == -2 ) sprintf(url,"http://www.eventbrite.com/%"UINT64"", eventIds[m_i]); // test debug (on for eventbrite now) if ( g_conf.m_logDebugFacebook ) log("facebook: %s",content); // // set m_privacy for event being injected // char *s = strstr(content,"' for ( ; s && *s && *s !='>' ; s++ ); // skip actual > if ( s && *s == '>' ) s++; // skip whitespace for ( ; s && *s && is_wspace_a(*s) ; s++ ); // compare m_privacy = 0; if ( s && ! strncasecmp(s,"secret",6) ) m_privacy = LF_PRIVATE; if ( s && ! strncasecmp(s,"closed",6) ) m_privacy = LF_PRIVATE; // event brite privacy. 0 means private event. if ( m_fbId == -2 && s && ! strncasecmp(s,"0",1) ) m_privacy = LF_PRIVATE; //if ( eventIds[m_i] == 273883416016761LL ) // m_privacy = LF_PRIVATE; // test //m_privacy = LF_PRIVATE; // use a forced ip for speed! otherwise it takes // forever lookup up www.facebook.com for some reason! int32_t forcedIp = atoip("69.171.224.39"); // advance up here m_i++; // and the phase too m_phase = 13; // inject just that if ( ! 
m_msg7->inject ( url , forcedIp, content , contentLen , false, // recyclecontent CT_XML, // contentType, coll , false , NULL, // username NULL , // pwd m_niceness, this , queueLoopWrapper ) ) return; // bail on error if ( g_errno ) goto error; // how did this happen? it needs to block... otherwise // we have to add to likedb here. char *xx=NULL;*xx=0; } if ( m_phase == 13 ) { // error? if ( g_errno ) log("facebook: injection had error: %s", mstrerror(g_errno)); // add the likes XmlDoc *xd = &m_msg7->m_xd; /* // if event was not found or added for some reason... if ( xd->m_numHashableEvents <= 0 ) { // try to do the next one if ( m_i < m_numEvents ) goto recallInject; // or another round? if ( m_numEvents > 0 ) goto recall } if ( xd->m_indexCode && xd->m_indexCodeValid ) { // note it log("facebook: could not index doc: %s", mstrerror(xd->m_indexCode)); // try to do the next one if ( m_i < m_numEvents ) goto recallInject; } */ if ( xd->m_indexCode && xd->m_indexCodeValid ) // note it log("facebook: could not index doc: %s", mstrerror(xd->m_indexCode)); // advancwe m_phase = 14; // // ADD LIKES // // scan the event_members reply we got and cross-reference // those facebook eventids with our eventhash/evid/docid guys // we got in the injection reply to see if we added the // facebook event to our db. in that case, we also want to add // the maybe/goingto/invitedto/notgoing flags. // uses the eventhash64, eventid, docid of event added! // returns false with g_errno set on error. if ( ! makeLikedbKeyList ( m_msg7 , &m_list3 ) ) goto error; // if nothing to add, we are done //if ( m_list3.getListSize() == 0 && m_i < m_numEvents ) // goto recallInject; char *coll = g_collectiondb.getColl ( m_collnum ); // add that if ( ! m_msg1.addList ( &m_list3 , RDB_LIKEDB , coll , this , queueLoopWrapper, false , m_niceness ) ) // niceness return; } if ( m_phase == 14 ) { // we saved the likes... // try to do the next one if ( m_i < m_numEvents ) goto recallInject; // . 
all done, but need to load more events? // . skip if fbid is zero, which means its facebook spider if ( m_fbId > 0 && m_numEvents > 0 ) goto recallEvents; // ok, i guess, we are done m_phase = 15; } skipdown: if ( m_phase == 15 ) { // we can nuke this now m_rbuf.purge(); // advance m_phase = 16; // a 0 fbid is spidering facebook if ( m_fbId > 0 ) { // final save of rec to clear the FB_INQUEUE bit m_afterSaveCallback = queueLoopWrapper; log("facebook: saving final rec for fbid=%"INT64"",m_fbId); m_fbrecGen.m_flags &= ~FB_INQUEUE; m_fbrecGen.m_eventsDownloaded = getTimeGlobal(); // this calls serializeMsg() which mallocs a // new reply to add if ( ! saveFBRec( &m_fbrecGen ) ) return; } } // . remove from queue // . all done! if ( m_phase == 16 ) { // int16_tcut int32_t err = m_errno; // or inherit this. we might have forgotten to set m_errno if ( ! err && g_errno ) err = g_errno; // no longer in progress m_inProgress = false; // this will purge fullreply reset(); // note it log("facebook: done with queue for fbid=%"INT64". 
error=%s", g_fbq1[0],mstrerror(err)); // if we are eventbrite, update s_ptr4 to the last eventid // that we had on that page s_ptr4 = s_lastEventBriteEventId; // save it for potential re-add //int64_t fsaved = g_fbq1 [0]; //collnum_t csaved = g_colls1[0]; // shift queue down for ( int32_t i = 1 ; i < g_n1 ; i++ ) { g_fbq1 [i-1] = g_fbq1 [i]; g_colls1[i-1] = g_colls1[i]; } // one less in queue g_n1--; g_numOut--; return; } error: log("facebook: queue fbid %"INT64" had error: %s", m_fbId,mstrerror(g_errno)); // no longer in progress m_inProgress = false; // this will purge fullreply reset(); // save it for potential re-add int64_t fsaved = g_fbq1 [0]; collnum_t csaved = g_colls1[0]; // shift queue down for ( int32_t i = 1 ; i < g_n1 ; i++ ) { g_fbq1 [i-1] = g_fbq1 [i]; g_colls1[i-1] = g_colls1[i]; } // one less in queue g_n1--; g_numOut--; // re-add on certain errors if ( g_errno == ETIMEDOUT || g_errno == ESOCKETCLOSED ) { log("facebook: re-queue fbid %"INT64" from error: %s", m_fbId,mstrerror(g_errno)); queueFBId ( fsaved , csaved ); } } // . these are ptrs to likedb records // . these first int64_t is the least significant // . the 2nd int64_t is more int likedbCmp ( const void *a , const void *b ) { const key192_t *k1 = (key192_t *)a; const key192_t *k2 = (key192_t *)b; if ( k1->n2 < k2->n2 ) return -1; if ( k1->n2 > k2->n2 ) return 1; if ( k1->n1 < k2->n1 ) return -1; if ( k1->n1 > k2->n1 ) return 1; if ( k1->n0 < k2->n0 ) return -1; if ( k1->n0 > k2->n0 ) return 1; return 0; } // scan the event_members reply we got and cross-reference // those facebook eventids with our eventhash/evid/docid guys // we got in the injection reply to see if we added the facebook // event to our db. in that case, we also want to add the // maybe/goingto/invitedto/notgoing flags // returns false with g_errno set on error. 
bool Msgfb::makeLikedbKeyList ( Msg7 *msg7 , RdbList *list ) { // reset list->reset(); // sanity if ( m_i-1 < 0 ) { char *xx=NULL;*xx=0; } // int16_tcuts XmlDoc *xd = &msg7->m_xd; int64_t docId = xd->m_docId; // none if no events! if ( ! xd->size_eventData ) return true; if ( ! xd->m_eventDataValid ) return true; // . get first event and use that one // . really, being facebook, there should only be one eventid! // but maybe later it may be different and we'll have to "like" // each eventid/docid combo EventDisplay *ed = (EventDisplay *)xd->ptr_eventData; if ( ! ed ) { log("facebook: eventdisplay is null! wtf?"); return true; } int32_t gbeventId = ed->m_indexedEventId; //int32_t gbeventHash32 = (int32_t)((uint32_t)ed->m_eventHash64); uint64_t evh64 = ed->m_eventHash64; // int16_tcuts //char **eventPtrs = (char **)m_evPtrBuf.getBufStart(); int64_t *eventIds = (int64_t *)m_evIdsBuf.getBufStart(); // what facebook eventid did we just inject? int64_t eid = eventIds[m_i-1]; int32_t count = 0; SafeBuf tmpBuf; if ( ! tmpBuf.reserve ( 50 ) ) return false; // but don't use lts->m_start_time, it is not UTC really // so use our own... Interval *ii = (Interval *)ed->m_int; // must be there if ( ed->m_intSize <= 0 ) { log("facebook: wtf? no intervals!"); return true; } int32_t start_time = ii->m_a; // scan the table to see what users had an rsvp_status for this event for ( int32_t i = 0 ; i < m_likedbTable.m_numSlots ; i++ ) { // skip empties if ( ! 
m_likedbTable.m_flags[i] ) continue; // get the record in there int64_t *eid2 = (int64_t *)m_likedbTable.getKeyFromSlot(i); // skip if not a match if ( *eid2 != eid ) continue; // the data is a key that we made above LikedbTableSlot *lts; lts = (LikedbTableSlot *)m_likedbTable.getDataFromSlot(i); // debug note if ( g_conf.m_logDebugFacebook ) log("facebook: makerec uid=%"INT64"",lts->m_uid); // assume they "like" it int64_t value = 1LL; // unless it is "negative" //if ( negative ) value = 0LL; // make the flags int32_t flag = lts->m_rsvp; // indicate from facebook so we do not upload it flag |= LF_FROMFACEBOOK; // . this makes two recs to add to likedb // . the first one starts with uid in the key so it will be // used to find events that your friends are // . the second one starts with docid/evid so it can be used // to lookup who likes an event when we generate the summary // for the event because its in the search results char *recs = g_likedb.makeRecs ( lts->m_uid , docId , gbeventId , start_time , //lts->m_start_time , //lts->m_rsvp , flag , evh64 , value ); //continue; // add to list otherwise tmpBuf.safeMemcpy ( recs , (int32_t)LIKEDB_RECSIZE*2 ); count++; } // add a separate rec for LF_PRIVATE if we need to because we can't // set more than one bit in a likedb key otherwise you can't UNDO // or delete key bits easily... i guess the event could change // privacy status the next time we index it, so this needs to // override... and since the uid is 0 (the system uid) let's ignore // the first record and only add the second. // and because i only want to add these keys when the event is // private to avoid bloat, let's just add a negative key when its // public in order to nuke any possible private key. 
char *recs = g_likedb.makeRecs ( 0LL , // uid docId , gbeventId , start_time , LF_FROMFACEBOOK|LF_PRIVATE, evh64 , 1LL ); // point to the 2nd key char *rec2 = recs + LIKEDB_RECSIZE; int32_t recSize = (int32_t)LIKEDB_RECSIZE; // make it a del/negative key by clearing the lowest bit if ( ! m_privacy ) { rec2[0] &= 0xfe; // negative keys are keys only - no data!!! recSize = (int32_t)LIKEDB_KEYSIZE; } // add to list otherwise tmpBuf.safeMemcpy ( rec2 , recSize ); count++; // debug //key192_t *k2 = (key192_t *)rec2; //log("key: 0x%"XINT64" 0x%"XINT64" 0x%"XINT64"",k2->n2,k2->n1,k2->n0); // all done if nothing if ( count == 0 ) return true; // sort the records in tmp now char *buf = tmpBuf.getBufStart(); // sort for rdblist gbqsort ( buf , count , (int32_t)LIKEDB_RECSIZE, likedbCmp ); // use the list we got key192_t startKey; key192_t endKey; startKey.setMin(); endKey.setMax(); // that is our list list->set ( buf , tmpBuf.length() , buf, // alloc tmpBuf.getCapacity() , // allocSize (char *)&startKey , (char *)&endKey , LIKEDB_DATASIZE , // fixed datasize true , // own data? yeah, free it when done false , // use half keys? no. sizeof(key192_t) ); // steal it from safebuf so it doesn't free it tmpBuf.detachBuf(); return true; } /* YE OLD PIPELINE static void gotRecWrapper ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->hitFacebook ( ) ) return; mfb->m_callback ( mfb->m_state ); } // get it from facebookdb bool Msgfb::processFBId ( int64_t fbId , collnum_t collnum, void *state , void (* callback) (void *) ) { reset(); m_inProgress = true; m_fbId = fbId; m_collnum = collnum; m_state = state; m_callback = callback; // sanity if ( ! m_fbId ) { char *xx=NULL;*xx=0; } int32_t niceness = 0; key96_t startKey; key96_t endKey; startKey.n1 = 0; startKey.n0 = m_fbId; endKey.n1 = 0; endKey.n0 = m_fbId; startKey.n0 <<= 1; endKey.n0 <<= 1; endKey.n0 |= 0x01; //char *coll = g_collectiondb.getColl ( m_collnum ); if ( ! 
m_msg0.getList ( -1, // hostid 0 , // ip 0 , // port 0 , // maxcacheage false, // addtocache RDB_FACEBOOKDB, "none",//coll, &m_list1, (char *)&startKey, (char *)&endKey, 10, // minrecsizes this , gotRecWrapper, niceness ) ) return false; // i guess we got it without blocking return hitFacebook(); } static void gotFQLReplyWrapper ( void *state , TcpSocket *s ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->gotFQLReply( s ) ) return; mfb->m_callback ( mfb->m_state ); } // once we get the facebook access token, we can get the user info // https://graph.facebook.com/me?access_token=ACCESS_TOKEN // NO... i would use one fql call at this point... // if it says "Requires valid signature" you need the access token // fql console: http://developers.facebook.com/docs/reference/rest/fql.query/ bool Msgfb::hitFacebook ( ) { log("facebook: downloading event_members for %"UINT64"",m_fbId); if ( g_errno ) { log("fbqueue: error getting facebookdb rec: %s", mstrerror(g_errno)); return true; } // empty is bad if ( m_list1.getListSize() <= 0 ) { log("fbqueue: facebookdb rec is empty. wtf? fbid=%"INT64"", m_fbId); g_errno = EBADREPLY; return true; } // get the facebook rec... why? m_fbrecPtr = (FBRec *)m_list1.getList(); // sanity if ( m_fbrecPtr->m_fbId != m_fbId ) { log("fbqueue: fbid mismatch. fbid=%"INT64"", m_fbId); g_errno = EBADREPLY; return true; } // deserialize... 
deserializeMsg ( sizeof(FBRec) , &m_fbrecPtr->size_accessToken , &m_fbrecPtr->size_friendIds , &m_fbrecPtr->ptr_accessToken , m_fbrecPtr->m_buf ); // copy for calling fql strcpy ( m_accessToken , m_fbrecPtr->ptr_accessToken ); // get your facebook user info again in case it changed SafeBuf cmd1; cmd1.safePrintf ( "SELECT uid,username,first_name,last_name,name,pic_square,profile_update_time,timezone,religion,birthday,birthday_date,sex,hometown_location,current_location,activities,interests,is_app_user,music,tv,movies,books,about_me,status,online_presence,proxied_email,verified,website,is_blocked,contact_email,email,is_minor,work,education,sports,languages,likes_count,friend_count FROM user where uid=me()"); // get all friends for saving into likedb SafeBuf cmd2; cmd2.safePrintf ( "SELECT uid2 from friend WHERE uid1=me()"); // get status of each friend attending an event and what eventid SafeBuf cmd3; cmd3.safePrintf ( "SELECT uid, eid, rsvp_status, start_time " "FROM event_member where uid IN " "( SELECT uid2 from friend WHERE uid1=me()) " // this start_time from the event_member table // is not accurate. it is often in the past! wtf? //"AND start_time > now()" ); // composite SafeBuf json; json.safePrintf("{" "\"query1\":\"%s\"" "," "\"query2\":\"%s\"" "," "\"query3\":\"%s\"" , cmd1.getBufStart() , cmd2.getBufStart() , cmd3.getBufStart() ); json.safePrintf("}"); json.urlEncode(); // www.howtobe.pro/facebook-graph-api-graph-api-for-issuing-fql-queries // make a url SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" //"fql.query?query=" "fql.multiquery?queries=%s" "&access_token=%s&format=xml" , json.getBufStart() , m_accessToken ); log("facebook: queryurl = %s", ubuf.getBufStart() ); // reset g_errno = 0; // get the results if ( ! 
g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state gotFQLReplyWrapper , // callback 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return false; // otherwise, somehow got it without blocking... wtf? //return gotFQLReply(); if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); return true; } static void savedFBRecWrapper2 ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->downloadEvents ( ) ) return; // final save of rec to clear the FB_INQUEUE bit if ( ! mfb->doFinalFBRecSave ( ) ) return; mfb->m_callback ( mfb->m_state ); } bool Msgfb::gotFQLReply ( TcpSocket *s ) { // bail on error if ( g_errno ) { log("fql: %s",mstrerror(g_errno)); m_errno = g_errno; m_errorCount++; return true; } // get reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; // we reference into this, so do not free it!! m_facebookReply = s->m_readBuf; m_facebookReplySize = s->m_readOffset; m_facebookAllocSize = s->m_readBufSize; // do not allow tcpsocket to free it. we free it in destructor. s->m_readBuf = NULL; // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( reply, replySize - 1, NULL ); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) { log("facebook: bad fql request 2 http status = %"INT32". 
reply=%s" , httpStatus , reply ); log("facebook: resuming despite error to download friends " "for fbid=%"INT64"",m_fbId); //g_errno = EBADREPLY; m_errno = EBADREPLY; //m_errorCount++; //return true; } // point to content char *content = reply + mime.getMimeLen(); int32_t contentLen = reply + replySize - content; // check for error char *errMsg = strstr(content,""); if ( errMsg ) { log("facebook: error in fql reply: %s", content ); log("facebook: resuming despite error to download friends " "for fbid=%"INT64"",m_fbId); //g_errno = EBADREPLY; m_errno = EBADREPLY; //m_errorCount++; //return true; } m_fbrecGen.reset(); // // . set m_fbrecGen now !! // . compare to m_fbrecPtr to see what eventids are new // if ( ! m_errno && ! setFBRecFromFQLReply ( content , contentLen , &m_fbrecGen ) ) { log("fql: error setting fb rec from fql. pipeline 2."); g_errno = EBADREPLY; return true; } // merge fbrecPtr into m_fbrecGen if ( ! m_errno ) mergeFBRec ( &m_fbrecGen , m_fbrecPtr ); // must match if ( ! m_errno && m_fbId != m_fbrecGen.m_fbId ) { log("fql: fbid mismatch in fql reply."); g_errno = EBADENGINEER; m_errno = g_errno; m_errorCount++; return true; } // just make sure m_afterSaveCallback = savedFBRecWrapper2; // start at event #0 m_eventStartNum = 0; // 100 events at a time m_eventStep = 100; // save that before we start downloading the events if ( ! m_errno && ! saveFBRec ( &m_fbrecGen ) ) return false; // try to download new events return downloadEvents ( ); } static void injectFBEventsWrapper ( void *state, TcpSocket *s ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->injectFBEvents ( s ) ) return; // error? try advancing! if ( mfb->m_errno && mfb->m_eventStartNum < 500 ) { if ( mfb->m_eventStep == 100 ) mfb->m_eventStep = 10; else mfb->m_eventStartNum += mfb->m_eventStep; if ( ! mfb->downloadEvents() ) return; } // final save of rec to clear the FB_INQUEUE bit if ( ! 
mfb->doFinalFBRecSave ( ) ) return; if ( mfb->m_callback ) mfb->m_callback ( mfb->m_state ); } // download the new events and then inject them (ptr_newEvents) bool Msgfb::downloadEvents ( ) { // reset this since we check for it in injectFBEventsWrapper m_errno = 0; log("facebook: downloading events for %"UINT64" (#%"INT32"-#%"INT32")", m_fbId,m_eventStartNum,m_eventStartNum+m_eventStep); // skip matt wells for now if ( m_fbId == MATTWELLS ) { // 100003532411011LL ) { log("facebook: skipping matt wells event download"); // final save of rec to clear the FB_INQUEUE bit return doFinalFBRecSave ( ); } // save some mem //m_list1.freeList(); //int32_t now = getTimeGlobal(); // sanity checks if ( m_eventStartNum < 0 ) { char *xx=NULL; *xx=0; } if ( m_eventStep <= 0 ) { char *xx=NULL; *xx=0; } if ( m_eventStartNum >= 100 && m_eventStartNum < 130 && m_errorCount >= 10 ) { log("facebook: too many errors in event downloads. " "fbid=%"INT64" . giving up. start=%"INT32" errcount=%"INT32"", m_fbId,m_eventStartNum,m_errorCount); return doFinalFBRecSave(); } // get the events your friends are related to SafeBuf cmd4; cmd4.safePrintf ( "SELECT eid, " "name, " "tagline, " "nid, " "pic_small, " "pic_big, " "pic_square, " "pic, " "host, " "description, " "event_type, " "event_subtype, " "start_time, " "end_time, " "creator, " "update_time, " "location, " "venue, " "privacy, " "hide_guest_list, " "can_invite_friends " "FROM event WHERE " "start_time > now() AND " "eid IN (" // how do i include events i am assoc. with too? " SELECT eid FROM event_member WHERE " "uid IN " "( SELECT uid2 from friend WHERE uid1=me()) ) " "LIMIT %"INT32",%"INT32"" , m_eventStartNum , m_eventStep ); // list all the new event ids here //int64_t *newIds = (int64_t *)m_eidBuf.getBufStart(); //int32_t n = m_eidBuf.length() / 8; //bool firstOne = true; //for ( int32_t i = 0 ; i < n ; i++ ) { // if ( ! 
firstOne ) cmd4.pushChar(','); // firstOne = false; // cmd4.safePrintf("%"UINT64"",newIds[i]); //} //cmd4.safePrintf ( ")" // ORDER by start_time ASC " // // LIMIT 1001,100 etc. // // LIMIT x,y (x is offset, y is # results) // //"LIMIT 0,100" // //, fbId // ); // composite cmd4.urlEncode(); // www.howtobe.pro/facebook-graph-api-graph-api-for-issuing-fql-queries // make a url SafeBuf ubuf; ubuf.safePrintf("https://api.facebook.com/method/" "fql.query?query=%s" "&access_token=%s&format=xml" , cmd4.getBufStart() , m_accessToken ); log("facebook: cmd4url = %s", ubuf.getBufStart() ); // reset g_errno = 0; // get the results if ( ! g_httpServer.getDoc ( ubuf.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state injectFBEventsWrapper , // callback 40*1000 , // 20 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return false; // otherwise, somehow got it without blocking... wtf? //return gotFQLReply(); log("fql: http get did not block!"); if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("fql: error getting doc: %s",mstrerror(g_errno)); // final save of rec to clear the FB_INQUEUE bit return doFinalFBRecSave ( ); } static void addLikesWrapper ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->addLikes ( ) ) return; if ( ! mfb->doInjectionLoop ( ) ) return; // try to read more!! if ( mfb->m_numEvents > 0 ) { mfb->m_eventStartNum += mfb->m_eventStep; if ( ! mfb->downloadEvents() ) return; } // final save of rec to clear the FB_INQUEUE bit if ( ! mfb->doFinalFBRecSave ( ) ) return; if ( mfb->m_callback ) mfb->m_callback ( mfb->m_state ); } #define MAXEVENTPTRS 1000 bool Msgfb::injectFBEvents ( TcpSocket *s ) { // bail on error if ( g_errno ) { log("fql: %s",mstrerror(g_errno)); m_errno = g_errno; m_errorCount++; return true; } // get reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; // mime error? 
HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( reply, replySize - 1, NULL ); // not good? int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) { log("facebook: bad fql request 3 http status = %"INT32". reply=%s", httpStatus ,reply ); g_errno = EBADREPLY; m_errno = g_errno; m_errorCount++; return true; } // point to content char *content = reply + mime.getMimeLen(); // check for error char *errMsg = strstr(content,""); if ( errMsg ) { log("facebook: error in fql reply2: %s", content ); g_errno = EBADREPLY; m_errno = g_errno; m_errorCount++; return true; } // if we are re-using this class. need to reset some things. m_evPtrBuf.reset(); m_evIdsBuf.reset(); // save reply m_facebookReply2 = s->m_readBuf; m_facebookReplySize2 = s->m_readOffset; m_facebookAllocSize2 = s->m_readBufSize; // do not allow tcpsocket to free it. we free it in destructor. s->m_readBuf = NULL; // . set m_numEvents and m_eventPtrs from m_facebookReply // . they reference into the reply, so do not free m_facebookReply // until destructor is called m_numEvents = 0; // scan for tags and set m_evptrs safebuf to each ptr to those char *p = content; for ( ; *p ; ) { // scan to first tag p = strstr ( p , "" ); if ( ! p ) break; // store start for ptr char *start = p + 7; // find end char *end = strstr ( p , "" ); if ( ! end ) break; // null term it *end = '\0'; // for next round p = end + 8; // try to get event id char *ep = strstr ( start, ""); if ( ! ep ) continue; int64_t eid = strtoull ( ep + 5 , NULL , 10 ); if ( eid == 0 ) continue; if ( eid < 0 ) log("facebook: wtf? eid is 0"); // store it if ( ! m_evPtrBuf.pushLong ( (int32_t)start ) ) return false; if ( ! m_evIdsBuf.pushLongLong ( eid ) ) return false; // count them m_numEvents++; } int32_t askedFor = m_eidBuf.length() / 8; if ( askedFor != m_numEvents ) log("facebook: asked for %"INT32" events but got %"INT32"", askedFor,m_numEvents ); // bail if none! 
if ( m_numEvents <= 0 ) return true; // make a new state Msg7 *msg7; try { msg7 = new (Msg7); } catch ( ... ) { g_errno = ENOMEM; log("facebook: inject msg7 new(%i): %s", sizeof(Msg7),mstrerror(g_errno)); return true; } mnew ( msg7, sizeof(Msg7) , "PageInject" ); // save it for freeing in destructor m_msg7 = msg7; m_i = 0; return doInjectionLoop ( ); } bool Msgfb::doInjectionLoop ( ) { char *coll = g_collectiondb.getColl ( m_collnum ); char **eventPtrs = (char **)m_evPtrBuf.getBufStart(); int64_t *eventIds = (int64_t *)m_evIdsBuf.getBufStart(); for ( ; m_i < m_numEvents ;) { // get ptr to it char *content = eventPtrs[m_i]; int32_t contentLen = gbstrlen(content); // debug thing //if ( eventIds[m_i] != 314901535212815LL ) {m_i++; continue; } // yoyo tuesdays: //if ( eventIds[m_i] != 371365776212884LL ) {m_i++; continue; } // latin dance night //if ( eventIds[m_i] != 111680095620647LL ) {m_i++; continue; } //if (eventIds[m_i] != 273883416016761LL ) {m_i++; continue; } // temp thing //m_c = content[contentLen]; //content[contentLen] = '\0'; // make a fake url char url[128]; sprintf(url,"http://www.facebook.com/events/%"UINT64"", eventIds[m_i]); // test debug if ( g_conf.m_logDebugFacebook ) log("facebook: %s",content); // // set m_privacy for event being injected // char *s = strstr(content,"' for ( ; s && *s && *s !='>' ; s++ ); // skip actual > if ( s && *s == '>' ) s++; // skip whitespace for ( ; s && *s && is_wspace_a(*s) ; s++ ); // compare m_privacy = 0; if ( ! strncasecmp(s,"secret",6) ) m_privacy = LF_PRIVATE; if ( ! strncasecmp(s,"closed",6) ) m_privacy = LF_PRIVATE; //if ( eventIds[m_i] == 273883416016761LL ) // m_privacy = LF_PRIVATE; // test //m_privacy = LF_PRIVATE; // use a forced ip for speed! otherwise it takes forever // lookup up www.facebook.com for some reason!!! int32_t forcedIp = atoip("69.171.224.39"); // advance to next event to inject m_i++; // inject just that if ( ! 
m_msg7->inject ( url , forcedIp, content , contentLen , false, // recyclecontent CT_XML, // contentType, coll , false , NULL, // username NULL , // pwd MAX_NICENESS, this , addLikesWrapper ) ) return false; // bail on error if ( g_errno ) return true; // how did this happen? it needs to block... otherwise // we have to add to likedb here. char *xx=NULL;*xx=0; } // it did not block, i gues we are done //if ( ! doneInjecting ( ) ) return false; return true; } void doneAddingLikesWrapper ( void *state ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->doInjectionLoop() ) return; // try to read more!! if ( mfb->m_numEvents > 0 ) { mfb->m_eventStartNum += mfb->m_eventStep; if ( ! mfb->downloadEvents() ) return; } // final save of rec to clear the FB_INQUEUE bit if ( ! mfb->doFinalFBRecSave ( ) ) return; mfb->m_callback ( mfb->m_state ); } bool Msgfb::addLikes ( ) { // doneInjecting ( ) { // get ptr to it so we can revert the character //char *content = m_eventPtrs[m_i-1]; //int32_t contentLen = m_eventLens[m_i-1]; //content[contentLen] = m_c; XmlDoc *xd = &m_msg7->m_xd; // if event was not found or added for some reason... if ( xd->m_numHashableEvents <= 0 ) { // try to do the next one if ( m_i < m_numEvents && ! doInjectionLoop ( ) ) return false; // all done! no need to loop back up for more, they're done return true; } if ( xd->m_indexCode && xd->m_indexCodeValid ) { // note it log("facebook: could not index doc: %s", mstrerror(xd->m_indexCode)); // try to do the next one if ( m_i < m_numEvents && ! doInjectionLoop ( ) ) return false; // all done! no need to loop back up for more, they're done return true; } // get msg7 reply. it needs to have // // ADD LIKES // // scan the event_members reply we got and cross-reference // those facebook eventids with our eventhash/evid/docid guys // we got in the injection reply to see if we added the // facebook event to our db. in that case, we also want to add // the maybe/goingto/invitedto/notgoing flags. 
// uses the eventhash64, eventid, docid of event added! // returns false with g_errno set on error. if ( ! makeLikedbKeyList ( m_msg7 , &m_list3 ) ) return true; // if nothing to add, we are done if ( m_list3.getListSize() == 0 ) { // try to do the next one if ( m_i < m_numEvents && ! doInjectionLoop ( ) ) return false; // all done! no need to loop back up for more, they're done return true; } // extract info from state //TcpSocket *s = m_msg7->m_socket; //int64_t docId = xd->m_docId; //int32_t hostId = 0;//msg7->m_msg7.m_hostId; char *coll = g_collectiondb.getColl ( m_collnum ); // add that if ( ! m_msg1.addList ( &m_list3 , RDB_LIKEDB , coll , this , doneAddingLikesWrapper , false , 0 ) ) // niceness return false; // this might just add to tree in a single server setup so it // will not block... //if ( ! g_errno ) { char *xx=NULL;*xx=0; } // error must be set! return true; //return doInjectionLoop ( ); } // all done with everything! static void savedFinalFBRecWrapper3 ( void *state ) { Msgfb *mfb = (Msgfb *)state; mfb->m_callback ( mfb->m_state ); } // final save of rec to clear the FB_INQUEUE bit bool Msgfb::doFinalFBRecSave ( ) { m_afterSaveCallback = savedFinalFBRecWrapper3; log("facebook: saving final rec for fbid=%"INT64"",m_fbId); m_fbrecGen.m_flags &= ~FB_INQUEUE; m_fbrecGen.m_eventsDownloaded = getTimeGlobal(); // this calls serializeMsg() which mallocs a new reply to add return saveFBRec( &m_fbrecGen ); } */ ///////////////////// // // LIKEDB // ////////////////////// void Likedb::reset() { m_rdb.reset(); } bool Likedb::init ( ) { int64_t uid = 123456789123LL; int64_t docId = 999888777666LL; int32_t eventId = 12345; int32_t rsvp_status = LF_GOING;//|LF_HIDE; int32_t start_time = 6543210; uint64_t eventHash64 = 9999997398453LL; uint32_t eventHash32 = (uint32_t)eventHash64; int32_t value = 999888; char *recs = g_likedb.makeRecs ( uid , docId , eventId , start_time , rsvp_status , eventHash64 , value ); char *p = recs; int64_t uid2 = 
g_likedb.getUserIdFromRec ( p ); if ( uid2 != uid ) { char *xx=NULL;*xx=0; } int32_t flags = g_likedb.getFlagsFromRec ( p ); if ( flags != rsvp_status ) { char *xx=NULL;*xx=0; } uint32_t eh = g_likedb.getEventHash32FromRec ( p ); if ( eh != eventHash32 ) { char *xx=NULL;*xx=0; } if ( g_likedb.getValueFromRec ( p ) != value ) { char *xx=NULL;*xx=0;} p += LIKEDB_RECSIZE; uid2 = g_likedb.getUserIdFromRec ( p ); if ( uid2 != uid ) { char *xx=NULL;*xx=0; } flags = g_likedb.getFlagsFromRec ( p ); eh = g_likedb.getEventHash32FromRec ( p ); if ( eh != eventHash32 ) { char *xx=NULL;*xx=0; } if ( flags != rsvp_status ) { char *xx=NULL;*xx=0; } // . what's max # of tree nodes? // . NOTE: 32 bytes of the 82 are overhead int32_t maxMem = 50000000; int32_t maxTreeNodes = maxMem / 48; // initialize our own internal rdb return m_rdb.init ( g_hostdb.m_dir , "likedb" , true , // dedup same keys? LIKEDB_DATASIZE , // fixed record size 2,//g_conf.m_tagdbMinFilesToMerge , maxMem, // 5MB g_conf.m_tagdbMaxTreeMem , maxTreeNodes , // now we balance so Sync.cpp can ordered huge list true , // balance tree? 0 , //g_conf.m_tagdbMaxCacheMem , 0 , //maxCacheNodes , false , // half keys? false , //m_tagdbSaveCache NULL , // pagecache - tree only! false, // is titledb false, // preload disk page cache sizeof(key192_t), // key size false ); // bias disk page cache? } bool Likedb::addColl ( char *coll, bool doVerify ) { if ( ! m_rdb.addColl ( coll ) ) return false; return true; } // FIRST REC KEY: // // key192_t: // uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu // uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu u=uid (fb userid) // dddddddd dddddddd dddddddd dddddddd d=docid // dddddd00 00000000 eeeeeeee eeeeeeee e=gbeventid D=delbit // ssssssss ssssssss ssssssss ssssssss start_time // ffffffff ffffffff ffffffff ffffff0D flags (LF_HIDE,etc.) 
// SECOND REC KEY: // // key192_t: // dddddddd dddddddd dddddddd dddddddd d=docid // dddddd00 00000000 eeeeeeee eeeeeeee e=gbeventid // uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu u=uid (fb userid) // uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu // ssssssss ssssssss ssssssss ssssssss start_time // ffffffff ffffffff ffffffff ffffff1D flags (LF_HIDE,etc.) // data: // hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh eventhash32 // vvvvvvvv vvvvvvvv vvvvvvvv vvvvvvvv v=value of flag or facebook event id // vvvvvvvv vvvvvvvv vvvvvvvv vvvvvvvv // // NOTE: we set "v" to facebook event id for the LF_ADDEDTOFACEBOOK flag // int32_t Likedb::getEventIdFromRec ( void *rec ) { key192_t *k = (key192_t *)rec; if ( k->n0 & LF_TYPEBIT ) return k->n2 & 0xffff; return k->n1 & 0xffff; } void Likedb::setEventId ( char *rec , int32_t eventId ) { key192_t *k = (key192_t *)rec; // sanbity if ( eventId & 0xffff0000 ) { char *xx=NULL;*xx=0;} // is it a SECOND REC? if ( k->n0 & LF_TYPEBIT ) { // clear out old event id bits k->n2 &= 0xffffffffffff0000LL; // or in new, shifted up 1 for delbit k->n2 |= eventId; } else { // clear out old event id bits k->n1 &= 0xffffffffffff0000LL; // or in new, shifted up 1 for delbit k->n1 |= eventId; } } // use our docid/eventid because that is what we use in datedb when // doing a search. the docid and eventid should be returned by the msg7 // inject reply. char *Likedb::makeRecs ( int64_t uid , int64_t docId , int32_t eventId , int32_t start_time , int32_t rsvp_status , uint64_t eventHash64 , int64_t value ) { // sanity if ( rsvp_status & LF_TYPEBIT ) { char *xx=NULL;*xx=0; } if ( rsvp_status & LF_DELBIT ) { char *xx=NULL;*xx=0; } // only one flag can be set!!! 
int32_t ignore = 0; ignore |= LF_DELBIT; ignore |= LF_TYPEBIT; ignore |= LF_ISEVENTGURUID; ignore |= LF_FROMFACEBOOK; if ( getNumBitsOn32(rsvp_status & ~ignore)!=1) { char *xx=NULL;*xx=0;} if ( docId < 0 ) { char *xx=NULL;*xx=0; } if ( eventId > 0xffff ) { char *xx=NULL;*xx=0; } if ( eventId < 0 ) { char *xx=NULL;*xx=0; } // the record static char s_buf[2*LIKEDB_RECSIZE]; // store a 16 byte key first key192_t k; // the destination ptr char *p = s_buf; uint32_t eventHash32 = (uint32_t)eventHash64; //log("facebook: adding eventhash64 = %"UINT64"",eventHash64 ); //log("facebook: adding eventhash32 = %"UINT32"",eventHash32 ); // // make the first type of rec // // . this is the facebook userid OR the eventGuruId // . can also be the userid we assign someone i guess on our end k.n2 = uid; // reset k.n1 = 0; // then docid k.n1 |= docId; // then 10 zero bits k.n1 <<= 10; // make room for event id k.n1 <<= 16; k.n1 |= eventId; // starttime k.n0 = start_time; k.n0 <<= 32; k.n0 |= rsvp_status; // we are a positive key! 
k.n0 |= LF_DELBIT; // store that *(key192_t *)p = k; // skip key p += sizeof(key192_t); // then 4 byte data *(int32_t *)p = eventHash32; p += 4; // then the value *(int64_t *)p = value; p += 8; // // now make the 2nd rec // k.n2 = k.n1; k.n1 = uid; // this is the 2nd type of rec, so set this bit k.n0 |= LF_TYPEBIT; // store second key *(key192_t *)p = k; // skip second key p += sizeof(key192_t); // now this *(int32_t *)p = eventHash32; p += 4; // then the value *(int64_t *)p = value; p += 8; return s_buf; } // make a "type 2" key (docid leads) key192_t Likedb::makeStartKey ( int64_t docId, int32_t eventId ) { key192_t k; // reset k.n2 = docId; // then 10 zero bits k.n2 <<= 10; // make room for event id k.n2 <<= 16; k.n2 |= eventId; // any user id k.n1 = 0LL; // any starttime or flags k.n0 = 0LL; return k; } // make a "type 2" key (docid leads) key192_t Likedb::makeEndKey ( int64_t docId, int32_t eventId ) { key192_t k; // reset k.n2 = docId; // then 10 zero bits k.n2 <<= 10; // make room for event id k.n2 <<= 16; k.n2 |= eventId; // max user id k.n1 = 0xffffffffffffffffLL; // max starttime and flags k.n0 = 0xffffffffffffffffLL; return k; } int64_t Likedb::getUserIdFromRec ( void *rec ) { key192_t *k = (key192_t *)rec; if ( k->n0 & LF_TYPEBIT ) return k->n1; return k->n2; } int64_t Likedb::getDocIdFromRec ( char *rec ) { key192_t *k = (key192_t *)rec; if ( k->n0 & LF_TYPEBIT ) return k->n2 >> 26; return k->n1 >> 26; } key192_t Likedb::makeStartKey2 ( int64_t uid ) { key192_t k; k.n2 = uid; k.n1 = 0; k.n0 = 0; return k; } // . a facebook user uses likedb to tag an event rsvp_status // . similar to facebook's event_members table, but uses our technology // . use Rdb, but prohibit from dumping to disk! must always be in tree. // . add this to likedb (or remove if "neg" is true. 
bool Msgfc::addLikedbTag ( int64_t userId , int64_t docId, int32_t gbeventId, uint64_t eventHash64 , int32_t start_time, int32_t rsvp , // LF_* #define's above bool negative , // turn off that flag? char *coll, void *state , void (* callback)(void *) ) { if ( userId < 0 ) { char *xx=NULL;*xx=0; } char *p = m_recs; int32_t size = (int32_t)LIKEDB_RECSIZE*2; int32_t count = 0; //int32_t eventHash32 = (int32_t)((uint64_t)eventHash64); int64_t value = 1LL; if ( negative ) value = 0LL; //if ( eventHash == 0 || eventHash32 == -1 ) { char *xx=NULL;*xx=0;} // sanity int32_t ignore; ignore |= LF_DELBIT; ignore |= LF_TYPEBIT; ignore |= LF_ISEVENTGURUID; ignore |= LF_FROMFACEBOOK; if ( getNumBitsOn32 ( rsvp & ~ignore ) != 1 ) { char *xx=NULL;*xx=0; } // only use start_time for "going". everything else tags all // occurences of the event: like,hide,accept,reject,invited. // otherwise if you like an event you have to unlike the exact // same occurence to turn the flags off! if ( ! ( rsvp & LF_GOING ) ) start_time = 0; // . this makes two recs to add to likedb // . the first one starts with uid in the key so it will be // used to find events that your friends are // . the second one starts with docid/evid so it can be used // to lookup who likes an event when we generate the summary // for the event because its in the search results char *recs = g_likedb.makeRecs ( userId , docId , gbeventId , start_time , rsvp , eventHash64 , value ); // add to list otherwise gbmemcpy ( p , recs , size ); p += size; count += 2; // sort for rdblist gbqsort ( m_recs , count , (int32_t)LIKEDB_RECSIZE, likedbCmp ); // use the list we got key192_t startKey; key192_t endKey; startKey.setMin(); endKey.setMax(); // that is our list m_list6.set ( m_recs , p - m_recs , // listsize NULL, // alloc 0 , // allocSize (char *)&startKey , (char *)&endKey , LIKEDB_DATASIZE , // fixed datasize true , // own data? yes, free it when done false , // use half keys? no. LIKEDB_KEYSIZE ); // add that if ( ! 
m_msg1.addList ( &m_list6 , RDB_LIKEDB , coll , state, // this , callback, //doneAddingLikedbTagWrapper , false , 0 ) ) // niceness return false; return true; } int32_t Likedb::getUserFlags ( int64_t userId , int32_t start_time , char *list, int32_t listSize ) { // bail if not valid user id if ( userId == 0LL ) return 0; // scan char *p = list; char *pend = list + listSize; int32_t flags = 0; for ( ; p < pend ; p += LIKEDB_RECSIZE ) { // check for matching userid int64_t uid = g_likedb.getUserIdFromRec ( p ); if ( uid != userId ) continue; // got it int32_t ff = g_likedb.getFlagsFromRec ( p ); // get value int64_t val = g_likedb.getValueFromRec ( p ); // skip if 0, that means unset! if ( ! val ) continue; // restrict to just this instance of its a "GOING" flag if ( ff & LF_GOING ) { int32_t start = g_likedb.getStartTimeFromRec ( p ); if ( start&&start_time && start!=start_time) continue; } // keep tabs flags |= ff; } return flags; } int32_t Likedb::getPositiveFlagsFromRec ( char *rec ) { if ( ! g_likedb.getValueFromRec ( rec ) ) return 0; int32_t flags = (*(int32_t *)rec) & ~(LF_DELBIT|LF_TYPEBIT); return flags; } char *Likedb::getRecFromLikedbList ( int64_t userId , int32_t start_time , int32_t flags , char *list , int32_t listSize ) { // bail if not valid user id if ( userId == 0LL ) return NULL; // scan char *p = list; char *pend = list + listSize; for ( ; p < pend ; p += LIKEDB_RECSIZE ) { // check for matching userid int64_t uid = g_likedb.getUserIdFromRec ( p ); if ( uid != userId ) continue; // got it int32_t ff = g_likedb.getFlagsFromRec ( p ); // must match if ( ! ( ff & flags) ) continue; // get value int64_t val = g_likedb.getValueFromRec ( p ); // skip if 0, that means unset! if ( ! 
val ) continue; // restrict to just this instance of its a "GOING" flag if ( flags & LF_GOING ) { int32_t start = g_likedb.getStartTimeFromRec ( p ); if ( start&&start_time && start!=start_time) continue; } return p; } return NULL; } // http://developers.facebook.com/docs/reference/rest/events.create/ // http://developers.facebook.com/docs/reference/api/event/ // scan over all events that have someone going to them or maybe going // and make sure facebook has that status. // therefore maybe add LF_EGMAYBE and LF_FBMAYBE as separate flags // so we know if the bit can from facebook or not. once we update facebook // with the event then we can set LF_FBMAYBE or LF_FBGOING for that user // assuming they have a facebook id. also, if we initially use their // eventguruid and they tag an event then later they log in and we get // their facebookid, we have to make sure to update the event on facebook // to reflect they are unsure/goingtoit. // also, if an event has LF_ACCEPTED we should upload it to facebook under // our appid. // so, let's be perpetually scanning likedb to do this... // maybe just host #0 should do it to avoid slamming facebook? /* ////////////////////////// // // // ADD EVENT TO FACEBOOK // // ////////////////////////// bool Msgfb::addEventToFacebook ( char *title , char *desc , int32_t start_time , int32_t end_time , void *state , void (* callback)(void *state) , int32_t niceness ) { // how do we get the accesstoken for the app? must have to pass // in our secret somehow. if ( ! getAppAccessToken ( ) ) return false; return gotAppAccessToken(); } static void addedFBEventWrapper ( void *state , TcpSocket *s ) { Msgfb *mfb = (Msgfb *)state; if ( ! 
mfb->addedFBEvent ( s ) ) return; mfb->m_callback ( mfb->m_state ); } bool Msgfb::gotAppAccessToken ( ) { SafeBuf args; args.safePrintf("name="); args.urlEncode( title ); args.safePrintf("&description="); args.urlEncode( description ); args.safePrintf("&start_time=%"UINT32"" "&end_time=%"UINT32"" "&latitude=%.07f" "&longitude=%.07f" , start_time , end_time , latitude , longitude ); SafeBuf purl; purl.safePrintf("https://graph.facebook.com/%s/events?" "access_token=%s&" "%s" , APPID , m_appAccessToken , args.getBufStart() ); // reset g_errno = 0; // . get the results // . TODO: make sure post puts the args in the post section if ( ! g_httpServer.getDoc ( purl.getBufStart() , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state addedFBEventWrapper , // callback 40*1000 , // 40 sec timeout 0 , // proxyip 0 , // proxyport 30000000 , // maxTextDocLen , 30000000 , // maxOtherDocLen , g_conf.m_spiderUserAgent , "HTTP/1.0", true ) ) // doPost? // return false if blocked return false; // otherwise, somehow got it without blocking... wtf? if ( ! g_errno ) { char *xx=NULL;*xx=0; } log("facebook: error adding event: %s",mstrerror(g_errno)); return true; } bool Msgfb::addedFBEvent ( TcpSocket *s ) { // get the event id from reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; char *s = strstr ( reply , "id" ); if ( ! s ) { log("facebook: add event reply had no eid"); g_errno = EBADREPLY; return true; } // get it otherwise char *p = s; for ( ; *p && !is_digit(*p) ; p++ ); if ( ! is_digit ( *p ) ) { log("facebook: add event reply had no eid 2"); g_errno = EBADREPLY; return true; } int64_t eid = strtoull(p,NULL,10); // add it to likedb as being in facebook now! char flags = LF_ADDEDTOFACEBOOK; if ( ! m_msgfc.addLikedbTag ( 0 , // eventGuruId, APPID , // facebookId, m_docIdToAdd, m_eventIdToAdd, m_eventHash64ToAdd, m_start_timeToAdd flags , // LF_* #define's above false, // negative - turn off that flag? 
m_coll , this , addedLikedbTag2 ) ) return false; } /////////////////////////// // // // GET APP ACCESS TOKEN // // /////////////////////////// static char s_appAccessToken[256]; static bool s_appAccessTokenValid = false; static bool s_inProgress = false; static void gotAppAccessTokenWrapper ( void *state , TcpSocket *s ) { Msgfb *mfb = (Msgfb *)state; if ( ! mfb->gotAppAccessToken ( s ) ) return; mfb->m_callback ( mfb->m_state ); } bool Msgfb::getAppAccessToken ( ) { if ( s_appAccessTokenValid ) { strcpy ( m_appAccessToken , s_appAccessToken ); return true; } // must not be in progress if ( s_inProgress ) { char *xx=NULL;*xx=0; } s_inProgress = true; // use code to get access token // that calls https://graph.facebook.com/oauth/access_token? // client_id=YOUR_APP_ID&redirect_uri=YOUR_URL& // client_secret=YOUR_APP_SECRET char fburl[1024]; sprintf(fburl, "https://graph.facebook.com/oauth/access_token?" "client_id=%s&" "client_secret=%s&" "grant_type=client_credentials" , APPID , APPSECRET ); // reset g_errno = 0; if ( ! g_httpServer.getDoc ( fburl , 0 , // urlIp 0 , // offset -1 , 0 , // ifModifiedSince , this , // state gotAppAccessTokenWrapper , // callback 10*1000 , // 10 sec timeout 0 , // proxyip 0 , // proxyport 10000 , // maxTextDocLen , 10000 , // maxOtherDocLen , g_conf.m_spiderUserAgent ) ) // return false if blocked return false; // error? if ( ! g_errno ) { char *xx=NULL;*xx=0; } // all done s_inProgress = false; // let caller know we did not block return gotAppAccessToken ( NULL ); } bool Msgfb::gotAppAccessToken ( TcpSocket *s ) { // all done s_inProgress = false; // some kind of error? if ( g_errno ) { log("facebook: error launching read of app access token: %s", mstrerror(g_errno)); return; } // the access token should be in the reply char *reply = s->m_readBuf; int32_t replySize = s->m_readOffset; // mime error? HttpMime mime; // exclude the \0 i guess. use NULL for url. mime.set ( reply, replySize - 1, NULL ); // not good? 
int32_t httpStatus = mime.getHttpStatus(); if ( httpStatus != 200 ) { log("facebook: bad app access request http status = %"INT32"", httpStatus ); g_errno = EBADREPLY; return; } // point to content char *content = reply + mime.getMimeLen(); // assume no accesstoken provided s_appAccessToken[0] = '\0'; // look for access token //sscanf(content,"access_token=%s&expires=%"INT32"",m_accessToken,&expires); char *at = strstr(content,"access_token="); if ( at ) { char *p = at + 13; char *start = p; for ( ; *p && *p != '&' ;p++ ); int32_t len = p - start; if ( len > MAX_TOKEN_LEN ) { char *xx=NULL;*xx=0; } gbmemcpy ( s_appAccessToken , start , len ); s_appAccessToken [ len ] = '\0'; } // error? if ( ! s_appAccessToken[0] ) { log("facebook: could not find app access token"); g_errno = EBADREPLY; return; } // sanity if ( gbstrlen(m_accessToken) > MAX_TOKEN_LEN ) { char *xx=NULL;*xx=0;} // set this timestamp //m_accessTokenCreated = getTimeGlobal(); s_appAccessTokenValid = true; strcpy ( m_appAccessToken , s_appAccessToken ); return true; } // we should also have a spiderloop to continually search for and find // events on facebook and add them to our db */ /////////////////////// // // // THE EMAILER // // . g_emailer is in Process.cpp and so is its 60 second sleep callback // /////////////////////// // . some sleepwrapper should call this once every 10 seconds or so bool Emailer::emailEntryLoop ( ) { // temporarily disable return true; // skip if in progress already if ( m_emailInProgress ) return true; // wait for clock to be in sync if ( ! isClockInSync() ) return true; int32_t now = getTimeGlobal(); if ( m_lastEmailLoop && now - m_lastEmailLoop < 60 ) return true; // just use first event collection CollectionRec *cr = NULL; for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { cr = g_collectiondb.m_recs[i]; if ( ! cr ) continue; if ( ! cr->m_indexEventsOnly ) { cr = NULL; continue; } break; } if ( ! 
cr ) return true; // lock it out m_emailInProgress = true; // we are doing a scan loop, not sending a single email //m_sendSingleEmail = false; // save this m_coll = cr->m_coll; m_collnum = g_collectiondb.getCollnum ( m_coll ); // . make sure m_emailTree is full // . this returns false if blocked, true otherwise // . it should call emailScan() when done if it blocks if ( ! populateEmailTree ( ) ) return false; // do the scan loop return emailScan( NULL ); } // . ok, now m_emailTree should be fully populated // . returns false if blocked, true otherwise bool Emailer::emailScan ( EmailState *es ) { if ( es && es->m_sendSingleEmail ) { es->m_singleCallback ( es->m_singleState ); return true; } log("emailer: scanning fbids"); loop: int32_t now = getTimeGlobal(); // scan for the fbids in the email tree int32_t n = m_emailTree.getFirstNode(); // get the key if n is good key96_t *kp = NULL; if ( n >= 0 ) kp = (key96_t *)m_emailTree.getKey(n); // check time. stop scanning if in the future! if ( kp && kp->n1 > (uint32_t)now ) n = -1; // none remain? if ( n < 0 ) { // return false if waiting for email replies if ( m_emailRequests > m_emailReplies ) return false; // clear this so we can run again later m_emailInProgress = false; // i guess update this to the completion time int32_t now = getTimeGlobal(); m_lastEmailLoop = now; return true; } // save id int64_t fbId = kp->n0; // nuke that node m_emailTree.deleteNode ( n , true ); // . ok launch an email. pass in the facebook id // . returns false if blocked, true otherwise launchEmail ( fbId ); // . if we hit the outstanding limit, "block" // . when an email reply comes it it should re-call // emailScan()... and it should remove itself from // m_emailTree so we do not re-do it! if ( m_emailRequests - m_emailReplies >= MAX_OUTSTANDING_EMAILS ) return false; // do another one! goto loop; } //static void generateEventsEmailWrapper ( void *state ) { // Emailer *em = (Emailer *)state; // if ( ! 
em->generateEventsEmail( ) ) return; //} // from PageEvents.cpp: extern bool sendPageEvents2 ( TcpSocket *s , HttpRequest *hr , SafeBuf *resultsBuf, SafeBuf *emailLikedbListBuf, void *state , void (* emailCallback)(void *state) , SafeBuf *providedBuf , void *providedState , void (* providedCallback)(void *state) ); static void gotPageToEmailWrapper ( void *state ) { EmailState *es = (EmailState *)state; Emailer *em = es->m_emailer; if ( ! em->getMailServerIP( es ) ) return; // scan next em->emailScan( es ); } bool Emailer::launchEmail ( int64_t fbId ) { // we need an email state now! EmailState *es = NULL; for ( int32_t i = 0 ; i < MAX_OUTSTANDING_EMAILS ; i++ ) { if ( m_emailStates[i].m_inUse ) continue; es = &m_emailStates[i]; break; } // how can this happen? if ( ! es ) { char *xx=NULL;*xx=0; } // make a fake http request //SafeBuf hrsb; //return true; // must remain on stack since the copied HttpRequest will point into // this or the SearchInput will point into this es->m_hrsb.safePrintf("GET /?" "c=%s&" "showpersonal=1&" //"where=anywhere&" // this should override the fbid in the cookie "usefbid=%"INT64"&" "fh=%"UINT32"&" "usecookie=0&" "map=0&" "n=25&" "emailformat=1" " HTTP/1.0\r\n\r\n" , m_coll , fbId , hash32((char *)&fbId,8) ); HttpRequest hr; hr.set (es->m_hrsb.getBufStart(), es->m_hrsb.length() , (TcpSocket *)NULL ); TcpSocket *s = NULL; // two counts m_emailRequests++; // claim it es->m_inUse = true; // point to emailer es->m_emailer = this; // who are we sending to? es->m_fbId = fbId; // container class es->m_emailer = this; // reset this es->m_errno = 0; // our collection es->m_coll = m_coll; // we are doing a loop! so return to emailScan() function es->m_sendSingleEmail = false; // clear these es->m_emailResultsBuf .purge(); es->m_emailLikedbListBuf.purge(); // . use that to generate the search results // . returns false if blocked, true otherwise if ( ! sendPageEvents2 ( s , &hr , // this is copied right away! 
// our special parms: &es->m_emailResultsBuf, &es->m_emailLikedbListBuf, es , gotPageToEmailWrapper , NULL , NULL , NULL ) ) return false; // we got it return getMailServerIP ( es ); } static void gotMXIpWrapper ( void *state , int32_t ip ) { EmailState *es = (EmailState *)state; Emailer *em = es->m_emailer; if ( ! em->gotMXIp ( es ) ) return; // scan next em->emailScan( es ); } // . PageEvents.cpp should have stored the html content into m_emailResultsBuf // . so email that to the recipient bool Emailer::getMailServerIP ( EmailState *es ) { // // if no results, skip it the next couple functions // if ( es->m_emailResultsBuf.length() == 0 ) { log("emailer: no results for user %"UINT64"",es->m_fbId); return gotEmailReply ( es , NULL ); } // get email address from the msg char *rb = es->m_emailResultsBuf.getBufStart(); char *emailTo = NULL; if ( rb ) emailTo = strstr(rb,"RCPT To:<"); // bail on error if ( ! emailTo ) { log("emailer: no email address for %"UINT64"",es->m_fbId); es->m_emailResultsBuf.purge(); es->m_emailLikedbListBuf.purge(); es->m_inUse = false; m_emailReplies++; return true; } // get domain from that char *p = emailTo; char *pend = p + 256; for ( ; *p && *p != '@' && p < pend ; p++ ) ; if ( p >= pend || ! *p ) { log("emailer: no at sign in email address " "for %"UINT64"",es->m_fbId); es->m_emailResultsBuf.purge(); es->m_emailLikedbListBuf.purge(); es->m_inUse = false; m_emailReplies++; return true; } // skip over '@' sign p++; // set domain char *dom = p; // scan domain length for ( ; *p && *p != '>' && p < pend ; p++ ) ; int32_t domLen = p - dom; if ( p >= pend || ! *p || domLen > 80 ) { log("emailer: no valid subdomain in email address " "for %"UINT64"",es->m_fbId); es->m_emailResultsBuf.purge(); es->m_emailLikedbListBuf.purge(); es->m_inUse = false; m_emailReplies++; return true; } // get the ip. 
// use kinda a fake hostname to pass into MsgC
	// so that it understands its a special MX record lookup
	// (builds "gbmxrec-<domain>" in es->m_emailSubdomain; domLen was
	//  capped at 80 above)
	char *dst = es->m_emailSubdomain;
	gbmemcpy ( dst , "gbmxrec-" , 8 );
	dst += 8;
	gbmemcpy ( dst , dom , domLen );
	dst += domLen;
	*dst = '\0';

	// . now get the ip for that. get the MX record IP!!!
	// . it will recognize the gbmxrec- prepension and ask for the
	//   MX record
	if ( ! es->m_msgc.getIp ( es->m_emailSubdomain ,
				  dst - es->m_emailSubdomain ,
				  &es->m_ip ,
				  es ,
				  gotMXIpWrapper ) )
		return false;

	return gotMXIp ( es );
}

// called when the tcp send of the email finished if sendMsg() blocked.
// carries on with gotEmailReply() and, if that does not block, goes back
// to emailScan().
static void gotEmailReplyWrapper ( void *state , TcpSocket *s ) {
	EmailState *es = (EmailState *)state;
	Emailer *em = es->m_emailer;
	if ( ! em->gotEmailReply ( es , s ) ) return;
	// scan next
	em->emailScan( es );
}

// . called with es->m_ip holding the result of the MX lookup
// . on a lookup error or a bad ip it records the error in es->m_errno and
//   goes straight to gotEmailReply()
// . otherwise it dumps the email into html/email/ for debugging and sends
//   m_emailResultsBuf to port 25 of that ip
// . returns false if the send blocked, true otherwise
bool Emailer::gotMXIp ( EmailState *es ) {

	// logged before g_errno is looked at, so m_ip may not be valid here
	log("facebook: got mx ip of %s for %s",
	    iptoa(es->m_ip),
	    es->m_emailSubdomain );

	// our problem? like ENOMEM?
	if ( g_errno ) {
		log("emailer: had server side error getting ip: %s",
		    mstrerror(g_errno));
		es->m_errno = g_errno;
		return gotEmailReply ( es , NULL );
	}

	// shortcut
	int32_t ip = es->m_ip;//msgc.getIp();

	// problem?
	if ( ip == 0 || ip == -1 ) {
		log("emailer: bad ip of %"INT32" for %s for %"UINT64"",
		    ip, es->m_emailSubdomain, es->m_fbId);
		es->m_errno = EBADIP;
		g_errno = EBADIP;
		return gotEmailReply ( es , NULL );
	}

	// send the message
	TcpServer *ts = g_httpServer.getTcp();
	// log it
	log ( LOG_WARN, "emailer: Sending email to %"UINT64" size=%"INT32"",
	      es->m_fbId , es->m_emailResultsBuf.length());

	/*
	//
	// THIS ONE WORKS so work backwards from here if you have issues
	//
	SafeBuf *eb = &es->m_emailResultsBuf;
	eb->reset();
	eb->safePrintf(
		       "EHLO gigablast.com\r\n"
		       "Mail From:\r\n"
		       "RCPT To:\r\n"
		       "DATA\r\n"
		       "From: mwells \r\n"
		       "MIME-Version: 1.0\r\n"
		       "To: app+a4gdq01pp8.2qufhgd443.c6277eee23cba81f44f0decbcb1a4d03@proxymail.facebook.com\r\n"
		       "Subject: testing\r\n"
		       "Content-Type: text/html; charset=UTF-8; format=flowed\r\n"
		       "Content-Transfer-Encoding: 8bit\r\n"
		       "\r\n"
		       "\r\n"
		       "
 \r\n"
		       ".\r\n"
		       "QUIT\r\n"
		       );
	*/

	//
	// debug by dumping to file!!!
	//
	char filename[512];
	int32_t now = getTimeLocal();
	sprintf ( filename,"html/email/email.%"UINT64".%"UINT32""
		  , es->m_fbId
		  , now
		  );
	es->m_emailResultsBuf.save(g_hostdb.m_dir,filename);
	log("facebook: saving email %s", filename);
	// append an entry for this email to the html/email/email.html index
	SafeBuf embuf;
	embuf.load(g_hostdb.m_dir,"html/email/email.html");
	// NOTE(review): this literal has a raw line break in it and only
	// two conversions for the four arguments passed. it looks like
	// markup was lost from the string at some point -- confirm the
	// intended text against version control before touching it.
	embuf.safePrintf("email.%"UINT64".%"UINT32"
" , es->m_fbId , now , es->m_fbId , now );
	embuf.save(g_hostdb.m_dir,"html/email/email.html");

	log("facebook: emailing %"INT32" bytes",
	    es->m_emailResultsBuf.length() );

	//
	// skip actual email for now!
	//
	//gotEmailReply( es , NULL );
	//if ( ! es->m_sendSingleEmail ) return true;

	if ( ! ts->sendMsg ( ip,
			     25, // smtp (send mail transfer protocol) port
			     es->m_emailResultsBuf.getBufStart(),
			     es->m_emailResultsBuf.length(),
			     es->m_emailResultsBuf.length(),
			     es->m_emailResultsBuf.length(),
			     es,
			     gotEmailReplyWrapper,
			     60*1000,
			     1000*1024,
			     1000*1024 ) )
		return false;

	// we did not block, so update facebook rec with timestamps
	// NOTE(review): the return value of gotEmailReply() is dropped here,
	// so this reports "did not block" even if that call blocked --
	// confirm that is intended.
	gotEmailReply( es , NULL );

	// we did not block
	return true;
}

// called when the facebookdb rec lookup finished if getList() blocked.
// carries on with gotRec3() and, if that does not block, goes back to
// emailScan().
static void gotRecWrapper3 ( void *state ) {
	EmailState *es = (EmailState *)state;
	Emailer *em = es->m_emailer;
	if ( ! em->gotRec3 ( es ) ) return;
	// scan next
	em->emailScan( es );
}

// . called when the send is over, or directly with s == NULL when there
//   was nothing to send or an earlier step failed
// . frees the email buffer, moves any g_errno into es->m_errno and then
//   loads the user's facebookdb rec so its timestamps can be updated
// . returns false if blocked, true otherwise
bool Emailer::gotEmailReply ( EmailState *es , TcpSocket *s ) {

	// don't free it that's our job!
	// (the send buffer is es->m_emailResultsBuf, purged right below)
	if ( s ) s->m_sendBuf = NULL;

	// free allocated memory
	es->m_emailResultsBuf.purge();

	if ( g_errno ) {
		log("emailer: got error sending to fbid=%"INT64": %s",es->m_fbId,
		    mstrerror(g_errno));
		es->m_errno = g_errno;
		// reset these errors just in case
		g_errno = 0;
	}

	// . show the reply
	// . seems to crash if we log the read buffer... no \0?
	if ( s && s->m_readBuf )
		log("emailer: got email server reply: %s",
		    s->m_readBuf );
	else
		log("emailer: missing email server reply!");

	log("emailer: getting fbrec for fbid=%"INT64"",es->m_fbId);

	// load the facebookdb rec so we can update it and save it then
	// (both keys are the fbid shifted up one bit; the end key also
	//  has the low bit set)
	key96_t startKey;
	key96_t endKey;
	startKey.n1 = 0;
	startKey.n0 = es->m_fbId;
	endKey.n1 = 0;
	endKey.n0 = es->m_fbId;
	startKey.n0 <<= 1;
	endKey.n0 <<= 1;
	endKey.n0 |= 0x01;
	if ( !
m_msg0.getList ( -1, // hostid 0 , // ip 0 , // port 0 , // maxcacheage false, // addtocache RDB_FACEBOOKDB, "",//m_coll, &es->m_list9, (char *)&startKey, (char *)&endKey, 11, // minrecsizes es, // this , gotRecWrapper3, MAX_NICENESS ) ) return false; // i guess we got it without blocking return gotRec3 ( es ); } static void savedUpdatedRecWrapper ( void *state ) { EmailState *es = (EmailState *)state; Emailer *em = es->m_emailer; if ( ! em->savedUpdatedRec ( es ) ) return; // scan next em->emailScan( es ); } bool Emailer::gotRec3 ( EmailState *es ) { // error loading? if ( g_errno ) { log("emailer: error loading facebookdb rec for %"UINT64"", es->m_fbId ); es->m_errno = g_errno; } // empty is bad if ( es->m_list9.getListSize() <= 0 ) { log("emailer: facebookdb rec is empty. wtf? fbid=%"INT64"", es->m_fbId); es->m_errno = EBADREPLY; } // get the facebook rec... why? FBRec *rec = (FBRec *)es->m_list9.getList(); // assume no error if ( ! es->m_errno ) rec->m_nextRetry = 0; int32_t now = getTimeGlobal(); // on error... if ( es->m_errno ) { // did we have a previous attempt? int32_t elapsed = now - rec->m_lastEmailAttempt ; // our first time? set to 6 hours retry then. if ( rec->m_nextRetry == 0 ) elapsed = 0; // ok, add 3 hours and double that int32_t wait = 2 * (elapsed + 3*3600); // store that then rec->m_nextRetry = now + wait; } // update the last send time rec->m_lastEmailAttempt = now; // . add the facebookdb rec back now with updated times // . just use TagRec::m_msg1 now // . no, can't use that because tags are added using SafeBuf::addTag() // which first pushes the rdbid, so we gotta use msg4 // . if a host is down we have to fix msg1 (and msg4) so they both // just write to a file until that host comes back up. if ( ! 
es->m_msg1.addList ( &es->m_list9 , RDB_FACEBOOKDB , "none",//m_coll , es , savedUpdatedRecWrapper, false , 0 ) ) // niceness return false; // this does not block if only one host and in memory return savedUpdatedRec ( es ); } static void doneAddingEmailedLikesWrapper ( void *state ) { EmailState *es = (EmailState *)state; Emailer *em = es->m_emailer; if ( ! em->doneAddingEmailedLikes ( es ) ) return; // scan next em->emailScan(es); } bool Emailer::savedUpdatedRec ( EmailState *es ) { // now add to likedb if no error so we do not re-email these // same events. start_time is non-zero so it is just the single // instances of each event in the case of recurring events. SafeBuf *eb = &es->m_emailLikedbListBuf; // sort the records in tmp now char *buf = eb->getBufStart(); int32_t bufSize = eb->length(); // how many? int32_t count = bufSize / (int32_t)LIKEDB_RECSIZE; // sort for rdblist gbqsort ( buf , count , (int32_t)LIKEDB_RECSIZE, likedbCmp ); // use the list we got key192_t startKey; key192_t endKey; startKey.setMin(); endKey.setMax(); // that is our list es->m_list5.set ( buf , bufSize , buf, // alloc eb->getCapacity() , // allocSize (char *)&startKey , (char *)&endKey , LIKEDB_DATASIZE , // fixed datasize true , // own data? yeah, free it when done false , // use half keys? no. sizeof(key192_t) ); // steal it from safebuf so it doesn't free it eb->detachBuf(); // note it log("facebook: adding events to likedb to prevent re-emailing. " "listsize=%"INT32"",es->m_list5.getListSize()); // add that if ( ! 
es->m_msg1.addList ( &es->m_list5 , RDB_LIKEDB , es->m_coll , es , // this , doneAddingEmailedLikesWrapper, false , 0 ) ) // niceness return false; // it did not block return doneAddingEmailedLikes ( es ); } bool Emailer::doneAddingEmailedLikes ( EmailState *es ) { m_emailReplies++; es->m_emailLikedbListBuf.purge(); es->m_inUse = false; return true; } ///////////// // // code to send an individual email // //////////// // need to set EmailState::m_fbId, m_emailResultsBuf bool Emailer::sendSingleEmail ( EmailState *es , int64_t fbId ) { es->m_sendSingleEmail = true; // claim it es->m_inUse = true; // point to emailer es->m_emailer = this; // who are we sending to? es->m_fbId = fbId; // container class es->m_emailer = this; // reset this es->m_errno = 0; // send it off if ( ! getMailServerIP( es ) ) return false; return true; } ///////////// // // code to populate the m_emailTree // ///////////// // returns false if blocked, true otherwise bool Emailer::populateEmailTree ( ) { // not if already in progress if ( m_populateInProgress ) return true; // stop if emailing now, it needs the tree //if ( m_emailInProgress ) return true; // re-scan only once per hour int32_t now = getTimeGlobal(); if ( m_lastScan && now - m_lastScan < 3600 ) return true; // update that m_lastScan = now; // lock it up just in case... m_populateInProgress = true; // init the tree the first tim eonly if ( ! m_init ) { // . what's max # of tree nodes? // . assume avg facebookdb rec size of about 1000 bytes // . NOTE: 32 bytes overhead? int32_t maxMem = 10000000; int32_t maxTreeNodes = maxMem / 32; if ( ! m_emailTree.set ( 0 , maxTreeNodes , true , // balance? maxMem, true , // owndata? 
"emailtree", // allocname false , // datainptrs NULL , // dbname sizeof(key96_t) )) { // keysize log("email: failed to init email tree"); return true; } // only do once m_init = true; } // clear out all nodes m_emailTree.clear(); // reset start key for scan m_startKey.setMin(); // returns false if blocked, true otherwise return scanLoop(); } static void gotScanListWrapper ( void *state, RdbList *list , Msg5 *msg5 ) { // use this Emailer *em = (Emailer *)state; // this never blocks em->gotScanList (); // and resume the loop. return if it blocked. if ( ! em->scanLoop () ) return; // it did not block, it must be done... // we were spawned from emailEntryLoop(), so go back there em->emailScan( NULL ); } // . scan facebookdb and get every facebookid, and couple it with the // time we gotta send the email // . sort by that in emailTree // . re-scan facebookdb every few hours in case of new entries or if // someone updates their email // . i would also call addToEmailTree if a new facebookdb rec comes in. // perhaps do that from Rdb.cpp? // . returns false if blocked true otherwise bool Emailer::scanLoop ( ) { key96_t endKey ; endKey.setMax(); // get a meg at a time int32_t minRecSizes = 1024*1024; key96_t oldk; oldk.setMin(); loop: // use msg5 to get the list, should ALWAYS block since no threads if ( ! m_msg5.getList ( RDB_FACEBOOKDB , m_coll , &m_list7 , &m_startKey , &endKey , minRecSizes , true , // includeTree false , // add to cache? 0 , // max cache age 0 , // startFileNum , -1 , // numFiles , this , // state gotScanListWrapper , // callback MAX_NICENESS , // niceness false )) // err correction? // return false if we blocked return false; // stuff the m_emailTree with some data based on m_list gotScanList( ); // if something, get more if ( ! m_list7.isEmpty() ) goto loop; // stop m_populateInProgress = false; // i guess we did not block? 
return true; } void Emailer::gotScanList ( ) { int32_t now = getTimeGlobal(); int32_t dayStart = now - ( now % 86400 ); if ( m_list7.isEmpty() ) return; // loop over entries in list for ( m_list7.resetListPtr() ; ! m_list7.isExhausted() ; m_list7.skipCurrentRecord() ) { // get it char *drec = m_list7.getCurrentRec(); // sanity check. delete key? if ( (drec[0] & 0x01) == 0x00 ) continue; FBRec *fr = (FBRec *)drec; char ef = fr->m_emailFrequency; // 0 means none provided, so let's default it to weekly //if ( ef == 0 ) continue; if ( ef == 0 ) ef = 2; // 3 means never // 1 is daily, 2 is weekly if ( ef == 3 ) continue; // strange? if ( ef != 1 && ef != 2 ) { log("email: strange freq = %"INT32"",(int32_t)ef); continue; } // int16_tcut uint64_t fbId = fr->m_fbId; // is assigned to us for emailing? Host *group = g_hostdb.getMyGroup(); int32_t hpg = g_hostdb.getNumHostsPerShard(); int32_t i = fbId % hpg; Host *h = &group[i]; // skip if not assigned to us if ( h->m_hostId != g_hostdb.m_hostId ) continue; // add him to our list. sorted by next email time and // fbid. so its a key96_t key96_t k; k.n0 = fr->m_fbId; k.n1 = fr->m_nextRetry; // at what time of day to email ( in minutes)? UTC int32_t tte = dayStart + fr->m_timeToEmail * 60; // when was the last attempt to email? int32_t success = fr->m_lastEmailAttempt; // reset this for debug //success = 0; // . "success" is non-zero if we had at least one successful // emailing to this person // . for daily frequency we must wait at least a day after // the last successful email // . we have minus 4 hours in case the email got off to // a late start if ( ef == 1 && success && now - success < 20*3600 ) tte += 24*3600; // same goes for weekly emails if ( ef == 2 && success && now - success < 7*24*3600-4*3600 ) tte += 7*24*3600; // assume that's the unix timestamp then (UTC) k.n1 = tte; // if non-zero, this overrides. this is non-zero if we // had our last email fail if ( fr->m_nextRetry ) k.n1 = fr->m_nextRetry; // HACK TIME! 
//k.n1 = 0; // add to the tree now if ( m_emailTree.addNode(m_collnum,(char *)&k,NULL,0) >= 0 ) continue; // error! log("email: email tree add error: %s",mstrerror(g_errno)); } m_startKey = *(key96_t *)m_list7.getLastKey(); m_startKey += (uint32_t) 1; // watch out for wrap around //if ( startKey < *(key96_t *)list.getLastKey() ) return; } /////////////////////// // // FACEBOOK SPIDER // /////////////////////// // https://graph.facebook.com/search?fields=id,privacy,picture,name,location,venue,description,"start_time,end_time&type=event&q=china&limit=10&offset=0 // . call this every second // . https://developers.facebook.com/docs/reference/api/#searching void facebookSpiderSleepWrapper ( int fd , void *state ) { // only for host #0 //if ( g_hostdb.m_hostId != 0 ) return; // all spiders off? if ( ! g_conf.m_spideringEnabled ) return; // if nothing on queue, push a 0 fbid on there to initiate // the query spider algo on facebook //if ( g_n1 >= 2 ) return; // for now //if ( g_n1 >= 1 ) return; // flag bool gotIt = false; collnum_t collnum; // get event collection for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { // get it CollectionRec *cr = g_collectiondb.m_recs[i]; // skip if empty if ( ! cr ) continue; // or not events if ( ! cr->m_indexEventsOnly ) continue; // ok, use that collnum = cr->m_collnum; // flag it gotIt = true; } // return if no such collection if ( ! gotIt ) return; // do we have an stubhubs already queued? bool hasLocalFBId = false; bool hasEventBrite = false; bool hasStubHub = false; bool hasFacebook = false; for ( int32_t i = 0 ; i < g_n1 ; i++ ) { if ( g_fbq1[i] == -3 ) hasLocalFBId = true; if ( g_fbq1[i] == -2 ) hasEventBrite = true; if ( g_fbq1[i] == -1 ) hasStubHub = true; if ( g_fbq1[i] >= 0 ) hasFacebook = true; } // need this int32_t now = getTimeGlobal(); // ok, it's empty! 0 fbid has special meaning. it means to // do a spider round on facebook if ( ! 
hasFacebook && g_conf.m_facebookSpideringEnabled && // only for host #0 g_hostdb.m_hostId == 0 ) queueFBId ( 0 , collnum ); // . s_ptr3 is now used for stub hub and is a time_t! // . reset it if it's over a year out if ( ! hasStubHub && g_conf.m_stubHubSpideringEnabled && // only for host #0 g_hostdb.m_hostId == 0 ) { // if ptr is over a year into the future then reset to // 0 and wait for 12 hours!!! stubhub.com spiders through // in like an hour! if ( s_ptr3 - now > 365*86400 ) { // give it a delay too! s_holdOffStubHubTill = now + 12*3600; // log it log("stubhub: stubhub spider completed. " "waiting for 12 hours before " "hitting stubhub again." ); // and reset our timer thing s_ptr3 = 0; } // are we done waiting? if ( now > s_holdOffStubHubTill ) // queue the stubhub queueFBId ( -1 , collnum ); } // . s_ptr3 is on the day we need to download events from eventbrite // that were create on that day // . if its 0 then play catch up until we hit today, then just // hit it like once per hour for events created today if ( ! hasEventBrite && g_conf.m_eventBriteSpideringEnabled && // only for host #0 g_hostdb.m_hostId == 0 && // if we got no results we delay like an hour until we // try again, in hopes someone added some new events to // eventbrite's index now > s_eventBriteWaitUntil ) // queue the eventbrite queueFBId ( -2 , collnum ); // this is for all hosts in stripe #0, not just host #0 if ( ! hasLocalFBId && // skip if not in bottom part g_hostdb.m_myHost->m_stripe == 0 && // we do this once per day now > s_localWaitUntil ) // queue the local facebookdb scan queueFBId ( -3 , collnum ); } #include "Speller.h" // use unified dictionary char *getNextQuery ( ) { if ( g_hostdb.m_hostId != 0 ) { log("qloop: wtf! not host #0"); return NULL; } if ( ! 
s_init ) { // load query loop state //loadQueryLoopState(); // try to load from disk //if ( loadSortByPopTable() ) s_init = true; bool s1 = false; bool s2 = false; s1 = s_tbuf1.load ( g_hostdb.m_dir,"/popsortwords.dat" ); s2 = s_tbuf2.load ( g_hostdb.m_dir,"/popsortplaces.dat" ); // if both loaded we are done if ( s1 && s2 ) s_init = true; // clear if one loaded but the other did not else { s_tbuf1.reset(); s_tbuf2.reset(); } } // if load was unsuccessful, then create if ( ! s_init ) { // ok, create it and save it HashTableX *ud = &g_speller.m_unifiedDict; // init trees RdbTree tree1; RdbTree tree2; tree1.set ( 4, // fixeddatasize 3000000, // maxnodes true, // do balancing -1 , // maxmem false, // owndata? "tree1", // allocname false, // datainptrs? NULL, // dbname 12 ); // keysize tree2.set ( 4, // fixeddatasize 5000000, // maxnodes true, // do balancing -1 , // maxmem false, // owndata? "tree2", // allocname false , // datainptrs? NULL, // dbname 12 ); // keysize // for keeping keys unique int32_t count = 0; // scan the unified dictionary int32_t n1 = ud->m_numSlots; for ( int32_t i = 0 ; i < n1 ; i++ ) { // skip if empty if ( ! ud->m_flags[i] ) continue; // . get the ptr into m_unifiedBuf // . word/phrase\tlangid\tpop\tlangid\tpop.... char *p = *(char **)ud->getValueFromSlot(i); // point to \0 ending the word or the phonetic char *w = p - 1; // back up again until we are at the beginning of // that word or phonetic for ( ; w[-1] ; w-- ); // is it a phonetic? if ( is_upper_a(w[0]) || w[0]=='*' ) { // point to the \0 before it w--; // and back up to the start of the word/phrase // that the phonetic represents for ( ;w > g_speller.m_unifiedBuf &&w[-1];w--); } // scan word or phrase, we only want words not // phrases for this... phrases are too spammy! bool hadSpace = false; for ( char *x = w; *x ; x++ ) { if ( ! 
is_wspace_a ( *x ) ) continue; hadSpace = true; break; } // skip if its a phrase if ( hadSpace ) continue; // get the max pop from all the language/pop tuples int32_t maxPop = -2; int32_t pop; subloop: // skip over langid for ( ; *p && *p !='\t';p++ ); // crazy? a pop should follow it! if ( ! *p ) goto done; // skip that p++; // get the pop pop = atol(p); // if negative make 0 if ( pop < 0 ) pop = 0; // get max pop if ( pop > maxPop ) maxPop = pop; // skip over next tab, should be langid or \0 for ( ; *p && *p !='\t';p++ ); // if no more, get next word line if ( ! *p ) goto done; // skip that if ( *p ) p++; // get more if they are there goto subloop; // store it in tree done: // how is this possible? if ( maxPop < 0 ) continue; // make the key key_t k; k.n1 = ~((uint32_t)maxPop); k.n0 = count++; // store offset int32_t woff = w - g_speller.m_unifiedBuf; // . add to b-tree to sort by pop // . data is the word/phrase ptr tree1.addNode(0,k,(char *)woff,4); } // . now add the cities in there too! // . scan the cities // . g_nameTable is from Address.cpp int32_t n2 = g_nameTable.m_numSlots; for ( int32_t i = 0 ; i < n2 ; i++ ) { // skip if empty if ( ! g_nameTable.m_flags[i] ) continue; // get it int32_t offset = *(int32_t *)g_nameTable.getValueFromSlot(i); // get the ptr into g_pbuf for it PlaceDesc *pd = (PlaceDesc *)(g_pbuf+offset); // get the pop uint32_t pop = pd->m_population; // make the key key_t k; k.n1 = ~pop; // "China" has many spellings and each one has an // entry in the g_nameTable BUT they hash to the // same PlaceDesc ptr, so use that as part of they // key for making sure we have no dups in tree2. k.n0 = (uint64_t)pd; //note it //log("adding pop=%"UINT32"",pop); // add to b-tree to sort by pop tree2.addNode(0,k,(char *)offset,4); } // serialize tree1 for (int32_t n=tree1.getLowestNode();n>=0;n=tree1.getNextNode(n)){ // get data. 
this one is a slot # in m_unifiedDict int32_t woff = (int32_t)tree1.getData(n); // store it s_tbuf1.pushLong(woff); } int32_t reps = 0; // serialize tree2 for (int32_t n=tree2.getLowestNode();n>=0;n=tree2.getNextNode(n)){ // get data. this one is an offset into g_pbuf int32_t i = (int32_t)tree2.getData(n); // sample PlaceDesc *pd = (PlaceDesc *)(g_pbuf+i); // print it if ( reps++ == 0 && pd->m_population < 1000000 ) { char *xx=NULL;*xx=0; } // store it s_tbuf2.pushLong(i); } // save both s_tbuf1.save ( g_hostdb.m_dir,"/popsortwords.dat" ); s_tbuf2.save ( g_hostdb.m_dir,"/popsortplaces.dat" ); // do not re-do s_init = true; // init the ptrs then //s_ptr1 = 0; //s_ptr2 = 0; // save state saveQueryLoopState(); } if ( s_flip == 0 ) s_flip = 1; else s_flip = 0; if ( s_flip == 0 && ! g_speller.m_unifiedBuf ) { log("facebook: unifiedDict not loaded! skipping pop words " "facebook spidering."); s_flip = 1; } // get the next word or location if ( s_flip == 0 ) { int32_t woff = ((int32_t *)(s_tbuf1.getBufStart())) [s_ptr1]; s_ptr1++; if ( s_ptr1 * 4 > s_tbuf1.length() ) s_ptr1 = 0; // just to keep things somewhat fresh, let's cycle // once we hit 15,000 words which is about 5 days // at the current rate. i am planning on increasing // the fb spider rate though a little if i can since it // seems to not be rate-limited so far if ( s_ptr1 > 15000 ) s_ptr1 = 0; char *v = g_speller.m_unifiedBuf + woff; return v; } if ( s_flip == 1 ) { int32_t poff = s_ptr2 * 4; s_ptr2++; if ( s_ptr2 * 4 > s_tbuf2.length() ) s_ptr2 = 0; // just to keep things somewhat fresh, let's cycle // once we hit 15,000 words which is about 5 days // at the current rate. 
i am planning on increasing // the fb spider rate though a little if i can since it // seems to not be rate-limited so far if ( s_ptr2 > 15000 ) s_ptr2 = 0; int32_t offset = *(int32_t *)(s_tbuf2.getBufStart() + poff); // get the ptr into g_pbuf for it PlaceDesc *pd = (PlaceDesc *)(g_pbuf+offset); if ( pd->m_flags & PDF_STATE ) return (char *)pd->getStateName(); if ( pd->m_flags & PDF_COUNTRY ) return (char *)pd->getCountryName(); // crap, official names has many dups because we have one // place desc for every slang name of a place. return pd->getOfficialName(); } // shouldn't be here! char *xx=NULL;*xx=0; return NULL; } // . save the state of getNextQuery() // . save the queue g_fbq1[100],g_colls1[100],g_n1 // . called from Process.cpp bool saveQueryLoopState ( ) { SafeBuf ss; ss.pushLong(s_flip); ss.pushLong(s_ptr1); ss.pushLong(s_ptr2); // the queue of fbids ss.pushLong(g_n1); ss.safeMemcpy((char *)g_fbq1,g_n1*4); ss.safeMemcpy((char *)g_colls1,g_n1*4); ss.pushLong(s_ptr3); ss.pushLong(s_holdOffStubHubTill); ss.pushLongLong(s_ptr4); ss.pushLong(s_eventBriteWaitUntil); // local scan ss.pushLongLong(s_ptr5); ss.pushLong (s_localWaitUntil); log("facebook: saving fbloop.dat. " "s_ptr1=%"INT32" " "s_ptr2=%"INT32" " "s_ptr3=%"INT32" " "s_ptr4=%"INT64" " "s_ptr5=%"INT64" " "s_holdOffStubHubTill=%"UINT32" " "s_eventBriteWaitUntil=%"UINT32" " "s_localWaitUntil=%"UINT32" " "g_n1=%"INT32"", s_ptr1, s_ptr2, s_ptr3, s_ptr4, s_ptr5, s_holdOffStubHubTill, s_eventBriteWaitUntil, s_localWaitUntil, g_n1); return ss.save(g_hostdb.m_dir,"fbloop.dat"); } bool loadQueryLoopState ( ) { SafeBuf ss; if ( ! 
ss.load(g_hostdb.m_dir,"fbloop.dat") ) return false; // assign char *p = ss.getBufStart(); char *pend = p + ss.length(); s_flip = *(int32_t *)p; p += 4; s_ptr1 = *(int32_t *)p; p += 4; s_ptr2 = *(int32_t *)p; p += 4; g_n1 = *(int32_t *)p; p += 4; gbmemcpy ( g_fbq1 , p , g_n1 * 4 ); p += g_n1 * 4; gbmemcpy ( g_colls1 , p , g_n1 * 4 ); p += g_n1 * 4; if ( p >= pend ) goto done; s_ptr3 = *(int32_t *)p; p += 4; if ( p >= pend ) goto done; s_holdOffStubHubTill = *(int32_t *)p; p += 4; if ( p >= pend ) goto done; s_ptr4 = *(int64_t *)p; p += 8; s_eventBriteWaitUntil = *(int32_t *)p; p += 4; // local scan if ( p >= pend ) goto done; s_ptr5 = *(int64_t *)p; p += 8; s_localWaitUntil = *(int32_t *)p; p += 4; done: log("facebook: loaded fbloop.dat. " "s_ptr1=%"INT32" " "s_ptr2=%"INT32" " "s_ptr3=%"INT32" " "s_ptr4=%"INT64" " "s_ptr5=%"INT64" " "s_holdOffStubHubTill=%"UINT32" " "s_eventBriteWaitUntil=%"UINT32" " "s_localWaitUntil=%"UINT32" " "g_n1=%"INT32"", s_ptr1, s_ptr2, s_ptr3, s_ptr4, s_ptr5, s_holdOffStubHubTill, s_eventBriteWaitUntil, s_localWaitUntil, g_n1); return true; }