// ---------------------------------------------------------------------------
// Overview and review notes for the Ads method implementations in this file.
//
// NOTE(review): this copy of the file appears to have lost its original line
// breaks and every angle-bracketed span of text. As a result removeCDATA()
// is cut off in the middle of a string literal, the string literals passed
// to safePrintf() in printAd(), printPaidInclusionAds() and
// printSkyscraperAds() are empty or split across physical lines, and many
// "//" comments now run into the code that follows them on the same line.
// Restore the file from version control before compiling or editing the
// code. The code text below is intentionally left untouched.
//
// removeCDATA(buf, bufLen)
//   Only the start of the condition is visible (*bufLen >= 12 and a
//   strncasecmp() on buf); the rest of the function is missing here.
//
// Ads::getAds(q, qlen, pageNum, queryIP, coll, state, callback)
//   Returns true immediately when coll is NULL, when qlen is larger than
//   MAX_AD_QUERY_LEN, or when q is NULL / qlen <= 0. Otherwise it stores the
//   arguments in members, fetches the collection record with
//   g_collectiondb.getRec() and calls getAd() for PI_PRIMARY, PI_BACKUP,
//   SS_PRIMARY and SS_BACKUP as enabled by m_adPIEnable / m_adSSEnable.
//   A skyscraper feed is treated as "same as paid inclusion" when the record
//   flag says so or when the CGI strings m_adCGI[0]/[2] (primary) or
//   m_adCGI[1]/[3] (backup) are equal, and both enable flags are set; in
//   that case the skyscraper request is skipped and m_adSSNumAds is added to
//   the count requested from the paid inclusion feed. m_numGotAds counts the
//   requests that were skipped or for which getAd() returned true; the
//   function returns true when that count reaches 4, false otherwise.
//   NOTE(review): strncpy() does not terminate m_q when q has no NUL within
//     MAX_AD_QUERY_LEN bytes, and it reads q up to a NUL rather than up to
//     qlen; the size of m_q is not visible here -- confirm.
//   NOTE(review): m_cr is dereferenced without a NULL check.
//   NOTE(review): m_adSSSameasPI / m_adBSSSameasBPI are only ever set to
//     true before the enable test, so a value left over from an earlier
//     call (or from construction, not visible here) is reused when the CGI
//     strings differ -- confirm they start out false.
//
// Ads::getAd(index, cgi, numAds)
//   HTML-decodes cgi into a 1024-byte stack buffer and copies it into a
//   SafeBuf while expanding placeholders: %q (URL-encoded m_q), %p
//   (m_pageAds), %n (numAds), %i (iptoa(m_queryIP)) and %% (a literal '%').
//   Any other character following '%' is dropped together with the '%'.
//   It then allocates an AdFeed with mmalloc(), fills m_ads / m_index and
//   calls g_httpServer.getDoc() with gotDocWrapper as the callback,
//   m_cr->m_adFeedTimeOut as timeout and 20*1024 for both size limits.
//   Returns false when getDoc() returns false, true in every other case
//   (empty cgi, oversized cgi, failed allocation, getDoc() returning true).
//   NOTE(review): cgi is passed to gbstrlen() without a NULL check.
//   NOTE(review): clen == 1024 passes the "> 1024" test although tmp holds
//     1024 bytes, so the decoded text may leave no room for a terminator --
//     depends on htmlDecode(); confirm.
//   NOTE(review): when '%' is the last character, p is advanced past the
//     terminating NUL before the loop condition reads *p again.
//   NOTE(review): the trailing '\0' is only pushed in the branch taken when
//     no further '%' is found; if the template ends right after a
//     placeholder the loop exits without it -- confirm getBufStart() is
//     still terminated in that case.
//   NOTE(review): af is not freed on the path where getDoc() returns true --
//     confirm whether the callback still runs then, otherwise it leaks.
//
// gotDocWrapper(state, s)
//   state is the AdFeed created in getAd(). Calls ads->gotDoc(s, m_index),
//   frees the AdFeed with mfree(), and once ads->gotAllRequests() is true
//   calls selectDisplayAds() followed by the stored callback.
//
// Ads::gotDoc(ts, index)
//   Increments m_numGotAds, then logs and returns early when g_errno is set,
//   when the read buffer is empty, or when the HTTP status is not 200.
//   Otherwise it locates the content after the mime header, parses it into
//   m_xml[index], and for every non-back tag matching
//   m_cr->m_adResultXml[index] stores title, description, site and click url
//   pointers/lengths (each passed through removeCDATA()) in the per-feed
//   arrays. Finally it relabels the socket buffer, records it in
//   m_buf/m_bufLen, records the ad count in m_numAds[index] and clears
//   ts->m_readBuf.
//   NOTE(review): a reply whose charset is neither csUTF8 nor csASCII hits a
//     deliberate write through a NULL pointer, i.e. the remote feed can
//     crash the process.
//   NOTE(review): numAds is never compared with the capacity of the
//     m_titles/m_desc/m_sites/m_urls arrays (sizes not visible here).
//   NOTE(review): if relabel() fails the function returns before storing
//     m_buf and m_numAds, so the parsed ads are discarded.
//
// Ads::selectDisplayAds()
//   Switches m_indexPIAds / m_indexSSAds to the backup feeds:
//   - shared primary and shared backup: when the primary returned fewer ads
//     than the backup, both use PI_BACKUP;
//   - shared primary, separate backups: when the primary returned fewer ads
//     than PI_BACKUP + SS_BACKUP, they use PI_BACKUP and SS_BACKUP;
//   - separate primaries, shared backup: when PI_BACKUP returned more than
//     PI_PRIMARY + SS_PRIMARY, both use PI_BACKUP;
//   - all separate: each slot uses its backup only when its primary returned
//     no ads and the backup returned some.
//   It does not assign the indexes in any other case.
//
// Ads::printAd(sb, url, urlLen, title, titleLen, desc, descLen, site,
//              siteLen, numCharPerLine)
//   Appends the title, the description and then the site text to sb, the
//   site being emitted in pieces of at most numCharPerLine characters with a
//   safePrintf() between pieces.
//   NOTE(review): numCharPerLine is used as a divisor and modulus; a value
//     of 0 divides by zero.
//
// Ads::printPaidInclusionAds(sb, numCharPerLine)
//   Prints ads [0, end) of feed m_indexPIAds, where end is
//   m_numAds[m_indexPIAds], or m_cr->m_adPINumAds when the selected feed is
//   shared with the skyscraper slot.
//   NOTE(review): in the shared case end is the configured count rather than
//     the number of ads parsed, so a short reply would make the loop read
//     entries gotDoc() never filled -- confirm.
//
// Ads::printSkyscraperAds(sb, numCharPerLine)
//   Prints ads of feed m_indexSSAds. When that index is PI_PRIMARY or
//   PI_BACKUP the loop starts at m_cr->m_adPINumAds and ends at
//   m_numAds[m_indexSSAds] + m_cr->m_adPINumAds.
//   NOTE(review): m_numAds[] holds the total number of ads parsed for the
//     feed, so that end looks m_adPINumAds entries too large -- confirm.
//
// The trailing block comment holds disabled code for per-collection
// round-robin feed selection (s_availableAds / getAdFeedIndex).
// ---------------------------------------------------------------------------
#include "gb-include.h" #include "Ads.h" #include "HttpServer.h" #include "Xml.h" #include "Pages.h" #include "Titledb.h" // . lets get feeds from: // 1. overture // 2. findwhat // 3. looksmart // 4. about // 5. ah-ha (http://www.ah-ha.com/partners/partners_xmlv4_integration.asp) static void gotDocWrapper( void *state , TcpSocket *s ) ; char *removeCDATA ( char *buf, long *bufLen ) { if(*bufLen >= 12 && strncasecmp(buf, "./search // *q= // *n= // *s= // *raw= // . Formatted Response: // // LONG LONG // // <![CDATA[STRING]]> // // // // // // // ... // // // . This gets ads from 3rd party ad provider: searchfeed.com // formerly miva // . Request Parameters (* = required): // URL: http://www.searchfeed.com/rd/feed/XMLFeed.jsp // *trackID= // *pID= // cat= // nl= // page= // *ip= // excID= (N/A currently) // . Formatted Response: // // // // // // // // // // // // // // // // // // ... // // // . This gets ads from 3rd party ad provider: 7search.com // . Request Parameters (* = required): // URL: http://meta.7search.com/feed/xml.aspx // *affiliate= // *qu= // pn= // r= // filter= // *ip_address= // *st= // *rid= // . Formatted Response: // // // // // // // // // // // ... // // // bool Ads::getAds ( char *q , long qlen , long pageNum , long queryIP , char *coll , void *state , void (*callback)(void *state) ){ // bail if no collection specified if(!coll) { log("query: Collection for ads is empty."); return true; } // bail if query too big if ( qlen > MAX_AD_QUERY_LEN) { log("query: Query length of %li is too long to get " "ads for.",qlen); return true; } // bail if empty, too if ( ! q || qlen <= 0 ) return true; // save stuff strncpy(m_q, q, MAX_AD_QUERY_LEN); m_qlen = qlen; m_coll = coll; m_state = state; m_callback = callback; m_pageAds = pageNum; m_queryIP = queryIP; m_cr = g_collectiondb.getRec(m_coll); // . 
this means that it was not specifically mentioned, choose next // in round robin /* if(!feedIndex) m_feedIndex = getAdFeedIndex(g_collectiondb.getCollnum(m_coll)); else // . an ad feed was specified, so use this one instead. m_feedIndex = feedIndex - 1; */ long numPIAds = m_cr->m_adPINumAds; long numBPIAds = m_cr->m_adPINumAds; if(!m_cr->m_adSSSameasPI) { size_t size = gbstrlen(m_cr->m_adCGI[0]); if((long)size == gbstrlen(m_cr->m_adCGI[2])) if(!memcmp(m_cr->m_adCGI[0], m_cr->m_adCGI[2], size)) m_adSSSameasPI = true; } else m_adSSSameasPI = true; if(!m_cr->m_adBSSSameasBPI) { size_t size = gbstrlen(m_cr->m_adCGI[1]); if((long)size == gbstrlen(m_cr->m_adCGI[3])) if(!memcmp(m_cr->m_adCGI[1], m_cr->m_adCGI[3], size)) m_adBSSSameasBPI = true; } else m_adBSSSameasBPI = true; if(m_adSSSameasPI && m_cr->m_adPIEnable && m_cr->m_adSSEnable) { numPIAds += m_cr->m_adSSNumAds; m_indexSSAds = PI_PRIMARY; } else m_adSSSameasPI = false; if(m_adBSSSameasBPI && m_cr->m_adPIEnable && m_cr->m_adSSEnable) { numBPIAds += m_cr->m_adSSNumAds; } else m_adBSSSameasBPI = false; // get paid inclusion ad and its backup m_numGotAds = 0; if(m_cr->m_adPIEnable) { if(getAd(PI_PRIMARY, m_cr->m_adCGI[PI_PRIMARY], numPIAds)) m_numGotAds++; if(getAd(PI_BACKUP, m_cr->m_adCGI[PI_BACKUP], numBPIAds)) m_numGotAds++; } else m_numGotAds += 2; // get skyscraper ad and its backup if(m_cr->m_adSSEnable) { // if skyscraper is not using the same feed as paid inclusion // get ads if(!m_adSSSameasPI) { if(getAd(SS_PRIMARY, m_cr->m_adCGI[SS_PRIMARY], m_cr->m_adSSNumAds)) m_numGotAds++; } else m_numGotAds++; // if backup skyscraper is not using the same feed as backup // paid inclusion get ads if(!m_adBSSSameasBPI) { if(getAd(SS_BACKUP, m_cr->m_adCGI[SS_BACKUP], m_cr->m_adSSNumAds)) m_numGotAds++; } else m_numGotAds++; } else m_numGotAds += 2; // if we got all of them back already, return true...we failed if(m_numGotAds >= 4) return true; return false; } bool Ads::getAd ( long index , char *cgi , long numAds ) { 
char tmp[1024] = {0}; long clen = gbstrlen(cgi); // if there is no cgi string, bail if(clen <= 0) return true; // if string is greater than buffer, bail if(clen > 1024) { log("query: Ad Feed CGI string %li is > 1024.", index); return true; } char *p = tmp; long plen = htmlDecode(tmp, cgi, clen,false,0); SafeBuf sb(1024); // . replace standard characters with our search values while(*p) { char *beg = p; p = strstr(p, "%"); if(p) { p++; if( beg != p ) sb.safeMemcpy(beg, (long)(p-beg-1)); switch(*p) { // insert url encoded query case 'q': sb.urlEncode(m_q, m_qlen); break; // insert page number case 'p': sb.safePrintf("%ld", m_pageAds); break; // insert number of ads to return case 'n': sb.safePrintf("%ld", numAds); break; // insert querying IP case 'i': sb.safePrintf("%s", iptoa(m_queryIP)); break; // insert % case '%': sb.safePrintf("%%"); break; default : break; } p++; } else { p = tmp; long len = plen - (beg-p); sb.safeMemcpy(beg, len); sb.pushChar('\0'); break; } } log(LOG_DEBUG, "query: Ad feed request[%ld] url is: %s", index, sb.getBufStart()); // make it a url //m_url[index].set(sb.getBufStart(), sb.length(), false /*addWWW?*/); // get the url char *url = sb.getBufStart(); AdFeed *af = (AdFeed *)mmalloc(sizeof(AdFeed), "AdFeed"); if(!af) { log("query: Ad feed malloc failed."); return true; } af->m_ads = this; af->m_index = index; // get the xml if (!g_httpServer.getDoc( url , // &m_url[index] , 0 , // ip 0 , // offset -1 , // size 0 , // if mod since af , // state gotDocWrapper, m_cr->m_adFeedTimeOut, // declared in collRec 0 , // proxyIp --> none 0 , // proxyPort 20*1024 , // 20k maxTextDocLen 20*1024 ))// 20k maxOtherDocLen return false; // we got it w/o blocking, MUST be an error!! 
g_errno should be set return true; } void gotDocWrapper ( void *state , TcpSocket *s ) { AdFeed *af = (AdFeed *)state; // extract our class Ads *ads = af->m_ads; ads->gotDoc(s, af->m_index); mfree(af, sizeof(AdFeed), "AdFeed"); if(!ads->gotAllRequests()) return; ads->selectDisplayAds(); // call callback ads->m_callback ( ads->m_state ); } void Ads::gotDoc ( TcpSocket *ts, long index ) { m_numGotAds++; // return if g_errno set if ( g_errno ) { log("query: Ad feed returned an error: %s", mstrerror(g_errno)); return; } // this is guaranteed now to be NULL terminated char *p = ts->m_readBuf; long plen = ts->m_readBufSize; long len = ts->m_readOffset; if ( ! p || plen <= 0 ) { log("query: Ad feed gave empty reply."); return; } HttpMime mime; mime.set(p, len, NULL);//&m_url[index]); if(mime.getHttpStatus() != 200) { log("query: Ad feed returned %ld status, bailing.", mime.getHttpStatus()); return; } char *content = p + mime.getMimeLen(); long contentLen = mime.getContentLen(); if((contentLen == -1) && (len != 0)) contentLen = len - mime.getMimeLen(); log(LOG_DEBUG, "query: Ad feed response: %s", content); short charset = get_iana_charset( mime.getCharset(), mime.getCharsetLen() ); // sanity check if ( charset != csUTF8 && charset != csASCII ) { char *xx=NULL;*xx=0; } Xml *xml = &m_xml[index]; xml->set( content, contentLen, false, 0, true, TITLEREC_CURRENT_VERSION); char *rsltStr = m_cr->m_adResultXml[index]; char *titleStr = m_cr->m_adTitleXml [index]; char *urlStr = m_cr->m_adUrlXml [index]; char *descStr = m_cr->m_adDescXml [index]; char *linkStr = m_cr->m_adLinkXml [index]; long rsltStrLen = gbstrlen(rsltStr); long begPtr = 0; long endPtr = xml->getNumNodes(); char **titles = m_titles [index]; long *titlesLen = m_titlesLen[index]; char **desc = m_desc [index]; long *descLen = m_descLen [index]; char **sites = m_sites [index]; long *sitesLen = m_sitesLen [index]; char **urls = m_urls [index]; long *urlsLen = m_urlsLen [index]; long numAds = 0; while(begPtr < endPtr) { 
// . We will loop until we are out of nodes and should break // . Search for result tag begPtr = xml->getNodeNum(begPtr, endPtr, rsltStr, rsltStrLen); // . If there is no response.result, consider parsing finished if( begPtr == -1 ) break; // . We don't want to use the back result tags, so skip it if( xml->isBackTag( begPtr++ ) ) continue; // . save pointer to ad title titles[numAds] = xml->getString(begPtr, endPtr, titleStr, &titlesLen[numAds]); titles[numAds] =removeCDATA(titles[numAds],&titlesLen[numAds]); // . save pointers for summary lines desc[numAds] = xml->getString(begPtr, endPtr, descStr, &descLen[numAds]); desc[numAds] = removeCDATA(desc[numAds], &descLen[numAds]); // . save pointer for site sites[numAds] = xml->getString(begPtr, endPtr, linkStr, &sitesLen[numAds]); sites[numAds] = removeCDATA(sites[numAds], &sitesLen[numAds]); // . save pointer to click url urls[numAds] = xml->getString(begPtr, endPtr, urlStr, &urlsLen[numAds]); urls[numAds] = removeCDATA(urls[numAds], &urlsLen[numAds]); // . increment number of ads found numAds++; } log(LOG_DEBUG, "query: Ad feed[%ld] returned %ld ads on page %ld", index, numAds, m_pageAds); // . let mem table know we stole the buffer as well if( !relabel( p, plen, "Ads-sktXml" ) ) { log("query: Could not relabel ad memory from socket!"); return; } // . 
we are now going to free the socket buffer when we are done, // so let socket know m_buf [index] = p; m_bufLen [index] = plen; m_numAds [index] = numAds; ts->m_readBuf = NULL; } void Ads::selectDisplayAds( ) { if(m_adSSSameasPI) { // Same primary AND backup AND backup has MORE if(m_adBSSSameasBPI && m_numAds[PI_PRIMARY] < m_numAds[PI_BACKUP]) { m_indexPIAds = PI_BACKUP; m_indexSSAds = PI_BACKUP; } // Same primary AND different backup AND backup has MORE else if(!m_adBSSSameasBPI && (m_numAds[PI_PRIMARY] < (m_numAds[PI_BACKUP] + m_numAds[SS_BACKUP]))) { m_indexPIAds = PI_BACKUP; m_indexSSAds = SS_BACKUP; } } else if(!m_adSSSameasPI && m_adBSSSameasBPI) { // Different primary AND same backup AND backup has MORE if(m_numAds[PI_BACKUP] > (m_numAds[PI_PRIMARY] + m_numAds[SS_PRIMARY])) { m_indexPIAds = PI_BACKUP; m_indexSSAds = PI_BACKUP; } } else { // Different primary AND different backup // AND backup has MORE if(!m_numAds[PI_PRIMARY] && m_numAds[PI_BACKUP]) m_indexPIAds = PI_BACKUP; // AND backup has MORE if(!m_numAds[SS_PRIMARY] && m_numAds[SS_BACKUP]) m_indexSSAds = SS_BACKUP; } } void Ads::printAd( SafeBuf *sb , char *url , long urlLen, char *title , long titleLen, char *desc , long descLen, char *site , long siteLen, long numCharPerLine ) { sb->safePrintf( "" ); sb->safeMemcpy (title, titleLen); sb->safePrintf( "
\n" ); sb->safeMemcpy (desc, descLen); sb->safePrintf( "
\n" ); sb->safePrintf( "" ); long len = siteLen; long numToPrint = numCharPerLine; long numSplits = len / numCharPerLine; long index = 0; if(len % numCharPerLine) numSplits++; if(numToPrint > len) numToPrint = len; sb->safeMemcpy (site, numToPrint); for(long i = 1; i < numSplits; i++) { sb->safePrintf("
"); index += numCharPerLine; if(numCharPerLine*(i+1) > len) { numToPrint = len % numCharPerLine; } sb->safeMemcpy (site+index, numToPrint); } sb->safePrintf( "

\n" ); } void Ads::printPaidInclusionAds(SafeBuf *sb, long numCharPerLine) { long end = m_numAds[m_indexPIAds]; if((m_indexPIAds == PI_PRIMARY && m_adSSSameasPI ) || (m_indexPIAds == PI_BACKUP && m_adBSSSameasBPI) ) end = m_cr->m_adPINumAds; for( long i = 0; i < end; i++) { printAd(sb, m_urls [m_indexPIAds][i], m_urlsLen [m_indexPIAds][i], m_titles[m_indexPIAds][i], m_titlesLen[m_indexPIAds][i], m_desc [m_indexPIAds][i], m_descLen [m_indexPIAds][i], m_sites [m_indexPIAds][i], m_sitesLen [m_indexPIAds][i], numCharPerLine ); if(i != end-1) sb->safePrintf("
\n"); } } void Ads::printSkyscraperAds (SafeBuf *sb, long numCharPerLine) { long i = 0; long end = m_numAds[m_indexSSAds]; if(m_indexSSAds == PI_PRIMARY || m_indexSSAds == PI_BACKUP) { i = m_cr->m_adPINumAds; end += i; } for( ; i < end; i++) { printAd(sb, m_urls [m_indexSSAds][i], m_urlsLen [m_indexSSAds][i], m_titles[m_indexSSAds][i], m_titlesLen[m_indexSSAds][i], m_desc [m_indexSSAds][i], m_descLen [m_indexSSAds][i], m_sites [m_indexSSAds][i], m_sitesLen [m_indexSSAds][i], numCharPerLine); if(i != end-1) sb->safePrintf("
\n"); } } /* long Ads::s_availableAds[16][MAX_AD_FEEDS] = {{0}}; long Ads::s_numAvailableAds[16] = {0} ; void Ads::initCollAvailAds ( ) { for(collnum_t cn = g_collectiondb.getFirstCollnum(); cn >= 0; cn = g_collectiondb.getNextCollnum(cn) ) setAvailableAds(g_collectiondb.getCollName(cn)); } void Ads::setAvailableAds ( char *coll ) { CollectionRec *cr = g_collectiondb.getRec(coll); s_numAvailableAds[cr->m_collnum] = 0; for(long i = 0; i < MAX_AD_FEEDS; i++) { if(cr->m_adFeedEnable[i]) { if(cr->m_adCGI[i][0] != '\0') s_availableAds[cr->m_collnum] [s_numAvailableAds[cr->m_collnum]++]=i; } } } long Ads::getAdFeedIndex ( collnum_t cn ) { static long count = 0; if(!s_numAvailableAds[cn]) return 0; count++; count %= s_numAvailableAds[cn]; return s_availableAds[cn][count]; } */