// Matt Wells, copyright Aug 2001 // . calls for intersecting a bunch of IndexLists to generate docIds // . IndexLists are data-less lists (keys only) // . each key in an IndexList is a termId/score/adultBit/docId tuple // . we try to use as small a sublist of each IndexList as possible to avoid // wasting network bandwidth // TODO: implement site clustering??????? in getNumResults() // TODO: if we have in cache we can hash right into the table, but // we must do that before blocking on something in case it disappears // from the cache // TODO: the search "cell phone cable hp jornada 680", w/o quotes, should // be quote forced anyway. Some pages will match all but "cable hp" // so we should break that up into it's 2 terms, cable and hp // 6912 (30%) of queries are 1 word queries 138 // 7825 (34%) of queries are 2 word queries 156 // 4512 (20%) of queries are 3 word queries 90 // 1771 ( 8%) of queries are 4 word queries 32 // 869 ( 4%) of queries are 5 word queries 17 // 391 ( 2%) of queries are 6 word queries 8 // 290 ( 1%) of queries are 7 word queries 6 // 183 ( 1%) of queries are 8 word queries (4 per second) #ifndef _INDEXTABLE2_H_ #define _INDEXTABLE2_H_ #include "Query.h" // MAX_QUERY_TERMS, qvec_t #include "Indexdb.h" // makeStartKey(), getTruncationLimit() #include "IndexList.h" // for m_lists[] #include "HashTableT.h" // the final score of a docid typedef uint32_t score_t; //typedef float score_t; // . get the docid from the ptr // . works for both docid ptrs from m_topTree and m_topDocIdPtrs[] inline int64_t getDocIdFromPtr ( char *docIdPtr ) { int64_t d; gbmemcpy ( &d , docIdPtr , 6 ); d >>= 2; d &= DOCID_MASK; return d; }; // max # search results that can be viewed without using TopTree #define MAX_RESULTS 1000 class IndexTable2 { public: // . returns false on error and sets errno // . we now support multiple plus signs before the query term // . start/endTermNums apply to phrase termIds only // . allows us to set multiple bits when a phrase termId is matched // in case the singleton was truncated, but doc has the phrase // . if you want Default AND behaviour set requireAllTerms to true // it is much faster, too // . "termFreqs" are 1-1 with q->m_qterms[] // . sets m_q to point to q void init (Query *q , bool isDebug , void *logstate , bool requireAllTerms , class TopTree *topTree , char *coll , IndexList *lists , int32_t numLists , HashTableX *sortByDateTablePtr , int32_t docsWanted , int64_t *termFreqs , bool useDateLists , bool sortByDate , char sortBy , //1=time,2=dist,3=rel,4=pop bool showExpiredEvents , float userLat , float userLon , bool doInnerLoopSiteClustering , bool doSiteClustering , bool getWeights , class Msg39Request *r ); // pre-allocate memory since intersection runs in a thread bool alloc ( ); bool allocTopTree ( ); bool makeHashTables ( ) ; // . returns false on error and sets errno // . we assume there are "m_numTerms" lists passed in (see set() above) void addLists_r ( int32_t *totalListSizes , float sortByDateWeight ); // some generic stuff IndexTable2(); ~IndexTable2(); void reset(); // Msg39 needs to call these void freeMem ( ) ; // sets m_affWeights and m_freqWeights from the provided weights // in the Msg39Request. void setAffWeights ( Msg39Request *r ) ; bool recompute ( class Msg39Request *r ) ; bool cacheIntersectionForRecompute ( class Msg39Request *r ) ; void freeCacheRec ( int32_t i ) ; // sets m_freqWeights[] based on termFreqs (IDF) in IMAP space void setFreqWeights ( Query *q , bool phrase ); // computes a single final score from a score vector score_t getWeightedScore ( unsigned char *scoresVec , int32_t nqt , float *freqWeights , float *affWeights , bool requireAllTerms ); void addLists2_r ( int32_t numLists , int32_t *imap , bool lastRound , int32_t numBaseLists , float sortByDateWeight , int32_t *minHardCountPtr ); bool eventHashLoop ( int32_t *listIndexes , char *listSigns , qvec_t *listExplicitBits , char **listEnds , char *listHash , char *listHardCount , int32_t *listPoints , char **ptrs , char **ptrEnds , char **oldPtrs , int32_t numPtrs , int64_t maxDocId , int32_t numListsToDo , int32_t numSlots , char **docIdPtrs , qvec_t *explicitBits , int16_t *hardCounts , uint8_t *eventIds , uint8_t *scoresVec , uint32_t *latVec, uint32_t *lonVec, //uint32_t *timeVec, //uint32_t *endVec, int32_t nqt ) ; void hashTmpDocIds2 ( uint32_t *maxDocId , char **docIdPtrs , unsigned char *scoresVec , uint8_t *eventIds , uint32_t *latVec , uint32_t *lonVec , //uint32_t *timeVec , //uint32_t *endVec , qvec_t *explicitBits , int16_t *hardCounts , uint32_t mask , int32_t numSlots , int32_t nqt ) ; // . used for getting which topDocId to kick out of the top list int32_t getWeakestTopDocId ( char **topp , score_t *tops , unsigned char *topb , int32_t numTop , unsigned char *minBitScore2 , score_t *score , char **docIdPtr ) ; void zeroOutVectorComponents ( unsigned char *scoresVec , qvec_t *ebits , int16_t *hardCounts , int32_t numDocIds , char rat ) ; // . compute phrase and word weights based on phrase affinities // and set final scores based on those weights void computeAffWeights ( bool rat , int32_t numDocIds , char **docIdPtrs , uint8_t *scoresVec , qvec_t *explicitBits , int16_t *hardCounts , float *affWeights , float *affinities ); //void computeWeightedScores ( int32_t numDocIds , // int32_t *finalScores , // unsigned char *scoresVec , // qvec_t *explicitBits , // float *affWeights , // char **docIdPtrs ); // fill top docids with the best from top docids 2 int32_t fillTopDocIds ( //char **topp , //score_t *tops , //unsigned char *topb , int32_t numTop , char **tmpp2 , uint8_t *tmpsv2 , uint32_t *tmpdv2 , uint32_t *tmplatv2 , uint32_t *tmplonv2 , //uint32_t *tmptimev2 , //uint32_t *tmpendv2 , qvec_t *tmpev2 , int16_t *tmphc2 , uint8_t *tmpeids2, int32_t numTmp2 ); // comparison between a result and the min top result bool isBetterThanWeakest ( unsigned char bscore , score_t score , char *docIdPtr , unsigned char minTopBitScore , score_t minTopScore , char *minTopDocIdPtr ); // has init already been called? bool isInitialized ( ) { return m_initialized; }; // . these are set from calling addLists() above // . we log all matching topDocIds if isDebug is true char **getTopDocIdPtrs ( ) { return m_topDocIdPtrs; }; uint8_t *getTopBitScores ( ) { return m_topBitScores; }; score_t *getTopScores ( ) { return m_topScores; }; int32_t getNumTopDocIds ( ) { return m_numTopDocIds; }; // . get how many results we have in the topDocIds list // . if "thatIncludeAllTerms" is true, results must have all terms // from all indexLists that we haven't read ALL of yet // . these are just used for stats keeping purposes in Msg3a for now // . these are all 0 for top tree right now.. until i fix it int32_t getNumExactExplicitMatches (){return m_numExactExplicitMatches;}; int32_t getNumExactImplicitMatches (){return m_numExactImplicitMatches;}; float *getFreqWeights ( ) { return m_freqWeights; }; // includes both synonym and phrase affinties combined float *getAffWeights ( ) { return m_affWeights; }; float *getAffinities ( ) { return m_affinities; }; float *getFreqWeightsQS ( ) { return m_freqWeightsQS; }; float *getAffWeightsQS ( ) { return m_affWeightsQS; }; void setStuffFromImap(); // how long to add the last batch of lists int64_t m_addListsTime; int64_t m_t1 ; int64_t m_t2 ; uint32_t m_totalDocIds; int32_t m_numPanics; int32_t m_numCollisions; int32_t m_numPtrs; // in the beginning at least int32_t m_numLoops; int64_t m_estimatedTotalHits; int32_t m_errno; int32_t m_numSlots; char *m_coll; collnum_t m_collnum; // this is true if no use to read more termlist data from disk, we // will not get any more docids... bool m_isDiskExhausted; // point to array of term freqs, 1-1 with qterms int64_t *m_termFreqs; // how many docs in the collection? int64_t m_docsInColl; // . the imap stuff // . m_imap is set in Query.cpp, but we contain it here // . m_imap really only needed when doing rat=1 queries, but we use it // for rat=0 because we may filter out UOR'ed QueryTerms or // irrelevant synoynms // . allows us to speed up the intersection process by intersecting // smaller termlists first and thereby keeping the ongoing result // set as small as possible // . imap maps the query term's INTERSECTION ORDER to the query term // number in Query::m_qterms[]. // . so if it is the the ith termlist to intersect, then its // corresponding QueryTerm would be Query::m_qterms[m_imap[i]] // . sometimes query terms are ignored as far as the intersection goes // and we also intersect the term's termlists in a different order // than the term's number in the Query::m_qterms[] array has it // . m_blocksize[0] + m_blocksize[1] is how many QueryTerms termlists // to intersect for the first call to IndexTable2::addLists2_r() for // rat=1 operations. // . we choose to intersect smaller termlists first to minimize the // result set and maximize the speed of future intersections, since // less results are involved // . m_blocksize[0]=X and m_blocksize[1]=Y means to intersect // m_lists[m_imap[0]], m_lists[m_imap[ 1]],... m_lists[m_imap[X-1]] // (block #0) // with // m_lists[m_imap[X]], m_lists[m_imap[X+1]],... m_lists[m_imap[Y-1]] // (block #1) // . m_blocksize[i] can be > 1 because we need to intersect phrase // termlists when intersecting their constituent word termlists in // order to salvage docids that may not have the word terms // explicitly, but do have the phrase terms explicitly, and thereby // have the word terms implicitly. // . m_nb is how many total blocks we have // . m_sizes[i] is how many docids are in block #i total... *i think*.. // . after we intersect block #0 with block #1, further intersections // are performed by calling addLists2_r() with one block at a time. // . addLists2_r() will preserve the ongoing intersection in // m_topDocIds2[], etc. // . when it is called with a single block it hashes the docids in // m_topDocIds2[] with the provided block of termlists to get the // new result set which is again stored in m_topDocIds2[], ... int32_t m_imap [ MAX_QUERY_TERMS ]; int32_t m_ni; int32_t m_sizes [ MAX_QUERY_TERMS ]; int32_t m_blocksize [ MAX_QUERY_TERMS ]; int32_t m_nb; // maps from query term space to imap space int32_t m_revImap [ MAX_QUERY_TERMS ]; bool m_imapIsValid; // for speed we must map Query::m_qterms[].m_leftPhraseTermNum into // "imap space" int32_t m_imapLeftPhraseTermNum [ MAX_QUERY_TERMS ]; int32_t m_imapRightPhraseTermNum [ MAX_QUERY_TERMS ]; // . these describe the lists associated with each m_termId // . each query term has a corresponding // term frequency weight (m_freqWeights[]) // . each query term has a corresponding // phrase affinity weight (m_affWeights[]) // . each query term has a corresponding // phrase affinity (m_phraseAffinities[][]) // . these are NOT exactly 1-1 with Query::m_qterms[] // . m_freqWeights[i] is the freq weight for m_qterms[m_imap[i]], // NOT m_qterms[i] // . m_imap essentially REORDERS the QueryTerms for better intersection // and may also // remove some QueryTerms (like UOR'ed query terms and synonyms) // . "nqt" is used throughout IndexTable2.cpp to indicate number of // QueryTerms in // "imap" which is always <= Query::m_numTerms = number of // Query::m_qterms[] float m_freqWeights [ MAX_QUERY_TERMS ]; float m_affWeights [ MAX_QUERY_TERMS ]; float m_affinities [ MAX_QUERY_TERMS ]; bool m_computedAffWeights; // these two correlate with the Query::m_qterms[] instead of being // in imap space. we pass these back in the Msg39Reply float m_freqWeightsQS [ MAX_QUERY_TERMS ]; float m_affWeightsQS [ MAX_QUERY_TERMS ]; // we have a list of the top docids char *m_topDocIdPtrs [ MAX_RESULTS ]; score_t m_topScores [ MAX_RESULTS ]; // 0x80: we have all hard-required terms // 0x40: we have all other terms explicitly // 0x20: we have all other terms implicitly // & 0x1f: count of all terms we have implicitly (includes hard counts) unsigned char m_topBitScores [ MAX_RESULTS ]; int32_t m_numTopDocIds ; // if getting more than MAX_RESULTS results, use this top tree to hold // them rather than the m_top*[] arrays above class TopTree *m_topTree; // NSD: Search for 'gigablast' on gk0 gave just 30 results. But when // a freshness of 30 days was given it gave over 200k results. That // is because we limit the maxDocIdsToCompute, and in the normal case // all the results were from gigablast.com and were being clustered out // So add a check to limit the number of results from the same // 8 bit dom hash having the same score to X (say 100) //bool m_doInnerLoopSiteClustering; bool m_doSiteClustering; // justed used by Msg3a for stats tracking int32_t m_numExactExplicitMatches ; int32_t m_numExactImplicitMatches ; // a reference to the query Query *m_q; bool m_useDateLists; bool m_sortByDate; // for events char m_sortBy; bool m_showExpiredEvents; bool m_showInProgress; // pointLat/pointLon multiplied by 10M into an int uint32_t m_userLatIntComp; uint32_t m_userLonIntComp; int32_t m_latTermOff; int32_t m_lonTermOff; int32_t m_timeTermOff; int32_t m_endTermOff; // these are NOT in imap space, but in query term space, 1-1 with // Query::m_qterms[] IndexList *m_lists; int32_t m_numLists; int32_t m_docsWanted; // has init() been called? bool m_initialized; // are we in debug mode? bool m_isDebug; // for debug msgs int32_t m_logstate; bool m_doRecalc; bool m_requireAllTerms; int64_t m_numDocsInColl; // the current "intersection" is stored in this table char **m_tmpDocIdPtrs2; uint32_t *m_tmpDateVec2; uint32_t *m_tmpLatVec2; uint32_t *m_tmpLonVec2; //uint32_t *m_tmpTimeVec2; //uint32_t *m_tmpEndVec2; uint8_t *m_tmpScoresVec2; uint8_t *m_tmpEventIds2; qvec_t *m_tmpEbitVec2; int16_t *m_tmpHardCounts2; int32_t m_maxTmpDocIds2; int32_t m_numTmpDocIds2; int32_t m_nexti; int32_t m_oldnexti; time_t m_nowUTCMod; // allocated memory char *m_buf; int32_t m_bufSize; char *m_bufMiddle; // for large hashtable for sortByDate //char *m_bigBuf; //int32_t m_bigBufSize; // little buffer for the intersection char *m_localBuf; int32_t m_localBufSize; // used by our call to Query::setBitScoresBoolean() //char *m_bitScoresBuf; //int32_t m_bitScoresBufSize; // these are for removing component lists replaced by their compounds int32_t *m_componentCodes; bool m_getWeights; bool m_searchingEvents; HashTableX *m_sortByDateTablePtr; class Msg39Request *m_r; // . a hash table of docids // . the &sq=docid1+docid2+...+docidN cgi parm can restrict the // search results to this list of docids bool m_useYesDocIdTable; HashTableT m_dt; // likewise, exclude any docid in this table bool m_useNoDocIdTable; HashTableT m_et; // cache boolean results bool m_useBoolTable; HashTableT m_bt; // . encapsulate bool cache lookup for bitscores // . FUCK! addLists2_r() calls this and tries to grow the table // while in a thread! inline uint8_t getBitScore(qvec_t ebits){ if (!m_useBoolTable) return m_q->getBitScore(ebits); // if table is mostly full do not add anything to it! we are // like in a thread if ( 100 * (m_bt.m_numSlotsUsed+1) >= m_bt.m_numSlots * 75 ) return m_q->getBitScore(ebits); int32_t slot = -1; uint8_t bscore; if ((slot = m_bt.getSlot(ebits)) >=0){ //if (m_isDebug) // logf(LOG_DEBUG, // "query: getBitScoreCacheHit " // "bits=0x%016"XINT64"", // (int64_t) ebits); bscore = m_bt.getValueFromSlot(slot); } else { //if (m_isDebug) // logf(LOG_DEBUG, // "query: getBitScoreCacheMiss " // "bits=0x%016"XINT64"", // (int64_t) ebits); bscore = m_q->getBitScore(ebits); } // store new bool value if (slot < 0){ //if (m_isDebug) // logf(LOG_DEBUG, // "query: getBitScoreCacheAdd " // "bits=0x%016"XINT64"", // (int64_t) ebits); m_bt.addKey(ebits,bscore,NULL); } return bscore; }; }; // . get the LOWEST scoring docId from our list of top docIds // . set "minBitScore22" and "score" for that lowest docId // . inline this for speed // . BUT lower docIds are considered higher scoring than higher docIds inline int32_t IndexTable2::getWeakestTopDocId ( char **topp , score_t *tops , unsigned char *topb , int32_t numTop , unsigned char *minBitScore2 , score_t *score , char **docIdPtr ) { int64_t tmp = 0LL; score_t minScore = 0x7fffffff; unsigned char minBitScore = 0xff; char *minDocIdPtr = (char *)&tmp; int32_t mini = -1; for ( int32_t i = 0 ; i < numTop ; i++ ) { if ( topb [i] > minBitScore ) continue; if ( topb [i] < minBitScore ) goto gotIt; if ( tops [i] > minScore ) continue; if ( tops [i] < minScore ) goto gotIt; if ( *(uint32_t *)(topp[i]+1 ) < *(uint32_t *)(minDocIdPtr+1) ) continue; if ( *(uint32_t *)(topp[i]+1 ) > *(uint32_t *)(minDocIdPtr+1) ) goto gotIt; if ( (*(unsigned char *)(topp[i] ) & 0xfc) < (*(unsigned char *)(minDocIdPtr) & 0xfc) ) continue; // ties should not be happening for docid, unless // it tied with initial setting of minDocIdPtr, in that // case we should add it! gotIt: minScore = tops [i]; minBitScore = topb [i]; minDocIdPtr = topp [i]; mini = i; } // set the callers ptrs *minBitScore2 = minBitScore; *score = minScore; *docIdPtr = minDocIdPtr; // return the lowest scoring docId's position return mini; } // . checks a result against the weakest to see which is better inline bool IndexTable2::isBetterThanWeakest ( unsigned char bscore , score_t score , char *docIdPtr , unsigned char minTopBitScore , score_t minTopScore , char *minTopDocIdPtr ) { // . branch on the bit score // . ignore 0x80 | 0x40 bits if ( (bscore&~0xc0) < (minTopBitScore&~0xc0) ) return false; // automatically add to top if our bscore is the highest so far if ( (bscore&~0xc0) > (minTopBitScore&~0xc0) ) return true; // docId must have a better score if it tied matching terms if ( score < minTopScore ) { // clear the slot //explicitBits[i] = 0; //docIdPtrs[i] = NULL; return false; } if ( score > minTopScore ) return true; // continue if docId is too big if ( *(uint32_t *)(docIdPtr+1) > *(uint32_t *)(minTopDocIdPtr+1) ) { // clear the slot //explicitBits[i] = 0; //docIdPtrs[i] = NULL; return false; } // if top is equal, compare lower 6 bits if ( (*(uint32_t *)(docIdPtr +1) == *(uint32_t *)(minTopDocIdPtr+1)) && (*(unsigned char *)(docIdPtr ) & 0xfc) >= (*(unsigned char *)(minTopDocIdPtr) & 0xfc) ) { // clear the slot //explicitBits[i] = 0; //docIdPtrs[i] = NULL; return false; } return true; } inline score_t IndexTable2::getWeightedScore ( unsigned char *scoresVec , int32_t nqt , float *freqWeights , float *affWeights , bool requireAllTerms){ // get the min of the scores float min = 9999999.0; float phraseMin = 9999999.0; float pscore; float pre; int32_t j; // reset score float score = 0.0; // loop vars //unsigned char iscore; float s; qvec_t ebit; // tends to neutralize the affects of outliers for ( int32_t t = 0; t < nqt; t++ ) { // . if this is zero, ignore it completely // . we do this for gbxlatitude2: etc. // query terms whose scores are really // eventids if ( freqWeights[t] == 0.0 ) continue; // invert score pre = 255 - scoresVec[t]; // apply affinity weights if ( affWeights[t] > 0.0 ) pre = pre * affWeights[t]; // get score s = freqWeights[t] * 100.0 * pre; // add it up score += s; // look at this term and its corresponding word or phrse // terms and get the max of all. must apply the affWeights! int32_t it = m_imap[t]; // which is the explicit bit for term #t? ebit = m_q->m_qterms[it].m_explicitBit; // . skip if term #t is not "hard" required // . it could be a phrase term or a synonym term and not // necessary if ( (ebit & (m_q->m_requiredBits)) == 0 ) { // demote if phrase //pre /= 30.0; // if we are a phrase term, keep the phrase min if ( pre < phraseMin ) phraseMin = pre; // get next term continue; } // . get the max between us and our phrase terms // . do not consider ourselves if phrase term has // higher affinity than us!! if ( (j = m_imapLeftPhraseTermNum[t]) >= 0 ) { pscore = 255-scoresVec[j] ; // if phrase is not really strong, demote it //if ( affWeights[j] < affWeights[t] ) pscore /= 30.0; if ( affWeights[j] > 0 ) pscore *= affWeights[j]; if ( pscore > pre ) pre = pscore; // we should also weight it by our affinity weight, but // punish by .5 since it is a phrase pscore = 255-scoresVec[j] ; if ( affWeights[t] > 0 ) pscore *= affWeights[t]; if ( pscore > pre ) pre = pscore; } if ( (j = m_imapRightPhraseTermNum[t]) >= 0 ) { pscore = 255-scoresVec[j] ; // if phrase is not really strong, demote it //if ( affWeights[j] < affWeights[t] ) pscore /= 30.0; if ( affWeights[j] > 0 ) pscore *= affWeights[j]; if ( pscore > pre ) pre = pscore; // we should also weight it by our affinity weight, but // punish by .5 since it is a phrase pscore = 255-scoresVec[j] ; if ( affWeights[t] > 0 ) pscore *= affWeights[t]; if ( pscore > pre ) pre = pscore; } // we must also consider all of our synonyms!! if ( m_q->m_hasSynonyms ) { for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) { // skip if term #k is not our synonym //if ( m_q->m_qterms[k].m_synonymOf != // & m_q->m_qterms[it] ) continue; // if term #k IMPLIES us, use his score if bigr if ( (m_q->m_qterms[k].m_implicitBits & m_q->m_qterms[it].m_explicitBit ) == 0 ) continue; // convert him to imap space j = m_imap[k]; // get his score pscore = 255-scoresVec[j] ; // . get the scores // . 'bib' for the query 'michael bibby fact' // has a syn weight of 0... so allow 0, it // should punish us! //if ( affWeights[j] > 0)pscore*=affWeights[j]; if ( affWeights[j] >= 0) pscore*=affWeights[j]; //if ( pscore > pre ) pre = pscore; // we should also weight it by our affinity // weight,but punish by .5 since it is a phrase //pscore = 255-scoresVec[j] ; if ( affWeights[t] >= 0) pscore*=affWeights[t]; if ( pscore > pre ) pre = pscore; } } // do we have a new min score? if ( pre >= min ) continue; // rat=0 and boolean queries might not have this term! // do not allow min to be 0! if ( pre <= 0.0 && ! requireAllTerms ) continue; // ok, we got a winner if ( pre < min ) min = pre; } // use phrase min if we had no terms explicitly if ( min == 9999999.0 ) min = phraseMin; // multiply by the min, this rewards non-outliers //if ( min > 1 && min != 9999999.0 ) score *= min * min; if ( min != 9999999.0 ) score *= min * min; //if ( min > 1 && min != 9999999.0 ) score *= min ; //if ( min > 1 ) score *= min; // scale down score /= 100; // sanity check if ( ((float)((int32_t)(score+1.0))) < score ) { logf(LOG_DEBUG,"query: got score breach, score=%f",score); score = (float)0x7ffffff0; //char *xx=NULL; *xx = 0; } // make sure never 0 return (score_t)(score + 1.0); } /* // . computes a final score from a score vector and term weights inline int32_t IndexTable2::getWeightedScore ( unsigned char *scoresVec , int32_t nqt , float *affWeights ) { // tends to neutralize the affects of outliers float score = 0.0; for ( int32_t t = 0; t < nqt; t++ ) { float s = m_freqWeights[t] * 100.0 *(255-scoresVec[t]); // let's take this out until we figure out a way // to deal with small samples without having // to make Msg3a go to the next tier, when the // combined samples from each split would be // enough to compute an affinity. also, we do not // want splits using different affinities! if ( affWeights[t] > 0.0 ) s = s * affWeights[t]; // square root it s = sqrt((double)s); // do it again s = sqrt((double)s); // add up score += s; } // average float avg = score / nqt; // square //avg *= avg; // scale up for round off avg *= 100; // never 0 avg += 1; // square return (int32_t)avg; } */ #endif