index numbers as integers too, not just floats

so we can sort by spider date without losing
128 seconds of resolution.
This commit is contained in:
Matt Wells 2014-02-06 20:57:54 -08:00
parent 63e95c3b2d
commit 2d4af1aefe
16 changed files with 355 additions and 47 deletions

View File

@ -1096,7 +1096,7 @@ void Msg39::estimateHits ( ) {
// convenience ptrs. we will store the docids/scores into these arrays
long long *topDocIds;
float *topScores;
double *topScores;
key_t *topRecs;
// numDocIds counts docs in all tiers when using toptree.
@ -1163,7 +1163,7 @@ void Msg39::estimateHits ( ) {
mr.ptr_clusterRecs = NULL;
// this is how much space to reserve
mr.size_docIds = 8 * numDocIds; // long long
mr.size_scores = 4 * numDocIds; // float
mr.size_scores = sizeof(double) * numDocIds; // float
// if not doing site clustering, we won't have these perhaps...
if ( m_gotClusterRecs )
mr.size_clusterRecs = sizeof(key_t) *numDocIds;
@ -1191,7 +1191,7 @@ void Msg39::estimateHits ( ) {
return ;
}
topDocIds = (long long *) mr.ptr_docIds;
topScores = (float *) mr.ptr_scores;
topScores = (double *) mr.ptr_scores;
topRecs = (key_t *) mr.ptr_clusterRecs;
}
@ -1225,6 +1225,8 @@ void Msg39::estimateHits ( ) {
//add it to the reply
topDocIds [docCount] = t->m_docId;
topScores [docCount] = t->m_score;
if ( m_tt.m_useIntScores )
topScores[docCount] = (double)t->m_intScore;
// supply clusterdb rec? only for full splits
if ( m_gotClusterRecs )
topRecs [docCount] = t->m_clusterRec;

View File

@ -158,7 +158,7 @@ public:
long m_errno;
char *ptr_docIds ; // the results, long long
char *ptr_scores; ; // floats
char *ptr_scores; ; // now doubles! so we can have intScores
char *ptr_scoreInfo ; // transparency info
char *ptr_pairScoreBuf ; // transparency info
char *ptr_singleScoreBuf ; // transparency info

View File

@ -277,8 +277,8 @@ bool Msg3a::gotCacheReply ( ) {
m_docIds = (long long *)p;
p += 8 * m_numDocIds;
// scores
m_scores = (float *)p;
p += sizeof(float) * m_numDocIds;
m_scores = (double *)p;
p += sizeof(double) * m_numDocIds;
// site hashes
m_siteHashes26 = (long *)p;
p += 4 * m_numDocIds;
@ -727,20 +727,20 @@ bool Msg3a::gotAllSplitReplies ( ) {
if ( ! m_debug ) continue;
// cast these for printing out
long long *docIds = (long long *)mr->ptr_docIds;
score_t *scores = (score_t *)mr->ptr_scores;
double *scores = (double *)mr->ptr_scores;
// print out every docid in this split reply
for ( long j = 0; j < mr->m_numDocIds ; j++ ) {
// print out the score (stored as a double now)
logf( LOG_DEBUG,
"query: msg3a: [%lu] %03li) "
"split=%li docId=%012llu domHash=0x%02lx "
"score=%lu" ,
"score=%f" ,
(unsigned long)this ,
j ,
i ,
docIds [j] ,
(long)g_titledb.getDomHash8FromDocId(docIds[j]),
(long)scores[j] );
(float)scores[j] );
}
}
@ -772,7 +772,7 @@ bool Msg3a::gotAllSplitReplies ( ) {
for ( long i = 0 ; i < max ; i++ )
cr.pushLongLong(m_docIds[i] );
for ( long i = 0 ; i < max ; i++ )
cr.pushFloat(m_scores[i]);
cr.pushDouble(m_scores[i]);
for ( long i = 0 ; i < max ; i++ )
cr.pushLong(getSiteHash26(i));
// sanity
@ -849,7 +849,7 @@ bool Msg3a::mergeLists ( ) {
// . tcPtr = term count. how many required query terms does the doc
// have? formerly called topExplicits in IndexTable2.cpp
long long *diPtr [MAX_INDEXDB_SPLIT];
float *rsPtr [MAX_INDEXDB_SPLIT];
double *rsPtr [MAX_INDEXDB_SPLIT];
key_t *ksPtr [MAX_INDEXDB_SPLIT];
long long *diEnd [MAX_INDEXDB_SPLIT];
for ( long j = 0; j < m_numHosts ; j++ ) {
@ -863,7 +863,7 @@ bool Msg3a::mergeLists ( ) {
continue;
}
diPtr [j] = (long long *)mr->ptr_docIds;
rsPtr [j] = (float *)mr->ptr_scores;
rsPtr [j] = (double *)mr->ptr_scores;
ksPtr [j] = (key_t *)mr->ptr_clusterRecs;
diEnd [j] = (long long *)(mr->ptr_docIds +
mr->m_numDocIds * 8);
@ -919,7 +919,8 @@ bool Msg3a::mergeLists ( ) {
// . how much do we need to store final merged docids, etc.?
// . docid=8 score=4 bitScore=1 clusterRecs=key_t clusterLevls=1
long need = m_docsToGet * (8+4+sizeof(key_t)+sizeof(DocIdScore *)+1);
long need = m_docsToGet * (8+sizeof(double)+
sizeof(key_t)+sizeof(DocIdScore *)+1);
// allocate it
m_finalBuf = (char *)mmalloc ( need , "finalBuf" );
m_finalBufSize = need;
@ -928,7 +929,7 @@ bool Msg3a::mergeLists ( ) {
// hook into it
char *p = m_finalBuf;
m_docIds = (long long *)p; p += m_docsToGet * 8;
m_scores = (float *)p; p += m_docsToGet * sizeof(float);
m_scores = (double *)p; p += m_docsToGet * sizeof(double);
m_clusterRecs = (key_t *)p; p += m_docsToGet * sizeof(key_t);
m_clusterLevels = (char *)p; p += m_docsToGet * 1;
m_scoreInfos = (DocIdScore **)p;p+=m_docsToGet*sizeof(DocIdScore *);
@ -1078,7 +1079,7 @@ bool Msg3a::mergeLists ( ) {
// turn it into a double now (scores are doubles, no longer rscore_t floats).
// we do this to make it easier for PostQueryRerank.cpp
m_scores [m_numDocIds]=(float)*rsPtr[maxj];
m_scores [m_numDocIds]=(double)*rsPtr[maxj];
if ( m_r->m_doSiteClustering )
m_clusterRecs[m_numDocIds]= *ksPtr[maxj];
// clear this out
@ -1142,7 +1143,7 @@ bool Msg3a::mergeLists ( ) {
long Msg3a::getStoredSize ( ) {
// docId=8, scores=sizeof(rscore_t), clusterLevel=1 bitScores=1
// eventIds=1
long need = m_numDocIds * ( 8 + sizeof(rscore_t) + 1 ) +
long need = m_numDocIds * ( 8 + sizeof(double) + 1 ) +
4 + // m_numDocIds
8 ; // m_numTotalEstimatedHits (estimated # of results)
return need;
@ -1158,8 +1159,8 @@ long Msg3a::serialize ( char *buf , char *bufEnd ) {
// store each docid, 8 bytes each
memcpy ( p , m_docIds , m_numDocIds * 8 ); p += m_numDocIds * 8;
// store scores
memcpy ( p , m_scores , m_numDocIds * sizeof(rscore_t) );
p += m_numDocIds * sizeof(rscore_t) ;
memcpy ( p , m_scores , m_numDocIds * sizeof(double) );
p += m_numDocIds * sizeof(double) ;
// store cluster levels
memcpy ( p , m_clusterLevels , m_numDocIds ); p += m_numDocIds;
// sanity check
@ -1178,7 +1179,7 @@ long Msg3a::deserialize ( char *buf , char *bufEnd ) {
// get each docid, 8 bytes each
m_docIds = (long long *)p; p += m_numDocIds * 8;
// get scores
m_scores = (rscore_t *)p; p += m_numDocIds * sizeof(rscore_t) ;
m_scores = (double *)p; p += m_numDocIds * sizeof(double) ;
// get cluster levels
m_clusterLevels = (char *)p; p += m_numDocIds;
// sanity check

View File

@ -61,7 +61,7 @@ public:
// we basically turn the scores we get from each msg39 split into
// floats (rscore_t) and store them as floats so that PostQueryRerank
// has an easier time
float *getScores ( ) { return m_scores; };
double *getScores ( ) { return m_scores; };
long getNumDocIds ( ) { return m_numDocIds; };
long getSiteHash26 ( long i ) {
@ -160,7 +160,7 @@ public:
// final merged lists go here
long long *m_docIds ;
float *m_scores ;
double *m_scores ;
class DocIdScore **m_scoreInfos ;
//key_t *m_recs ; // clusterdb recs
key_t *m_clusterRecs ;

View File

@ -3047,9 +3047,9 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"stream=1&" // stream results back as we get them
"q="
// put NEWEST on top
"gbsortby%%3Agbspiderdate+"
"gbsortbyint%%3Agbspiderdate+"
// min spider date = now - 10 mins
"gbmin%%3Agbspiderdate%%3A%li&"
"gbminint%%3Agbspiderdate%%3A%li&"
//"debug=1"
"prepend=type%%3Ajson"
">"

View File

@ -2100,8 +2100,10 @@ bool printResult ( State0 *st, long ix ) {
sb->incrementLength(-1);
// we lose resolution storing the last-spider time as a float,
// so print the raw long value here instead...
float f = mr->m_lastSpidered;
sb->safePrintf(",\"lastSpiderTimeUTC\":%.0f}",f);
//float f = mr->m_lastSpidered;
//sb->safePrintf(",\"lastCrawlTimeUTC\":%.0f}",f);
sb->safePrintf(",\"lastCrawlTimeUTC\":%li}",
mr->m_lastSpidered);
}
//mr->size_content );

View File

@ -4118,11 +4118,16 @@ bool PosdbTable::setQueryTermInfo ( ) {
// assume not sorting by a numeric termlist
m_sortByTermNum = -1;
m_sortByTermNumInt = -1;
// now we have score ranges for gbmin:price:1.99 etc.
m_minScoreTermNum = -1;
m_maxScoreTermNum = -1;
// for gbminint:count:99 etc.
m_minScoreTermNumInt = -1;
m_maxScoreTermNumInt = -1;
//for ( long i = 0 ; i < m_msg2->getNumLists() ; i++ ) {
for ( long i = 0 ; i < m_q->m_numTerms ; i++ ) {
QueryTerm *qt = &m_q->m_qterms[i];
@ -4141,6 +4146,14 @@ bool PosdbTable::setQueryTermInfo ( ) {
if ( qt->m_fieldCode == FIELD_GBSORTBY ||
qt->m_fieldCode == FIELD_GBREVSORTBY )
m_sortByTermNum = i;
if ( qt->m_fieldCode == FIELD_GBSORTBYINT ||
qt->m_fieldCode == FIELD_GBREVSORTBYINT ) {
m_sortByTermNumInt = i;
// tell topTree to use int scores
m_topTree->m_useIntScores = true;
}
// is it gbmin:price:1.99?
if ( qt->m_fieldCode == FIELD_GBNUMBERMIN ) {
m_minScoreTermNum = i;
@ -4150,6 +4163,14 @@ bool PosdbTable::setQueryTermInfo ( ) {
m_maxScoreTermNum = i;
m_maxScoreVal = qt->m_qword->m_float;
}
if ( qt->m_fieldCode == FIELD_GBNUMBERMININT ) {
m_minScoreTermNumInt = i;
m_minScoreValInt = qt->m_qword->m_int;
}
if ( qt->m_fieldCode == FIELD_GBNUMBERMAXINT ) {
m_maxScoreTermNumInt = i;
m_maxScoreValInt = qt->m_qword->m_int;
}
// count
long nn = 0;
// also add in bigram lists
@ -4277,6 +4298,15 @@ bool PosdbTable::setQueryTermInfo ( ) {
if (qt->m_fieldCode == FIELD_GBNUMBERMAX )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBSORTBYINT )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBREVSORTBYINT )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBNUMBERMININT )
qti->m_bigramFlags[nn]|=BF_NUMBER;
if (qt->m_fieldCode == FIELD_GBNUMBERMAXINT )
qti->m_bigramFlags[nn]|=BF_NUMBER;
// only really add if useful
// no, because when inserting NEW (related) terms that are
// not currently in the document, this list may initially
@ -5295,6 +5325,7 @@ void PosdbTable::intersectLists10_r ( ) {
char siteRank =0;
char docLang =0;
float score;
long intScore;
float minScore;
float minPairScore;
float minSingleScore;
@ -5365,6 +5396,7 @@ void PosdbTable::intersectLists10_r ( ) {
// do not do it if we got a gbsortby: field
if ( m_sortByTermNum >= 0 ) nnn = 0;
if ( m_sortByTermNumInt >= 0 ) nnn = 0;
/*
// skip all this if getting score of just one docid on special
@ -5653,6 +5685,7 @@ void PosdbTable::intersectLists10_r ( ) {
pass0++;
if ( m_sortByTermNum >= 0 ) goto skipScoringFilter;
if ( m_sortByTermNumInt >= 0 ) goto skipScoringFilter;
// test why we are slow
//if ( (s_sss++ % 8) != 0 ) { docIdPtr += 6; fail0++; goto docIdLoop;}
@ -6493,11 +6526,18 @@ void PosdbTable::intersectLists10_r ( ) {
score = g_posdb.getFloat ( miniMergedList[m_sortByTermNum] );
}
if ( m_sortByTermNumInt >= 0 ) {
// no term?
if ( ! miniMergedList[m_sortByTermNumInt] ) goto advance;
intScore = g_posdb.getInt( miniMergedList[m_sortByTermNumInt]);
}
// skip docid if outside of range
if ( m_minScoreTermNum >= 0 ) {
// no term?
if ( ! miniMergedList[m_minScoreTermNum] ) goto advance;
float score2 = g_posdb.getFloat ( miniMergedList[m_minScoreTermNum] );
float score2 ;
score2= g_posdb.getFloat ( miniMergedList[m_minScoreTermNum] );
if ( score2 < m_minScoreVal ) goto advance;
}
@ -6505,10 +6545,29 @@ void PosdbTable::intersectLists10_r ( ) {
if ( m_maxScoreTermNum >= 0 ) {
// no term?
if ( ! miniMergedList[m_maxScoreTermNum] ) goto advance;
float score2 = g_posdb.getFloat ( miniMergedList[m_maxScoreTermNum] );
float score2 ;
score2= g_posdb.getFloat ( miniMergedList[m_maxScoreTermNum] );
if ( score2 > m_maxScoreVal ) goto advance;
}
// skip docid if outside of range
if ( m_minScoreTermNumInt >= 0 ) {
// no term?
if ( ! miniMergedList[m_minScoreTermNumInt] ) goto advance;
long score3;
score3=g_posdb.getInt(miniMergedList[m_minScoreTermNumInt]);
if ( score3 < m_minScoreValInt ) goto advance;
}
// skip docid if outside of range
if ( m_maxScoreTermNumInt >= 0 ) {
// no term?
if ( ! miniMergedList[m_maxScoreTermNumInt] ) goto advance;
long score3 ;
score3= g_posdb.getInt ( miniMergedList[m_maxScoreTermNumInt]);
if ( score3 > m_maxScoreValInt ) goto advance;
}
// . seoDebug hack so we can set "dcs"
// . we only come here if we actually made it into m_topTree
@ -6606,6 +6665,12 @@ void PosdbTable::intersectLists10_r ( ) {
// set the score and docid ptr
t->m_score = score;
t->m_docId = m_docId;
// use an integer score like lastSpidered timestamp?
if ( m_sortByTermNumInt >= 0 ) {
t->m_intScore = intScore;
t->m_score = 0.0;
if ( ! m_topTree->m_useIntScores){char *xx=NULL;*xx=0;}
}
// . this will not add if tree is full and it is less than the
// m_lowNode in score
// . if it does get added to a full tree, lowNode will be

15
Posdb.h
View File

@ -208,10 +208,16 @@ class Posdb {
void setFloat ( void *vkp , float f ) {
*(float *)(((char *)vkp) + 2) = f; };
void setInt ( void *vkp , long x ) {
*(long *)(((char *)vkp) + 2) = x; };
// and read the float as well
float getFloat ( void *vkp ) {
return *(float *)(((char *)vkp) + 2); };
long getInt ( void *vkp ) {
return *(long *)(((char *)vkp) + 2); };
void setAlignmentBit ( void *vkp , char val ) {
char *p = (char *)vkp;
if ( val ) p[1] = p[1] | 0x02;
@ -610,6 +616,7 @@ class PosdbTable {
// for gbsortby:item.price ...
long m_sortByTermNum;
long m_sortByTermNumInt;
// for gbmin:price:1.99
long m_minScoreTermNum;
@ -619,6 +626,14 @@ class PosdbTable {
float m_minScoreVal;
float m_maxScoreVal;
// for gbmin:count:99
long m_minScoreTermNumInt;
long m_maxScoreTermNumInt;
// for gbmin:count:99
long m_minScoreValInt;
long m_maxScoreValInt;
// the new intersection/scoring algo
void intersectLists10_r ( );

View File

@ -2187,6 +2187,11 @@ bool Query::setQWords ( char boolFlag ,
if ( fieldCode == FIELD_GBNUMBERMAX )
ph = hash64 ("gbsortby", 8);
if ( fieldCode == FIELD_GBNUMBERMININT )
ph = hash64 ("gbsortbyint", 11);
if ( fieldCode == FIELD_GBNUMBERMAXINT )
ph = hash64 ("gbsortbyint", 11);
// ptr to field, if any
qw->m_fieldCode = fieldCode;
@ -2213,8 +2218,14 @@ bool Query::setQWords ( char boolFlag ,
// gbmin:price:1.23
fieldCode == FIELD_GBNUMBERMIN ||
fieldCode == FIELD_GBNUMBERMAX ||
fieldCode == FIELD_GBSORTBYINT ||
fieldCode == FIELD_GBREVSORTBYINT ||
fieldCode == FIELD_GBNUMBERMININT ||
fieldCode == FIELD_GBNUMBERMAXINT ||
fieldCode == FIELD_GBAD ) {
// . find first space -- that terminates the field value
// . find 1st space -- that terminates the field value
// . make "end" point to the end of the entire query
char *end =
(words.m_words[words.m_numWords-1] +
@ -2222,13 +2233,14 @@ bool Query::setQWords ( char boolFlag ,
// use this for gbmin:price:1.99 etc.
long firstColonLen = -1;
// "w" points to the first alnumword after the field,
// so for site:xyz.com "w" points to the 'x' and wlen would
// be 3 in that case since xyz is a word of 3 chars. so advance
// so for site:xyz.com "w" points to the 'x' and wlen
// would be 3 in that case since xyz is a word of 3
// chars. so advance
// wlen until we hit a space.
while ( w + wlen < end ) {
// stop at first white space
if ( is_wspace_utf8(w+wlen) ) break;
// in the case of gbmin:price:1.99 record first ':'
// in case of gbmin:price:1.99 record first ':'
if ( w[wlen]==':' ) firstColonLen = wlen;
wlen++;
}
@ -2238,21 +2250,28 @@ bool Query::setQWords ( char boolFlag ,
unsigned long long wid = hash64 ( w , wlen, 0LL );
// i've decided not to make
// gbsortby:products.offerPrice gbmin:price:1.23 case insensitive
// gbsortby:products.offerPrice
// gbmin:price:1.23 case insensitive
if ( fieldCode == FIELD_GBSORTBY ||
fieldCode == FIELD_GBREVSORTBY )
fieldCode == FIELD_GBREVSORTBY ||
fieldCode == FIELD_GBSORTBYINT ||
fieldCode == FIELD_GBREVSORTBYINT )
wid = hash64Lower_utf8 ( w , wlen , 0LL );
// gbmin:price:1.23
if ( firstColonLen>0 &&
( fieldCode == FIELD_GBNUMBERMIN ||
fieldCode == FIELD_GBNUMBERMAX ) ) {
fieldCode == FIELD_GBNUMBERMAX ||
fieldCode == FIELD_GBNUMBERMAXINT ||
fieldCode == FIELD_GBNUMBERMAXINT ) ) {
// record the field
wid = hash64Lower_utf8 ( w , firstColonLen , 0LL );
wid = hash64Lower_utf8(w,firstColonLen , 0LL );
// and also the floating point after that
qw->m_float = atof ( w + firstColonLen + 1 );
qw->m_int = (long)atoll( w + firstColonLen+1);
}
// should we have normalized before hashing?
if ( fieldCode == FIELD_URL ||
fieldCode == FIELD_GBPARENTURL ||
@ -3078,9 +3097,12 @@ struct QueryField g_fields[] = {
{"gbgigabitvector", FIELD_GBGIGABITVECTOR, false,""},
{"gbsamplevector", FIELD_GBSAMPLEVECTOR, false,""},
{"gbcontenthash", FIELD_GBCONTENTHASH, false,""},
{"gbsortby", FIELD_GBSORTBY, false,"Example: gbsortby:price. Fields can be "
{"gbsortby", FIELD_GBSORTBY, false,
"Example: gbsortby:price. Fields can be "
"in JSON or in meta tag."},
{"gbrevsortby", FIELD_GBREVSORTBY, false,"Example: gbrevsortby:item.price . "
{"gbrevsortby", FIELD_GBREVSORTBY, false,
"Example: gbrevsortby:item.price . "
"Fields can be in JSON or in meta tag."},
// gbmin:price:1.23
@ -3088,6 +3110,20 @@ struct QueryField g_fields[] = {
"fields can be in JSON or in meta tag."},
{"gbmax", FIELD_GBNUMBERMAX, false,"Usage: gbmax:price:1.99"},
{"gbsortbyint", FIELD_GBSORTBYINT, false,
"Example: gbsortbyint:intfield . Fields can be "
"in JSON or in meta tag."},
{"gbrevsortbyint", FIELD_GBREVSORTBYINT, false,
"Example: gbrevsortbyint:item.count . "
"Fields can be in JSON or in meta tag."},
{"gbminint", FIELD_GBNUMBERMININT, false,
"Usage: gbminint:count:99 . Numeric "
"fields can be in JSON or in meta tag."},
{"gbmaxint", FIELD_GBNUMBERMAXINT, false,
"Usage: gbmaxint:count:99"},
{"gbcountry",FIELD_GBCOUNTRY,false,""},
{"gbad",FIELD_GBAD,false,""},
@ -3108,7 +3144,9 @@ struct QueryField g_fields[] = {
{"gbpermalink",FIELD_GBPERMALINK,false,""},
//{"gbcsenum",FIELD_GBCSENUM,false,""},
{"gbparenturl", FIELD_GBPARENTURL, true,"Match the json urls that were extract from this parent url. Example: gbparenturl:www.gigablast.com/addurl.htm"},
{"gbparenturl", FIELD_GBPARENTURL, true,"Match the json urls that "
"were extract from this parent url. Example: "
"gbparenturl:www.gigablast.com/addurl.htm"},
{"gbdocid",FIELD_GBDOCID,false,"restrict results to this docid"}
};

View File

@ -110,6 +110,12 @@ typedef unsigned long long qvec_t;
#define FIELD_GBNUMBERMAX 57
#define FIELD_GBPARENTURL 58
#define FIELD_GBSORTBYINT 59
#define FIELD_GBREVSORTBYINT 60
#define FIELD_GBNUMBERMININT 61
#define FIELD_GBNUMBERMAXINT 62
#define FIELD_GBOTHER 92
// returns a FIELD_* code above, or FIELD_GENERIC if not in the list
@ -365,6 +371,8 @@ class QueryWord {
// for min/max score ranges like gbmin:price:1.99
float m_float;
// for gbminint:99 etc. uses integers instead of floats for better res
long m_int;
};
// . we filter the QueryWords and turn them into QueryTerms

View File

@ -220,6 +220,15 @@ bool SafeBuf::pushFloat ( float i) {
return true;
}
bool SafeBuf::pushDouble ( double i) {
if ( m_length + (long)sizeof(double) > m_capacity )
if(!reserve(sizeof(double)))
return false;
*(double *)(m_buf+m_length) = i;
m_length += sizeof(double);
return true;
}
long SafeBuf::popLong ( ) {
if ( m_length < 4 ) { char *xx=NULL;*xx=0; }
long ret = *(long *)(m_buf+m_length-4);

View File

@ -306,6 +306,7 @@ struct SafeBuf {
bool pushLong (long i);
bool pushLongLong (long long i);
bool pushFloat (float i);
bool pushDouble (double i);
long popLong();
float popFloat();

View File

@ -36,6 +36,7 @@ TopTree::~TopTree() { reset(); }
void TopTree::reset ( ) {
if ( m_nodes ) mfree(m_nodes,m_allocSize,"TopTree");
m_nodes = NULL;
m_useIntScores = false;
//m_sampleVectors = NULL;
m_numNodes = 0;
m_numUsedNodes = 0;
@ -200,9 +201,18 @@ bool TopTree::addNode ( TopNode *t , long tnn ) {
if ( m_vcount >= m_docsWanted ) {
long i = m_lowNode;
if ( t->m_score < m_nodes[i].m_score ) {
m_kickedOutDocIds = true; return false; }
if ( t->m_score > m_nodes[i].m_score ) goto addIt;
if ( m_useIntScores ) {
if ( t->m_intScore < m_nodes[i].m_intScore ) {
m_kickedOutDocIds = true; return false; }
if ( t->m_intScore > m_nodes[i].m_intScore) goto addIt;
}
else {
if ( t->m_score < m_nodes[i].m_score ) {
m_kickedOutDocIds = true; return false; }
if ( t->m_score > m_nodes[i].m_score ) goto addIt;
}
// . finally, compare docids, store lower ones first
// . docids should not tie...
if ( t->m_docId >= m_nodes[i].m_docId ) {
@ -243,11 +253,23 @@ bool TopTree::addNode ( TopNode *t , long tnn ) {
// . if a node exists with our key then do NOT replace it
else while ( i >= 0 ) {
iparent = i;
// . compare to the ith node
if ( t->m_score < m_nodes[i].m_score ) {
i = LEFT(i); dir = 0; continue; }
if ( t->m_score > m_nodes[i].m_score ) {
i = RIGHT(i); dir = 1; continue; }
if ( m_useIntScores ) {
if ( t->m_intScore < m_nodes[i].m_intScore ) {
i = LEFT(i); dir = 0; continue; }
if ( t->m_intScore > m_nodes[i].m_intScore ) {
i = RIGHT(i); dir = 1; continue; }
}
else {
if ( t->m_score < m_nodes[i].m_score ) {
i = LEFT(i); dir = 0; continue; }
if ( t->m_score > m_nodes[i].m_score ) {
i = RIGHT(i); dir = 1; continue; }
}
// . finally, compare docids, store lower ones first
// . docids should not tie...
if ( t->m_docId > m_nodes[i].m_docId ) {
@ -293,7 +315,13 @@ bool TopTree::addNode ( TopNode *t , long tnn ) {
// . WARNING: if t->m_score is fractional, the fraction will be
// dropped and could result in the lower scoring of the two docids
// being kept.
uint32_t cs = ((uint32_t)t->m_score);
uint32_t cs ;
if ( m_useIntScores )
cs = (uint32_t) t->m_intScore;
else
cs = ((uint32_t)t->m_score);
key_t k;
k.n1 = domHash << 24; // 1 byte domHash
//k.n1 |= (t->m_bscore & ~0xc0) << 16; // 1 byte bscore
@ -421,7 +449,13 @@ bool TopTree::addNode ( TopNode *t , long tnn ) {
// WARNING: if t->m_score is fractional, the fraction will be
// dropped and could result in the lower scoring of the two
// docids being kept.
uint32_t cs = ((uint32_t)t->m_score);
uint32_t cs ;
if ( m_useIntScores )
cs = (uint32_t) t->m_intScore;
else
cs = ((uint32_t)t->m_score);
k.n1 = domHash2 << 24; // 1 byte domHash
//k.n1 |= (t->m_bscore & ~0xc0) << 16; // 1 byte bscore
k.n1 |= cs >> 16; // 4 byte score

View File

@ -30,6 +30,10 @@ class TopNode {
//unsigned char m_tier ;
float m_score ;
long long m_docId;
// option for using int scores
long m_intScore;
// clustering info
//long m_kid ; // result from our same site below us
//unsigned long m_siteHash ;
@ -124,6 +128,7 @@ class TopTree {
long m_cap ;
float m_partial ;
bool m_doSiteClustering;
bool m_useIntScores;
long m_docsWanted;
long m_ridiculousMax;
char m_kickedOutDocIds;

View File

@ -29871,6 +29871,23 @@ bool XmlDoc::hashNumber ( char *beginBuf ,
if ( ! hashNumber2 ( f , hi , "gbrevsortby" ) )
return false;
//
// also hash as an int, 4 byte-integer so our lastSpidered timestamps
// dont lose 128 seconds of resolution
//
long i = (long) atoll2 ( p , bufEnd - p );
if ( ! hashNumber3 ( i , hi , "gbsortbyint" ) )
return false;
// also hash in reverse order for sorting from low to high
i = -1 * i;
if ( ! hashNumber3 ( i , hi , "gbrevsortbyint" ) )
return false;
return true;
}
@ -29979,6 +29996,113 @@ bool XmlDoc::hashNumber2 ( float f , HashInfo *hi , char *sortByStr ) {
return true;
}
bool XmlDoc::hashNumber3 ( long n , HashInfo *hi , char *sortByStr ) {
// prefix is something like price. like the meta "name" or
// the json name with dots in it like "product.info.price" or something
long long nameHash = 0LL;
long nameLen = 0;
if ( hi->m_prefix ) nameLen = gbstrlen ( hi->m_prefix );
if ( hi->m_prefix && nameLen )
nameHash = hash64Lower_utf8 ( hi->m_prefix , nameLen );
// need a prefix for hashing numbers... for now
else { char *xx=NULL; *xx=0; }
// combine prefix hash with a special hash to make it unique to avoid
// collisions. this is the "TRUE" prefix.
long long truePrefix64 = hash64n ( sortByStr ); // "gbsortby");
// hash with the "TRUE" prefix
long long ph2 = hash64 ( nameHash , truePrefix64 );
// . now store it
// . use field hash as the termid. normally this would just be
// a prefix hash
// . use mostly fake value otherwise
key144_t k;
g_posdb.makeKey ( &k ,
ph2 ,
0,//docid
0,// word pos #
0,// densityRank , // 0-15
0 , // MAXDIVERSITYRANK
0 , // wordSpamRank ,
0 , //siterank
0 , // hashGroup,
// we set to docLang final hash loop
//langUnknown, // langid
// unless already set. so set to english here
// so it will not be set to something else
// otherwise our floats would be ordered by langid!
// somehow we have to indicate that this is a float
// termlist so it will not be mangled any more.
//langEnglish,
langUnknown,
0 , // multiplier
false, // syn?
false , // delkey?
hi->m_shardByTermId );
//long long final = hash64n("products.offerprice",0);
//long long prefix = hash64n("gbsortby",0);
//long long h64 = hash64 ( final , prefix);
//if ( ph2 == h64 )
// log("hey: got offer price");
// now set the float in that key
//g_posdb.setFloat ( &k , f );
g_posdb.setInt ( &k , n );
// HACK: this bit is ALWAYS set by Posdb::makeKey() to 1
// so that we can b-step into a posdb list and make sure
// we are aligned on a 6 byte or 12 byte key, since they come
// in both sizes. but for this, hack it off to tell
// addTable144() that we are a special posdb key, a "numeric"
// key that has a float stored in it. then it will NOT
// set the siterank and langid bits which throw our sorting
// off!!
g_posdb.setAlignmentBit ( &k , 0 );
// sanity
//float t = g_posdb.getFloat ( &k );
long x = g_posdb.getInt ( &k );
if ( x != n ) { char *xx=NULL;*xx=0; }
HashTableX *dt = hi->m_tt;
// the key may indeed collide, but that's ok for this application
if ( ! dt->addTerm144 ( &k ) )
return false;
if ( ! m_wts )
return true;
// store in buffer
char buf[128];
long bufLen = sprintf(buf,"%li",n);
// add to wts for PageParser.cpp display
// store it
if ( ! storeTerm ( buf,
bufLen,
truePrefix64,
hi,
0, // word#, i,
0, // wordPos
0,// densityRank , // 0-15
0, // MAXDIVERSITYRANK,//phrase
0, // ws,
0, // hashGroup,
//true,
&m_wbuf,
m_wts,
// a hack for display in wts:
SOURCE_NUMBER, // SOURCE_BIGRAM, // synsrc
langUnknown ) )
return false;
return true;
}
// . many many websites got hijacked pages in them...
// . revkim.org/mcdrt/mgntf/sata/sata.htm
// . collegefootballweekly.net/hswsj/riime/sata/sata.htm

View File

@ -863,6 +863,10 @@ class XmlDoc {
class HashInfo *hi ,
char *gbsortByStr ) ;
bool hashNumber3 ( long x,
class HashInfo *hi ,
char *gbsortByStr ) ;
// print out for PageTitledb.cpp and PageParser.cpp
bool printDoc ( class SafeBuf *pbuf );
bool printMenu ( class SafeBuf *pbuf );