#include "gb-include.h"
#include "Pos.h"
#include "Sections.h"

// Start with no buffer attached so that reset() (and therefore the
// destructor) has nothing to free on a never-used object.
Pos::Pos() {
	m_buf = NULL;
	m_needsFree = false;
}

// Releases the buffer, if this object allocated one, via reset().
Pos::~Pos () {
	reset();
}

// . frees m_buf only when m_needsFree is set, i.e. when set() obtained the
//   buffer from mmalloc() rather than using m_localBuf or a caller's buffer
// . m_buf is cleared either way
// . m_needsFree is not touched here; set() rewrites it before it installs
//   a new buffer
void Pos::reset() {
	if ( m_buf && m_needsFree )
		mfree ( m_buf , m_bufSize , "Pos" );
	m_buf = NULL;
}

// . the interval is half-open [a,b)
// . thin wrapper around set(): passes p/pend as set()'s f/fend output
//   range and returns whatever set() stored through its "len" pointer
//   (plen stays 0 if set() never writes it)
// . the bool returned by set() is ignored
// . calls set() without buf/bufSize, so Pos.h presumably declares defaults
//   for those two parameters -- not visible in this file
// . NOTE(review): this comment used to say "do not print out any alnum
//   word with negative score", but the score lookups in set() are
//   commented out in the code visible here -- confirm whether that still
//   holds
int32_t Pos::filter( char *p, char *pend, class Words *words, int32_t a, int32_t b, Sections *sections ) {
	int32_t plen = 0;
	set ( words , sections , p , pend, &plen , a , b );
	return plen;
}

// . set the filtered position of each word
// . used by Summary.cpp to determine how many chars are in the summary,
//   be those chars single byte or utf8 chars that are 4 bytes
// . returns false and sets g_errno on error
// . if f is non-NULL store filtered words into there. back to back spaces
//   are eliminated.
// . [a,b) is the range of word indexes to process; b == -1 means "through
//   the last word"
// . buf/bufSize: caller-supplied storage that is used for the position
//   array when it does not fit in m_localBuf but is smaller than bufSize
bool Pos::set ( Words *words , Sections *sections , char *f , char *fend, int32_t *len , int32_t a , int32_t b , char *buf , int32_t bufSize ) {
	// free m_buf in case this is a second call
	// (skipped in filter mode so an existing m_pos array is left intact)
	if ( ! f ) reset();

	int32_t nw = words->getNumWords();
	int32_t *wlens = words->m_wordLens;
	nodeid_t *tids = words->getTagIds(); // m_tagIds;
	char **wp = words->m_words;
	//int32_t *ss = NULL;
	//int64_t *wids = words->m_wordIds;
	//if ( scores ) ss = scores->m_scores;

	// save start point for filtering
	char *fstart = f;

	// -1 is the default value
	if ( b == -1 ) b = nw;

	// alloc array if need to
	// (nw+1 slots of 4 bytes each; the buffer is used below as an int32_t
	// array through m_pos)
	int32_t need = (nw+1) * 4;

	// do not destroy m_pos/m_numWords if only filtering into a buffer
	if ( f ) goto skip;

	// assume the buffer is not ours to free until mmalloc() is used below
	m_needsFree = false;

	// pick storage in order of preference: the member m_localBuf, then
	// the caller's buf, then a fresh allocation
	m_buf = m_localBuf;
	if ( need > POS_LOCALBUFSIZE && need < bufSize )
		m_buf = buf;
	else if ( need > POS_LOCALBUFSIZE ) {
		m_buf = (char *)mmalloc(need,"Pos");
		m_needsFree = true;
	}
	// bail on error
	if ( ! m_buf ) return false;
	// remembered so reset() can hand the size back to mfree()
	m_bufSize = need;
	m_pos = (int32_t *)m_buf;
	m_numWords = nw;

 skip:
	// this is the CHARACTER count.
int32_t pos = 0; bool trunc = false; char *p , *pend; //char *nextp; //int32_t skip; char* lastBreak = NULL; // utf8 char //int32_t c; // its size in bytes //char cs; // int16_tcut //Section **sp = NULL; //if ( sections ) sp = sections->m_sectionPtrs; //int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_MARQUEE; // flag for stopping back-to-back spaces. only count those as one char. bool lastSpace = false; int32_t maxCharSize = 4; // we are utf8 for ( int32_t i = a ; i < b ; i++ ) { if (trunc) break; // set pos for the ith word to "pos" if ( ! f ) m_pos[i] = pos; // if inside a bad tag, skip it //if ( sp && (sp[i]->m_flags & badFlags) ) continue; // is tag? if ( tids && tids[i] ) { // if not breaking, does nothing if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) continue; // list tag?