#include "gb-include.h"

#include "Pos.h"
#include "Sections.h"

// Construct an empty Pos: no buffer, no positions. Every member that set()
// later fills in is given a defined value here so that nothing reads an
// indeterminate pointer or count before the first successful set().
Pos::Pos() {
	m_buf       = NULL;
	m_bufSize   = 0;
	m_needsFree = false;
	m_pos       = NULL;
	m_numWords  = 0;
}

// Release whatever reset() would release.
Pos::~Pos () {
	reset();
}

// Free m_buf if it was heap-allocated by set() and drop our references to it.
void Pos::reset() {
	if ( m_buf && m_needsFree )
		mfree ( m_buf , m_bufSize , "Pos" );
	m_buf       = NULL;
	// the buffer is gone, so we no longer own anything to free
	m_needsFree = false;
	// set() points m_pos into m_buf; do not leave it dangling
	m_pos       = NULL;
}

// . the interval is half-open [a,b)
// . do not print out any alnum word with negative score
// . thin wrapper around set(): filters words [a,b) into the buffer [p,pend)
//   and returns whatever set() stores through its "len" out-parameter
//   (0 if set() never writes it)
// . the bool result of set() is not checked here
long Pos::filter( char *p, char *pend, class Words *words, long a, long b, Sections *sections ) {
	long plen = 0;
	set ( words , sections , p , pend, &plen , a , b );
	return plen;
}

// . set the filtered position of each word
// . used by Summary.cpp to determine how many chars are in the summary,
//   be those chars single byte or utf8 chars that are 4 bytes
// . returns false and sets g_errno on error
// . if f is non-NULL store filtered words into there. back to back spaces
//   are eliminated.
bool Pos::set ( Words *words , Sections *sections , char *f , char *fend, long *len , long a , long b , char *buf , long bufSize ) {
	// free m_buf in case this is a second call
	if ( ! f ) reset();

	long nw = words->getNumWords();
	long *wlens = words->m_wordLens;
	nodeid_t *tids = words->getTagIds(); // m_tagIds;
	char **wp = words->m_words;
	//long *ss = NULL;
	//long long *wids = words->m_wordIds;
	//if ( scores ) ss = scores->m_scores;

	// save start point for filtering
	char *fstart = f;

	// -1 is the default value
	if ( b == -1 ) b = nw;

	// alloc array if need to
	// NOTE(review): 4 bytes per slot only matches the "long *" cast below
	// when sizeof(long) == 4 -- confirm the target ABI
	long need = (nw+1) * 4;

	// do not destroy m_pos/m_numWords if only filtering into a buffer
	if ( f ) goto skip;

	m_needsFree = false;

	m_buf = m_localBuf;
	if ( need > POS_LOCALBUFSIZE && need < bufSize )
		m_buf = buf;
	else if ( need > POS_LOCALBUFSIZE ) {
		m_buf = (char *)mmalloc(need,"Pos");
		m_needsFree = true;
	}
	// bail on error
	if ( ! m_buf ) return false;
	m_bufSize = need;
	m_pos = (long *)m_buf;
	m_numWords = nw;

 skip:
	// this is the CHARACTER count.
long pos = 0; bool trunc = false; char *p , *pend; //char *nextp; //long skip; char* lastBreak = NULL; // utf8 char //long c; // its size in bytes //char cs; // shortcut //Section **sp = NULL; //if ( sections ) sp = sections->m_sectionPtrs; //long badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_MARQUEE; // flag for stopping back-to-back spaces. only count those as one char. bool lastSpace = false; long maxCharSize = 4; // we are utf8 for ( long i = a ; i < b ; i++ ) { if (trunc) break; // set pos for the ith word to "pos" if ( ! f ) m_pos[i] = pos; // if inside a bad tag, skip it //if ( sp && (sp[i]->m_flags & badFlags) ) continue; // is tag? if ( tids && tids[i] ) { // if not breaking, does nothing if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) continue; // list tag?