// open-source-search-engine/Highlight.h
// Matt Wells, copyright Jul 2001
// . highlights the terms in Query "q" in the given content (or Words) and
//   stores the result in the caller-supplied SafeBuf
#include "Words.h"
#include "Query.h"
#include "Matches.h"
#include "Xml.h"
#include "Url.h"
#ifndef _HIGHLIGHT_H_
#define _HIGHLIGHT_H_
class Highlight {
public:
// . content is an html/xml doc
// . we highlight Query "q" in "xml" as best as we can
// . store highlighted text into "buf"
// . return length stored into "buf"
2014-11-11 01:45:11 +03:00
int32_t set ( //char *buf ,
//int32_t bufLen ,
SafeBuf *sb,
2013-08-03 00:12:24 +04:00
char *content ,
2014-11-11 01:45:11 +03:00
int32_t contentLen ,
2013-08-03 00:12:24 +04:00
char docLangId ,
Query *q ,
bool doStemming ,
bool useAnchors , // = false ,
const char *baseUrl , // = NULL ,
const char *frontTag , // = NULL ,
const char *backTag , // = NULL ,
2014-11-11 01:45:11 +03:00
int32_t fieldCode , // = 0 ,
int32_t niceness ) ;
2013-08-03 00:12:24 +04:00
2014-11-11 01:45:11 +03:00
int32_t set ( //char *buf ,
//int32_t bufLen ,
SafeBuf *sb ,
2013-08-03 00:12:24 +04:00
Words *words ,
Matches *matches ,
bool doStemming ,
bool useAnchors = false ,
const char *baseUrl = NULL ,
const char *frontTag = NULL ,
const char *backTag = NULL ,
2014-11-11 01:45:11 +03:00
int32_t fieldCode = 0 ,
2013-08-03 00:12:24 +04:00
Query *q = NULL ) ;
2014-11-11 01:45:11 +03:00
int32_t getNumMatches() { return m_numMatches; }
2013-08-03 00:12:24 +04:00
private:
bool highlightWords ( Words *words , Matches *m , Query *q=NULL );
// null terminate and store the highlighted content in m_buf
//char *m_buf ;
2014-11-11 01:45:11 +03:00
//int32_t m_bufLen;
//char *m_bufPtr;
//char *m_bufEnd;
class SafeBuf *m_sb;
2013-08-03 00:12:24 +04:00
//Words m_words;
Matches m_matches;
//Xml *m_xml;
const char *m_frontTag;
const char *m_backTag;
2014-11-11 01:45:11 +03:00
int32_t m_frontTagLen;
int32_t m_backTagLen;
2013-08-03 00:12:24 +04:00
bool m_doStemming;
bool m_useAnchors; // click and scroll technology for cached pages
2014-11-11 01:45:11 +03:00
int32_t m_anchorCounts [ MAX_QUERY_TERMS ];
2013-08-03 00:12:24 +04:00
const char *m_baseUrl;
2014-11-11 01:45:11 +03:00
int32_t m_numMatches;
2013-08-03 00:12:24 +04:00
// so we don't repeat the same buf overflow error msg a gazillion times
bool m_didErrMsg;
// . field code of the text excerpt to highlight
// . only query terms with this fieldCode will be highlighted
2014-11-11 01:45:11 +03:00
int32_t m_fieldCode;
2013-08-03 00:12:24 +04:00
};
#endif