open-source-search-engine/StopWords.h

44 lines
1.2 KiB
C
Raw Normal View History

2013-08-03 00:12:24 +04:00
// Matt Wells, copyright Jul 2001
#ifndef _STOPWORDS_H_
#define _STOPWORDS_H_
#include "Unicode.h"
// . this returns true if h is the hash of an ENGLISH stop word
// . list taken from www.superjournal.ac.uk/sj/application/demo/stopword.htm
// . stop words with "mdw" next to them are ones I added
2014-11-11 01:45:11 +03:00
// Takes a character buffer (s, len) plus a 64-bit value h and returns bool.
// NOTE(review): h is presumably the hash of the len bytes at s, and s is
// presumably only read -- confirm against the definition.
bool isStopWord ( char *s , int32_t len , int64_t h ) ;
2013-08-03 00:12:24 +04:00
// used by Synonyms.cpp
2014-10-30 22:36:39 +03:00
// Variant that receives its 64-bit argument through a pointer.
// NOTE(review): presumably *h is a word hash and is not modified -- confirm.
bool isStopWord2 ( int64_t *h ) ;
2013-08-03 00:12:24 +04:00
2014-11-11 01:45:11 +03:00
// Variant that takes a 32-bit value instead of a 64-bit one.
// NOTE(review): presumably h is a 32-bit word hash -- confirm.
bool isStopWord32 ( int32_t h ) ;
2013-08-03 00:12:24 +04:00
//just a stub for now
2014-11-11 01:45:11 +03:00
//bool isStopWord ( UChar *s , int32_t len , int64_t h );
2013-08-03 00:12:24 +04:00
// . damn i forgot to include these above
// . i need these so m_bitScores in IndexTable.cpp doesn't have to require
// them! Otherwise, it's like all queries have quotes around them again...
2014-11-11 01:45:11 +03:00
// Same parameter list and return type as isStopWord().
// NOTE(review): presumably checks a query-specific stop word list rather
// than the general one -- confirm against the definition.
bool isQueryStopWord ( char *s , int32_t len , int64_t h ) ;
//bool isQueryStopWord ( UChar *s , int32_t len , int64_t h ) ;
2013-08-03 00:12:24 +04:00
// is it a COMMON word?
2014-11-11 01:45:11 +03:00
// Returns an int32_t rather than a bool.
// NOTE(review): the meaning of the returned value is not visible in this
// header; presumably 0 means "not common" -- check the definition.
int32_t isCommonWord ( int64_t h ) ;
2013-08-03 00:12:24 +04:00
2014-11-11 01:45:11 +03:00
// Takes a 64-bit value and returns an int32_t, like isCommonWord().
// NOTE(review): presumably h is a word hash and the result is non-zero for
// common English query words -- confirm against the definition.
int32_t isCommonQueryWordInEnglish ( int64_t h ) ;
2013-08-03 00:12:24 +04:00
2014-11-11 01:45:11 +03:00
// Takes a HashTableX to operate on, an array of C strings with an int32_t
// size, and a label string; returns bool.
// NOTE(review): whether size is a byte count or an element count, and what
// label is used for, is not visible in this header -- check the definition.
// NOTE(review): the date line sitting between the two halves of this
// prototype looks like blame-view residue and is not valid C++ -- confirm
// against the original file.
bool initWordTable(class HashTableX *table, char* words[], int32_t size ,
2013-08-03 00:12:24 +04:00
char *label);
2014-10-30 22:36:39 +03:00
// Receives its 64-bit argument through a pointer, like isStopWord2().
// NOTE(review): presumably *hp is a word hash and is not modified -- confirm.
bool isVerb ( int64_t *hp ) ;
2013-08-03 00:12:24 +04:00
// . meant for Process.cpp::resetAll() to call when exiting, to free all mem
// . takes no arguments and returns nothing
void resetStopWordTables();
// Declaration only; the object itself is defined in another file.
// NOTE(review): presumably the table consulted by isStopWord32() -- confirm.
extern HashTableX s_table32;
#endif