open-source-search-engine/Language.h


#ifndef _LANGUAGE_H_
#define _LANGUAGE_H_
//#include <wchar.h>
#include "gb-include.h"
//#include "UnicodeProperties.h" //UChar32
#include "File.h"
#include "HashTableT.h"
#include "Query.h"
#include "Lang.h"
#include "Multicast.h"
#include "Threads.h"
#include "Titledb.h"
#include "Iso8859.h"
#include "IndexList.h"
//#include "Msg3a.h"

#include "Msg20.h"
#include "Msg37.h"

// Compile-time limits shared by the speller code.
// Any macro whose replacement is an expression is wrapped in parentheses so
// that it expands safely inside a larger expression (x / M, x % M, -M, ...).

#define MAX_WORDS_PER_PHRASE 5
// max chars in any language; sizes Language::m_ruleStarts[] and
// Language::m_ruleChars[]
#define MAX_CHARS 256
// was "40 * 1024" without parentheses, which mis-evaluates in expressions
// such as "n / TOP_POP_PHRASES"
#define TOP_POP_PHRASES (40 * 1024)
#define NUM_CHARS 40
// in this header only referenced by the commented-out StateWik class
#define MAX_FRAG_SIZE 1024

// size in chars of the Reco::reco buffer
#define MAX_PHRASE_LEN 80
#define MAX_RECOMMENDATIONS 10
#define LARGE_SCORE 0xfffff
#define MAX_NARROW_SEARCHES 19

/*
// used only while generating titles from wikipedia pages, makeWikiFiles()
class StateWik {
public:
	bool getIndexList(  );
	bool getSummary (  );
	bool gotSummary (  );

	int       m_fdw;
	Msg0      m_msg0;
	IndexList m_list;
	Query     m_q;
	key_t     m_startKey;
	key_t     m_endKey;
	char     *m_coll;
	int32_t      m_collLen;
	int64_t m_termId;
	int32_t      m_minRecSize;
	Msg20     m_msg20s[MAX_FRAG_SIZE];
	int32_t      m_numMsg20sOutstanding;
	int32_t      m_numMsg20sLaunched;
	int32_t      m_numMsg20sReceived;
};

class StateDict{
 public:
	char      *m_dictBuf;
	int32_t       m_dictBufSize;
	char      *m_buf;
	int32_t       m_bufSize;
	char     **m_wordsPtr;
	int64_t *m_termIds;
	int64_t *m_termFreqs;
	int32_t       m_numTuples;
	Msg37      m_msg37;
};
*/

/*class StateAff{
 public:
	bool openAffinityFile ( );
	bool launchAffinity ( );
	bool gotAffinityFreqs1 ( );
	bool gotAffinityFreqs2 ( );
	bool doneAffinities ( );

	FILE      *m_fdr;
	int        m_fdw;
	int32_t       m_fileNum;
	char       m_buf[1026];
	Msg3a      m_msg3a;
	Query      m_q;
	int64_t  m_numerator;
	int64_t  m_denominator;
	};*/

// One entry of the Reco array passed to Language::tryPhonet(): a text buffer
// of MAX_PHRASE_LEN chars paired with a 32-bit score.
// NOTE(review): presumably a candidate spelling recommendation and its
// ranking score -- confirm against the implementation.
struct Reco {
	char    reco[MAX_PHRASE_LEN];
	int32_t score;
};
typedef struct Reco Reco;

// Speller state for a single language. This header only declares the
// interface and the data members (phonetic rules, dictionary buffers, hash
// tables and edit-distance tuning parms); every non-inline member function
// is defined outside this header.
// NOTE(review): several declarations below are spelled "Mispelled"/
// "Mispelling"; the names are part of the interface and are left unchanged.
class Language {

 public:

	Language();
	~Language();

	void reset();

	// NOTE(review): unifiedBuf/unifiedBufSize look like a preloaded
	// dictionary buffer and hostsPerSplit/myHash like sharding inputs;
	// the body is not visible here -- confirm before relying on this.
	bool init( char *unifiedBuf, int32_t unifiedBufSize, int32_t lang, 
		   int32_t hostsPerSplit, uint32_t myHash );

	// overwrites m_lang only; nothing else is touched by this inline setter
	void setLang( int32_t lang ) { m_lang = lang; };
	
	//bool makeAffinities();

	//int32_t getPhrasePopularity ( char *s, uint64_t h,
	//		       bool checkTitleRecDict );

	bool checkDict(char *s, int32_t slen, char encodeType);

	// . origWord/origWordLen is the input, recommendation/recommendationLen
	//   is a caller-supplied output buffer
	// . found, score and popularity are out-parameters (passed by pointer)
	// . forceReco defaults to false
	// . NOTE(review): meaning of the bool return value is not visible here
	bool getRecommendation( char *origWord, int32_t origWordLen,
				char *recommendation, int32_t recommendationLen,
				bool *found, int32_t *score, int32_t *popularity, 
				bool  forceReco = false );

	//int32_t narrowPhrase ( char *request, char *phrases, int32_t *pops, 
	//		    int32_t maxPhrases );

	//bool generateDicts ( int32_t numWordsToDump , char *coll );

	//bool convertLatin1DictToUTF8 ( char *infile );

	// needed for makeDict
	//bool       gotTermFreqs( StateDict *st );
	//StateDict *m_stateDict;

	// hash table of the dictionary
	// (the only public data member; 64-bit key, 32-bit value)
	HashTableT <uint64_t, int32_t>m_dict;

 private:
	int32_t spellcheckDict();

	// always accepts only ascii chars. makeClean() converts unicode into
	// ascii
	// (target/targetLen is the caller-supplied output buffer)
	bool getPhonetic( char *origWord, int32_t origWordLen,
			  char *target, int32_t targetLen );

	// NOTE(review): presumably fills m_rulesBuf/m_rulesPtr/m_numRules;
	// body not visible in this header
	bool loadRules();

	bool loadSpellerDict( char *spellerBuf, int32_t spellerbufSize,
			      int32_t hostsPerSplit, uint32_t myHash );

	//bool loadTitleRecDicts( );

	//bool loadNarrow( char *spellerBuf, int32_t spellerBufSize, 
	//		 int32_t hostsPerSplit, uint32_t myHash );

	bool loadDictHashTable( );

	//bool genTopPopFile ( char *infile );

	bool genDistributedPopFile ( char *infile, uint32_t myHash );
	
	//bool cleanDictFile ( );

	// inBuf/inBufSize is the input, outBuf/outBufSize the output buffer;
	// see the getPhonetic() comment above for what "clean" means here
	bool makeClean( char *inBuf, int32_t inBufSize,
			char *outBuf, int32_t outBufSize );//, bool isUTF16 );
	
	//bool makePhonet( char *infile);

	//bool makeDict();

	//bool makeQueryFiles ( );

	//bool makeWikiFiles ( );

	bool loadWikipediaWords();

	bool loadMispelledWords();
	
	bool hasMispelling(char *phrase, int32_t phraseLen);

	// recos/numRecos is a caller-owned array of Reco entries and
	// lowestScore is passed by pointer.
	// NOTE(review): return value semantics not visible here
	int32_t tryPhonet( char *phonetTmp, char *origPhonet,
			char *origClean, int32_t tryForScore,
			Reco *recos, int32_t numRecos, int32_t *lowestScore );

	// four-argument overload; a separate two-argument editDistance()
	// returning int16_t is declared further down
	int32_t editDistance( char *a, char *b, int32_t level, // starting level
			   int32_t limit ); // maximum level

	int32_t weightedAverage(int32_t soundslikeScore, int32_t wordScore);

	int32_t limitEditDistance( char *a, char *b, int32_t limit );

	int32_t limit1EditDistance( char *a, char *b );

	int32_t limit2EditDistance( char *a, char *b );

	int32_t checkRest( char *a, char *b, int32_t w, char *amax, int32_t min );

	int32_t check2( char *a, char *b, int32_t w, char *amax, int32_t min );

	// two-argument overload of editDistance(); note the narrower int16_t
	// return type compared with the four-argument overload above
	int16_t editDistance( char *a0, char *b0 );

	int16_t reduceScore ( char *a, char *b );

	//bool makeWordFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,
	//		     char *coll );

	//bool makePopFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,
	//			    char *coll);

	//bool makeScoreFiles ( int32_t maxWordsPerFile );

	// this map maps a char to a "dict char"
	//unsigned char m_map [ 256 ];

	// . when comparing letter pairs, we only allow them to consist of
	//   certain chars: 0-9, A-Z, apostrophe and space and \0 otherwise
	//   m_table gets too big. This implies a NUM_CHARS of 
	// . this compressed the value, too
	// . \0, space, 0-9, A-Z, \'   is the ordering
	//unsigned char to_dict_char ( unsigned char c ) { return m_map[c]; };

	// Temporary unicode workaround for latin-1 compatibility
	//unsigned char uc_to_dict_char ( UChar c ) { 
	//	if (c>255)c=0;
	//	return m_map[c]; 
	//};

	// what language loaded
	// (written by setLang())
	int32_t  m_lang;

	// what charset does this language use
	unsigned char    m_charset;

	// buffer to store the phonetic rules
	char   *m_rulesBuf;
	int32_t    m_rulesBufSize;
	char  **m_rulesPtr;
	int32_t    m_rulesPtrSize;
	int32_t    m_numRules;
	// points to the index of each rule that starts with a new character
	// (one slot per possible char value, MAX_CHARS in total)
	int32_t    m_ruleStarts[MAX_CHARS];
	// the chars that are in a phonet
	// (one flag per possible char value, MAX_CHARS in total)
	bool    m_ruleChars[MAX_CHARS];

	// buffers to store the dictionaries
	char  *m_distributedBuf;
	int32_t   m_distributedBufSize;
	char **m_tuplePtr;
	int32_t   m_tuplePtrSize;
	int32_t   m_numTuples;

	// total number of phonets
	int32_t m_numPhonets;

	// narrow phrase
	// NOTE(review): the only functions naming "narrow" in this header
	// (narrowPhrase, loadNarrow) are commented out -- confirm whether
	// these members are still used by the implementation
	char  *m_narrowBuf;
	int32_t   m_narrowBufSize;
	int32_t   m_numNarrowPtrs;
	char **m_frntPtrs;
	char **m_bckPtrs;
	int32_t  *m_frntCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];
	int32_t  *m_bckCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];

	// m_phonetics stores the hash of the phonetic as the key.
	// the value is a composite of index in m_tuplePtrs where the list
	// starts as the high 32 bits of the value and the number of 
	// words having the same phonetic as the low 32 bits of the value
	// ("m_tuplePtrs" above refers to the member declared as m_tuplePtr)
	HashTableT <uint64_t, uint64_t > m_phonetics;

	// hash table of the distributed pop words dictionary
	//	HashTableT <uint32_t, int32_t> m_titlerecDict;

	// hash table of the distributed pop words dictionary
	HashTableT <uint64_t, int32_t>m_distributedPopPhrases;

	// hash table of the top popular words in the dictionary
	//	HashTableT <uint32_t, char *> m_topPopPhrases;

	// hash table of misspelled words
	HashTableT <uint32_t, bool>m_misp;

	// hash table of wikipedia words
	HashTableT <uint32_t, bool>m_wiki;

	// PARMS, which can be adjusted. Currently all languages have the 
	// same adjustments, so using the same parms.
	int32_t m_editDistanceWeightsDel1;
	int32_t m_editDistanceWeightsDel2;
	int32_t m_editDistanceWeightsSwap;
	int32_t m_editDistanceWeightsSub;
	int32_t m_editDistanceWeightsSimilar;
	int32_t m_editDistanceWeightsMin;
	int32_t m_editDistanceWeightsMax;
	int32_t m_soundslikeWeight;
	int32_t m_wordWeight;
	int32_t m_span;

	bool m_followup;
	bool m_collapseResult;
	bool m_removeAccents;
};

#endif
Initial file population. 2013-08-03 00:12:24 +04:00
			`#ifndef _LANGUAGE_H_`
			`#define _LANGUAGE_H_`
			`//#include <wchar.h>`
			`#include "gb-include.h"`
			`//#include "UnicodeProperties.h" //UChar32`
			`#include "File.h"`
			`#include "HashTableT.h"`
			`#include "Query.h"`
			`#include "Lang.h"`
			`#include "Multicast.h"`
			`#include "Threads.h"`
			`#include "Titledb.h"`
			`#include "Iso8859.h"`
			`#include "IndexList.h"`
			`//#include "Msg3a.h"`

			`#include "Msg20.h"`
			`#include "Msg37.h"`

			`// max chars in any language`
			`#define MAX_WORDS_PER_PHRASE 5`
			`#define MAX_CHARS 256`
			`#define TOP_POP_PHRASES 40 * 1024`
			`#define NUM_CHARS 40`
			`#define MAX_FRAG_SIZE 1024`
			`// max chars that start the rule`

			`#define MAX_PHRASE_LEN 80`
			`#define MAX_RECOMMENDATIONS 10`
			`#define LARGE_SCORE 0xfffff`
			`#define MAX_NARROW_SEARCHES 19`

			`/*`
			`// used only while generating titles from wikipedia pages, makeWikiFiles()`
			`class StateWik {`
			`public:`
			`bool getIndexList( );`
			`bool getSummary ( );`
			`bool gotSummary ( );`

			`int m_fdw;`
			`Msg0 m_msg0;`
			`IndexList m_list;`
			`Query m_q;`
			`key_t m_startKey;`
			`key_t m_endKey;`
			`char *m_coll;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_collLen;`
replace long long with int64_t 2014-10-30 22:36:39 +03:00			`int64_t m_termId;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_minRecSize;`
Initial file population. 2013-08-03 00:12:24 +04:00			`Msg20 m_msg20s[MAX_FRAG_SIZE];`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_numMsg20sOutstanding;`
			`int32_t m_numMsg20sLaunched;`
			`int32_t m_numMsg20sReceived;`
Initial file population. 2013-08-03 00:12:24 +04:00			`};`

			`class StateDict{`
			`public:`
			`char *m_dictBuf;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_dictBufSize;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char *m_buf;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_bufSize;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char **m_wordsPtr;`
replace long long with int64_t 2014-10-30 22:36:39 +03:00			`int64_t *m_termIds;`
			`int64_t *m_termFreqs;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_numTuples;`
Initial file population. 2013-08-03 00:12:24 +04:00			`Msg37 m_msg37;`
			`};`
			`*/`

			`/*class StateAff{`
			`public:`
			`bool openAffinityFile ( );`
			`bool launchAffinity ( );`
			`bool gotAffinityFreqs1 ( );`
			`bool gotAffinityFreqs2 ( );`
			`bool doneAffinities ( );`

			`FILE *m_fdr;`
			`int m_fdw;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_fileNum;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char m_buf[1026];`
			`Msg3a m_msg3a;`
			`Query m_q;`
replace long long with int64_t 2014-10-30 22:36:39 +03:00			`int64_t m_numerator;`
			`int64_t m_denominator;`
Initial file population. 2013-08-03 00:12:24 +04:00			`};*/`

			`typedef struct Reco{`
			`char reco[MAX_PHRASE_LEN];`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t score;`
Initial file population. 2013-08-03 00:12:24 +04:00			`}Reco;`

			`class Language {`

			`public:`

			`Language();`
			`~Language();`

			`void reset();`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool init( char *unifiedBuf, int32_t unifiedBufSize, int32_t lang,`
			`int32_t hostsPerSplit, uint32_t myHash );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`void setLang( int32_t lang ) { m_lang = lang; };`
Initial file population. 2013-08-03 00:12:24 +04:00
			`//bool makeAffinities();`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//int32_t getPhrasePopularity ( char *s, uint64_t h,`
Initial file population. 2013-08-03 00:12:24 +04:00			`// bool checkTitleRecDict );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool checkDict(char *s, int32_t slen, char encodeType);`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool getRecommendation( char *origWord, int32_t origWordLen,`
			`char *recommendation, int32_t recommendationLen,`
			`bool *found, int32_t *score, int32_t *popularity,`
Initial file population. 2013-08-03 00:12:24 +04:00			`bool forceReco = false );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//int32_t narrowPhrase ( char *request, char *phrases, int32_t *pops,`
			`// int32_t maxPhrases );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//bool generateDicts ( int32_t numWordsToDump , char *coll );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`//bool convertLatin1DictToUTF8 ( char *infile );`

			`// needed for makeDict`
			`//bool gotTermFreqs( StateDict *st );`
			`//StateDict *m_stateDict;`

			`// hash table of the dictionary`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`HashTableT <uint64_t, int32_t>m_dict;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`private:`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t spellcheckDict();`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// always accepts only ascii chars. makeClean() converts unicode into`
			`// ascii`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool getPhonetic( char *origWord, int32_t origWordLen,`
			`char *target, int32_t targetLen );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`bool loadRules();`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool loadSpellerDict( char *spellerBuf, int32_t spellerbufSize,`
			`int32_t hostsPerSplit, uint32_t myHash );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`//bool loadTitleRecDicts( );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//bool loadNarrow( char *spellerBuf, int32_t spellerBufSize,`
			`// int32_t hostsPerSplit, uint32_t myHash );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`bool loadDictHashTable( );`

			`//bool genTopPopFile ( char *infile );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool genDistributedPopFile ( char *infile, uint32_t myHash );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`//bool cleanDictFile ( );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool makeClean( char *inBuf, int32_t inBufSize,`
			`char *outBuf, int32_t outBufSize );//, bool isUTF16 );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`//bool makePhonet( char *infile);`

			`//bool makeDict();`

			`//bool makeQueryFiles ( );`

			`//bool makeWikiFiles ( );`

			`bool loadWikipediaWords();`

			`bool loadMispelledWords();`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`bool hasMispelling(char *phrase, int32_t phraseLen);`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t tryPhonet( char *phonetTmp, char *origPhonet,`
			`char *origClean, int32_t tryForScore,`
			`Reco *recos, int32_t numRecos, int32_t *lowestScore );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t editDistance( char *a, char *b, int32_t level, // starting level`
			`int32_t limit ); // maximum level`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t weightedAverage(int32_t soundslikeScore, int32_t wordScore);`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t limitEditDistance( char *a, char *b, int32_t limit );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t limit1EditDistance( char *a, char *b );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t limit2EditDistance( char *a, char *b );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t checkRest( char *a, char *b, int32_t w, char *amax, int32_t min );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t check2( char *a, char *b, int32_t w, char *amax, int32_t min );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int16_t editDistance( char *a0, char *b0 );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int16_t reduceScore ( char *a, char *b );`
Initial file population. 2013-08-03 00:12:24 +04:00
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//bool makeWordFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,`
Initial file population. 2013-08-03 00:12:24 +04:00			`// char *coll );`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//bool makePopFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,`
Initial file population. 2013-08-03 00:12:24 +04:00			`// char *coll);`

now it compiles with -m32 2014-11-11 01:45:11 +03:00			`//bool makeScoreFiles ( int32_t maxWordsPerFile );`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// this map maps a char to a "dict char"`
			`//unsigned char m_map [ 256 ];`

			`// . when comparing letter pairs, we only allow them to consist of`
			`// certain chars: 0-9, A-Z, apostrophe and space and \0 otherwise`
			`// m_table gets too big. This implies a NUM_CHARS of`
			`// . this compressed the value, too`
			`// . \0, space, 0-9, A-Z, \' is the ordering`
			`//unsigned char to_dict_char ( unsigned char c ) { return m_map[c]; };`

			`// Temporary unicode workaround for latin-1 compatibility`
			`//unsigned char uc_to_dict_char ( UChar c ) {`
			`// if (c>255)c=0;`
			`// return m_map[c];`
			`//};`

			`// what language loaded`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_lang;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// what charset does this language use`
			`unsigned char m_charset;`

			`// buffer to store the phonetic rules`
			`char *m_rulesBuf;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_rulesBufSize;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char **m_rulesPtr;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_rulesPtrSize;`
			`int32_t m_numRules;`
Initial file population. 2013-08-03 00:12:24 +04:00			`// points to the index of each rule that starts with a new character`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_ruleStarts[MAX_CHARS];`
Initial file population. 2013-08-03 00:12:24 +04:00			`// the chars that are in a phonet`
			`bool m_ruleChars[MAX_CHARS];`

			`// buffers to store the dictionaries`
			`char *m_distributedBuf;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_distributedBufSize;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char **m_tuplePtr;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_tuplePtrSize;`
			`int32_t m_numTuples;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// total number of phonets`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_numPhonets;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// narrow phrase`
			`char *m_narrowBuf;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_narrowBufSize;`
			`int32_t m_numNarrowPtrs;`
Initial file population. 2013-08-03 00:12:24 +04:00			`char **m_frntPtrs;`
			`char **m_bckPtrs;`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t *m_frntCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];`
			`int32_t *m_bckCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// m_phonetics stores the hash of the phonetic as the key.`
			`// the value is a composite of index in m_tuplePtrs where the list`
			`// starts as the high 32 bits of the value and the number of`
			`// words having the same phonetic as the low 32 bits of the value`
replaced unsigned long long with uint64_t 2014-10-30 22:30:39 +03:00			`HashTableT <uint64_t, uint64_t > m_phonetics;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// hash table of the distributed pop words dictionary`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`// HashTableT <uint32_t, int32_t> m_titlerecDict;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// hash table of the distributed pop words dictionary`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`HashTableT <uint64_t, int32_t>m_distributedPopPhrases;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// hash table of the top popular words in the dictionary`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`// HashTableT <uint32_t, char *> m_topPopPhrases;`
Initial file population. 2013-08-03 00:12:24 +04:00
codespell: spelling corrections 2021-05-05 18:52:55 +03:00			`// hash table of misspelled words`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`HashTableT <uint32_t, bool>m_misp;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// hash table of wikipedia words`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`HashTableT <uint32_t, bool>m_wiki;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`// PARMS, which can be adjusted. Currently all languages have the`
			`// same adjustments, so using the same parms.`
now it compiles with -m32 2014-11-11 01:45:11 +03:00			`int32_t m_editDistanceWeightsDel1;`
			`int32_t m_editDistanceWeightsDel2;`
			`int32_t m_editDistanceWeightsSwap;`
			`int32_t m_editDistanceWeightsSub;`
			`int32_t m_editDistanceWeightsSimilar;`
			`int32_t m_editDistanceWeightsMin;`
			`int32_t m_editDistanceWeightsMax;`
			`int32_t m_soundslikeWeight;`
			`int32_t m_wordWeight;`
			`int32_t m_span;`
Initial file population. 2013-08-03 00:12:24 +04:00
			`bool m_followup;`
			`bool m_collapseResult;`
			`bool m_removeAccents;`
			`};`

			`#endif`