2015-01-25 17:26:39 +03:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
// When TOKENIZER_NAMESPACE is defined, the declarations that follow are
// placed inside a namespace of that name; otherwise they stay at global scope.
#ifdef TOKENIZER_NAMESPACE
namespace TOKENIZER_NAMESPACE {
#endif
|
|
|
|
|
|
|
|
/// Plain bundle of tokenizer settings.
///
/// All members are public. The only member functions declared are the two
/// constructors below, and both are defined outside this header, so the
/// initial values of the fields and the exact copy behaviour cannot be read
/// from here.
///
/// NOTE(review): the remarks on individual fields are inferred from their
/// names only -- confirm against the code that fills in and reads this struct.
/// Member order is kept as-is because it determines initialization order and
/// object layout.
struct Parameters
{
    std::string lang_iso;            // presumably a language code -- TODO confirm
    std::vector<std::string> args;   // presumably remaining arguments -- TODO confirm
    std::string out_path;            // presumably an output location -- TODO confirm

    int nthreads;                    // presumably a worker-thread count -- TODO confirm
    int chunksize;                   // presumably a batch size; unit not visible here

    // Raw pointer to constant characters. This struct declares no destructor,
    // so nothing in this header releases whatever it points to.
    // NOTE(review): presumably a non-owning path to a configuration location;
    // verify lifetime expectations with the code that assigns it.
    const char *cfg_path;

    // Boolean switches. NOTE(review): the "_p" suffix presumably marks a
    // yes/no flag; what each one enables is decided by code not visible here.
    bool verbose_p;
    bool detag_p;
    bool alltag_p;
    bool entities_p;
    bool escape_p;
    bool aggro_p;
    bool supersub_p;
    bool url_p;
    bool downcase_p;
    bool normalize_p;
    bool penn_p;
    bool words_p;
    bool denumber_p;
    bool narrow_latin_p;
    bool narrow_kana_p;
    bool refined_p;
    bool unescape_p;
    bool drop_bad_p;
    bool split_p;
    bool notokenization_p;
    bool para_marks_p;
    bool split_breaks_p;

    // Default constructor; declared here, defined elsewhere.
    Parameters();

    // Copy constructor; declared here, defined elsewhere. No copy-assignment
    // operator is declared, so the compiler-generated member-wise one is used.
    Parameters(const Parameters& _);
};
|
|
|
|
|
|
|
|
|
|
|
|
// Closes the namespace that is opened earlier in this header under the same
// TOKENIZER_NAMESPACE guard.
#ifdef TOKENIZER_NAMESPACE
} // namespace TOKENIZER_NAMESPACE
#endif
|
|
|
|
|
|
|
|
|