mirror of
https://github.com/tstack/lnav.git
synced 2024-10-26 13:16:11 +03:00
some more iterations on data extraction
This commit is contained in:
parent
cd8f0bfddb
commit
da500ce393
@ -14,19 +14,116 @@ static data_token_t UPTO_SEPARATOR[] = {
|
||||
DT_LINE,
|
||||
};
|
||||
|
||||
/*
 * Token sequence recognised by reducePair(), listed in stack order (the
 * most recently pushed element first).
 */
static data_token_t PATTERN_PAIR[] = { DNT_ROW, DT_SEPARATOR, DNT_KEY };
|
||||
|
||||
/*
 * Token sequence recognised by reduceAggregate(), listed in stack order
 * (the most recently pushed element first).  DT_ANY is treated as a
 * wildcard by element_cmp.
 */
static data_token_t PATTERN_AGGREGATE[] = { DT_ANY, DT_COMMA, DNT_AGGREGATE };
|
||||
|
||||
void data_parser::reduceAggregate(void)
|
||||
{
|
||||
std::list<element> reduction;
|
||||
|
||||
if (this->reducePattern(reduction,
|
||||
PATTERN_AGGREGATE,
|
||||
PATTERN_AGGREGATE +
|
||||
sizeof(PATTERN_AGGREGATE) / sizeof(data_token_t))) {
|
||||
struct element &top = this->dp_stack.front();
|
||||
|
||||
this->dp_stack.push_front(element(reduction, DNT_AGGREGATE));
|
||||
top.assign_elements(*reduction.front().e_sub_elements);
|
||||
if (reduction.back().e_sub_elements != NULL)
|
||||
top.assign_elements(*reduction.back().e_sub_elements);
|
||||
else
|
||||
top.e_sub_elements->push_back(reduction.back());
|
||||
}
|
||||
}
|
||||
|
||||
void data_parser::reducePair(void)
|
||||
{
|
||||
std::list<element> reduction;
|
||||
|
||||
this->reduceAggregate();
|
||||
if (this->reduceUpTo(reduction,
|
||||
UPTO_SEPARATOR,
|
||||
UPTO_SEPARATOR +
|
||||
sizeof(UPTO_SEPARATOR) / sizeof(data_token_t))) {
|
||||
this->dp_stack.push_front(element(reduction, DNT_ROW));
|
||||
this->dp_stack.front().assign_elements(reduction);
|
||||
}
|
||||
|
||||
if (this->reducePattern(reduction,
|
||||
PATTERN_PAIR,
|
||||
PATTERN_PAIR +
|
||||
sizeof(PATTERN_PAIR) / sizeof(data_token_t))) {
|
||||
std::list<element>::iterator middle = reduction.begin();
|
||||
|
||||
++middle;
|
||||
reduction.erase(middle);
|
||||
this->dp_stack.push_front(element(reduction, DNT_PAIR));
|
||||
this->dp_stack.front().assign_elements(reduction);
|
||||
}
|
||||
}
|
||||
|
||||
void data_parser::reduce(const element &lookahead)
|
||||
{
|
||||
struct element &top_elem = this->dp_stack.front();
|
||||
std::list<element> reduction;
|
||||
bool push_lookahead = true;
|
||||
|
||||
switch (lookahead.e_token) {
|
||||
case DT_INVALID:
|
||||
case DT_WHITE:
|
||||
push_lookahead = false;
|
||||
break;
|
||||
|
||||
case DT_WHITE:
|
||||
case DT_LINE:
|
||||
this->reducePair();
|
||||
push_lookahead = false;
|
||||
break;
|
||||
|
||||
case DT_COMMA:
|
||||
this->reduceAggregate();
|
||||
if (!this->dp_stack.empty() &&
|
||||
this->dp_stack.front().e_token != DNT_AGGREGATE) {
|
||||
if (this->dp_stack.front().e_token == DT_SEPARATOR) {
|
||||
push_lookahead = false;
|
||||
}
|
||||
else {
|
||||
std::list<element>::iterator next_elem = this->dp_stack.begin();
|
||||
|
||||
advance(next_elem, 1);
|
||||
reduction.splice(reduction.end(),
|
||||
this->dp_stack,
|
||||
this->dp_stack.begin(),
|
||||
next_elem);
|
||||
this->dp_stack.push_front(element(reduction, DNT_AGGREGATE));
|
||||
this->dp_stack.front().assign_elements(reduction);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case DT_SEPARATOR:
|
||||
|
||||
if (this->reducePattern(reduction,
|
||||
PATTERN_KEY,
|
||||
PATTERN_KEY +
|
||||
sizeof(PATTERN_KEY) / sizeof(data_token_t),
|
||||
true)) {
|
||||
this->reducePair();
|
||||
this->dp_stack.push_front(element(reduction, DNT_KEY));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (push_lookahead) {
|
||||
this->dp_stack.push_front(lookahead);
|
||||
}
|
||||
|
||||
// this->print();
|
||||
printf("----\n");
|
||||
}
|
||||
|
@ -12,15 +12,46 @@ class data_parser {
|
||||
|
||||
public:
|
||||
struct element {
|
||||
element() : e_token(DT_INVALID) { };
|
||||
element() : e_token(DT_INVALID), e_sub_elements(NULL) { };
|
||||
element(std::list<element> &subs, data_token_t token)
|
||||
: e_capture(subs.front().e_capture.c_begin,
|
||||
subs.back().e_capture.c_end),
|
||||
e_token(token),
|
||||
e_sub_elements(NULL) {
|
||||
};
|
||||
|
||||
element(const element &other) {
|
||||
assert(other.e_sub_elements == NULL);
|
||||
|
||||
this->e_capture = other.e_capture;
|
||||
this->e_token = other.e_token;
|
||||
};
|
||||
|
||||
~element() {
|
||||
if (this->e_sub_elements != NULL) {
|
||||
delete this->e_sub_elements;
|
||||
this->e_sub_elements = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
void assign_elements(std::list<element> &subs) {
|
||||
this->e_sub_elements = new std::list<element>();
|
||||
this->e_sub_elements->splice(this->e_sub_elements->begin(), subs);
|
||||
};
|
||||
|
||||
pcre_context::capture_t e_capture;
|
||||
data_token_t e_token;
|
||||
|
||||
std::list<element> *e_sub_elements;
|
||||
};
|
||||
|
||||
struct element_cmp {
|
||||
bool operator()(data_token_t token, const element &b) const {
|
||||
return token == b.e_token;
|
||||
bool operator()(data_token_t token, const element &elem) const {
|
||||
return token == elem.e_token || token == DT_ANY;
|
||||
};
|
||||
|
||||
bool operator()(const element &elem, data_token_t token) const {
|
||||
return (*this)(token, elem);
|
||||
};
|
||||
};
|
||||
|
||||
@ -43,7 +74,7 @@ public:
|
||||
|
||||
while (this->dp_scanner->tokenize(pc, elem.e_token)) {
|
||||
elem.e_capture = *(pc.begin());
|
||||
|
||||
|
||||
this->reduce(elem);
|
||||
}
|
||||
};
|
||||
@ -55,24 +86,85 @@ public:
|
||||
const data_token_t *pattern_end,
|
||||
bool repeating = false) {
|
||||
size_t pattern_size = (pattern_end - pattern_start);
|
||||
bool found, retval = false;
|
||||
|
||||
reduction.clear();
|
||||
|
||||
do {
|
||||
found = false;
|
||||
if (pattern_size <= this->dp_stack.size() &&
|
||||
std::equal(pattern_start, pattern_end,
|
||||
this->dp_stack.begin(),
|
||||
element_cmp())) {
|
||||
std::list<element>::iterator match_end = this->dp_stack.begin();
|
||||
|
||||
advance(match_end, pattern_size);
|
||||
reduction.splice(reduction.end(),
|
||||
this->dp_stack,
|
||||
this->dp_stack.begin(),
|
||||
match_end);
|
||||
|
||||
retval = found = true;
|
||||
}
|
||||
} while (found && repeating);
|
||||
|
||||
reduction.reverse();
|
||||
|
||||
return retval;
|
||||
};
|
||||
|
||||
bool reduceUpTo(std::list<element> &reduction,
|
||||
const data_token_t *possibilities_start,
|
||||
const data_token_t *possibilities_end) {
|
||||
size_t poss_size = (possibilities_end - possibilities_start);
|
||||
std::list<element>::iterator iter;
|
||||
bool retval = false;
|
||||
|
||||
reduction.clear();
|
||||
if (pattern_size <= this->dp_stack.size() &&
|
||||
std::equal(pattern_start, pattern_end,
|
||||
this->dp_stack.begin(),
|
||||
element_cmp())) {
|
||||
std::list<element>::iterator match_end = this->dp_stack.begin();
|
||||
|
||||
advance(match_end, pattern_size);
|
||||
reduction.splice(reduction.begin(),
|
||||
iter = std::find_first_of(this->dp_stack.begin(), this->dp_stack.end(),
|
||||
possibilities_start, possibilities_end,
|
||||
element_cmp());
|
||||
if (iter != this->dp_stack.end()) {
|
||||
reduction.splice(reduction.end(),
|
||||
this->dp_stack,
|
||||
this->dp_stack.begin(),
|
||||
match_end);
|
||||
iter);
|
||||
|
||||
retval = true;
|
||||
}
|
||||
|
||||
reduction.reverse();
|
||||
|
||||
return retval;
|
||||
};
|
||||
|
||||
void reduceAggregate(void);
|
||||
void reducePair(void);
|
||||
|
||||
void print(void) {
|
||||
for (std::list<data_parser::element>::iterator iter = this->dp_stack.begin();
|
||||
iter != this->dp_stack.end();
|
||||
++iter) {
|
||||
printf("%d %d:%d %s\n",
|
||||
iter->e_token,
|
||||
iter->e_capture.c_begin,
|
||||
iter->e_capture.c_end,
|
||||
this->dp_scanner->get_input().get_substr(&iter->e_capture).c_str());
|
||||
if (iter->e_sub_elements != NULL) {
|
||||
for (std::list<data_parser::element>::iterator iter2 =
|
||||
iter->e_sub_elements->begin();
|
||||
iter2 != iter->e_sub_elements->end();
|
||||
++iter2) {
|
||||
printf(" %d %d:%d %s\n",
|
||||
iter2->e_token,
|
||||
iter2->e_capture.c_begin,
|
||||
iter2->e_capture.c_end,
|
||||
this->dp_scanner->get_input().get_substr(&iter2->e_capture).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::list<element> dp_stack;
|
||||
|
||||
|
@ -40,8 +40,8 @@ enum data_token_t {
|
||||
DNT_MEASUREMENT,
|
||||
DNT_VARIABLE_KEY,
|
||||
DNT_ROWRANGE,
|
||||
|
||||
T_ANY = 100,
|
||||
|
||||
DT_ANY = 100,
|
||||
};
|
||||
|
||||
class data_scanner {
|
||||
@ -53,6 +53,8 @@ public:
|
||||
|
||||
bool tokenize(pcre_context &pc, data_token_t &token_out);
|
||||
|
||||
pcre_input &get_input() { return this->ds_pcre_input; };
|
||||
|
||||
private:
|
||||
std::string ds_line;
|
||||
pcre_input ds_pcre_input;
|
||||
|
@ -38,7 +38,10 @@
|
||||
*/
|
||||
class pcre_context {
|
||||
public:
|
||||
typedef struct {
|
||||
typedef struct capture {
|
||||
capture() { };
|
||||
capture(int begin, int end) : c_begin(begin), c_end(end) { };
|
||||
|
||||
int c_begin;
|
||||
int c_end;
|
||||
|
||||
|
@ -7,12 +7,15 @@
|
||||
#include "data_scanner.hh"
|
||||
#include "data_parser.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
pcre_context_static<30> pc;
|
||||
|
||||
data_scanner ds("a=1 b=2");
|
||||
data_scanner ds2("a=1 b=2");
|
||||
data_scanner ds("a=1 b=2\n");
|
||||
//data_scanner ds2("a=1 b=2 c=3,4\n");
|
||||
data_scanner ds2("c=3,4\n");
|
||||
data_token_t token;
|
||||
|
||||
while (ds.tokenize(pc, token)) {
|
||||
@ -24,4 +27,27 @@ int main(int argc, char *argv[])
|
||||
data_parser dp(&ds2);
|
||||
|
||||
dp.parse();
|
||||
|
||||
printf("done\n");
|
||||
for (list<data_parser::element>::iterator iter = dp.dp_stack.begin();
|
||||
iter != dp.dp_stack.end();
|
||||
++iter) {
|
||||
printf("%d %d:%d %s\n",
|
||||
iter->e_token,
|
||||
iter->e_capture.c_begin,
|
||||
iter->e_capture.c_end,
|
||||
ds2.get_input().get_substr(&iter->e_capture).c_str());
|
||||
if (iter->e_sub_elements != NULL) {
|
||||
for (list<data_parser::element>::iterator iter2 =
|
||||
iter->e_sub_elements->begin();
|
||||
iter2 != iter->e_sub_elements->end();
|
||||
++iter2) {
|
||||
printf(" %d %d:%d %s\n",
|
||||
iter2->e_token,
|
||||
iter2->e_capture.c_begin,
|
||||
iter2->e_capture.c_end,
|
||||
ds2.get_input().get_substr(&iter2->e_capture).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user