1
1
mirror of https://github.com/tstack/lnav.git synced 2024-10-26 13:16:11 +03:00

some more iterations on data extraction

This commit is contained in:
Tim Stack 2011-06-14 07:21:53 -07:00
parent cd8f0bfddb
commit da500ce393
5 changed files with 240 additions and 20 deletions

View File

@ -14,19 +14,116 @@ static data_token_t UPTO_SEPARATOR[] = {
DT_LINE,
};
// Stack pattern recognised by reducePair() as a complete pair.
// reducePattern() compares a pattern against dp_stack starting at the front,
// and new elements are pushed on the front, so the entries are listed from
// the top of the stack downward: the row, the separator, then the key.
static data_token_t PATTERN_PAIR[] = {
DNT_ROW,
DT_SEPARATOR,
DNT_KEY,
};
// Stack pattern recognised by reduceAggregate(), listed from the top of the
// stack downward: an element of any type (DT_ANY is treated as a wildcard by
// element_cmp), a comma, and an aggregate that was built earlier.
static data_token_t PATTERN_AGGREGATE[] = {
DT_ANY,
DT_COMMA,
DNT_AGGREGATE,
};
void data_parser::reduceAggregate(void)
{
std::list<element> reduction;
if (this->reducePattern(reduction,
PATTERN_AGGREGATE,
PATTERN_AGGREGATE +
sizeof(PATTERN_AGGREGATE) / sizeof(data_token_t))) {
struct element &top = this->dp_stack.front();
this->dp_stack.push_front(element(reduction, DNT_AGGREGATE));
top.assign_elements(*reduction.front().e_sub_elements);
if (reduction.back().e_sub_elements != NULL)
top.assign_elements(*reduction.back().e_sub_elements);
else
top.e_sub_elements->push_back(reduction.back());
}
}
void data_parser::reducePair(void)
{
std::list<element> reduction;
this->reduceAggregate();
if (this->reduceUpTo(reduction,
UPTO_SEPARATOR,
UPTO_SEPARATOR +
sizeof(UPTO_SEPARATOR) / sizeof(data_token_t))) {
this->dp_stack.push_front(element(reduction, DNT_ROW));
this->dp_stack.front().assign_elements(reduction);
}
if (this->reducePattern(reduction,
PATTERN_PAIR,
PATTERN_PAIR +
sizeof(PATTERN_PAIR) / sizeof(data_token_t))) {
std::list<element>::iterator middle = reduction.begin();
++middle;
reduction.erase(middle);
this->dp_stack.push_front(element(reduction, DNT_PAIR));
this->dp_stack.front().assign_elements(reduction);
}
}
// Process one lookahead token: run whatever reductions the token type
// triggers on dp_stack, then push the token onto the stack unless the
// selected case cleared push_lookahead.
void data_parser::reduce(const element &lookahead)
{
// NOTE(review): top_elem is not used anywhere in this body, and front() is
// called here without checking that the stack is non-empty.
struct element &top_elem = this->dp_stack.front();
std::list<element> reduction;
bool push_lookahead = true;
switch (lookahead.e_token) {
// These tokens are dropped without touching the stack.
// NOTE(review): DT_WHITE is listed under two case labels (here and in the
// next group); a switch cannot have duplicate labels, so only one of them
// should remain -- confirm which behaviour is intended.
case DT_INVALID:
case DT_WHITE:
push_lookahead = false;
break;
// These tokens trigger a pair reduction and are then dropped.
case DT_WHITE:
case DT_LINE:
this->reducePair();
push_lookahead = false;
break;
case DT_COMMA:
// First try to extend an aggregate that is already on the stack.
this->reduceAggregate();
if (!this->dp_stack.empty() &&
this->dp_stack.front().e_token != DNT_AGGREGATE) {
if (this->dp_stack.front().e_token == DT_SEPARATOR) {
// A comma directly on top of a separator is discarded.
push_lookahead = false;
}
else {
// Wrap the single element on top of the stack in a new aggregate so
// that later commas can extend it.
std::list<element>::iterator next_elem = this->dp_stack.begin();
advance(next_elem, 1);
reduction.splice(reduction.end(),
this->dp_stack,
this->dp_stack.begin(),
next_elem);
this->dp_stack.push_front(element(reduction, DNT_AGGREGATE));
this->dp_stack.front().assign_elements(reduction);
}
}
break;
case DT_SEPARATOR:
// Repeatedly strip PATTERN_KEY matches (defined outside this view) from
// the top of the stack.  On a match, whatever remains is offered to
// reducePair(), and a DNT_KEY element spanning the matched tokens is
// pushed; the matched tokens themselves are not kept as children.
if (this->reducePattern(reduction,
PATTERN_KEY,
PATTERN_KEY +
sizeof(PATTERN_KEY) / sizeof(data_token_t),
true)) {
this->reducePair();
this->dp_stack.push_front(element(reduction, DNT_KEY));
}
break;
}
if (push_lookahead) {
this->dp_stack.push_front(lookahead);
}
// this->print();
// Marker line written to stdout after every processed token.
printf("----\n");
}

View File

@ -12,15 +12,46 @@ class data_parser {
public:
/**
 * One entry on the parser stack: a token type, the captured range of the
 * input it covers and, for composite elements, an owned list of children.
 *
 * NOTE(review): copy assignment is still the compiler-generated shallow
 * copy, so assigning an element that owns children would lead to a double
 * delete.  No such assignment is visible in this code.
 */
struct element {
    /** Create an invalid element that covers nothing and has no children. */
    element() : e_token(DT_INVALID), e_sub_elements(NULL) { };

    /**
     * Create a childless element of type `token` whose capture runs from the
     * start of the first element of `subs` to the end of the last one.
     *
     * @param subs  Elements being covered; must not be empty.  The list is
     *              only read here, use assign_elements() to take ownership.
     * @param token The type of the new element.
     */
    element(std::list<element> &subs, data_token_t token)
        : e_capture(subs.front().e_capture.c_begin,
                    subs.back().e_capture.c_end),
          e_token(token),
          e_sub_elements(NULL) {
    };

    /**
     * Copy the capture and token of a childless element.  Elements that own
     * children must be moved with std::list::splice instead of copied.
     */
    element(const element &other)
        : e_capture(other.e_capture),
          e_token(other.e_token),
          // Must be initialized, otherwise the destructor would delete an
          // indeterminate pointer.
          e_sub_elements(NULL) {
        assert(other.e_sub_elements == NULL);
    };

    ~element() {
        delete this->e_sub_elements;
        this->e_sub_elements = NULL;
    };

    /**
     * Take ownership of every element in `subs`, leaving `subs` empty.  Any
     * children this element already owned are released.
     */
    void assign_elements(std::list<element> &subs) {
        std::list<element> *replacement = new std::list<element>();

        // Splice before deleting so that passing the current child list as
        // `subs` still works.
        replacement->splice(replacement->begin(), subs);
        delete this->e_sub_elements;
        this->e_sub_elements = replacement;
    };

    pcre_context::capture_t e_capture;
    data_token_t e_token;
    std::list<element> *e_sub_elements;
};
struct element_cmp {
bool operator()(data_token_t token, const element &b) const {
return token == b.e_token;
bool operator()(data_token_t token, const element &elem) const {
return token == elem.e_token || token == DT_ANY;
};
bool operator()(const element &elem, data_token_t token) const {
return (*this)(token, elem);
};
};
@ -43,7 +74,7 @@ public:
while (this->dp_scanner->tokenize(pc, elem.e_token)) {
elem.e_capture = *(pc.begin());
this->reduce(elem);
}
};
@ -55,24 +86,85 @@ public:
const data_token_t *pattern_end,
bool repeating = false) {
size_t pattern_size = (pattern_end - pattern_start);
bool found, retval = false;
reduction.clear();
do {
found = false;
if (pattern_size <= this->dp_stack.size() &&
std::equal(pattern_start, pattern_end,
this->dp_stack.begin(),
element_cmp())) {
std::list<element>::iterator match_end = this->dp_stack.begin();
advance(match_end, pattern_size);
reduction.splice(reduction.end(),
this->dp_stack,
this->dp_stack.begin(),
match_end);
retval = found = true;
}
} while (found && repeating);
reduction.reverse();
return retval;
};
/**
 * Move every element above the nearest delimiter off the stack.
 *
 * Searches dp_stack from the top for the first element whose token matches
 * one of the given possibilities.  When one is found, all elements above it
 * are moved into `reduction`; the delimiter itself stays on the stack.
 *
 * @param reduction           Cleared first; receives the removed elements,
 *                            oldest first.
 * @param possibilities_start First delimiter token to look for.
 * @param possibilities_end   One past the last delimiter token.
 * @return True when a delimiter was found.  Note that `reduction` is empty
 *         in that case if the delimiter was already on top of the stack.
 */
bool reduceUpTo(std::list<element> &reduction,
                const data_token_t *possibilities_start,
                const data_token_t *possibilities_end) {
    std::list<element>::iterator iter;
    bool retval = false;

    reduction.clear();
    iter = std::find_first_of(this->dp_stack.begin(), this->dp_stack.end(),
                              possibilities_start, possibilities_end,
                              element_cmp());
    if (iter != this->dp_stack.end()) {
        reduction.splice(reduction.end(),
                         this->dp_stack,
                         this->dp_stack.begin(),
                         iter);
        retval = true;
    }

    // The stack is newest-first; flip the result into input order.
    reduction.reverse();

    return retval;
};
void reduceAggregate(void);
void reducePair(void);
void print(void) {
for (std::list<data_parser::element>::iterator iter = this->dp_stack.begin();
iter != this->dp_stack.end();
++iter) {
printf("%d %d:%d %s\n",
iter->e_token,
iter->e_capture.c_begin,
iter->e_capture.c_end,
this->dp_scanner->get_input().get_substr(&iter->e_capture).c_str());
if (iter->e_sub_elements != NULL) {
for (std::list<data_parser::element>::iterator iter2 =
iter->e_sub_elements->begin();
iter2 != iter->e_sub_elements->end();
++iter2) {
printf(" %d %d:%d %s\n",
iter2->e_token,
iter2->e_capture.c_begin,
iter2->e_capture.c_end,
this->dp_scanner->get_input().get_substr(&iter2->e_capture).c_str());
}
}
}
};
std::list<element> dp_stack;

View File

@ -40,8 +40,8 @@ enum data_token_t {
DNT_MEASUREMENT,
DNT_VARIABLE_KEY,
DNT_ROWRANGE,
T_ANY = 100,
DT_ANY = 100,
};
class data_scanner {
@ -53,6 +53,8 @@ public:
bool tokenize(pcre_context &pc, data_token_t &token_out);
pcre_input &get_input() { return this->ds_pcre_input; };
private:
std::string ds_line;
pcre_input ds_pcre_input;

View File

@ -38,7 +38,10 @@
*/
class pcre_context {
public:
typedef struct {
typedef struct capture {
capture() { };
capture(int begin, int end) : c_begin(begin), c_end(end) { };
int c_begin;
int c_end;

View File

@ -7,12 +7,15 @@
#include "data_scanner.hh"
#include "data_parser.hh"
using namespace std;
int main(int argc, char *argv[])
{
pcre_context_static<30> pc;
data_scanner ds("a=1 b=2");
data_scanner ds2("a=1 b=2");
data_scanner ds("a=1 b=2\n");
//data_scanner ds2("a=1 b=2 c=3,4\n");
data_scanner ds2("c=3,4\n");
data_token_t token;
while (ds.tokenize(pc, token)) {
@ -24,4 +27,27 @@ int main(int argc, char *argv[])
data_parser dp(&ds2);
dp.parse();
printf("done\n");
for (list<data_parser::element>::iterator iter = dp.dp_stack.begin();
iter != dp.dp_stack.end();
++iter) {
printf("%d %d:%d %s\n",
iter->e_token,
iter->e_capture.c_begin,
iter->e_capture.c_end,
ds2.get_input().get_substr(&iter->e_capture).c_str());
if (iter->e_sub_elements != NULL) {
for (list<data_parser::element>::iterator iter2 =
iter->e_sub_elements->begin();
iter2 != iter->e_sub_elements->end();
++iter2) {
printf(" %d %d:%d %s\n",
iter2->e_token,
iter2->e_capture.c_begin,
iter2->e_capture.c_end,
ds2.get_input().get_substr(&iter2->e_capture).c_str());
}
}
}
}