Flesh out csv command

This commit is contained in:
Axel Liljencrantz 2019-10-17 15:03:24 +02:00
parent bf216d6e10
commit 8c94655d18
8 changed files with 141 additions and 43 deletions

16
Cargo.lock generated
View File

@ -124,6 +124,7 @@ dependencies = [
"chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"map_in_place 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rustyline 5.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -196,6 +197,14 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "map_in_place"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"scopeguard 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "2.2.1"
@ -348,6 +357,11 @@ dependencies = [
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "scopeguard"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "0.15.44"
@ -457,6 +471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba"
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
"checksum map_in_place 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c59b419989c3a157f724d5bf720468729f1345d4d704cab69b5d7ba58c62c125"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum nix 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6c722bee1037d430d0f8e687bbdbf222f27cc6e4e68d5caf630857bb2b6dbdce"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
@ -475,6 +490,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf"
"checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
"checksum rustyline 5.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4795e277e6e57dec9df62b515cd4991371daa80e8dc8d80d596e58722b89c417"
"checksum scopeguard 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "551cc7e5812ffa253138ea53b3de48306005422a66a5f0cc9a5704b3d92b298a"
"checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"

View File

@ -12,3 +12,4 @@ regex = "1"
lazy_static = "1.4.0"
rustyline = "5.0.3"
either = "1.5.3"
map_in_place = "0.1.0"

3
example_data/address.csv Normal file
View File

@ -0,0 +1,3 @@
eva, some street 1, 1234
alice, other street 6, 2345
ada, made up road 1, 3456
1 eva some street 1 1234
2 alice other street 6 2345
3 ada made up road 1 3456

View File

@ -20,6 +20,8 @@ use std::{
path::Path
};
use either::Either;
extern crate map_in_place;
use map_in_place::MapVecInPlace;
#[derive(Clone)]
struct Config {
@ -43,17 +45,47 @@ fn parse(input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config
arg.cell.file_expand(&mut files);
},
Some(name) => {
if name.as_str() == "col" {
match name.as_str() {
"col" =>
match &arg.cell {
Cell::Text(s) => {
let split: Vec<&str> = s.split(':').collect();
match split.len() {
2 => columns.push(CellType::named(split[0], CellDataType::from(split[1]))),
_ => panic!("No no no")
2 => columns.push(CellType::named(split[0], CellDataType::from(split[1])?)),
_ => return Err(argument_error(format!("Expected a column description on the form name:type, got {}", s).as_str())),
}
}
_ => panic!("Noooo"),
_ => return Err(argument_error("Expected a text value")),
}
"sep" =>
match &arg.cell {
Cell::Text(s) => {
if s.len() == 1 {
separator = s.chars().next().unwrap();
} else {
return Err(argument_error("Separator must be exactly one character long"))
}
}
_ => return Err(argument_error("Expected a text value")),
}
"trim" =>
match &arg.cell {
Cell::Text(s) => {
if s.len() == 1 {
trim = Some(s.chars().next().unwrap());
} else {
return Err(argument_error("Separator must be exactly one character long"))
}
}
_ => return Err(argument_error("Expected a text value")),
}
_ => return Err(argument_error(format!("Unknown parameter {}", name).as_str())),
}
}
}
@ -95,7 +127,7 @@ fn handle(file: Box<Path>, cfg: &Config, output: &mut OutputStream) -> Result<()
break;
}
let line_without_newline = &line[0..line.len() - 1];
let split: Vec<&str> = line_without_newline
let mut split: Vec<&str> = line_without_newline
.split(cfg_clone.separator)
.map(|s| cfg_clone.trim
.map(|c| s.trim_matches(c))
@ -105,6 +137,9 @@ fn handle(file: Box<Path>, cfg: &Config, output: &mut OutputStream) -> Result<()
panic!("Wrong number of columns in CSV file");
// return Err(error("Wrong number of columns in CSV file"))
}
if let Some(trim) = cfg_clone.trim {
split = split.map(|s| s.trim_matches(trim));
}
let cells: Result<Vec<Cell>, JobError> = split.iter()
.zip(cfg_clone.columns.iter())
.map({ |(s, t)| t.cell_type.parse(*s) }).collect();

View File

@ -13,6 +13,8 @@ use crate::{
use std::collections::HashMap;
use crate::stream::Readable;
use crate::replace::Replace;
use crate::errors::argument_error;
use crate::commands::command_util::find_field;
struct Config {
left_table_idx: usize,
@ -21,21 +23,62 @@ struct Config {
right_column_idx: usize,
}
fn parse(_input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config, JobError> {
Ok(Config {
left_table_idx: 0,
right_table_idx: 2,
left_column_idx: 0,
right_column_idx: 0
})
pub fn guess_tables(input_type: &Vec<CellType>) -> Result<(usize, usize, &Vec<CellType>, &Vec<CellType>), JobError> {
let tables:Vec<(usize, &Vec<CellType>)> = input_type.iter().enumerate().flat_map(|(idx, t)| {
match &t.cell_type {
CellDataType::Output(sub_types) => Some((idx, sub_types)),
_ => None,
}
}).collect();
if tables.len() == 2 {
Ok((tables[0].0, tables[1].0, tables[0].1, tables[1].1))
} else {
Err(argument_error(format!("Could not guess tables to join, expected two tables, found {}", tables.len()).as_str()))
}
}
fn do_join(l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream) {
fn parse(input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config, JobError> {
if (arguments.len() != 3) {
return Err(argument_error("Expected exactly 3 aguments"));
}
return match (&arguments[0].cell, &arguments[1].cell, &arguments[2].cell) {
(Cell::Field(l), Cell::Op(op), Cell::Field(r)) => {
if op.as_str() != "==" {
return Err(argument_error("Only == currently supported"));
}
match (l.matches('.').count(), r.matches('.').count()) {
(0, 0) => {
let (left_table_idx, right_table_idx, left_types, right_types) = guess_tables(input_type)?;
Ok(Config {
left_table_idx,
right_table_idx,
left_column_idx: find_field(&l, left_types)?,
right_column_idx: find_field(&r, right_types)?,
})
}
(1, 1) => Err(argument_error("Not implemented yet!")),
_ => Err(argument_error("Expected both fields on the form %table.column or %column")),
}
}
_ => Err(argument_error("Expected arguments like %table1.col == %table2.col")),
};
}
fn combine(mut l: Row, mut r: Row, cfg: &Config) -> Row {
for (idx, c) in r.cells.drain(..).enumerate() {
if idx != cfg.right_column_idx {
l.cells.push(c);
}
}
return Row {cells: l.cells}
}
fn do_join(cfg: &Config, l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream) {
let mut l_data: HashMap<Cell, Row> = HashMap::new();
loop {
match l.read() {
Ok(row) => {
l_data.insert(row.cells[0].concrete(), row);
l_data.insert(row.cells[cfg.left_column_idx].concrete(), row);
}
Err(_) => break,
}
@ -44,16 +87,11 @@ fn do_join(l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream)
loop {
match r.read() {
Ok(r_row) => {
l_data.get(&r_row.cells[0].concrete()).map(|l_row| {
output.send(Row {
cells: vec![
r_row.cells[0].concrete(),
l_row.cells[1].concrete(),
r_row.cells[1].concrete(),
]
});
}
);
l_data
.remove(&r_row.cells[cfg.right_column_idx])
.map(|l_row| {
output.send(combine( l_row, r_row, cfg));
});
}
Err(_) => break,
}
@ -72,7 +110,7 @@ fn run(
Ok(mut row) => {
match (row.cells.replace(cfg.left_table_idx, Cell::Integer(0)), row.cells.replace(cfg.right_table_idx, Cell::Integer(0))) {
(Cell::Output(mut l), Cell::Output(mut r)) => {
do_join(&mut l.stream, &mut r.stream, &output);
do_join(&cfg, &mut l.stream, &mut r.stream, &output);
}
_ => panic!("Wrong row format"),
}
@ -86,9 +124,9 @@ fn run(
pub fn join(input_type: Vec<CellType>, arguments: Vec<Argument>) -> Result<Call, JobError> {
let cfg = parse(&input_type, &arguments);
let output_type = vec![
CellType::named("name", CellDataType::Text ),
CellType::named("age", CellDataType::Integer ),
CellType::named("home", CellDataType::Text ),
CellType::named("name", CellDataType::Text),
CellType::named("age", CellDataType::Integer),
CellType::named("home", CellDataType::Text),
];
return Ok(Call {
name: String::from("join"),

View File

@ -37,18 +37,18 @@ pub enum CellDataType {
}
impl CellDataType {
pub fn from(s: &str) -> CellDataType {
pub fn from(s: &str) -> Result<CellDataType, JobError> {
match s {
"text" => CellDataType::Text,
"integer" => CellDataType::Integer,
"time" => CellDataType::Time,
"field" => CellDataType::Field,
"glob" => CellDataType::Glob,
"regex" => CellDataType::Regex,
"op" => CellDataType::Op,
"command" => CellDataType::Command,
"file" => CellDataType::Command,
_ => panic!(format!("Missing conversion for {} in CellDataType", s)),
"text" => Ok(CellDataType::Text),
"integer" => Ok(CellDataType::Integer),
"time" => Ok(CellDataType::Time),
"field" => Ok(CellDataType::Field),
"glob" => Ok(CellDataType::Glob),
"regex" => Ok(CellDataType::Regex),
"op" => Ok(CellDataType::Op),
"command" => Ok(CellDataType::Command),
"file" => Ok(CellDataType::File),
_ => Err(error(format!("Unknown cell type {}", s).as_str())),
}
}

View File

@ -58,8 +58,8 @@ lazy_static! {
(TokenType::Integer, Regex::new(r"^[0-9]+").unwrap()),
(TokenType::Variable, Regex::new(r"^\$[a-zA-Z_][a-zA-Z_0-9]*").unwrap()),
(TokenType::Field, Regex::new("^%[a-zA-Z_][a-zA-Z_0-9]*").unwrap()),
(TokenType::Variable, Regex::new(r"^\$[a-zA-Z_][\.a-zA-Z_0-9]*").unwrap()),
(TokenType::Field, Regex::new(r"^%[a-zA-Z_][\.a-zA-Z_0-9]*").unwrap()),
(TokenType::BlockStart, Regex::new(r"^[`*]?\{").unwrap()),
(TokenType::BlockEnd, Regex::new(r"^\}").unwrap()),
@ -229,10 +229,10 @@ mod tests {
#[test]
fn variables_and_fields() {
let mut l = Lexer::new(&String::from("$foo %bar"));
let mut l = Lexer::new(&String::from("$foo %bar $foo.bar %baz.qux"));
let tt = tokens(&mut l);
assert_eq!(tt, vec![
TokenType::Variable, TokenType::Field, TokenType::EOF]);
TokenType::Variable, TokenType::Field, TokenType::Variable, TokenType::Field, TokenType::EOF]);
}
#[test]

View File

@ -53,6 +53,10 @@ fn parse_job(lexer: &mut Lexer, state: &State, commands: &mut Vec<Call>, depende
return Ok(());
}
/// Returns `s` without its first and last character.
///
/// Used on quoted-string tokens to drop the surrounding delimiters. Despite the name, no
/// escape sequences are interpreted; the interior of the string is copied verbatim.
///
/// The characters are removed by `char`, not by byte, so input that is shorter than two
/// characters or that starts or ends with a multi-byte character yields the remaining
/// text (possibly empty) instead of panicking on an out-of-range or non-boundary slice.
fn unescape(s: &str) -> String {
    let mut inner = s.chars();
    // Discard one character from each end; either call is a no-op on exhausted input.
    inner.next();
    inner.next_back();
    inner.as_str().to_string()
}
fn parse_unnamed_argument(lexer: &mut Lexer, dependencies: &mut Vec<Job>, state: &State) -> Result<Cell, JobError> {
let token_type = lexer.peek().0;
match token_type {
@ -117,6 +121,7 @@ fn parse_unnamed_argument(lexer: &mut Lexer, dependencies: &mut Vec<Job>, state:
Err(e) => Err(argument_error(e.description())),
}
},
TokenType::QuotedString => Ok(Cell::Text(unescape(lexer.pop().1))),
_ => {
lexer.pop();