mirror of
https://github.com/liljencrantz/crush.git
synced 2024-10-04 06:17:17 +03:00
Flesh out csv command
This commit is contained in:
parent
bf216d6e10
commit
8c94655d18
16
Cargo.lock
generated
16
Cargo.lock
generated
@ -124,6 +124,7 @@ dependencies = [
|
||||
"chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"map_in_place 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustyline 5.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
@ -196,6 +197,14 @@ dependencies = [
|
||||
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "map_in_place"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"scopeguard 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.2.1"
|
||||
@ -348,6 +357,11 @@ dependencies = [
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.15.44"
|
||||
@ -457,6 +471,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
"checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba"
|
||||
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
|
||||
"checksum map_in_place 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c59b419989c3a157f724d5bf720468729f1345d4d704cab69b5d7ba58c62c125"
|
||||
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
|
||||
"checksum nix 0.14.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6c722bee1037d430d0f8e687bbdbf222f27cc6e4e68d5caf630857bb2b6dbdce"
|
||||
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
|
||||
@ -475,6 +490,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf"
|
||||
"checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
|
||||
"checksum rustyline 5.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4795e277e6e57dec9df62b515cd4991371daa80e8dc8d80d596e58722b89c417"
|
||||
"checksum scopeguard 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "551cc7e5812ffa253138ea53b3de48306005422a66a5f0cc9a5704b3d92b298a"
|
||||
"checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
|
||||
"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f"
|
||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
|
@ -12,3 +12,4 @@ regex = "1"
|
||||
lazy_static = "1.4.0"
|
||||
rustyline = "5.0.3"
|
||||
either = "1.5.3"
|
||||
map_in_place = "0.1.0"
|
||||
|
3
example_data/address.csv
Normal file
3
example_data/address.csv
Normal file
@ -0,0 +1,3 @@
|
||||
eva, some street 1, 1234
|
||||
alice, other street 6, 2345
|
||||
ada, made up road 1, 3456
|
|
@ -20,6 +20,8 @@ use std::{
|
||||
path::Path
|
||||
};
|
||||
use either::Either;
|
||||
extern crate map_in_place;
|
||||
use map_in_place::MapVecInPlace;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Config {
|
||||
@ -43,17 +45,47 @@ fn parse(input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config
|
||||
arg.cell.file_expand(&mut files);
|
||||
},
|
||||
Some(name) => {
|
||||
if name.as_str() == "col" {
|
||||
match name.as_str() {
|
||||
"col" =>
|
||||
match &arg.cell {
|
||||
Cell::Text(s) => {
|
||||
let split: Vec<&str> = s.split(':').collect();
|
||||
match split.len() {
|
||||
2 => columns.push(CellType::named(split[0], CellDataType::from(split[1]))),
|
||||
_ => panic!("No no no")
|
||||
2 => columns.push(CellType::named(split[0], CellDataType::from(split[1])?)),
|
||||
_ => return Err(argument_error(format!("Expected a column description on the form name:type, got {}", s).as_str())),
|
||||
}
|
||||
}
|
||||
_ => panic!("Noooo"),
|
||||
_ => return Err(argument_error("Expected a text value")),
|
||||
}
|
||||
|
||||
"sep" =>
|
||||
match &arg.cell {
|
||||
Cell::Text(s) => {
|
||||
if s.len() == 1 {
|
||||
separator = s.chars().next().unwrap();
|
||||
} else {
|
||||
return Err(argument_error("Separator must be exactly one character long"))
|
||||
}
|
||||
}
|
||||
_ => return Err(argument_error("Expected a text value")),
|
||||
}
|
||||
|
||||
"trim" =>
|
||||
match &arg.cell {
|
||||
Cell::Text(s) => {
|
||||
if s.len() == 1 {
|
||||
trim = Some(s.chars().next().unwrap());
|
||||
} else {
|
||||
return Err(argument_error("Separator must be exactly one character long"))
|
||||
}
|
||||
}
|
||||
_ => return Err(argument_error("Expected a text value")),
|
||||
}
|
||||
|
||||
_ => return Err(argument_error(format!("Unknown parameter {}", name).as_str())),
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -95,7 +127,7 @@ fn handle(file: Box<Path>, cfg: &Config, output: &mut OutputStream) -> Result<()
|
||||
break;
|
||||
}
|
||||
let line_without_newline = &line[0..line.len() - 1];
|
||||
let split: Vec<&str> = line_without_newline
|
||||
let mut split: Vec<&str> = line_without_newline
|
||||
.split(cfg_clone.separator)
|
||||
.map(|s| cfg_clone.trim
|
||||
.map(|c| s.trim_matches(c))
|
||||
@ -105,6 +137,9 @@ fn handle(file: Box<Path>, cfg: &Config, output: &mut OutputStream) -> Result<()
|
||||
panic!("Wrong number of columns in CSV file");
|
||||
// return Err(error("Wrong number of columns in CSV file"))
|
||||
}
|
||||
if let Some(trim) = cfg_clone.trim {
|
||||
split = split.map(|s| s.trim_matches(trim));
|
||||
}
|
||||
let cells: Result<Vec<Cell>, JobError> = split.iter()
|
||||
.zip(cfg_clone.columns.iter())
|
||||
.map({ |(s, t)| t.cell_type.parse(*s) }).collect();
|
||||
|
@ -13,6 +13,8 @@ use crate::{
|
||||
use std::collections::HashMap;
|
||||
use crate::stream::Readable;
|
||||
use crate::replace::Replace;
|
||||
use crate::errors::argument_error;
|
||||
use crate::commands::command_util::find_field;
|
||||
|
||||
struct Config {
|
||||
left_table_idx: usize,
|
||||
@ -21,21 +23,62 @@ struct Config {
|
||||
right_column_idx: usize,
|
||||
}
|
||||
|
||||
fn parse(_input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config, JobError> {
|
||||
Ok(Config {
|
||||
left_table_idx: 0,
|
||||
right_table_idx: 2,
|
||||
left_column_idx: 0,
|
||||
right_column_idx: 0
|
||||
})
|
||||
pub fn guess_tables(input_type: &Vec<CellType>) -> Result<(usize, usize, &Vec<CellType>, &Vec<CellType>), JobError> {
|
||||
let tables:Vec<(usize, &Vec<CellType>)> = input_type.iter().enumerate().flat_map(|(idx, t)| {
|
||||
match &t.cell_type {
|
||||
CellDataType::Output(sub_types) => Some((idx, sub_types)),
|
||||
_ => None,
|
||||
}
|
||||
}).collect();
|
||||
if tables.len() == 2 {
|
||||
Ok((tables[0].0, tables[1].0, tables[0].1, tables[1].1))
|
||||
} else {
|
||||
Err(argument_error(format!("Could not guess tables to join, expected two tables, found {}", tables.len()).as_str()))
|
||||
}
|
||||
}
|
||||
|
||||
fn do_join(l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream) {
|
||||
fn parse(input_type: &Vec<CellType>, arguments: &Vec<Argument>) -> Result<Config, JobError> {
|
||||
if (arguments.len() != 3) {
|
||||
return Err(argument_error("Expected exactly 3 aguments"));
|
||||
}
|
||||
return match (&arguments[0].cell, &arguments[1].cell, &arguments[2].cell) {
|
||||
(Cell::Field(l), Cell::Op(op), Cell::Field(r)) => {
|
||||
if op.as_str() != "==" {
|
||||
return Err(argument_error("Only == currently supported"));
|
||||
}
|
||||
match (l.matches('.').count(), r.matches('.').count()) {
|
||||
(0, 0) => {
|
||||
let (left_table_idx, right_table_idx, left_types, right_types) = guess_tables(input_type)?;
|
||||
Ok(Config {
|
||||
left_table_idx,
|
||||
right_table_idx,
|
||||
left_column_idx: find_field(&l, left_types)?,
|
||||
right_column_idx: find_field(&r, right_types)?,
|
||||
})
|
||||
}
|
||||
(1, 1) => Err(argument_error("Not implemented yet!")),
|
||||
_ => Err(argument_error("Expected both fields on the form %table.column or %column")),
|
||||
}
|
||||
}
|
||||
_ => Err(argument_error("Expected arguments like %table1.col == %table2.col")),
|
||||
};
|
||||
}
|
||||
|
||||
/// Merges a matched pair of rows into a single output row.
///
/// The result holds every cell of `l` in order, followed by the cells of `r` in order,
/// except the cell at position `cfg.right_column_idx`, which is dropped.
fn combine(mut l: Row, r: Row, cfg: &Config) -> Row {
    let skipped = cfg.right_column_idx;
    l.cells.extend(
        r.cells
            .into_iter()
            .enumerate()
            .filter(|(position, _)| *position != skipped)
            .map(|(_, cell)| cell),
    );
    l
}
|
||||
|
||||
fn do_join(cfg: &Config, l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream) {
|
||||
let mut l_data: HashMap<Cell, Row> = HashMap::new();
|
||||
loop {
|
||||
match l.read() {
|
||||
Ok(row) => {
|
||||
l_data.insert(row.cells[0].concrete(), row);
|
||||
l_data.insert(row.cells[cfg.left_column_idx].concrete(), row);
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@ -44,16 +87,11 @@ fn do_join(l: &mut impl Readable, r: &mut impl Readable, output: &OutputStream)
|
||||
loop {
|
||||
match r.read() {
|
||||
Ok(r_row) => {
|
||||
l_data.get(&r_row.cells[0].concrete()).map(|l_row| {
|
||||
output.send(Row {
|
||||
cells: vec![
|
||||
r_row.cells[0].concrete(),
|
||||
l_row.cells[1].concrete(),
|
||||
r_row.cells[1].concrete(),
|
||||
]
|
||||
});
|
||||
}
|
||||
);
|
||||
l_data
|
||||
.remove(&r_row.cells[cfg.right_column_idx])
|
||||
.map(|l_row| {
|
||||
output.send(combine( l_row, r_row, cfg));
|
||||
});
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
@ -72,7 +110,7 @@ fn run(
|
||||
Ok(mut row) => {
|
||||
match (row.cells.replace(cfg.left_table_idx, Cell::Integer(0)), row.cells.replace(cfg.right_table_idx, Cell::Integer(0))) {
|
||||
(Cell::Output(mut l), Cell::Output(mut r)) => {
|
||||
do_join(&mut l.stream, &mut r.stream, &output);
|
||||
do_join(&cfg, &mut l.stream, &mut r.stream, &output);
|
||||
}
|
||||
_ => panic!("Wrong row format"),
|
||||
}
|
||||
@ -86,9 +124,9 @@ fn run(
|
||||
pub fn join(input_type: Vec<CellType>, arguments: Vec<Argument>) -> Result<Call, JobError> {
|
||||
let cfg = parse(&input_type, &arguments);
|
||||
let output_type = vec![
|
||||
CellType::named("name", CellDataType::Text ),
|
||||
CellType::named("age", CellDataType::Integer ),
|
||||
CellType::named("home", CellDataType::Text ),
|
||||
CellType::named("name", CellDataType::Text),
|
||||
CellType::named("age", CellDataType::Integer),
|
||||
CellType::named("home", CellDataType::Text),
|
||||
];
|
||||
return Ok(Call {
|
||||
name: String::from("join"),
|
||||
|
@ -37,18 +37,18 @@ pub enum CellDataType {
|
||||
}
|
||||
|
||||
impl CellDataType {
|
||||
pub fn from(s: &str) -> CellDataType {
|
||||
pub fn from(s: &str) -> Result<CellDataType, JobError> {
|
||||
match s {
|
||||
"text" => CellDataType::Text,
|
||||
"integer" => CellDataType::Integer,
|
||||
"time" => CellDataType::Time,
|
||||
"field" => CellDataType::Field,
|
||||
"glob" => CellDataType::Glob,
|
||||
"regex" => CellDataType::Regex,
|
||||
"op" => CellDataType::Op,
|
||||
"command" => CellDataType::Command,
|
||||
"file" => CellDataType::Command,
|
||||
_ => panic!(format!("Missing conversion for {} in CellDataType", s)),
|
||||
"text" => Ok(CellDataType::Text),
|
||||
"integer" => Ok(CellDataType::Integer),
|
||||
"time" => Ok(CellDataType::Time),
|
||||
"field" => Ok(CellDataType::Field),
|
||||
"glob" => Ok(CellDataType::Glob),
|
||||
"regex" => Ok(CellDataType::Regex),
|
||||
"op" => Ok(CellDataType::Op),
|
||||
"command" => Ok(CellDataType::Command),
|
||||
"file" => Ok(CellDataType::File),
|
||||
_ => Err(error(format!("Unknown cell type {}", s).as_str())),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,8 +58,8 @@ lazy_static! {
|
||||
|
||||
(TokenType::Integer, Regex::new(r"^[0-9]+").unwrap()),
|
||||
|
||||
(TokenType::Variable, Regex::new(r"^\$[a-zA-Z_][a-zA-Z_0-9]*").unwrap()),
|
||||
(TokenType::Field, Regex::new("^%[a-zA-Z_][a-zA-Z_0-9]*").unwrap()),
|
||||
(TokenType::Variable, Regex::new(r"^\$[a-zA-Z_][\.a-zA-Z_0-9]*").unwrap()),
|
||||
(TokenType::Field, Regex::new(r"^%[a-zA-Z_][\.a-zA-Z_0-9]*").unwrap()),
|
||||
|
||||
(TokenType::BlockStart, Regex::new(r"^[`*]?\{").unwrap()),
|
||||
(TokenType::BlockEnd, Regex::new(r"^\}").unwrap()),
|
||||
@ -229,10 +229,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn variables_and_fields() {
|
||||
let mut l = Lexer::new(&String::from("$foo %bar"));
|
||||
let mut l = Lexer::new(&String::from("$foo %bar $foo.bar %baz.qux"));
|
||||
let tt = tokens(&mut l);
|
||||
assert_eq!(tt, vec![
|
||||
TokenType::Variable, TokenType::Field, TokenType::EOF]);
|
||||
TokenType::Variable, TokenType::Field, TokenType::Variable, TokenType::Field, TokenType::EOF]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -53,6 +53,10 @@ fn parse_job(lexer: &mut Lexer, state: &State, commands: &mut Vec<Call>, depende
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
/// Returns the contents of a quoted-string token with its delimiters removed.
///
/// The first and last byte of `s` are cut off and the remainder is returned as an owned
/// `String`. Despite the name, no escape sequences are interpreted.
///
/// A token too short to carry both delimiters, or one where the cut would not land on a
/// character boundary, yields an empty string instead of panicking.
fn unescape(s: &str) -> String {
    // `saturating_sub` avoids underflow on an empty token; `get` rejects an inverted
    // range or a non-boundary index by returning `None`.
    s.get(1..s.len().saturating_sub(1))
        .unwrap_or("")
        .to_string()
}
|
||||
|
||||
fn parse_unnamed_argument(lexer: &mut Lexer, dependencies: &mut Vec<Job>, state: &State) -> Result<Cell, JobError> {
|
||||
let token_type = lexer.peek().0;
|
||||
match token_type {
|
||||
@ -117,6 +121,7 @@ fn parse_unnamed_argument(lexer: &mut Lexer, dependencies: &mut Vec<Job>, state:
|
||||
Err(e) => Err(argument_error(e.description())),
|
||||
}
|
||||
},
|
||||
TokenType::QuotedString => Ok(Cell::Text(unescape(lexer.pop().1))),
|
||||
|
||||
_ => {
|
||||
lexer.pop();
|
||||
|
Loading…
Reference in New Issue
Block a user