Reform types inferred by infer_schema! on SQLite (#277)

`infer_schema!` is woefully undertested. Really our tests for it at the
moment are "we use it for our test suite so the cases our suite covers
work". However, even though I defined `tinyint` == `Bool`, I wanted to
make sure that `TINYINT(1)` was treated as bool as well, as I wasn't
certain how SQLite handles limit/precision/scale.

The answer is that it doesn't, and it's way looser about allowed type
names than I had thought. The process listed at
https://www.sqlite.org/datatype3.html is a literal description, and any
possible string is a valid type.

This adds tests for every example given on that page, plus a few extras.
We create a table with as many of these fields as possible, and do a
trivial roundtrip to make sure that it *actually* infers something we
can deserialize from, and that we're not doing anything dumb.

The new logic for type inference closely matches how SQLite handles
things, with a few exceptions:

- "boolean" or types containing "tiny" and "int" are treated as bool
- smallint and bigint are separated from int
- float is separated from double
- varchar is separated from text
- We do not accept random unrecognized type names as numeric

Unresolved Questions
--------------------

This actually starts to make me a bit more nervous about our semantics
with SQLite. If you're just using Diesel, everything is fine. However,
you can definitely insert values that would fail to deserialize with so
few constraints on the backend. I'm starting to wonder if we should
truly embrace SQLite's definitions and map exactly to that, allowing
only the following types:

- BigInt
- VarChar (yes, it's the ANSI text type but we treat VarChar as the
  "default" string type)
- Binary
- Double

We're omitting numeric, as there's no observable difference in SQLite
between the real affinity and the numeric affinity.

This would have several *major* implications. Aside from not being able
to use basic things like an `i32`, it would also mean that there is no
boolean type, and no dates/times/datetimes. Functions for those do exist
on the SQLite side though, so some of the interactions might get super
janky.

That said, Diesel's goal is not to abstract away the backend. These are
the semantics of the backend chosen, and maybe we should go whole hog
and embrace them.

I'm still unsure. In the meantime, with our current semantics, this
should improve the reliability of `infer_schema!`.
This commit is contained in:
Sean Griffin 2016-04-17 14:42:11 -06:00
parent 4b0a975ed7
commit ccad1510c1
6 changed files with 247 additions and 21 deletions

View File

@ -3,6 +3,12 @@ All user visible changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/), as described
for Rust libraries in [RFC #1105](https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md)
## Unreleased
### Changed
* `infer_schema!` on SQLite now accepts a larger range of type names
## [0.6.1] 2016-04-14
### Added

View File

@ -26,29 +26,29 @@ pub fn get_table_data(conn: &SqliteConnection, table_name: &str)
sql::<pragma_table_info::SqlType>(&query).load(conn)
}
/// Returns `true` when `type_name` mentions `clob` or `text` anywhere.
///
/// The check is a plain, case-sensitive substring search.
fn is_text(type_name: &str) -> bool {
    ["clob", "text"].iter().any(|&marker| type_name.contains(marker))
}
pub fn determine_column_type(cx: &mut ExtCtxt, attr: &ColumnInformation) -> P<ast::Ty> {
let type_name = attr.type_name.to_lowercase();
let tpe = match &*type_name {
"tinyint" => quote_ty!(cx, ::diesel::types::Bool),
"smallint" | "int2" => quote_ty!(cx, ::diesel::types::SmallInt),
"int" | "integer" | "int4" => quote_ty!(cx, ::diesel::types::Integer),
"bigint" => quote_ty!(cx, ::diesel::types::BigInt),
_ if type_name.contains("char") => quote_ty!(cx, ::diesel::types::VarChar),
_ if is_text(&type_name) => quote_ty!(cx, ::diesel::types::Text),
_ if type_name.contains("blob") || type_name.is_empty() => {
quote_ty!(cx, ::diesel::types::Binary)
}
"float" => quote_ty!(cx, ::diesel::types::Float),
"double" | "real" | "double precision" => quote_ty!(cx, ::diesel::types::Double),
_ => {
cx.span_err(cx.original_span(), &format!("Unsupported type: {}", type_name));
quote_ty!(cx, ())
}
let tpe = if is_bool(&type_name) {
quote_ty!(cx, ::diesel::types::Bool)
} else if is_smallint(&type_name) {
quote_ty!(cx, ::diesel::types::SmallInt)
} else if is_bigint(&type_name) {
quote_ty!(cx, ::diesel::types::BigInt)
} else if type_name.contains("int") {
quote_ty!(cx, ::diesel::types::Integer)
} else if type_name.contains("char") {
quote_ty!(cx, ::diesel::types::VarChar)
} else if is_text(&type_name) {
quote_ty!(cx, ::diesel::types::Text)
} else if type_name.contains("blob") || type_name.is_empty() {
quote_ty!(cx, ::diesel::types::Binary)
} else if is_float(&type_name) {
quote_ty!(cx, ::diesel::types::Float)
} else if is_double(&type_name) {
quote_ty!(cx, ::diesel::types::Double)
} else {
cx.span_err(cx.original_span(), &format!("Unsupported type: {}", type_name));
quote_ty!(cx, ())
};
if attr.nullable {
@ -58,6 +58,40 @@ pub fn determine_column_type(cx: &mut ExtCtxt, attr: &ColumnInformation) -> P<as
}
}
/// Returns `true` when `type_name` mentions `clob` or `text` anywhere.
///
/// The check is a plain, case-sensitive substring search.
fn is_text(type_name: &str) -> bool {
    ["clob", "text"].iter().any(|&marker| type_name.contains(marker))
}
/// Returns `true` when `type_name` should be treated as a boolean column.
///
/// That is the case when the name is exactly `boolean`, or when it contains
/// both `tiny` and `int` (in any position, e.g. `tinyint(1)` or `tiny int`).
fn is_bool(type_name: &str) -> bool {
    if type_name == "boolean" {
        return true;
    }
    let mentions = |fragment: &str| type_name.contains(fragment);
    mentions("tiny") && mentions("int")
}
/// Returns `true` when `type_name` should be treated as a 16-bit integer.
///
/// Matches the exact name `int2`, or any name containing both `small` and
/// `int` (e.g. `smallint(2)` or `small int`).
fn is_smallint(type_name: &str) -> bool {
    match type_name {
        "int2" => true,
        other => other.contains("small") && other.contains("int"),
    }
}
/// Returns `true` when `type_name` should be treated as a 64-bit integer.
///
/// Matches the exact name `int8`, or any name containing both `big` and
/// `int` (e.g. `bigint(4)` or `big int`).
fn is_bigint(type_name: &str) -> bool {
    let has_big_and_int = ["big", "int"].iter().all(|&part| type_name.contains(part));
    type_name == "int8" || has_big_and_int
}
/// Returns `true` when `type_name` contains `float` or `real`.
fn is_float(type_name: &str) -> bool {
    if type_name.contains("float") {
        return true;
    }
    type_name.contains("real")
}
/// Returns `true` when `type_name` contains `double`, `num`, or `dec`
/// (so `double precision`, `numeric` and `decimal(10, 5)` all match).
fn is_double(type_name: &str) -> bool {
    const MARKERS: [&'static str; 3] = ["double", "num", "dec"];
    MARKERS.iter().any(|&marker| type_name.contains(marker))
}
table! {
sqlite_master (name) {
name -> VarChar,

View File

@ -3,4 +3,5 @@ mod annotations;
mod deserialization;
mod insert;
mod schema;
mod schema_inference;
mod update;

View File

@ -0,0 +1,135 @@
#[cfg(feature = "sqlite")]
mod sqlite {
    use diesel::*;
    use schema::*;

    // Every test below follows the same shape: build one row, insert it into
    // the inferred table, load the whole table back, and require that the
    // loaded rows equal the single row that was inserted.

    // One row of `infer_all_the_ints`. The Rust type of each field is the
    // type the corresponding column is expected to be inferred as.
    #[derive(Queryable, PartialEq, Debug)]
    #[insertable_into(infer_all_the_ints)]
    struct InferredInts {
        col1: i32,
        col2: i32,
        col3: i32,
        col4: i32,
        col5: i16,
        col6: i16,
        col7: i16,
        col8: i64,
        col9: i64,
        col10: i64,
        col11: i16,
        col12: i32,
        col13: i64,
    }

    #[test]
    fn integers_infer_to_semantically_correct_types() {
        let db = connection();
        let row = InferredInts {
            col1: 1,
            col2: 2,
            col3: 3,
            col4: 4,
            col5: 5,
            col6: 6,
            col7: 7,
            col8: 8,
            col9: 9,
            col10: 10,
            col11: 11,
            col12: 12,
            col13: 13,
        };

        let inserted = insert(&row).into(infer_all_the_ints::table).execute(&db);
        inserted.unwrap();

        let loaded = infer_all_the_ints::table.load(&db);
        assert_eq!(Ok(vec![row]), loaded);
    }

    // One row of `infer_all_the_bools`; every column is expected to be
    // inferred as a boolean.
    #[derive(Queryable, PartialEq, Debug)]
    #[insertable_into(infer_all_the_bools)]
    struct InferredBools {
        col1: bool,
        col2: bool,
        col3: bool,
        col4: bool,
    }

    #[test]
    fn bool_types_infer_to_bool() {
        let db = connection();
        let row = InferredBools {
            col1: true,
            col2: true,
            col3: false,
            col4: false,
        };

        let inserted = insert(&row).into(infer_all_the_bools::table).execute(&db);
        inserted.unwrap();

        let loaded = infer_all_the_bools::table.load(&db);
        assert_eq!(Ok(vec![row]), loaded);
    }

    // One row of `infer_all_the_strings`: eight string columns followed by
    // two binary columns.
    #[derive(Queryable, PartialEq, Debug)]
    #[insertable_into(infer_all_the_strings)]
    struct InferredStrings {
        col1: String,
        col2: String,
        col3: String,
        col4: String,
        col5: String,
        col6: String,
        col7: String,
        col8: String,
        col9: Vec<u8>,
        col10: Vec<u8>,
    }

    #[test]
    fn strings_infer_to_semantically_correct_types() {
        let db = connection();
        let row = InferredStrings {
            col1: "Hello".into(),
            col2: "Hello".into(),
            col3: "Hello".into(),
            col4: "Hello".into(),
            col5: "Hello".into(),
            col6: "Hello".into(),
            col7: "Hello".into(),
            col8: "Hello".into(),
            col9: vec![1, 2, 3],
            col10: vec![1, 2, 3],
        };

        let inserted = insert(&row).into(infer_all_the_strings::table).execute(&db);
        inserted.unwrap();

        let loaded = infer_all_the_strings::table.load(&db);
        assert_eq!(Ok(vec![row]), loaded);
    }

    // One row of `infer_all_the_floats`: two single-precision columns
    // followed by four double-precision columns.
    #[derive(Queryable, PartialEq, Debug)]
    #[insertable_into(infer_all_the_floats)]
    struct InferredFloats {
        col1: f32,
        col2: f32,
        col3: f64,
        col4: f64,
        col5: f64,
        col6: f64,
    }

    #[test]
    fn floats_infer_to_semantically_correct_types() {
        let db = connection();
        let row = InferredFloats {
            col1: 1.0,
            col2: 2.0,
            col3: 3.0,
            col4: 4.0,
            col5: 5.0,
            col6: 6.0,
        };

        let inserted = insert(&row).into(infer_all_the_floats::table).execute(&db);
        inserted.unwrap();

        let loaded = infer_all_the_floats::table.load(&db);
        assert_eq!(Ok(vec![row]), loaded);
    }
}

View File

@ -0,0 +1,4 @@
-- Drops the four infer_all_the_* tables, one statement per table.
DROP TABLE infer_all_the_ints;
DROP TABLE infer_all_the_bools;
DROP TABLE infer_all_the_strings;
DROP TABLE infer_all_the_floats;

View File

@ -0,0 +1,46 @@
-- Semi-exhaustive checking of many possible invocations of supported types
-- listed at https://www.sqlite.org/datatype3.html to ensure it compiles
-- Integer-like declared types. The matching Rust test struct expects:
--   col1-col4   -> i32
--   col5-col7   -> i16
--   col8-col10  -> i64
--   col11-col13 -> i16, i32, i64 respectively
CREATE TABLE infer_all_the_ints (
col1 INTEGER PRIMARY KEY NOT NULL,
col2 INT NOT NULL,
col3 INTEGER NOT NULL,
-- Arbitrary type name; it contains the substring "INT" (in "HINT"), which is
-- what makes it infer as a plain integer.
col4 LOL_WHAT_EVEN_IS_THIS_TYPE_CAN_I_HAVE_A_HINT NOT NULL,
-- "small" + "int" variants, with and without a size and with a space.
col5 SMALLINT NOT NULL,
col6 SMALLINT(2) NOT NULL,
col7 SMALL INT NOT NULL,
-- "big" + "int" variants, with and without a size and with a space.
col8 BIGINT NOT NULL,
col9 BIGINT(4) NOT NULL,
col10 BIG INT NOT NULL,
-- Byte-width spellings.
col11 INT2 NOT NULL,
col12 INT4 NOT NULL,
col13 INT8 NOT NULL
);
-- Declared types that the matching Rust test struct expects to infer as bool:
-- names containing both "tiny" and "int", plus the exact name BOOLEAN.
CREATE TABLE infer_all_the_bools (
col1 TINYINT(1) PRIMARY KEY NOT NULL,
col2 TINYINT NOT NULL,
col3 TINY INT NOT NULL,
col4 BOOLEAN NOT NULL
);
-- String and binary declared types. The matching Rust test struct expects
-- col1-col8 to load as String and col9-col10 to load as Vec<u8>.
CREATE TABLE infer_all_the_strings (
-- Names containing "CHAR".
col1 CHARACTER(20) PRIMARY KEY NOT NULL,
col2 VARCHAR(255) NOT NULL,
col3 VARYING CHARACTER(255) NOT NULL,
col4 NCHAR(55) NOT NULL,
col5 NATIVE CHARACTER(70) NOT NULL,
col6 NVARCHAR(100) NOT NULL,
-- Names containing "TEXT" or "CLOB".
col7 TEXT NOT NULL,
col8 CLOB NOT NULL,
-- Binary columns: an explicit BLOB, and a column with no declared type.
col9 BLOB NOT NULL,
col10 NOT NULL
);
-- Floating-point declared types. The matching Rust test struct expects
-- col1-col2 to load as f32 and col3-col6 to load as f64.
CREATE TABLE infer_all_the_floats (
col1 REAL PRIMARY KEY NOT NULL,
col2 FLOAT NOT NULL,
col3 DOUBLE NOT NULL,
col4 DOUBLE PRECISION NOT NULL,
col5 NUMERIC NOT NULL,
col6 DECIMAL(10, 5) NOT NULL
)