mirror of
https://github.com/enso-org/enso.git
synced 2024-12-27 17:22:11 +03:00
Simple CSV parser (#1268)
This commit is contained in:
parent
ef97d1dbbf
commit
150771c0e2
11
.github/workflows/scala.yml
vendored
11
.github/workflows/scala.yml
vendored
@ -268,6 +268,17 @@ jobs:
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --run test/Tests
|
||||
|
||||
- name: Test Tables Library (Unix)
|
||||
shell: bash
|
||||
if: runner.os != 'Windows'
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso --run test/Table_Tests
|
||||
- name: Test Tables Library (Windows)
|
||||
shell: bash
|
||||
if: runner.os == 'Windows'
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --run test/Table_Tests
|
||||
|
||||
# Publish
|
||||
- name: Publish the Engine Distribution Artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
|
29
build.sbt
29
build.sbt
@ -59,6 +59,11 @@ GatherLicenses.distributions := Seq(
|
||||
"std-lib-Base",
|
||||
file("distribution/std-lib/Base/THIRD-PARTY"),
|
||||
Distribution.sbtProjects(`std-bits`)
|
||||
),
|
||||
Distribution(
|
||||
"std-lib-Table",
|
||||
file("distribution/std-lib/Table/THIRD-PARTY"),
|
||||
Distribution.sbtProjects(`table`)
|
||||
)
|
||||
)
|
||||
GatherLicenses.licenseConfigurations := Set("compile")
|
||||
@ -996,6 +1001,7 @@ lazy val runtime = (project in file("engine/runtime"))
|
||||
.settings(
|
||||
(Runtime / compile) := (Runtime / compile)
|
||||
.dependsOn(`std-bits` / Compile / packageBin)
|
||||
.dependsOn(table / Compile / packageBin)
|
||||
.value
|
||||
)
|
||||
.settings(
|
||||
@ -1214,6 +1220,7 @@ lazy val `runtime-version-manager-test` = project
|
||||
|
||||
val `std-lib-root` = file("distribution/std-lib/")
|
||||
val `std-lib-polyglot-root` = `std-lib-root` / "Base" / "polyglot" / "java"
|
||||
val `table-polyglot-root` = `std-lib-root` / "Table" / "polyglot" / "java"
|
||||
|
||||
lazy val `std-bits` = project
|
||||
.in(file("std-bits"))
|
||||
@ -1237,6 +1244,28 @@ lazy val `std-bits` = project
|
||||
}.value
|
||||
)
|
||||
|
||||
// Java-only helper project for the `Table` standard library. Its jar is
// written to the library's polyglot directory as `table.jar`.
lazy val `table` = project
  .in(file("table"))
  .settings(
    // The project contains no Scala code, so the Scala library is not added
    // to its dependencies.
    autoScalaLibrary := false,
    Compile / packageBin / artifactPath :=
      `table-polyglot-root` / "table.jar",
    libraryDependencies += "com.univocity" % "univocity-parsers" % "2.9.0",
    // Packaging additionally runs `StdBits.copyDependencies` for the polyglot
    // directory; the packaged jar itself remains the result of the task.
    Compile / packageBin := Def.task {
      val packagedJar = (Compile / packageBin).value
      StdBits
        .copyDependencies(
          `table-polyglot-root`,
          "table.jar",
          ignoreScalaLibrary = true
        )
        .value
      packagedJar
    }.value
  )
|
||||
|
||||
/* Note [HTTPS in the Launcher]
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* The launcher uses Apache HttpClient for making web requests. It does not use
|
||||
|
@ -117,3 +117,25 @@ Any.to_json =
|
||||
instead define their own `to_json` implementations.
|
||||
Meta.Polyglot _ -> Null
|
||||
Meta.Primitive _ -> Null
|
||||
|
||||
## Method used by object builders to convert a value into a valid JSON key.
|
||||
Text.to_json_key : Text
|
||||
Text.to_json_key = this
|
||||
|
||||
## A smart constructor, building an object representation based on a vector
|
||||
of key-value pairs.
|
||||
|
||||
All values used as keys must define a `to_json_key : Text` method.
|
||||
|
||||
> Example
|
||||
The following code:
|
||||
Json.from_pairs [["foo", 533], ["bar", False]]
|
||||
Returns a JSON object, that after serialization becomes:
|
||||
{ "foo": 533, "bar": false }
|
||||
from_pairs : Vector -> Object
|
||||
from_pairs contents =
    # Fold the pairs into a map, converting each key with `to_json_key` and
    # each value with `to_json` before inserting it.
    entries = contents.fold Map.empty acc-> pair->
        json_key = pair.at 0 . to_json_key
        json_val = pair.at 1 . to_json
        acc.insert json_key json_val
    Object entries
|
||||
|
@ -77,3 +77,6 @@ type Map
|
||||
empty : Map
|
||||
empty = Tip
|
||||
|
||||
## Returns a single-element map with the given key and value present.
|
||||
singleton : Any -> Any -> Map
|
||||
singleton key value = Bin 1 key value Tip Tip
|
||||
|
@ -143,3 +143,9 @@ Text.contains sequence = Text_Utils.contains [this, sequence]
|
||||
## Text to JSON conversion.
|
||||
Text.to_json : Json.String
|
||||
Text.to_json = Json.String this
|
||||
|
||||
## Takes a non-negative integer and returns a new text, consisting of `count`
|
||||
concatenated copies of `this`.
|
||||
Text.repeat : Integer -> Text
|
||||
Text.repeat count =
    # One fold step per requested copy; each step appends `this` to the text
    # accumulated so far, starting from the empty text.
    steps = 0.upto count
    steps.fold "" so_far-> _-> so_far + this
|
||||
|
@ -6,6 +6,7 @@ export Base.System.File.Option
|
||||
polyglot java import java.nio.file.NoSuchFileException
|
||||
polyglot java import java.nio.file.AccessDeniedException
|
||||
polyglot java import java.io.IOException
|
||||
polyglot java import java.io.InputStream as Java_Input_Stream
|
||||
|
||||
type File_Error
|
||||
type No_Such_File_Error file
|
||||
@ -106,6 +107,13 @@ type Input_Stream
|
||||
close : Unit
|
||||
close = Managed_Resource.finalize this.stream_resource
|
||||
|
||||
## Exposes operations on the underlying Java input stream.
|
||||
|
||||
Useful when integrating with polyglot functions requiring an
|
||||
`InputStream` as an argument.
|
||||
with_java_stream : (Java_Input_Stream -> Any) -> Any
|
||||
with_java_stream f = Managed_Resource.with this.stream_resource f
|
||||
|
||||
type File
|
||||
type File prim_file
|
||||
|
||||
|
7
distribution/std-lib/Table/THIRD-PARTY/NOTICE
Normal file
7
distribution/std-lib/Table/THIRD-PARTY/NOTICE
Normal file
@ -0,0 +1,7 @@
|
||||
Enso
|
||||
Copyright 2020 New Byte Order sp. z o. o.
|
||||
|
||||
'univocity-parsers', licensed under the Apache 2, is distributed with the std-lib-Table.
|
||||
The license file can be found at `licenses/APACHE2.0`.
|
||||
Copyright notices related to this dependency can be found in the directory `com.univocity.univocity-parsers-2.9.0`.
|
||||
|
@ -0,0 +1,15 @@
|
||||
Copyright 2017 Univocity Software Pty Ltd
|
||||
|
||||
Copyright 2016 Univocity Software Pty Ltd
|
||||
|
||||
Copyright 2018 Univocity Software Pty Ltd
|
||||
|
||||
Copyright (c) 2018. Univocity Software Pty Ltd
|
||||
|
||||
Copyright 2019 Univocity Software Pty Ltd
|
||||
|
||||
Copyright 2014 Univocity Software Pty Ltd
|
||||
|
||||
Copyright 2015 Univocity Software Pty Ltd
|
||||
|
||||
Copyright (c) 2015. Univocity Software Pty Ltd
|
201
distribution/std-lib/Table/THIRD-PARTY/licenses/APACHE2.0
Normal file
201
distribution/std-lib/Table/THIRD-PARTY/licenses/APACHE2.0
Normal file
@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
6
distribution/std-lib/Table/package.yaml
Normal file
6
distribution/std-lib/Table/package.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
license: APLv2
|
||||
name: Table
|
||||
enso-version: default
|
||||
version: "0.0.1"
|
||||
author: "Enso Team <contact@enso.org>"
|
||||
maintainer: "Enso Team <contact@enso.org>"
|
21
distribution/std-lib/Table/src/Io/Csv.enso
Normal file
21
distribution/std-lib/Table/src/Io/Csv.enso
Normal file
@ -0,0 +1,21 @@
|
||||
from Base import all
|
||||
import Table.Table
|
||||
|
||||
polyglot java import org.enso.table.format.csv.Parser
|
||||
|
||||
## Reads the contents of `this` and parses them as a CSV dataframe.
|
||||
|
||||
Arguments:
|
||||
- has_header: Specifies whether the first line of the file should be
|
||||
interpreted as a header, containing storage names. If set to `False`,
|
||||
storage names will be automatically generated.
|
||||
- prefix: text that should be prepended to automatically generated storage
|
||||
names. For example, if `prefix` is set to `X`, the columns will be named
|
||||
`X0`, `X1`, etc. This argument has no effect if the storage name is
|
||||
inferred from the CSV header row or set manually.
|
||||
File.File.read_csv : Boolean -> Text -> Table
|
||||
File.File.read_csv has_header=True prefix='C' =
    # The parser is configured once, up front; the file is then opened for
    # reading and its underlying Java stream is handed to the parser.
    csv_parser = Parser.create [has_header, prefix]
    this.with_input_stream [File.Option.Read] input->
        input.with_java_stream raw_stream->
            parsed = csv_parser.parse [raw_stream]
            Table.Table parsed
|
5
distribution/std-lib/Table/src/Main.enso
Normal file
5
distribution/std-lib/Table/src/Main.enso
Normal file
@ -0,0 +1,5 @@
|
||||
from Base import all
|
||||
|
||||
import Table.Io.Csv
|
||||
|
||||
from Table.Io.Csv export all hiding Parser
|
95
distribution/std-lib/Table/src/Table.enso
Normal file
95
distribution/std-lib/Table/src/Table.enso
Normal file
@ -0,0 +1,95 @@
|
||||
from Base import all
|
||||
import Table.Io.Csv
|
||||
|
||||
## A column-oriented table, wrapping the underlying Java table object.
type Table
    type Table java_table

    ## Renders the leading rows of this table as an ASCII-art text table.

       Missing values are shown as `NA`. When some rows are left out, a
       trailing line reports how many rows were hidden.

       Arguments:
       - show_rows: the number of initial rows that should be displayed.
    display : Integer -> Text
    display show_rows=10 =
        columns = Vector (this.java_table.getColumns [])
        names = columns.map (_.getName [])
        storages = columns.map (_.getStorage [])
        total_rows = this.java_table.nrows []
        shown_rows = min total_rows show_rows
        body = Vector.new shown_rows row_ix->
            storages.map storage->
                if storage.isNa [row_ix] then "NA" else here.get_item_string storage row_ix
        rendered = here.print_table names body
        hidden_rows = total_rows - shown_rows
        if hidden_rows <= 0 then rendered else
            suffix = '\n\u2026 and ' + hidden_rows.to_text + ' hidden rows.'
            rendered + suffix

    ## Builds a JSON object with a single `columns` field, holding an array
       with the JSON representation of every column.
    to_json : Json
    to_json =
        java_columns = Vector (this.java_table.getColumns [])
        column_jsons = java_columns.map here.column_to_json
        Json.Object (Map.singleton "columns" (Json.Array column_jsons))
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.column.Storage.Type.LONG`
|
||||
storage_type_long = 1
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.column.Storage.Type.DOUBLE`
|
||||
storage_type_double = 2
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Keep this in sync with `org.enso.table.data.column.Storage.Type.STRING`
|
||||
storage_type_string = 3
|
||||
|
||||
## PRIVATE

   Converts every item of a string storage into a JSON value: missing items
   become `Json.Null`, all other items become `Json.String`.
string_storage_to_json storage =
    item_count = storage.size []
    Vector.new item_count ix->
        if storage.isNa [ix] then Json.Null else Json.String (storage.getItem [ix])
|
||||
|
||||
## PRIVATE

   Converts every item of a numeric storage into a JSON value: missing items
   become `Json.Null`, all other items become `Json.Number`.
numeric_storage_to_json storage =
    item_count = storage.size []
    Vector.new item_count ix->
        if storage.isNa [ix] then Json.Null else Json.Number (storage.getItem [ix])
|
||||
|
||||
## PRIVATE

   Builds a JSON object describing a single column, with a `name` field and a
   `data` field holding the array of the column's items.
column_to_json col =
    name_json = Json.String (col.getName [])
    storage = col.getStorage []
    # Any storage that is not tagged as a string storage is serialized through
    # the numeric conversion.
    is_text = storage.getType [] == Storage_Type_String
    items = if is_text then here.string_storage_to_json storage else here.numeric_storage_to_json storage
    fields = Map.singleton "name" name_json . insert "data" (Json.Array items)
    Json.Object fields
|
||||
|
||||
|
||||
## PRIVATE

   Returns the text form of the item stored at index `ix`. Items of a string
   storage are returned unchanged, all other items are converted with
   `to_text`.
get_item_string column ix =
    is_text = column.getType [] == Storage_Type_String
    item = column.getItem [ix]
    if is_text then item else item.to_text
|
||||
|
||||
## PRIVATE

   Appends spaces to `txt`, one for every character it is short of `len`.
pad txt len =
    missing = len - txt.characters.length
    txt + " ".repeat missing
|
||||
|
||||
## PRIVATE

   Lays out a header and a vector of rows as a text table. Every column is
   padded to the width of its widest entry (header included) and a divider
   line separates the header from the rows.
print_table header rows =
    widths = Vector.new header.length col_ix->
        header_width = header.at col_ix . characters . length
        widest_cell = 0.upto rows.length . fold 0 acc-> row_ix->
            max acc (rows.at row_ix . at col_ix . characters . length)
        max widest_cell header_width
    padded_header = zip header widths here.pad
    top_line = " " + padded_header.join ' | '
    # Every divider segment is two characters wider than its column.
    separator = widths . map (w -> "-".repeat w+2) . join '+'
    body_lines = rows.map row->
        " " + (zip row widths here.pad . join ' | ')
    ([top_line, separator] + body_lines).join '\n'
|
||||
|
@ -0,0 +1,48 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column containing floating point numbers. */
|
||||
public class DoubleStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public DoubleStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public double getItem(long idx) {
|
||||
return Double.longBitsToDouble(data[(int) idx]);
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.DOUBLE;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column storing 64-bit integers. */
|
||||
public class LongStorage extends Storage {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
|
||||
* i} is missing.
|
||||
*/
|
||||
public LongStorage(long[] data, int size, BitSet isMissing) {
|
||||
this.data = data;
|
||||
this.isMissing = isMissing;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public long getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.LONG;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
}
|
||||
}
|
31
table/src/main/java/org/enso/table/data/column/Storage.java
Normal file
31
table/src/main/java/org/enso/table/data/column/Storage.java
Normal file
@ -0,0 +1,31 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
/** Base class for the storage backing a single data column. */
public abstract class Storage {
  /**
   * The type tags a storage may report from {@link #getType()}.
   *
   * <p>Keep in sync with variables in {@code Table.Table}. The values are duplicated between Enso
   * and Java code so that the Enso side can treat them as trivial constants, without going through
   * the polyglot machinery to read them.
   */
  public static final class Type {
    public static final long LONG = 1;
    public static final long DOUBLE = 2;
    public static final long STRING = 3;
  }

  /** @return the type tag of this column's storage. Must be one of {@link Type} */
  public abstract long getType();

  /** @return the number of elements in this column (including NAs) */
  public abstract long size();

  /**
   * Tells whether the column has no value at the given position.
   *
   * @param idx the index to check
   * @return {@code true} when the value at {@code idx} is missing
   */
  public abstract boolean isNa(long idx);
}
|
@ -0,0 +1,42 @@
|
||||
package org.enso.table.data.column;
|
||||
|
||||
/** A column storing strings. */
|
||||
public class StringStorage extends Storage {
|
||||
private final String[] data;
|
||||
private final int size;
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
*/
|
||||
public StringStorage(String[] data, int size) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idx an index
|
||||
* @return the data item contained at the given index.
|
||||
*/
|
||||
public String getItem(long idx) {
|
||||
return data[(int) idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public long getType() {
|
||||
return Type.STRING;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return data[(int) idx] == null;
|
||||
}
|
||||
}
|
@ -0,0 +1,119 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
|
||||
import org.enso.table.data.column.DoubleStorage;
|
||||
import org.enso.table.data.column.LongStorage;
|
||||
import org.enso.table.data.column.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A column builder for numeric types. Tries to interpret all data as 64-bit integers. If that
|
||||
* becomes impossible, retypes itself to store 64-bit floats. When even that fails, falls back to a
|
||||
* {@link StringStorageBuilder}.
|
||||
*/
|
||||
public class PrimInferredStorageBuilder extends StorageBuilder {
|
||||
private enum Type {
|
||||
LONG,
|
||||
DOUBLE
|
||||
}
|
||||
|
||||
private int size = 0;
|
||||
private long[] data = new long[64];
|
||||
private String[] rawData = new String[64];
|
||||
private final BitSet isMissing = new BitSet();
|
||||
private Type type = Type.LONG;
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public StorageBuilder parseAndAppend(String value) {
|
||||
if (value == null) {
|
||||
ensureAppendable();
|
||||
isMissing.set(size);
|
||||
size++;
|
||||
return this;
|
||||
}
|
||||
switch (type) {
|
||||
case LONG:
|
||||
return appendLong(value);
|
||||
case DOUBLE:
|
||||
return appendDouble(value);
|
||||
default:
|
||||
throw new IllegalStateException();
|
||||
}
|
||||
}
|
||||
|
||||
private StorageBuilder appendLong(String value) {
|
||||
try {
|
||||
long l = Long.parseLong(value);
|
||||
ensureAppendable();
|
||||
rawData[size] = value;
|
||||
data[size] = l;
|
||||
size++;
|
||||
return this;
|
||||
} catch (NumberFormatException ignored) {
|
||||
return failedLong(value);
|
||||
}
|
||||
}
|
||||
|
||||
private StorageBuilder appendDouble(String value) {
|
||||
try {
|
||||
double d = Double.parseDouble(value);
|
||||
ensureAppendable();
|
||||
data[size] = Double.doubleToRawLongBits(d);
|
||||
rawData[size] = value;
|
||||
size++;
|
||||
return this;
|
||||
} catch (NumberFormatException ignored) {
|
||||
return failedDouble(value);
|
||||
}
|
||||
}
|
||||
|
||||
private StorageBuilder failedLong(String value) {
|
||||
try {
|
||||
double d = Double.parseDouble(value);
|
||||
retypeToDouble();
|
||||
ensureAppendable();
|
||||
data[size] = Double.doubleToRawLongBits(d);
|
||||
rawData[size] = value;
|
||||
size++;
|
||||
return this;
|
||||
} catch (NumberFormatException ignored) {
|
||||
return failedDouble(value);
|
||||
}
|
||||
}
|
||||
|
||||
private StorageBuilder failedDouble(String value) {
|
||||
StringStorageBuilder newBuilder = new StringStorageBuilder(rawData, size);
|
||||
newBuilder.parseAndAppend(value);
|
||||
return newBuilder;
|
||||
}
|
||||
|
||||
private void retypeToDouble() {
|
||||
for (int i = 0; i < size; i++) {
|
||||
data[i] = Double.doubleToRawLongBits(data[i]);
|
||||
}
|
||||
type = Type.DOUBLE;
|
||||
}
|
||||
|
||||
// TODO[MK] Consider storing data `rawData` in non-linear storage to avoid reallocations.
|
||||
private void ensureAppendable() {
|
||||
if (size >= data.length) {
|
||||
long[] newData = new long[2 * data.length];
|
||||
String[] newRawData = new String[2 * data.length];
|
||||
System.arraycopy(data, 0, newData, 0, data.length);
|
||||
System.arraycopy(rawData, 0, newRawData, 0, rawData.length);
|
||||
data = newData;
|
||||
rawData = newRawData;
|
||||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public Storage seal() {
|
||||
if (type == Type.LONG) {
|
||||
return new LongStorage(data, size, isMissing);
|
||||
} else {
|
||||
return new DoubleStorage(data, size, isMissing);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
|
||||
/** A builder used by the parser to add items into a column. */
|
||||
public abstract class StorageBuilder {
|
||||
/**
|
||||
* Called by the parser to notify the builder about the next value being appended. The value is
|
||||
* passed in a String form and the builder is responsible for parsing it into its own format. The
|
||||
* value may be null, in which case it should be considered missing.
|
||||
*
|
||||
* @param value the value to parse and append
|
||||
* @return a storage builder instance to use for future calls
|
||||
*/
|
||||
public abstract StorageBuilder parseAndAppend(String value);
|
||||
|
||||
/**
|
||||
* Closes the storage builder and returns a fully parsed column.
|
||||
*
|
||||
* @return the storage resulting from this builder's operation.
|
||||
*/
|
||||
public abstract Storage seal();
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.StringStorage;
|
||||
|
||||
/** A column builder appending all the values passed to it in an unchanged form. */
|
||||
public class StringStorageBuilder extends StorageBuilder {
|
||||
|
||||
private String[] data;
|
||||
private int size;
|
||||
|
||||
/**
|
||||
* Creates a new builder from given partial data. Useful for other builders when a type transition
|
||||
* is required.
|
||||
*
|
||||
* @param data the initial data storage
|
||||
* @param size the number of already filled elements
|
||||
*/
|
||||
public StringStorageBuilder(String[] data, int size) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
/** Creates an empty builder. */
|
||||
public StringStorageBuilder() {
|
||||
data = new String[64];
|
||||
size = 0;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public StorageBuilder parseAndAppend(String value) {
|
||||
ensureAppendable();
|
||||
data[size++] = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
private void ensureAppendable() {
|
||||
if (size >= data.length) {
|
||||
String[] newData = new String[2 * data.length];
|
||||
System.arraycopy(data, 0, newData, 0, data.length);
|
||||
data = newData;
|
||||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public Storage seal() {
|
||||
return new StringStorage(data, size);
|
||||
}
|
||||
}
|
30
table/src/main/java/org/enso/table/data/table/Column.java
Normal file
30
table/src/main/java/org/enso/table/data/table/Column.java
Normal file
@ -0,0 +1,30 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
import org.enso.table.data.column.Storage;
|
||||
|
||||
/** A representation of a column. Consists of a column name and the underlying storage. */
|
||||
public class Column {
|
||||
private final String name;
|
||||
private final Storage storage;
|
||||
|
||||
/**
|
||||
* Creates a new column.
|
||||
*
|
||||
* @param name the column name
|
||||
* @param storage the underlying storage
|
||||
*/
|
||||
public Column(String name, Storage storage) {
|
||||
this.name = name;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
/** @return the column name */
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/** @return the underlying storage */
|
||||
public Storage getStorage() {
|
||||
return storage;
|
||||
}
|
||||
}
|
30
table/src/main/java/org/enso/table/data/table/Table.java
Normal file
30
table/src/main/java/org/enso/table/data/table/Table.java
Normal file
@ -0,0 +1,30 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
/** A representation of a table structure. */
|
||||
public class Table {
|
||||
|
||||
private final Column[] columns;
|
||||
|
||||
/**
|
||||
* Creates a new table
|
||||
*
|
||||
* @param columns the columns contained in this table.
|
||||
*/
|
||||
public Table(Column[] columns) {
|
||||
this.columns = columns;
|
||||
}
|
||||
|
||||
/** @return the number of rows in this table */
|
||||
public long nrows() {
|
||||
if (columns == null || columns.length == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return columns[0].getStorage().size();
|
||||
}
|
||||
}
|
||||
|
||||
/** @return the columns of this table */
|
||||
public Column[] getColumns() {
|
||||
return columns;
|
||||
}
|
||||
}
|
90
table/src/main/java/org/enso/table/format/csv/Parser.java
Normal file
90
table/src/main/java/org/enso/table/format/csv/Parser.java
Normal file
@ -0,0 +1,90 @@
|
||||
package org.enso.table.format.csv;
|
||||
|
||||
import com.univocity.parsers.csv.CsvParser;
|
||||
import com.univocity.parsers.csv.CsvParserSettings;
|
||||
import org.enso.table.data.column.Storage;
|
||||
import org.enso.table.data.column.builder.StorageBuilder;
|
||||
import org.enso.table.data.column.builder.PrimInferredStorageBuilder;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/** A CSV parser. */
|
||||
public class Parser {
|
||||
private final boolean hasHeader;
|
||||
private final String unnamedColumnPrefix;
|
||||
|
||||
private Parser(boolean hasHeader, String unnamedColumnPrefix) {
|
||||
this.hasHeader = hasHeader;
|
||||
this.unnamedColumnPrefix = unnamedColumnPrefix;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new parser with given parameters.
|
||||
*
|
||||
* @param hasHeader whether or not the first line of the file should be used as a header line
|
||||
* @param unnamedColumnPrefix the string to prepend to column index for columns with unknown name.
|
||||
* @return a CSV parser
|
||||
*/
|
||||
public static Parser create(boolean hasHeader, String unnamedColumnPrefix) {
|
||||
return new Parser(hasHeader, unnamedColumnPrefix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the given input stream into a Table.
|
||||
*
|
||||
* @param inputStream the input stream to parse
|
||||
* @return a table corresponding to the contents of the stream
|
||||
*/
|
||||
public Table parse(InputStream inputStream) {
|
||||
CsvParserSettings settings = new CsvParserSettings();
|
||||
settings.setHeaderExtractionEnabled(hasHeader);
|
||||
settings.detectFormatAutomatically();
|
||||
CsvParser parser = new CsvParser(settings);
|
||||
parser.beginParsing(inputStream);
|
||||
StorageBuilder[] builders = null;
|
||||
String[] header = parser.getContext().headers();
|
||||
// TODO[MK] Handle irregular table sizes
|
||||
if (header != null) {
|
||||
builders = initBuilders(header.length);
|
||||
}
|
||||
String[] row = parser.parseNext();
|
||||
if (row == null) {
|
||||
return new Table(new Column[0]);
|
||||
}
|
||||
if (builders == null) {
|
||||
builders = initBuilders(row.length);
|
||||
}
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
builders[i] = builders[i].parseAndAppend(handleNa(row[i]));
|
||||
}
|
||||
while ((row = parser.parseNext()) != null) {
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
builders[i] = builders[i].parseAndAppend(handleNa(row[i]));
|
||||
}
|
||||
}
|
||||
Column[] columns = new Column[builders.length];
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
String name = header != null ? header[i] : unnamedColumnPrefix + i;
|
||||
Storage col = builders[i].seal();
|
||||
columns[i] = new Column(name, col);
|
||||
}
|
||||
return new Table(columns);
|
||||
}
|
||||
|
||||
private StorageBuilder[] initBuilders(int count) {
|
||||
StorageBuilder[] res = new StorageBuilder[count];
|
||||
for (int i = 0; i < count; i++) {
|
||||
res[i] = new PrimInferredStorageBuilder();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
private String handleNa(String raw) {
|
||||
if (raw == null || raw.length() == 0) {
|
||||
return null;
|
||||
}
|
||||
return raw;
|
||||
}
|
||||
}
|
5
test/Table_Tests/data/simple_empty.csv
Normal file
5
test/Table_Tests/data/simple_empty.csv
Normal file
@ -0,0 +1,5 @@
|
||||
a,b,c
|
||||
1,2,
|
||||
4,,6
|
||||
7,8,9
|
||||
10,11,12
|
|
8
test/Table_Tests/data/varied_column.csv
Normal file
8
test/Table_Tests/data/varied_column.csv
Normal file
@ -0,0 +1,8 @@
|
||||
2005-02-25,2005-02-25,1,1,1,1
|
||||
2005-02-28,2005-02-28,2,2,2,2
|
||||
4,2005-03-01,3,3,3,3
|
||||
2005-03-02,,4,4,4,4
|
||||
,2005-03-03,5,5,5,5
|
||||
2005-03-04,2005-03-04,,6,6.25,6.25
|
||||
2005-03-07,2005-03-07,7,7,7,7
|
||||
2005-03-08,2005-03-08,8,8,8,osiem
|
|
6
test/Table_Tests/package.yaml
Normal file
6
test/Table_Tests/package.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
name: Table_Tests
|
||||
version: 0.0.1
|
||||
enso-version: default
|
||||
license: MIT
|
||||
author: enso-dev@enso.org
|
||||
maintainer: enso-dev@enso.org
|
6
test/Table_Tests/src/Main.enso
Normal file
6
test/Table_Tests/src/Main.enso
Normal file
@ -0,0 +1,6 @@
|
||||
import Test
|
||||
|
||||
import Table_Tests.Table_Spec
|
||||
|
||||
# Entry point of the Table_Tests project: passes `Table_Spec.spec` to `Test.Suite.runMain`.
main = Test.Suite.runMain <|
|
||||
Table_Spec.spec
|
36
test/Table_Tests/src/Table_Spec.enso
Normal file
36
test/Table_Tests/src/Table_Spec.enso
Normal file
@ -0,0 +1,36 @@
|
||||
from Base import all
|
||||
from Table import all
|
||||
import Test
|
||||
|
||||
# Specification of CSV reading for tables. Each case reads a fixture from the project's `data`
# directory with `read_csv` and compares the table's `to_json` output against expected JSON.
spec = describe "Tables" <|
|
||||
# Reads `simple_empty.csv`; the expected data uses `Json.Null` in columns "b" and "c".
it "should parse a simple numeric table and convert it to JSON" <|
|
||||
simple_empty = (Enso_Project.data / "simple_empty.csv") . read_csv
|
||||
c_1_data = [1, 4, 7, 10]
|
||||
c_2_data = [2, Json.Null, 8, 11]
|
||||
c_3_data = [Json.Null, 6, 9, 12]
|
||||
|
||||
c_1 = Json.from_pairs [["name", "a"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "b"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "c"], ["data", c_3_data]]
|
||||
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3]]]
|
||||
|
||||
simple_empty.to_json.should equal expected
|
||||
# Reads `varied_column.csv` with `has_header=False`; the expected columns are named C0..C5 and
# hold text, integer, decimal and mixed text data respectively.
it "should correctly infer types of varied-type columns" <|
|
||||
varied_column = (Enso_Project.data / "varied_column.csv") . read_csv has_header=False
|
||||
c_1_data = ["2005-02-25", "2005-02-28", "4", "2005-03-02", Json.Null, "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_2_data = ["2005-02-25", "2005-02-28", "2005-03-01", Json.Null, "2005-03-03", "2005-03-04", "2005-03-07", "2005-03-08"]
|
||||
c_3_data = [1, 2, 3, 4, 5, Json.Null, 7, 8]
|
||||
c_4_data = [1, 2, 3, 4, 5, 6, 7, 8]
|
||||
c_5_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.25, 7.0, 8.0]
|
||||
c_6_data = ['1', '2', '3', '4', '5', '6.25', '7', 'osiem']
|
||||
|
||||
c_1 = Json.from_pairs [["name", "C0"], ["data", c_1_data]]
|
||||
c_2 = Json.from_pairs [["name", "C1"], ["data", c_2_data]]
|
||||
c_3 = Json.from_pairs [["name", "C2"], ["data", c_3_data]]
|
||||
c_4 = Json.from_pairs [["name", "C3"], ["data", c_4_data]]
|
||||
c_5 = Json.from_pairs [["name", "C4"], ["data", c_5_data]]
|
||||
c_6 = Json.from_pairs [["name", "C5"], ["data", c_6_data]]
|
||||
|
||||
expected = Json.from_pairs [["columns", [c_1, c_2, c_3, c_4, c_5, c_6]]]
|
||||
varied_column.to_json.should equal expected
|
@ -0,0 +1,16 @@
|
||||
Copyright (c) 2015. Univocity Software Pty Ltd
|
||||
Copyright (c) 2018. Univocity Software Pty Ltd
|
||||
Copyright 2014 Univocity Software Pty Ltd
|
||||
Copyright 2015 Univocity Software Pty Ltd
|
||||
Copyright 2016 Univocity Software Pty Ltd
|
||||
Copyright 2017 Univocity Software Pty Ltd
|
||||
Copyright 2018 Univocity Software Pty Ltd
|
||||
Copyright 2019 Univocity Software Pty Ltd
|
||||
Copyright (c) 2015. Univocity Software Pty Ltd
|
||||
Copyright (c) 2018. Univocity Software Pty Ltd
|
||||
Copyright 2014 Univocity Software Pty Ltd
|
||||
Copyright 2015 Univocity Software Pty Ltd
|
||||
Copyright 2016 Univocity Software Pty Ltd
|
||||
Copyright 2017 Univocity Software Pty Ltd
|
||||
Copyright 2018 Univocity Software Pty Ltd
|
||||
Copyright 2019 Univocity Software Pty Ltd
|
3
tools/legal-review/std-lib-Table/report-state
Normal file
3
tools/legal-review/std-lib-Table/report-state
Normal file
@ -0,0 +1,3 @@
|
||||
B0D696DAAB04B954B3E2B2259CA8B8E9A9632DE3FBCB462074D2C2C61382A211
|
||||
F365EBA0EFA7B64DA286C399792A4121F9F386B125CFD02C186936A3AB7B8E71
|
||||
0
|
@ -0,0 +1 @@
|
||||
tools/legal-review/license-texts/APACHE2.0
|
Loading…
Reference in New Issue
Block a user