Spreadsheet: Add a CSV reader and writer

This is not utilised yet.
This commit is contained in:
AnotherTest 2020-11-23 11:52:45 +03:30 committed by Andreas Kling
parent 438829a1d5
commit 31523f6c64
Notes: sideshowbarker 2024-07-19 01:16:36 +09:00
8 changed files with 989 additions and 0 deletions

View File

@ -15,6 +15,7 @@ set(SOURCES
CondFormattingViewUI.h
HelpWindow.cpp
JSIntegration.cpp
Readers/XSV.cpp
Spreadsheet.cpp
SpreadsheetModel.cpp
SpreadsheetView.cpp

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "XSV.h"
#include <AK/Forward.h>
#include <AK/StringView.h>
namespace Reader {
// XSV reader preconfigured for comma-separated values: fields are separated
// by ",", quoted with `"`, and a quote inside a quoted field is escaped by
// repeating it.
class CSV : public XSV {
public:
    // Parses `source` on construction using the CSV traits described above.
    CSV(StringView source, ParserBehaviour behaviours = default_behaviours())
        : XSV(source, ParserTraits { ",", "\"", ParserTraits::Repeat }, behaviours)
    {
    }
};
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/TestSuite.h>
#include "../CSV.h"
#include "../XSV.h"
#include <LibCore/File.h>
// Well-formed CSV with a header row must parse without error and expose its
// cells by row index and column name.
TEST_CASE(should_parse_valid_data)
{
{
// Leading field spaces are trimmed. The last row covers a doubled-quote
// escape inside a quoted field (`"""x"` -> `"x`) and a bare quote in the
// middle of an unquoted field (`y"z`), which the default behaviours accept.
auto data = R"~~~(Foo, Bar, Baz
1, 2, 3
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
EXPECT_EQ(csv[2]["Foo"], "\"x");
EXPECT_EQ(csv[2]["Bar"], "y\"z");
}
{
// Leading and trailing field spaces are both trimmed. Whitespace inside a
// quoted field ("5 ") must survive; whitespace around it must not.
auto data = R"~~~(Foo, Bar, Baz
1 , 2, 3
4, "5 " , 6
"""x", y"z, 9 )~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
EXPECT_EQ(csv[1]["Bar"], "5 ");
EXPECT_EQ(csv[2]["Foo"], "\"x");
EXPECT_EQ(csv[2]["Baz"], "9");
}
}
// Malformed input must be reported through has_error()/error() with the
// specific ReadError that describes the problem.
TEST_CASE(should_fail_nicely)
{
{
// The data row has two fields but the header row declares three.
auto data = R"~~~(Foo, Bar, Baz
x, y)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
}
{
// The last field opens a quote that is never closed.
auto data = R"~~~(Foo, Bar, Baz
x, y, "z)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
}
}
// A parsed document must be usable in a range-based for loop over its rows.
TEST_CASE(should_iterate_rows)
{
auto data = R"~~~(Foo, Bar, Baz
1, 2, 3
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
EXPECT(!csv.has_error());
// `ran` is overwritten on every iteration, so the final check only reflects
// whether the first column of the LAST row visited was non-empty.
bool ran = false;
for (auto row : csv)
ran = !row[0].is_empty();
EXPECT(ran);
}
// Benchmark: parses the fixture stored next to this source file (this file's
// path with ".data" appended) and expects exactly 100000 data rows in it.
BENCHMARK_CASE(fairly_big_data)
{
auto file_or_error = Core::File::open(__FILE__ ".data", Core::IODevice::OpenMode::ReadOnly);
EXPECT_EQ_FORCE(file_or_error.is_error(), false);
// `data` owns the bytes that the parser views; it must stay alive for as
// long as `csv` is used.
auto data = file_or_error.value()->read_all();
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
EXPECT(!csv.has_error());
EXPECT_EQ(csv.size(), 100000u);
}
// NOTE(review): TEST_MAIN comes from <AK/TestSuite.h>; presumably it emits the
// program entry point that runs the cases above as suite "XSV" - confirm.
TEST_MAIN(XSV)

View File

@ -0,0 +1,272 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "XSV.h"
#include <AK/StringBuilder.h>
namespace Reader {
// Intersection of two behaviour flag sets, computed on the underlying u32
// representation.
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{
    auto left_bits = static_cast<u32>(left);
    auto right_bits = static_cast<u32>(right);
    return static_cast<ParserBehaviour>(left_bits & right_bits);
}
// Union of two behaviour flag sets, computed on the underlying u32
// representation.
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{
    auto left_bits = static_cast<u32>(left);
    auto right_bits = static_cast<u32>(right);
    return static_cast<ParserBehaviour>(left_bits | right_bits);
}
// Records `error` unless an error has already been recorded, so the first
// failure encountered while parsing is the one that gets reported.
void XSV::set_error(ReadError error)
{
    if (m_error != ReadError::None)
        return;
    m_error = error;
}
// Returns the column names read from the header row as owned strings.
// The result is empty when no header row was read.
Vector<String> XSV::headers() const
{
    Vector<String> names;
    for (auto& name : m_names) {
        // A field either views the source text directly or owns an unescaped copy.
        if (name.is_string_view)
            names.append(name.as_string_view);
        else
            names.append(name.as_string.view());
    }
    return names;
}
// Parses the whole source: an optional header row first, then data rows until
// the input is exhausted or an error has been recorded.
void XSV::parse()
{
    auto wants_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
    if (wants_headers)
        read_headers();

    for (;;) {
        if (has_error() || m_lexer.is_eof())
            break;
        m_rows.append(read_row());
    }

    // Stopping before the end of input means something unparseable is left over.
    // set_error() keeps any error that was recorded earlier.
    if (!m_lexer.is_eof())
        set_error(ReadError::DataPastLogicalEnd);
}
// Reads the next row and stores it as the column names.
// Finding names already present is treated as an internal error; the old names
// are discarded and replaced regardless.
void XSV::read_headers()
{
    auto names_already_present = !m_names.is_empty();
    if (names_already_present) {
        set_error(ReadError::InternalError);
        m_names.clear();
    }

    m_names = read_row(true);
}
// Reads one row: fields separated by the configured separator, terminated by
// "\n", "\r\n" or end of input.
//
// `header_row` is true while reading the header itself, which suppresses the
// column-count check against the (not yet known) header.
Vector<XSV::Field> XSV::read_row(bool header_row)
{
    Vector<Field> fields;

    auto at_row_end = [this] {
        return m_lexer.is_eof() || m_lexer.next_is('\n') || m_lexer.next_is("\r\n");
    };

    for (bool is_first_field = true; !at_row_end(); is_first_field = false) {
        // Every field except the first must be preceded by a separator.
        if (!is_first_field && !m_lexer.consume_specific(m_traits.separator))
            break;
        fields.append(read_one_field());
    }

    // Anything other than a line terminator at this point is stray data.
    if (!m_lexer.is_eof() && !m_lexer.consume_specific("\r\n") && !m_lexer.consume_specific('\n'))
        set_error(ReadError::DataPastLogicalEnd);

    auto has_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
    if (!header_row && has_headers && fields.size() != m_names.size())
        set_error(ReadError::NonConformingColumnCount);

    return fields;
}
// Reads a single field, quoted or unquoted, applying the configured leading
// and trailing whitespace trimming (" ", "\t", "\v").
XSV::Field XSV::read_one_field()
{
    auto has_behaviour = [this](ParserBehaviour flag) {
        return (m_behaviours & flag) != ParserBehaviour::None;
    };

    if (has_behaviour(ParserBehaviour::TrimLeadingFieldSpaces))
        m_lexer.consume_while(is_any_of(" \t\v"));

    bool is_quoted = m_lexer.next_is(m_traits.quote.view());
    Field field = is_quoted ? read_one_quoted_field() : read_one_unquoted_field();

    if (!has_behaviour(ParserBehaviour::TrimTrailingFieldSpaces))
        return field;

    // For a quoted field this skips the whitespace between the closing quote
    // and whatever follows it.
    m_lexer.consume_while(is_any_of(" \t\v"));
    if (is_quoted)
        return field;

    // An unquoted field has already swallowed its trailing whitespace, so it
    // has to be cut off the field's contents instead.
    StringView contents = field.is_string_view ? field.as_string_view : field.as_string.view();
    if (!contents.is_empty()) {
        size_t kept_length = contents.length();
        while (kept_length > 0 && contents.substring_view(kept_length - 1, 1).is_one_of(" ", "\t", "\v"))
            --kept_length;
        contents = contents.substring_view(0, kept_length);
    }

    if (field.is_string_view)
        field.as_string_view = contents;
    else
        field.as_string = field.as_string.substring(0, contents.length());

    return field;
}
// Reads a field that starts with the quote sequence, up to (and including) the
// matching closing quote.
//
// As long as no escaped quote is seen, the field is returned as a view into
// the source text. The first escaped quote forces a switch to an owned copy in
// which every escape sequence is replaced by a single quote.
//
// Errors recorded:
//  - InternalError if the lexer is not positioned at a quote on entry.
//  - QuoteFailure if no closing quote is found (end of input, or a newline
//    while AllowNewlinesInFields is off).
XSV::Field XSV::read_one_quoted_field()
{
    if (!m_lexer.consume_specific(m_traits.quote))
        set_error(ReadError::InternalError);

    size_t start = m_lexer.tell(), end = start;
    bool is_copy = false;
    StringBuilder builder;
    auto allow_newlines = (m_behaviours & ParserBehaviour::AllowNewlinesInFields) != ParserBehaviour::None;

    // Called right after an escape sequence has been consumed: records one
    // literal quote, switching to copy mode first if necessary.
    auto append_escaped_quote = [&] {
        // If there is an escaped quote, we have no choice but to make a copy.
        if (!is_copy) {
            is_copy = true;
            builder.append(m_source.substring_view(start, end - start));
        }
        builder.append(m_traits.quote);
        end = m_lexer.tell();
    };

    while (!m_lexer.is_eof()) {
        if (m_traits.quote_escape == ParserTraits::Backslash) {
            if (m_lexer.consume_specific('\\')) {
                if (m_lexer.consume_specific(m_traits.quote)) {
                    append_escaped_quote();
                    continue;
                }
                // Not an escape sequence: put the backslash back so it is read
                // as an ordinary character below. Without this it would be
                // dropped from the output once the field is in copy mode.
                m_lexer.retreat();
            }
        } else if (m_traits.quote_escape == ParserTraits::Repeat) {
            if (m_lexer.consume_specific(m_traits.quote)) {
                if (m_lexer.consume_specific(m_traits.quote)) {
                    append_escaped_quote();
                    continue;
                }
                // A single quote closes the field; un-consume it so the check
                // after the loop can consume it.
                for (size_t i = 0; i < m_traits.quote.length(); ++i)
                    m_lexer.retreat();
                break;
            }
        }

        if (m_lexer.next_is(m_traits.quote.view()))
            break;

        if (!allow_newlines && (m_lexer.next_is('\n') || m_lexer.next_is("\r\n")))
            break;

        char ch = m_lexer.consume();
        if (is_copy)
            builder.append(ch);
        end = m_lexer.tell();
    }

    if (!m_lexer.consume_specific(m_traits.quote))
        set_error(ReadError::QuoteFailure);

    if (is_copy)
        return { {}, builder.to_string(), false };

    return { m_source.substring_view(start, end - start), {}, true };
}
// Reads a field that does not start with a quote: everything up to the next
// separator, line terminator or end of input. The result is always a view
// into the source text.
//
// A quote sequence inside the field is kept as part of it; it is additionally
// reported as QuoteFailure unless QuoteOnlyInFieldStart is set.
XSV::Field XSV::read_one_unquoted_field()
{
    size_t field_start = m_lexer.tell();
    size_t field_end = field_start;
    bool quotes_allowed_mid_field = (m_behaviours & ParserBehaviour::QuoteOnlyInFieldStart) != ParserBehaviour::None;

    while (!m_lexer.is_eof()) {
        bool at_separator = m_lexer.next_is(m_traits.separator.view());
        if (at_separator || m_lexer.next_is("\r\n") || m_lexer.next_is("\n"))
            break;

        if (m_lexer.consume_specific(m_traits.quote)) {
            if (!quotes_allowed_mid_field)
                set_error(ReadError::QuoteFailure);
        } else {
            m_lexer.consume();
        }
        field_end = m_lexer.tell();
    }

    return { m_source.substring_view(field_start, field_end - field_start), {}, true };
}
// Looks up a cell of this row by column name.
// Asserts that a header row was read and that it contains `name`.
StringView XSV::Row::operator[](StringView name) const
{
    auto& column_names = m_xsv.m_names;
    ASSERT(!column_names.is_empty());

    auto match = column_names.find([&](auto&& candidate) { return name == candidate; });
    ASSERT(!match.is_end());

    return (*this)[match.index()];
}
// Returns the cell at the given zero-based column of this row, as a view
// into either the source text or the field's own unescaped copy.
StringView XSV::Row::operator[](size_t column) const
{
    auto& cell = m_xsv.m_rows[m_index][column];
    return cell.is_string_view ? cell.as_string_view : cell.as_string.view();
}
// Const row access; forwards to the non-const overload, which does not modify
// the parser.
const XSV::Row XSV::operator[](size_t index) const
{
    auto& mutable_self = const_cast<XSV&>(*this);
    return mutable_self[index];
}
// Returns a handle to the data row at `index` (header row excluded).
// Asserts that `index` is in range.
XSV::Row XSV::operator[](size_t index)
{
    ASSERT(index < m_rows.size());
    return Row(*this, index);
}
}

View File

@ -0,0 +1,208 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/GenericLexer.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <AK/Vector.h>
namespace Reader {
// Bit flags that tune how the reader interprets its input; combine them with
// operator| and test them with operator&.
enum class ParserBehaviour : u32 {
    None = 0,
    // Treat the first row as column names.
    ReadHeaders = 1u << 0,
    // Let quoted fields span line breaks.
    AllowNewlinesInFields = 1u << 1,
    // Skip spaces, tabs and vertical tabs in front of each field.
    TrimLeadingFieldSpaces = 1u << 2,
    // Drop spaces, tabs and vertical tabs after each field.
    TrimTrailingFieldSpaces = 1u << 3,
    // Only a quote at the very start of a field opens a quoted field; quotes
    // later in an unquoted field are accepted as ordinary text.
    QuoteOnlyInFieldStart = 1u << 4,
};
// Flag-set intersection and union for ParserBehaviour.
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right);
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right);
// Describes the dialect being read. This is brace-initialised positionally
// (separator, quote, quote_escape), so the member order is part of the interface.
struct ParserTraits {
// Text that separates two fields of a row; has no default.
String separator;
// Text that opens and closes a quoted field.
String quote { "\"" };
// How a literal quote is written inside a quoted field.
enum {
// The quote is written twice.
Repeat,
// The quote is preceded by a backslash.
Backslash,
} quote_escape { Repeat };
};
// X-macro listing every reader error as E(enumerator, human-readable message).
// Define E before expanding this list and #undef it afterwards.
#define ENUMERATE_READ_ERRORS() \
    E(None, "No errors") \
    E(NonConformingColumnCount, "Header count does not match given column count") \
    E(QuoteFailure, "Quoting failure") \
    E(InternalError, "Internal error") \
    E(DataPastLogicalEnd, "Extra data past the logical end of the rows")
// Reader error codes, generated from ENUMERATE_READ_ERRORS so that the codes
// and their messages cannot drift apart.
enum class ReadError {
#define E(enumerator, message) enumerator,
    ENUMERATE_READ_ERRORS()
#undef E
};
// Behaviours used when the caller does not specify any: quotes are only
// special at the start of a field.
inline constexpr ParserBehaviour default_behaviours()
{
    constexpr auto defaults = ParserBehaviour::QuoteOnlyInFieldStart;
    return defaults;
}
// Reader for delimiter-separated text ("X-separated values"). The dialect is
// described by ParserTraits and tuned by ParserBehaviour flags.
//
// The whole input is parsed in the constructor; afterwards the object is a
// read-only table of rows. Check has_error() before using the rows.
//
// Cells are handed out as views, in most cases straight into `source`, so the
// buffer behind `source` must outlive this object.
class XSV {
public:
    // Parses `source` immediately. `traits` is copied, so passing a temporary
    // is fine.
    XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours())
        : m_source(source)
        , m_lexer(m_source)
        , m_traits(traits)
        , m_behaviours(behaviours)
    {
        parse();
    }

    virtual ~XSV() { }

    bool has_error() const { return m_error != ReadError::None; }
    // The first error recorded while parsing, or ReadError::None.
    ReadError error() const { return m_error; }
    // Human-readable message for error().
    String error_string() const
    {
        switch (m_error) {
#define E(x, y)        \
    case ReadError::x: \
        return y;

            ENUMERATE_READ_ERRORS();
#undef E
        }
        ASSERT_NOT_REACHED();
    }

    // Number of data rows (the header row is not counted).
    size_t size() const { return m_rows.size(); }
    // Column names from the header row; empty if none was read.
    Vector<String> headers() const;

    // Lightweight handle to one data row. It refers back to the owning XSV and
    // must not outlive it.
    class Row {
    public:
        explicit Row(XSV& xsv, size_t index)
            : m_xsv(xsv)
            , m_index(index)
        {
        }

        // Cell lookup by column name (requires a header row) or by column index.
        StringView operator[](StringView name) const;
        StringView operator[](size_t column) const;

        size_t index() const { return m_index; }

        // FIXME: Implement begin() and end(), keeping `Field' out of the API.

    private:
        XSV& m_xsv;
        size_t m_index { 0 };
    };

    // Forward iterator over the data rows; `const_` selects whether it was
    // created from a const or a non-const XSV.
    template<bool const_>
    class RowIterator {
    public:
        explicit RowIterator(const XSV& xsv, size_t init_index = 0) requires(const_)
            : m_xsv(const_cast<XSV&>(xsv))
            , m_index(init_index)
        {
        }

        explicit RowIterator(XSV& xsv, size_t init_index = 0) requires(!const_)
            : m_xsv(xsv)
            , m_index(init_index)
        {
        }

        Row operator*() const { return Row { m_xsv, m_index }; }
        Row operator*() requires(!const_) { return Row { m_xsv, m_index }; }

        RowIterator& operator++()
        {
            ++m_index;
            return *this;
        }

        bool is_end() const { return m_index == m_xsv.m_rows.size(); }
        // Iterators are equal when they point at the same row of the same XSV,
        // regardless of constness.
        bool operator==(const RowIterator& other) const
        {
            return m_index == other.m_index && &m_xsv == &other.m_xsv;
        }
        bool operator==(const RowIterator<!const_>& other) const
        {
            return m_index == other.m_index && &m_xsv == &other.m_xsv;
        }

    private:
        XSV& m_xsv;
        size_t m_index { 0 };
    };

    const Row operator[](size_t index) const;
    Row operator[](size_t index);

    auto begin() { return RowIterator<false>(*this); }
    auto end() { return RowIterator<false>(*this, m_rows.size()); }

    auto begin() const { return RowIterator<true>(*this); }
    auto end() const { return RowIterator<true>(*this, m_rows.size()); }

    using ConstIterator = RowIterator<true>;
    using Iterator = RowIterator<false>;

private:
    // One parsed cell: either a view into the source text, or an owned string
    // when escape sequences had to be rewritten.
    struct Field {
        StringView as_string_view;
        String as_string; // This member only used if the parser couldn't use the original source verbatim.
        bool is_string_view { true };

        bool operator==(StringView other) const
        {
            if (is_string_view)
                return other == as_string_view;
            return as_string == other;
        }
    };

    void set_error(ReadError error);
    void parse();
    void read_headers();
    Vector<Field> read_row(bool header_row = false);
    Field read_one_field();
    Field read_one_quoted_field();
    Field read_one_unquoted_field();

    StringView m_source;
    GenericLexer m_lexer;
    // Held by value: callers such as Reader::CSV pass a temporary, which a
    // reference member would leave dangling once the constructor returns.
    ParserTraits m_traits;
    ParserBehaviour m_behaviours;
    Vector<Field> m_names;
    Vector<Vector<Field>> m_rows;
    ReadError m_error { ReadError::None };
};
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "XSV.h"
#include <AK/Forward.h>
#include <AK/StringView.h>
namespace Writer {
// XSV writer preconfigured for comma-separated values: fields are separated
// by ",", quoted with `"`, and a quote inside a quoted field is escaped by
// repeating it. All output is written during construction.
template<typename ContainerType>
class CSV : public XSV<ContainerType> {
public:
    CSV(OutputStream& output, const ContainerType& data, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours())
        : XSV<ContainerType>(output, data, WriterTraits { ",", "\"", WriterTraits::Repeat }, headers, behaviours)
    {
    }
};
}

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/TestSuite.h>
#include "../CSV.h"
#include "../XSV.h"
#include <AK/MemoryStream.h>
// Plain integer rows with no header: one line per row, fields joined by ",",
// and every row (including the last) followed by a newline.
TEST_CASE(can_write)
{
Vector<Vector<int>> data = {
{ 1, 2, 3 },
{ 4, 5, 6 },
{ 7, 8, 9 },
};
auto buffer = ByteBuffer::create_uninitialized(1024);
OutputMemoryStream stream { buffer };
// The writer emits everything from its constructor.
Writer::CSV csv(stream, data);
auto expected_output = R"~(1,2,3
4,5,6
7,8,9
)~";
EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
// Passing headers makes the writer emit a header line first. A header that
// contains a quote is wrapped in quotes with the inner quote doubled.
TEST_CASE(can_write_with_header)
{
Vector<Vector<int>> data = {
{ 1, 2, 3 },
{ 4, 5, 6 },
{ 7, 8, 9 },
};
auto buffer = ByteBuffer::create_uninitialized(1024);
OutputMemoryStream stream { buffer };
Writer::CSV csv(stream, data, { "A", "B\"", "C" });
auto expected_output = R"~(A,"B""",C
1,2,3
4,5,6
7,8,9
)~";
EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
// With QuoteOnlyInFieldStart, a quote that is not the first character of a
// field does not force quoting, while a field containing the separator is
// still quoted.
TEST_CASE(can_write_with_different_behaviours)
{
Vector<Vector<String>> data = {
{ "Well", "Hello\"", "Friends" },
{ "We\"ll", "Hello,", " Friends" },
};
auto buffer = ByteBuffer::create_uninitialized(1024);
OutputMemoryStream stream { buffer };
Writer::CSV csv(stream, data, { "A", "B\"", "C" }, Writer::WriterBehaviour::QuoteOnlyInFieldStart | Writer::WriterBehaviour::WriteHeaders);
auto expected_output = R"~(A,B",C
Well,Hello",Friends
We"ll,"Hello,", Friends
)~";
EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
// NOTE(review): TEST_MAIN comes from <AK/TestSuite.h>; presumably it emits the
// program entry point that runs the cases above as suite "XSV" - confirm.
TEST_MAIN(XSV)

View File

@ -0,0 +1,215 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/GenericLexer.h>
#include <AK/OwnPtr.h>
#include <AK/Stream.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <AK/Vector.h>
namespace Writer {
// Bit flags that tune how the writer formats its output; combine them with
// operator| and test them with operator&.
enum class WriterBehaviour : u32 {
    None = 0,
    // Emit the header names as the first line.
    WriteHeaders = 1u << 0,
    AllowNewlinesInFields = 1u << 1,
    // Only a quote at the very start of a field forces the field to be quoted.
    QuoteOnlyInFieldStart = 1u << 2,
    QuoteAll = 1u << 3,
};
// Intersection of two behaviour flag sets, computed on the underlying u32
// representation.
inline WriterBehaviour operator&(WriterBehaviour left, WriterBehaviour right)
{
    auto left_bits = static_cast<u32>(left);
    auto right_bits = static_cast<u32>(right);
    return static_cast<WriterBehaviour>(left_bits & right_bits);
}
// Union of two behaviour flag sets, computed on the underlying u32
// representation.
inline WriterBehaviour operator|(WriterBehaviour left, WriterBehaviour right)
{
    auto left_bits = static_cast<u32>(left);
    auto right_bits = static_cast<u32>(right);
    return static_cast<WriterBehaviour>(left_bits | right_bits);
}
// Describes the dialect being written. This is brace-initialised positionally
// (separator, quote, quote_escape), so the member order is part of the interface.
struct WriterTraits {
// Text written between two fields of a row; has no default.
String separator;
// Text written around a field that needs quoting.
String quote { "\"" };
// How a literal quote is written inside a quoted field.
enum {
// The quote is written twice.
Repeat,
// The quote is preceded by a backslash.
Backslash,
} quote_escape { Repeat };
};
// X-macro listing every writer error as E(enumerator, human-readable message).
// Define E before expanding this list and #undef it afterwards.
#define ENUMERATE_WRITE_ERRORS() \
E(None, "No errors") \
E(NonConformingColumnCount, "Header count does not match given column count") \
E(InternalError, "Internal error")
// Writer error codes, generated from ENUMERATE_WRITE_ERRORS so that the codes
// and their messages cannot drift apart.
enum class WriteError {
#define E(enumerator, message) enumerator,
    ENUMERATE_WRITE_ERRORS()
#undef E
};
// Behaviours used when the caller does not specify any: no flags set.
inline constexpr WriterBehaviour default_behaviours()
{
    constexpr auto defaults = WriterBehaviour::None;
    return defaults;
}
// Writer for delimiter-separated text ("X-separated values"). The dialect is
// described by WriterTraits and tuned by WriterBehaviour flags.
//
// `ContainerType` is an iterable of rows, each row an iterable of entries with
// a size(); entries are formatted with String::formatted("{}", ...).
//
// All output is produced in the constructor; afterwards the object only
// carries the error state. `output` and `data` are held by reference and must
// be alive for the duration of the constructor call.
template<typename ContainerType>
class XSV {
public:
    // Writes `data` to `output` immediately. Passing non-empty `headers`
    // implies WriterBehaviour::WriteHeaders. `traits` and `headers` are copied,
    // so passing temporaries is fine.
    XSV(OutputStream& output, const ContainerType& data, const WriterTraits& traits, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours())
        : m_data(data)
        , m_traits(traits)
        , m_behaviours(behaviours)
        , m_names(headers)
        , m_output(output)
    {
        if (!headers.is_empty())
            m_behaviours = m_behaviours | WriterBehaviour::WriteHeaders;

        generate();
    }

    virtual ~XSV() { }

    bool has_error() const { return m_error != WriteError::None; }
    // The first error recorded while writing, or WriteError::None.
    WriteError error() const { return m_error; }
    // Human-readable message for error().
    String error_string() const
    {
        switch (m_error) {
#define E(x, y)         \
    case WriteError::x: \
        return y;

            ENUMERATE_WRITE_ERRORS();
#undef E
        }
        ASSERT_NOT_REACHED();
    }

private:
    // Records `error` unless one is already recorded (first error wins).
    void set_error(WriteError error)
    {
        if (m_error == WriteError::None)
            m_error = error;
    }

    // Writes the optional header line followed by one line per data row.
    // Errors are recorded but do not stop the output.
    void generate()
    {
        auto with_headers = (m_behaviours & WriterBehaviour::WriteHeaders) != WriterBehaviour::None;
        if (with_headers) {
            write_row(m_names);
            if (m_output.write({ "\n", 1 }) != 1)
                set_error(WriteError::InternalError);
        }

        for (auto&& row : m_data) {
            if (with_headers) {
                if (row.size() != m_names.size())
                    set_error(WriteError::NonConformingColumnCount);
            }

            write_row(row);
            if (m_output.write({ "\n", 1 }) != 1)
                set_error(WriteError::InternalError);
        }
    }

    // Writes the entries of `row` joined by the separator, without a line
    // terminator.
    template<typename T>
    void write_row(T&& row)
    {
        bool first = true;
        for (auto&& entry : row) {
            if (!first) {
                if (m_output.write(m_traits.separator.bytes()) != m_traits.separator.length())
                    set_error(WriteError::InternalError);
            }
            first = false;
            write_entry(entry);
        }
    }

    // Writes a single entry, quoting and escaping it when required.
    //
    // An entry is written verbatim only if QuoteAll is not set, it contains
    // neither a newline nor the separator, and it has no quote in a position
    // that matters (anywhere by default, only at the start with
    // QuoteOnlyInFieldStart). Otherwise it is wrapped in quotes and every
    // embedded quote is escaped according to the traits.
    //
    // NOTE(review): AllowNewlinesInFields is not consulted here; fields with
    // newlines are always quoted - confirm this is intended.
    template<typename T>
    void write_entry(T&& entry)
    {
        auto string = String::formatted("{}", FormatIfSupported(entry));

        // QuoteAll forces quoting no matter what the entry contains.
        auto safe_to_write_normally = (m_behaviours & WriterBehaviour::QuoteAll) == WriterBehaviour::None
            && !string.contains("\n")
            && !string.contains(m_traits.separator);
        if (safe_to_write_normally) {
            if ((m_behaviours & WriterBehaviour::QuoteOnlyInFieldStart) == WriterBehaviour::None)
                safe_to_write_normally = !string.contains(m_traits.quote);
            else
                safe_to_write_normally = !string.starts_with(m_traits.quote);
        }

        if (safe_to_write_normally) {
            if (m_output.write(string.bytes()) != string.length())
                set_error(WriteError::InternalError);
            return;
        }

        // Opening quote.
        if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
            set_error(WriteError::InternalError);

        GenericLexer lexer(string);
        while (!lexer.is_eof()) {
            if (lexer.consume_specific(m_traits.quote)) {
                switch (m_traits.quote_escape) {
                case WriterTraits::Repeat:
                    if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
                        set_error(WriteError::InternalError);
                    if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
                        set_error(WriteError::InternalError);
                    break;
                case WriterTraits::Backslash:
                    if (m_output.write({ "\\", 1 }) != 1)
                        set_error(WriteError::InternalError);
                    if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
                        set_error(WriteError::InternalError);
                    break;
                }
                continue;
            }

            auto ch = lexer.consume();
            if (m_output.write({ &ch, 1 }) != 1)
                set_error(WriteError::InternalError);
        }

        // Closing quote.
        if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
            set_error(WriteError::InternalError);
    }

    const ContainerType& m_data;
    // Held by value: callers such as Writer::CSV pass a temporary, which a
    // reference member would leave dangling once the constructor returns.
    WriterTraits m_traits;
    WriterBehaviour m_behaviours;
    // Held by value for the same reason; the `= {}` default argument and
    // brace-initialised header lists are temporaries.
    Vector<StringView> m_names;
    WriteError m_error { WriteError::None };
    OutputStream& m_output;
};
}