mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-11 01:06:01 +03:00
Spreadsheet: Add a CSV reader and writer
This is not utilised yet.
This commit is contained in:
parent
438829a1d5
commit
31523f6c64
Notes:
sideshowbarker
2024-07-19 01:16:36 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/31523f6c644 Pull-request: https://github.com/SerenityOS/serenity/pull/4149 Issue: https://github.com/SerenityOS/serenity/issues/4010 Issue: https://github.com/SerenityOS/serenity/issues/4136
@ -15,6 +15,7 @@ set(SOURCES
|
||||
CondFormattingViewUI.h
|
||||
HelpWindow.cpp
|
||||
JSIntegration.cpp
|
||||
Readers/XSV.cpp
|
||||
Spreadsheet.cpp
|
||||
SpreadsheetModel.cpp
|
||||
SpreadsheetView.cpp
|
||||
|
43
Applications/Spreadsheet/Readers/CSV.h
Normal file
43
Applications/Spreadsheet/Readers/CSV.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "XSV.h"
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace Reader {
|
||||
|
||||
class CSV : public XSV {
|
||||
public:
|
||||
CSV(StringView source, ParserBehaviour behaviours = default_behaviours())
|
||||
: XSV(source, { ",", "\"", ParserTraits::Repeat }, behaviours)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
110
Applications/Spreadsheet/Readers/Test/TestXSV.cpp
Normal file
110
Applications/Spreadsheet/Readers/Test/TestXSV.cpp
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/TestSuite.h>
|
||||
|
||||
#include "../CSV.h"
|
||||
#include "../XSV.h"
|
||||
#include <LibCore/File.h>
|
||||
|
||||
// Parses two small, well-formed documents that start with a header row and
// checks a handful of cells looked up by row index and column name.
TEST_CASE(should_parse_valid_data)
{
    {
        // Only leading field spaces are trimmed in this document.
        auto input = R"~~~(Foo, Bar, Baz
1, 2, 3
4, 5, 6
"""x", y"z, 9)~~~";
        auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces;
        Reader::CSV csv { input, behaviours };
        EXPECT(!csv.has_error());

        EXPECT_EQ(csv[0]["Foo"], "1");
        EXPECT_EQ(csv[2]["Foo"], "\"x");
        EXPECT_EQ(csv[2]["Bar"], "y\"z");
    }

    {
        // Leading and trailing field spaces are both trimmed here; spaces
        // inside a quoted field ("5 ") are expected to survive.
        auto input = R"~~~(Foo, Bar, Baz
1 , 2, 3
4, "5 " , 6
"""x", y"z, 9 )~~~";
        auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
        Reader::CSV csv { input, behaviours };
        EXPECT(!csv.has_error());

        EXPECT_EQ(csv[0]["Foo"], "1");
        EXPECT_EQ(csv[1]["Bar"], "5 ");
        EXPECT_EQ(csv[2]["Foo"], "\"x");
        EXPECT_EQ(csv[2]["Baz"], "9");
    }
}
|
||||
|
||||
// Malformed documents must be reported through has_error()/error().
TEST_CASE(should_fail_nicely)
{
    {
        // The data row has fewer columns than the header row.
        auto input = R"~~~(Foo, Bar, Baz
x, y)~~~";
        auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces;
        Reader::CSV csv { input, behaviours };
        EXPECT(csv.has_error());
        EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
    }

    {
        // The last field opens a quote that is never closed.
        auto input = R"~~~(Foo, Bar, Baz
x, y, "z)~~~";
        auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces;
        Reader::CSV csv { input, behaviours };
        EXPECT(csv.has_error());
        EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
    }
}
|
||||
|
||||
// Range-for over a parsed document must visit rows; `ran` ends up holding
// whether the first field of the last visited row was non-empty.
TEST_CASE(should_iterate_rows)
{
    auto input = R"~~~(Foo, Bar, Baz
1, 2, 3
4, 5, 6
"""x", y"z, 9)~~~";
    auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces;
    Reader::CSV csv { input, behaviours };
    EXPECT(!csv.has_error());

    bool ran = false;
    for (auto row : csv) {
        ran = !row[0].is_empty();
    }

    EXPECT(ran);
}
|
||||
|
||||
// Benchmark: parses the data file that sits next to this source file
// (this file's path with ".data" appended) and expects 100000 data rows.
BENCHMARK_CASE(fairly_big_data)
{
    auto file_or_error = Core::File::open(__FILE__ ".data", Core::IODevice::OpenMode::ReadOnly);
    EXPECT_EQ_FORCE(file_or_error.is_error(), false);

    // Declared before the reader so the bytes it was given outlive it.
    auto contents = file_or_error.value()->read_all();
    auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders;
    Reader::CSV csv { contents, behaviours };

    EXPECT(!csv.has_error());
    EXPECT_EQ(csv.size(), 100000u);
}
|
||||
|
||||
TEST_MAIN(XSV)
|
272
Applications/Spreadsheet/Readers/XSV.cpp
Normal file
272
Applications/Spreadsheet/Readers/XSV.cpp
Normal file
@ -0,0 +1,272 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "XSV.h"
|
||||
#include <AK/StringBuilder.h>
|
||||
|
||||
namespace Reader {
|
||||
|
||||
// Bitwise AND of two behaviour flag sets.
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{
    auto const left_bits = static_cast<u32>(left);
    auto const right_bits = static_cast<u32>(right);
    return static_cast<ParserBehaviour>(left_bits & right_bits);
}
|
||||
|
||||
// Bitwise OR of two behaviour flag sets.
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{
    auto const left_bits = static_cast<u32>(left);
    auto const right_bits = static_cast<u32>(right);
    return static_cast<ParserBehaviour>(left_bits | right_bits);
}
|
||||
|
||||
// Records `error` unless an error has already been recorded; the first
// recorded error is the one that sticks.
void XSV::set_error(ReadError error)
{
    if (m_error != ReadError::None)
        return;

    m_error = error;
}
|
||||
|
||||
// Returns the header names as owned strings, in the order they were read.
Vector<String> XSV::headers() const
{
    Vector<String> names;
    for (auto& name_field : m_names) {
        // A field is either a view into the source or an owned copy.
        if (name_field.is_string_view)
            names.append(name_field.as_string_view);
        else
            names.append(name_field.as_string.view());
    }

    return names;
}
|
||||
|
||||
void XSV::parse()
|
||||
{
|
||||
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
|
||||
read_headers();
|
||||
|
||||
while (!has_error() && !m_lexer.is_eof())
|
||||
m_rows.append(read_row());
|
||||
|
||||
if (!m_lexer.is_eof())
|
||||
set_error(ReadError::DataPastLogicalEnd);
|
||||
}
|
||||
|
||||
void XSV::read_headers()
|
||||
{
|
||||
if (!m_names.is_empty()) {
|
||||
set_error(ReadError::InternalError);
|
||||
m_names.clear();
|
||||
}
|
||||
|
||||
m_names = read_row(true);
|
||||
}
|
||||
|
||||
// Reads fields up to the end of the current line (or of the input), consumes
// the line terminator and returns the fields.
//
// header_row: true while reading the header row itself, which is exempt from
// the column-count check against the header names.
Vector<XSV::Field> XSV::read_row(bool header_row)
{
    Vector<Field> fields;

    auto at_row_end = [this] {
        return m_lexer.is_eof() || m_lexer.next_is('\n') || m_lexer.next_is("\r\n");
    };

    for (bool is_first_field = true; !at_row_end(); is_first_field = false) {
        // Every field after the first has to be introduced by a separator.
        if (!is_first_field && !m_lexer.consume_specific(m_traits.separator))
            break;
        fields.append(read_one_field());
    }

    // Whatever follows the last field must be a line terminator (CRLF or LF).
    if (!m_lexer.is_eof() && !m_lexer.consume_specific("\r\n") && !m_lexer.consume_specific('\n'))
        set_error(ReadError::DataPastLogicalEnd);

    bool const has_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
    if (!header_row && has_headers && fields.size() != m_names.size())
        set_error(ReadError::NonConformingColumnCount);

    return fields;
}
|
||||
|
||||
// Reads a single field, quoted or unquoted, applying the leading/trailing
// space trimming behaviours when they are enabled.
XSV::Field XSV::read_one_field()
{
    auto has_behaviour = [this](ParserBehaviour flag) {
        return (m_behaviours & flag) != ParserBehaviour::None;
    };

    if (has_behaviour(ParserBehaviour::TrimLeadingFieldSpaces))
        m_lexer.consume_while(is_any_of(" \t\v"));

    bool const starts_with_quote = m_lexer.next_is(m_traits.quote.view());
    Field result = starts_with_quote ? read_one_quoted_field() : read_one_unquoted_field();

    if (!has_behaviour(ParserBehaviour::TrimTrailingFieldSpaces))
        return result;

    // Skip spaces between the end of the field and whatever comes next.
    m_lexer.consume_while(is_any_of(" \t\v"));

    // The contents of a quoted field are left exactly as written.
    if (starts_with_quote)
        return result;

    // An unquoted field has already swallowed its trailing spaces, so they
    // have to be cut off the field's contents as well.
    StringView contents;
    if (result.is_string_view)
        contents = result.as_string_view;
    else
        contents = result.as_string;

    if (!contents.is_empty()) {
        size_t kept_length = contents.length();
        while (kept_length > 0) {
            char const last = contents[kept_length - 1];
            if (last != ' ' && last != '\t' && last != '\v')
                break;
            --kept_length;
        }
        contents = contents.substring_view(0, kept_length);
    }

    if (result.is_string_view)
        result.as_string_view = contents;
    else
        result.as_string = result.as_string.substring(0, contents.length());

    return result;
}
|
||||
|
||||
// Reads a field that starts with the quote string and returns its contents
// without the surrounding quotes.
//
// While no escaped quote has been seen, the field is just the range
// [start, end) of the source and is returned as a view. The first escaped
// quote forces a switch to copy mode: everything read so far is copied into
// `builder`, and from then on every accepted character is appended to it.
//
// Errors are recorded with set_error(): InternalError if the field does not
// start with the quote string, QuoteFailure if the closing quote is missing.
XSV::Field XSV::read_one_quoted_field()
{
    if (!m_lexer.consume_specific(m_traits.quote))
        set_error(ReadError::InternalError);

    size_t start = m_lexer.tell(), end = start;
    bool is_copy = false;
    StringBuilder builder;
    auto allow_newlines = (m_behaviours & ParserBehaviour::AllowNewlinesInFields) != ParserBehaviour::None;

    // Called right after an escaped quote has been consumed.
    auto append_escaped_quote = [&] {
        // If there is an escaped quote, we have no choice but to make a copy.
        if (!is_copy) {
            is_copy = true;
            builder.append(m_source.substring_view(start, end - start));
        }
        builder.append(m_traits.quote);
        end = m_lexer.tell();
    };

    while (!m_lexer.is_eof()) {
        if (m_traits.quote_escape == ParserTraits::Backslash) {
            if (m_lexer.consume_specific('\\')) {
                if (m_lexer.consume_specific(m_traits.quote)) {
                    append_escaped_quote();
                    continue;
                }

                // A backslash that does not escape a quote is ordinary field
                // data. Keep it in copy mode too, so both modes produce the
                // same contents, and include it in the viewed range.
                if (is_copy)
                    builder.append('\\');
                end = m_lexer.tell();

                // The backslash was the last character of the input: stop here
                // rather than calling consume() with nothing left to read. The
                // missing closing quote is reported below.
                if (m_lexer.is_eof())
                    break;
            }
        } else if (m_traits.quote_escape == ParserTraits::Repeat) {
            if (m_lexer.consume_specific(m_traits.quote)) {
                if (m_lexer.consume_specific(m_traits.quote)) {
                    append_escaped_quote();
                    continue;
                }

                // A single quote closes the field; un-read it so that the
                // closing-quote check below can consume it.
                for (size_t i = 0; i < m_traits.quote.length(); ++i)
                    m_lexer.retreat();
                break;
            }
        }

        if (m_lexer.next_is(m_traits.quote.view()))
            break;

        if (!allow_newlines && (m_lexer.next_is('\n') || m_lexer.next_is("\r\n")))
            break;

        char ch = m_lexer.consume();
        if (is_copy)
            builder.append(ch);
        end = m_lexer.tell();
    }

    if (!m_lexer.consume_specific(m_traits.quote))
        set_error(ReadError::QuoteFailure);

    if (is_copy)
        return { {}, builder.to_string(), false };

    return { m_source.substring_view(start, end - start), {}, true };
}
|
||||
|
||||
// Reads an unquoted field: everything up to (not including) the next
// separator, line terminator or the end of the input. The field is always
// returned as a view into the source.
XSV::Field XSV::read_one_unquoted_field()
{
    size_t const field_start = m_lexer.tell();
    bool const quotes_are_literal = (m_behaviours & ParserBehaviour::QuoteOnlyInFieldStart) != ParserBehaviour::None;

    while (!m_lexer.is_eof()) {
        bool const at_separator = m_lexer.next_is(m_traits.separator.view());
        if (at_separator || m_lexer.next_is("\r\n") || m_lexer.next_is("\n"))
            break;

        if (m_lexer.consume_specific(m_traits.quote)) {
            // A quote in the middle of an unquoted field stays part of the
            // field; it is an error unless QuoteOnlyInFieldStart is set.
            if (!quotes_are_literal)
                set_error(ReadError::QuoteFailure);
        } else {
            m_lexer.consume();
        }
    }

    // Nothing is consumed after the last accepted character, so the lexer
    // position is exactly the end of the field.
    return { m_source.substring_view(field_start, m_lexer.tell() - field_start), {}, true };
}
|
||||
|
||||
// Looks a field up by header name. Asserts that headers were read and that
// `name` is one of them.
StringView XSV::Row::operator[](StringView name) const
{
    auto& column_names = m_xsv.m_names;
    ASSERT(!column_names.is_empty());

    auto match = column_names.find([&name](auto&& candidate) { return name == candidate; });
    ASSERT(!match.is_end());

    return (*this)[match.index()];
}
|
||||
|
||||
// Returns the field at `column` of this row, whichever way it is stored.
StringView XSV::Row::operator[](size_t column) const
{
    auto& cell = m_xsv.m_rows[m_index][column];
    return cell.is_string_view ? cell.as_string_view : cell.as_string.view();
}
|
||||
|
||||
// Const row access; forwards to the non-const overload.
const XSV::Row XSV::operator[](size_t index) const
{
    auto& mutable_self = const_cast<XSV&>(*this);
    return mutable_self[index];
}
|
||||
|
||||
// Returns a handle to row `index`; asserts that the row exists.
XSV::Row XSV::operator[](size_t index)
{
    ASSERT(index < m_rows.size());

    Row row { *this, index };
    return row;
}
|
||||
|
||||
}
|
208
Applications/Spreadsheet/Readers/XSV.h
Normal file
208
Applications/Spreadsheet/Readers/XSV.h
Normal file
@ -0,0 +1,208 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Reader {
|
||||
|
||||
// Bit flags that tune the parser; combine them with operator| and test them
// with operator&.
enum class ParserBehaviour : u32 {
    None = 0,
    ReadHeaders = 1u << 0,
    AllowNewlinesInFields = 1u << 1,
    TrimLeadingFieldSpaces = 1u << 2,
    TrimTrailingFieldSpaces = 1u << 3,
    QuoteOnlyInFieldStart = 1u << 4,
};
|
||||
|
||||
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right);
|
||||
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right);
|
||||
|
||||
struct ParserTraits {
|
||||
String separator;
|
||||
String quote { "\"" };
|
||||
enum {
|
||||
Repeat,
|
||||
Backslash,
|
||||
} quote_escape { Repeat };
|
||||
};
|
||||
|
||||
// X-macro listing every read error together with its human-readable
// description. Define E(name, description) before expanding it and #undef E
// afterwards.
#define ENUMERATE_READ_ERRORS() \
    E(None, "No errors") \
    E(NonConformingColumnCount, "Header count does not match given column count") \
    E(QuoteFailure, "Quoting failure") \
    E(InternalError, "Internal error") \
    E(DataPastLogicalEnd, "Extra data past the logical end of the rows")
|
||||
|
||||
enum class ReadError {
|
||||
#define E(name, _) name,
|
||||
ENUMERATE_READ_ERRORS()
|
||||
#undef E
|
||||
};
|
||||
|
||||
// Behaviour flags used when the caller does not pass any.
inline constexpr ParserBehaviour default_behaviours()
{
    constexpr auto defaults = ParserBehaviour::QuoteOnlyInFieldStart;
    return defaults;
}
|
||||
|
||||
class XSV {
|
||||
public:
|
||||
XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours())
|
||||
: m_source(source)
|
||||
, m_lexer(m_source)
|
||||
, m_traits(traits)
|
||||
, m_behaviours(behaviours)
|
||||
{
|
||||
parse();
|
||||
}
|
||||
|
||||
virtual ~XSV() { }
|
||||
|
||||
bool has_error() const { return m_error != ReadError::None; }
|
||||
ReadError error() const { return m_error; }
|
||||
String error_string() const
|
||||
{
|
||||
switch (m_error) {
|
||||
#define E(x, y) \
|
||||
case ReadError::x: \
|
||||
return y;
|
||||
|
||||
ENUMERATE_READ_ERRORS();
|
||||
#undef E
|
||||
}
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
size_t size() const { return m_rows.size(); }
|
||||
Vector<String> headers() const;
|
||||
|
||||
class Row {
|
||||
public:
|
||||
explicit Row(XSV& xsv, size_t index)
|
||||
: m_xsv(xsv)
|
||||
, m_index(index)
|
||||
{
|
||||
}
|
||||
|
||||
StringView operator[](StringView name) const;
|
||||
StringView operator[](size_t column) const;
|
||||
|
||||
size_t index() const { return m_index; }
|
||||
|
||||
// FIXME: Implement begin() and end(), keeping `Field' out of the API.
|
||||
|
||||
private:
|
||||
XSV& m_xsv;
|
||||
size_t m_index { 0 };
|
||||
};
|
||||
|
||||
template<bool const_>
|
||||
class RowIterator {
|
||||
public:
|
||||
explicit RowIterator(const XSV& xsv, size_t init_index = 0) requires(const_)
|
||||
: m_xsv(const_cast<XSV&>(xsv))
|
||||
, m_index(init_index)
|
||||
{
|
||||
}
|
||||
|
||||
explicit RowIterator(XSV& xsv, size_t init_index = 0) requires(!const_)
|
||||
: m_xsv(xsv)
|
||||
, m_index(init_index)
|
||||
{
|
||||
}
|
||||
|
||||
Row operator*() const { return Row { m_xsv, m_index }; }
|
||||
Row operator*() requires(!const_) { return Row { m_xsv, m_index }; }
|
||||
|
||||
RowIterator& operator++()
|
||||
{
|
||||
++m_index;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool is_end() const { return m_index == m_xsv.m_rows.size(); }
|
||||
bool operator==(const RowIterator& other) const
|
||||
{
|
||||
return m_index == other.m_index && &m_xsv == &other.m_xsv;
|
||||
}
|
||||
bool operator==(const RowIterator<!const_>& other) const
|
||||
{
|
||||
return m_index == other.m_index && &m_xsv == &other.m_xsv;
|
||||
}
|
||||
|
||||
private:
|
||||
XSV& m_xsv;
|
||||
size_t m_index { 0 };
|
||||
};
|
||||
|
||||
const Row operator[](size_t index) const;
|
||||
Row operator[](size_t index);
|
||||
|
||||
auto begin() { return RowIterator<false>(*this); }
|
||||
auto end() { return RowIterator<false>(*this, m_rows.size()); }
|
||||
|
||||
auto begin() const { return RowIterator<true>(*this); }
|
||||
auto end() const { return RowIterator<true>(*this, m_rows.size()); }
|
||||
|
||||
using ConstIterator = RowIterator<true>;
|
||||
using Iterator = RowIterator<false>;
|
||||
|
||||
private:
|
||||
struct Field {
|
||||
StringView as_string_view;
|
||||
String as_string; // This member only used if the parser couldn't use the original source verbatim.
|
||||
bool is_string_view { true };
|
||||
|
||||
bool operator==(StringView other) const
|
||||
{
|
||||
if (is_string_view)
|
||||
return other == as_string_view;
|
||||
return as_string == other;
|
||||
}
|
||||
};
|
||||
void set_error(ReadError error);
|
||||
void parse();
|
||||
void read_headers();
|
||||
Vector<Field> read_row(bool header_row = false);
|
||||
Field read_one_field();
|
||||
Field read_one_quoted_field();
|
||||
Field read_one_unquoted_field();
|
||||
|
||||
StringView m_source;
|
||||
GenericLexer m_lexer;
|
||||
const ParserTraits& m_traits;
|
||||
ParserBehaviour m_behaviours;
|
||||
Vector<Field> m_names;
|
||||
Vector<Vector<Field>> m_rows;
|
||||
ReadError m_error { ReadError::None };
|
||||
};
|
||||
|
||||
}
|
44
Applications/Spreadsheet/Writers/CSV.h
Normal file
44
Applications/Spreadsheet/Writers/CSV.h
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "XSV.h"
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace Writer {
|
||||
|
||||
template<typename ContainerType>
|
||||
class CSV : public XSV<ContainerType> {
|
||||
public:
|
||||
CSV(OutputStream& output, const ContainerType& data, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours())
|
||||
: XSV<ContainerType>(output, data, { ",", "\"", WriterTraits::Repeat }, headers, behaviours)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
96
Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp
Normal file
96
Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <AK/TestSuite.h>
|
||||
|
||||
#include "../CSV.h"
|
||||
#include "../XSV.h"
|
||||
#include <AK/MemoryStream.h>
|
||||
|
||||
// Writes a 3x3 table of integers without headers and compares the bytes
// produced with the expected CSV text.
TEST_CASE(can_write)
{
    Vector<Vector<int>> rows = {
        { 1, 2, 3 },
        { 4, 5, 6 },
        { 7, 8, 9 },
    };

    auto storage = ByteBuffer::create_uninitialized(1024);
    OutputMemoryStream stream { storage };

    // Constructing the writer is what performs the write.
    Writer::CSV csv(stream, rows);

    auto expected_output = R"~(1,2,3
4,5,6
7,8,9
)~";

    EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
|
||||
|
||||
// Same table as can_write, but with header names; the header that contains a
// quote is expected to come out quoted, with the quote doubled.
TEST_CASE(can_write_with_header)
{
    Vector<Vector<int>> rows = {
        { 1, 2, 3 },
        { 4, 5, 6 },
        { 7, 8, 9 },
    };

    auto storage = ByteBuffer::create_uninitialized(1024);
    OutputMemoryStream stream { storage };

    Writer::CSV csv(stream, rows, { "A", "B\"", "C" });

    auto expected_output = R"~(A,"B""",C
1,2,3
4,5,6
7,8,9
)~";

    EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
|
||||
|
||||
// Writes string data with QuoteOnlyInFieldStart | WriteHeaders and checks the
// exact output: of all the fields, only "Hello," is expected to be quoted.
TEST_CASE(can_write_with_different_behaviours)
{
    Vector<Vector<String>> rows = {
        { "Well", "Hello\"", "Friends" },
        { "We\"ll", "Hello,", "   Friends" },
    };

    auto storage = ByteBuffer::create_uninitialized(1024);
    OutputMemoryStream stream { storage };

    auto behaviours = Writer::WriterBehaviour::QuoteOnlyInFieldStart | Writer::WriterBehaviour::WriteHeaders;
    Writer::CSV csv(stream, rows, { "A", "B\"", "C" }, behaviours);

    auto expected_output = R"~(A,B",C
Well,Hello",Friends
We"ll,"Hello,",   Friends
)~";

    EXPECT_EQ(StringView { stream.bytes() }, expected_output);
}
|
||||
|
||||
TEST_MAIN(XSV)
|
215
Applications/Spreadsheet/Writers/XSV.h
Normal file
215
Applications/Spreadsheet/Writers/XSV.h
Normal file
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2020, the SerenityOS developers.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/Stream.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Writer {
|
||||
|
||||
// Bit flags that tune the writer; combine them with operator| and test them
// with operator&.
enum class WriterBehaviour : u32 {
    None = 0,
    WriteHeaders = 1u << 0,
    AllowNewlinesInFields = 1u << 1,
    QuoteOnlyInFieldStart = 1u << 2,
    QuoteAll = 1u << 3,
};
|
||||
|
||||
// Bitwise AND of two behaviour flag sets.
inline WriterBehaviour operator&(WriterBehaviour left, WriterBehaviour right)
{
    auto const left_bits = static_cast<u32>(left);
    auto const right_bits = static_cast<u32>(right);
    return static_cast<WriterBehaviour>(left_bits & right_bits);
}
|
||||
|
||||
// Bitwise OR of two behaviour flag sets.
inline WriterBehaviour operator|(WriterBehaviour left, WriterBehaviour right)
{
    auto const left_bits = static_cast<u32>(left);
    auto const right_bits = static_cast<u32>(right);
    return static_cast<WriterBehaviour>(left_bits | right_bits);
}
|
||||
|
||||
struct WriterTraits {
|
||||
String separator;
|
||||
String quote { "\"" };
|
||||
enum {
|
||||
Repeat,
|
||||
Backslash,
|
||||
} quote_escape { Repeat };
|
||||
};
|
||||
|
||||
// X-macro listing every write error together with its human-readable
// description. Define E(name, description) before expanding it and #undef E
// afterwards.
#define ENUMERATE_WRITE_ERRORS() \
    E(None, "No errors") \
    E(NonConformingColumnCount, "Header count does not match given column count") \
    E(InternalError, "Internal error")
|
||||
|
||||
enum class WriteError {
|
||||
#define E(name, _) name,
|
||||
ENUMERATE_WRITE_ERRORS()
|
||||
#undef E
|
||||
};
|
||||
|
||||
// The behaviour set used when a caller does not pass one: no flags set.
inline constexpr WriterBehaviour default_behaviours()
{
    constexpr auto no_flags = WriterBehaviour::None;
    return no_flags;
}
|
||||
|
||||
template<typename ContainerType>
|
||||
class XSV {
|
||||
public:
|
||||
XSV(OutputStream& output, const ContainerType& data, const WriterTraits& traits, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours())
|
||||
: m_data(data)
|
||||
, m_traits(traits)
|
||||
, m_behaviours(behaviours)
|
||||
, m_names(headers)
|
||||
, m_output(output)
|
||||
{
|
||||
if (!headers.is_empty())
|
||||
m_behaviours = m_behaviours | WriterBehaviour::WriteHeaders;
|
||||
|
||||
generate();
|
||||
}
|
||||
|
||||
virtual ~XSV() { }
|
||||
|
||||
bool has_error() const { return m_error != WriteError::None; }
|
||||
WriteError error() const { return m_error; }
|
||||
String error_string() const
|
||||
{
|
||||
switch (m_error) {
|
||||
#define E(x, y) \
|
||||
case WriteError::x: \
|
||||
return y;
|
||||
|
||||
ENUMERATE_WRITE_ERRORS();
|
||||
#undef E
|
||||
}
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
private:
|
||||
void set_error(WriteError error)
|
||||
{
|
||||
if (m_error == WriteError::None)
|
||||
m_error = error;
|
||||
}
|
||||
|
||||
void generate()
|
||||
{
|
||||
auto with_headers = (m_behaviours & WriterBehaviour::WriteHeaders) != WriterBehaviour::None;
|
||||
if (with_headers) {
|
||||
write_row(m_names);
|
||||
if (m_output.write({ "\n", 1 }) != 1)
|
||||
set_error(WriteError::InternalError);
|
||||
}
|
||||
|
||||
for (auto&& row : m_data) {
|
||||
if (with_headers) {
|
||||
if (row.size() != m_names.size())
|
||||
set_error(WriteError::NonConformingColumnCount);
|
||||
}
|
||||
|
||||
write_row(row);
|
||||
if (m_output.write({ "\n", 1 }) != 1)
|
||||
set_error(WriteError::InternalError);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void write_row(T&& row)
|
||||
{
|
||||
bool first = true;
|
||||
for (auto&& entry : row) {
|
||||
if (!first) {
|
||||
if (m_output.write(m_traits.separator.bytes()) != m_traits.separator.length())
|
||||
set_error(WriteError::InternalError);
|
||||
}
|
||||
first = false;
|
||||
write_entry(entry);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void write_entry(T&& entry)
|
||||
{
|
||||
auto string = String::formatted("{}", FormatIfSupported(entry));
|
||||
|
||||
auto safe_to_write_normally = !string.contains("\n") && !string.contains(m_traits.separator);
|
||||
if (safe_to_write_normally) {
|
||||
if ((m_behaviours & WriterBehaviour::QuoteOnlyInFieldStart) == WriterBehaviour::None)
|
||||
safe_to_write_normally = !string.contains(m_traits.quote);
|
||||
else
|
||||
safe_to_write_normally = !string.starts_with(m_traits.quote);
|
||||
}
|
||||
if (safe_to_write_normally) {
|
||||
if (m_output.write(string.bytes()) != string.length())
|
||||
set_error(WriteError::InternalError);
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
|
||||
set_error(WriteError::InternalError);
|
||||
|
||||
GenericLexer lexer(string);
|
||||
while (!lexer.is_eof()) {
|
||||
if (lexer.consume_specific(m_traits.quote)) {
|
||||
switch (m_traits.quote_escape) {
|
||||
case WriterTraits::Repeat:
|
||||
if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
|
||||
set_error(WriteError::InternalError);
|
||||
if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
|
||||
set_error(WriteError::InternalError);
|
||||
break;
|
||||
case WriterTraits::Backslash:
|
||||
if (m_output.write({ "\\", 1 }) != 1)
|
||||
set_error(WriteError::InternalError);
|
||||
if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
|
||||
set_error(WriteError::InternalError);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto ch = lexer.consume();
|
||||
if (m_output.write({ &ch, 1 }) != 1)
|
||||
set_error(WriteError::InternalError);
|
||||
}
|
||||
|
||||
if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length())
|
||||
set_error(WriteError::InternalError);
|
||||
}
|
||||
|
||||
const ContainerType& m_data;
|
||||
const WriterTraits& m_traits;
|
||||
WriterBehaviour m_behaviours;
|
||||
const Vector<StringView>& m_names;
|
||||
WriteError m_error { WriteError::None };
|
||||
OutputStream& m_output;
|
||||
};
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user