LibGemini: Implement rendering text/gemini documents to HTML

This also sets Content-Type to whatever 'meta' contains on success, to
allow the browser to pick up what the document contains.
This commit is contained in:
AnotherTest 2020-05-16 15:38:13 +04:30 committed by Andreas Kling
parent a4902e0eec
commit 013cb76d77
Notes: sideshowbarker 2024-07-19 06:33:39 +09:00
8 changed files with 424 additions and 1 deletions

View File

@ -1,8 +1,10 @@
# Source files that are compiled into LibGemini.
set(SOURCES
Document.cpp
GeminiJob.cpp
GeminiRequest.cpp
GeminiResponse.cpp
Job.cpp
Line.cpp
)
# Registers the library through the project's serenity_lib() helper.
# NOTE(review): the second argument is presumably the output library name - confirm.
serenity_lib(LibGemini gemini)

View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2020, The SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/NonnullRefPtr.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Vector.h>
#include <LibGemini/Document.h>
namespace Gemini {
// Renders the whole document as a standalone HTML page.
//
// The page title is the path component of the document's URL and the body is
// the concatenation of every stored line's own HTML rendering, in order.
String Document::render_to_html() const
{
    StringBuilder html_builder;

    html_builder.append("<!DOCTYPE html>\n<html>\n");
    html_builder.append("<head>\n<title>");
    // The path is part of a URL that may have been supplied by a remote
    // document, so escape it before placing it into the markup.
    html_builder.append(escape_html_entities(m_url.path()));
    html_builder.append("</title>\n</head>\n");
    html_builder.append("<body>\n");
    for (auto& line : m_lines)
        html_builder.append(line.render_to_html());
    html_builder.append("</body>");
    html_builder.append("</html>");

    return html_builder.build();
}
// Creates a new reference-counted Document for `url` and fills it by reading
// the text/gemini source passed in `lines`.
NonnullRefPtr<Document> Document::parse(const StringView& lines, const URL& url)
{
    NonnullRefPtr<Document> parsed_document = adopt(*new Document(url));
    parsed_document->read_lines(lines);
    return parsed_document;
}
void Document::read_lines(const StringView& source)
{
auto close_list_if_needed = [&] {
if (m_inside_unordered_list) {
m_inside_unordered_list = false;
m_lines.append(make<Control>(Control::UnorderedListEnd));
}
};
for (auto& line : source.lines()) {
if (line.starts_with("```")) {
close_list_if_needed();
m_inside_preformatted_block = !m_inside_preformatted_block;
if (m_inside_preformatted_block) {
m_lines.append(make<Control>(Control::PreformattedStart));
} else {
m_lines.append(make<Control>(Control::PreformattedEnd));
}
}
if (m_inside_preformatted_block) {
m_lines.append(make<Preformatted>(move(line)));
continue;
}
if (line.starts_with("*")) {
if (!m_inside_unordered_list)
m_lines.append(make<Control>(Control::UnorderedListStart));
m_lines.append(make<UnorderedList>(move(line)));
m_inside_unordered_list = true;
continue;
}
close_list_if_needed();
if (line.starts_with("=>")) {
m_lines.append(make<Link>(move(line), *this));
continue;
}
if (line.starts_with("#")) {
size_t level = 0;
while (line.length() > level && line[level] == '#')
++level;
m_lines.append(make<Heading>(move(line), level));
continue;
}
m_lines.append(make<Text>(move(line)));
}
}
}

View File

@ -0,0 +1,149 @@
/*
* Copyright (c) 2020, The SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Forward.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/String.h>
#include <AK/URL.h>
#include <AK/Vector.h>
namespace Gemini {
// Abstract base class for one line of a text/gemini document.
// It stores the line's text; subclasses decide how that text becomes HTML.
class Line {
public:
// Takes ownership of the text of the line.
Line(String string)
: m_text(move(string))
{
}
virtual ~Line();
// Returns the HTML representation of this line.
virtual String render_to_html() const = 0;
protected:
// The text this line was constructed with.
String m_text;
};
// A text/gemini document: an ordered collection of Line objects together
// with the URL the document belongs to. Instances are reference-counted and
// are created through parse(); the constructor is private.
class Document : public RefCounted<Document> {
public:
// Renders the complete document as an HTML page.
String render_to_html() const;
// Builds a Document for the given URL from text/gemini source text.
static NonnullRefPtr<Document> parse(const StringView& source, const URL&);
// The URL this document was created with.
const URL& url() const { return m_url; };
private:
explicit Document(const URL& url)
: m_url(url)
{
}
// Appends Line objects for the given source text to m_lines.
void read_lines(const StringView&);
// Lines (and list/preformatted control markers) in document order.
NonnullOwnPtrVector<Line> m_lines;
URL m_url;
// Parser state maintained by read_lines().
bool m_inside_preformatted_block { false };
bool m_inside_unordered_list { false };
};
// A plain text line.
class Text : public Line {
public:
Text(String line)
: Line(move(line))
{
}
virtual ~Text() override;
// Returns the HTML for this text line.
virtual String render_to_html() const override;
};
// A link line. The constructor derives the target URL (resolved against the
// given document's URL) and the text to display from the raw line.
class Link : public Line {
public:
    Link(String line, const Document&);
    virtual ~Link() override;
    // Returns an HTML anchor pointing at the link's URL.
    virtual String render_to_html() const override;

private:
    URL m_url;
    // Deliberately an owning String rather than a StringView: when the line
    // carries no name the constructor falls back to m_url.to_string(), which
    // is a temporary, so a non-owning view of it would dangle.
    String m_name;
};
// A line that belongs to a preformatted block.
class Preformatted : public Line {
public:
Preformatted(String line)
: Line(move(line))
{
}
virtual ~Preformatted() override;
// Returns the HTML for this preformatted line.
virtual String render_to_html() const override;
};
// A single item of an unordered list.
class UnorderedList : public Line {
public:
UnorderedList(String line)
: Line(move(line))
{
}
virtual ~UnorderedList() override;
// Returns the HTML list item for this line.
virtual String render_to_html() const override;
};
// A synthetic line that carries no text of its own. It marks where a list or
// a preformatted block starts or ends.
class Control : public Line {
public:
// The structural boundary this control line represents.
enum Kind {
UnorderedListStart,
UnorderedListEnd,
PreformattedStart,
PreformattedEnd,
};
// Control lines have no text, so the base class is given an empty string.
Control(Kind kind)
: Line("")
, m_kind(kind)
{
}
virtual ~Control() override;
// Returns the HTML tag that corresponds to m_kind.
virtual String render_to_html() const override;
private:
Kind m_kind;
};
// A heading line together with its level.
class Heading : public Line {
public:
// `line` is the raw line text; `level` is stored in m_level.
Heading(String line, int level)
: Line(move(line))
, m_level(level)
{
}
virtual ~Heading() override;
// Returns the HTML heading element for this line.
virtual String render_to_html() const override;
private:
// Heading level; defaults to 1.
int m_level { 1 };
};
}

View File

@ -26,6 +26,7 @@
namespace Gemini {
class Document;
class GeminiRequest;
class GeminiResponse;
class GeminiJob;

View File

@ -0,0 +1,143 @@
/*
* Copyright (c) 2020, The SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/StringBuilder.h>
#include <LibGemini/Document.h>
namespace Gemini {
// Renders a plain text line: the escaped text followed by a line break.
String Text::render_to_html() const
{
    const auto escaped_text = escape_html_entities(m_text);

    StringBuilder html;
    html.append(escaped_text);
    html.append("<br>\n");
    return html.build();
}
// Out-of-line destructor; intentionally empty.
Text::~Text()
{
}
// Renders the heading as an <hN> element.
//
// The first m_level characters of the line are dropped, and the rest is
// escaped and used as the heading's content.
String Heading::render_to_html() const
{
    // HTML only defines <h1> through <h6>. Deeper levels are rendered as
    // <h6> instead of emitting a non-existent element such as <h7>.
    const int tag_level = m_level > 6 ? 6 : m_level;

    StringBuilder builder;
    builder.appendf("<h%d>", tag_level);
    // The number of characters removed is still the real level, so none of
    // the heading markers leak into the rendered text.
    builder.append(escape_html_entities(m_text.substring_view(m_level, m_text.length() - m_level)));
    builder.appendf("</h%d>", tag_level);
    return builder.build();
}
// Out-of-line destructor; intentionally empty.
Heading::~Heading()
{
}
// Renders the list item as an <li> element containing the escaped line text
// without its first character.
String UnorderedList::render_to_html() const
{
    // 1.3.5.4.2 "Advanced clients can take the space of the bullet symbol into account"
    // FIXME: The spec is unclear about what the space means, or where it goes
    //        somehow figure this out
    const auto item_text = m_text.substring_view(1, m_text.length() - 1);

    StringBuilder html;
    html.append("<li>");
    html.append(escape_html_entities(item_text));
    html.append("</li>");
    return html.build();
}
// Out-of-line destructor; intentionally empty.
UnorderedList::~UnorderedList()
{
}
// Maps the control kind to the HTML tag that opens or closes the
// corresponding list or preformatted block.
String Control::render_to_html() const
{
    if (m_kind == Kind::PreformattedStart)
        return "<pre>";
    if (m_kind == Kind::PreformattedEnd)
        return "</pre>";
    if (m_kind == Kind::UnorderedListStart)
        return "<ul>";
    if (m_kind == Kind::UnorderedListEnd)
        return "</ul>";

    // None of the known kinds matched: log the value and trip the assertion.
    dbg() << "Unknown control kind _" << m_kind << "_";
    ASSERT_NOT_REACHED();
    return "";
}
// Out-of-line destructor; intentionally empty.
Control::~Control()
{
}
// Parses a link line of the form "=>[whitespace]URL[whitespace NAME]".
//
// The URL part is resolved against the URL of `document`. If the line has no
// name (or only whitespace after the URL), the resolved URL itself is used as
// the text to display.
//
// Parsing starts at offset 2, i.e. the line is expected to begin with the
// two-character "=>" marker.
Link::Link(String text, const Document& document)
    : Line(move(text))
{
    // Skip the marker and any whitespace that follows it.
    size_t index = 2;
    while (index < m_text.length() && (m_text[index] == ' ' || m_text[index] == '\t'))
        ++index;

    auto url_string = m_text.substring_view(index, m_text.length() - index);
    auto space_offset = url_string.find_first_of(" \t");
    String url = url_string;
    if (space_offset.has_value()) {
        // Everything up to the first blank is the URL, and whatever follows
        // the run of blanks is the name.
        url = url_string.substring_view(0, space_offset.value());
        auto offset = space_offset.value();
        while (offset < url_string.length() && (url_string[offset] == ' ' || url_string[offset] == '\t'))
            ++offset;
        m_name = url_string.substring_view(offset, url_string.length() - offset);
    }
    m_url = document.url().complete_url(url);

    // Test for "empty" rather than "null": a line such as "=> url   " yields
    // a zero-length, non-null name, which previously produced a link with no
    // visible text.
    // NOTE(review): to_string() returns a temporary, so m_name has to be an
    // owning string type for this assignment to be safe.
    if (m_name.is_empty())
        m_name = m_url.to_string();
}
// Out-of-line destructor; intentionally empty.
Link::~Link()
{
}
// Renders the link as an anchor whose href is the escaped URL and whose
// content is the escaped name, followed by a line break.
String Link::render_to_html() const
{
    const auto href = escape_html_entities(m_url.to_string());
    const auto label = escape_html_entities(m_name);

    StringBuilder html;
    html.append("<a href=\"");
    html.append(href);
    html.append("\">");
    html.append(label);
    html.append("</a><br>\n");
    return html.build();
}
// Renders one line of a preformatted block: the escaped text followed by a
// newline.
String Preformatted::render_to_html() const
{
    // Only a leading "```" fence marker is removed. Unconditionally dropping
    // three characters cut the start off every ordinary preformatted line and
    // underflowed `length() - 3` for lines shorter than three characters
    // (for example empty lines inside the block).
    StringView content = m_text;
    if (m_text.starts_with("```"))
        content = m_text.substring_view(3, m_text.length() - 3);

    StringBuilder builder;
    builder.append(escape_html_entities(content));
    builder.append("\n");
    return builder.build();
}
// Out-of-line destructor; intentionally empty.
Preformatted::~Preformatted()
{
}
// Out-of-line definition of the base class's virtual destructor; intentionally empty.
Line::~Line()
{
}
}

View File

@ -128,4 +128,4 @@ add_custom_command(
)
serenity_lib(LibWeb web)
target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGUI LibGfx LibTextCodec LibProtocol)
target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol)

View File

@ -32,6 +32,7 @@
#include <LibGUI/Painter.h>
#include <LibGUI/ScrollBar.h>
#include <LibGUI/Window.h>
#include <LibGemini/Document.h>
#include <LibGfx/ImageDecoder.h>
#include <LibJS/Runtime/Value.h>
#include <LibMarkdown/Document.h>
@ -387,6 +388,13 @@ static RefPtr<Document> create_image_document(const ByteBuffer& data, const URL&
return document;
}
// Interprets `data` as text/gemini source, renders it to HTML and hands that
// HTML to the regular HTML document parser.
static RefPtr<Document> create_gemini_document(const ByteBuffer& data, const URL& url)
{
    StringView gemini_source { reinterpret_cast<const char*>(data.data()), data.size() };
    auto gemini_document = Gemini::Document::parse(gemini_source, url);
    return parse_html_document(gemini_document->render_to_html(), url);
}
String encoding_from_content_type(const String& content_type)
{
auto offset = content_type.index_of("charset=");
@ -426,6 +434,8 @@ static RefPtr<Document> create_document_from_mime_type(const ByteBuffer& data, c
return create_text_document(data, url);
if (mime_type == "text/markdown")
return create_markdown_document(data, url);
if (mime_type == "text/gemini")
return create_gemini_document(data, url);
if (mime_type == "text/html")
return parse_html_document(data, url, encoding);
return nullptr;

View File

@ -40,6 +40,12 @@ GeminiDownload::GeminiDownload(ClientConnection& client, NonnullRefPtr<Gemini::G
if (!response->meta().is_empty()) {
HashMap<String, String, CaseInsensitiveStringTraits> headers;
headers.set("meta", response->meta());
// Note: We're setting content-type to meta only on status==SUCCESS.
// We should perhaps have a better mechanism for this, since we
// are already shoehorning the concept of "headers" in here.
if (response->status() >= 20 && response->status() < 30) {
headers.set("content-type", response->meta());
}
set_response_headers(headers);
}
}