Refactoring & Update README.md & Update str

liameno 2022-06-04 20:12:43 +03:00
parent 1a8f71037c
commit ef12195b93
26 changed files with 295 additions and 257 deletions

View File

@ -1,7 +1,7 @@
![](https://raw.githubusercontent.com/liameno/librengine/master/logo.png)
![](images/logo.png)
#### Privacy Web Search Engine
## Website
[![demo](https://raw.githubusercontent.com/liameno/librengine/master/demo.png)]()
![](images/demo.png)
## Features
#### Crawler
@ -12,15 +12,14 @@
- Queue (BFS)
- Detect trackers
- Http to https
- Normalize url (remove #fragment, ?query)
- Normalize url
#### Website / CLI
- Encryption (rsa)
- API
- Proxy
- Node Info
- Nodes
- Rating (min=0, max=200, def=100)
- Rating
## TODO
- [x] Encryption (asymmetric)
@ -35,26 +34,16 @@
- typesense ([source](https://github.com/typesense/typesense))
- openssl ([source](https://github.com/openssl/openssl))
Arch:
#### Arch
```shell
yay -S curl lexbor openssl &&
curl -O https://dl.typesense.org/releases/0.23.0.rc20/typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
tar -xzf typesense-server-0.23.0.rc20-linux-amd64.tar.gz
sh scripts/install_dependencies_arch.sh
```
Debian:
#### Debian
```shell
sudo apt install libcurl4-openssl-dev &&
curl -O https://dl.typesense.org/releases/0.23.0.rc20/typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
tar -xzf typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
git clone https://github.com/lexbor/lexbor &&
cd lexbor &&
cmake . && make && sudo make install &&
sudo apt install libssl-dev
sh scripts/install_dependencies_debian.sh
```
## Build
```shell
#git clone ...
cd librengine &&
sh scripts/build_all.sh
```
## Run
@ -129,4 +118,4 @@ sh scripts/init_db.sh
```
## License
GNU AFFERO GENERAL PUBLIC LICENSE v3.0
GNU Affero General Public License v3.0

View File

@ -1,7 +1,6 @@
#include <librengine/config.h>
#include <librengine/logger.h>
#include <librengine/search.h>
#include <librengine/encryption.h>
int main(int argc, char **argv) {
using namespace librengine;
@ -31,8 +30,7 @@ int main(int argc, char **argv) {
const auto &url = result.url;
const auto &desc = result.desc;
const auto &rating = result.rating;
const auto &has_ads = result.has_ads ? "has" : "hasn't";
const auto &has_analytics = result.has_analytics ? "has" : "hasn't";
const auto &has_trackers = result.has_trackers ? "has" : "hasn't";
const auto &node_url = result.node_url;
std::cout << title << std::endl;
@ -40,8 +38,7 @@ int main(int argc, char **argv) {
std::cout << desc << std::endl;
std::cout << rating << " rating" << std::endl;
std::cout << node_url << " node url " << std::endl;
std::cout << has_ads << " ads" << std::endl;
std::cout << has_analytics << " analytics" << std::endl;
std::cout << has_trackers << " trackers" << std::endl;
std::cout << std::string(25, '=') << std::endl;
}

View File

@ -17,7 +17,7 @@ int main(int argc, char **argv) {
curl_global_init(CURL_GLOBAL_ALL);
auto content = config::helper::get_file_content(argv[1]);
auto splited = str::split(content, "\n");
auto splited = split(content, "\n");
auto w = std::make_shared<worker>(config);
for (const auto &s : splited) {

View File

@ -92,29 +92,29 @@ bool worker::normalize_url(http::url &url, const std::optional<std::string> &own
if (url.text.empty()) return false;
if (url.text.size() < 3 && !owner_host) return false;
if (str::starts_with(url.text, "//")) {
if (starts_with(url.text, "//")) {
url.text.insert(0, "http:");
url.parse();
}
if (!url.host && owner_host && !owner_host->empty()) {
http::url owner_url(str::to_lower(*owner_host));
http::url owner_url(*owner_host);
owner_url.parse();
owner_url.set(CURLUPART_QUERY, "");
owner_url.set(CURLUPART_FRAGMENT, "");
auto f_c = str::get_first_char(url.text);
auto f_c = get_first(url.text);
if (f_c == '.') {
str::remove_first_char(url.text);
remove_first(url.text);
} else if (f_c != '/') {
if (str::get_last_char(owner_url.text) == '/') str::remove_last_char(owner_url.text);
if (get_last(owner_url.text) == '/') remove_last(owner_url.text);
while(true) {
const char c = str::get_last_char(owner_url.text);
const char c = get_last(owner_url.text);
if (c == '/' || c == '\0') break;
else str::remove_last_char(owner_url.text);
else remove_last(owner_url.text);
}
} else {
owner_url.set(CURLUPART_PATH, "");
@ -123,8 +123,8 @@ bool worker::normalize_url(http::url &url, const std::optional<std::string> &own
owner_url.compute_text();
owner_url.parse();
if (str::get_first_char(url.text) == '/' && str::get_last_char(owner_url.text) == '/') {
str::remove_first_char(url.text);
if (get_first(url.text) == '/' && get_last(owner_url.text) == '/') {
remove_first(url.text);
}
url.text.insert(0, owner_url.text);
@ -142,10 +142,10 @@ bool worker::normalize_url(http::url &url, const std::optional<std::string> &own
url.set(CURLUPART_FRAGMENT, "");
while(true) {
char c = str::get_last_char(url.text);
char c = get_last(url.text);
if (c == '/' || c == '\0') {
str::remove_last_char(url.text);
remove_last(url.text);
} else {
break;
}
@ -171,8 +171,8 @@ void worker::main_thread() {
auto url_ = queue.front();
queue.pop();
http::url site_url(url_.site_url);
http::url owner_url(url_.owner_url);
http::url site_url(to_lower_copy(url_.site_url));
http::url owner_url(to_lower_copy(url_.owner_url));
site_url.parse();
owner_url.parse();
@ -192,7 +192,7 @@ void worker::main_thread() {
continue;
}
auto splited = str::split(*site_url.path, ".");
auto splited = split(*site_url.path, ".");
auto file_type = (splited.size() <= 1) ? "" : splited.back();
auto allowed_file_types = {"", "html", "html5", "php", "phtml"};
@ -272,9 +272,9 @@ worker::result worker::work(url &url_) {
auto response = request_result.response;
auto response_length = response->length();
if_debug_print(logger::type::info, "response length = " + str::to_string(response_length), url.text);
if_debug_print(logger::type::info, "response code = " + str::to_string(request_result.code), url.text);
if_debug_print(logger::type::info, "curl code = " + str::to_string(request_result.curl_code), url.text);
if_debug_print(logger::type::info, "response length = " + to_string(response_length), url.text);
if_debug_print(logger::type::info, "response code = " + to_string(request_result.code), url.text);
if_debug_print(logger::type::info, "curl code = " + to_string(request_result.curl_code), url.text);
if (request_result.code != 200) {
if_debug_print(logger::type::error, "code != 200", url.text);
@ -321,7 +321,7 @@ worker::result worker::work(url &url_) {
};
for (const auto &s : detect_trackers_strings) {
if (str::contains(*response, s, false)) {
if (contains(*response, s)) {
has_trackers = true;
break;
}
@ -343,14 +343,14 @@ worker::result worker::work(url &url_) {
auto element = lxb_dom_collection_element(collection, i);
const auto href_value = lxb_string_to_std(lxb_dom_element_get_attribute(element, std_string_to_lxb("href"), 4, nullptr));
if (!href_value || *href_value == url.text || str::starts_with(*href_value, "#")) {
if (!href_value || *href_value == url.text || starts_with(*href_value, "#")) {
continue;
}
http::url href_url(*href_value);
href_url.parse();
if (!str::starts_with(*href_value, "http")) {
if (!starts_with(*href_value, "http")) {
queue.push({href_url.text, url.text});
} else {
queue.push({*href_value, ""});
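
Taken together, the branches above reduce to a handful of string-level steps: protocol-relative links get an `http:` prefix, relative links are resolved against the (now lowercased) owner URL, and the query, fragment, and trailing slashes are dropped. The sketch below reproduces only the stripping steps with the standard library; it is an illustration of the logic, not the worker's actual code path, which goes through `http::url` and the libcurl URL API.

```cpp
#include <iostream>
#include <string>

// Simplified illustration of the trimming performed in worker::normalize_url.
// The real implementation clears the query and fragment through http::url
// (CURLUPART_QUERY / CURLUPART_FRAGMENT); here the string is simply cut.
std::string normalize_sketch(std::string url) {
    if (url.rfind("//", 0) == 0) url.insert(0, "http:");      // protocol-relative link
    for (char sep : {'#', '?'}) {                              // drop fragment, then query
        auto pos = url.find(sep);
        if (pos != std::string::npos) url.erase(pos);
    }
    while (!url.empty() && url.back() == '/') url.pop_back();  // trailing slashes
    return url;
}

int main() {
    std::cout << normalize_sketch("//example.org/a/?q=1#top") << std::endl;
    // prints: http://example.org/a
}
```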

BIN
demo.png

Binary file not shown. (Before: 14 KiB)

BIN
images/demo.png Normal file

Binary file not shown. (After: 12 KiB)

View File

Before: 3.8 KiB | After: 3.8 KiB

View File

@ -4,10 +4,10 @@ project(librengine LANGUAGES CXX)
set(CMAKE_CXX_STANDARD 17)
set(include include/encryption.h include/typesense.h include/http.h include/str.h include/str_impl.h include/config.h include/logger.h include/json.hpp include/structs.h include/search.h include/helper.h include/cache.h include/robots_txt.h)
set(src src/encryption.cpp src/typesense.cpp src/http.cpp src/str.cpp src/logger.cpp src/config.cpp src/search.cpp src/helper.cpp src/robots_txt.cpp)
set(src src/encryption.cpp src/typesense.cpp src/http.cpp src/logger.cpp src/config.cpp src/search.cpp src/helper.cpp src/robots_txt.cpp)
set(include_all ${include})
set(src_all ${src})
set(src_all ${src} src/str.cpp)
#lexbor - html, css / https://github.com/lexbor/lexbor
#curl - http lib / https://github.com/curl/curl

View File

@ -1,18 +1,19 @@
#ifndef LOGGER_H
#define LOGGER_H
#include <iostream>
#include <string>
namespace logger {
const std::string reset = "\033[0m";
const std::string black = "\033[30m";
const std::string red = "\033[31m";
const std::string green = "\033[32m";
const std::string yellow = "\033[33m";
const std::string blue = "\033[34m";
const std::string magenta = "\033[35m";
const std::string cyan = "\033[36m";
const std::string white = "\033[37m";
const static std::string reset = "\033[0m";
const static std::string black = "\033[30m";
const static std::string red = "\033[31m";
const static std::string green = "\033[32m";
const static std::string yellow = "\033[33m";
const static std::string blue = "\033[34m";
const static std::string magenta = "\033[35m";
const static std::string cyan = "\033[36m";
const static std::string white = "\033[37m";
enum class type {
info,
@ -20,6 +21,7 @@ namespace logger {
};
void print(const type &type_, const std::string &text, const std::string &id);
void print(const std::string &text, const std::string &color = reset);
}
#endif

View File

@ -26,8 +26,6 @@ namespace librengine {
explicit search(const config::all &config);
void init();
void remove_html_tags(std::string &html);
std::vector<search_result> local(const std::string &q, const size_t &p);
std::vector<search_result> nodes(std::string &query, const size_t &page, const bool &is_encryption_enabled);
};

View File

@ -4,29 +4,49 @@
#include <algorithm>
#include <string>
#include <vector>
#include <regex>
namespace librengine::str {
std::string to_string(const std::string &value);
char get_first_char(const std::string &s);
char get_last_char(const std::string &s);
void remove_first_char(std::string &s);
void remove_last_char(std::string &s);
namespace librengine {
std::string to_string(const std::string &value); //for format (str_impl.h)
char get_first(const std::string &s);
char get_last(const std::string &s);
void remove_first(std::string &s);
void remove_last(std::string &s);
void trim_start(std::string &s);
void trim_end(std::string &s);
void trim(std::string &s);
std::string trim_start(const std::string &s);
std::string trim_end(const std::string &s);
std::string trim(const std::string &s);
std::vector<std::string> split(const std::string &s, const std::string &delimiter);
std::string to_lower(const std::string &s);
std::string to_upper(const std::string &s);
bool contains(const std::string &s, const char &value, const bool &is_value_to_lower);
bool contains(const std::string &s, const std::string &value, const bool &is_value_to_lower);
bool starts_with(const std::string &s, const std::string &value);
bool ends_with(const std::string &s, const std::string &value);
void to_lower(std::string &s);
void to_upper(std::string &s);
std::string to_lower_copy(const std::string &s);
std::string to_upper_copy(const std::string &s);
bool contains(const std::string &s, const char &value);
bool contains(const std::string &s, const std::string &value);
bool starts_with(const std::string &s, const std::string &v);
bool ends_with(const std::string &s, const std::string &v);
std::string replace(const std::string &s, const std::string &from, const std::string &to);
void replace_ref(std::string &s, const std::string &from, const std::string &to);
size_t find_end(const std::string &s, const std::string &v);
bool replace(std::string &s, const std::string &from, const std::string &to = "");
bool replace(std::string &s, size_t start_pos, const std::string &from, const std::string &to = "");
std::string replace_copy(const std::string &s, const std::string &from, const std::string &to = "");
std::string replace_copy(const std::string &s, size_t start_pos, const std::string &from, const std::string &to = "");
bool replace_end(std::string &s, const std::string &from, const std::string &to = "");
bool replace_end(std::string &s, const size_t &start, const std::string &from, const std::string &to = "");
std::string replace_end_copy(const std::string &s, const std::string &from, const std::string &to = "");
std::string replace_end_copy(const std::string &s, size_t start_pos, const std::string &from, const std::string &to = "");
std::string reverse(const std::string &s);
bool is_number(const std::string &s);
void remove_special_chars(std::string &s);
void remove_html_tags(std::string &html);
}
#endif
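
The renames above follow one convention: helpers that mutate take a `std::string &`, and the `_copy` variants return a new string. A minimal usage sketch of the declared functions; it assumes compilation inside the library tree, where the header is included as `"str.h"` just like in `str.cpp`.

```cpp
#include <cassert>
#include <string>

#include "str.h"  // librengine string helpers

int main() {
    using namespace librengine;

    std::string s = "  Hello World  ";
    trim(s);                          // mutates in place
    assert(s == "Hello World");

    to_lower(s);                      // in place; to_lower_copy() returns a copy
    assert(s == "hello world");
    assert(to_upper_copy(s) == "HELLO WORLD");

    replace(s, "world", "there");     // replaces every occurrence, returns bool
    assert(s == "hello there");

    assert(starts_with(s, "hello") && ends_with(s, "there"));
    assert(contains(s, "lo the"));
    assert(split("a,b,c", ",").size() == 3);
}
```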

View File

@ -3,19 +3,19 @@
#include "str.h"
namespace librengine::str {
namespace librengine {
template<typename T>
typename std::enable_if<false == std::is_convertible<T, std::string>::value, std::string>::type
to_string(T const &value) {
to_string(const T &value) {
return std::to_string(value);
}
template<typename T>
typename std::enable_if<false == std::is_convertible<T, std::vector<std::string>>::value, std::string> ::type
to_string(std::vector<T> const &values) {
to_string(const std::vector<T> &values) {
std::string result;
for (auto value: values) {
for (auto value : values) {
result.append(value);
}

View File

@ -8,7 +8,7 @@ namespace librengine::config {
stream.seekg(0, std::ios::end);
buffer.resize(stream.tellg());
stream.seekg(0);
stream.read(const_cast<char *>(buffer.data()), buffer.size());
stream.read(buffer.data(), buffer.size());
return buffer;
}
@ -16,48 +16,48 @@ namespace librengine::config {
void global::load_from_file(const std::string &path) {
const std::string content = helper::get_file_content(path);
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
auto json_global = json["global"];
json = json["global"];
auto nodes = json_global["nodes"];
auto nodes = json["nodes"];
for (auto node : nodes) {
this->nodes.push_back(node_s{node["name"], node["url"]});
}
rsa_key_length = json_global["rsa_key_length"].get<size_t>();
max_title_show_size = json_global["max_title_show_size"].get<size_t>();
max_desc_show_size = json_global["max_desc_show_size"].get<size_t>();
rsa_key_length = json["rsa_key_length"].get<size_t>();
max_title_show_size = json["max_title_show_size"].get<size_t>();
max_desc_show_size = json["max_desc_show_size"].get<size_t>();
}
void crawler::load_from_file(const std::string &path) {
const std::string content = helper::get_file_content(path);
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
auto json_crawler = json["crawler"];
json = json["crawler"];
user_agent = json_crawler["user_agent"].get<std::string>();
user_agent = json["user_agent"].get<std::string>();
std::string proxy_string = json_crawler["proxy"].get<std::string>();
std::string proxy_string = json["proxy"].get<std::string>();
if (!proxy_string.empty()) proxy = http::proxy{proxy_string};
load_page_timeout_s = json_crawler["load_page_timeout_s"].get<size_t>();
update_time_site_info_s_after = json_crawler["update_time_site_info_s_after"].get<size_t>();
delay_time_s = json_crawler["delay_time_s"].get<size_t>();
max_pages_site = json_crawler["max_pages_site"].get<size_t>();
max_page_symbols = json_crawler["max_page_symbols"].get<size_t>();
max_robots_txt_symbols = json_crawler["max_robots_txt_symbols"].get<size_t>();
max_lru_cache_size_host = json_crawler["max_lru_cache_size_host"].get<size_t>();
max_lru_cache_size_url = json_crawler["max_lru_cache_size_url"].get<size_t>();
is_http_to_https = json_crawler["is_http_to_https"].get<bool>();
is_check_robots_txt = json_crawler["is_check_robots_txt"].get<bool>();
load_page_timeout_s = json["load_page_timeout_s"].get<size_t>();
update_time_site_info_s_after = json["update_time_site_info_s_after"].get<size_t>();
delay_time_s = json["delay_time_s"].get<size_t>();
max_pages_site = json["max_pages_site"].get<size_t>();
max_page_symbols = json["max_page_symbols"].get<size_t>();
max_robots_txt_symbols = json["max_robots_txt_symbols"].get<size_t>();
max_lru_cache_size_host = json["max_lru_cache_size_host"].get<size_t>();
max_lru_cache_size_url = json["max_lru_cache_size_url"].get<size_t>();
is_http_to_https = json["is_http_to_https"].get<bool>();
is_check_robots_txt = json["is_check_robots_txt"].get<bool>();
}
void cli::load_from_file(const std::string &path) {
const std::string content = helper::get_file_content(path);
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
auto json_cli = json["cli"];
json = json["cli"];
std::string proxy_string = json_cli["proxy"].get<std::string>();
std::string proxy_string = json["proxy"].get<std::string>();
if (!proxy_string.empty()) proxy = http::proxy{proxy_string};
}
@ -65,11 +65,11 @@ namespace librengine::config {
void website::load_from_file(const std::string &path) {
const std::string content = helper::get_file_content(path);
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
auto json_website = json["website"];
json = json["website"];
port = json_website["port"].get<size_t>();
port = json["port"].get<size_t>();
std::string proxy_string = json_website["proxy"].get<std::string>();
std::string proxy_string = json["proxy"].get<std::string>();
if (!proxy_string.empty()) proxy = http::proxy{proxy_string};
}
@ -77,10 +77,10 @@ namespace librengine::config {
void db::load_from_file(const std::string &path) {
const std::string content = helper::get_file_content(path);
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
auto json_db = json["db"];
json = json["db"];
url = json_db["url"].get<std::string>();
api_key = json_db["api_key"].get<std::string>();
url = json["url"].get<std::string>();
api_key = json["api_key"].get<std::string>();
websites = typesense(url, "websites", api_key);
robots = typesense(url, "robots", api_key);
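
After this change each loader reuses the `json` variable for its own section, but the keys it reads are unchanged. Put together, the configuration file these `load_from_file` functions expect looks roughly like the literal below; only the key names are grounded in the code above, the values are illustrative rather than the project's actual defaults.

```cpp
#include <iostream>

#include "json.hpp"  // nlohmann::json, bundled in include/

int main() {
    const char *text = R"({
        "global":  { "nodes": [ { "name": "local", "url": "http://127.0.0.1:8080" } ],
                     "rsa_key_length": 1024,
                     "max_title_show_size": 55, "max_desc_show_size": 350 },
        "crawler": { "user_agent": "librengine", "proxy": "",
                     "load_page_timeout_s": 10, "update_time_site_info_s_after": 864000,
                     "delay_time_s": 3, "max_pages_site": 300,
                     "max_page_symbols": 50000000, "max_robots_txt_symbols": 3000,
                     "max_lru_cache_size_host": 512, "max_lru_cache_size_url": 512,
                     "is_http_to_https": true, "is_check_robots_txt": true },
        "cli":     { "proxy": "" },
        "website": { "port": 8080, "proxy": "" },
        "db":      { "url": "http://localhost:8108", "api_key": "xyz" }
    })";

    // Same parse flags as above: no callback, throw on error, ignore comments.
    auto json = nlohmann::json::parse(text, nullptr, true, true);
    std::cout << json["crawler"]["user_agent"].get<std::string>() << std::endl;
}
```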

View File

@ -201,7 +201,7 @@ namespace librengine::encryption {
auto block_size = get_size();
std::string result;
auto blocks = str::split(text, "\n");
auto blocks = split(text, "\n");
auto size = blocks.size();
for (int i = 0; i < size; ++i) {
@ -227,7 +227,7 @@ namespace librengine::encryption {
auto block_size = get_size();
std::string result;
auto blocks = str::split(text, "\n");
auto blocks = split(text, "\n");
auto size = blocks.size();
for (int i = 0; i < size; ++i) {

View File

@ -11,10 +11,10 @@ namespace librengine::helper {
std::optional<std::string> lxb_string_to_std(const lxb_char_t *s) {
if (s == nullptr) return std::nullopt;
return reinterpret_cast<const char *>(s);
return (const char *)s;
}
lxb_char_t *std_string_to_lxb(const std::string &s) {
return (lxb_char_t *) s.c_str();
return (lxb_char_t *)s.c_str();
}
std::optional<lxb_html_document*> parse_html(const std::string &response) {

View File

@ -27,11 +27,11 @@ namespace librengine::http {
set_full(full);
}
proxy::proxy(const std::string &full, const proxy_type &type) {
auto split = str::split(full, ":");
auto splited = split(full, ":");
if (split.size() > 1) {
this->ip = split[0];
this->port = split[1];
if (splited.size() > 1) {
this->ip = splited[0];
this->port = splited[1];
}
this->type = type;
@ -42,7 +42,7 @@ namespace librengine::http {
std::string result;
if (full.empty()) return {};
if (str::starts_with(full, "http") || str::starts_with(full, "socks")) return full;
if (starts_with(full, "http") || starts_with(full, "socks")) return full;
switch (type) {
case proxy_type::http:
@ -70,12 +70,12 @@ namespace librengine::http {
this->full.append(": ");
this->full.append(this->value);
} else {
const auto splited = str::split(full, ":");
const auto splited = split(full, ":");
if (splited.size() > 1) {
this->name = splited[0];
this->value = splited[1];
str::remove_first_char(this->value);
remove_first(this->value);
}
this->full = full;
@ -186,8 +186,8 @@ namespace librengine::http {
if (!c) {
this->text = url;
if (str::get_last_char(this->text) == '#') {
str::remove_last_char(this->text);
if (get_last(this->text) == '#') {
remove_last(this->text);
}
curl_free(url);
@ -210,7 +210,7 @@ namespace librengine::http {
this->curl = curl_easy_init();
this->options.headers = std::make_shared<std::vector<header>>();
this->url = str::replace(this->url, " ", "%20");
this->url = replace_copy(this->url, " ", "%20");
if (is_set_secure_headers) {
this->options.headers->emplace_back("DNT", "1"); //don't track

View File

@ -18,4 +18,7 @@ void logger::print(const type &type_, const std::string &text, const std::string
}
std::cout << type_color << "[" << type_s << "] " << green << text << white << " [" << id << "]" << std::endl;
}
void logger::print(const std::string &text, const std::string &color) {
std::cout << color << text << std::endl;
}
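
The added overload is a plain colored line printer on top of the ANSI constants in logger.h, next to the existing typed and tagged form. A short usage sketch; the messages and the id are placeholders, and the colors assume an ANSI-capable terminal.

```cpp
#include "logger.h"  // logger::print overloads and ANSI color constants

int main() {
    logger::print(logger::type::info, "crawler started", "worker-0");  // typed + tagged
    logger::print("plain status line");                                // defaults to logger::reset
    logger::print("something went wrong", logger::red);                // explicit color
}
```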

View File

@ -75,26 +75,26 @@ namespace librengine {
agents.emplace_back("");
}
void robots_txt::parse() {
auto splited = str::split(text, "\n");
auto splited = split(text, "\n");
for (const auto &pair : splited) {
auto splited_pair = str::split(pair, ":");
auto splited_pair = split(pair, ":");
auto splited_pair_size = splited_pair.size();
if (splited_pair_size != 2) continue;
auto key = str::to_lower(splited_pair[0]);
auto value = str::to_lower(splited_pair[1]);
auto key = to_lower_copy(splited_pair[0]);
auto value = to_lower_copy(splited_pair[1]);
key = str::trim(key);
value = str::trim_start(value);
trim(key);
trim_start(value);
if (!value.empty()) value = str::trim_end(value);
if (!value.empty()) trim_end(value);
auto comment_index = value.find('#');
if (comment_index != -1) {
value = value.substr(0, comment_index);
value = str::trim_end(value);
trim_end(value);
}
if (key.empty()) continue;

View File

@ -24,11 +24,6 @@ namespace librengine {
}
}
void search::remove_html_tags(std::string &html) {
std::regex regex(R"(<\/?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*(( |)\/|)>)"); //<[^<>]+>
html = regex_replace(html, regex, "");
}
std::vector<search_result> search::local(const std::string &q, const size_t &p) {
const auto response = config.db_.websites.search(q, "url,title,desc", {{"page", std::to_string(p)}});
nlohmann::json result_json = nlohmann::json::parse(response);
@ -87,9 +82,9 @@ namespace librengine {
return {};
}
node_url_params = str::format("?q={0}&p={1}&e=1&ek={2}", encrypted_query, page_, rsa_public_key_base64);
node_url_params = format("?q={0}&p={1}&e=1&ek={2}", encrypted_query, page_, rsa_public_key_base64);
} else {
node_url_params = str::format("?q={0}&p={1}&e=0", query, page_);
node_url_params = format("?q={0}&p={1}&e=0", query, page_);
}
http::request node_request(node.url + "/api/search" + node_url_params);
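
The node request is simply the peer's base URL plus `/api/search` and the parameters built by `format` above (`q` = query, `p` = page, `e` = encryption flag, `ek` = base64 public key). For an unencrypted query the composed URL has the shape shown below; the node address and the query are placeholders.

```cpp
#include <iostream>
#include <string>

int main() {
    // Placeholders; a real node URL comes from the "nodes" list in the config.
    std::string node_url = "http://127.0.0.1:8080";
    std::string query = "libreoffice";  // already '+'-joined by the caller
    std::string page = "1";

    // Same shape as format("?q={0}&p={1}&e=0", query, page_) above.
    std::string params = "?q=" + query + "&p=" + page + "&e=0";
    std::cout << node_url + "/api/search" + params << std::endl;
    // -> http://127.0.0.1:8080/api/search?q=libreoffice&p=1&e=0
}
```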

View File

@ -1,56 +1,40 @@
#include "str.h"
#include <algorithm>
#include <string>
#include <vector>
#include <regex>
namespace librengine::str {
std::string to_string(const std::string &value) {
namespace librengine {
std::string to_string(const std::string &value) { //for format (str_impl.h)
return value;
}
char get_first_char(const std::string &s) {
char get_first(const std::string &s) {
return s.front();
}
char get_last_char(const std::string &s) {
char get_last(const std::string &s) {
return s.back();
}
void remove_first_char(std::string &s) {
void remove_first(std::string &s) {
s.erase(s.begin());
}
void remove_last_char(std::string &s) {
void remove_last(std::string &s) {
s.pop_back();
}
std::string trim_start(const std::string &s) {
size_t size = s.size();
for (int i = 0; i < size; ++i) {
char c = s[i];
if (c != ' ') {
return s.substr(i, (size - i));
}
}
return {};
void trim_start(std::string &s) {
static const auto lambada_space = [](const char &c) { return std::isspace(c); };
auto first = std::find_if_not(s.begin(), s.end(), lambada_space);
s.erase(s.begin(), first);
}
std::string trim_end(const std::string &s) {
size_t size = s.size();
for (size_t i = size - 1; i >= 0; --i) {
char c = s[i];
if (c != ' ') {
return s.substr(0, i + 1);
}
}
return {};
void trim_end(std::string &s) {
static const auto lambada_space = [](const char &c) { return std::isspace(c); };
auto last = std::find_if_not(s.rbegin(), s.rend(), lambada_space);
s.erase(last.base(), s.end());
}
std::string trim(const std::string &s) {
return trim_end(trim_start(s));
void trim(std::string &s) {
trim_start(s);
trim_end(s);
}
std::vector<std::string> split(const std::string &s, const std::string &delimiter) {
@ -74,68 +58,57 @@ namespace librengine::str {
return result;
}
std::string to_lower(const std::string &s) {
std::string result;
for (const auto &c: s) {
result.push_back(std::tolower(c));
void to_lower(std::string &s) {
for (auto &c : s) {
c = std::tolower(c);
}
}
void to_upper(std::string &s) {
for (auto &c : s) {
c = std::toupper(c);
}
}
std::string to_lower_copy(const std::string &s) {
std::string result = s;
to_lower(result);
return result;
}
std::string to_upper_copy(const std::string &s) {
std::string result = s;
to_upper(result);
return result;
}
std::string to_upper(const std::string &s) {
std::string result;
for (const auto &c: s) {
result.push_back(std::toupper(c));
}
return result;
bool contains(const std::string &s, const char &value) {
return s.find(value) != std::string::npos;
}
bool contains(const std::string &s, const char &value, const bool &is_value_to_lower) {
if (is_value_to_lower) {
return s.find(tolower(value)) != std::string::npos;
}
bool contains(const std::string &s, const std::string &value) {
return s.find(value) != std::string::npos;
}
bool contains(const std::string &s, const std::string &value, const bool &is_value_to_lower) {
if (is_value_to_lower) {
return s.find(to_lower(value)) != std::string::npos;
}
bool starts_with(const std::string &s, const std::string &v) {
const size_t v_size = v.size();
const size_t s_size = s.size();
return s.find(value) != std::string::npos;
}
if (s_size < v_size) return false;
bool starts_with(const std::string &s, const std::string &value) {
size_t value_size = value.size();
if (s.size() < value_size) {
return false;
}
for (int i = 0; i < value_size; ++i) {
if (s[i] != value[i]) {
for (int i = 0; i < v_size; ++i) {
if (s[i] != v[i]) {
return false;
}
}
return true;
}
bool ends_with(const std::string &s, const std::string &v) {
const size_t v_size = v.size();
const size_t s_size = s.size();
const size_t size = s_size - v_size;
bool ends_with(const std::string &s, const std::string &value) {
size_t value_size = value.size();
size_t size = s.size() - value_size;
if (s_size < v_size) return false;
if (s.size() < value_size) {
return false;
}
for (int i = 0; i < value_size; ++i) {
if (s[size + i] != value[i]) {
for (int i = 0; i < v_size; ++i) {
if (s[size + i] != v[i]) {
return false;
}
}
@ -162,8 +135,18 @@ namespace librengine::str {
return result;
}
void replace_ref(std::string &s, const std::string &from, const std::string &to) {
size_t find_end(const std::string &s, const std::string &v) {
auto s_length = s.length();
auto v_length = v.length();
if (v_length > s_length) return std::string::npos;
return s.find(v, s_length - v_length);
}
bool replace(std::string &s, const std::string &from, const std::string &to) {
if (from.length() > s.length()) return false;
size_t start_pos = 0;
bool result = false;
while (true) {
start_pos = s.find(from, start_pos);
@ -172,22 +155,79 @@ namespace librengine::str {
break;
}
result = true;
s.replace(start_pos, from.length(), to);
start_pos += to.length();
}
return result;
}
bool replace(std::string &s, size_t start_pos, const std::string &from, const std::string &to) {
if (from.length() > s.length()) return false;
bool result = false;
while (true) {
start_pos = s.find(from, start_pos);
if (start_pos == std::string::npos) {
break;
}
result = true;
s.replace(start_pos, from.length(), to);
start_pos += to.length();
}
return result;
}
std::string replace_copy(const std::string &s, const std::string &from, const std::string &to) {
std::string result = s;
replace(result, from, to);
return result;
}
std::string replace_copy(const std::string &s, size_t start_pos, const std::string &from, const std::string &to) {
std::string result = s;
replace(result, start_pos, from, to);
return result;
}
bool replace_end(std::string &s, const std::string &from, const std::string &to) {
auto s_length = s.length();
auto from_length = from.length();
if (from_length > s_length) return false;
return replace(s, s_length - from_length, from, to);
}
bool replace_end(std::string &s, const size_t &start, const std::string &from, const std::string &to) {
if (from.length() > s.length()) return false;
auto found = find_end(s, from);
if (found != std::string::npos && found >= start) {
return replace_end(s, from, to);
}
return false;
}
std::string replace_end_copy(const std::string &s, const std::string &from, const std::string &to) {
std::string result = s;
replace_end(result, from, to);
return result;
}
std::string replace_end_copy(const std::string &s, size_t start_pos, const std::string &from, const std::string &to) {
std::string result = s;
replace_end(result, start_pos, from, to);
return result;
}
std::string reverse(const std::string &s) {
return {s.rbegin(), s.rend()};
}
bool is_number(const std::string &s) {
for (const auto &c : s) {
if (!std::isdigit(c)) {
return false;
}
}
return true;
void remove_special_chars(std::string &s) {
static auto special_char_lambada = [](const char &c) { return !std::isalpha(c) && !std::isdigit(c); };
s.erase(std::remove_if(s.begin(), s.end(), special_char_lambada), s.end());
}
void remove_html_tags(std::string &html) {
std::regex regex(R"(<\/?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*(( |)\/|)>)"); //<[^<>]+>
html = regex_replace(html, regex, "");
}
}
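
The new replace family splits into mutating and `_copy` forms, and `find_end`/`replace_end` only act where a suffix match can start. A few checks against the implementations above (a sketch, built inside the library tree):

```cpp
#include <cassert>
#include <string>

#include "str.h"

int main() {
    using namespace librengine;

    std::string s = "foo.bar.foo";
    assert(replace(s, "foo", "x"));         // every occurrence, in place
    assert(s == "x.bar.x");

    assert(replace_copy("a b c", " ", "_") == "a_b_c");

    std::string t = "index.html";
    assert(find_end(t, "html") == 6);       // searches only where a suffix could begin
    assert(replace_end(t, "html", "php"));  // rewrites the matching suffix
    assert(t == "index.php");

    assert(is_number("12345") && !is_number("12a45"));

    std::string u = "a-b_c!";
    remove_special_chars(u);                // keeps only letters and digits
    assert(u == "abc");
}
```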

View File

@ -5,4 +5,4 @@ cd ../../crawler
mkdir build && cd build && cmake .. && make
cd ../../website
mkdir build && cd build && cmake .. && make
mkdir build && cd build && cmake .. && make

View File

@ -0,0 +1,3 @@
yay -S curl lexbor openssl &&
curl -O https://dl.typesense.org/releases/0.23.0.rc20/typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
tar -xzf typesense-server-0.23.0.rc20-linux-amd64.tar.gz

View File

@ -0,0 +1,7 @@
sudo apt install libcurl4-openssl-dev &&
curl -O https://dl.typesense.org/releases/0.23.0.rc20/typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
tar -xzf typesense-server-0.23.0.rc20-linux-amd64.tar.gz &&
git clone https://github.com/lexbor/lexbor &&
cd lexbor &&
cmake . && make && sudo make install &&
sudo apt install libssl-dev

Binary file not shown. (Before: 115 KiB)

View File

@ -15,22 +15,6 @@ int main(int argc, char **argv) {
auto server = std::make_shared<Server>();
auto pages = std::make_shared<website::pages>(config);
nlohmann::json a;
a["results"][0]["title"] = std::string("捐款 | LibreOffice 正體中文站 - 自由的辦<E79A84>...");
a["results"][0]["url"] = "https://zh-tw.libreoffice.org/donate/";
a["results"][0]["desc"] = "Donate, donation, donations, funding, help, support, LibreOffice";
a["results"][0]["rating"] = "100";
a["results"][0]["id"] = "9";
a["results"][0]["has_trackers"] = "0";
a["results"][1]["title"] = std::string("Qu'est-ce que LibreOffice | Communauté LibreOffice ...");
a["results"][1]["url"] = "https://fr.libreoffice.org/discover/libreoffice/";
a["results"][1]["desc"] = "LibreOffice, Free Office Suite, Fun Project, Fantastic People, Writer, Calc, Impress, Draw, Base, Charts, Diagrams, extensions, templates, word processor, text editor, spreadsheet, presentation, database, documents, Document Foundation";
a["results"][1]["rating"] = "100";
a["results"][1]["id"] = "6";
a["results"][1]["has_trackers"] = "0";
std::cout << a.dump();
//return 0;
std::thread server_thread([&] {
server->set_mount_point("/", "../frontend/");
server->Get("/home", [&](const Request &req, Response &res) { pages->home_p(req, res); });

View File

@ -46,9 +46,9 @@ namespace website {
const std::string noscript_src = R"(<noscript><span class="noscript">Encryption doesn't work</span></noscript>)";
const std::string header_src = R"(<li><a href="/home">Home</a></li><li><a href="/node/info">Node Info</a></li><li><a href="https://github.com/liameno/librengine">Github</a></li>)";
str::replace_ref(page_src, "{RSA_PUBLIC_KEY}", search_->rsa_public_key_base64);
str::replace_ref(page_src, "{NOSCRIPT_CONTENT}", noscript_src);
str::replace_ref(page_src, "{HEADER_CONTENT}", header_src);
replace(page_src, "{RSA_PUBLIC_KEY}", search_->rsa_public_key_base64);
replace(page_src, "{NOSCRIPT_CONTENT}", noscript_src);
replace(page_src, "{HEADER_CONTENT}", header_src);
}
void pages::update(const std::string &id, const std::string &field, const size_t &value) {
@ -79,7 +79,7 @@ namespace website {
void pages::home_p(const Request &request, Response &response) {
std::string page_src = config::helper::get_file_content("../frontend/src/index.html");
const std::string query = request.get_param_value("q");
str::replace_ref(page_src, "{QUERY}", query);
replace(page_src, "{QUERY}", query);
set_variables(page_src);
response.status = 200;
@ -90,12 +90,12 @@ namespace website {
std::string page_ = request.get_param_value("p");
std::string is_encryption_enabled_ = request.get_param_value("e");
std::string query = str::replace(request.get_param_value("q"), " ", "+");
std::string query = replace_copy(request.get_param_value("q"), " ", "+");
bool is_encryption_enabled = is_encryption_enabled_ == "1";
std::string encryption_key = request.get_param_value("ek");
size_t page = (!page_.empty()) ? std::stoi(page_) : 1;
std::string url_params = str::format("?q={0}&p={1}&e={2}&ek={3}", query, page_, is_encryption_enabled_, encryption_key);
std::string url_params = format("?q={0}&p={1}&e={2}&ek={3}", query, page_, is_encryption_enabled_, encryption_key);
encryption_key = encryption::base64::easy_decode(encryption_key);
if (encryption_key.find("END PUBLIC KEY") == -1) {
@ -128,7 +128,7 @@ namespace website {
const auto &has_trackers = result.has_trackers ? "bad" : "good";
const auto &node_url = result.node_url;
std::string result_src = str::format(center_result_src_format, title, url, desc, rating, id, node_url, has_trackers);
std::string result_src = format(center_result_src_format, title, url, desc, rating, id, node_url, has_trackers);
center_results_src.append(result_src);
}
@ -147,10 +147,10 @@ namespace website {
std::string url = request.path + url_params;
str::replace_ref(page_src, "{CENTER_RESULTS}", center_results_src);
str::replace_ref(page_src, "{QUERY}", query);
str::replace_ref(page_src, "{PREV_PAGE}", str::replace(url, "&p=" + page_, "&p=" + std::to_string((page > 1) ? page - 1 : 1)));
str::replace_ref(page_src, "{NEXT_PAGE}", str::replace(url, "&p=" + page_, "&p=" + std::to_string(page + 1)));
replace(page_src, "{CENTER_RESULTS}", center_results_src);
replace(page_src, "{QUERY}", query);
replace(page_src, "{PREV_PAGE}", replace_copy(url, "&p=" + page_, "&p=" + std::to_string((page > 1) ? page - 1 : 1)));
replace(page_src, "{NEXT_PAGE}", replace_copy(url, "&p=" + page_, "&p=" + std::to_string(page + 1)));
set_variables(page_src);
response.status = 200;
@ -158,7 +158,7 @@ namespace website {
}
void pages::node_info_p(const Request &request, Response &response) {
std::string page_src = config::helper::get_file_content("../frontend/src/node/info.html");
str::replace_ref(page_src, "{PAGES_COUNT}", std::to_string(get_field_count("url")));
replace(page_src, "{PAGES_COUNT}", std::to_string(get_field_count("url")));
set_variables(page_src);
response.status = 200;
@ -206,7 +206,7 @@ namespace website {
}
}
void pages::api_search(const Request &request, Response &response) {
std::string query = str::replace(request.get_param_value("q"), " ", "+");
std::string query = replace_copy(request.get_param_value("q"), " ", "+");
std::string page_ = request.get_param_value("p");
std::string is_encryption_enabled_ = request.get_param_value("e");
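
Throughout these `pages::` handlers the rendering pattern is the same: read an HTML template from the frontend directory, then substitute `{PLACEHOLDER}` tokens with the in-place `replace`. A reduced sketch of that pattern; the template string and the substituted values here are stand-ins, not the repository's real frontend files.

```cpp
#include <iostream>
#include <string>

#include "str.h"  // replace (in-place)

int main() {
    using namespace librengine;

    // Stand-in for config::helper::get_file_content("../frontend/src/index.html").
    std::string page_src = R"(<form action="/search"><input value="{QUERY}"></form>
<ul>{CENTER_RESULTS}</ul>)";

    // Same substitution calls as pages::home_p / pages::search_p above.
    replace(page_src, "{QUERY}", "libre search");
    replace(page_src, "{CENTER_RESULTS}", "<li>result 1</li><li>result 2</li>");

    std::cout << page_src << std::endl;
}
```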