mirror of
https://github.com/liameno/librengine.git
synced 2024-10-03 17:30:02 +03:00
Update May (opensearch->typesense)
This commit is contained in:
parent
e0866f6e41
commit
61bf47e493
7
CMakeLists.txt
Normal file
7
CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
# Top-level build script: pulls every librengine component into one build tree.
cmake_minimum_required(VERSION 3.2)
project(librengine)

# Each component lives in <root>/<name> and is built into <root>/<name>/build.
foreach(component cli lib crawler website)
    add_subdirectory("${PROJECT_SOURCE_DIR}/${component}" "${PROJECT_SOURCE_DIR}/${component}/build")
endforeach()
|
47
README.md
47
README.md
@ -1,18 +1,17 @@
|
||||
# Librengine
|
||||
|
||||
![GitHub top language](https://img.shields.io/github/languages/top/liameno/librengine) ![GitHub](https://img.shields.io/github/license/liameno/librengine)
|
||||
|
||||
Privacy Opensource Web Search Engine
|
||||
Privacy Web Search Engine
|
||||
## Website
|
||||
[![https://raw.githubusercontent.com/liameno/librengine/master/preview.gif](https://raw.githubusercontent.com/liameno/librengine/master/demo.png)]()
|
||||
## Donate to web-hosting
|
||||
| Cryptocurrency | Address |
|
||||
| Currency | Address |
|
||||
| --- | --- |
|
||||
| Bitcoin (BTC) | bc1qxpu9vfzah3vw5pzanny0zmfsgd64klcj24pa8x |
|
||||
| Dogecoin (DOGE) | DM8cqzbrW2rrmGk4K6UCD7rfeoqnKjJTum |
|
||||
| Ethereum (ETH)| 0x1857A1A7a543ED123151ACCAbBF4EB058741e614 |
|
||||
| Litecoin (LTC) | LLQMiWpF1cxET7p7UMYoWjJ26JuTp14u8K |
|
||||
| Monero (XMR) | 4AkPUBr4uoFV1K4fSitpGJjRHo4dfSzZ257YR9HxiQi3DvmgLW1rteRQfRRCFYytKugcygfHAvvJu3Tt96mSoVUE6JKJDZL |
|
||||
|
||||
## Features
|
||||
- Crawler
|
||||
- Proxy
|
||||
@ -31,19 +30,20 @@ Privacy Opensource Web Search Engine
|
||||
## Dependencies
|
||||
- libcurl (https://github.com/curl/curl)
|
||||
- lexbor (https://github.com/lexbor/lexbor)
|
||||
- opensearch (https://www.opensearch.org/)
|
||||
- openssl (https://www.openssl.org/)
|
||||
- typesense (https://typesense.org)
|
||||
- openssl (https://www.openssl.org)
|
||||
|
||||
Arch:
|
||||
```shell
|
||||
yay -S curl lexbor opensearch openssl
|
||||
yay -S curl lexbor openssl &&
|
||||
wget https://dl.typesense.org/releases/0.22.2/typesense-server-0.22.2-linux-amd64.tar.gz &&
|
||||
tar -zxf typesense-server-0.22.2-linux-amd64.tar.gz
|
||||
```
|
||||
Debian:
|
||||
```shell
|
||||
sudo apt install libcurl4-openssl-dev &&
|
||||
wget https://artifacts.opensearch.org/releases/bundle/opensearch/1.2.4/opensearch-1.2.4-linux-x64.tar.gz &&
|
||||
tar -zxf opensearch-1.2.4-linux-x64.tar.gz && cd opensearch-1.2.4 &&
|
||||
./opensearch-tar-install.sh &&
|
||||
wget https://dl.typesense.org/releases/0.22.2/typesense-server-0.22.2-linux-amd64.tar.gz &&
|
||||
tar -zxf typesense-server-0.22.2-linux-amd64.tar.gz &&
|
||||
git clone https://github.com/lexbor/lexbor &&
|
||||
cd lexbor &&
|
||||
cmake . && make && sudo make install &&
|
||||
@ -51,23 +51,23 @@ sudo apt install libssl-dev
|
||||
```
|
||||
## Build
|
||||
```shell
|
||||
git clone https://github.com/liameno/librengine
|
||||
cd librengine
|
||||
git clone https://github.com/liameno/librengine &&
|
||||
cd librengine &&
|
||||
sh scripts/build_all.sh
|
||||
```
|
||||
## Run
|
||||
```shell
|
||||
opensearch
|
||||
sh scripts/set_opensearch.sh
|
||||
./typesense-server --data-dir=/tmp/typesense-data --api-key=xyz --enable-cors &
|
||||
sh scripts/init_db.sh
|
||||
```
|
||||
#### Crawler
|
||||
```shell
|
||||
./crawler https://www.gnu.org ../../config.json
|
||||
#[start_site] [config path]
|
||||
```
|
||||
#### Backend
|
||||
#### Website
|
||||
```shell
|
||||
./backend ../../config.json
|
||||
./website ../../config.json
|
||||
#[config path]
|
||||
```
|
||||
## Config
|
||||
@ -75,10 +75,11 @@ sh scripts/set_opensearch.sh
|
||||
//proxy: type://ip:port
|
||||
//socks5://127.0.0.1:9050
|
||||
|
||||
//_s - seconds
|
||||
|
||||
{
|
||||
"crawler": {
|
||||
"user_agent": "librengine",
|
||||
"opensearch_url": "http://localhost:9200",
|
||||
"proxy": "socks5://127.0.0.1:9050",
|
||||
"load_page_timeout_s": 20,
|
||||
"update_time_site_info_s_after": 86400, //1 day
|
||||
@ -103,16 +104,14 @@ sh scripts/set_opensearch.sh
|
||||
"url": "http://127.0.0.1:8080"
|
||||
}
|
||||
]
|
||||
},
|
||||
//edit also init_db.sh
|
||||
"db": {
|
||||
"url": "http://localhost:8108",
|
||||
"api_key": "xyz"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### OpenSearch: Permissions Denied
|
||||
|
||||
```shell
|
||||
sudo chmod -R 777 /usr/share/opensearch/config
|
||||
sudo chmod -R 777 /usr/share/opensearch/logs
|
||||
```
|
||||
|
||||
## License
|
||||
GNU General Public License v3.0
|
||||
|
@ -1,10 +1,11 @@
|
||||
//proxy: type://ip:port
|
||||
//socks5://127.0.0.1:9050
|
||||
|
||||
//_s - seconds
|
||||
|
||||
{
|
||||
"crawler": {
|
||||
"user_agent": "librengine",
|
||||
"opensearch_url": "http://localhost:9200",
|
||||
"proxy": "socks5://127.0.0.1:9050",
|
||||
"load_page_timeout_s": 20,
|
||||
"update_time_site_info_s_after": 86400, //1 day
|
||||
@ -29,5 +30,10 @@
|
||||
"url": "http://127.0.0.1:8080"
|
||||
}
|
||||
]
|
||||
},
|
||||
//edit also init_db.sh
|
||||
"db": {
|
||||
"url": "http://localhost:8108",
|
||||
"api_key": "xyz"
|
||||
}
|
||||
}
|
||||
|
@ -3,35 +3,19 @@
|
||||
#include <optional>
|
||||
|
||||
#include <librengine/config.h>
|
||||
#include <librengine/opensearch.h>
|
||||
#include "librengine/http.h"
|
||||
#include <librengine/typesense.h>
|
||||
#include <librengine/http.h>
|
||||
|
||||
#ifndef HELPER_H
|
||||
#define HELPER_H
|
||||
|
||||
namespace helper {
|
||||
inline size_t compute_time();
|
||||
size_t compute_time();
|
||||
|
||||
std::optional<std::string> lxb_string_to_std(const lxb_char_t *s);
|
||||
lxb_char_t *std_string_to_lxb(const std::string &s);
|
||||
|
||||
std::optional<lxb_html_document*> parse_html(const std::string &response);
|
||||
|
||||
std::string compute_search_website_json(const std::string &field, const std::string &phrase, const librengine::config::crawler ¤t_config);
|
||||
std::string compute_search_robots_txt_json(const std::string &field, const std::string &phrase, const librengine::config::crawler ¤t_config);
|
||||
|
||||
std::optional<std::string> compute_website_json(const std::string &title, const std::string &url, const std::string &host, const std::string &desc, const bool &has_ads, const bool &has_analytics);
|
||||
std::optional<std::string> compute_robots_txt_json(const std::string &body, const std::string &host);
|
||||
|
||||
std::string get_desc(const std::string &attribute_name, const std::string &attribute_value, lxb_html_document *document);
|
||||
std::string compute_desc(const std::string &tag_name, lxb_html_document *document);
|
||||
|
||||
std::optional<std::string> get_added_robots_txt(const std::string &host, const librengine::config::crawler ¤t_config, librengine::opensearch::client &opensearch_client);
|
||||
size_t hints_count_added(const std::string &field, const std::string &url, const librengine::config::crawler ¤t_config, librengine::opensearch::client &opensearch_client);
|
||||
|
||||
librengine::http::request::result_s site(const librengine::http::url &url, const librengine::config::crawler ¤t_config);
|
||||
bool is_allowed_in_robots(const std::string &body, const std::string &url, const librengine::config::crawler ¤t_config);
|
||||
std::optional<std::string> get_robots_txt(const librengine::http::url &url, const librengine::config::crawler ¤t_config);
|
||||
}
|
||||
|
||||
#endif
|
@ -3,7 +3,7 @@
|
||||
#include <optional>
|
||||
|
||||
#include <librengine/config.h>
|
||||
#include <librengine/opensearch.h>
|
||||
#include <librengine/typesense.h>
|
||||
|
||||
#ifndef WORKER_H
|
||||
#define WORKER_H
|
||||
@ -21,13 +21,27 @@ public:
|
||||
error,
|
||||
};
|
||||
private:
|
||||
librengine::config::crawler current_config;
|
||||
librengine::opensearch::client opensearch_client;
|
||||
librengine::config::crawler config;
|
||||
librengine::typesense db_website;
|
||||
librengine::typesense db_robots;
|
||||
bool is_work = false;
|
||||
public:
|
||||
std::optional<std::string> compute_website_json(const std::string &title, const std::string &url, const std::string &host, const std::string &desc, const bool &has_ads, const bool &has_analytics);
|
||||
std::optional<std::string> compute_robots_txt_json(const std::string &body, const std::string &host);
|
||||
|
||||
std::string get_desc(const std::string &attribute_name, const std::string &attribute_value, lxb_html_document *document);
|
||||
std::string compute_desc(const std::string &tag_name, lxb_html_document *document);
|
||||
|
||||
std::optional<std::string> get_added_robots_txt(const std::string &host);
|
||||
size_t hints_count_added(const std::string &field, const std::string &url);
|
||||
|
||||
librengine::http::request::result_s site(const librengine::http::url &url);
|
||||
bool is_allowed_in_robots(const std::string &body, const std::string &url);
|
||||
std::optional<std::string> get_robots_txt(const librengine::http::url &url);
|
||||
|
||||
bool normalize_url(librengine::http::url &url, const std::optional<std::string> &owner_host = std::nullopt) const;
|
||||
public:
|
||||
worker(librengine::config::crawler config, librengine::opensearch::client opensearch_client);
|
||||
worker(librengine::config::crawler config, const librengine::config::db &db);
|
||||
result main_thread(const std::string &site_url, int &deep, const std::optional<librengine::http::url> &owner_url = std::nullopt);
|
||||
};
|
||||
|
||||
|
@ -6,11 +6,11 @@
|
||||
|
||||
using namespace librengine;
|
||||
|
||||
void easy_start(const config::crawler &config) {
|
||||
void easy_start(const config::crawler &config, const config::db &db) {
|
||||
curl_global_init(CURL_GLOBAL_ALL); //https://stackoverflow.com/questions/6087886
|
||||
|
||||
int deep = 0;
|
||||
auto w = std::make_shared<worker>(config, opensearch::client(config.opensearch_url));
|
||||
auto w = std::make_shared<worker>(config, db);
|
||||
w->main_thread(config.start_site_url, deep);
|
||||
|
||||
curl_global_cleanup(); //https://curl.se/libcurl/c/curl_global_cleanup.html
|
||||
@ -23,15 +23,20 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
config::crawler config;
|
||||
config.start_site_url = argv[1];
|
||||
config::db db;
|
||||
|
||||
config.load_from_file(argv[2]);
|
||||
db.load_from_file(argv[2]);
|
||||
|
||||
config.start_site_url = argv[1];
|
||||
|
||||
std::string line = std::string(25, '=');
|
||||
|
||||
std::cout << logger::white << line << logger::green << "CFG" << logger::white << line << std::endl
|
||||
<< logger::reset << config.to_str() << std::endl
|
||||
<< logger::white << line << "===" << logger::white << line << std::endl;
|
||||
|
||||
easy_start(config);
|
||||
easy_start(config, db);
|
||||
|
||||
return 0;
|
||||
}
|
@ -4,12 +4,6 @@
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
|
||||
#include <librengine/config.h>
|
||||
#include <librengine/http.h>
|
||||
#include <librengine/opensearch.h>
|
||||
#include <librengine/json.hpp>
|
||||
#include "../third_party/rep-cpp/robots.h"
|
||||
|
||||
namespace helper {
|
||||
using namespace librengine;
|
||||
|
||||
@ -36,168 +30,4 @@ namespace helper {
|
||||
if (document == nullptr) return std::nullopt;
|
||||
return document;
|
||||
}
|
||||
|
||||
std::string compute_search_website_json(const std::string &field, const std::string &phrase, const config::crawler ¤t_config) {
|
||||
nlohmann::json json;
|
||||
const auto now = compute_time();
|
||||
|
||||
json["query"]["bool"]["must"][0]["match"][field] = phrase;
|
||||
json["query"]["bool"]["must"][1]["range"]["date"]["gte"] = now - current_config.update_time_site_info_s_after;
|
||||
json["query"]["bool"]["must"][1]["range"]["date"]["lte"] = now;
|
||||
json["_source"] = false;
|
||||
|
||||
return json.dump();
|
||||
}
|
||||
std::string compute_search_robots_txt_json(const std::string &field, const std::string &phrase, const config::crawler ¤t_config) {
|
||||
nlohmann::json json;
|
||||
const auto now = compute_time();
|
||||
|
||||
json["query"]["bool"]["must"][0]["match"][field] = phrase;
|
||||
json["query"]["bool"]["must"][1]["range"]["date"]["gte"] = now - current_config.update_time_site_info_s_after;
|
||||
json["query"]["bool"]["must"][1]["range"]["date"]["lte"] = now;
|
||||
|
||||
return json.dump();
|
||||
}
|
||||
|
||||
std::optional<std::string> compute_website_json(const std::string &title, const std::string &url, const std::string &host, const std::string &desc, const bool &has_ads, const bool &has_analytics) {
|
||||
nlohmann::json json;
|
||||
|
||||
json["title"] = title;
|
||||
json["url"] = url;
|
||||
json["host"] = host;
|
||||
json["desc"] = desc;
|
||||
json["has_ads"] = has_ads;
|
||||
json["has_analytics"] = has_analytics;
|
||||
json["rating"] = 100; //def = 100
|
||||
json["date"] = compute_time();
|
||||
|
||||
try {
|
||||
return json.dump();
|
||||
} catch (const nlohmann::detail::type_error &e) { //crawler trap
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
std::optional<std::string> compute_robots_txt_json(const std::string &body, const std::string &host) {
|
||||
nlohmann::json json;
|
||||
|
||||
json["body"] = body;
|
||||
json["host"] = host;
|
||||
json["date"] = compute_time();
|
||||
|
||||
try {
|
||||
return json.dump();
|
||||
} catch (const nlohmann::detail::type_error &e) { //crawler trap
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
std::string get_desc(const std::string &attribute_name, const std::string &attribute_value, lxb_html_document *document) {
|
||||
auto collection = lxb_dom_collection_make(&(document)->dom_document, 16);
|
||||
lxb_dom_elements_by_attr(lxb_dom_interface_element(document->head), collection, std_string_to_lxb(attribute_name),
|
||||
attribute_name.length(), std_string_to_lxb(attribute_value), attribute_value.length(), true);
|
||||
|
||||
const auto c_length = collection->array.length;
|
||||
std::string desc;
|
||||
|
||||
for (size_t i = 0; i < c_length; i++) {
|
||||
auto element = lxb_dom_collection_element(collection, i);
|
||||
const auto content = lxb_dom_element_get_attribute(element, std_string_to_lxb("content"), 7, nullptr);
|
||||
|
||||
if (content != nullptr) {
|
||||
if (desc.length() > 500) break;
|
||||
desc.append(lxb_string_to_std(content).value_or(""));
|
||||
desc.append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (c_length > 0) lxb_dom_collection_destroy(collection, true);
|
||||
return desc;
|
||||
}
|
||||
std::string compute_desc(const std::string &tag_name, lxb_html_document *document) {
|
||||
auto collection = lxb_dom_collection_make(&(document)->dom_document, 16);
|
||||
lxb_dom_elements_by_tag_name(lxb_dom_interface_element(document->body), collection, std_string_to_lxb(tag_name), tag_name.length());
|
||||
|
||||
const auto c_length = collection->array.length;
|
||||
std::string desc;
|
||||
|
||||
for (size_t i = 0; i < c_length; i++) {
|
||||
if (desc.length() > 500) break;
|
||||
|
||||
auto element = lxb_dom_collection_element(collection, i);
|
||||
const auto text = lxb_string_to_std(lxb_dom_node_text_content(lxb_dom_interface_node(element), nullptr)).value_or("");
|
||||
desc.append(text);
|
||||
desc.append("\n");
|
||||
}
|
||||
|
||||
if (c_length > 0) lxb_dom_collection_destroy(collection, true);
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
||||
std::optional<std::string> get_added_robots_txt(const std::string &host, const config::crawler ¤t_config, opensearch::client &opensearch_client) {
|
||||
const auto path = opensearch::client::path_options("robots_txt/_search");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
const auto json = compute_search_robots_txt_json("host", host, current_config);
|
||||
const auto search_response = opensearch_client.custom_request(path, type, json);
|
||||
|
||||
if (!search_response) return std::nullopt;
|
||||
nlohmann::json result_json = nlohmann::json::parse(*search_response);
|
||||
const auto value = result_json["hits"]["total"]["value"];
|
||||
|
||||
if (value.is_null()) return std::nullopt;
|
||||
if (value > 0) {
|
||||
const auto body = result_json["hits"]["hits"][0]["_source"]["body"];
|
||||
if (body.is_null()) return std::nullopt;
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
size_t hints_count_added(const std::string &field, const std::string &url, const config::crawler ¤t_config, opensearch::client &opensearch_client) {
|
||||
const auto path = opensearch::client::path_options("website/_search");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
const auto json = compute_search_website_json(field, url, current_config);
|
||||
const auto search_response = opensearch_client.custom_request(path, type, json);
|
||||
|
||||
if (!search_response) return false;
|
||||
nlohmann::json result_json = nlohmann::json::parse(*search_response);
|
||||
const auto value = result_json["hits"]["total"]["value"];
|
||||
|
||||
if (value.is_null()) return 0;
|
||||
if (value > 0) {
|
||||
return value;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Downloads `url` using the timeout, user agent and proxy from `current_config`.
 *
 * @param url            page to fetch
 * @param current_config crawler settings applied to the request
 * @return the result of the performed request
 */
http::request::result_s site(const http::url &url, const config::crawler &current_config) {
    http::request request(url.text);

    request.options.timeout_s = current_config.load_page_timeout_s;
    request.options.user_agent = current_config.user_agent;
    request.options.proxy = current_config.proxy;
    request.perform();

    return request.result;
}
|
||||
bool is_allowed_in_robots(const std::string &body, const std::string &url, const config::crawler ¤t_config) {
|
||||
Rep::Robots robots = Rep::Robots(body);
|
||||
return robots.allowed(url, current_config.user_agent);
|
||||
}
|
||||
/**
 * Fetches "/robots.txt" from the site that `url` belongs to.
 *
 * A copy of the URL gets its path replaced by "/robots.txt" and is requested
 * with the timeout, user agent and proxy from `current_config`.
 *
 * @param url            any URL on the target site
 * @param current_config crawler settings applied to the request
 * @return the response, or std::nullopt when the status code is not 200
 */
std::optional<std::string> get_robots_txt(const http::url &url, const config::crawler &current_config) {
    http::url url_cp(url.text);
    url_cp.set(CURLUPART_PATH, "/robots.txt");
    url_cp.parse();

    http::request request(url_cp.text);
    request.options.timeout_s = current_config.load_page_timeout_s;
    request.options.user_agent = current_config.user_agent;
    request.options.proxy = current_config.proxy;
    request.perform();

    if (request.result.code != 200) return std::nullopt;
    return request.result.response;
}
|
||||
}
|
@ -20,8 +20,145 @@ void if_debug_print(const logger::type &type, const std::string &text, const std
|
||||
#endif
|
||||
}
|
||||
|
||||
using namespace helper;
|
||||
using namespace librengine;
|
||||
|
||||
std::optional<std::string> worker::compute_website_json(const std::string &title, const std::string &url, const std::string &host, const std::string &desc, const bool &has_ads, const bool &has_analytics) {
|
||||
nlohmann::json json;
|
||||
|
||||
json["title"] = title;
|
||||
json["url"] = url;
|
||||
json["host"] = host;
|
||||
json["desc"] = desc;
|
||||
json["has_ads"] = has_ads;
|
||||
json["has_analytics"] = has_analytics;
|
||||
json["rating"] = 100; //def = 100
|
||||
json["date"] = compute_time();
|
||||
|
||||
try {
|
||||
return json.dump();
|
||||
} catch (const nlohmann::detail::type_error &e) { //crawler trap
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
std::optional<std::string> worker::compute_robots_txt_json(const std::string &body, const std::string &host) {
|
||||
nlohmann::json json;
|
||||
|
||||
json["body"] = body;
|
||||
json["host"] = host;
|
||||
json["date"] = compute_time();
|
||||
|
||||
try {
|
||||
return json.dump();
|
||||
} catch (const nlohmann::detail::type_error &e) { //crawler trap
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
std::string worker::get_desc(const std::string &attribute_name, const std::string &attribute_value, lxb_html_document *document) {
|
||||
auto collection = lxb_dom_collection_make(&(document)->dom_document, 16);
|
||||
lxb_dom_elements_by_attr(lxb_dom_interface_element(document->head), collection, std_string_to_lxb(attribute_name),
|
||||
attribute_name.length(), std_string_to_lxb(attribute_value), attribute_value.length(), true);
|
||||
|
||||
const auto c_length = collection->array.length;
|
||||
std::string desc;
|
||||
|
||||
for (size_t i = 0; i < c_length; i++) {
|
||||
auto element = lxb_dom_collection_element(collection, i);
|
||||
const auto content = lxb_dom_element_get_attribute(element, std_string_to_lxb("content"), 7, nullptr);
|
||||
|
||||
if (content != nullptr) {
|
||||
if (desc.length() > 500) break;
|
||||
desc.append(lxb_string_to_std(content).value_or(""));
|
||||
desc.append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (c_length > 0) lxb_dom_collection_destroy(collection, true);
|
||||
return desc;
|
||||
}
|
||||
std::string worker::compute_desc(const std::string &tag_name, lxb_html_document *document) {
|
||||
auto collection = lxb_dom_collection_make(&(document)->dom_document, 16);
|
||||
lxb_dom_elements_by_tag_name(lxb_dom_interface_element(document->body), collection, std_string_to_lxb(tag_name), tag_name.length());
|
||||
|
||||
const auto c_length = collection->array.length;
|
||||
std::string desc;
|
||||
|
||||
for (size_t i = 0; i < c_length; i++) {
|
||||
if (desc.length() > 500) break;
|
||||
|
||||
auto element = lxb_dom_collection_element(collection, i);
|
||||
const auto text = lxb_string_to_std(lxb_dom_node_text_content(lxb_dom_interface_node(element), nullptr)).value_or("");
|
||||
desc.append(text);
|
||||
desc.append("\n");
|
||||
}
|
||||
|
||||
if (c_length > 0) lxb_dom_collection_destroy(collection, true);
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
||||
std::optional<std::string> worker::get_added_robots_txt(const std::string &host) {
|
||||
const auto now = compute_time();
|
||||
auto filter_by = "date:>" + std::to_string(now - config.update_time_site_info_s_after) + " && date:<" + std::to_string(now);
|
||||
|
||||
const auto search_response = db_robots.search(host, "host", {{"filter_by", filter_by}});
|
||||
nlohmann::json result_json = nlohmann::json::parse(search_response);
|
||||
const auto value = result_json["found"];
|
||||
|
||||
if (value.is_null()) return std::nullopt;
|
||||
if (value > 0) {
|
||||
const auto body = result_json["hits"][0]["document"]["body"];
|
||||
if (body.is_null()) return std::nullopt;
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
size_t worker::hints_count_added(const std::string &field, const std::string &url) {
|
||||
const auto now = compute_time();
|
||||
auto filter_by = "date:>" + std::to_string(now - config.update_time_site_info_s_after) + " && date:<" + std::to_string(now);
|
||||
|
||||
const auto search_response = db_website.search(url, "url", {{"filter_by", filter_by}});
|
||||
nlohmann::json result_json = nlohmann::json::parse(search_response);
|
||||
const auto value = result_json["found"];
|
||||
|
||||
if (value.is_null()) return 0;
|
||||
if (value > 0) return value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Downloads `url` with the worker's configured timeout, user agent and proxy.
 *
 * @param url page to fetch
 * @return the result of the performed request
 */
http::request::result_s worker::site(const http::url &url) {
    http::request page_request(url.text);

    auto &options = page_request.options;
    options.timeout_s = config.load_page_timeout_s;
    options.user_agent = config.user_agent;
    options.proxy = config.proxy;

    page_request.perform();
    return page_request.result;
}
|
||||
bool worker::is_allowed_in_robots(const std::string &body, const std::string &url) {
|
||||
Rep::Robots robots = Rep::Robots(body);
|
||||
return robots.allowed(url, config.user_agent);
|
||||
}
|
||||
/**
 * Fetches "/robots.txt" from the site that `url` belongs to.
 *
 * A copy of the URL gets its path replaced by "/robots.txt" and is downloaded
 * with the worker's configured timeout, user agent and proxy.
 *
 * @param url any URL on the target site
 * @return the response, or std::nullopt when the status code is not 200
 */
std::optional<std::string> worker::get_robots_txt(const http::url &url) {
    http::url robots_url(url.text);
    robots_url.set(CURLUPART_PATH, "/robots.txt");
    robots_url.parse();

    // site() applies exactly the request options this function needs.
    const auto fetched = site(robots_url);

    if (fetched.code == 200) {
        return fetched.response;
    }

    return std::nullopt;
}
|
||||
|
||||
bool worker::normalize_url(http::url &url, const std::optional<std::string> &owner_host) const {
|
||||
if (url.text.size() < 3 && !owner_host) {
|
||||
return false;
|
||||
@ -68,7 +205,7 @@ bool worker::normalize_url(http::url &url, const std::optional<std::string> &own
|
||||
url.parse();
|
||||
}
|
||||
|
||||
if (this->current_config.is_http_to_https) {
|
||||
if (this->config.is_http_to_https) {
|
||||
if (url.scheme && url.scheme == "http") {
|
||||
url.set(CURLUPART_SCHEME, "https"); //protocol
|
||||
}
|
||||
@ -92,8 +229,10 @@ bool worker::normalize_url(http::url &url, const std::optional<std::string> &own
|
||||
return true;
|
||||
}
|
||||
|
||||
worker::worker(config::crawler config, opensearch::client opensearch_client) : current_config(std::move(config)), opensearch_client(std::move(opensearch_client)) {
|
||||
/**
 * Creates a worker with its own crawler settings and two Typesense handles:
 * one bound to the "websites" collection and one to the "robots" collection,
 * both using the URL and API key from `db`.
 *
 * Members are set up in the initializer list (in declaration order) instead of
 * being default-constructed and then overwritten.
 *
 * @param config crawler settings, moved into the worker
 * @param db     database connection settings (url, api_key)
 */
worker::worker(config::crawler config, const config::db &db)
    : config(std::move(config)),
      db_website(db.url, "websites", db.api_key),
      db_robots(db.url, "robots", db.api_key),
      is_work(true) {
}
|
||||
|
||||
worker::result worker::main_thread(const std::string &site_url, int &deep, const std::optional<http::url> &owner_url) {
|
||||
@ -116,49 +255,44 @@ worker::result worker::main_thread(const std::string &site_url, int &deep, const
|
||||
if_debug_print(logger::type::error, "url == owner", url.text);
|
||||
return result::already_added;
|
||||
}
|
||||
if (current_config.is_one_site && owner_url && url.host != owner_url->host) {
|
||||
if (config.is_one_site && owner_url && url.host != owner_url->host) {
|
||||
return result::already_added;
|
||||
}
|
||||
if (helper::hints_count_added("url", url.text, current_config, opensearch_client) > 0) {
|
||||
if (hints_count_added("url", url.text) > 0) {
|
||||
if_debug_print(logger::type::error, "already added", url.text);
|
||||
return result::already_added;
|
||||
}
|
||||
|
||||
size_t pages_count = helper::hints_count_added("host", *url.host, current_config, opensearch_client);
|
||||
size_t pages_count = hints_count_added("host", *url.host);
|
||||
|
||||
if (pages_count >= this->current_config.max_pages_site) {
|
||||
if (pages_count >= this->config.max_pages_site) {
|
||||
if_debug_print(logger::type::error, "pages count >= limit", url.text);
|
||||
return result::pages_limit;
|
||||
}
|
||||
|
||||
if (this->current_config.is_check_robots_txt) {
|
||||
auto robots_txt_body = helper::get_added_robots_txt(*url.host, current_config, opensearch_client).value_or("");
|
||||
if (this->config.is_check_robots_txt) {
|
||||
auto robots_txt_body = get_added_robots_txt(*url.host).value_or("");
|
||||
bool is_checked = true;
|
||||
|
||||
if (robots_txt_body.empty()) {
|
||||
robots_txt_body = helper::get_robots_txt(url, current_config).value_or("");
|
||||
robots_txt_body = get_robots_txt(url).value_or("");
|
||||
auto robots_txt_body_length = robots_txt_body.length();
|
||||
|
||||
if (robots_txt_body_length > 1 && robots_txt_body_length < this->current_config.max_robots_txt_symbols) {
|
||||
const auto json = helper::compute_robots_txt_json(robots_txt_body, *url.host);
|
||||
if (robots_txt_body_length > 1 && robots_txt_body_length < this->config.max_robots_txt_symbols) {
|
||||
const auto json = compute_robots_txt_json(robots_txt_body, *url.host);
|
||||
if (!json) return result::null_or_limit;
|
||||
|
||||
const auto path = opensearch::client::path_options("robots_txt/_doc");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
|
||||
//add a robots_txt to the opensearch
|
||||
opensearch_client.custom_request(path, type, json);
|
||||
db_robots.add(*json);
|
||||
} else {
|
||||
is_checked = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_checked && !helper::is_allowed_in_robots(robots_txt_body, url.text, current_config)) {
|
||||
if (is_checked && !is_allowed_in_robots(robots_txt_body, url.text)) {
|
||||
return result::disallowed_robots;
|
||||
}
|
||||
}
|
||||
|
||||
auto request_result = helper::site(url, current_config);
|
||||
auto request_result = site(url);
|
||||
auto response = request_result.response;
|
||||
auto response_length = response->length();
|
||||
|
||||
@ -170,25 +304,25 @@ worker::result worker::main_thread(const std::string &site_url, int &deep, const
|
||||
if_debug_print(logger::type::error, "code != 200", url.text);
|
||||
return result::null_or_limit;
|
||||
}
|
||||
if (!response || response_length < 1 || response_length >= this->current_config.max_page_symbols) {
|
||||
if (!response || response_length < 1 || response_length >= this->config.max_page_symbols) {
|
||||
if_debug_print(logger::type::error, "response = null || length < 1 || >= limit", url.text);
|
||||
return result::null_or_limit;
|
||||
}
|
||||
|
||||
auto document = helper::parse_html(*response);
|
||||
auto document = parse_html(*response);
|
||||
if (!document) return result::null_or_limit;
|
||||
auto body = lxb_dom_interface_node((*document)->body);
|
||||
if (body == nullptr) return result::null_or_limit;
|
||||
|
||||
const std::string title = helper::lxb_string_to_std(lxb_html_document_title((*document), nullptr)).value_or("");
|
||||
const std::string title = lxb_string_to_std(lxb_html_document_title((*document), nullptr)).value_or("");
|
||||
//const std::string content = lxb_string_to_std(lxb_dom_node_text_content(body, nullptr)).value_or("");
|
||||
std::string desc = helper::get_desc("name", "description", *document); //by meta tag
|
||||
std::string desc = get_desc("name", "description", *document); //by meta tag
|
||||
|
||||
if (desc.empty()) {
|
||||
desc = helper::get_desc("http-equiv", "description", *document); //by meta tag
|
||||
desc = get_desc("http-equiv", "description", *document); //by meta tag
|
||||
}
|
||||
if (desc.empty()) {
|
||||
desc.append(helper::compute_desc("h1", *document)); //from h1 tags
|
||||
desc.append(compute_desc("h1", *document)); //from h1 tags
|
||||
}
|
||||
if (title.empty() && desc.empty()) {
|
||||
if_debug_print(logger::type::error, "title & desc are empty", url.text);
|
||||
@ -224,28 +358,24 @@ worker::result worker::main_thread(const std::string &site_url, int &deep, const
|
||||
}
|
||||
}
|
||||
|
||||
const auto json = helper::compute_website_json(title, url.text, *url.host, desc, has_ads, has_analytics);
|
||||
const auto json = compute_website_json(title, url.text, *url.host, desc, has_ads, has_analytics);
|
||||
if (!json) return result::null_or_limit;
|
||||
|
||||
const auto path = opensearch::client::path_options("website/_doc");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
|
||||
//add a website to the opensearch
|
||||
opensearch_client.custom_request(path, type, json);
|
||||
db_website.add(*json);
|
||||
|
||||
//print added url
|
||||
std::cout << logger::yellow << "[" << url.text << "]" << std::endl;
|
||||
|
||||
if (deep < this->current_config.max_recursive_deep) {
|
||||
if (deep < this->config.max_recursive_deep) {
|
||||
auto collection = lxb_dom_collection_make(&(*document)->dom_document, 16);
|
||||
lxb_dom_elements_by_tag_name(lxb_dom_interface_element(body), collection, helper::std_string_to_lxb("a"), 1);
|
||||
lxb_dom_elements_by_tag_name(lxb_dom_interface_element(body), collection, std_string_to_lxb("a"), 1);
|
||||
const auto a_length = collection->array.length;
|
||||
std::vector<std::string> pages_limit_hosts;
|
||||
++deep;
|
||||
|
||||
for (size_t i = 0; i < a_length; i++) {
|
||||
auto element = lxb_dom_collection_element(collection, i);
|
||||
const auto href_value = helper::lxb_string_to_std(lxb_dom_element_get_attribute(element, helper::std_string_to_lxb("href"), 4, nullptr));
|
||||
const auto href_value = lxb_string_to_std(lxb_dom_element_get_attribute(element, std_string_to_lxb("href"), 4, nullptr));
|
||||
|
||||
if (!href_value && *href_value == url.text && str::starts_with(*href_value, "#")) {
|
||||
//skip fragment links
|
||||
@ -265,7 +395,7 @@ worker::result worker::main_thread(const std::string &site_url, int &deep, const
|
||||
if (!str::starts_with(*href_value, "http")) {
|
||||
result = main_thread(href_url.text, deep, url);
|
||||
} else {
|
||||
if (current_config.is_one_site && href_url.host != url.host) {
|
||||
if (config.is_one_site && href_url.host != url.host) {
|
||||
//skip other sites
|
||||
continue;
|
||||
}
|
||||
@ -280,7 +410,7 @@ worker::result worker::main_thread(const std::string &site_url, int &deep, const
|
||||
pages_limit_hosts.push_back(*href_url.host);
|
||||
} else if (result == result::added || result == result::disallowed_robots) {
|
||||
//delay
|
||||
std::this_thread::sleep_for(std::chrono::seconds(this->current_config.delay_time_s));
|
||||
std::this_thread::sleep_for(std::chrono::seconds(this->config.delay_time_s));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,8 +3,8 @@ project(librengine LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(include include/opensearch.h include/http.h include/str.h include/str_impl.h include/config.h include/logger.h include/json.hpp)
|
||||
set(src src/opensearch.cpp src/http.cpp src/str.cpp src/logger.cpp src/config.cpp)
|
||||
set(include include/encryption.h include/typesense.h include/http.h include/str.h include/str_impl.h include/config.h include/logger.h include/json.hpp)
|
||||
set(src src/encryption.cpp src/typesense.cpp src/http.cpp src/str.cpp src/logger.cpp src/config.cpp)
|
||||
|
||||
set(include_all ${include} ${tp_rep_cpp} ${tp_url_cpp})
|
||||
set(src_all ${src})
|
||||
|
@ -17,7 +17,6 @@ namespace librengine::config {
|
||||
struct crawler {
|
||||
std::string user_agent;
|
||||
std::string start_site_url;
|
||||
std::string opensearch_url;
|
||||
|
||||
std::optional<http::proxy> proxy;
|
||||
|
||||
@ -51,9 +50,16 @@ namespace librengine::config {
|
||||
std::string url;
|
||||
};
|
||||
|
||||
size_t port = 8080;
|
||||
std::optional<http::proxy> proxy = std::nullopt; //socks5://127.0.0.1:9050
|
||||
std::vector<node_s> nodes = {};
|
||||
size_t port;
|
||||
std::optional<http::proxy> proxy;
|
||||
std::vector<node_s> nodes;
|
||||
|
||||
void load_from_file(const std::string &path);
|
||||
std::string to_str() const;
|
||||
};
|
||||
struct db {
|
||||
std::string url;
|
||||
std::string api_key;
|
||||
|
||||
void load_from_file(const std::string &path);
|
||||
std::string to_str() const;
|
||||
|
@ -1,38 +0,0 @@
|
||||
#ifndef OPENSEARCH_H
|
||||
#define OPENSEARCH_H
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "http.h"
|
||||
|
||||
namespace librengine::opensearch {
|
||||
/// Thin HTTP client for a single OpenSearch endpoint.
class client {
public:
    /// HTTP verb used by custom_request().
    enum class request_type {
        GET,
        POST,
        PUT,
        DELETE,
    };

    /// Request path relative to the server URL (e.g. "website/_doc").
    struct path_options {
        std::string full;      ///< path that is actually sent
        std::string index;     ///< pieces appended to `full` by compute_full()
        std::string type;
        std::string document;

        /// Appends "index/type/document" to `full`.
        void compute_full();
        /// Stores `full` as the ready-to-use path.
        explicit path_options(const std::string &full);
    };

private:
    std::string url; ///< base URL of the server, without a trailing slash

    /// Returns "<url>/<path>".
    std::string compute_url(const std::string &path);

public:
    explicit client(std::string url = "http://localhost:9200");

    /// Sends `data` (empty body when absent) to the given path with the given verb
    /// and returns the response stored by the HTTP request.
    std::optional<std::string> custom_request(const path_options &path_options, const request_type &request_type,const std::optional<std::string> &data = std::nullopt);
};
|
||||
}
|
||||
|
||||
#endif
|
28
lib/include/typesense.h
Normal file
28
lib/include/typesense.h
Normal file
@ -0,0 +1,28 @@
|
||||
#ifndef TYPESENSE_H
|
||||
#define TYPESENSE_H
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
|
||||
#include <librengine/http.h>
|
||||
|
||||
namespace librengine {
|
||||
/// Small client bound to one collection of a Typesense server.
class typesense {
    // Members are private by default in a class.
    std::string url;             ///< base URL of the server
    std::string collection_name; ///< collection every request is addressed to
    std::string api_key;         ///< sent as the X-TYPESENSE-API-KEY header

public:
    typesense();
    typesense(const std::string &url, const std::string &collection_name, const std::string &api_key);

    /// POSTs a JSON document to the collection; returns the response body ("" if none).
    std::string add(const std::string &json);
    /// Same as add() but with the "action=upsert" query parameter.
    std::string update(const std::string &json);

    /// GETs the document with the given id; returns the response body ("" if none).
    std::string get(const int &id);
    /// Runs a search for `q` over the `query_by` fields; `options` are appended
    /// as additional query parameters. Returns the response body ("" if none).
    std::string search(const std::string &q, const std::string &query_by, const std::map<std::string, std::string> &options = {});
};
|
||||
}
|
||||
|
||||
#endif
|
@ -19,7 +19,6 @@ namespace librengine::config {
|
||||
auto json_crawler = json["crawler"];
|
||||
|
||||
this->user_agent = json_crawler["user_agent"].get<std::string>();
|
||||
this->opensearch_url = json_crawler["opensearch_url"].get<std::string>();
|
||||
|
||||
std::string proxy_string = json_crawler["proxy"].get<std::string>();
|
||||
|
||||
@ -40,10 +39,10 @@ namespace librengine::config {
|
||||
}
|
||||
|
||||
std::string crawler::to_str() const {
|
||||
const std::string format = "UA={0}\nStartSiteUrl={1}\nOpenSearchUrl={2}\nProxy={3}\nMaxRecDeep={4}"
|
||||
"\nLPageTimeoutS={5}\nUpdateTimeSISAfter={6}\nDelayTimeS={7}\nMaxPagesS={8}\nMaxPageSym={9}"
|
||||
"\nMaxRobotsTSym={10}\nIsOneSite={11}\nIsHttpToHttps={12}\nIsCheckRobots={13}";
|
||||
return str::format(format, user_agent, start_site_url, opensearch_url,
|
||||
const std::string format = "UA={0}\nStartSiteUrl={1}\nProxy={2}\nMaxRecDeep={3}"
|
||||
"\nLPageTimeoutS={4}\nUpdateTimeSISAfter={5}\nDelayTimeS={6}\nMaxPagesS={7}\nMaxPageSym={8}"
|
||||
"\nMaxRobotsTSym={9}\nIsOneSite={10}\nIsHttpToHttps={11}\nIsCheckRobots={12}";
|
||||
return str::format(format, user_agent, start_site_url,
|
||||
(proxy) ? proxy->compute_curl_format() : "null", max_recursive_deep,
|
||||
load_page_timeout_s, update_time_site_info_s_after, delay_time_s, max_pages_site,
|
||||
max_page_symbols, max_robots_txt_symbols,
|
||||
@ -87,4 +86,18 @@ namespace librengine::config {
|
||||
const std::string format = "Port={0}\nProxy={1}\nNodes={2}";
|
||||
return str::format(format, port, (proxy) ? proxy->compute_curl_format() : "null", nodes.size());
|
||||
}
|
||||
|
||||
void db::load_from_file(const std::string &path) {
|
||||
const std::string content = helper::get_file_content(path);
|
||||
nlohmann::json json = nlohmann::json::parse(content, nullptr, true, true);
|
||||
auto json_db = json["db"];
|
||||
|
||||
this->url = json_db["url"].get<std::string>();
|
||||
this->api_key = json_db["api_key"].get<std::string>();
|
||||
}
|
||||
|
||||
std::string db::to_str() const {
|
||||
const std::string format = "Url={0}\nApiKey={1}";
|
||||
return str::format(format, url, api_key);
|
||||
}
|
||||
}
|
@ -164,6 +164,8 @@ namespace librengine::http {
|
||||
this->curl = curl_easy_init();
|
||||
this->options.headers = std::make_shared<std::vector<header>>();
|
||||
|
||||
this->url = str::replace(this->url, " ", "%20");
|
||||
|
||||
if (is_set_secure_headers) {
|
||||
this->options.headers->emplace_back("DNT", "1"); //don't track
|
||||
this->options.headers->emplace_back("Sec-GPC", "1"); //don't sell or share
|
||||
|
@ -1,45 +0,0 @@
|
||||
#include "opensearch.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace librengine::opensearch {
|
||||
void client::path_options::compute_full() {
|
||||
this->full.append(this->index);
|
||||
this->full.push_back('/');
|
||||
this->full.append(this->type);
|
||||
this->full.push_back('/');
|
||||
this->full.append(this->document);
|
||||
}
|
||||
|
||||
/// Uses `full` as the complete request path; the other fields stay empty.
client::path_options::path_options(const std::string &full) : full(full) {}
|
||||
|
||||
/// Stores the base URL of the server; the argument is moved into the member.
client::client(std::string url) : url(std::move(url)) {}
|
||||
|
||||
std::string client::compute_url(const std::string &path) {
|
||||
std::string result = this->url;
|
||||
|
||||
result.push_back('/');
|
||||
result.append(path);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Performs one JSON request against "<url>/<path_options.full>".
/// @param path_options  path relative to the server URL
/// @param request_type  HTTP verb; an unknown value leaves the method string empty
/// @param data          request body, an empty string when absent
/// @return the response stored by the HTTP request
std::optional<std::string> client::custom_request(const client::path_options &path_options, const request_type &request_type, const std::optional<std::string> &data) {
    std::string method;

    switch (request_type) {
        case client::request_type::GET:
            method = "GET";
            break;
        case client::request_type::POST:
            method = "POST";
            break;
        case client::request_type::PUT:
            method = "PUT";
            break;
        case client::request_type::DELETE:
            method = "DELETE";
            break;
    }

    http::request req(compute_url(path_options.full), data.value_or(""), method, false);
    req.options.headers->emplace_back("Content-Type: application/json");
    req.perform();

    return req.result.response;
}
|
||||
}
|
55
lib/src/typesense.cpp
Normal file
55
lib/src/typesense.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
#include "../include/typesense.h"
|
||||
|
||||
namespace librengine {
|
||||
/// Default-constructed client: all fields are empty strings.
typesense::typesense() = default;

/// Binds the client to `collection_name` on the server at `url`,
/// authenticating with `api_key`.
typesense::typesense(const std::string &url, const std::string &collection_name, const std::string &api_key)
    : url(url), collection_name(collection_name), api_key(api_key) {}
|
||||
|
||||
std::string typesense::add(const std::string &json) {
|
||||
std::string request_url = this->url + "/collections/" + this->collection_name + "/documents/";
|
||||
http::request request(request_url, json, "POST", false);
|
||||
request.options.headers->emplace_back("Content-Type: application/json");
|
||||
request.options.headers->emplace_back("X-TYPESENSE-API-KEY", api_key);
|
||||
request.perform();
|
||||
|
||||
return request.result.response.value_or("");
|
||||
}
|
||||
|
||||
std::string typesense::update(const std::string &json) {
|
||||
std::string request_url = this->url + "/collections/" + this->collection_name + "/documents/?action=upsert";
|
||||
http::request request(request_url, json, "POST", false);
|
||||
request.options.headers->emplace_back("Content-Type: application/json");
|
||||
request.options.headers->emplace_back("X-TYPESENSE-API-KEY", api_key);
|
||||
request.perform();
|
||||
|
||||
return request.result.response.value_or("");
|
||||
}
|
||||
|
||||
std::string typesense::get(const int &id) {
|
||||
std::string request_url = this->url + "/collections/" + this->collection_name + "/documents/" + std::to_string(id);
|
||||
http::request request(request_url, "", "GET", false);
|
||||
request.options.headers->emplace_back("X-TYPESENSE-API-KEY", api_key);
|
||||
request.perform();
|
||||
|
||||
return request.result.response.value_or("");
|
||||
}
|
||||
|
||||
std::string typesense::search(const std::string &q, const std::string &query_by, const std::map<std::string, std::string> &options) {
|
||||
std::string request_url = this->url + "/collections/" + this->collection_name + "/documents/search";
|
||||
request_url.append("?q=" + q);
|
||||
request_url.append("&query_by=" + query_by);
|
||||
|
||||
for (const auto &option : options) {
|
||||
request_url.append("&" + option.first + "=" + option.second);
|
||||
}
|
||||
|
||||
http::request request(request_url, "", "GET", false);
|
||||
request.options.headers->emplace_back("X-TYPESENSE-API-KEY", api_key);
|
||||
request.perform();
|
||||
|
||||
return request.result.response.value_or("");
|
||||
}
|
||||
}
|
@ -4,5 +4,5 @@ mkdir build && cd build && cmake .. && sudo make install
|
||||
cd ../../crawler
|
||||
mkdir build && cd build && cmake .. && make
|
||||
|
||||
cd ../../website/backend
|
||||
cd ../../website
|
||||
mkdir build && cd build && cmake .. && make
|
||||
|
33
scripts/init_db.sh
Normal file
33
scripts/init_db.sh
Normal file
@ -0,0 +1,33 @@
|
||||
#!/bin/sh
# Creates the Typesense collections used by librengine: "websites" and "robots".
#
# WARNING: both collections are deleted first, so every indexed document is lost.
#
# The server address and the API key can be overridden from the environment:
#   URL=http://db.example:8108 API_KEY=secret sh init_db.sh
# When unset they fall back to the defaults below.

export URL="${URL:-http://localhost:8108}"
export API_KEY="${API_KEY:-xyz}"

# Drop the old collections (the server answers with an error if they do not exist yet).
curl -XDELETE "$URL/collections/websites" -H "X-TYPESENSE-API-KEY: $API_KEY"
curl -XDELETE "$URL/collections/robots" -H "X-TYPESENSE-API-KEY: $API_KEY"

# Crawled pages.
curl -XPOST "$URL/collections/" -d'
{
  "name": "websites",
  "fields": [
    {"name": "title", "type": "string" },
    {"name": "desc", "type": "string" },
    {"name": "url", "type": "string" },
    {"name": "host", "type": "string" },
    {"name": "rating", "type": "int32" },
    {"name": "has_ads", "type": "bool" },
    {"name": "has_analytics", "type": "bool" },
    {"name": "date", "type": "int64" }
  ],
  "default_sorting_field": "date"
}
' -H "X-TYPESENSE-API-KEY: $API_KEY" -H 'Content-Type: application/json'

# Cached robots.txt bodies, one per host.
curl -XPOST "$URL/collections/" -d'
{
  "name": "robots",
  "fields": [
    {"name": "body", "type": "string" },
    {"name": "host", "type": "string" },
    {"name": "date", "type": "int64" }
  ],
  "default_sorting_field": "date"
}
' -H "X-TYPESENSE-API-KEY: $API_KEY" -H 'Content-Type: application/json'
|
@ -1,84 +0,0 @@
|
||||
# Recreates the OpenSearch indices "website" and "robots_txt".
# Each index is deleted first, so existing documents are lost.
export ES_URL=localhost:9200

# --- website: crawled pages, text fields analysed with an edge n-gram tokenizer ---
curl --request DELETE "$ES_URL/website"
curl --request PUT "$ES_URL/website" --data '{
  "settings": {
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete",
          "filter": [
            "lowercase",
            "word_delimiter"
          ]
        },
        "autocomplete_search": {
          "tokenizer": "lowercase"
        },
        "not_analyzed": {

        }
      },
      "tokenizer": {
        "autocomplete": {
          "type": "edge_ngram",
          "token_chars": [
            "letter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete"
      },
      "content": {
        "type": "text",
        "analyzer": "autocomplete"
      },
      "desc": {
        "type": "text",
        "analyzer": "autocomplete"
      },
      "url": {
        "type": "keyword"
      },
      "host": {
        "type": "keyword"
      },
      "rating": {
        "type": "byte"
      },
      "has_ads": {
        "type": "boolean"
      },
      "has_analytics": {
        "type": "boolean"
      },
      "date": {
        "type": "date"
      }
    }
  }
}
' --header 'Content-Type: application/json'

# --- robots_txt: cached robots.txt bodies ---
curl --request DELETE "$ES_URL/robots_txt"
curl --request PUT "$ES_URL/robots_txt" --data '{
  "mappings": {
    "properties": {
      "body": {
        "type": "keyword"
      },
      "host": {
        "type": "keyword"
      },
      "date": {
        "type": "date"
      }
    }
  }
}
' --header 'Content-Type: application/json'
|
@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(backend)
|
||||
project(website)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
@ -11,5 +11,5 @@ find_package(CURL)
|
||||
find_package(Threads)
|
||||
find_package(OpenSSL)
|
||||
|
||||
add_executable(${PROJECT_NAME} main.cpp third_party/httplib.h src/pages.cpp include/pages.h src/encryption.cpp include/encryption.h)
|
||||
add_executable(${PROJECT_NAME} main.cpp third_party/httplib.h src/pages.cpp include/pages.h)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE /usr/lib/liblexbor.so curl OpenSSL::Crypto Threads::Threads /usr/local/lib/liblibrengine.so)
|
@ -19,7 +19,11 @@ function decrypt() {
|
||||
|
||||
item = item.replaceAll("\n", "");
|
||||
item = item.trim();
|
||||
result += rsa.decrypt(item);
|
||||
|
||||
let decrypted = rsa.decrypt(item);
|
||||
if (decrypted == null) continue;
|
||||
|
||||
result += decrypted;
|
||||
}
|
||||
|
||||
splited = splited.slice(last_i);
|
||||
|
@ -1,21 +1,29 @@
|
||||
let rsa = new JSEncrypt({default_key_size: 1024});
|
||||
let is_generating = true;
|
||||
let is_started = true;
|
||||
|
||||
rsa.getKey(function() {
|
||||
let public_key = get_with_expiry("public_key");
|
||||
let private_key = get_with_expiry("private_key");
|
||||
let public_key = get_with_expiry("public_key");
|
||||
let private_key = get_with_expiry("private_key");
|
||||
|
||||
if (public_key == null || private_key == null) {
|
||||
rsa.getKey(function() {
|
||||
let public_key = get_with_expiry("public_key");
|
||||
let private_key = get_with_expiry("private_key");
|
||||
|
||||
if (public_key == null) {
|
||||
set_with_expiry("public_key", rsa.getPublicKey(), 3600 * 1000); //1 hour
|
||||
}
|
||||
if (private_key == null) {
|
||||
set_with_expiry("private_key", rsa.getPrivateKey(), 3600 * 1000); //1 hour
|
||||
}
|
||||
|
||||
if (public_key == null) {
|
||||
set_with_expiry("public_key", rsa.getPublicKey(), 3600 * 1000); //1 hour
|
||||
}
|
||||
if (private_key == null) {
|
||||
set_with_expiry("private_key", rsa.getPrivateKey(), 3600 * 1000); //1 hour
|
||||
}
|
||||
|
||||
is_generating = false;
|
||||
is_started = false;
|
||||
});
|
||||
} else {
|
||||
is_started = false;
|
||||
});
|
||||
}
|
||||
|
||||
function submit_form() {
|
||||
if (is_started) return false;
|
||||
@ -28,15 +36,15 @@ function submit_form() {
|
||||
let query_v = query.value;
|
||||
let key_v = key.value;
|
||||
|
||||
let rsa2 = new JSEncrypt({default_key_size: 2048});
|
||||
rsa2.setPublicKey(key_v);
|
||||
let rsa2 = new JSEncrypt({default_key_size: 1024});
|
||||
rsa2.setPublicKey(atob(key_v));
|
||||
|
||||
let encrypted = rsa2.encrypt(query_v);
|
||||
let public_key = get_with_expiry("public_key");
|
||||
|
||||
query.value = encrypted;
|
||||
encryption.value = "1";
|
||||
key.value = public_key;
|
||||
key.value = btoa(public_key);
|
||||
|
||||
form.submit();
|
||||
return false;
|
||||
|
@ -37,7 +37,7 @@
|
||||
<div class="input_container">
|
||||
<input class="input" name="q" id="q" type="search" value="{QUERY}" placeholder="" required>
|
||||
</div>
|
||||
<input name="s" value="0" type="hidden">
|
||||
<input name="p" value="1" type="hidden">
|
||||
<input name="e" id="e" value="0" type="hidden">
|
||||
<input name="ek" id="ek" value="{RSA_PUBLIC_KEY}" type="hidden">
|
||||
<button class="button">
|
||||
|
@ -26,13 +26,13 @@
|
||||
</div>
|
||||
</header>
|
||||
<div class="center_container">
|
||||
<div class="websites counter">
|
||||
<!--<div class="websites counter">
|
||||
<div class="content">
|
||||
<i class="fa fa-globe"></i>
|
||||
<span class="title">{WEBSITES_COUNT}</span>
|
||||
<h2 class="title">Websites</h2>
|
||||
</div>
|
||||
</div>
|
||||
</div>-->
|
||||
<div class="pages counter">
|
||||
<div class="content">
|
||||
<i class="fa fa-file"></i>
|
||||
|
@ -24,7 +24,7 @@
|
||||
<div class="top_container">
|
||||
<div class="top_container_content">
|
||||
<div class="top_left_container">
|
||||
<form class="search_widget" action="search" method="GET">
|
||||
<form autocomplete="off" class="search_widget" action="search" onsubmit="return submit_form();" method="GET">
|
||||
<div class="input_container">
|
||||
<input class="input" name="q" id="q" type="search" value="{QUERY}" placeholder="query" required>
|
||||
</div>
|
||||
|
File diff suppressed because one or more lines are too long
@ -1,11 +1,12 @@
|
||||
#include <optional>
|
||||
#include <librengine/config.h>
|
||||
#include <librengine/opensearch.h>
|
||||
#include <librengine/logger.h>
|
||||
#include <librengine/json.hpp>
|
||||
#include <librengine/str.h>
|
||||
#include <librengine/str_impl.h>
|
||||
#include <librengine/http.h>
|
||||
#include <librengine/typesense.h>
|
||||
#include <librengine/encryption.h>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <thread>
|
||||
@ -13,7 +14,6 @@
|
||||
#include <map>
|
||||
|
||||
#include "../third_party/httplib.h"
|
||||
#include "encryption.h"
|
||||
|
||||
#ifndef PAGES_H
|
||||
#define PAGES_H
|
||||
@ -36,11 +36,11 @@ namespace backend {
|
||||
private:
|
||||
encryption::rsa rsa;
|
||||
config::website config;
|
||||
opensearch::client client;
|
||||
typesense db;
|
||||
|
||||
std::map<std::string, std::string> rsa_public_keys;
|
||||
public:
|
||||
pages(const config::website &config, opensearch::client &client);
|
||||
pages(const config::website &config, const config::db &db);
|
||||
void init();
|
||||
|
||||
void set_variables(std::string &page_src);
|
||||
@ -49,7 +49,7 @@ namespace backend {
|
||||
void update(const std::string &id, const std::string &field, const std::string &value);
|
||||
size_t get_number_field_value(const std::string &id, const std::string &field);
|
||||
/*size_t get_last_added_website_date(opensearch::client &client);*/
|
||||
std::optional<std::vector<search_result>> search(const std::string &q, const size_t &s);
|
||||
std::optional<std::vector<search_result>> search(const std::string &q, const size_t &p);
|
||||
size_t get_field_count(const std::string &field);
|
||||
|
||||
void home(const Request &request, Response &response);
|
@ -11,20 +11,22 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
config::website config;
|
||||
config::db db;
|
||||
|
||||
config.load_from_file(argv[1]);
|
||||
db.load_from_file(argv[1]);
|
||||
|
||||
std::string line = std::string(25, '=');
|
||||
|
||||
std::cout << logger::white << line << logger::green << "CFG" << logger::white << line << std::endl
|
||||
<< logger::reset << config.to_str() << std::endl
|
||||
<< logger::white << line << "===" << logger::white << line << std::endl;
|
||||
|
||||
auto client = librengine::opensearch::client("http://localhost:9200");
|
||||
|
||||
auto server = std::make_shared<Server>();
|
||||
auto pages = std::make_shared<backend::pages>(config, client);
|
||||
auto pages = std::make_shared<backend::pages>(config, db);
|
||||
|
||||
std::thread server_thread([&] {
|
||||
server->set_mount_point("/", "../../frontend/");
|
||||
server->set_mount_point("/", "../frontend/");
|
||||
server->Get("/home", [&](const Request &req, Response &res) { pages->home(req, res); });
|
||||
server->Get("/search", [&](const Request &req, Response &res) { pages->search(req, res); });
|
||||
server->Get("/node/info", [&](const Request &req, Response &res) { pages->node_info(req, res); });
|
@ -20,10 +20,10 @@ void if_debug_print(const logger::type &type, const std::string &text, const std
|
||||
}
|
||||
|
||||
namespace backend {
|
||||
pages::pages(const config::website &config, opensearch::client &client) {
|
||||
pages::pages(const config::website &config, const config::db &db) {
|
||||
this->rsa = encryption::rsa();
|
||||
this->config = config;
|
||||
this->client = client;
|
||||
this->db = typesense(db.url, "websites", db.api_key);
|
||||
|
||||
this->rsa.generate_keys(1024);
|
||||
}
|
||||
@ -41,83 +41,37 @@ namespace backend {
|
||||
const std::string noscript_src = R"(<noscript><span class="noscript">Encryption doesn't work without js</span></noscript>)";
|
||||
const std::string header_src = R"(<li><a href="/home">Home</a></li><li><a href="/node/info">Node Info</a></li><li><a href="https://github.com/liameno/librengine">Github</a></li>)";
|
||||
|
||||
str::replace_ref(page_src, "{RSA_PUBLIC_KEY}", rsa.get_public_key_buffer());
|
||||
auto key = rsa.get_public_key_buffer();
|
||||
|
||||
str::replace_ref(page_src, "{RSA_PUBLIC_KEY}", encryption::base64::easy_encode(key));
|
||||
str::replace_ref(page_src, "{NOSCRIPT_CONTENT}", noscript_src);
|
||||
str::replace_ref(page_src, "{HEADER_CONTENT}", header_src);
|
||||
}
|
||||
|
||||
void pages::update(const std::string &id, const std::string &field, const size_t &value) {
|
||||
const auto path = opensearch::client::path_options("website/_doc/" + id + "/_update");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
const auto response = db.get(std::stoi(id));
|
||||
nlohmann::json result_json = nlohmann::json::parse(response);
|
||||
result_json[field] = value;
|
||||
|
||||
nlohmann::json json;
|
||||
json["doc"][field] = value;
|
||||
|
||||
const auto response = client.custom_request(path, type, json.dump());
|
||||
db.update(result_json.dump());
|
||||
}
|
||||
void pages::update(const std::string &id, const std::string &field, const std::string &value) {
|
||||
const auto path = opensearch::client::path_options("website/_doc/" + id + "/_update");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
const auto response = db.get(std::stoi(id));
|
||||
nlohmann::json result_json = nlohmann::json::parse(response);
|
||||
result_json[field] = value;
|
||||
|
||||
nlohmann::json json;
|
||||
json["doc"][field] = value;
|
||||
|
||||
const auto response = client.custom_request(path, type, json.dump());
|
||||
db.update(result_json.dump());
|
||||
}
|
||||
size_t pages::get_number_field_value(const std::string &id, const std::string &field) {
|
||||
const auto path = opensearch::client::path_options("website/_doc/" + id);
|
||||
const auto type = opensearch::client::request_type::GET;
|
||||
|
||||
const auto response = client.custom_request(path, type);
|
||||
nlohmann::json result_json = nlohmann::json::parse(*response);
|
||||
const auto rating = result_json["_source"][field];
|
||||
|
||||
if (rating.is_null()) return 0;
|
||||
if (rating > 0) return (size_t)rating;
|
||||
|
||||
return 0;
|
||||
const auto response = db.get(std::stoi(id));
|
||||
nlohmann::json result_json = nlohmann::json::parse(response);
|
||||
return result_json[field];
|
||||
}
|
||||
/*size_t pages::get_last_added_website_date(opensearch::client &client) {
|
||||
const auto path = opensearch::client::path_options("website/_search");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
std::optional<std::vector<pages::search_result>> pages::search(const std::string &q, const size_t &p) {
|
||||
const auto response = db.search(q, "url,title,desc", {{"page", std::to_string(p)}});
|
||||
nlohmann::json result_json = nlohmann::json::parse(response);
|
||||
|
||||
nlohmann::json json;
|
||||
|
||||
json["size"] = 1;
|
||||
json["sort"][0]["date"]["order"] = "desc";
|
||||
|
||||
const auto response = client.custom_request(path, type, json.dump());
|
||||
nlohmann::json result_json = nlohmann::json::parse(*response);
|
||||
const auto value = result_json["hits"]["total"]["value"];
|
||||
|
||||
if (value.is_null()) return std::nullopt;
|
||||
if (value < 0) return std::nullopt;
|
||||
|
||||
const auto body = result_json["hits"]["hits"];
|
||||
if (body.is_null()) return std::nullopt;
|
||||
|
||||
auto hit = body[0];
|
||||
|
||||
size_t hit_date = hit["_source"]["date"];
|
||||
size_t current_date = time(nullptr);
|
||||
|
||||
return current_date - hit_date;
|
||||
}*/
|
||||
std::optional<std::vector<pages::search_result>> pages::search(const std::string &q, const size_t &s) {
|
||||
const auto path = opensearch::client::path_options("website/_search");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
|
||||
nlohmann::json json;
|
||||
|
||||
json["query"]["query_string"]["fields"] = {"url", "title", "desc"};
|
||||
json["query"]["query_string"]["query"] = q;
|
||||
json["size"] = 10;
|
||||
json["from"] = s;
|
||||
|
||||
const auto response = client.custom_request(path, type, json.dump());
|
||||
nlohmann::json result_json = nlohmann::json::parse(*response);
|
||||
|
||||
const auto body = result_json["hits"]["hits"];
|
||||
const auto body = result_json["hits"];
|
||||
if (body.is_null() || body.empty()) return std::nullopt;
|
||||
|
||||
size_t value = body.size();
|
||||
@ -128,15 +82,16 @@ namespace backend {
|
||||
for (int i = 0; i < value; ++i) {
|
||||
search_result result;
|
||||
auto hit = body[i];
|
||||
auto hit_doc = hit["document"];
|
||||
|
||||
try {
|
||||
result.id = hit["_id"];
|
||||
result.title = hit["_source"]["title"];
|
||||
result.url = hit["_source"]["url"];
|
||||
result.desc = hit["_source"]["desc"];
|
||||
result.rating = hit["_source"]["rating"];
|
||||
result.has_ads = hit["_source"]["has_ads"];
|
||||
result.has_analytics = hit["_source"]["has_analytics"];
|
||||
result.id = hit_doc["id"];
|
||||
result.title = hit_doc["title"];
|
||||
result.url = hit_doc["url"];
|
||||
result.desc = hit_doc["desc"];
|
||||
result.rating = hit_doc["rating"];
|
||||
result.has_ads = hit_doc["has_ads"];
|
||||
result.has_analytics = hit_doc["has_analytics"];
|
||||
} catch (const nlohmann::json::exception &e) {
|
||||
continue;
|
||||
}
|
||||
@ -147,25 +102,13 @@ namespace backend {
|
||||
return results;
|
||||
}
|
||||
size_t pages::get_field_count(const std::string &field) {
|
||||
const auto path = opensearch::client::path_options("website/_search");
|
||||
const auto type = opensearch::client::request_type::POST;
|
||||
|
||||
nlohmann::json json;
|
||||
|
||||
json["aggs"]["host_uniq"]["terms"]["field"] = field;
|
||||
json["aggs"]["host_uniq"]["terms"]["size"] = 1;
|
||||
json["size"] = 0;
|
||||
|
||||
const auto response = client.custom_request(path, type, json.dump());
|
||||
nlohmann::json result_json = nlohmann::json::parse(*response);
|
||||
const auto value = result_json["aggregations"]["host_uniq"]["sum_other_doc_count"];
|
||||
|
||||
if (value.is_null()) return 0;
|
||||
return (size_t)value + 1;
|
||||
const auto response = db.search("*", field);
|
||||
nlohmann::json result_json = nlohmann::json::parse(response);
|
||||
return result_json["found"];
|
||||
}
|
||||
|
||||
void pages::home(const Request &request, Response &response) {
|
||||
std::string page_src = config::helper::get_file_content("../../frontend/src/index.html");
|
||||
std::string page_src = config::helper::get_file_content("../frontend/src/index.html");
|
||||
const std::string query = request.get_param_value("q");
|
||||
str::replace_ref(page_src, "{QUERY}", query);
|
||||
|
||||
@ -174,12 +117,12 @@ namespace backend {
|
||||
response.set_content(page_src, "text/html");
|
||||
}
|
||||
void pages::search(const Request &request, Response &response) {
|
||||
std::string page_src = config::helper::get_file_content("../../frontend/src/search.html");
|
||||
std::string query = request.get_param_value("q");
|
||||
std::string page_src = config::helper::get_file_content("../frontend/src/search.html");
|
||||
std::string query = str::replace(request.get_param_value("q"), " ", "+");
|
||||
std::string e_ = request.get_param_value("e");
|
||||
std::string ek_ = request.get_param_value("ek");
|
||||
const std::string s_ = request.get_param_value("s");
|
||||
const size_t start_index = (!s_.empty()) ? std::stoi(s_) : 0;
|
||||
const std::string p_ = request.get_param_value("p");
|
||||
const size_t page = (!p_.empty()) ? std::stoi(p_) : 1;
|
||||
const std::string center_result_src_format = "<div class=\"center_result\">"
|
||||
"<div class=\"content\">"
|
||||
"<a class=\"title\" href=\"{1}\">{0}<span><i class=\"fa fa-ad info_icon info_{6}\"></i><i class=\"fa fa-user-secret info_icon info_{7}\"></i></span></a>"
|
||||
@ -195,6 +138,9 @@ namespace backend {
|
||||
"</div>"
|
||||
"</div>";
|
||||
|
||||
std::string params_s = str::format("?q={0}&p={1}&e={2}&ek={3}", query, p_, e_, ek_);
|
||||
ek_ = encryption::base64::easy_decode(ek_);
|
||||
|
||||
if_debug_print(logger::type::info, "query = " + query, request.path);
|
||||
|
||||
if (e_ == "1") {
|
||||
@ -209,10 +155,10 @@ namespace backend {
|
||||
}
|
||||
|
||||
std::string center_results_src;
|
||||
std::string params_s = str::format("?q={0}&s={1}&e=0", str::replace(query, " ", "+"), s_);
|
||||
|
||||
for (const auto &node : config.nodes) {
|
||||
if_debug_print(logger::type::info, "node = " + node.url, request.path);
|
||||
std::string params_s2;
|
||||
|
||||
if (e_ == "1") {
|
||||
encryption::rsa rsa_node;
|
||||
@ -227,10 +173,10 @@ namespace backend {
|
||||
|
||||
auto public_key = rsa.get_public_key_buffer();
|
||||
auto key2 = encryption::base64::easy_encode(public_key); //error of curl (CURLE_URL_MALFORMAT)
|
||||
params_s = str::format("?q={0}&s={1}&e=1&ek={2}", encrypted_base64, s_, key2);
|
||||
params_s2 = str::format("?q={0}&p={1}&e=1&ek={2}", encrypted_base64, p_, key2);
|
||||
}
|
||||
|
||||
http::request request_(node.url + "/api/search" + params_s);
|
||||
http::request request_(node.url + "/api/search" + params_s2);
|
||||
if (!http::url(node.url).is_localhost()) request_.options.proxy = config.proxy;
|
||||
request_.perform();
|
||||
|
||||
@ -308,15 +254,15 @@ namespace backend {
|
||||
std::string url = request.path + params_s;
|
||||
str::replace_ref(page_src, "{CENTER_RESULTS}", center_results_src2);
|
||||
str::replace_ref(page_src, "{QUERY}", query);
|
||||
str::replace_ref(page_src, "{PREV_PAGE}", str::replace(url, "&s=" + s_, "&s=" + std::to_string((start_index >= 10) ? start_index - 10 : 0)));
|
||||
str::replace_ref(page_src, "{NEXT_PAGE}", str::replace(url, "&s=" + s_, "&s=" + std::to_string(start_index + 10)));
|
||||
str::replace_ref(page_src, "{PREV_PAGE}", str::replace(url, "&p=" + p_, "&p=" + std::to_string((page > 1) ? page - 1 : 1)));
|
||||
str::replace_ref(page_src, "{NEXT_PAGE}", str::replace(url, "&p=" + p_, "&p=" + std::to_string(page + 1)));
|
||||
|
||||
set_variables(page_src);
|
||||
response.status = 200;
|
||||
response.set_content(page_src, "text/html");
|
||||
}
|
||||
void pages::node_info(const Request &request, Response &response) {
|
||||
std::string page_src = config::helper::get_file_content("../../frontend/src/node/info.html");
|
||||
std::string page_src = config::helper::get_file_content("../frontend/src/node/info.html");
|
||||
|
||||
str::replace_ref(page_src, "{WEBSITES_COUNT}", std::to_string(get_field_count("host")));
|
||||
str::replace_ref(page_src, "{PAGES_COUNT}", std::to_string(get_field_count("url")));
|
||||
@ -326,7 +272,7 @@ namespace backend {
|
||||
response.set_content(page_src, "text/html");
|
||||
}
|
||||
void pages::node_admin_panel(const Request &request, Response &response) {
|
||||
std::string page_src = config::helper::get_file_content("../../frontend/src/node/admin_panel/index.html");
|
||||
std::string page_src = config::helper::get_file_content("../frontend/src/node/admin_panel/index.html");
|
||||
|
||||
set_variables(page_src);
|
||||
response.status = 200;
|
||||
@ -378,8 +324,8 @@ namespace backend {
|
||||
std::string query = str::replace(request.get_param_value("q"), " ", "+");
|
||||
std::string e_ = request.get_param_value("e");
|
||||
std::string ek_ = request.get_param_value("ek");
|
||||
const std::string s_ = request.get_param_value("s");
|
||||
const size_t start_index = (!s_.empty()) ? std::stoi(s_) : 0;
|
||||
const std::string p_ = request.get_param_value("p");
|
||||
const size_t page = (!p_.empty()) ? std::stoi(p_) : 1;
|
||||
nlohmann::json page_src;
|
||||
std::vector<unsigned char> ek_decrypted;
|
||||
|
||||
@ -395,7 +341,7 @@ namespace backend {
|
||||
if_debug_print(logger::type::info, "decrypted query = " + query, request.path);
|
||||
}
|
||||
|
||||
const auto search_results = search(query, start_index);
|
||||
const auto search_results = search(query, page);
|
||||
|
||||
if (search_results) {
|
||||
auto sr_size = search_results->size();
|
||||
@ -464,7 +410,7 @@ namespace backend {
|
||||
void pages::api_node_info(const Request &request, Response &response) {
|
||||
nlohmann::json page_src;
|
||||
|
||||
page_src["websites_count"] = get_field_count("host");
|
||||
//page_src["websites_count"] = get_field_count("host");
|
||||
page_src["pages_count"] = get_field_count("url");
|
||||
|
||||
response.status = 200;
|
Loading…
Reference in New Issue
Block a user