Mirror of https://github.com/hasura/graphql-engine.git (synced 2024-12-15 01:12:56 +03:00)
Commit cc30f08f6e
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/4261 GitOrigin-RevId: 3d80068acdd61b5350fc36ec3444db47508f9c09
# -*- coding: utf-8 -*-

from sphinx import addnodes

"""
``generate_index``: A callable yielding the global TOC tree that contains
a list of all the content below the specified page. ``generate_index`` needs
the pagename passed to it, as in ``{{ generate_index(pagename) }}``, and the
following optional keyword arguments are available:

* maxdepth (defaults to the max depth selected in the toctree directive):
  the maximum depth of the tree; set it to -1 to allow unlimited depth
"""

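# A minimal sketch of the template call described above, assuming the theme
# exposes ``generate_index`` to Jinja (it is not defined in this file):
#
#     {{ generate_index(pagename, maxdepth=-1) }}
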
import os
import json
from bs4 import BeautifulSoup

import datetime
import calendar

import re

import xml.etree.ElementTree as ET

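# Index entries collected from every rendered page; written to disk once the
# build finishes (see on_finish_building below).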
indexObjs = []


def check_directory(path):
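    # Create the parent directory of the given file path if it does not exist yet.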
    directory = os.path.dirname(path)
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        raise


def on_finish_building(app, exception):
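    # 'build-finished' handler: writes the collected index entries to the
    # Algolia index file and emits a sitemap for the current docs version.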
    current_version = app.env.config["version"]
    if "latest_docs_version" in app.env.config["html_context"].keys():
        latest_version = app.env.config["html_context"]["latest_docs_version"]
    else:
        latest_version = "dev"
    base_domain = app.env.config["html_context"]["SITEMAP_DOMAIN"]

    index_file_path = "./_build/algolia_index/index.json"
    sitemap_path = "./_build/sitemap/sitemap_" + current_version + ".xml"

    check_directory(index_file_path)
    check_directory(sitemap_path)

    root = ET.Element("urlset")
    root.set("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")

    for link in indexObjs:
        url = ET.SubElement(root, "url")
        ET.SubElement(url, "loc").text = "https://" + base_domain + "/latest/" + link["url"]
        ET.SubElement(url, "changefreq").text = "daily"
        ET.SubElement(url, "priority").text = "1" if (current_version == latest_version) else "0.5"

    ET.ElementTree(root).write(sitemap_path)

    # Open with a context manager so the index file is closed after writing.
    with open(index_file_path, 'w+') as f:
        f.write(json.dumps(indexObjs))


def generate_index_file(app, pagename, templatename, context, doctree):
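    # 'html-page-context' handler: extracts the title, meta tags, plain-text
    # body and featured image of each rendered page and stores them in indexObjs.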
    # Index the page only if it is not in the exclusion list below and is
    # present in the toc-tree (the rendered TOC marks it as "current").
    if (pagename not in ['manual/index', 'index', 'search', 'genindex']
            and not (pagename.startswith("ref/") or pagename.startswith("tutorials/") or pagename.startswith("guides/"))
            and re.search('<a[^>]*class="[^"]*current[^"]*"[^>]*>', context['toc_full'])):
        title = ''
        keyword = ''
        description = ''
        tags_val = ''
        content = ''
        image = ''
        created_val = 0

        if 'title' in context:
            title = context['title']

        if 'metatags' in context:
            metatags = context['metatags']
            if len(metatags) > 0:
                soup = BeautifulSoup(metatags, 'html.parser')
                descriptions = soup.findAll("meta", {"name": "description"})
                keywords = soup.findAll("meta", {"name": "keywords"})
                tags = soup.findAll("meta", {"name": "content-tags"})
                created_at = soup.findAll("meta", {"name": "created-on"})

                if len(descriptions) > 0:
                    description = descriptions[0]['content']

                if len(keywords) > 0:
                    keyword = keywords[0]['content']

                if len(tags) > 0:
                    tags_val = tags[0]['content']

                if len(created_at) > 0:
                    # Convert the ISO 8601 "created-on" timestamp to a Unix epoch value.
                    created_val = created_at[0]['content']
                    created_val = datetime.datetime.strptime(created_val, "%Y-%m-%dT%H:%M:%S.%fZ")
                    created_val = calendar.timegm(created_val.utctimetuple())
                else:
                    created_val = 0

        if 'body' in context:
            body = context['body']
            soup = BeautifulSoup(body, 'html.parser')

            # Index only the plain text of the page body.
            content = soup.get_text()

            imgs = soup.findAll("img", {"class": "featured-image"})
            if len(imgs) > 0:
                image = imgs[0]['src'].split('/')[-1]

        url = pagename + '.html'
        category = pagename.split('/')[0]

        index_obj = {
            "title": title,
            "content": content,
            "url": url,
            "category": category,
            "image": image,
            "description": description,
            "keywords": keyword,
            "tags": tags_val,
            "created_at": created_val
        }

        indexObjs.append(index_obj)
    else:
        print('\t ** IGNORED FOR INDEXING **')


def setup(app):
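    # Sphinx extension entry point: hook the two handlers above into the build.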
    app.connect('build-finished', on_finish_building)
    app.connect('html-page-context', generate_index_file)