# zeal/gendocsets/qt5/crawl.py
#
# 48 lines
# 1.5 KiB
# Python

import sqlite3
import os
from lxml.html import parse
# Start from a fresh database on every run. The file is absent on a first
# run, so only remove it when present instead of letting os.unlink raise.
if os.path.exists('index.sqlite'):
    os.unlink('index.sqlite')
conn = sqlite3.connect('index.sqlite')
c = conn.cursor()
# One table holds every indexed entry; `parent` is only supplied by inserts
# that pass a parent row id.
c.execute('CREATE TABLE things (id integer primary key, type text, name text, path text, parent integer)')

# Modules: one row per link found in the first <td> of each table row.
tree = parse('qtdoc/modules.html')
for tbl in tree.xpath('//table[@class="generic"]'):
    for tr in tbl.findall('tr'):
        td = tr.find('td')
        a = td.find('a') if td is not None else None
        if a is None:
            # Rows without a <td>/<a> (e.g. header rows) have nothing to
            # index; skip them rather than fail on None.
            continue
        # The type literal is single-quoted: in SQL, double quotes denote
        # identifiers and SQLite only accepts them as strings via a legacy
        # fallback that can be disabled.
        c.execute(
            "INSERT INTO things(type, name, path) values('module', ?, ?)",
            (a.text, a.attrib['href'].replace('../', '')))
def parseClass(class_id, path):
    """Index the members linked from a class documentation page.

    Parses the HTML file at `path` and inserts one 'member' row into the
    `things` table for every link matched inside `td` cells with class
    "memItemRight bottomAlign", storing `class_id` as the row's parent.

    class_id -- id stored in the `parent` column of every inserted row
    path     -- path of the page to parse; its first '/'-separated
                component is prepended to each member's href
    """
    tree = parse(path)
    basepath = path.split('/')[0]
    for a in tree.xpath('//td[@class="memItemRight bottomAlign"]/b/a'):
        # Single-quoted SQL string literal: double quotes denote identifiers
        # and are only tolerated as strings by a legacy SQLite fallback.
        c.execute(
            "INSERT INTO things(type, name, path, parent) values('member', ?, ?, ?)",
            (a.text, basepath + '/' + a.attrib['href'], class_id))
# Classes: one row per locally available class page, followed by its members.
tree = parse('qtdoc/classes.html')
for cls in tree.xpath('//dd'):
    a = cls.find('a')
    if a is None:
        # A <dd> without a link has nothing to index.
        continue
    url = a.attrib['href'].replace('../', '')
    # Absolute web links cannot be parsed as local files, so skip them
    # (both plain and TLS schemes).
    if url.startswith(('http://', 'https://')):
        continue
    # Single-quoted SQL string literal: double quotes denote identifiers and
    # are only tolerated as strings by a legacy SQLite fallback.
    c.execute(
        "INSERT INTO things(type, name, path) values('class', ?, ?)",
        (a.text, url))
    # lastrowid is the id of the class row just inserted; the members found
    # on the class page are stored with it as their parent.
    parseClass(c.lastrowid, url)
# Global functions: list items whose first link is labelled "global".
tree = parse('qtdoc/functions.html')
for fun in tree.xpath('//li'):
    a = fun.find('a')
    if a is None or a.text != 'global':
        continue
    url = a.attrib['href'].replace('../', '')
    # The function name is the item's text before the link, with the
    # separating ': ' stripped; fun.text is None when the item starts
    # directly with the link.
    name = (fun.text or '').strip(': ')
    # Single-quoted SQL string literal: double quotes denote identifiers and
    # are only tolerated as strings by a legacy SQLite fallback.
    c.execute(
        "INSERT INTO things(type, name, path) values('function', ?, ?)",
        (name, url))

# Persist all inserts made through the cursor, then release the database.
conn.commit()
conn.close()