1
1
mirror of https://github.com/chubin/cheat.sh.git synced 2024-11-24 03:56:55 +03:00
cheat.sh/lib/get_answer.py

447 lines
13 KiB
Python

"""
Main module, answers hub.
Exports:
get_topics_list()
get_topic_type()
get_answer()
"""
from gevent.monkey import patch_all
from gevent.subprocess import Popen, PIPE
patch_all()
# pylint: disable=wrong-import-position,wrong-import-order
import collections
import glob
import os
import re
import redis
from fuzzywuzzy import process, fuzz
import beautifier
from globals import MYDIR, PATH_TLDR_PAGES, PATH_CHEAT_PAGES, PATH_CHEAT_SHEETS, COLOR_STYLES
from adapter_learnxiny import get_learnxiny, get_learnxiny_list, is_valid_learnxy
from languages_data import LANGUAGE_ALIAS
# pylint: enable=wrong-import-position,wrong-import-order
# Redis client; get_answer() reads cached answers from it and stores newly
# computed ones in it.
REDIS = redis.StrictRedis(host='localhost', port=6379, db=0)
# Threshold at which find_answer_by_keyword() stops collecting matches and
# adds a "LIMITED" marker to its result.
MAX_SEARCH_LEN = 20
# Names of the built-in service pages; every one of them starts with ':'.
# get_topics_list() appends them to the topic list unless `skip_internal`
# is set, and find_answer_by_keyword() excludes them from search results.
INTERNAL_TOPICS = [
    ':list',
    ':firstpage',
    ':post',
    ':bash_completion',
    ':help',
    ':styles',
    ':styles-demo',
    ':emacs',
    ':emacs-ivy',
    ':fish',
    ':bash',
    ':zsh',
]
def _update_tldr_topics():
    """
    Return the tldr topic names.

    Every path matching the PATH_TLDR_PAGES glob pattern is reduced to its
    file name; names ending in '.md' are kept with that extension removed,
    all other names are ignored.
    """
    suffix = '.md'
    basenames = (os.path.basename(path) for path in glob.glob(PATH_TLDR_PAGES))
    return [name[:-len(suffix)] for name in basenames if name.endswith(suffix)]


# Computed once, when the module is imported.
TLDR_TOPICS = _update_tldr_topics()
def _update_cheat_topics():
    """
    Return the file names (without directories) of all paths matching the
    PATH_CHEAT_PAGES glob pattern.
    """
    return [os.path.basename(path) for path in glob.glob(PATH_CHEAT_PAGES)]


# Computed once, when the module is imported.
CHEAT_TOPICS = _update_cheat_topics()
def _update_cheat_sheets_topics():
    """
    Scan PATH_CHEAT_SHEETS and return a `(topics, directories)` pair.

    `topics` holds "dir/file" names for entries one level below the root,
    followed by the plain names of non-directory entries of the root itself.
    `directories` holds the root's sub-directory names, each with a trailing
    slash. A single leading underscore of a directory name is dropped in
    both lists.
    """
    def _public_name(dir_name):
        # the leading underscore is not part of the visible topic name
        return dir_name[1:] if dir_name.startswith('_') else dir_name

    sheets = [
        "%s/%s" % (_public_name(os.path.basename(os.path.dirname(path))),
                   os.path.basename(path))
        for path in glob.glob(PATH_CHEAT_SHEETS + "*/*")
    ]

    directories = []
    for path in glob.glob(PATH_CHEAT_SHEETS + "*"):
        entry = os.path.basename(path)
        if os.path.isdir(path):
            directories.append(_public_name(entry) + '/')
        else:
            sheets.append(entry)

    return sheets, directories


# Computed once, when the module is imported.
CHEAT_SHEETS_TOPICS, CHEAT_SHEETS_DIRS = _update_cheat_sheets_topics()
# Single-slot cache for the base topic list, i.e. the list WITHOUT the
# cheat.sheets directory entries and WITHOUT the internal topics.
# An empty list means "not computed yet".
CACHED_TOPICS_LIST = [[]]


def get_topics_list(skip_dirs=False, skip_internal=False):
    """
    List of topics returned on /:list

    The base list (cheat, tldr and cheat.sheets topics, sorted and without
    duplicates, followed by the learnxiny topics that are not yet present)
    is computed once and cached. The optional parts are added on every call,
    so the flags are honoured no matter which flags earlier calls used.

    Args:
        skip_dirs (bool): if true, leave out the CHEAT_SHEETS_DIRS entries
        skip_internal (bool): if true, leave out the INTERNAL_TOPICS entries

    Returns:
        list: a new list on every call; modifying it does not affect the cache
    """
    if not CACHED_TOPICS_LIST[0]:
        base = sorted(set(CHEAT_TOPICS + TLDR_TOPICS + CHEAT_SHEETS_TOPICS))

        # learnxiny topics go after the sorted part, in the order the adapter
        # returns them; the set keeps the duplicate check cheap
        known = set(base)
        for topic in get_learnxiny_list():
            if topic not in known:
                known.add(topic)
                base.append(topic)

        CACHED_TOPICS_LIST[0] = base

    answer = list(CACHED_TOPICS_LIST[0])
    if not skip_dirs:
        answer += CHEAT_SHEETS_DIRS
    if not skip_internal:
        answer += INTERNAL_TOPICS
    return answer
def _get_topics_dirs():
    """
    Return the set of first path components of all topics from
    get_topics_list() that contain a slash.
    """
    return {
        name.split('/', 1)[0]
        for name in get_topics_list()
        if '/' in name
    }
def _get_stat():
    """
    Count the topics from get_topics_list() per topic type and return the
    counts as text, one "<type> <count>" line per type.
    """
    counts = collections.Counter(
        get_topic_type(name) for name in get_topics_list()
    )
    return "".join("%s %s\n" % pair for pair in counts.items())
#
#
#
def get_topic_type(topic): # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """
    Return topic type for `topic` or "unknown" if topic can't be determined.

    The checks are made in this order, the first match wins:

    * empty topic                        -> "search"
    * topic starting with ':'            -> "internal"
    * topic containing '/':
        - '+' after the first slash      -> "question"
        - "<known dir>/:list"            -> "internal"
        - accepted by is_valid_learnxy() -> "learnxiny"
        - anything else                  -> "question"
    * topic in CHEAT_SHEETS_TOPICS       -> "cheat.sheets"
    * topic in CHEAT_TOPICS              -> "cheat"
    * topic in TLDR_TOPICS               -> "tldr"
    * topic containing '+'               -> "question"
    * otherwise                          -> "unknown"

    The topic and the detected type are also written to stdout.
    """
    result = ''
    if topic == "":
        result = "search"
    elif topic.startswith(":"):
        result = "internal"
    elif '/' in topic:
        topic_type, topic_name = topic.split('/', 1)
        if '+' in topic_name:
            result = 'question'
        else:
            if topic_type in _get_topics_dirs() and topic_name in [':list']:
                result = "internal"
            elif is_valid_learnxy(topic):
                result = 'learnxiny'
            else:
                result = 'question'
    elif topic in CHEAT_SHEETS_TOPICS:
        result = "cheat.sheets"
    elif topic.rstrip('/') in CHEAT_SHEETS_DIRS and topic.endswith('/'):
        # NOTE(review): this branch cannot be reached - a topic that ends
        # with '/' contains '/' and is consumed by the branch above.
        # Left in place; making it reachable changes the routing - confirm
        # the intended behaviour first.
        result = "cheat.sheets dir"
    elif topic in CHEAT_TOPICS:
        result = "cheat"
    elif topic in TLDR_TOPICS:
        result = "tldr"
    elif '+' in topic:
        result = "question"
    else:
        result = 'unknown'

    # debug trace; a single formatted argument prints the same text with
    # the Python 2 print statement and with the Python 3 print function
    print("%s   %s" % (topic, result))
    return result
#
# Various cheat sheets getters
#
#
#def registered_answer_getter(func):
# REGISTERED_ANSWER_GETTERS.append(funct)
# return cls
def _get_internal(topic):
    """
    Return the text of an internal (service) page.

    * "<dir>/:list" - the topics that start with "<dir>/", prefix removed
    * ":list"       - all topics from get_topics_list()
    * ":styles"     - the entries of COLOR_STYLES
    * ":stat"       - number of topics per topic type
    * any other name from INTERNAL_TOPICS - the content of the file
      "<name without the colon>.txt" in the "share" directory under MYDIR

    Lists are returned one entry per line. For anything else an empty
    string is returned.
    """
    if '/' in topic:
        topic_type, topic_name = topic.split('/', 1)
        if topic_name == ":list":
            prefix = topic_type + "/"
            topic_list = [x[len(prefix):]
                          for x in get_topics_list()
                          if x.startswith(prefix)]
            return "\n".join(topic_list)+"\n"

    if topic == ":list":
        return "\n".join(get_topics_list()) + "\n"

    if topic == ':styles':
        return "\n".join(COLOR_STYLES) + "\n"

    if topic == ":stat":
        return _get_stat()+"\n"

    if topic in INTERNAL_TOPICS:
        # the context manager closes the file even if reading fails
        with open(os.path.join(MYDIR, "share", topic[1:]+".txt"), "r") as page:
            return page.read()

    return ""
def _get_tldr(topic):
    """
    Run the `tldr` command for `topic` and return its reformatted output.

    The first two characters of every output line are dropped. After that:

    * a line starting with '-' gets "# " in place of its first two characters
    * an empty line stays empty
    * a line starting with a space is kept unchanged
    * any other line is prefixed with "# "

    Returns:
        the reformatted text, decoded from UTF-8, ending with a newline
    """
    cmd = ["tldr", topic]
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)

    # decode once at the process boundary (as _get_cheat does), so that all
    # the string handling below works on text and not on raw bytes
    answer = proc.communicate()[0].decode('utf-8')

    fixed_answer = []
    for line in answer.splitlines():
        line = line[2:]
        if line.startswith('-'):
            line = '# '+line[2:]
        elif line == "":
            pass
        elif not line.startswith(' '):
            line = "# "+line
        fixed_answer.append(line)

    return "\n".join(fixed_answer) + "\n"
def _get_cheat(topic):
    """
    Run the `cheat` command for `topic` and return what it wrote to stdout,
    decoded from UTF-8.
    """
    child = Popen(["cheat", topic], stdout=PIPE, stderr=PIPE)
    stdout_data = child.communicate()[0]
    return stdout_data.decode('utf-8')
def _get_cheat_sheets(topic):
    """
    Get the cheat sheet topic from the own repository (cheat.sheets).

    It's possible that the topic directory starts with an omitted underscore:
    if PATH_CHEAT_SHEETS + topic does not exist, PATH_CHEAT_SHEETS + "_" +
    topic is read instead.

    Returns:
        the content of the file, decoded from UTF-8
    """
    filename = PATH_CHEAT_SHEETS + "%s" % topic
    if not os.path.exists(filename):
        filename = PATH_CHEAT_SHEETS + "_%s" % topic

    # read raw bytes and decode them explicitly; the context manager closes
    # the file even if reading or decoding fails
    with open(filename, "rb") as sheet:
        return sheet.read().decode('utf-8')
def _get_cheat_sheets_dir(topic):
    """
    Return the sorted names of the entries found in the directory `topic`
    (trailing slashes ignored) under PATH_CHEAT_SHEETS, one name per line.
    """
    pattern = PATH_CHEAT_SHEETS + "%s/*" % topic.rstrip('/')
    names = sorted(os.path.basename(path) for path in glob.glob(pattern))
    return "\n".join(names) + "\n"
def _get_answer_for_question(topic):
    """
    Find answer for the `topic` question.

    Every '+' in the topic is replaced with a space and runs of whitespace
    are collapsed into single spaces; the resulting query is passed to the
    `bin/get-answer-for-question` helper program.

    Returns:
        what the helper wrote to stdout, decoded from UTF-8
    """
    topic = " ".join(topic.replace('+', ' ').strip().split())

    # the helper is looked up relative to MYDIR (the directory _get_internal
    # reads "share/" from) instead of a path hard-coded to one machine
    cmd = [os.path.join(MYDIR, "bin", "get-answer-for-question"), topic]
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    answer = proc.communicate()[0].decode('utf-8')
    return answer
def _get_unknown(topic):
    """
    Build the "Unknown topic" message for `topic`.

    The suggestions are the first three results of fuzzywuzzy's
    process.extract() (scorer: fuzz.ratio). A topic starting with ':' is
    compared with the ':' topics only, any other topic with the rest.
    """
    wants_internal = topic.startswith(':')
    candidates = [name for name in get_topics_list()
                  if name.startswith(':') == wants_internal]

    best_matches = process.extract(topic, candidates, scorer=fuzz.ratio)[:3]
    suggestions = "\n".join(" * %s %s" % match for match in best_matches)

    template = """
Unknown topic.
Do you mean one of these topics may be?
%s
"""
    return template % suggestions
# pylint: disable=bad-whitespace
#
# Dispatch table of (topic_type, function_getter) pairs.
# get_answer() scans it from the top and calls the getter of the first
# entry whose type equals the value returned by get_topic_type();
# every getter is called with the topic as its only argument.
# should be replaced with a decorator
TOPIC_GETTERS = (
    ("cheat.sheets", _get_cheat_sheets),
    ("cheat.sheets dir", _get_cheat_sheets_dir),
    ("tldr", _get_tldr),
    ("internal", _get_internal),
    ("cheat", _get_cheat),
    ("learnxiny", get_learnxiny),
    ("question", _get_answer_for_question),
    ("unknown", _get_unknown),
)
# pylint: enable=bad-whitespace
def get_answer(topic, keyword, options="", request_options=None): # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """
    Find cheat sheet for the topic.

    If `keyword` is None or empty, return the whole answer.
    Otherwise cut the paragraphs containing keywords.

    The answer is looked up in the redis cache first. If it is not there,
    it is produced by the getter registered in TOPIC_GETTERS for the type
    of the topic and saved in the cache, unless the type is "search",
    "internal" or "unknown".

    Args:
        topic (str): the name of the topic of the cheat sheet
        keyword (str): the name of the keywords to search in the cheat sheets
        options (str): search flags: 'i' - case insensitive search,
            'b' - keywords have to match on word boundaries
        request_options: passed unchanged to beautifier.beautify()

    Returns:
        string: the cheat sheet
    """

    def _join_paragraphs(paragraphs):
        return "\n".join(paragraphs)

    def _split_paragraphs(text):
        # a paragraph is a run of non-empty lines; every line of it keeps
        # a trailing newline
        answer = []
        paragraph = ""
        for line in text.splitlines():
            if line == "":
                answer.append(paragraph)
                paragraph = ""
            else:
                paragraph += line+"\n"
        answer.append(paragraph)
        return answer

    def _paragraph_contains(paragraph, keyword, insensitive=False, word_boundaries=True):
        """
        Check if `paragraph` contains `keyword`.
        Several keywords can be joined together using ~
        For example: ~ssh~passphrase
        All of the keywords have to be found in the paragraph.
        """
        flags = re.IGNORECASE if insensitive else 0
        for kwrd in keyword.split('~'):
            # keywords are literal text and not patterns: always escape them,
            # otherwise keywords like "c++" or "(" are invalid expressions
            regex = re.escape(kwrd)
            if word_boundaries:
                regex = r"\b%s\b" % regex
            if not re.search(regex, paragraph, flags):
                return False
        return True

    def _rewrite_aliases(word):
        if word == ':bash.completion':
            return ':bash_completion'
        return word

    def _rewrite_section_name(query):
        """
        Replace the part of `query` before the first slash with its
        LANGUAGE_ALIAS entry, if there is one. Queries without a slash
        are returned unchanged.
        """
        if '/' not in query:
            return query
        section_name, rest = query.split('/', 1)
        section_name = LANGUAGE_ALIAS.get(section_name, section_name)
        return "%s/%s" % (section_name, rest)

    answer = None
    needs_beautification = False

    topic = _rewrite_aliases(topic)
    topic = _rewrite_section_name(topic)

    # checking if the answer is in the cache
    if topic != "":
        # temporary hack for "questions":
        # the topic name has to be prefixed with q:
        # so we can later delete them from redis
        # and we know that they need beautification
        if '/' in topic and '+' in topic:
            topic = "q:" + topic
            needs_beautification = True

        answer = REDIS.get(topic)
        if answer:
            answer = answer.decode('utf-8')

    # if answer was not found in the cache
    # try to find it in one of the repositories
    if not answer:
        topic_type = get_topic_type(topic)

        for topic_getter_type, topic_getter in TOPIC_GETTERS:
            if topic_type == topic_getter_type:
                answer = topic_getter(topic)
                break

        if not answer:
            topic_type = "unknown"
            answer = _get_unknown(topic)

        # saving answers in the cache
        if topic_type not in ["search", "internal", "unknown"]:
            REDIS.set(topic, answer)

    if needs_beautification:
        filetype = 'bash'
        if '/' in topic:
            filetype = topic.split('/', 1)[0]
            # drop the cache prefix added above
            if filetype.startswith('q:'):
                filetype = filetype[2:]

        answer = beautifier.beautify(answer.encode('utf-8'), filetype, request_options)

    if not keyword:
        return answer

    #
    # shorten the answer, because keyword is specified
    #
    insensitive = 'i' in options
    word_boundaries = 'b' in options

    paragraphs = [p for p in _split_paragraphs(answer)
                  if _paragraph_contains(p, keyword,
                                         insensitive=insensitive,
                                         word_boundaries=word_boundaries)]
    if not paragraphs:
        return ""

    return _join_paragraphs(paragraphs)
def find_answer_by_keyword(directory, keyword, options="", request_options=None):
    """
    Search in the whole tree of all cheatsheets or in its subtree `directory`
    by `keyword`

    Args:
        directory (str): only topics whose name starts with this string are
            searched
        keyword (str): keyword(s) to search for, see get_answer()
        options (str): search flags; 'r' also searches topics that have
            a '/' after the `directory` prefix; the whole string is passed
            on to get_answer()
        request_options: passed unchanged to get_answer()

    Returns:
        list: (topic, answer) pairs for the topics with a non-empty answer.
        At most MAX_SEARCH_LEN pairs are returned; if there are more
        matches, a ("LIMITED", ...) pair is added at the end.
    """
    recursive = 'r' in options

    answer_paragraphs = []
    for topic in get_topics_list(skip_internal=True, skip_dirs=True):
        # skip the internal pages, don't show them in search
        if topic in INTERNAL_TOPICS:
            continue

        if not topic.startswith(directory):
            continue

        subtopic = topic[len(directory):]
        if not recursive and '/' in subtopic:
            continue

        answer = get_answer(topic, keyword, options=options, request_options=request_options)
        if not answer:
            continue

        # the limit is checked before the match is stored, so exactly
        # MAX_SEARCH_LEN answers are returned and the marker is added
        # only if there really is at least one more match
        if len(answer_paragraphs) >= MAX_SEARCH_LEN:
            answer_paragraphs.append(("LIMITED", "LIMITED TO %s ANSWERS" % MAX_SEARCH_LEN))
            break

        answer_paragraphs.append((topic, answer))

    return answer_paragraphs