mirror of
https://github.com/chubin/cheat.sh.git
synced 2024-11-24 03:56:55 +03:00
308 lines
8.6 KiB
Python
308 lines
8.6 KiB
Python
|
"""
|
||
|
Extract text from the text-code stream and comment it.
|
||
|
|
||
|
Supports three modes of normalization and commenting:
|
||
|
|
||
|
1. Don't add any comments
|
||
|
2. Add comments
|
||
|
3. Remove text, leave code only
|
||
|
|
||
|
Since several operations are quite expensive,
|
||
|
actively uses caching.
|
||
|
|
||
|
Exported functions:
|
||
|
|
||
|
beautify(text, lang, options)
|
||
|
"""
|
||
|
|
||
|
import sys
|
||
|
import os
|
||
|
import textwrap
|
||
|
import subprocess
|
||
|
import hashlib
|
||
|
import re
|
||
|
|
||
|
from itertools import groupby, chain
|
||
|
from tempfile import NamedTemporaryFile
|
||
|
|
||
|
import redis
|
||
|
|
||
|
# pylint: disable=wrong-import-position,wrong-import-order
|
||
|
# Base directory whose "lib/" subdirectory is appended to the module
# search path, so that the project-local modules imported right below
# can be found.
# NOTE(review): '__file__' is a quoted string literal here, not the
# module attribute __file__; both dirname() calls therefore yield '' and
# MYDIR resolves to the current working directory -- confirm whether the
# bare name __file__ was intended.
MYDIR = os.path.abspath(os.path.dirname(os.path.dirname('__file__')))
sys.path.append("%s/lib/" % MYDIR)
|
||
|
from languages_data import VIM_NAME
|
||
|
from globals import PATH_VIM_ENVIRONMENT
|
||
|
# pylint: enable=wrong-import-position,wrong-import-order
|
||
|
|
||
|
# Redis client (database 1 on localhost:6379); beautify() stores its
# results there and looks them up before doing any work.
REDIS = redis.StrictRedis(host='localhost', port=6379, db=1)
# Write handle on the null device, opened once at import time;
# _run_vim_script() passes it as stdout/stderr of the spawned process.
FNULL = open(os.devnull, 'w')
|
||
|
|
||
|
def _language_name(name):
    """
    Translate `name` into the name registered for it in VIM_NAME;
    a name without an entry there is returned unchanged.
    """
    if name in VIM_NAME:
        return VIM_NAME[name]
    return name
|
||
|
|
||
|
def _cleanup_lines(lines):
|
||
|
"""
|
||
|
Cleanup `lines` a little bit: remove empty lines at the beginning
|
||
|
and at the end; remove to much empty lines in between.
|
||
|
"""
|
||
|
|
||
|
if lines == []:
|
||
|
return lines
|
||
|
|
||
|
# remove empty lines from the beginning
|
||
|
start = 0
|
||
|
while start < len(lines) and lines[start].strip() == '':
|
||
|
start += 1
|
||
|
lines = lines[start:]
|
||
|
if lines == []:
|
||
|
return lines
|
||
|
|
||
|
# remove empty lines from the end
|
||
|
end = len(lines) - 1
|
||
|
while end >= 0 and lines[end].strip() == '':
|
||
|
end -= 1
|
||
|
lines = lines[:end+1]
|
||
|
if lines == []:
|
||
|
return lines
|
||
|
|
||
|
# remove repeating empty lines
|
||
|
lines = list(chain.from_iterable(
|
||
|
[(list(x[1]) if x[0] else [''])
|
||
|
for x in groupby(lines, key=lambda x: x.strip() != '')]))
|
||
|
|
||
|
return lines
|
||
|
|
||
|
|
||
|
def _classify_lines(lines):
|
||
|
"""
|
||
|
Classify each line and say which of them
|
||
|
are text (0) and which of them are code (1).
|
||
|
|
||
|
A line is considered to be code,
|
||
|
if it starts with four spaces.
|
||
|
|
||
|
A line is considerer to be text if it is not
|
||
|
empty and is not code.
|
||
|
|
||
|
If line is empty, it is considered to be
|
||
|
code if it surrounded but two other code lines
|
||
|
(or if it is the first/last line and it has
|
||
|
code on the other side.
|
||
|
"""
|
||
|
|
||
|
def _line_type(line):
|
||
|
if line.strip() == '':
|
||
|
return -1
|
||
|
|
||
|
# some line may start with spaces but still be not code.
|
||
|
# we need some heuristics here, but for the moment just
|
||
|
# whitelist such cases:
|
||
|
if line.strip().startswith('* ') or re.match(r'[0-9]+\.', line.strip()):
|
||
|
return 0
|
||
|
|
||
|
if line.startswith(' '):
|
||
|
return 1
|
||
|
return 0
|
||
|
|
||
|
line_types = [_line_type(line) for line in lines]
|
||
|
|
||
|
# pass 2:
|
||
|
# adding empty code lines to the code
|
||
|
for i in range(len(line_types) - 1):
|
||
|
if line_types[i] == 1 and line_types[i+1] == -1:
|
||
|
line_types[i+1] = -2
|
||
|
changed = True
|
||
|
|
||
|
for i in range(len(line_types) - 1)[::-1]:
|
||
|
if line_types[i] == -1 and line_types[i+1] == 1:
|
||
|
line_types[i] = -2
|
||
|
changed = True
|
||
|
line_types = [1 if x == -2 else x for x in line_types]
|
||
|
|
||
|
# pass 3:
|
||
|
# fixing undefined line types (-1)
|
||
|
changed = True
|
||
|
while changed:
|
||
|
changed = False
|
||
|
|
||
|
# changing all lines types that are near the text
|
||
|
|
||
|
for i in range(len(line_types) - 1):
|
||
|
if line_types[i] == 0 and line_types[i+1] == -1:
|
||
|
line_types[i+1] = 0
|
||
|
changed = True
|
||
|
|
||
|
for i in range(len(line_types) - 1)[::-1]:
|
||
|
if line_types[i] == -1 and line_types[i+1] == 0:
|
||
|
line_types[i] = 0
|
||
|
changed = True
|
||
|
|
||
|
# everything what is still undefined, change to 1
|
||
|
line_types = [1 if x == -1 else x for x in line_types]
|
||
|
return line_types
|
||
|
|
||
|
def _wrap_lines(lines_classes, shift_code=False):
|
||
|
"""
|
||
|
Wrap classified lines. Add the splitted lines to the stream.
|
||
|
If `shift_code` is True, remove leading four spaces.
|
||
|
"""
|
||
|
|
||
|
def _shift_code(line, shift=0):
|
||
|
#if line.startswith(' '):
|
||
|
# return line[4:]
|
||
|
|
||
|
if shift == 1 and line != '':
|
||
|
return ' ' + line
|
||
|
|
||
|
if shift == 3:
|
||
|
if line.startswith(' '):
|
||
|
return line[3:]
|
||
|
|
||
|
return line
|
||
|
|
||
|
result = []
|
||
|
for line_tuple in lines_classes:
|
||
|
if line_tuple[0] == 1:
|
||
|
if shift_code:
|
||
|
shift = 3
|
||
|
else:
|
||
|
shift = -1
|
||
|
result.append((line_tuple[0], _shift_code(line_tuple[1], shift=shift)))
|
||
|
else:
|
||
|
if line_tuple[1].strip() == "":
|
||
|
result.append((line_tuple[0], ""))
|
||
|
for line in textwrap.fill(line_tuple[1]).splitlines():
|
||
|
result.append((line_tuple[0], line))
|
||
|
|
||
|
return result
|
||
|
|
||
|
def _run_vim_script(script_lines, text_lines):
    """
    Apply the Vim script `script_lines` to the text `text_lines`
    and return the result.

    Both arguments are lists of strings; each of them is joined with
    newlines and written into a temporary file of its own. Vim is then
    started on the text file with the script file sourced (-S), and the
    content of the text file as it is after the run is returned as a
    single string.
    """

    script_vim = NamedTemporaryFile(delete=True)
    textfile = NamedTemporaryFile(delete=True)

    # the files are written through handles that are closed right away,
    # so that the content is on disk before vim is started
    with open(script_vim.name, "w") as script_file:
        script_file.write("\n".join(script_lines))
    with open(textfile.name, "w") as text_file:
        text_file.write("\n".join(text_lines))

    # only the underlying handles are closed here; the wrapper objects
    # (created with delete=True) stay referenced until the function returns
    script_vim.file.close()
    textfile.file.close()

    # NOTE(review): HOME is presumably redirected so that vim reads its
    # configuration from PATH_VIM_ENVIRONMENT -- confirm
    my_env = os.environ.copy()
    my_env['HOME'] = PATH_VIM_ENVIRONMENT

    # NOTE(review): vim is started through script(1), presumably to
    # provide it with a terminal -- confirm
    cmd = ["script", "-q", "-c",
           "vim -S %s %s" % (script_vim.name, textfile.name)]
    subprocess.Popen(cmd, shell=False, stdout=FNULL, stderr=FNULL, env=my_env).communicate()

    with open(textfile.name, "r") as result_file:
        return result_file.read()
|
||
|
|
||
|
def _commenting_script(lines_blocks, filetype):
    """
    Build the Vim script that turns the text blocks into comments.

    `lines_blocks` is an iterable of (block_type, lines) pairs that
    describe consecutive blocks of the file, in order; the line numbers
    of a block are derived from the sizes of the blocks before it,
    counting from 1. For every block of type 0 a pair of NERDComment
    calls is generated: an 'uncomment' followed by a 'comment' (block of
    at most one line) or a 'sexy' comment (longer block).

    The returned list of script lines starts with the "set ft=" command
    for `filetype` and ends with "wq".
    """

    commands = []
    first_line = 1
    for block_type, members in lines_blocks:
        size = len(list(members))
        last_line = first_line + size - 1

        if block_type == 0:
            style = 'comment' if size < 2 else 'sexy'
            # commands are put in front of the ones generated so far,
            # i.e. the script handles the blocks in reverse order
            # NOTE(review): presumably so that commenting a block cannot
            # shift the line numbers of the blocks still to be handled
            commands[:0] = [
                "%s,%s call NERDComment(1, 'uncomment')"
                % (first_line, last_line),
                "%s,%s call NERDComment(1, '%s')"
                % (first_line, last_line, style),
            ]

        first_line = last_line + 1

    return ["set ft=%s" % _language_name(filetype)] + commands + ["wq"]
|
||
|
|
||
|
def _beautify(text, filetype, add_comments=False, remove_text=False):
    """
    Do the actual beautification of `text` and return the new text.

    The text is split into lines, cleaned up, classified into text and
    code and wrapped. What happens next depends on the flags:

      * `remove_text`: only the code lines are kept (cleaned up once
        more); the result always ends with a newline;
      * neither flag: the wrapped lines are joined and returned;
      * `add_comments` only: the text blocks are converted into
        comments of `filetype` by a generated Vim script.
    """

    # The code is shifted if and only if the text is either converted into
    # comments or removed completely; otherwise it has to remain aligned
    shift_code = add_comments or remove_text

    cleaned = _cleanup_lines([x.rstrip('\n') for x in text.splitlines()])
    classified = _wrap_lines(
        zip(_classify_lines(cleaned), cleaned), shift_code=shift_code)

    if remove_text:
        code_only = _cleanup_lines(
            [line for line_type, line in classified if line_type == 1])
        output = "\n".join(code_only)
        return output if output.endswith('\n') else output + "\n"

    if not add_comments:
        return "\n".join(line for _, line in classified)

    script_lines = _commenting_script(
        groupby(classified, key=lambda pair: pair[0]), filetype)
    return _run_vim_script(script_lines, [line for _, line in classified])
|
||
|
|
||
|
def beautify(text, lang, options):
    """
    Process input `text` according to the specified `options`.
    Adds comments if needed, according to the `lang` rules.
    Caches the results.

    `options` is a dictionary (or None); only its keys 'add_comments'
    and 'remove_text' are taken into account. If neither of them is
    present, `text` is returned as it is.

    The result is stored in REDIS under a key built from the MD5 digest
    of `text`, `lang` and the mode derived from the options; a stored
    non-empty result is returned without any processing.

    The whole work (except caching) is done by _beautify().
    """

    options = options or {}
    beauty_options = dict((k, v) for k, v in options.items() if k in
                          ['add_comments', 'remove_text'])

    if beauty_options == {}:
        # if mode is unknown, just don't transform the text at all
        return text

    mode = ''
    if beauty_options.get('add_comments'):
        mode += 'c'
    if beauty_options.get('remove_text'):
        mode += 'q'

    # hashlib works on bytes only: text strings are encoded first,
    # byte strings are hashed as they are
    raw_text = text if isinstance(text, bytes) else text.encode('utf-8')
    digest = "t:%s:%s:%s" % (hashlib.md5(raw_text).hexdigest(), lang, mode)

    # NOTE(review): the cached value is returned exactly as REDIS.get()
    # delivers it; its type may differ from what _beautify() returns --
    # verify against the client configuration
    answer = REDIS.get(digest)
    if answer:
        return answer

    answer = _beautify(text, lang, **beauty_options)

    REDIS.set(digest, answer)
    return answer
|
||
|
|
||
|
def __main__():
    """
    Command line entry point: read the text from stdin, take the file
    type from the first argument and the mode from the second one, and
    write the beautified text to stdout.

    Known modes are "" (no options), "c" (add comments),
    "C" (explicitly no comments) and "q" (remove the text).
    """
    text = sys.stdin.read()
    filetype = sys.argv[1]

    # mode given on the command line -> options passed to beautify()
    options_by_mode = {
        "": {},
        "c": {"add_comments": True},
        "C": {"add_comments": False},
        "q": {"remove_text": True},
    }
    sys.stdout.write(beautify(text, filetype, options_by_mode[sys.argv[2]]))
|
||
|
|
||
|
# run the command line interface only if the file is executed as a script
if __name__ == '__main__':
    __main__()
|