mirror of
https://github.com/chubin/cheat.sh.git
synced 2024-11-23 19:43:33 +03:00
912e77faa3
Fix incorrect MYDIR
330 lines
9.4 KiB
Python
330 lines
9.4 KiB
Python
"""
|
|
Extract text from the text-code stream and comment it.
|
|
|
|
Supports three modes of normalization and commenting:
|
|
|
|
1. Don't add any comments
|
|
2. Add comments
|
|
3. Remove text, leave code only
|
|
|
|
Since several operations are quite expensive,
|
|
it actively uses caching.
|
|
|
|
Exported functions:
|
|
|
|
beautify(text, lang, options)
|
|
code_blocks(text)
|
|
"""
|
|
from __future__ import print_function
|
|
|
|
from gevent.monkey import patch_all
|
|
from gevent.subprocess import Popen
|
|
patch_all()
|
|
|
|
# pylint: disable=wrong-import-position,wrong-import-order
|
|
import sys
|
|
import os
|
|
import textwrap
|
|
import hashlib
|
|
import re
|
|
|
|
from itertools import groupby, chain
|
|
from tempfile import NamedTemporaryFile
|
|
|
|
import redis
|
|
|
|
MYDIR = os.path.abspath(os.path.join(__file__, '..', '..'))
|
|
sys.path.append("%s/lib/" % MYDIR)
|
|
from languages_data import VIM_NAME
|
|
from globals import PATH_VIM_ENVIRONMENT
|
|
# pylint: enable=wrong-import-position,wrong-import-order
|
|
|
|
REDIS = redis.StrictRedis(host='localhost', port=6379, db=1)
|
|
FNULL = open(os.devnull, 'w')
|
|
TEXT = 0
|
|
CODE = 1
|
|
UNDEFINED = -1
|
|
CODE_WHITESPACE = -2
|
|
def _language_name(name):
|
|
return VIM_NAME.get(name, name)
|
|
|
|
|
|
def _remove_empty_lines_from_beginning(lines):
|
|
start = 0
|
|
while start < len(lines) and lines[start].strip() == '':
|
|
start += 1
|
|
lines = lines[start:]
|
|
return lines
|
|
|
|
def _remove_empty_lines_from_end(lines):
|
|
end = len(lines) - 1
|
|
while end >= 0 and lines[end].strip() == '':
|
|
end -= 1
|
|
lines = lines[:end+1]
|
|
return lines
|
|
|
|
def _cleanup_lines(lines):
|
|
"""
|
|
Cleanup `lines` a little bit: remove empty lines at the beginning
|
|
and at the end; remove too many empty lines in between.
|
|
"""
|
|
lines = _remove_empty_lines_from_beginning(lines)
|
|
lines = _remove_empty_lines_from_end(lines)
|
|
if lines == []:
|
|
return lines
|
|
# remove repeating empty lines
|
|
lines = list(chain.from_iterable(
|
|
[(list(x[1]) if x[0] else [''])
|
|
for x in groupby(lines, key=lambda x: x.strip() != '')]))
|
|
|
|
return lines
|
|
|
|
|
|
def _line_type(line):
|
|
"""
|
|
Classify each line and say which of them
|
|
are text (0) and which of them are code (1).
|
|
|
|
A line is considered to be code,
|
|
if it starts with four spaces.
|
|
|
|
A line is considerer to be text if it is not
|
|
empty and is not code.
|
|
|
|
If line is empty, it is considered to be
|
|
code if it surrounded but two other code lines,
|
|
or if it is the first/last line and it has
|
|
code on the other side.
|
|
"""
|
|
if line.strip() == '':
|
|
return UNDEFINED
|
|
|
|
# some line may start with spaces but still be not code.
|
|
# we need some heuristics here, but for the moment just
|
|
# whitelist such cases:
|
|
if line.strip().startswith('* ') or re.match(r'[0-9]+\.', line.strip()):
|
|
return TEXT
|
|
|
|
if line.startswith(' '):
|
|
return CODE
|
|
return TEXT
|
|
|
|
|
|
|
|
def _classify_lines(lines):
|
|
line_types = [_line_type(line) for line in lines]
|
|
|
|
# pass 2:
|
|
# adding empty code lines to the code
|
|
for i in range(len(line_types) - 1):
|
|
if line_types[i] == CODE and line_types[i+1] == UNDEFINED:
|
|
line_types[i+1] = CODE_WHITESPACE
|
|
changed = True
|
|
|
|
for i in range(len(line_types) - 1)[::-1]:
|
|
if line_types[i] == UNDEFINED and line_types[i+1] == CODE:
|
|
line_types[i] = CODE_WHITESPACE
|
|
changed = True
|
|
line_types = [CODE if x == CODE_WHITESPACE else x for x in line_types]
|
|
|
|
# pass 3:
|
|
# fixing undefined line types (-1)
|
|
changed = True
|
|
while changed:
|
|
changed = False
|
|
|
|
# changing all lines types that are near the text
|
|
|
|
for i in range(len(line_types) - 1):
|
|
if line_types[i] == TEXT and line_types[i+1] == UNDEFINED:
|
|
line_types[i+1] = TEXT
|
|
changed = True
|
|
|
|
for i in range(len(line_types) - 1)[::-1]:
|
|
if line_types[i] == UNDEFINED and line_types[i+1] == TEXT:
|
|
line_types[i] = TEXT
|
|
changed = True
|
|
|
|
# everything what is still undefined, change to code type
|
|
line_types = [CODE if x == UNDEFINED else x for x in line_types]
|
|
return line_types
|
|
|
|
def _unindent_code(line, shift=0):
|
|
#if line.startswith(' '):
|
|
# return line[4:]
|
|
|
|
if shift == -1 and line != '':
|
|
return ' ' + line
|
|
|
|
if shift > 0:
|
|
if line.startswith(' '*shift):
|
|
return line[shift:]
|
|
|
|
return line
|
|
|
|
def _wrap_lines(lines_classes, unindent_code=False):
|
|
"""
|
|
Wrap classified lines. Add the split lines to the stream.
|
|
If `unindent_code` is True, remove leading four spaces.
|
|
"""
|
|
|
|
result = []
|
|
for line_type,line_content in lines_classes:
|
|
if line_type == CODE:
|
|
|
|
shift = 3 if unindent_code else -1
|
|
result.append((line_type, _unindent_code(line_content, shift=shift)))
|
|
else:
|
|
if line_content.strip() == "":
|
|
result.append((line_type, ""))
|
|
for line in textwrap.fill(line_content).splitlines():
|
|
result.append((line_type, line))
|
|
|
|
return result
|
|
|
|
def _run_vim_script(script_lines, text_lines):
|
|
"""
|
|
Apply `script_lines` to `lines_classes`
|
|
and returns the result
|
|
"""
|
|
|
|
script_vim = NamedTemporaryFile(delete=True)
|
|
textfile = NamedTemporaryFile(delete=True)
|
|
|
|
open(script_vim.name, "w").write("\n".join(script_lines))
|
|
open(textfile.name, "w").write("\n".join(text_lines))
|
|
|
|
script_vim.file.close()
|
|
textfile.file.close()
|
|
|
|
my_env = os.environ.copy()
|
|
my_env['HOME'] = PATH_VIM_ENVIRONMENT
|
|
|
|
cmd = ["script", "-q", "-c",
|
|
"vim -S %s %s" % (script_vim.name, textfile.name)]
|
|
Popen(cmd, shell=False, stdout=FNULL, stderr=FNULL, env=my_env).communicate()
|
|
|
|
return open(textfile.name, "r").read()
|
|
|
|
def _commenting_script(lines_blocks, filetype):
|
|
script_lines = []
|
|
block_start = 1
|
|
for block in lines_blocks:
|
|
lines = list(block[1])
|
|
|
|
block_end = block_start + len(lines)-1
|
|
|
|
if block[0] == 0:
|
|
comment_type = 'sexy'
|
|
if block_end - block_start < 1 or filetype == 'ruby':
|
|
comment_type = 'comment'
|
|
|
|
script_lines.insert(0, "%s,%s call NERDComment(1, '%s')"
|
|
% (block_start, block_end, comment_type))
|
|
script_lines.insert(0, "%s,%s call NERDComment(1, 'uncomment')"
|
|
% (block_start, block_end))
|
|
|
|
block_start = block_end + 1
|
|
|
|
script_lines.insert(0, "set ft=%s" % _language_name(filetype))
|
|
script_lines.append("wq")
|
|
|
|
return script_lines
|
|
|
|
def _beautify(text, filetype, add_comments=False, remove_text=False):
|
|
"""
|
|
Main function that actually does the whole beautification job.
|
|
"""
|
|
|
|
# We shift the code if and only if we either convert the text into comments
|
|
# or remove the text completely. Otherwise the code has to remain aligned
|
|
unindent_code = add_comments or remove_text
|
|
print(unindent_code)
|
|
|
|
lines = [x.rstrip('\n') for x in text.splitlines()]
|
|
lines = _cleanup_lines(lines)
|
|
lines_classes = zip(_classify_lines(lines), lines)
|
|
lines_classes = _wrap_lines(lines_classes, unindent_code=unindent_code)
|
|
#for x,y in lines_classes:
|
|
# print "%s: %s" % (x, y)
|
|
|
|
if remove_text:
|
|
lines = [line[1] for line in lines_classes if line[0] == 1]
|
|
lines = _cleanup_lines(lines)
|
|
output = "\n".join(lines)
|
|
if not output.endswith('\n'):
|
|
output += "\n"
|
|
elif not add_comments:
|
|
output = "\n".join(line[1] for line in lines_classes)
|
|
else:
|
|
lines_blocks = groupby(lines_classes, key=lambda x: x[0])
|
|
script_lines = _commenting_script(lines_blocks, filetype)
|
|
output = _run_vim_script(
|
|
script_lines,
|
|
[line for (_, line) in lines_classes])
|
|
|
|
return output
|
|
|
|
def code_blocks(text, wrap_lines=False, unindent_code=False):
|
|
"""
|
|
Split `text` into blocks of text and code.
|
|
Return list of tuples TYPE, TEXT
|
|
"""
|
|
lines = [x.rstrip('\n') for x in text.splitlines()]
|
|
lines_classes = zip(_classify_lines(lines), lines)
|
|
|
|
if wrap_lines:
|
|
lines_classes = _wrap_lines(lines_classes, unindent_code=unindent_code)
|
|
|
|
lines_blocks = groupby(lines_classes, key=lambda x: x[0])
|
|
answer = [(x[0], "\n".join([y[1] for y in x[1]])+"\n") for x in lines_blocks]
|
|
return answer
|
|
|
|
|
|
def beautify(text, lang, options):
|
|
"""
|
|
Process input `text` according to the specified `mode`.
|
|
Adds comments if needed, according to the `lang` rules.
|
|
Caches the results.
|
|
The whole work (except caching) is done by _beautify().
|
|
"""
|
|
|
|
options = options or {}
|
|
beauty_options = dict((k, v) for k, v in options.items() if k in
|
|
['add_comments', 'remove_text'])
|
|
|
|
mode = ''
|
|
if beauty_options.get('add_comments'):
|
|
mode += 'c'
|
|
if beauty_options.get('remove_text'):
|
|
mode += 'q'
|
|
|
|
if beauty_options == {}:
|
|
# if mode is unknown, just don't transform the text at all
|
|
return text
|
|
|
|
digest = "t:%s:%s:%s" % (hashlib.md5(text).hexdigest(), lang, mode)
|
|
answer = REDIS.get(digest)
|
|
if answer:
|
|
return answer
|
|
|
|
answer = _beautify(text, lang, **beauty_options)
|
|
|
|
REDIS.set(digest, answer)
|
|
return answer
|
|
|
|
def __main__():
|
|
text = sys.stdin.read()
|
|
filetype = sys.argv[1]
|
|
options = {
|
|
"": {},
|
|
"c": dict(add_comments=True),
|
|
"C": dict(add_comments=False),
|
|
"q": dict(remove_text=True),
|
|
}[sys.argv[2]]
|
|
result = beautify(text, filetype, options)
|
|
sys.stdout.write(result)
|
|
|
|
if __name__ == '__main__':
|
|
__main__()
|