add parse check and tree-sitter

This commit is contained in:
reshinthadithyan 2021-07-10 23:36:19 +05:30
parent 02dbf0d828
commit 98fecd06b9
4 changed files with 53 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
metrics/.DS_Store vendored Normal file

Binary file not shown.

53
metrics/parse_check.py Normal file
View File

@ -0,0 +1,53 @@
from tree_sitter import Language, Parser
def load_tree_sitter_languages(library_path='./tree_sitter_utils/build/my-languages.so'):
    """Loads language Grammars to evaluate

    args:
        library_path : path of the compiled grammar library handed to
                       ``Language``. The default is a relative path, so it
                       is presumably resolved against the current working
                       directory -- run from the directory that contains
                       ``tree_sitter_utils`` or pass an explicit path.
    returns:
        dict mapping a short language key ("py", "js", "cpp", "go", "java")
        to a ``Parser`` configured with the matching grammar
    """
    # Short key exposed to callers -> grammar name inside the library.
    grammar_names = {
        "py": "python",
        "js": "javascript",
        "cpp": "cpp",
        "go": "go",
        "java": "java",
    }
    parsers = {}
    for key, grammar in grammar_names.items():
        parser = Parser()
        parser.set_language(Language(library_path, grammar))
        parsers[key] = parser
    return parsers
class check_parse:
    """Callable metric: which snippets of a batch parse without syntax errors.

    An instance holds one parser per supported language key and can be
    called repeatedly with different batches.
    """

    def __init__(self):
        # Build the parsers once; every call reuses them.
        self.language_dict = load_tree_sitter_languages()

    def __call__(self, batch, lang):
        """
        args:
            batch : list[str] of code generated by the model
            lang  : lang should be one of the language_dict keys
        returns:
            dict(
                parse_score = fraction of datapoints that parsed without an
                              error, in [0, 1]; 0.0 for an empty batch
                index_parse = list of 0/1 flags, 1 when the datapoint at
                              that index parsed without an error
            )
        raises:
            KeyError : if lang is not a key of language_dict
        """
        parser = self.language_dict[lang]
        # A datapoint counts as parsed when the S-expression of its syntax
        # tree contains no ERROR marker.
        # NOTE(review): this may not flag trees that only contain MISSING
        # nodes -- confirm whether those should count as parse failures.
        index_parse_list = [
            int("ERROR" not in parser.parse(bytes(inp, "utf-8")).root_node.sexp())
            for inp in batch
        ]
        # Average rather than sum so the score is comparable across batch
        # sizes; guard the empty batch against division by zero.
        if index_parse_list:
            parse_score = sum(index_parse_list) / len(index_parse_list)
        else:
            parse_score = 0.0
        return {"parse_score": parse_score, "index_parse": index_parse_list}
if __name__ == "__main__":
    # Smoke test: score a single small Python snippet and print the result.
    sample = (
        "\n"
        "def a():\n"
        "    if bar:\n"
        "        baz()"
    )
    parse_checker = check_parse()
    score = parse_checker([sample], "py")
    print(score)

BIN
metrics/tree_sitter_utils/.DS_Store vendored Normal file

Binary file not shown.