mirror of
https://github.com/hsjobeki/noogle.git
synced 2024-12-29 00:43:57 +03:00
526 lines
16 KiB
Rust
526 lines
16 KiB
Rust
mod tests;
|
|
|
|
use regex::Regex;
|
|
use rnix::ast::{AstToken, AttrpathValue, Comment, Expr, Lambda, Param};
|
|
use rnix::{SyntaxKind, SyntaxNode, SyntaxToken};
|
|
use rowan::{ast::AstNode, GreenToken, NodeOrToken, WalkEvent};
|
|
use std::fs::File;
|
|
use std::io::Write;
|
|
use std::path::PathBuf;
|
|
use std::println;
|
|
use std::{env, fs};
|
|
use textwrap::dedent;
|
|
use walkdir::WalkDir;
|
|
|
|
/// Language tag written after the opening code fence of generated example blocks.
const EXAMPLE_LANG: &str = "nix";
/// Language tag written after the opening code fence of generated type blocks
/// (empty, so the fence carries no tag).
const TYPE_LANG: &str = "";
|
|
|
|
/// One named function argument together with the documentation text
/// attached to it, if any.
#[derive(Debug, Clone)]
pub struct SingleArg {
    /// Identifier of the argument.
    pub name: String,
    /// Documentation for the argument; `None` when there is none.
    pub doc: Option<String>,
}
|
|
|
|
/// Represent a function argument, which is either a flat identifier
|
|
/// or a pattern set.
|
|
#[derive(Clone, Debug)]
|
|
pub enum Argument {
|
|
/// Flat function argument (e.g. `n: n * 2`).
|
|
Flat(SingleArg),
|
|
|
|
/// Pattern function argument (e.g. `{ name, age }: ...`)
|
|
Pattern(Vec<SingleArg>),
|
|
}
|
|
|
|
///
|
|
fn handle_indentation(raw: &str) -> Option<String> {
|
|
let result: String = match raw.split_once('\n') {
|
|
Some((first, rest)) => {
|
|
format!("{}\n{}", first.trim_start(), dedent(rest))
|
|
}
|
|
None => raw.into(),
|
|
};
|
|
|
|
Some(result.trim().to_owned()).filter(|s| !s.is_empty())
|
|
}
|
|
|
|
/// Retrieve documentation comments.
|
|
fn retrieve_doc_comment(node: &SyntaxNode, allow_line_comments: bool) -> Option<String> {
|
|
// if the current node has a doc comment it'll be immediately preceded by that comment,
|
|
// or there will be a whitespace token and *then* the comment tokens before it. We merge
|
|
// multiple line comments into one large comment if they are on adjacent lines for
|
|
// documentation simplicity.
|
|
let mut token = node.first_token()?.prev_token()?;
|
|
if token.kind() == SyntaxKind::TOKEN_WHITESPACE {
|
|
token = token.prev_token()?;
|
|
}
|
|
if token.kind() != SyntaxKind::TOKEN_COMMENT {
|
|
return None;
|
|
}
|
|
|
|
// if we want to ignore line comments (eg because they may contain deprecation
|
|
// comments on attributes) we'll backtrack to the first preceding multiline comment.
|
|
while !allow_line_comments && token.text().starts_with('#') {
|
|
token = token.prev_token()?;
|
|
if token.kind() == SyntaxKind::TOKEN_WHITESPACE {
|
|
token = token.prev_token()?;
|
|
}
|
|
if token.kind() != SyntaxKind::TOKEN_COMMENT {
|
|
return None;
|
|
}
|
|
}
|
|
|
|
if token.text().starts_with("/*") {
|
|
return Some(Comment::cast(token)?.text().to_string());
|
|
}
|
|
|
|
// backtrack to the start of the doc comment, allowing only adjacent line comments.
|
|
// we don't care much about optimization here, doc comments aren't long enough for that.
|
|
if token.text().starts_with('#') {
|
|
let mut result: String = String::new();
|
|
while let Some(comment) = Comment::cast(token) {
|
|
if !comment.syntax().text().starts_with('#') {
|
|
break;
|
|
}
|
|
result.insert_str(0, comment.text().trim());
|
|
let ws = match comment.syntax().prev_token() {
|
|
Some(t) if t.kind() == SyntaxKind::TOKEN_WHITESPACE => t,
|
|
_ => break,
|
|
};
|
|
// only adjacent lines continue a doc comment, empty lines do not.
|
|
match ws.text().strip_prefix('\n') {
|
|
Some(trail) if !trail.contains('\n') => result.insert(0, ' '),
|
|
_ => break,
|
|
}
|
|
token = match ws.prev_token() {
|
|
Some(c) => c,
|
|
_ => break,
|
|
};
|
|
}
|
|
return Some(result);
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
/// Copied from nixdoc.
|
|
/// Traverse a Nix lambda and collect the identifiers of arguments
|
|
/// until an unexpected AST node is encountered.
|
|
fn collect_lambda_args(mut lambda: Lambda) -> Vec<Argument> {
|
|
let mut args = vec![];
|
|
|
|
loop {
|
|
match lambda.param().unwrap() {
|
|
// a variable, e.g. `id = x: x`
|
|
Param::IdentParam(id) => {
|
|
args.push(Argument::Flat(SingleArg {
|
|
name: id.to_string(),
|
|
// doc:
|
|
doc: handle_indentation(
|
|
&retrieve_doc_comment(id.syntax(), true).unwrap_or_default(),
|
|
),
|
|
}));
|
|
}
|
|
// an attribute set, e.g. `foo = { a }: a`
|
|
Param::Pattern(pat) => {
|
|
// collect doc-comments for each attribute in the set
|
|
let pattern_vec: Vec<_> = pat
|
|
.pat_entries()
|
|
.map(|entry| SingleArg {
|
|
name: entry.ident().unwrap().to_string(),
|
|
doc: None, // handle_indentation(
|
|
// &retrieve_doc_comment(entry.syntax(), true).unwrap_or_default(),
|
|
// ),
|
|
})
|
|
.collect();
|
|
|
|
args.push(Argument::Pattern(pattern_vec));
|
|
}
|
|
}
|
|
|
|
// Curried or not?
|
|
match lambda.body() {
|
|
Some(Expr::Lambda(inner)) => lambda = inner,
|
|
_ => break,
|
|
}
|
|
}
|
|
|
|
args
|
|
}
|
|
|
|
fn parse_doc_comment(
|
|
raw: &str,
|
|
indent: usize,
|
|
argument_block: Option<String>,
|
|
name: Option<String>,
|
|
) -> String {
|
|
enum ParseState {
|
|
Doc,
|
|
Type,
|
|
Example,
|
|
}
|
|
let left = " ".repeat(indent);
|
|
|
|
let mut doc = String::new();
|
|
let mut doc_type = String::new();
|
|
let mut example = String::new();
|
|
let mut state = ParseState::Doc;
|
|
|
|
for line in raw.lines() {
|
|
let mut line = line.trim_end();
|
|
|
|
let trimmed = line.trim();
|
|
|
|
if trimmed.starts_with("Type:") {
|
|
state = ParseState::Type;
|
|
line = &trimmed[5..]; // trim 'Type:'
|
|
}
|
|
|
|
if trimmed.starts_with("Example:") {
|
|
state = ParseState::Example;
|
|
line = &trimmed[8..]; // trim 'Example:'
|
|
}
|
|
if trimmed.starts_with("Examples:") {
|
|
state = ParseState::Example;
|
|
line = &trimmed[9..]; // trim 'Examples:'
|
|
}
|
|
|
|
match state {
|
|
// important: trim only trailing whitespaces; as leading ones might be markdown formatting or code examples.
|
|
ParseState::Type => {
|
|
doc_type.push_str(&format!("{line}\n"));
|
|
}
|
|
ParseState::Doc => {
|
|
doc.push_str(&format!("{line}\n"));
|
|
}
|
|
ParseState::Example => {
|
|
example.push_str(&format!("{line}\n"));
|
|
}
|
|
}
|
|
}
|
|
|
|
let f = |s: String| {
|
|
if s.is_empty() {
|
|
None
|
|
} else {
|
|
return Some(s.trim().to_owned());
|
|
}
|
|
};
|
|
let mut markdown = handle_indentation(&doc)
|
|
.map(|t| format_code(t, indent))
|
|
.unwrap_or_default();
|
|
|
|
// example and type can contain indented code
|
|
let formatted_example = format_code(example, indent);
|
|
let formatted_type = format_code(doc_type, indent);
|
|
|
|
if let Some(argument_block) = argument_block {
|
|
markdown.push_str(&argument_block);
|
|
}
|
|
|
|
if let Some(doc_type) = f(formatted_type) {
|
|
markdown.push_str(&format!("\n{left}# Type"));
|
|
markdown.push_str(&format!(
|
|
"\n\n{left}```{TYPE_LANG}\n{left}{doc_type}\n{left}```"
|
|
));
|
|
}
|
|
|
|
let args: Vec<String> = env::args().collect();
|
|
let prefix = args.get(2);
|
|
|
|
if let Some(example) = f(formatted_example) {
|
|
markdown.push_str(&format!("\n\n{left}# Examples"));
|
|
markdown.push_str(&format!("\n{left}:::{{.example}}"));
|
|
if let Some(name) = name {
|
|
if let Some(prefix) = prefix {
|
|
markdown.push_str(&format!("\n{left}## `{prefix}.{name}` usage example"));
|
|
} else {
|
|
markdown.push_str(&format!("\n{left}## `{name}` usage example"));
|
|
}
|
|
}
|
|
markdown.push_str(&format!(
|
|
"\n\n{left}```{EXAMPLE_LANG}\n{left}{example}\n{left}```"
|
|
));
|
|
markdown.push_str(&format!("\n\n{left}:::"));
|
|
}
|
|
|
|
format!("{}\n", markdown.trim_end())
|
|
}
|
|
|
|
fn get_binding_name(token: &SyntaxToken) -> Option<String> {
|
|
let mut step = token.next_sibling_or_token();
|
|
|
|
// Find the Expr that is a lambda.
|
|
let name = loop {
|
|
if step.is_none() {
|
|
// If there is no next token or node
|
|
break None;
|
|
} else if let Some(NodeOrToken::Node(ref node)) = step {
|
|
match node.kind() {
|
|
// SyntaxKind::NODE_LAMBDA => break Some(node.clone()),
|
|
SyntaxKind::NODE_ATTRPATH_VALUE => {
|
|
if let Some(value) = AttrpathValue::cast(node.clone()) {
|
|
break value.attrpath().map(|p| p.to_string());
|
|
} else {
|
|
break None;
|
|
}
|
|
}
|
|
_ => {}
|
|
};
|
|
} else {
|
|
}
|
|
step = step.unwrap().next_sibling_or_token();
|
|
};
|
|
name
|
|
}
|
|
|
|
fn get_argument_docs(token: &SyntaxToken, _ident: &str) -> Option<String> {
|
|
let mut step = token.next_sibling_or_token();
|
|
|
|
// Find the Expr that is a lambda.
|
|
let doc_expr = loop {
|
|
if step.is_none() {
|
|
// If there is no next token or node
|
|
break None;
|
|
} else if let Some(NodeOrToken::Node(ref node)) = step {
|
|
match node.kind() {
|
|
// SyntaxKind::NODE_LAMBDA => break Some(node.clone()),
|
|
SyntaxKind::NODE_ATTRPATH_VALUE => {
|
|
if let Some(value) = AttrpathValue::cast(node.clone()) {
|
|
break value.value();
|
|
} else {
|
|
break None;
|
|
}
|
|
}
|
|
_ => {}
|
|
};
|
|
} else {
|
|
}
|
|
step = step.unwrap().next_sibling_or_token();
|
|
};
|
|
|
|
let mut argument_docs: Option<String> = None;
|
|
if let Some(Expr::Lambda(l)) = doc_expr {
|
|
let args = collect_lambda_args(l);
|
|
let mut docs = String::new();
|
|
for (pos, arg) in args.iter().enumerate() {
|
|
match arg {
|
|
Argument::Flat(single_arg) => {
|
|
docs.push_str(&format!(
|
|
"`{}`\n\n: {}\n\n",
|
|
single_arg.name,
|
|
handle_indentation(
|
|
&single_arg
|
|
.clone()
|
|
.doc
|
|
.unwrap_or(format!("{}\\. Function argument", pos + 1))
|
|
)
|
|
.unwrap_or(format!("{}\\. Function argument", pos + 1)),
|
|
));
|
|
}
|
|
Argument::Pattern(_pattern) => (),
|
|
}
|
|
}
|
|
argument_docs = Some(docs);
|
|
}
|
|
return argument_docs;
|
|
}
|
|
|
|
fn format_comment(text: &str, token: &SyntaxToken) -> String {
|
|
let content = text.strip_prefix("/*").unwrap().strip_suffix("*/").unwrap();
|
|
let mut whitespace = "";
|
|
let prev = &token.prev_token();
|
|
|
|
if let Some(prev) = prev {
|
|
whitespace = prev.text();
|
|
}
|
|
let stripped = Regex::new(r#" +"#).unwrap().replace_all(whitespace, "");
|
|
let indentation = (whitespace.len() - stripped.len()) / 2 * 2;
|
|
|
|
let indent_1 = " ".repeat(indentation);
|
|
let indent_2 = " ".repeat(indentation + 2);
|
|
|
|
let lines: Vec<String> = content
|
|
.lines()
|
|
.map(|content| format!("{}{}", indent_2, content))
|
|
.collect();
|
|
|
|
let argument_block = if let Some(argument_docs) = get_argument_docs(token, &indent_2) {
|
|
let mut res = String::new();
|
|
if !argument_docs.trim().is_empty() {
|
|
res.push_str(&format!("\n\n{indent_2}# Inputs\n"));
|
|
for line in argument_docs.lines() {
|
|
if !line.trim().is_empty() {
|
|
res.push_str(&format!("\n{indent_2}{line}"));
|
|
} else {
|
|
res.push_str(&format!("\n"));
|
|
}
|
|
}
|
|
}
|
|
Some(res)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let name = get_binding_name(token);
|
|
|
|
let markdown = parse_doc_comment(&lines.join("\n"), indentation + 2, argument_block, name);
|
|
|
|
return format!("/**\n{}{}*/", markdown, indent_1);
|
|
}
|
|
|
|
fn format_code(text: String, ident: usize) -> String {
|
|
let mut content = text
|
|
.trim_end_matches("\n")
|
|
.trim_start_matches("\n")
|
|
.to_owned();
|
|
|
|
while let Some(stripped) = strip_column(&content) {
|
|
content = stripped;
|
|
}
|
|
|
|
let mut result = String::new();
|
|
let left: String = " ".repeat(ident);
|
|
for line in content.lines() {
|
|
if line.is_empty() {
|
|
result.push_str(&format!("\n"));
|
|
} else {
|
|
result.push_str(&format!("{left}{line}\n"));
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Remove one leading space from every line of `text`.
///
/// This only happens when `text` is non-empty and each non-empty line starts
/// with a space; empty lines are kept as empty lines. The lines are joined
/// with `\n`, so a trailing newline of the input is not preserved.
///
/// Returns `None` when `text` is empty or some non-empty line does not start
/// with a space.
fn strip_column(text: &str) -> Option<String> {
    if text.is_empty() {
        return None;
    }

    let every_line_indented = text
        .lines()
        .all(|line| line.is_empty() || line.starts_with(' '));
    if !every_line_indented {
        return None;
    }

    let shifted: Vec<&str> = text
        .lines()
        .map(|line| line.strip_prefix(' ').unwrap_or(""))
        .collect();
    Some(shifted.join("\n"))
}
|
|
|
|
fn replace_first_comment(syntax: &SyntaxNode) -> Option<SyntaxNode> {
|
|
let mut result = None;
|
|
for ev in syntax.preorder_with_tokens() {
|
|
match ev {
|
|
WalkEvent::Enter(node_or_token) => match node_or_token {
|
|
NodeOrToken::Token(token) => match token.kind() {
|
|
SyntaxKind::TOKEN_COMMENT => {
|
|
if token.text().starts_with("/**") || token.text().starts_with("#") {
|
|
// Already a doc-comment or not supposed to be migrated
|
|
continue;
|
|
}
|
|
let replacement: GreenToken = GreenToken::new(
|
|
rowan::SyntaxKind(token.kind() as u16),
|
|
&format_comment(token.text(), &token),
|
|
);
|
|
let green = token.replace_with(replacement);
|
|
let updated = syntax.replace_with(green);
|
|
|
|
result = Some(rnix::SyntaxNode::new_root(updated));
|
|
break;
|
|
}
|
|
_ => continue,
|
|
},
|
|
_ => continue,
|
|
},
|
|
_ => continue,
|
|
};
|
|
}
|
|
result
|
|
}
|
|
|
|
fn main() {
|
|
let args: Vec<String> = env::args().collect();
|
|
|
|
if let Some(path) = &args.get(1) {
|
|
println!("trying to read path: {path}");
|
|
for entry in WalkDir::new(path)
|
|
.follow_links(true)
|
|
.into_iter()
|
|
.filter_map(|e| e.ok())
|
|
{
|
|
let f_name = entry.file_name().to_string_lossy();
|
|
|
|
if f_name.ends_with(".nix") {
|
|
modify_file_inplace(entry.path().to_path_buf());
|
|
}
|
|
}
|
|
} else {
|
|
println!("Usage: codemod <dirPath>");
|
|
}
|
|
}
|
|
|
|
fn replace_all(syntax: SyntaxNode) -> (Option<SyntaxNode>, i32) {
|
|
let mut maybe_replaced = replace_first_comment(&syntax);
|
|
let mut count = 0;
|
|
let r: Option<SyntaxNode> = loop {
|
|
if let Some(replaced) = maybe_replaced {
|
|
// Maybe we can replace more
|
|
count += 1;
|
|
let result = replace_first_comment(&replaced);
|
|
|
|
// If we cannot replace more comments
|
|
if result.is_none() {
|
|
break Some(replaced);
|
|
}
|
|
maybe_replaced = result;
|
|
} else {
|
|
break None;
|
|
}
|
|
};
|
|
(r, count)
|
|
}
|
|
|
|
fn modify_file_inplace(file_path: PathBuf) -> () {
|
|
let contents = fs::read_to_string(&file_path);
|
|
if let Err(_) = contents {
|
|
println!("Could not read the file {:?}", file_path);
|
|
return;
|
|
}
|
|
let root = rnix::Root::parse(&contents.unwrap()).ok();
|
|
|
|
if let Err(err) = &root {
|
|
println!(
|
|
"{}",
|
|
format!(
|
|
"failed to parse input of file: {:?} \n\ngot error: {}",
|
|
file_path,
|
|
err.to_string()
|
|
)
|
|
);
|
|
return;
|
|
}
|
|
|
|
let syntax = root.unwrap().syntax().clone_for_update();
|
|
let display_name = file_path.to_str().unwrap();
|
|
if let (Some(updates), count) = replace_all(syntax) {
|
|
let mut file = File::create(&file_path).unwrap();
|
|
file.write_all(updates.text().to_string().as_bytes()).ok();
|
|
println!("{display_name} - Changed {count} comments");
|
|
}
|
|
}
|