// noogle/codemod/src/main.rs

mod tests;
use regex::Regex;
use rnix::ast::{AstToken, AttrpathValue, Comment, Expr, Lambda, Param};
use rnix::{SyntaxKind, SyntaxNode, SyntaxToken};
use rowan::{ast::AstNode, GreenToken, NodeOrToken, WalkEvent};
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use std::{env, fs};
use textwrap::dedent;
use walkdir::WalkDir;
const EXAMPLE_LANG: &str = "nix";
const TYPE_LANG: &str = "";
/// Represent a single function argument name and its (optional)
/// doc-string.
#[derive(Clone, Debug)]
pub struct SingleArg {
pub name: String,
pub doc: Option<String>,
}
/// Represent a function argument, which is either a flat identifier
/// or a pattern set.
#[derive(Clone, Debug)]
pub enum Argument {
/// Flat function argument (e.g. `n: n * 2`).
Flat(SingleArg),
/// Pattern function argument (e.g. `{ name, age }: ...`)
Pattern(Vec<SingleArg>),
}
/// Dedent everything after the first line of a doc comment and trim the
/// result, returning `None` if nothing but whitespace remains.
fn handle_indentation(raw: &str) -> Option<String> {
let result: String = match raw.split_once('\n') {
Some((first, rest)) => {
format!("{}\n{}", first.trim_start(), dedent(rest))
}
None => raw.into(),
};
Some(result.trim().to_owned()).filter(|s| !s.is_empty())
}
/// Retrieve the documentation comment immediately preceding `node`, if any.
fn retrieve_doc_comment(node: &SyntaxNode, allow_line_comments: bool) -> Option<String> {
// if the current node has a doc comment it'll be immediately preceded by that comment,
// or there will be a whitespace token and *then* the comment tokens before it. We merge
// multiple line comments into one large comment if they are on adjacent lines for
// documentation simplicity.
let mut token = node.first_token()?.prev_token()?;
if token.kind() == SyntaxKind::TOKEN_WHITESPACE {
token = token.prev_token()?;
}
if token.kind() != SyntaxKind::TOKEN_COMMENT {
return None;
}
// if we want to ignore line comments (eg because they may contain deprecation
// comments on attributes) we'll backtrack to the first preceding multiline comment.
while !allow_line_comments && token.text().starts_with('#') {
token = token.prev_token()?;
if token.kind() == SyntaxKind::TOKEN_WHITESPACE {
token = token.prev_token()?;
}
if token.kind() != SyntaxKind::TOKEN_COMMENT {
return None;
}
}
if token.text().starts_with("/*") {
return Some(Comment::cast(token)?.text().to_string());
}
// backtrack to the start of the doc comment, allowing only adjacent line comments.
// we don't care much about optimization here, doc comments aren't long enough for that.
if token.text().starts_with('#') {
let mut result: String = String::new();
while let Some(comment) = Comment::cast(token) {
if !comment.syntax().text().starts_with('#') {
break;
}
result.insert_str(0, comment.text().trim());
let ws = match comment.syntax().prev_token() {
Some(t) if t.kind() == SyntaxKind::TOKEN_WHITESPACE => t,
_ => break,
};
// only adjacent lines continue a doc comment, empty lines do not.
match ws.text().strip_prefix('\n') {
Some(trail) if !trail.contains('\n') => result.insert(0, ' '),
_ => break,
}
token = match ws.prev_token() {
Some(c) => c,
_ => break,
};
}
return Some(result);
}
None
}
/// Copied from nixdoc.
/// Traverse a Nix lambda and collect the identifiers of arguments
/// until an unexpected AST node is encountered.
fn collect_lambda_args(mut lambda: Lambda) -> Vec<Argument> {
let mut args = vec![];
loop {
match lambda.param().unwrap() {
// a variable, e.g. `id = x: x`
Param::IdentParam(id) => {
args.push(Argument::Flat(SingleArg {
name: id.to_string(),
// doc:
doc: handle_indentation(
&retrieve_doc_comment(id.syntax(), true).unwrap_or_default(),
),
}));
}
// an attribute set, e.g. `foo = { a }: a`
Param::Pattern(pat) => {
// collect doc-comments for each attribute in the set
let pattern_vec: Vec<_> = pat
.pat_entries()
.map(|entry| SingleArg {
name: entry.ident().unwrap().to_string(),
                        // TODO: also retrieve doc comments for pattern entries, e.g. via
                        // handle_indentation(&retrieve_doc_comment(entry.syntax(), true).unwrap_or_default()).
                        doc: None,
})
.collect();
args.push(Argument::Pattern(pattern_vec));
}
}
// Curried or not?
match lambda.body() {
Some(Expr::Lambda(inner)) => lambda = inner,
_ => break,
}
}
args
}
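/// Render the body of a legacy nixdoc comment as doc-comment markdown.
///
/// The raw text is split into the free-form description and the optional
/// `Type:` and `Example:`/`Examples:` sections, which become `# Type` and
/// `# Examples` blocks. `argument_block` is inserted after the description,
/// and `name` (optionally combined with a prefix taken from the CLI
/// arguments) is used for the example heading. Every line of the output is
/// indented by `indent` spaces.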
fn parse_doc_comment(
raw: &str,
indent: usize,
argument_block: Option<String>,
name: Option<String>,
) -> String {
enum ParseState {
Doc,
Type,
Example,
}
let left = " ".repeat(indent);
let mut doc = String::new();
let mut doc_type = String::new();
let mut example = String::new();
let mut state = ParseState::Doc;
for line in raw.lines() {
let mut line = line.trim_end();
let trimmed = line.trim();
if trimmed.starts_with("Type:") {
state = ParseState::Type;
line = &trimmed[5..]; // trim 'Type:'
}
if trimmed.starts_with("Example:") {
state = ParseState::Example;
line = &trimmed[8..]; // trim 'Example:'
}
if trimmed.starts_with("Examples:") {
state = ParseState::Example;
line = &trimmed[9..]; // trim 'Examples:'
}
match state {
            // Important: trim only trailing whitespace; leading whitespace may be
            // markdown formatting or indented code examples.
ParseState::Type => {
doc_type.push_str(&format!("{line}\n"));
}
ParseState::Doc => {
doc.push_str(&format!("{line}\n"));
}
ParseState::Example => {
example.push_str(&format!("{line}\n"));
}
}
}
    let f = |s: String| {
        if s.is_empty() {
            None
        } else {
            Some(s.trim().to_owned())
        }
    };
let mut markdown = handle_indentation(&doc)
.map(|t| format_code(t, indent))
.unwrap_or_default();
// example and type can contain indented code
let formatted_example = format_code(example, indent);
let formatted_type = format_code(doc_type, indent);
if let Some(argument_block) = argument_block {
markdown.push_str(&argument_block);
}
if let Some(doc_type) = f(formatted_type) {
markdown.push_str(&format!("\n{left}# Type"));
markdown.push_str(&format!(
"\n\n{left}```{TYPE_LANG}\n{left}{doc_type}\n{left}```"
));
}
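    // The optional second CLI argument is prepended to the binding name in
    // the example heading below.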
let args: Vec<String> = env::args().collect();
let prefix = args.get(2);
if let Some(example) = f(formatted_example) {
markdown.push_str(&format!("\n\n{left}# Examples"));
markdown.push_str(&format!("\n{left}:::{{.example}}"));
if let Some(name) = name {
if let Some(prefix) = prefix {
markdown.push_str(&format!("\n{left}## `{prefix}.{name}` usage example"));
} else {
markdown.push_str(&format!("\n{left}## `{name}` usage example"));
}
}
markdown.push_str(&format!(
"\n\n{left}```{EXAMPLE_LANG}\n{left}{example}\n{left}```"
));
markdown.push_str(&format!("\n\n{left}:::"));
}
format!("{}\n", markdown.trim_end())
}
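/// Walk the siblings that follow a comment token until the attrpath-value
/// node it documents is found, and return its attribute path as a string.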
fn get_binding_name(token: &SyntaxToken) -> Option<String> {
    let mut step = token.next_sibling_or_token();
    // Walk forward until the attrpath-value node following the comment is found.
    loop {
        match step {
            None => break None,
            Some(NodeOrToken::Node(ref node))
                if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE =>
            {
                break AttrpathValue::cast(node.clone())
                    .and_then(|value| value.attrpath())
                    .map(|p| p.to_string());
            }
            _ => {}
        }
        step = step.unwrap().next_sibling_or_token();
    }
}
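/// Find the attrpath-value node that follows a comment token; if its value
/// is a lambda, render a definition-list entry for every flat argument,
/// falling back to a numbered "Function argument" placeholder when an
/// argument has no doc comment of its own. Pattern arguments are currently
/// skipped, and the `_ident` parameter is unused.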
fn get_argument_docs(token: &SyntaxToken, _ident: &str) -> Option<String> {
    let mut step = token.next_sibling_or_token();
    // Find the value of the documented binding; it is expected to be a lambda.
    let doc_expr = loop {
        match step {
            None => break None,
            Some(NodeOrToken::Node(ref node))
                if node.kind() == SyntaxKind::NODE_ATTRPATH_VALUE =>
            {
                break AttrpathValue::cast(node.clone()).and_then(|value| value.value());
            }
            _ => {}
        }
        step = step.unwrap().next_sibling_or_token();
    };
let mut argument_docs: Option<String> = None;
if let Some(Expr::Lambda(l)) = doc_expr {
let args = collect_lambda_args(l);
let mut docs = String::new();
for (pos, arg) in args.iter().enumerate() {
match arg {
Argument::Flat(single_arg) => {
docs.push_str(&format!(
"`{}`\n\n: {}\n\n",
single_arg.name,
handle_indentation(
&single_arg
.clone()
.doc
.unwrap_or(format!("{}\\. Function argument", pos + 1))
)
.unwrap_or(format!("{}\\. Function argument", pos + 1)),
));
}
Argument::Pattern(_pattern) => (),
}
}
argument_docs = Some(docs);
}
    argument_docs
}
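/// Convert the text of a single legacy `/* ... */` comment into a
/// `/** ... */` doc-comment: the content is re-indented, an `# Inputs`
/// section is built from the documented lambda's arguments (if any), and the
/// rest of the rendering is delegated to `parse_doc_comment`.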
fn format_comment(text: &str, token: &SyntaxToken) -> String {
let content = text.strip_prefix("/*").unwrap().strip_suffix("*/").unwrap();
let mut whitespace = "";
let prev = &token.prev_token();
if let Some(prev) = prev {
whitespace = prev.text();
}
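    // The indentation is the number of spaces in the preceding whitespace
    // token, rounded down to an even number.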
let stripped = Regex::new(r#" +"#).unwrap().replace_all(whitespace, "");
let indentation = (whitespace.len() - stripped.len()) / 2 * 2;
let indent_1 = " ".repeat(indentation);
let indent_2 = " ".repeat(indentation + 2);
let lines: Vec<String> = content
.lines()
.map(|content| format!("{}{}", indent_2, content))
.collect();
let argument_block = if let Some(argument_docs) = get_argument_docs(token, &indent_2) {
let mut res = String::new();
if !argument_docs.trim().is_empty() {
res.push_str(&format!("\n\n{indent_2}# Inputs\n"));
for line in argument_docs.lines() {
if !line.trim().is_empty() {
res.push_str(&format!("\n{indent_2}{line}"));
} else {
                    res.push('\n');
}
}
}
Some(res)
} else {
None
};
let name = get_binding_name(token);
let markdown = parse_doc_comment(&lines.join("\n"), indentation + 2, argument_block, name);
    format!("/**\n{}{}*/", markdown, indent_1)
}
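/// Strip the common leading space-indentation from `text` and re-indent
/// every non-empty line by `indent` spaces.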
fn format_code(text: String, indent: usize) -> String {
    let mut content = text
        .trim_end_matches('\n')
        .trim_start_matches('\n')
        .to_owned();
    // Dedent the block one space column at a time until a line reaches column 0.
    while let Some(stripped) = strip_column(&content) {
        content = stripped;
    }
    let mut result = String::new();
    let left: String = " ".repeat(indent);
    for line in content.lines() {
        if line.is_empty() {
            result.push('\n');
        } else {
            result.push_str(&format!("{left}{line}\n"));
        }
    }
    result
}
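/// Strip one leading space from every line, provided every non-empty line
/// starts with at least one space; returns `None` once no further column can
/// be stripped. For example, one pass turns " a\n  b" into "a\n b".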
fn strip_column(text: &str) -> Option<String> {
    let mut result: Vec<&str> = vec![];
    // Check whether every non-empty line is indented by at least one space.
    let mut has_unindented_line = false;
    for line in text.lines() {
        if line.is_empty() {
            continue;
        }
        if !line.starts_with(' ') {
            has_unindented_line = true;
        }
    }
    if !has_unindented_line && !text.is_empty() {
        for line in text.lines() {
            if let Some(stripped) = line.strip_prefix(' ') {
                result.push(stripped);
            } else {
                result.push("");
            }
        }
        return Some(result.join("\n"));
    }
    None
}
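/// Replace the first `/* ... */` comment that is neither a doc-comment
/// (`/**`) nor a line comment (`#`) with its converted doc-comment form.
/// Returns the updated tree, or `None` if no such comment was found.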
fn replace_first_comment(syntax: &SyntaxNode) -> Option<SyntaxNode> {
    let mut result = None;
    for ev in syntax.preorder_with_tokens() {
        if let WalkEvent::Enter(NodeOrToken::Token(token)) = ev {
            if token.kind() != SyntaxKind::TOKEN_COMMENT {
                continue;
            }
            if token.text().starts_with("/**") || token.text().starts_with('#') {
                // Already a doc-comment, or a line comment that is not supposed to be migrated.
                continue;
            }
            let replacement: GreenToken = GreenToken::new(
                rowan::SyntaxKind(token.kind() as u16),
                &format_comment(token.text(), &token),
            );
            let green = token.replace_with(replacement);
            let updated = syntax.replace_with(green);
            result = Some(rnix::SyntaxNode::new_root(updated));
            break;
        }
    }
    result
}
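/// Walk the directory given as the first CLI argument and convert the
/// comments of every `.nix` file in place.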
fn main() {
let args: Vec<String> = env::args().collect();
if let Some(path) = &args.get(1) {
println!("trying to read path: {path}");
for entry in WalkDir::new(path)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
{
let f_name = entry.file_name().to_string_lossy();
if f_name.ends_with(".nix") {
modify_file_inplace(entry.path().to_path_buf());
}
}
} else {
println!("Usage: codemod <dirPath>");
}
}
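/// Repeatedly apply `replace_first_comment` until no further comment can be
/// converted. Returns the final tree (if anything changed) together with the
/// number of converted comments.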
fn replace_all(syntax: SyntaxNode) -> (Option<SyntaxNode>, i32) {
let mut maybe_replaced = replace_first_comment(&syntax);
let mut count = 0;
let r: Option<SyntaxNode> = loop {
if let Some(replaced) = maybe_replaced {
// Maybe we can replace more
count += 1;
let result = replace_first_comment(&replaced);
// If we cannot replace more comments
if result.is_none() {
break Some(replaced);
}
maybe_replaced = result;
} else {
break None;
}
};
(r, count)
}
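/// Parse a Nix file, convert all of its legacy comments into doc-comments,
/// and write the result back to the same path.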
fn modify_file_inplace(file_path: PathBuf) {
let contents = fs::read_to_string(&file_path);
if let Err(_) = contents {
println!("Could not read the file {:?}", file_path);
return;
}
let root = rnix::Root::parse(&contents.unwrap()).ok();
if let Err(err) = &root {
        println!(
            "failed to parse input of file: {:?}\n\ngot error: {}",
            file_path, err
        );
return;
}
let syntax = root.unwrap().syntax().clone_for_update();
    let display_name = file_path.display();
if let (Some(updates), count) = replace_all(syntax) {
let mut file = File::create(&file_path).unwrap();
file.write_all(updates.text().to_string().as_bytes()).ok();
println!("{display_name} - Changed {count} comments");
}
}