Provide syntax warnings to Java (#10645)

Translate syntax warnings and attach them to the IR when translating operator applications.

Follow-up work: we should ensure that every `Tree` is checked for warnings and that every warning is attached to some IR. That would require a bit of refactoring: in `TreeToIr`, we could define helpers that wrap every IR constructor and accept a `Tree` parameter. The `Tree` would be used to populate the `IdentifiedLocation` when constructing the IR type, and then to attach all warnings after the IR object has been constructed.

# Important Notes
- Updates the JNI dependency (`jni` 0.19 → 0.21).
- Introduces a `cargo bench` runner for the parser.
This commit is contained in:
Kaz Wesley 2024-07-24 13:54:23 -04:00 committed by GitHub
parent 7e0870267e
commit 8b48637691
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 196 additions and 58 deletions

72
Cargo.lock generated
View File

@ -2515,16 +2515,18 @@ checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
[[package]]
name = "jni"
version = "0.19.0"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97"
dependencies = [
"cesu8",
"cfg-if",
"combine",
"jni-sys",
"log",
"thiserror",
"walkdir",
"windows-sys 0.45.0",
]
[[package]]
@ -5192,13 +5194,22 @@ version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
dependencies = [
"windows_aarch64_gnullvm 0.42.1",
"windows_aarch64_msvc 0.42.1",
"windows_i686_gnu 0.42.1",
"windows_i686_msvc 0.42.1",
"windows_x86_64_gnu 0.42.1",
"windows_x86_64_gnullvm 0.42.1",
"windows_x86_64_msvc 0.42.1",
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
@ -5219,6 +5230,21 @@ dependencies = [
"windows-targets 0.52.4",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
@ -5251,9 +5277,9 @@ dependencies = [
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
@ -5269,9 +5295,9 @@ checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
@ -5287,9 +5313,9 @@ checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
[[package]]
name = "windows_i686_gnu"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
@ -5305,9 +5331,9 @@ checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
[[package]]
name = "windows_i686_msvc"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
@ -5323,9 +5349,9 @@ checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
@ -5341,9 +5367,9 @@ checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
@ -5359,9 +5385,9 @@ checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.1"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"

View File

@ -19,6 +19,7 @@ import org.enso.compiler.core.ir.Module;
import org.enso.compiler.core.ir.Name;
import org.enso.compiler.core.ir.Pattern;
import org.enso.compiler.core.ir.Type;
import org.enso.compiler.core.ir.Warning;
import org.enso.compiler.core.ir.expression.Application;
import org.enso.compiler.core.ir.expression.Case;
import org.enso.compiler.core.ir.expression.Comment;
@ -35,6 +36,7 @@ import org.enso.syntax2.ArgumentDefinition;
import org.enso.syntax2.Base;
import org.enso.syntax2.DocComment;
import org.enso.syntax2.Line;
import org.enso.syntax2.Parser;
import org.enso.syntax2.TextElement;
import org.enso.syntax2.Token;
import org.enso.syntax2.Tree;
@ -918,7 +920,9 @@ final class TreeToIr {
var lhs = unnamedCallArgument(app.getLhs());
var rhs = unnamedCallArgument(app.getRhs());
var loc = getIdentifiedLocation(app);
yield applyOperator(op, lhs, rhs, loc);
var ir = applyOperator(op, lhs, rhs, loc);
attachTranslatedWarnings(ir, app);
yield ir;
}
};
}
@ -1197,6 +1201,14 @@ final class TreeToIr {
};
}
/**
 * Copies every syntax warning recorded on {@code tree} onto {@code ir}.
 *
 * <p>Each parser warning is converted to its message via {@link Parser#getWarningMessage} and
 * wrapped in a {@link Warning.Syntax} that refers to {@code ir}, then appended to the diagnostics
 * of {@code ir}. Nothing is added when the tree carries no warnings.
 *
 * @param ir the IR node that receives the translated warnings
 * @param tree the syntax tree node whose warnings are translated
 */
private void attachTranslatedWarnings(IR ir, Tree tree) {
  for (var syntaxWarning : tree.getWarnings()) {
    var translated = new Warning.Syntax(ir, Parser.getWarningMessage(syntaxWarning));
    ir.diagnostics().add(translated);
  }
}
private Operator applyOperator(Token.Operator op, CallArgument lhs, CallArgument rhs,
Option<IdentifiedLocation> loc) {
var name = new Name.Literal(

View File

@ -144,4 +144,19 @@ object Warning {
override def diagnosticKeys(): Array[Any] = Array(ir.name)
}
/** A warning that carries a syntax-related message for an IR node.
  *
  * @param ir the IR node the warning is associated with; its location is
  *           reported as the location of this warning
  * @param message the warning text, returned unchanged by `message(source)`
  */
case class Syntax(ir: IR, message: String) extends Warning {
/** @param source unused by this warning; the stored text is returned as-is
  * @return a human-readable description of this warning.
*/
override def message(source: (IdentifiedLocation => String)): String =
message
/** The location at which the diagnostic occurs (the location of `ir`). */
override val location: Option[IdentifiedLocation] = ir.location
/** The important keys identifying identity of the diagnostic.
  * This warning provides no keys: the returned array is always empty.
*/
override def diagnosticKeys(): Array[Any] = Array()
}
}

View File

@ -1,5 +1,6 @@
//! Parses Enso sources, measuring time spent in the parser.
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
@ -11,24 +12,78 @@
// =============
// === Tests ===
// =============
// ===========
// === CLI ===
// ===========
fn main() {
let args = std::env::args().skip(1);
let parser = enso_parser::Parser::new();
let parse_time: std::time::Duration = args
.map(|path| {
let code = std::fs::read_to_string(path).unwrap();
let mut code = code.as_str();
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let code = read_source(path).unwrap();
let start = std::time::Instant::now();
std::hint::black_box(parser.run(code));
std::hint::black_box(parser.run(&code));
start.elapsed()
})
.sum();
println!("Parse time: {} ms", parse_time.as_millis());
}
/// Reads the file at `path` and returns the code to be parsed.
///
/// When `enso_parser::metadata::parse` splits the file contents into a metadata part and a code
/// part, only the code part is returned; otherwise the whole file contents are returned.
///
/// # Errors
///
/// Returns the I/O error produced while reading the file.
fn read_source(path: impl AsRef<Path>) -> io::Result<String> {
    let contents = fs::read_to_string(path)?;
    let without_metadata =
        enso_parser::metadata::parse(&contents).map(|(_meta, body)| body.to_owned());
    Ok(without_metadata.unwrap_or(contents))
}
// ===============================
// === `cargo bench` interface ===
// ===============================
extern crate test;
use std::fs::DirEntry;
use std::fs::{self};
use std::io;
use std::path::Path;
/// Recursively walks `dir`, calling `f` for every entry that is not a directory.
///
/// Sub-directories are descended into rather than reported. If `dir` itself is not a directory
/// (including when it does not exist), nothing is visited and `Ok(())` is returned.
///
/// # Errors
///
/// Returns the first I/O error hit while listing a directory or reading one of its entries.
fn visit_files<F: FnMut(&DirEntry)>(dir: &Path, f: &mut F) -> io::Result<()> {
    if !dir.is_dir() {
        return Ok(());
    }
    for listing in fs::read_dir(dir)? {
        let entry = listing?;
        let child = entry.path();
        if !child.is_dir() {
            f(&entry);
            continue;
        }
        visit_files(&child, f)?;
    }
    Ok(())
}
/// Benchmarks parsing of every `.enso` file found under `../../../../distribution/lib`.
///
/// The directory is resolved relative to the current working directory. All sources are loaded
/// into memory before the measured closure runs, so each iteration only parses the pre-loaded
/// sources. `b.bytes` is set to the total size of the loaded sources.
///
/// # Panics
///
/// Panics if the directory cannot be traversed or if any source file cannot be read.
#[bench]
fn bench_std_lib(b: &mut test::Bencher) {
    let mut sources = Vec::new();
    let mut collect_enso_source = |entry: &DirEntry| {
        let path = entry.path();
        if path.extension().map_or(false, |ext| ext == "enso") {
            sources.push(read_source(path).unwrap());
        }
    };
    visit_files(Path::new("../../../../distribution/lib"), &mut collect_enso_source).unwrap();
    let parser = enso_parser::Parser::new();
    b.bytes = sources.iter().map(|source| source.len() as u64).sum();
    b.iter(|| {
        sources.iter().for_each(|source| {
            test::black_box(parser.run(source));
        });
    });
}

View File

@ -94,6 +94,8 @@ public final class Parser implements AutoCloseable {
private static native long getMetadata(long state);
private static native String getWarningTemplate(int warningId);
static native long getUuidHigh(long metadata, long codeOffset, long codeLength);
static native long getUuidLow(long metadata, long codeOffset, long codeLength);
@ -131,6 +133,10 @@ public final class Parser implements AutoCloseable {
return Tree.deserialize(message);
}
/**
 * Returns the message for the given parser warning.
 *
 * <p>The message is obtained by passing the warning's ID to the native {@code
 * getWarningTemplate} lookup.
 *
 * @param warning the parser warning to describe
 * @return the template string associated with the warning's ID
 */
public static String getWarningMessage(Warning warning) {
  var warningId = warning.getId();
  return getWarningTemplate(warningId);
}
@Override
public void close() {
freeState(state);

View File

@ -12,7 +12,7 @@ license-file = "../../LICENSE"
[dependencies]
enso-prelude = { path = "../../prelude" }
enso-parser = { path = "../" }
jni = "0.19.0"
jni = "0.21.0"
[lib]
name = "enso_parser"

View File

@ -14,6 +14,7 @@ use enso_prelude::*;
use jni::objects::JByteBuffer;
use jni::objects::JClass;
use jni::sys::jobject;
use jni::sys::jstring;
use jni::JNIEnv;
@ -37,18 +38,13 @@ static FAILED_SERIALIZE_AST: &str = "Failed to serialize AST to binary format.";
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
env: JNIEnv,
mut env: JNIEnv,
_class: JClass,
state: u64,
input: JByteBuffer,
) -> jobject {
let state = unsafe { &mut *(state as usize as *mut State) };
let input = env.get_direct_buffer_address(input).expect(DIRECT_ALLOCATED);
let input = if cfg!(debug_assertions) {
std::str::from_utf8(input).unwrap()
} else {
unsafe { std::str::from_utf8_unchecked(input) }
};
let input = unsafe { decode_utf8_buffer(&env, &input) };
let mut code = input;
let mut meta = None;
if let Some((meta_, code_)) = enso_parser::metadata::parse(input) {
@ -70,8 +66,9 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
}
};
state.metadata = meta;
let result = env.new_direct_byte_buffer(&mut state.output);
result.unwrap().into_inner()
let result =
unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
result.unwrap().into_raw()
}
/// Parse the input. Returns a serialize format compatible with a lazy deserialization strategy. The
@ -86,24 +83,20 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseTreeLazy(
env: JNIEnv,
mut env: JNIEnv,
_class: JClass,
state: u64,
input: JByteBuffer,
) -> jobject {
let state = unsafe { &mut *(state as usize as *mut State) };
let input = env.get_direct_buffer_address(input).expect(DIRECT_ALLOCATED);
let input = if cfg!(debug_assertions) {
std::str::from_utf8(input).unwrap()
} else {
unsafe { std::str::from_utf8_unchecked(input) }
};
let input = unsafe { decode_utf8_buffer(&env, &input) };
let tree = enso_parser::Parser::new().run(input);
state.output = enso_parser::format::serialize(&tree).expect(FAILED_SERIALIZE_AST);
let result = env.new_direct_byte_buffer(&mut state.output);
result.unwrap().into_inner()
let result =
unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
result.unwrap().into_raw()
}
/// Determine the token variant of the provided input.
@ -114,12 +107,7 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_isIdentOrOperator(
_class: JClass,
input: JByteBuffer,
) -> u64 {
let input = env.get_direct_buffer_address(input).expect(DIRECT_ALLOCATED);
let input = if cfg!(debug_assertions) {
std::str::from_utf8(input).unwrap()
} else {
unsafe { std::str::from_utf8_unchecked(input) }
};
let input = unsafe { decode_utf8_buffer(&env, &input) };
let parsed = enso_parser::lexer::run(input);
if parsed.internal_error.is_some() {
@ -207,6 +195,19 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_freeState(
}
}
/// Returns the string template corresponding to the given warning ID.
///
/// The ID is used as an index into `enso_parser::syntax::WARNINGS`. An ID with no entry yields
/// the fallback text `"Unknown warning ID"` instead of failing.
///
/// # Panics
///
/// Panics if the JVM fails to create the Java string for the template.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_getWarningTemplate(
    env: JNIEnv,
    _class: JClass,
    id: u32,
) -> jstring {
    let template = match enso_parser::syntax::WARNINGS.get(id as usize) {
        Some(known) => *known,
        None => "Unknown warning ID",
    };
    let java_string = env.new_string(template).unwrap();
    java_string.into_raw()
}
/// Return the high bits of the UUID associated with the specified node.
///
/// # Safety
@ -258,6 +259,29 @@ fn get_uuid(metadata: u64, code_offset: u64, code_length: u64) -> (u64, u64) {
}
}
/// Reinterprets `input` as a string slice without copying.
///
/// Builds with debug assertions enabled validate the bytes and panic on invalid UTF-8; other
/// builds skip validation entirely.
///
/// # Safety
///
/// The input MUST be valid UTF-8.
#[allow(unsafe_code)]
unsafe fn decode_utf8_unchecked(input: &[u8]) -> &str {
    if !cfg!(debug_assertions) {
        return std::str::from_utf8_unchecked(input);
    }
    std::str::from_utf8(input).unwrap()
}
/// Views the contents of a direct Java byte buffer as a string slice without copying.
///
/// The slice spans the buffer's full capacity, starting at its direct address.
///
/// # Panics
///
/// Panics if the address or capacity of `buffer` cannot be obtained from the JVM.
///
/// # Safety
///
/// The input buffer contents MUST be valid UTF-8. The returned slice points straight into the
/// buffer's memory, so that memory must stay valid and unmodified for as long as the slice is
/// in use.
#[allow(unsafe_code)]
unsafe fn decode_utf8_buffer<'a>(env: &JNIEnv, buffer: &'a JByteBuffer) -> &'a str {
    let start = env.get_direct_buffer_address(buffer).expect(DIRECT_ALLOCATED);
    let capacity = env.get_direct_buffer_capacity(buffer).expect(DIRECT_ALLOCATED);
    decode_utf8_unchecked(slice::from_raw_parts(start, capacity))
}
// ====================