Remove token value #267

Merged (5 commits) on Oct 9, 2024
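The core change: lexer tokens no longer carry a value field. A token is just a kind plus a byte span, and callers recover the text by slicing the source, as in enderpy_token.to_string(lexer.source) and token.display_token(test_input) in the hunks below. A minimal standalone sketch of the idea, not the actual enderpy types (the real Token lives in parser/src/token.rs; the u32 offsets match the diff, the rest is illustrative):

// Span-based token: kind + offsets, no owned text.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind {
    Identifier,
    Integer,
}

#[derive(Debug, Clone, Copy)]
struct Token {
    kind: Kind,
    start: u32, // byte offset into the source
    end: u32,   // exclusive end offset
}

impl Token {
    // Recover the token text on demand by slicing the source.
    fn text<'a>(&self, source: &'a str) -> &'a str {
        &source[self.start as usize..self.end as usize]
    }
}

fn main() {
    let source = "answer = 42";
    let ident = Token { kind: Kind::Identifier, start: 0, end: 6 };
    let number = Token { kind: Kind::Integer, start: 9, end: 11 };
    assert_eq!(ident.text(source), "answer");
    assert_eq!(number.text(source), "42");
    println!("{:?} -> {}", number.kind, number.text(source));
}

The trade-off: tokens shrink and the lexer skips per-token string allocations, at the cost of threading the source through wherever token text is needed.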
2 changes: 1 addition & 1 deletion benchmark/benches/parser_benchmark.rs
@@ -56,7 +56,7 @@ pub fn benchmark_parser(c: &mut Criterion) {
         &source,
         |b, source| {
             b.iter(|| {
-                let mut parser = Parser::new(source, path);
+                let mut parser = Parser::new(source);
                 parser.parse().unwrap();

                 0
10 changes: 9 additions & 1 deletion compat/src/lexer_compat.rs
@@ -430,7 +430,7 @@ fn check_tokens_match(
     }

     let python_token_value = python_token.value.clone();
-    let enderpy_token_value = enderpy_token.value.to_string();
+    let enderpy_token_value = enderpy_token.to_string(lexer.source);
     // The Python tokenizer sets values in a number of places where Enderpy simply relies
     // on kind to assume value. Handle those cases here.
    let value_matches = matches_python_name_token(python_token.value.as_str(), &enderpy_token.kind)
@@ -507,6 +507,8 @@ fn matches_python_name_token(python_token_value: &str, token_kind: &Kind) -> bool {
         "while" => token_kind == &Kind::While,
         "with" => token_kind == &Kind::With,
         "yield" => token_kind == &Kind::Yield,
+        "match" => token_kind == &Kind::Match,
+        "type" => token_kind == &Kind::Type,
         _ => token_kind == &Kind::Identifier,
     }
 }
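Note: match and type are soft keywords in Python (PEP 634, PEP 695), so CPython's tokenizer reports them as plain NAME tokens, while enderpy now lexes them as dedicated kinds. A self-contained sketch of the bridging check above, with Kind cut down to three variants for brevity:

#[derive(Debug, PartialEq)]
enum Kind {
    Match,
    Type,
    Identifier,
}

// A CPython NAME token matches either the corresponding
// soft-keyword kind or a plain identifier.
fn matches_python_name_token(python_token_value: &str, token_kind: &Kind) -> bool {
    match python_token_value {
        "match" => token_kind == &Kind::Match,
        "type" => token_kind == &Kind::Type,
        _ => token_kind == &Kind::Identifier,
    }
}

fn main() {
    assert!(matches_python_name_token("match", &Kind::Match));
    assert!(matches_python_name_token("type", &Kind::Type));
    assert!(matches_python_name_token("foo", &Kind::Identifier));
}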
@@ -903,6 +905,12 @@ print(a)
     ]);
 }

+// TODO: fstring middle offset is wrong in case of {{ or }}
+#[test]
+fn test_fstring_positions() {
+    python_tokenize_test_lexer(&["f\"{{{', '.join(dict_items)}}}\""]);
+}
+
 #[test]
 #[should_panic]
 fn test_lex_unterminated_string_double_quotes() {
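Note on the TODO in this new test: the old lexer computed an FStringMiddle token's end as start plus the length of the unescaped value (parse_token_value, deleted below, collapsed {{ to { and }} to }), so doubled braces made the span shorter than the raw source text. The new end = self.current always covers the raw text. A small illustration of the off-by-N in plain Rust arithmetic, not enderpy code (the offsets are made up):

fn main() {
    // Middle text as written inside an f-string: three bytes of source.
    let raw = "{{x";
    // The old code unescaped doubled braces before measuring length.
    let unescaped = raw.replace("{{", "{").replace("}}", "}");
    let start = 2u32; // assumed offset of the middle part in the f-string
    let old_end = start + unescaped.len() as u32; // 4: one byte short
    let new_end = start + raw.len() as u32; // 5: matches the raw source
    assert_eq!(old_end, 4);
    assert_eq!(new_end, 5);
    println!("old end: {old_end}, new end: {new_end}");
}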
2 changes: 1 addition & 1 deletion compat/src/parser_compat.rs
@@ -110,7 +110,7 @@ fn remove_unimplemented_attributes(value: &mut Value) {
 }

 fn parse_enderpy_source(source: &str) -> Result<Value> {
-    let mut parser = Parser::new(source, "string");
+    let mut parser = Parser::new(source);
     let typed_ast = parser.parse().into_diagnostic()?;
     let ast = typed_ast.as_python_compat(&parser);
     Ok(ast)
5 changes: 2 additions & 3 deletions enderpy/src/main.rs
@@ -91,13 +91,12 @@ fn tokenize() -> Result<()> {
         let (start_line_num, start_line_column, end_line_num, end_line_column) =
             get_row_col_position(token.start, token.end, &lexer.line_starts);
         println!(
-            "{}-{}, {}-{}: {} {} {} {}",
+            "{}-{}, {}-{}: {} {} {}",
             start_line_num,
             start_line_column,
             end_line_num,
             end_line_column,
             token.kind,
-            token.value,
             token.start,
             token.end,
         );
@@ -108,7 +107,7 @@
 fn parse(file: &PathBuf) -> Result<()> {
     let source = fs::read_to_string(file).into_diagnostic()?;
     let file_path = file.to_str().unwrap_or("");
-    let mut parser = Parser::new(&source, file_path);
+    let mut parser = Parser::new(&source);
     let ast = parser.parse();
     println!("{:#?}", ast);
     Ok(())
24 changes: 0 additions & 24 deletions parser/src/error.rs
@@ -1,8 +1,6 @@
 use miette::Diagnostic;
 use thiserror::Error;

-use crate::parser::parser::Parser;
-
 #[derive(Error, Diagnostic, Debug, Clone)]
 pub enum ParsingError {
     #[error("Invalid syntax")]
@@ -16,28 +14,6 @@ pub enum ParsingError {
     },
 }

-impl From<Parser<'_>> for ParsingError {
-    fn from(err: Parser) -> Self {
-        let token = err.cur_token();
-        ParsingError::InvalidSyntax {
-            msg: token.value.to_string(),
-            advice: String::default(),
-            span: err.get_span_on_line(token.start, token.end),
-        }
-    }
-}
-
-impl From<&mut Parser<'_>> for ParsingError {
-    fn from(err: &mut Parser) -> Self {
-        let token = err.cur_token();
-        ParsingError::InvalidSyntax {
-            msg: token.value.to_string(),
-            advice: String::default(),
-            span: err.get_span_on_line(token.start, token.end),
-        }
-    }
-}
-
 #[derive(Error, Debug)]
 pub enum LexError {
     #[error("String not terminated")]
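Note: these From<Parser> conversions read token.value, which no longer exists, and the PR deletes them without a direct replacement in this file. A hedged sketch of one way an InvalidSyntax error could now be built from a span (the helper name and the (u32, u32) span shape are assumptions, not code from this PR):

#[derive(Debug)]
enum ParsingError {
    InvalidSyntax { msg: String, advice: String, span: (u32, u32) },
}

// Hypothetical helper: slice the offending token text out of the source.
fn invalid_syntax(source: &str, start: u32, end: u32) -> ParsingError {
    ParsingError::InvalidSyntax {
        msg: source[start as usize..end as usize].to_string(),
        advice: String::default(),
        span: (start, end),
    }
}

fn main() {
    let source = "def f(:";
    let err = invalid_syntax(source, 6, 7); // the stray ':'
    println!("{err:?}");
}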
66 changes: 9 additions & 57 deletions parser/src/lexer/mod.rs
@@ -3,7 +3,7 @@ use unicode_id_start::{is_id_continue, is_id_start};
 use crate::{
     error::LexError,
     get_row_col_position,
-    token::{Kind, Token, TokenValue},
+    token::{Kind, Token},
 };

 #[derive(Debug, Clone, Copy, PartialEq)]
@@ -101,7 +101,6 @@ impl<'a> Lexer<'a> {
                 self.next_token_is_dedent -= 1;
                 return Token {
                     kind: Kind::Dedent,
-                    value: TokenValue::None,
                     start: self.current,
                     end: self.current,
                 };
@@ -138,22 +137,12 @@
         if kind != Kind::Comment && kind != Kind::NL && kind != Kind::Dedent {
             self.non_logical_line_state = kind == Kind::NewLine;
         }
-        let value = self.parse_token_value(kind, start);
-        let end = match kind {
-            Kind::FStringMiddle => start + value.as_str().expect("").len() as u32,
-            _ => self.current,
-        };
-
+        let end = self.current;
         if kind == Kind::Dedent {
             start = end
         }

-        Token {
-            kind,
-            value,
-            start,
-            end,
-        }
+        Token { kind, start, end }
     }

     // peek_token is a side-effect free version of next_token
@@ -750,6 +739,8 @@ impl<'a> Lexer<'a> {
             "while" => Kind::While,
             "with" => Kind::With,
             "yield" => Kind::Yield,
+            "match" => Kind::Match,
+            "type" => Kind::Type,
             _ => Kind::Identifier,
         }
     }
@@ -1053,43 +1044,6 @@ impl<'a> Lexer<'a> {
         }
     }

-    fn parse_token_value(&mut self, kind: Kind, start: u32) -> TokenValue {
-        let kind_value = &self.source[start as usize..self.current as usize];
-        match kind {
-            Kind::Integer
-            | Kind::Hexadecimal
-            | Kind::Binary
-            | Kind::PointFloat
-            | Kind::Octal
-            | Kind::ExponentFloat
-            | Kind::ImaginaryInteger
-            | Kind::ImaginaryExponentFloat
-            | Kind::ImaginaryPointFloat => TokenValue::Number(kind_value.to_string()),
-            Kind::Identifier => match kind_value {
-                "type" => TokenValue::Type,
-                "match" => TokenValue::Match,
-                _ => TokenValue::Str(kind_value.to_string()),
-            },
-            Kind::StringLiteral
-            | Kind::FStringStart
-            | Kind::FStringEnd
-            | Kind::RawBytes
-            | Kind::RawFStringStart
-            | Kind::Bytes
-            | Kind::Unicode
-            | Kind::Comment => TokenValue::Str(kind_value.to_string()),
-            Kind::FStringMiddle => {
-                let value = kind_value.replace("{{", "{");
-                let value = value.replace("}}", "}");
-                TokenValue::Str(value)
-            }
-            Kind::Dedent => TokenValue::Indent(1),
-            Kind::Indent => TokenValue::Indent(1),
-            Kind::Error => TokenValue::Str(kind_value.to_string()),
-            _ => TokenValue::None,
-        }
-    }
-
     fn f_string_quote_count(&mut self, str_start: char) -> u8 {
         let mut count = 1;
         if self.peek() == Some(str_start) && self.double_peek() == Some(str_start) {
@@ -1115,15 +1069,14 @@ mod tests {

     fn snapshot_test_lexer_and_errors(test_case: &str) {
         let mut lexer = Lexer::new(test_case);
-        let mut tokens = vec![];
         let mut snapshot = String::from("");
         loop {
             let token = lexer.next_token();
             if token.kind == Kind::Eof {
                 break;
             }
-            snapshot += format!("{}\n", token).as_str();
-            tokens.push(token);
+            snapshot += token.display_token(test_case).as_str();
+            snapshot += "\n";
         }
         let mut settings = insta::Settings::clone_current();
         settings.set_snapshot_path("../../test_data/output/");
@@ ... @@
     fn snapshot_test_lexer(snap_name: &str, inputs: &[&str]) -> Result<(), LexError> {
         for (i, test_input) in inputs.iter().enumerate() {
             let mut lexer = Lexer::new(test_input);
-            let mut tokens = vec![];
             let mut snapshot = String::from("");
             loop {
                 let token = lexer.next_token();
                 if token.kind == Kind::Eof {
                     break;
                 }
-                snapshot += format!("{}\n", token).as_str();
-                tokens.push(token);
+                snapshot += token.display_token(test_input).as_str();
+                snapshot += "\n";
             }
             let mut settings = insta::Settings::clone_current();
             settings.set_snapshot_suffix(format!("{snap_name}-{i}"));
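Note: the snapshot helpers now pass the raw source into display_token, since a token no longer knows its own text. The real implementation is not shown in this diff; the sketch below is a guess at its shape, inferred only from the call sites (the signature and the output format are assumptions):

#[derive(Debug, Clone, Copy)]
enum Kind {
    Identifier,
}

struct Token {
    kind: Kind,
    start: u32,
    end: u32,
}

impl Token {
    // Assumed shape: render the span, kind, and recovered source text.
    fn display_token(&self, source: &str) -> String {
        let text = &source[self.start as usize..self.end as usize];
        format!("{},{}: {:?} {:?}", self.start, self.end, self.kind, text)
    }
}

fn main() {
    let source = "x = 1";
    let tok = Token { kind: Kind::Identifier, start: 0, end: 1 };
    assert_eq!(tok.display_token(source), "0,1: Identifier \"x\"");
}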
15 changes: 4 additions & 11 deletions parser/src/parser/mod.rs
@@ -11,31 +11,24 @@ use crate::{
     parser::ast::{Expression, JoinedStr},
 };
 pub fn is_at_compound_statement(token: &Token) -> bool {
-    let kind_is_statement = match token.kind {
+    match token.kind {
         Kind::If
         | Kind::While
         | Kind::For
         | Kind::Try
         | Kind::With
         | Kind::Def
         | Kind::Class
+        | Kind::Type
+        | Kind::Match
         // Decorator
         | Kind::MatrixMul
         | Kind::Async => true,
         _ => false,
-    };
-    if kind_is_statement {
-        return true;
-    }
-
-    // Match is a soft keyword so it's an identifier token
-    if Kind::Identifier == token.kind && token.value.to_string() == "match" {
-        return true;
-    }
-
-    false
+    }
 }

 // TODO: performance
 pub fn extract_string_inside(val: String) -> String {
     let delimiters = vec!["\"\"\"", "\"", "'''", "'"];
     let mut result = String::new();
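Note: with Match and Type as first-class kinds, the old two-step check (a keyword match plus a soft-keyword probe on token.value) collapses into a single match, as the hunk above shows. A reduced sketch of the resulting shape (Kind trimmed to a few variants; not the real enum):

#[derive(Debug, Clone, Copy)]
enum Kind {
    If,
    Def,
    Class,
    Match,
    Type,
    MatrixMul, // `@`, which can open a decorator
    Identifier,
}

// One pattern now covers every compound-statement opener,
// including the former soft keywords.
fn is_at_compound_statement(kind: Kind) -> bool {
    matches!(
        kind,
        Kind::If | Kind::Def | Kind::Class | Kind::Match | Kind::Type | Kind::MatrixMul
    )
}

fn main() {
    assert!(is_at_compound_statement(Kind::Match));
    assert!(!is_at_compound_statement(Kind::Identifier));
}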