Skip to content

Commit dcde8d1

Browse files
authored
chore: Upgrade to Winnow 0.7.0 (#79)
* chore: Upgrade to Winnow 0.6.26
* refactor: Resolve deprecations
* refactor: Switch from Parser to ModalParser
* chore: Upgrade to Winnow 0.7.0
* refactor: Remove use of ErrMode
1 parent 7142a12 commit dcde8d1

File tree

2 files changed

+41
-50
lines changed

2 files changed

+41
-50
lines changed

Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ categories = ["development-tools"]
1414

1515
[dependencies]
1616
unicode_categories = "0.1.1"
17-
winnow = { version = "0.6.23", features = ["simd"] }
17+
winnow = { version = "0.7.0", features = ["simd"] }
1818

1919
[dev-dependencies]
2020
criterion = "0.4"

src/tokenizer.rs

+40-49
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,10 @@ use unicode_categories::UnicodeCategories;
33
use winnow::ascii::{digit0, digit1, till_line_ending, Caseless};
44
use winnow::combinator::{alt, dispatch, eof, fail, opt, peek, terminated};
55
use winnow::error::ContextError;
6-
use winnow::error::ErrMode;
7-
use winnow::error::ErrorKind;
8-
use winnow::error::ParserError as _;
6+
use winnow::error::ParserError;
97
use winnow::prelude::*;
10-
use winnow::stream::{ContainsToken as _, Stream as _};
118
use winnow::token::{any, one_of, rest, take, take_until, take_while};
12-
use winnow::PResult;
9+
use winnow::Result;
1310

1411
pub(crate) fn tokenize(mut input: &str, named_placeholders: bool) -> Vec<Token<'_>> {
1512
let mut tokens: Vec<Token> = Vec::new();
@@ -101,7 +98,7 @@ fn get_next_token<'a>(
10198
last_reserved_token: Option<Token<'a>>,
10299
last_reserved_top_level_token: Option<Token<'a>>,
103100
named_placeholders: bool,
104-
) -> PResult<Token<'a>> {
101+
) -> Result<Token<'a>> {
105102
alt((
106103
get_comment_token,
107104
get_string_token,
@@ -124,14 +121,14 @@ fn get_next_token<'a>(
124121
))
125122
.parse_next(input)
126123
}
127-
fn get_double_colon_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
124+
fn get_double_colon_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
128125
"::".parse_next(input).map(|token| Token {
129126
kind: TokenKind::DoubleColon,
130127
value: token,
131128
key: None,
132129
})
133130
}
134-
fn get_whitespace_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
131+
fn get_whitespace_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
135132
take_while(1.., char::is_whitespace)
136133
.parse_next(input)
137134
.map(|token| Token {
@@ -141,7 +138,7 @@ fn get_whitespace_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
141138
})
142139
}
143140

144-
fn get_comment_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
141+
fn get_comment_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
145142
dispatch! {any;
146143
'#' => till_line_ending.value(TokenKind::LineComment),
147144
'-' => ('-', till_line_ending).value(TokenKind::LineComment),
@@ -193,7 +190,7 @@ pub fn take_till_escaping<'a>(
193190
// 3. double quoted string using "" or \" to escape
194191
// 4. single quoted string using '' or \' to escape
195192
// 5. national character quoted string using N'' or N\' to escape
196-
fn get_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
193+
fn get_string_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
197194
dispatch! {any;
198195
'`' => (take_till_escaping('`', &['`']), any).void(),
199196
'[' => (take_till_escaping(']', &[']']), any).void(),
@@ -213,7 +210,7 @@ fn get_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
213210
}
214211

215212
// Like above but it doesn't replace double quotes
216-
fn get_placeholder_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
213+
fn get_placeholder_string_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
217214
dispatch! {any;
218215
'`'=>( take_till_escaping('`', &['`']), any).void(),
219216
'['=>( take_till_escaping(']', &[']']), any).void(),
@@ -231,7 +228,7 @@ fn get_placeholder_string_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
231228
})
232229
}
233230

234-
fn get_open_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
231+
fn get_open_paren_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
235232
alt(("(", terminated(Caseless("CASE"), end_of_word)))
236233
.parse_next(input)
237234
.map(|token| Token {
@@ -241,7 +238,7 @@ fn get_open_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
241238
})
242239
}
243240

244-
fn get_close_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
241+
fn get_close_paren_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
245242
alt((")", terminated(Caseless("END"), end_of_word)))
246243
.parse_next(input)
247244
.map(|token| Token {
@@ -251,7 +248,7 @@ fn get_close_paren_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
251248
})
252249
}
253250

254-
fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> PResult<Token<'i>> {
251+
fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> Result<Token<'i>> {
255252
// The precedence changes based on 'named_placeholders' but not the exhaustiveness.
256253
// This is to ensure the formatting is the same even if parameters aren't used.
257254

@@ -272,7 +269,7 @@ fn get_placeholder_token<'i>(input: &mut &'i str, named_placeholders: bool) -> P
272269
}
273270
}
274271

275-
fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
272+
fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
276273
alt(((one_of(('?', '$')), digit1).take(), "?"))
277274
.parse_next(input)
278275
.map(|token| Token {
@@ -294,7 +291,7 @@ fn get_indexed_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>>
294291
})
295292
}
296293

297-
fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
294+
fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
298295
(
299296
one_of(('@', ':', '$')),
300297
take_while(1.., |item: char| {
@@ -313,7 +310,7 @@ fn get_ident_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'
313310
})
314311
}
315312

316-
fn get_string_named_placeholder_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
313+
fn get_string_named_placeholder_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
317314
(one_of(('@', ':')), get_placeholder_string_token)
318315
.take()
319316
.parse_next(input)
@@ -332,7 +329,7 @@ fn get_escaped_placeholder_key<'a>(key: &'a str, quote_char: &str) -> Cow<'a, st
332329
Cow::Owned(key.replace(&format!("\\{}", quote_char), quote_char))
333330
}
334331

335-
fn get_number_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
332+
fn get_number_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
336333
(opt("-"), alt((scientific_notation, decimal_number, digit1)))
337334
.take()
338335
.parse_next(input)
@@ -343,11 +340,11 @@ fn get_number_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
343340
})
344341
}
345342

346-
fn decimal_number<'i>(input: &mut &'i str) -> PResult<&'i str> {
343+
fn decimal_number<'i>(input: &mut &'i str) -> Result<&'i str> {
347344
(digit1, ".", digit0).take().parse_next(input)
348345
}
349346

350-
fn scientific_notation<'i>(input: &mut &'i str) -> PResult<&'i str> {
347+
fn scientific_notation<'i>(input: &mut &'i str) -> Result<&'i str> {
351348
(
352349
alt((decimal_number, digit1)),
353350
"e",
@@ -363,17 +360,17 @@ fn get_reserved_word_token<'a>(
363360
previous_token: Option<Token<'a>>,
364361
last_reserved_token: Option<Token<'a>>,
365362
last_reserved_top_level_token: Option<Token<'a>>,
366-
) -> PResult<Token<'a>> {
363+
) -> Result<Token<'a>> {
367364
// A reserved word cannot be preceded by a "."
368365
// this makes it so in "my_table.from", "from" is not considered a reserved word
369366
if let Some(token) = previous_token {
370367
if token.value == "." {
371-
return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
368+
return Err(ParserError::from_input(input));
372369
}
373370
}
374371

375372
if !('a'..='z', 'A'..='Z', '$').contains_token(input.chars().next().unwrap_or('\0')) {
376-
return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
373+
return Err(ParserError::from_input(input));
377374
}
378375

379376
alt((
@@ -406,7 +403,7 @@ fn get_top_level_reserved_token<'a>(
406403
let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();
407404

408405
// Match keywords based on their first letter
409-
let result: PResult<&str> = match first_char {
406+
let result: Result<&str> = match first_char {
410407
'A' => alt((
411408
terminated("ADD", end_of_word),
412409
terminated("AFTER", end_of_word),
@@ -462,10 +459,7 @@ fn get_top_level_reserved_token<'a>(
462459
'W' => terminated("WHERE", end_of_word).parse_next(&mut uc_input),
463460

464461
// If the first character doesn't match any of our keywords, fail early
465-
_ => Err(ErrMode::from_error_kind(
466-
&uc_input,
467-
winnow::error::ErrorKind::Tag,
468-
)),
462+
_ => Err(ParserError::from_input(&uc_input)),
469463
};
470464

471465
if let Ok(token) = result {
@@ -490,7 +484,7 @@ fn get_top_level_reserved_token<'a>(
490484
key: None,
491485
})
492486
} else {
493-
Err(ErrMode::from_error_kind(input, ErrorKind::Tag))
487+
Err(ParserError::from_input(input))
494488
}
495489
}
496490
}
@@ -554,7 +548,7 @@ fn get_newline_reserved_token<'a>(
554548
));
555549

556550
// Combine all parsers
557-
let result: PResult<&str> = alt((standard_joins, specific_joins, special_joins, operators))
551+
let result: Result<&str> = alt((standard_joins, specific_joins, special_joins, operators))
558552
.parse_next(&mut uc_input);
559553

560554
if let Ok(token) = result {
@@ -577,16 +571,16 @@ fn get_newline_reserved_token<'a>(
577571
key: None,
578572
})
579573
} else {
580-
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
574+
Err(ParserError::from_input(input))
581575
}
582576
}
583577
}
584578

585-
fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
579+
fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> Result<Token<'i>> {
586580
let uc_input = get_uc_words(input, 2);
587581
let mut uc_input = uc_input.as_str();
588582

589-
let result: PResult<&str> = alt((
583+
let result: Result<&str> = alt((
590584
terminated("BEGIN", end_of_word),
591585
terminated("DECLARE", end_of_word),
592586
terminated("INTERSECT", end_of_word),
@@ -608,19 +602,19 @@ fn get_top_level_reserved_token_no_indent<'i>(input: &mut &'i str) -> PResult<To
608602
key: None,
609603
})
610604
} else {
611-
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
605+
Err(ParserError::from_input(input))
612606
}
613607
}
614-
fn get_plain_reserved_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
608+
fn get_plain_reserved_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
615609
alt((get_plain_reserved_two_token, get_plain_reserved_one_token)).parse_next(input)
616610
}
617-
fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
611+
fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
618612
let uc_input = get_uc_words(input, 1);
619613
let mut uc_input = uc_input.as_str();
620614

621615
let first_char = peek(any).parse_next(input)?.to_ascii_uppercase();
622616

623-
let result: PResult<&str> = match first_char {
617+
let result: Result<&str> = match first_char {
624618
'A' => alt((
625619
terminated("ACCESSIBLE", end_of_word),
626620
terminated("ACTION", end_of_word),
@@ -995,10 +989,7 @@ fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
995989

996990
'Y' => alt((terminated("YEAR_MONTH", end_of_word),)).parse_next(&mut uc_input),
997991
// If the first character doesn't match any of our keywords, fail early
998-
_ => Err(ErrMode::from_error_kind(
999-
&uc_input,
1000-
winnow::error::ErrorKind::Tag,
1001-
)),
992+
_ => Err(ParserError::from_input(&uc_input)),
1002993
};
1003994
if let Ok(token) = result {
1004995
let input_end_pos = token.len();
@@ -1009,14 +1000,14 @@ fn get_plain_reserved_one_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
10091000
key: None,
10101001
})
10111002
} else {
1012-
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
1003+
Err(ParserError::from_input(input))
10131004
}
10141005
}
10151006

1016-
fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
1007+
fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
10171008
let uc_input = get_uc_words(input, 2);
10181009
let mut uc_input = uc_input.as_str();
1019-
let result: PResult<&str> = alt((
1010+
let result: Result<&str> = alt((
10201011
terminated("CHARACTER SET", end_of_word),
10211012
terminated("ON DELETE", end_of_word),
10221013
terminated("ON UPDATE", end_of_word),
@@ -1032,11 +1023,11 @@ fn get_plain_reserved_two_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
10321023
key: None,
10331024
})
10341025
} else {
1035-
Err(ErrMode::from_error_kind(input, ErrorKind::Alt))
1026+
Err(ParserError::from_input(input))
10361027
}
10371028
}
10381029

1039-
fn get_word_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
1030+
fn get_word_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
10401031
take_while(1.., is_word_character)
10411032
.parse_next(input)
10421033
.map(|token| Token {
@@ -1046,7 +1037,7 @@ fn get_word_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
10461037
})
10471038
}
10481039

1049-
fn get_operator_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
1040+
fn get_operator_token<'i>(input: &mut &'i str) -> Result<Token<'i>> {
10501041
// Define the allowed operator characters
10511042
let allowed_operators = (
10521043
'!', '<', '>', '=', '|', ':', '-', '~', '*', '&', '@', '^', '?', '#', '/', '%',
@@ -1060,7 +1051,7 @@ fn get_operator_token<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
10601051
})
10611052
.parse_next(input)
10621053
}
1063-
fn get_any_other_char<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
1054+
fn get_any_other_char<'i>(input: &mut &'i str) -> Result<Token<'i>> {
10641055
one_of(|token| token != '\n' && token != '\r')
10651056
.take()
10661057
.parse_next(input)
@@ -1071,7 +1062,7 @@ fn get_any_other_char<'i>(input: &mut &'i str) -> PResult<Token<'i>> {
10711062
})
10721063
}
10731064

1074-
fn end_of_word<'i>(input: &mut &'i str) -> PResult<&'i str> {
1065+
fn end_of_word<'i>(input: &mut &'i str) -> Result<&'i str> {
10751066
peek(alt((
10761067
eof,
10771068
one_of(|val: char| !is_word_character(val)).take(),

0 commit comments

Comments
 (0)