diff --git a/Cargo.lock b/Cargo.lock index 65d0ad8c..30dd7662 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -326,6 +326,7 @@ dependencies = [ "assert_matches", "cached", "criterion", + "imstr", "indenter", "peg", "pprof", @@ -1269,6 +1270,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "imstr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d3441deb04ea9c6b472f313be54d585428cd6a68cdb8dcf40cf43744bec78fe" +dependencies = [ + "peg-runtime", +] + [[package]] name = "indent" version = "0.1.1" diff --git a/brush-core/benches/shell.rs b/brush-core/benches/shell.rs index c515e695..f18a093e 100644 --- a/brush-core/benches/shell.rs +++ b/brush-core/benches/shell.rs @@ -30,7 +30,7 @@ mod unix { } async fn eval_arithmetic_expr(shell: &mut brush_core::Shell, expr: &str) { - let parsed_expr = brush_parser::arithmetic::parse(expr).unwrap(); + let parsed_expr = brush_parser::arithmetic::parse(&expr.into()).unwrap(); let _ = shell.eval_arithmetic(parsed_expr).await.unwrap(); } @@ -107,7 +107,7 @@ mod unix { shell.funcs.update( String::from("testfunc"), Arc::new(brush_parser::ast::FunctionDefinition { - fname: String::from("testfunc"), + fname: "testfunc".into(), body: brush_parser::ast::FunctionBody( brush_parser::ast::CompoundCommand::BraceGroup( brush_parser::ast::BraceGroupCommand(brush_parser::ast::CompoundList( @@ -116,7 +116,7 @@ mod unix { ), None, ), - source: String::from("/some/path"), + source: "/some/path".into(), }), ); c.bench_function("function_call", |b| { diff --git a/brush-core/src/arithmetic.rs b/brush-core/src/arithmetic.rs index 4fd975a5..7decd38d 100644 --- a/brush-core/src/arithmetic.rs +++ b/brush-core/src/arithmetic.rs @@ -60,7 +60,7 @@ impl ExpandAndEvaluate for ast::UnexpandedArithmeticExpr { .map_err(|_e| EvalError::FailedToExpandExpression)?; // Now parse. - let expr = brush_parser::arithmetic::parse(&expanded_self) + let expr = brush_parser::arithmetic::parse(&expanded_self.clone().into()) .map_err(|_e| EvalError::ParseError(expanded_self))?; // Trace if applicable. 
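A minimal sketch of the call-site pattern this change applies throughout brush-core (the expression text and the `eval_example` helper are illustrative only): callers that previously handed the arithmetic parser a `&str` now build an `imstr::ImString`-backed value via `Into` first, since `brush_parser::arithmetic::parse` takes `&ArithmeticString` (an alias for `imstr::ImString` added later in this diff).

```rust
// Sketch only; assumes the `ArithmeticString` alias and the updated
// `parse(&ArithmeticString)` signature introduced in brush-parser below.
use brush_parser::arithmetic::{parse, ArithmeticString};

fn eval_example(expr: &str) {
    // Borrowed `&str` input is converted once into the ImString-backed type...
    let owned: ArithmeticString = expr.into();
    // ...and the cached parse entry point can then clone that handle cheaply
    // instead of re-allocating the input as a fresh `String`.
    let _ast = parse(&owned).expect("arithmetic expression should parse");
}
```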
diff --git a/brush-core/src/builtins/complete.rs b/brush-core/src/builtins/complete.rs index a38169bc..fb1358b2 100644 --- a/brush-core/src/builtins/complete.rs +++ b/brush-core/src/builtins/complete.rs @@ -480,7 +480,7 @@ impl builtins::Command for CompGenCommand { command_name: None, token_index: 0, tokens: &[&brush_parser::Token::Word( - token_to_complete.to_owned(), + token_to_complete.to_owned().into(), brush_parser::TokenLocation::default(), )], input_line: token_to_complete, diff --git a/brush-core/src/builtins/declare.rs b/brush-core/src/builtins/declare.rs index 15153381..11f024f7 100644 --- a/brush-core/src/builtins/declare.rs +++ b/brush-core/src/builtins/declare.rs @@ -339,7 +339,7 @@ impl DeclareCommand { commands::CommandArg::Assignment(assignment) => { match &assignment.name { brush_parser::ast::AssignmentName::VariableName(var_name) => { - name = var_name.to_owned(); + name = var_name.clone().into_std_string(); assigned_index = None; } brush_parser::ast::AssignmentName::ArrayElementName(var_name, index) => { @@ -350,8 +350,8 @@ impl DeclareCommand { return Err(error::Error::AssigningListToArrayMember); } - name = var_name.to_owned(); - assigned_index = Some(index.to_owned()); + name = var_name.clone().into_std_string(); + assigned_index = Some(index.clone().into_std_string()); } } @@ -360,11 +360,12 @@ impl DeclareCommand { if let Some(index) = &assigned_index { initial_value = Some(ShellValueLiteral::Array(ArrayLiteral(vec![( Some(index.to_owned()), - s.value.clone(), + s.value.to_string(), )]))); name_is_array = true; } else { - initial_value = Some(ShellValueLiteral::Scalar(s.value.clone())); + initial_value = + Some(ShellValueLiteral::Scalar(s.value.clone().into_std_string())); name_is_array = false; } } @@ -372,7 +373,10 @@ impl DeclareCommand { initial_value = Some(ShellValueLiteral::Array(ArrayLiteral( a.iter() .map(|(i, v)| { - (i.as_ref().map(|w| w.value.clone()), v.value.clone()) + ( + i.as_ref().map(|w| w.value.clone().into_std_string()), + v.value.clone().into_std_string(), + ) }) .collect(), ))); diff --git a/brush-core/src/builtins/export.rs b/brush-core/src/builtins/export.rs index 19541f4a..7aa4f173 100644 --- a/brush-core/src/builtins/export.rs +++ b/brush-core/src/builtins/export.rs @@ -63,13 +63,16 @@ impl builtins::Command for ExportCommand { let value = match &assignment.value { brush_parser::ast::AssignmentValue::Scalar(s) => { - variables::ShellValueLiteral::Scalar(s.flatten()) + variables::ShellValueLiteral::Scalar(s.flatten().into_std_string()) } brush_parser::ast::AssignmentValue::Array(a) => { variables::ShellValueLiteral::Array(variables::ArrayLiteral( a.iter() .map(|(k, v)| { - (k.as_ref().map(|k| k.flatten()), v.flatten()) + ( + k.as_ref().map(|k| k.flatten().into_std_string()), + v.flatten().into_std_string(), + ) }) .collect(), )) @@ -78,7 +81,7 @@ impl builtins::Command for ExportCommand { // Update the variable with the provided value and then mark it exported. 
context.shell.env.update_or_add( - name, + name.as_str(), value, |var| { var.export(); diff --git a/brush-core/src/builtins/let_.rs b/brush-core/src/builtins/let_.rs index 9f2cb4ae..e6ff3aef 100644 --- a/brush-core/src/builtins/let_.rs +++ b/brush-core/src/builtins/let_.rs @@ -24,7 +24,7 @@ impl builtins::Command for LetCommand { } for expr in &self.exprs { - let parsed = brush_parser::arithmetic::parse(expr.as_str())?; + let parsed = brush_parser::arithmetic::parse(&expr.to_owned().into())?; let evaluated = parsed.eval(context.shell).await?; if evaluated == 0 { diff --git a/brush-core/src/builtins/unset.rs b/brush-core/src/builtins/unset.rs index 3582e60c..96182005 100644 --- a/brush-core/src/builtins/unset.rs +++ b/brush-core/src/builtins/unset.rs @@ -50,8 +50,10 @@ impl builtins::Command for UnsetCommand { for name in &self.names { if unspecified || self.name_interpretation.shell_variables { - let parameter = - brush_parser::word::parse_parameter(name, &context.shell.parser_options())?; + let parameter = brush_parser::word::parse_parameter( + &name.to_owned().into(), + &context.shell.parser_options(), + )?; let result = match parameter { brush_parser::word::Parameter::Positional(_) => continue, @@ -61,7 +63,7 @@ impl builtins::Command for UnsetCommand { } brush_parser::word::Parameter::NamedWithIndex { name, index } => { // First evaluate the index expression. - let index_as_expr = brush_parser::arithmetic::parse(index.as_str())?; + let index_as_expr = brush_parser::arithmetic::parse(&index)?; let evaluated_index = context.shell.eval_arithmetic(index_as_expr).await?; context diff --git a/brush-core/src/completion.rs b/brush-core/src/completion.rs index 1b497ddb..ce09dc50 100644 --- a/brush-core/src/completion.rs +++ b/brush-core/src/completion.rs @@ -927,8 +927,10 @@ impl Config { // If the position is after the last token, then we need to insert an empty // token for the new token to be generated. - let empty_token = - brush_parser::Token::Word(String::new(), brush_parser::TokenLocation::default()); + let empty_token = brush_parser::Token::Word( + brush_parser::TokenString::new(), + brush_parser::TokenLocation::default(), + ); if completion_token_index == tokens.len() { adjusted_tokens.push(&empty_token); } @@ -978,7 +980,7 @@ impl Config { fn tokenize_input_for_completion(shell: &mut Shell, input: &str) -> Vec { // Best-effort tokenization. if let Ok(tokens) = brush_parser::tokenize_str_with_options( - input, + &input.to_owned().into(), &(shell.parser_options().tokenizer_options()), ) { return tokens; @@ -1157,7 +1159,7 @@ fn simple_tokenize_by_delimiters(input: &str, delimiters: &[char]) -> Vec Result<(), error::Error> { let parser_options = shell.parser_options(); let mut expander = WordExpander::new(shell); - let parameter = brush_parser::word::parse_parameter(name, &parser_options)?; + let parameter = brush_parser::word::parse_parameter(&name.to_owned().into(), &parser_options)?; expander.assign_to_parameter(¶meter, value).await } @@ -379,7 +378,7 @@ impl<'a> WordExpander<'a> { #[allow(clippy::ref_option)] async fn basic_expand_opt_pattern( &mut self, - word: &Option, + word: &Option, ) -> Result, error::Error> { if let Some(word) = word { let pattern = self @@ -461,7 +460,7 @@ impl<'a> WordExpander<'a> { // Expand: tildes, parameters, command substitutions, arithmetic. let mut expansions = vec![]; - for piece in brush_parser::word::parse(brace_expanded.as_str(), &self.parser_options)? { + for piece in brush_parser::word::parse(&brace_expanded.into(), &self.parser_options)? 
{ let piece_expansion = self.expand_word_piece(piece.piece).await?; expansions.push(piece_expansion); } @@ -481,7 +480,8 @@ impl<'a> WordExpander<'a> { return Ok(vec![word.into()]); } - let parse_result = brush_parser::word::parse_brace_expansions(word, &self.parser_options); + let parse_result = + brush_parser::word::parse_brace_expansions(&word.into(), &self.parser_options); if parse_result.is_err() { tracing::error!("failed to parse for brace expansion: {parse_result:?}"); return Ok(vec![word.into()]); @@ -597,10 +597,10 @@ impl<'a> WordExpander<'a> { ) -> Result { let expansion: Expansion = match word_piece { brush_parser::word::WordPiece::Text(s) => { - Expansion::from(ExpansionPiece::Splittable(s)) + Expansion::from(ExpansionPiece::Splittable(s.into_std_string())) } brush_parser::word::WordPiece::SingleQuotedText(s) => { - Expansion::from(ExpansionPiece::Unsplittable(s)) + Expansion::from(ExpansionPiece::Unsplittable(s.into_std_string())) } brush_parser::word::WordPiece::AnsiCQuotedText(s) => { let (expanded, _) = escape::expand_backslash_escapes( @@ -689,8 +689,11 @@ impl<'a> WordExpander<'a> { } brush_parser::word::WordPiece::BackquotedCommandSubstitution(s) | brush_parser::word::WordPiece::CommandSubstitution(s) => { - let output_str = - commands::invoke_command_in_subshell_and_get_output(self.shell, s).await?; + let output_str = commands::invoke_command_in_subshell_and_get_output( + self.shell, + s.into_std_string(), + ) + .await?; // We trim trailing newlines, per spec. let trimmed = output_str.trim_end_matches('\n'); @@ -1055,7 +1058,7 @@ impl<'a> WordExpander<'a> { .set_case_insensitive(self.shell.options.case_insensitive_conditionals); // If no replacement was provided, then we replace with an empty string. - let replacement = replacement.unwrap_or(String::new()); + let replacement = replacement.unwrap_or(WordString::new()); let expanded_replacement = self.basic_expand_to_str(&replacement).await?; let regex = expanded_pattern.to_regex( @@ -1159,7 +1162,7 @@ impl<'a> WordExpander<'a> { if let Some(index) = index { self.shell.env.update_or_add_array_element( - variable_name, + variable_name.as_str(), index, value, |_| Ok(()), @@ -1168,7 +1171,7 @@ impl<'a> WordExpander<'a> { ) } else { self.shell.env.update_or_add( - variable_name, + variable_name.as_str(), variables::ShellValueLiteral::Scalar(value), |_| Ok(()), env::EnvironmentLookup::Anywhere, @@ -1188,7 +1191,7 @@ impl<'a> WordExpander<'a> { let expansion = self.expand_parameter(parameter, true).await?; let parameter_str: String = expansion.into(); let inner_parameter = - brush_parser::word::parse_parameter(parameter_str.as_str(), &self.parser_options)?; + brush_parser::word::parse_parameter(¶meter_str.into(), &self.parser_options)?; Ok(self.try_resolve_parameter_to_variable_without_indirect(&inner_parameter)) } } @@ -1200,14 +1203,17 @@ impl<'a> WordExpander<'a> { let (name, index) = match parameter { brush_parser::word::Parameter::Positional(_) | brush_parser::word::Parameter::Special(_) => (None, None), - brush_parser::word::Parameter::Named(name) => (Some(name.to_owned()), Some("0".into())), - brush_parser::word::Parameter::NamedWithIndex { name, index } => { - (Some(name.to_owned()), Some(index.to_owned())) + brush_parser::word::Parameter::Named(name) => { + (Some(name.clone().into_std_string()), Some("0".into())) } + brush_parser::word::Parameter::NamedWithIndex { name, index } => ( + Some(name.clone().into_std_string()), + Some(index.clone().into_std_string()), + ), brush_parser::word::Parameter::NamedWithAllIndices 
{ name, concatenate: _concatenate, - } => (Some(name.to_owned()), None), + } => (Some(name.clone().into_std_string()), None), }; let var = name @@ -1227,8 +1233,10 @@ impl<'a> WordExpander<'a> { Ok(expansion) } else { let parameter_str: String = expansion.into(); - let inner_parameter = - brush_parser::word::parse_parameter(parameter_str.as_str(), &self.parser_options)?; + let inner_parameter = brush_parser::word::parse_parameter( + ¶meter_str.clone().into(), + &self.parser_options, + )?; self.expand_parameter_without_indirect(&inner_parameter) .await @@ -1325,7 +1333,7 @@ impl<'a> WordExpander<'a> { self.basic_expand_to_str(index).await? } else { let index_expr = ast::UnexpandedArithmeticExpr { - value: index.to_owned(), + value: index.into(), }; self.expand_arithmetic_expr(index_expr).await? }; @@ -1637,7 +1645,7 @@ fn generate_and_combine_brace_expansions( ) -> Vec { let expansions: Vec> = pieces .into_iter() - .map(|piece| piece.generate().collect()) + .map(|piece| piece.generate().map(|x| x.into_std_string()).collect()) .collect(); expansions diff --git a/brush-core/src/interp.rs b/brush-core/src/interp.rs index 5be6a3e2..5bae07b3 100644 --- a/brush-core/src/interp.rs +++ b/brush-core/src/interp.rs @@ -535,7 +535,7 @@ impl Execute for ast::ForClauseCommand { // Update the variable. shell.env.update_or_add( - &self.variable_name, + self.variable_name.as_str(), ShellValueLiteral::Scalar(value), |_| Ok(()), EnvironmentLookup::Anywhere, @@ -798,7 +798,7 @@ impl Execute for ast::FunctionDefinition { ) -> Result { shell .funcs - .update(self.fname.clone(), Arc::new(self.clone())); + .update(self.fname.as_str().to_owned(), Arc::new(self.clone())); let result = ExecutionResult::success(); shell.last_exit_status = result.exit_code; @@ -1063,14 +1063,14 @@ async fn basic_expand_assignment_name( match name { ast::AssignmentName::VariableName(name) => { let expanded = expansion::basic_expand_str(shell, name).await?; - Ok(ast::AssignmentName::VariableName(expanded)) + Ok(ast::AssignmentName::VariableName(expanded.into())) } ast::AssignmentName::ArrayElementName(name, index) => { let expanded_name = expansion::basic_expand_str(shell, name).await?; let expanded_index = expansion::basic_expand_str(shell, index).await?; Ok(ast::AssignmentName::ArrayElementName( - expanded_name, - expanded_index, + expanded_name.into(), + expanded_index.into(), )) } } @@ -1083,9 +1083,7 @@ async fn expand_assignment_value( let expanded = match value { ast::AssignmentValue::Scalar(s) => { let expanded_word = expansion::basic_expand_word(shell, s).await?; - ast::AssignmentValue::Scalar(ast::Word { - value: expanded_word, - }) + ast::AssignmentValue::Scalar(expanded_word.into()) } ast::AssignmentValue::Array(arr) => { let mut expanded_values = vec![]; @@ -1184,10 +1182,12 @@ async fn apply_assignment( if will_be_indexed_array { array_index = Some( - ast::UnexpandedArithmeticExpr { value: idx.clone() } - .eval(shell, false) - .await? - .to_string(), + ast::UnexpandedArithmeticExpr { + value: idx.to_owned().into(), + } + .eval(shell, false) + .await? + .to_string(), ); } } @@ -1241,7 +1241,9 @@ async fn apply_assignment( new_var.export(); } - shell.env.add(variable_name, new_var, creation_scope) + shell + .env + .add(variable_name.as_str(), new_var, creation_scope) } fn setup_pipeline_redirection( @@ -1438,7 +1440,7 @@ pub(crate) async fn setup_redirect( let io_here_doc = if io_here.requires_expansion { expansion::basic_expand_word(shell, &io_here.doc).await? 
} else { - io_here.doc.flatten() + io_here.doc.flatten().into_std_string() }; let f = setup_open_file_with_contents(io_here_doc.as_str())?; diff --git a/brush-core/src/patterns.rs b/brush-core/src/patterns.rs index 5c1a9de8..8c08b1a7 100644 --- a/brush-core/src/patterns.rs +++ b/brush-core/src/patterns.rs @@ -370,10 +370,10 @@ fn pattern_to_regex_str( pattern: &str, enable_extended_globbing: bool, ) -> Result { - Ok(brush_parser::pattern::pattern_to_regex_str( - pattern, - enable_extended_globbing, - )?) + Ok( + brush_parser::pattern::pattern_to_regex_str(&pattern.into(), enable_extended_globbing)? + .into_std_string(), + ) } /// Removes the largest matching prefix from a string that matches the given pattern. diff --git a/brush-core/src/prompt.rs b/brush-core/src/prompt.rs index ba5053aa..92943064 100644 --- a/brush-core/src/prompt.rs +++ b/brush-core/src/prompt.rs @@ -26,7 +26,7 @@ pub(crate) fn expand_prompt(shell: &Shell, spec: String) -> Result Result, brush_parser::WordParseError> { - brush_parser::prompt::parse(spec.as_str()) + brush_parser::prompt::parse(&spec.into()) } pub(crate) fn format_prompt_piece( @@ -34,7 +34,7 @@ pub(crate) fn format_prompt_piece( piece: brush_parser::prompt::PromptPiece, ) -> Result { let formatted = match piece { - brush_parser::prompt::PromptPiece::Literal(l) => l, + brush_parser::prompt::PromptPiece::Literal(l) => l.into_std_string(), brush_parser::prompt::PromptPiece::AsciiCharacter(c) => { char::from_u32(c).map_or_else(String::new, |c| c.to_string()) } @@ -209,7 +209,7 @@ mod tests { assert_eq!( format_date( &dt, - &brush_parser::prompt::PromptDateFormat::Custom(String::from("%Y-%m-%d")) + &brush_parser::prompt::PromptDateFormat::Custom("%Y-%m-%d".into()) ), "2024-12-25" ); @@ -217,9 +217,7 @@ mod tests { assert_eq!( format_date( &dt, - &brush_parser::prompt::PromptDateFormat::Custom(String::from( - "%Y-%m-%d %H:%M:%S.%f" - )) + &brush_parser::prompt::PromptDateFormat::Custom("%Y-%m-%d %H:%M:%S.%f".into()) ), "2024-12-25 12:34:56.789000000" ); diff --git a/brush-core/src/shell.rs b/brush-core/src/shell.rs index 75e4659f..7cf6054e 100644 --- a/brush-core/src/shell.rs +++ b/brush-core/src/shell.rs @@ -443,7 +443,7 @@ impl Shell { } let source_info = brush_parser::SourceInfo { - source: path.to_string_lossy().to_string(), + source: path.to_string_lossy().into(), }; self.source_file(opened_file, &source_info, args, params) @@ -473,7 +473,7 @@ impl Shell { let parse_result = parser.parse(); let mut other_positional_parameters = args.iter().map(|s| s.as_ref().to_owned()).collect(); - let mut other_shell_name = Some(source_info.source.clone()); + let mut other_shell_name = Some(source_info.source.clone().into_std_string()); // TODO: Find a cleaner way to change args. 
std::mem::swap(&mut self.shell_name, &mut other_shell_name); @@ -483,7 +483,7 @@ impl Shell { ); self.script_call_stack - .push_front(source_info.source.clone()); + .push_front(source_info.source.clone().into_std_string()); self.update_bash_source_var()?; let result = self @@ -565,7 +565,7 @@ impl Shell { let parse_result = self.parse_string(command); let source_info = brush_parser::SourceInfo { - source: String::from("main"), + source: "main".into(), }; self.run_parsed_result(parse_result, &source_info, params) .await @@ -901,7 +901,7 @@ impl Shell { } else { self.function_call_stack .iter() - .map(|s| (None, s.function_definition.source.clone())) + .map(|s| (None, s.function_definition.source.clone().into_std_string())) .collect::>() }; @@ -1234,7 +1234,7 @@ fn parse_string_impl( ) -> Result { let mut reader = std::io::BufReader::new(s.as_bytes()); let source_info = brush_parser::SourceInfo { - source: String::from("main"), + source: "main".into(), }; let mut parser: brush_parser::Parser<&mut std::io::BufReader<&[u8]>> = brush_parser::Parser::new(&mut reader, &parser_options, &source_info); diff --git a/brush-interactive/src/reedline/highlighter.rs b/brush-interactive/src/reedline/highlighter.rs index 7f381873..42b12cc0 100644 --- a/brush-interactive/src/reedline/highlighter.rs +++ b/brush-interactive/src/reedline/highlighter.rs @@ -122,7 +122,7 @@ impl<'a> StyledInputLine<'a> { fn style_and_append_program(&mut self, line: &str, global_offset: usize) { #[allow(clippy::cast_sign_loss)] if let Ok(tokens) = brush_parser::tokenize_str_with_options( - line, + &line.to_owned().into(), &(self.shell.parser_options().tokenizer_options()), ) { let mut saw_command_token = false; @@ -137,7 +137,7 @@ impl<'a> StyledInputLine<'a> { } brush_parser::Token::Word(w, token_location) => { if let Ok(word_pieces) = - brush_parser::word::parse(w.as_str(), &self.shell.parser_options()) + brush_parser::word::parse(&w, &self.shell.parser_options()) { let default_text_style = self.get_style_for_word( w.as_str(), diff --git a/brush-parser/Cargo.toml b/brush-parser/Cargo.toml index d7aeea19..0b447a64 100644 --- a/brush-parser/Cargo.toml +++ b/brush-parser/Cargo.toml @@ -20,6 +20,7 @@ fuzz-testing = ["dep:arbitrary"] [dependencies] arbitrary = { version = "1.4.1", optional = true, features = ["derive"] } cached = "0.54.0" +imstr = { version = "0.2.0", features = ["peg"] } indenter = "0.3.3" peg = "0.8.4" thiserror = "2.0.7" diff --git a/brush-parser/benches/parser.rs b/brush-parser/benches/parser.rs index f7f90eb4..d1c0b520 100644 --- a/brush-parser/benches/parser.rs +++ b/brush-parser/benches/parser.rs @@ -1,8 +1,15 @@ #[cfg(unix)] mod unix { - use brush_parser::{parse_tokens, tokenize_str}; + use brush_parser::parse_tokens; use criterion::{black_box, Criterion}; + fn tokenize_str(input: &str) -> Result, brush_parser::TokenizerError> { + brush_parser::tokenize_str_with_options( + &input.into(), + &brush_parser::TokenizerOptions::default(), + ) + } + fn parse_script(contents: &str) -> brush_parser::ast::Program { let tokens = tokenize_str(contents).unwrap(); parse_tokens( diff --git a/brush-parser/src/arithmetic.rs b/brush-parser/src/arithmetic.rs index 7ea61daa..5fc27cda 100644 --- a/brush-parser/src/arithmetic.rs +++ b/brush-parser/src/arithmetic.rs @@ -3,23 +3,26 @@ use crate::ast; use crate::error; +/// Alias for string used by arithmetic parser. +pub type ArithmeticString = imstr::ImString; + /// Parses a shell arithmetic expression. 
/// /// # Arguments /// /// * `input` - The arithmetic expression to parse, in string form. -pub fn parse(input: &str) -> Result { - cacheable_parse(input.to_owned()) +pub fn parse(input: &ArithmeticString) -> Result { + cacheable_parse(input.clone()) } #[cached::proc_macro::cached(size = 64, result = true)] -fn cacheable_parse(input: String) -> Result { +fn cacheable_parse(input: ArithmeticString) -> Result { tracing::debug!(target: "arithmetic", "parsing arithmetic expression: '{input}'"); - arithmetic::full_expression(input.as_str()).map_err(error::WordParseError::ArithmeticExpression) + arithmetic::full_expression(&input).map_err(error::WordParseError::ArithmeticExpression) } peg::parser! { - grammar arithmetic() for str { + grammar arithmetic() for ArithmeticString { pub(crate) rule full_expression() -> ast::ArithmeticExpr = ![_] { ast::ArithmeticExpr::Literal(0) } / _ e:expression() _ { e } @@ -89,21 +92,21 @@ peg::parser! { rule lvalue() -> ast::ArithmeticTarget = name:variable_name() "[" index:expression() "]" { - ast::ArithmeticTarget::ArrayElement(name.to_owned(), Box::new(index)) + ast::ArithmeticTarget::ArrayElement(name, Box::new(index)) } / name:variable_name() { - ast::ArithmeticTarget::Variable(name.to_owned()) + ast::ArithmeticTarget::Variable(name) } - rule variable_name() -> &'input str = + rule variable_name() -> ArithmeticString = $(['a'..='z' | 'A'..='Z' | '_'](['a'..='z' | 'A'..='Z' | '_' | '0'..='9']*)) rule _() -> () = quiet!{[' ' | '\t' | '\n' | '\r']*} {} rule literal_number() -> i64 = // TODO: handle explicit radix (e.g., #) for bases 2 through 64 - "0" ['x' | 'X'] s:$(['0'..='9' | 'a'..='f' | 'A'..='F']*) {? i64::from_str_radix(s, 16).or(Err("i64")) } / - s:$("0" ['0'..='8']*) {? i64::from_str_radix(s, 8).or(Err("i64")) } / + "0" ['x' | 'X'] s:$(['0'..='9' | 'a'..='f' | 'A'..='F']*) {? i64::from_str_radix(s.as_str(), 16).or(Err("i64")) } / + s:$("0" ['0'..='8']*) {? i64::from_str_radix(s.as_str(), 8).or(Err("i64")) } / s:$(['1'..='9'] ['0'..='9']*) {? s.parse().or(Err("i64")) } } } diff --git a/brush-parser/src/ast.rs b/brush-parser/src/ast.rs index cdc40c02..4824dd98 100644 --- a/brush-parser/src/ast.rs +++ b/brush-parser/src/ast.rs @@ -3,10 +3,13 @@ use std::fmt::{Display, Write}; -use crate::tokenizer; +use crate::{tokenizer, word::WordString}; const DISPLAY_INDENT: &str = " "; +/// Alias for the string used in AST types. +pub type AstString = imstr::ImString; + /// Represents a complete shell program. #[derive(Clone, Debug)] #[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))] @@ -247,13 +250,21 @@ impl Display for SubshellCommand { #[cfg_attr(test, derive(PartialEq, Eq))] pub struct ForClauseCommand { /// The name of the iterator variable. - pub variable_name: String, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] + pub variable_name: AstString, /// The values being iterated over. pub values: Option>, /// The command to run for each iteration of the loop. pub body: DoGroupCommand, } +#[cfg(feature = "fuzz-testing")] +pub(crate) fn arbitrary_str(u: &mut arbitrary::Unstructured) -> arbitrary::Result { + use arbitrary::Arbitrary; + let s = String::arbitrary(u)?; + Ok(s.into()) +} + impl Display for ForClauseCommand { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "for {} in ", self.variable_name)?; @@ -517,11 +528,13 @@ impl Display for WhileOrUntilClauseCommand { #[cfg_attr(test, derive(PartialEq, Eq))] pub struct FunctionDefinition { /// The name of the function. 
- pub fname: String, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] + pub fname: AstString, /// The body of the function. pub body: FunctionBody, /// The source of the function definition. - pub source: String, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] + pub source: AstString, } impl Display for FunctionDefinition { @@ -743,9 +756,12 @@ impl Display for Assignment { #[cfg_attr(test, derive(PartialEq, Eq))] pub enum AssignmentName { /// A named variable. - VariableName(String), + VariableName(#[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] AstString), /// An element in a named array. - ArrayElementName(String, String), + ArrayElementName( + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] AstString, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] AstString, + ), } impl Display for AssignmentName { @@ -956,7 +972,7 @@ pub enum TestExpr { /// Always evaluates to false. False, /// A literal string. - Literal(String), + Literal(AstString), /// Logical AND operation on two nested expressions. And(Box, Box), /// Logical OR operation on two nested expressions. @@ -966,9 +982,9 @@ pub enum TestExpr { /// A parenthesized expression. Parenthesized(Box), /// A unary test operation. - UnaryTest(UnaryPredicate, String), + UnaryTest(UnaryPredicate, AstString), /// A binary test operation. - BinaryTest(BinaryPredicate, String, String), + BinaryTest(BinaryPredicate, AstString, AstString), } impl Display for TestExpr { @@ -1184,7 +1200,8 @@ impl Display for BinaryPredicate { #[cfg_attr(test, derive(PartialEq, Eq))] pub struct Word { /// Raw text of the word. - pub value: String, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] + pub value: AstString, } impl Display for Word { @@ -1206,22 +1223,22 @@ impl From<&tokenizer::Token> for Word { } } +// TODO(IMSTR): remove this impl From for Word { fn from(s: String) -> Word { - Word { value: s } + WordString::from(s).into() } } -impl Word { - /// Constructs a new `Word` from a given string. - pub fn new(s: &str) -> Self { - Self { - value: s.to_owned(), - } +impl From for Word { + fn from(s: WordString) -> Word { + Word { value: s } } +} - /// Returns the raw text of the word, consuming the `Word`. - pub fn flatten(&self) -> String { +impl Word { + /// Returns the raw text of the word without consuming the word. + pub fn flatten(&self) -> AstString { self.value.clone() } } @@ -1232,7 +1249,8 @@ impl Word { #[cfg_attr(test, derive(PartialEq, Eq))] pub struct UnexpandedArithmeticExpr { /// The raw text of the expression. - pub value: String, + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] + pub value: AstString, } impl Display for UnexpandedArithmeticExpr { @@ -1473,9 +1491,12 @@ impl Display for UnaryAssignmentOperator { #[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))] pub enum ArithmeticTarget { /// A named variable. - Variable(String), + Variable(#[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] AstString), /// An element in an array. 
- ArrayElement(String, Box), + ArrayElement( + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = arbitrary_str))] AstString, + Box, + ), } impl Display for ArithmeticTarget { diff --git a/brush-parser/src/error.rs b/brush-parser/src/error.rs index fba17c23..51b4d90d 100644 --- a/brush-parser/src/error.rs +++ b/brush-parser/src/error.rs @@ -1,4 +1,5 @@ use crate::tokenizer; +use crate::word::WordString; use crate::Token; /// Represents an error that occurred while parsing tokens. @@ -39,11 +40,11 @@ pub enum WordParseError { /// An error occurred while parsing a parameter. #[error("failed to parse parameter '{0}'")] - Parameter(String, peg::error::ParseError), + Parameter(WordString, peg::error::ParseError), /// An error occurred while parsing for brace expansion. #[error("failed to parse for brace expansion: '{0}'")] - BraceExpansion(String, peg::error::ParseError), + BraceExpansion(WordString, peg::error::ParseError), /// An error occurred while parsing a word. #[error("failed to parse word '{0}'")] diff --git a/brush-parser/src/lib.rs b/brush-parser/src/lib.rs index 3f902291..67af1d9c 100644 --- a/brush-parser/src/lib.rs +++ b/brush-parser/src/lib.rs @@ -16,5 +16,6 @@ mod tokenizer; pub use error::{ParseError, TestCommandParseError, WordParseError}; pub use parser::{parse_tokens, Parser, ParserOptions, SourceInfo}; pub use tokenizer::{ - tokenize_str, tokenize_str_with_options, unquote_str, SourcePosition, Token, TokenLocation, + tokenize_str_with_options, unquote_str, SourcePosition, Token, TokenLocation, TokenString, + TokenizerError, TokenizerOptions, }; diff --git a/brush-parser/src/parser.rs b/brush-parser/src/parser.rs index f2be1e2a..15f26452 100644 --- a/brush-parser/src/parser.rs +++ b/brush-parser/src/parser.rs @@ -1,6 +1,7 @@ use crate::ast::{self, SeparatorOperator}; -use crate::error; use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens}; +use crate::word::WordString; +use crate::{error, tokenizer}; /// Options used to control the behavior of the parser. #[derive(Clone, Eq, Hash, PartialEq)] @@ -167,10 +168,10 @@ impl<'a> peg::ParseElem<'a> for Tokens<'a> { } impl<'a> peg::ParseSlice<'a> for Tokens<'a> { - type Slice = String; + type Slice = tokenizer::TokenString; fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice { - let mut result = String::new(); + let mut result = tokenizer::TokenString::new(); let mut last_token_was_word = false; for token in &self.tokens[start..end] { @@ -199,7 +200,7 @@ impl<'a> peg::ParseSlice<'a> for Tokens<'a> { #[derive(Clone, Default)] pub struct SourceInfo { /// The source of the tokens. - pub source: String, + pub source: tokenizer::TokenString, } peg::parser! { @@ -330,10 +331,10 @@ peg::parser! { rule for_clause() -> ast::ForClauseCommand = specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() { - ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d } + ast::ForClauseCommand { variable_name: n.clone(), values: w, body: d } } / specific_word("for") n:name() sequential_sep()? d:do_group() { - ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d } + ast::ForClauseCommand { variable_name: n.clone(), values: None, body: d } } // N.B. The arithmetic for loop is a non-sh extension. @@ -434,8 +435,8 @@ peg::parser! 
{ regex_word_piece() / !specific_operator(")") !specific_operator("]]") [_] - rule name() -> &'input str = - w:[Token::Word(_, _)] { w.to_str() } + rule name() -> &'input tokenizer::TokenString = + [Token::Word(s, _)] { s } rule _in() -> () = specific_word("in") { } @@ -530,21 +531,21 @@ peg::parser! { // N.B. Non-sh extensions allows use of the 'function' word to indicate a function definition. rule function_definition() -> ast::FunctionDefinition = specific_word("function")? fname:fname() specific_operator("(") specific_operator(")") linebreak() body:function_body() { - ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() } + ast::FunctionDefinition { fname: fname.clone(), body, source: source_info.source.clone() } } / specific_word("function") fname:fname() linebreak() body:function_body() { - ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() } + ast::FunctionDefinition { fname: fname.clone(), body, source: source_info.source.clone() } } / expected!("function definition") rule function_body() -> ast::FunctionBody = c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) } - rule fname() -> &'input str = + rule fname() -> &'input tokenizer::TokenString = // Special-case: don't allow it to end with an equals sign, to avoid the challenge of // misinterpreting certain declaration assignments as function definitions. // TODO: Find a way to make this still work without requiring this targeted exception. - w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() } + w:[Token::Word(word, _) if !word.ends_with('=')] { word } rule brace_group() -> ast::BraceGroupCommand = specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) } @@ -735,9 +736,9 @@ peg::parser! { pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) = non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {? - let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?; + let parsed = parse_array_assignment(w, elements.as_slice())?; - let mut all_as_word = w.to_owned(); + let mut all_as_word = w.clone(); all_as_word.push('('); for (i, e) in elements.iter().enumerate() { if i > 0 { @@ -750,14 +751,14 @@ peg::parser! { Ok((parsed, ast::Word { value: all_as_word })) } / [Token::Word(w, _)] {? - let parsed = parse_assignment_word(w.as_str())?; - Ok((parsed, ast::Word { value: w.to_owned() })) + let parsed = parse_assignment_word(w)?; + Ok((parsed, ast::Word { value: w.clone() })) } - rule array_elements() -> Vec<&'input String> = + rule array_elements() -> Vec<&'input tokenizer::TokenString> = e:array_element()* - rule array_element() -> &'input String = + rule array_element() -> &'input tokenizer::TokenString = linebreak() [Token::Word(e, _)] linebreak() { e } // N.B. An I/O number must be a string of only digits, and it must be @@ -783,7 +784,7 @@ peg::parser! { } peg::parser! { - grammar assignments() for str { + grammar assignments() for crate::word::WordString { pub(crate) rule name_and_scalar_value() -> ast::Assignment = nae:name_and_equals() value:scalar_value() { let (name, append) = nae; @@ -795,30 +796,28 @@ peg::parser! 
{ (name, append.is_some()) } - pub(crate) rule literal_array_element() -> (Option, String) = + pub(crate) rule literal_array_element() -> (Option, WordString) = "[" inner:$((!"]" [_])*) "]=" value:$([_]*) { - (Some(inner.to_owned()), value.to_owned()) + (Some(inner), value) } / - value:$([_]+) { - (None, value.to_owned()) - } + value:$([_]+) { (None, value) } rule name() -> ast::AssignmentName = aen:array_element_name() { let (name, index) = aen; - ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned()) + ast::AssignmentName::ArrayElementName(name, index) } / name:scalar_name() { - ast::AssignmentName::VariableName(name.to_owned()) + ast::AssignmentName::VariableName(name) } - rule array_element_name() -> (&'input str, &'input str) = + rule array_element_name() -> (WordString, WordString) = name:scalar_name() "[" ai:array_index() "]" { (name, ai) } - rule array_index() -> &'input str = + rule array_index() -> WordString = $((![']'] [_])*) - rule scalar_name() -> &'input str = + rule scalar_name() -> WordString = $(alpha_or_underscore() non_first_variable_char()*) rule non_first_variable_char() -> () = @@ -828,11 +827,11 @@ peg::parser! { ['_' | 'a'..='z' | 'A'..='Z'] {} rule scalar_value() -> ast::AssignmentValue = - v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) } + value:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value }) } } } -fn parse_assignment_word(word: &str) -> Result { +fn parse_assignment_word(word: &tokenizer::TokenString) -> Result { let parse_result = assignments::name_and_scalar_value(word); parse_result.map_err(|_| "not assignment word") } @@ -872,8 +871,8 @@ fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static s } fn parse_array_assignment( - word: &str, - elements: &[&String], + word: &tokenizer::TokenString, + elements: &[&tokenizer::TokenString], ) -> Result { let (assignment_name, append) = assignments::name_and_equals(word).map_err(|_| "not array assignment word")?; @@ -886,12 +885,7 @@ fn parse_array_assignment( let elements_as_words = elements .into_iter() - .map(|(key, value)| { - ( - key.map(|k| ast::Word::new(k.as_str())), - ast::Word::new(value.as_str()), - ) - }) + .map(|(key, value)| (key.map(ast::Word::from), ast::Word::from(value))) .collect(); Ok(ast::Assignment { @@ -909,6 +903,10 @@ mod tests { use anyhow::Result; use assert_matches::assert_matches; + fn str_to_word(s: &str) -> ast::Word { + ast::Word { value: s.into() } + } + #[test] fn parse_case() -> Result<()> { let input = r"\ @@ -1042,16 +1040,20 @@ for f in A B C; do seq: vec![Command::Compound( CompoundCommand::ForClause(ForClauseCommand { variable_name: "f".into(), - values: Some(vec![Word::new("A"), Word::new("B"), Word::new("C")]), + values: Some(vec![ + str_to_word("A"), + str_to_word("B"), + str_to_word("C"), + ]), body: DoGroupCommand(CompoundList(vec![CompoundListItem( AndOrList { first: Pipeline { bang: false, seq: vec![Command::Simple(SimpleCommand { prefix: None, - word_or_name: Some(Word::new("echo")), + word_or_name: Some(str_to_word("echo")), suffix: Some(CommandSuffix(vec![ - CommandPrefixOrSuffixItem::Word(Word::new( + CommandPrefixOrSuffixItem::Word(str_to_word( r#""${f@L}""#, )), CommandPrefixOrSuffixItem::IoRedirect( diff --git a/brush-parser/src/pattern.rs b/brush-parser/src/pattern.rs index cfce4934..0d2bde55 100644 --- a/brush-parser/src/pattern.rs +++ b/brush-parser/src/pattern.rs @@ -1,6 +1,6 @@ //! Implements parsing for shell glob and extglob patterns. 
-use crate::error; +use crate::{error, word::WordString}; /// Represents the kind of an extended glob. pub enum ExtendedGlobKind { @@ -23,74 +23,73 @@ pub enum ExtendedGlobKind { /// * `pattern` - The shell pattern to convert. /// * `enable_extended_globbing` - Whether to enable extended globbing (extglob). pub fn pattern_to_regex_str( - pattern: &str, + pattern: &WordString, enable_extended_globbing: bool, -) -> Result { - let regex_str = pattern_to_regex_translator::pattern(pattern, enable_extended_globbing) - .map_err(error::WordParseError::Pattern)?; - Ok(regex_str) +) -> Result { + pattern_to_regex_translator::pattern(pattern, enable_extended_globbing) + .map_err(error::WordParseError::Pattern) } peg::parser! { - grammar pattern_to_regex_translator(enable_extended_globbing: bool) for str { - pub(crate) rule pattern() -> String = + grammar pattern_to_regex_translator(enable_extended_globbing: bool) for WordString { + pub(crate) rule pattern() -> WordString = pieces:(pattern_piece()*) { - pieces.join("") + pieces.join("").into() } - rule pattern_piece() -> String = + rule pattern_piece() -> WordString = escape_sequence() / bracket_expression() / extglob_enabled() s:extended_glob_pattern() { s } / wildcard() / [c if regex_char_needs_escaping(c)] { - let mut s = '\\'.to_string(); + let mut s: WordString = '\\'.into(); s.push(c); s } / - [c] { c.to_string() } + [c] { c.into() } - rule escape_sequence() -> String = - sequence:$(['\\'] [c if regex_char_needs_escaping(c)]) { sequence.to_owned() } / - ['\\'] [c] { c.to_string() } + rule escape_sequence() -> WordString = + sequence:$(['\\'] [c if regex_char_needs_escaping(c)]) { sequence } / + ['\\'] [c] { c.into() } - rule bracket_expression() -> String = + rule bracket_expression() -> WordString = "[" invert:(("!")?) members:bracket_member()+ "]" { let mut members = members; if invert.is_some() { - members.insert(0, String::from("^")); + members.insert(0, WordString::from("^")); } - std::format!("[{}]", members.join("")) + std::format!("[{}]", members.join("")).into() } - rule bracket_member() -> String = + rule bracket_member() -> WordString = char_class_expression() / char_range() / char_list() - rule char_class_expression() -> String = - e:$("[:" char_class() ":]") { e.to_owned() } + rule char_class_expression() -> WordString = + e:$("[:" char_class() ":]") { e } rule char_class() = "alnum" / "alpha" / "blank" / "cntrl" / "digit" / "graph" / "lower" / "print" / "punct" / "space" / "upper"/ "xdigit" - rule char_range() -> String = - range:$([_] "-" [_]) { range.to_owned() } + rule char_range() -> WordString = + range:$([_] "-" [_]) { range } - rule char_list() -> String = - chars:$([c if c != ']']+) { escape_char_class_char_list(chars) } + rule char_list() -> WordString = + chars:$([c if c != ']']+) { escape_char_class_char_list(&chars) } - rule wildcard() -> String = - "?" { String::from(".") } / - "*" { String::from(".*") } + rule wildcard() -> WordString = + "?" { WordString::from(".") } / + "*" { WordString::from(".*") } rule extglob_enabled() -> () = &[_] {? if enable_extended_globbing { Ok(()) } else { Err("extglob disabled") } } - pub(crate) rule extended_glob_pattern() -> String = + pub(crate) rule extended_glob_pattern() -> WordString = kind:extended_glob_prefix() "(" branches:extended_glob_body() ")" { - let mut s = String::new(); + let mut s = WordString::new(); s.push('('); @@ -123,15 +122,15 @@ peg::parser! { "?" 
{ ExtendedGlobKind::Question } / "*" { ExtendedGlobKind::Star } - pub(crate) rule extended_glob_body() -> Vec = + pub(crate) rule extended_glob_body() -> Vec = first_branches:((b:extended_glob_branch() "|" { b })*) last_branch:extended_glob_branch() { let mut branches = first_branches; branches.push(last_branch); branches } - rule extended_glob_branch() -> String = - pieces:(!['|' | ')'] piece:pattern_piece() { piece })* { pieces.join("") } + rule extended_glob_branch() -> WordString = + pieces:(!['|' | ')'] piece:pattern_piece() { piece })* { pieces.join("").into() } } } @@ -147,8 +146,8 @@ pub fn regex_char_needs_escaping(c: char) -> bool { ) } -fn escape_char_class_char_list(s: &str) -> String { - s.replace('[', r"\[") +fn escape_char_class_char_list(s: &WordString) -> WordString { + s.replace('[', r"\[").into() } #[cfg(test)] @@ -158,30 +157,39 @@ mod tests { #[test] fn test_bracket_exprs() -> Result<()> { - assert_eq!(pattern_to_regex_str("[a-z]", true)?, "[a-z]"); - assert_eq!(pattern_to_regex_str("[abc]", true)?, "[abc]"); - assert_eq!(pattern_to_regex_str(r"[\(]", true)?, r"[\(]"); - assert_eq!(pattern_to_regex_str(r"[(]", true)?, "[(]"); - assert_eq!(pattern_to_regex_str("[[:digit:]]", true)?, "[[:digit:]]"); - assert_eq!(pattern_to_regex_str(r"[-(),!]*", true)?, r"[-(),!].*"); - assert_eq!(pattern_to_regex_str(r"[-\(\),\!]*", true)?, r"[-\(\),\!].*"); + assert_eq!(pattern_to_regex_str(&"[a-z]".into(), true)?, "[a-z]"); + assert_eq!(pattern_to_regex_str(&"[abc]".into(), true)?, "[abc]"); + assert_eq!(pattern_to_regex_str(&r"[\(]".into(), true)?, r"[\(]"); + assert_eq!(pattern_to_regex_str(&r"[(]".into(), true)?, "[(]"); + assert_eq!( + pattern_to_regex_str(&"[[:digit:]]".into(), true)?, + "[[:digit:]]" + ); + assert_eq!( + pattern_to_regex_str(&r"[-(),!]*".into(), true)?, + r"[-(),!].*" + ); + assert_eq!( + pattern_to_regex_str(&r"[-\(\),\!]*".into(), true)?, + r"[-\(\),\!].*" + ); Ok(()) } #[test] fn test_extended_glob() -> Result<()> { assert_eq!( - pattern_to_regex_translator::extended_glob_pattern("@(a|b)", true)?, + pattern_to_regex_translator::extended_glob_pattern(&"@(a|b)".into(), true)?, "(a|b)" ); assert_eq!( - pattern_to_regex_translator::extended_glob_body("ab|ac", true)?, + pattern_to_regex_translator::extended_glob_body(&"ab|ac".into(), true)?, vec!["ab", "ac"], ); assert_eq!( - pattern_to_regex_translator::extended_glob_pattern("*(ab|ac)", true)?, + pattern_to_regex_translator::extended_glob_pattern(&"*(ab|ac)".into(), true)?, "(ab|ac)*" ); diff --git a/brush-parser/src/prompt.rs b/brush-parser/src/prompt.rs index 9190b17d..7d626673 100644 --- a/brush-parser/src/prompt.rs +++ b/brush-parser/src/prompt.rs @@ -1,6 +1,6 @@ //! Parser for shell prompt syntax (e.g., `PS1`). -use crate::error; +use crate::{error, word::WordString}; /// A piece of a prompt string. #[derive(Clone)] @@ -40,7 +40,7 @@ pub enum PromptPiece { only_up_to_first_dot: bool, }, /// A literal string. - Literal(String), + Literal(WordString), /// A newline character. Newline, /// The number of actively managed jobs. @@ -65,7 +65,7 @@ pub enum PromptDateFormat { /// A format including weekday, month, and date. WeekdayMonthDate, /// A customer string format. - Custom(String), + Custom(WordString), } /// Format for a time in a prompt. @@ -80,7 +80,7 @@ pub enum PromptTimeFormat { } peg::parser! { - grammar prompt_parser() for str { + grammar prompt_parser() for WordString { pub(crate) rule prompt() -> Vec = pieces:prompt_piece()* @@ -120,13 +120,13 @@ peg::parser! 
{ "\\]" { PromptPiece::EndNonPrintingSequence } rule literal_sequence() -> PromptPiece = - s:$((!special_sequence() [c])+) { PromptPiece::Literal(s.to_owned()) } + s:$((!special_sequence() [c])+) { PromptPiece::Literal(s) } - rule date_format() -> String = - s:$([c if c != '}']*) { s.to_owned() } + rule date_format() -> WordString = + s:$([c if c != '}']*) { s } rule octal_number() -> u32 = - s:$(['0'..='9']*<3,3>) {? u32::from_str_radix(s, 8).or(Err("invalid octal number")) } + s:$(['0'..='9']*<3,3>) {? u32::from_str_radix(s.as_str(), 8).or(Err("invalid octal number")) } } } @@ -135,7 +135,6 @@ peg::parser! { /// # Arguments /// /// * `s` - The prompt string to parse. -pub fn parse(s: &str) -> Result, error::WordParseError> { - let result = prompt_parser::prompt(s).map_err(error::WordParseError::Prompt)?; - Ok(result) +pub fn parse(s: &WordString) -> Result, error::WordParseError> { + prompt_parser::prompt(s).map_err(error::WordParseError::Prompt) } diff --git a/brush-parser/src/test_command.rs b/brush-parser/src/test_command.rs index 82df7dd6..0770b8ec 100644 --- a/brush-parser/src/test_command.rs +++ b/brush-parser/src/test_command.rs @@ -1,5 +1,9 @@ //! Parser for shell test commands. +// +// TODO(IMSTR): figure out how to convert this over. +// + use crate::{ast, error}; /// Parses a test command expression. @@ -27,17 +31,17 @@ peg::parser! { expression() rule one_arg_expr() -> ast::TestExpr = - [s] { ast::TestExpr::Literal(s.to_owned()) } + [s] { ast::TestExpr::Literal(s.into()) } rule two_arg_expr() -> ast::TestExpr = ["!"] e:one_arg_expr() { ast::TestExpr::Not(Box::from(e)) } / - op:unary_op() [s] { ast::TestExpr::UnaryTest(op, s.to_owned()) } / + op:unary_op() [s] { ast::TestExpr::UnaryTest(op, s.into()) } / [_] [_] { ast::TestExpr::False } rule three_arg_expr() -> ast::TestExpr = - [left] ["-a"] [right] { ast::TestExpr::And(Box::from(ast::TestExpr::Literal(left.to_owned())), Box::from(ast::TestExpr::Literal(right.to_owned()))) } / - [left] ["-o"] [right] { ast::TestExpr::Or(Box::from(ast::TestExpr::Literal(left.to_owned())), Box::from(ast::TestExpr::Literal(right.to_owned()))) } / - [left] op:binary_op() [right] { ast::TestExpr::BinaryTest(op, left.to_owned(), right.to_owned()) } / + [left] ["-a"] [right] { ast::TestExpr::And(Box::from(ast::TestExpr::Literal(left.into())), Box::from(ast::TestExpr::Literal(right.into()))) } / + [left] ["-o"] [right] { ast::TestExpr::Or(Box::from(ast::TestExpr::Literal(left.into())), Box::from(ast::TestExpr::Literal(right.into()))) } / + [left] op:binary_op() [right] { ast::TestExpr::BinaryTest(op, left.into(), right.into()) } / ["!"] e:two_arg_expr() { ast::TestExpr::Not(Box::from(e)) } / ["("] e:one_arg_expr() [")"] { e } / [_] [_] [_] { ast::TestExpr::False } @@ -53,11 +57,11 @@ peg::parser! 
{ -- ["!"] e:@ { ast::TestExpr::Not(Box::from(e)) } -- - [left] op:binary_op() [right] { ast::TestExpr::BinaryTest(op, left.to_owned(), right.to_owned()) } + [left] op:binary_op() [right] { ast::TestExpr::BinaryTest(op, left.into(), right.into()) } -- - op:unary_op() [operand] { ast::TestExpr::UnaryTest(op, operand.to_owned()) } + op:unary_op() [operand] { ast::TestExpr::UnaryTest(op, operand.into()) } -- - [s] { ast::TestExpr::Literal(s.to_owned()) } + [s] { ast::TestExpr::Literal(s.into()) } } rule unary_op() -> ast::UnaryPredicate = diff --git a/brush-parser/src/tokenizer.rs b/brush-parser/src/tokenizer.rs index a62c1e75..73394f36 100644 --- a/brush-parser/src/tokenizer.rs +++ b/brush-parser/src/tokenizer.rs @@ -45,6 +45,9 @@ impl Display for SourcePosition { } } +/// Alias for the string type used in tokens. +pub type TokenString = imstr::ImString; + /// Represents the location of a token in its source shell script. #[derive(Clone, Default, Debug)] #[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))] @@ -60,9 +63,17 @@ pub struct TokenLocation { #[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))] pub enum Token { /// An operator token. - Operator(String, TokenLocation), + Operator( + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = crate::ast::arbitrary_str))] + TokenString, + TokenLocation, + ), /// A word token. - Word(String, TokenLocation), + Word( + #[cfg_attr(feature = "fuzz-testing", arbitrary(with = crate::ast::arbitrary_str))] + TokenString, + TokenLocation, + ), } impl Token { @@ -134,7 +145,7 @@ pub enum TokenizerError { /// The indicated I/O here tag was missing. #[error("missing here tag '{0}'")] - MissingHereTag(String), + MissingHereTag(TokenString), /// An unterminated here document sequence was encountered at the end of the input stream. #[error("unterminated here document sequence; tag(s) [{0}] found at: [{1}]")] @@ -146,6 +157,7 @@ pub enum TokenizerError { } impl TokenizerError { + /// Returns whether or not the error indicates that the tokenization process is incomplete. 
pub fn is_incomplete(&self) -> bool { matches!( self, @@ -252,7 +264,7 @@ pub(crate) struct Tokenizer<'a, R: ?Sized + std::io::BufRead> { #[derive(Clone, Debug)] struct TokenParseState { pub start_position: SourcePosition, - pub token_so_far: String, + pub token_so_far: TokenString, pub token_is_operator: bool, pub in_escape: bool, pub quote_mode: QuoteMode, @@ -262,7 +274,7 @@ impl TokenParseState { pub fn new(start_position: &SourcePosition) -> Self { TokenParseState { start_position: start_position.clone(), - token_so_far: String::new(), + token_so_far: TokenString::new(), token_is_operator: false, in_escape: false, quote_mode: QuoteMode::None, @@ -304,7 +316,7 @@ impl TokenParseState { !self.in_escape && matches!(self.quote_mode, QuoteMode::None) } - pub fn current_token(&self) -> &str { + pub fn current_token(&self) -> &TokenString { &self.token_so_far } @@ -320,7 +332,7 @@ impl TokenParseState { self.token_so_far == "\n" } - fn replace_with_here_doc(&mut self, s: String) { + fn replace_with_here_doc(&mut self, s: TokenString) { self.token_so_far = s; } @@ -361,9 +373,7 @@ impl TokenParseState { operator_token_result, } => { if self.is_newline() { - return Err(TokenizerError::MissingHereTag( - self.current_token().to_owned(), - )); + return Err(TokenizerError::MissingHereTag(self.current_token().clone())); } cross_token_state.here_state = HereState::NextLineIsHereDoc; @@ -469,8 +479,9 @@ impl TokenParseState { /// # Arguments /// /// * `input` - The shell script to tokenize. +#[cfg(test)] pub fn tokenize_str(input: &str) -> Result, TokenizerError> { - tokenize_str_with_options(input, &TokenizerOptions::default()) + tokenize_str_with_options(&input.into(), &TokenizerOptions::default()) } /// Break the given input shell script string into tokens, returning the tokens. @@ -480,15 +491,15 @@ pub fn tokenize_str(input: &str) -> Result, TokenizerError> { /// * `input` - The shell script to tokenize. /// * `options` - Options controlling how the tokenizer operates. pub fn tokenize_str_with_options( - input: &str, + input: &TokenString, options: &TokenizerOptions, ) -> Result, TokenizerError> { - cacheable_tokenize_str(input.to_owned(), options.to_owned()) + cacheable_tokenize_str(input.clone(), options.to_owned()) } #[cached::proc_macro::cached(size = 64, result = true)] fn cacheable_tokenize_str( - input: String, + input: TokenString, options: TokenizerOptions, ) -> Result, TokenizerError> { let mut reader = std::io::BufReader::new(input.as_bytes()); @@ -676,9 +687,7 @@ impl<'a, R: ?Sized + std::io::BufRead> Tokenizer<'a, R> { if current_token_without_here_tag.is_empty() || current_token_without_here_tag.ends_with('\n') { - state.replace_with_here_doc( - current_token_without_here_tag.to_owned(), - ); + state.replace_with_here_doc(current_token_without_here_tag.into()); // Delimit the end of the here-document body. result = state.delimit_current_token( @@ -695,7 +704,7 @@ impl<'a, R: ?Sized + std::io::BufRead> Tokenizer<'a, R> { // must be a separate token (because it wouldn't make a prefix of an operator). 
// - let mut hypothetical_token = state.current_token().to_owned(); + let mut hypothetical_token = state.current_token().clone(); hypothetical_token.push(c); if state.unquoted() && self.is_operator(hypothetical_token.as_ref()) { diff --git a/brush-parser/src/word.rs b/brush-parser/src/word.rs index 2828bc1d..a7ee1c22 100644 --- a/brush-parser/src/word.rs +++ b/brush-parser/src/word.rs @@ -13,6 +13,9 @@ use crate::ast; use crate::error; use crate::ParserOptions; +/// Alias for string type used with words. +pub type WordString = imstr::ImString; + /// Encapsulates a `WordPiece` together with its position in the string it came from. #[derive(Clone, Debug)] pub struct WordPieceWithSource { @@ -28,23 +31,23 @@ pub struct WordPieceWithSource { #[derive(Clone, Debug)] pub enum WordPiece { /// A simple unquoted, unescaped string. - Text(String), + Text(WordString), /// A string that is single-quoted. - SingleQuotedText(String), + SingleQuotedText(WordString), /// A string that is ANSI-C quoted. - AnsiCQuotedText(String), + AnsiCQuotedText(WordString), /// A sequence of pieces that are embedded in double quotes. DoubleQuotedSequence(Vec), /// A tilde prefix. - TildePrefix(String), + TildePrefix(WordString), /// A parameter expansion. ParameterExpansion(ParameterExpr), /// A command substitution. - CommandSubstitution(String), + CommandSubstitution(WordString), /// A backquoted command substitution. - BackquotedCommandSubstitution(String), + BackquotedCommandSubstitution(WordString), /// An escape sequence. - EscapeSequence(String), + EscapeSequence(WordString), /// An arithmetic expression. ArithmeticExpression(ast::UnexpandedArithmeticExpr), } @@ -66,18 +69,18 @@ pub enum Parameter { /// A special parameter. Special(SpecialParameter), /// A named variable. - Named(String), + Named(WordString), /// An index into a named variable. NamedWithIndex { /// Variable name. - name: String, + name: WordString, /// Index. - index: String, + index: WordString, }, /// A named array variable with all indices. NamedWithAllIndices { /// Variable name. - name: String, + name: WordString, /// Whether to concatenate the values. concatenate: bool, }, @@ -128,7 +131,7 @@ pub enum ParameterExpr { /// The type of test to perform. test_type: ParameterTestType, /// Default value to conditionally use. - default_value: Option, + default_value: Option, }, /// Conditionally assign default values. AssignDefaultValues { @@ -141,7 +144,7 @@ pub enum ParameterExpr { /// The type of test to perform. test_type: ParameterTestType, /// Default value to conditionally assign. - default_value: Option, + default_value: Option, }, /// Indicate error if null or unset. IndicateErrorIfNullOrUnset { @@ -154,7 +157,7 @@ pub enum ParameterExpr { /// The type of test to perform. test_type: ParameterTestType, /// Error message to conditionally yield. - error_message: Option, + error_message: Option, }, /// Conditionally use an alternative value. UseAlternativeValue { @@ -167,7 +170,7 @@ pub enum ParameterExpr { /// The type of test to perform. test_type: ParameterTestType, /// Alternative value to conditionally use. - alternative_value: Option, + alternative_value: Option, }, /// Compute the length of the given parameter. ParameterLength { @@ -187,7 +190,7 @@ pub enum ParameterExpr { /// for the expansion. indirect: bool, /// Optionally provides a pattern to match. - pattern: Option, + pattern: Option, }, /// Remove the largest suffix from the given string matching the given pattern. 
     RemoveLargestSuffixPattern {
@@ -198,7 +201,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Remove the smallest prefix from the given string matching the given pattern.
     RemoveSmallestPrefixPattern {
@@ -209,7 +212,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Remove the largest prefix from the given string matching the given pattern.
     RemoveLargestPrefixPattern {
@@ -220,7 +223,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Extract a substring from the given parameter.
     Substring {
@@ -258,7 +261,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Uppercase the portion of the given parameter matching the given pattern.
     UppercasePattern {
@@ -269,7 +272,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Lowercase the first character of the given parameter.
     LowercaseFirstChar {
@@ -280,7 +283,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Lowercase the portion of the given parameter matching the given pattern.
     LowercasePattern {
@@ -291,7 +294,7 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Optionally provides a pattern to match.
-        pattern: Option<String>,
+        pattern: Option<WordString>,
     },
     /// Replace occurrences of the given pattern in the given parameter.
     ReplaceSubstring {
@@ -302,23 +305,23 @@ pub enum ParameterExpr {
         /// for the expansion.
         indirect: bool,
         /// Pattern to match.
-        pattern: String,
+        pattern: WordString,
         /// Replacement string.
-        replacement: Option<String>,
+        replacement: Option<WordString>,
         /// Kind of match to perform.
         match_kind: SubstringMatchKind,
     },
     /// Select variable names from the environment with a given prefix.
     VariableNames {
         /// The prefix to match.
-        prefix: String,
+        prefix: WordString,
         /// Whether to concatenate the results.
         concatenate: bool,
     },
     /// Select member keys from the named array.
     MemberKeys {
         /// Name of the array variable.
-        variable_name: String,
+        variable_name: WordString,
         /// Whether to concatenate the results.
         concatenate: bool,
     },
@@ -369,7 +372,7 @@ pub enum BraceExpressionOrText {
     /// A brace expression.
     Expr(BraceExpression),
     /// Other word text.
-    Text(String),
+    Text(WordString),
 }
 
 /// Represents a brace expression to be expanded.
@@ -377,7 +380,7 @@ pub type BraceExpression = Vec<BraceExpressionMember>;
 
 impl BraceExpressionOrText {
     /// Generates expansions for this value.
-    pub fn generate(self) -> Box<dyn Iterator<Item = String>> {
+    pub fn generate(self) -> Box<dyn Iterator<Item = WordString>> {
         match self {
             BraceExpressionOrText::Expr(members) => {
                 let mut iters = vec![];
@@ -413,12 +416,12 @@ pub enum BraceExpressionMember {
         increment: i64,
     },
     /// Text.
-    Text(String),
+    Text(WordString),
 }
 
 impl BraceExpressionMember {
     /// Generates expansions for this member.
-    pub fn generate(self) -> Box<dyn Iterator<Item = String>> {
+    pub fn generate(self) -> Box<dyn Iterator<Item = WordString>> {
         match self {
             BraceExpressionMember::NumberSequence {
                 low,
@@ -427,17 +430,13 @@ impl BraceExpressionMember {
             } => Box::new(
                 (low..=high)
                     .step_by(increment as usize)
-                    .map(|n| n.to_string()),
+                    .map(|n| n.to_string().into()),
             ),
             BraceExpressionMember::CharSequence {
                 low,
                 high,
                 increment,
-            } => Box::new(
-                (low..=high)
-                    .step_by(increment as usize)
-                    .map(|c| c.to_string()),
-            ),
+            } => Box::new((low..=high).step_by(increment as usize).map(|c| c.into())),
             BraceExpressionMember::Text(text) => Box::new(std::iter::once(text)),
         }
     }
@@ -450,23 +449,23 @@ impl BraceExpressionMember {
 /// * `word` - The word to parse.
 /// * `options` - The parser options to use.
 pub fn parse(
-    word: &str,
+    word: &WordString,
     options: &ParserOptions,
 ) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
-    cacheable_parse(word.to_owned(), options.to_owned())
+    cacheable_parse(word.clone(), options.to_owned())
 }
 
 #[cached::proc_macro::cached(size = 64, result = true)]
 fn cacheable_parse(
-    word: String,
+    word: WordString,
     options: ParserOptions,
 ) -> Result<Vec<WordPieceWithSource>, error::WordParseError> {
     tracing::debug!(target: "expansion", "Parsing word '{}'", word);
 
-    let pieces = expansion_parser::unexpanded_word(word.as_str(), &options)
-        .map_err(|err| error::WordParseError::Word(word.to_owned(), err))?;
+    let pieces = expansion_parser::unexpanded_word(&word, &options)
+        .map_err(|err| error::WordParseError::Word(word.clone().into(), err))?;
 
-    tracing::debug!(target: "expansion", "Parsed word '{}' => {{{:?}}}", word, pieces);
+    tracing::debug!(target: "expansion", "Parsed word '{}' => {{{:?}}}", word.as_str(), pieces);
 
     Ok(pieces)
 }
@@ -478,11 +477,11 @@ fn cacheable_parse(
 /// * `word` - The word to parse.
 /// * `options` - The parser options to use.
 pub fn parse_parameter(
-    word: &str,
+    word: &WordString,
    options: &ParserOptions,
 ) -> Result<Parameter, error::WordParseError> {
     expansion_parser::parameter(word, options)
-        .map_err(|err| error::WordParseError::Parameter(word.to_owned(), err))
+        .map_err(|err| error::WordParseError::Parameter(word.clone(), err))
 }
 
 /// Parse brace expansion from a given word .
@@ -492,15 +491,15 @@ pub fn parse_parameter(
 /// * `word` - The word to parse.
 /// * `options` - The parser options to use.
 pub fn parse_brace_expansions(
-    word: &str,
+    word: &WordString,
     options: &ParserOptions,
 ) -> Result<Option<Vec<BraceExpressionOrText>>, error::WordParseError> {
     expansion_parser::brace_expansions(word, options)
-        .map_err(|err| error::WordParseError::BraceExpansion(word.to_owned(), err))
+        .map_err(|err| error::WordParseError::BraceExpansion(word.clone(), err))
 }
 
 peg::parser! {
-    grammar expansion_parser(parser_options: &ParserOptions) for str {
+    grammar expansion_parser(parser_options: &ParserOptions) for WordString {
         pub(crate) rule unexpanded_word() -> Vec<WordPieceWithSource> = word(<![_]>)
 
         rule word(stop_condition: rule) -> Vec<WordPieceWithSource> =
@@ -521,7 +520,7 @@ peg::parser! {
             expr:brace_expr() {
                 BraceExpressionOrText::Expr(expr)
             } /
-            text:$(non_brace_expr_text()+) { BraceExpressionOrText::Text(text.to_owned()) }
+            text:$(non_brace_expr_text()+) { BraceExpressionOrText::Text(text) }
 
         rule non_brace_expr_text() -> () =
             !"{" word_piece(<['{']>, false) {} /
@@ -540,10 +539,10 @@ peg::parser! {
         }
 
         rule brace_text_list_member() -> BraceExpression =
-            &[',' | '}'] { vec![BraceExpressionMember::Text(String::new())] } /
+            &[',' | '}'] { vec![BraceExpressionMember::Text(WordString::new())] } /
             brace_expr() /
             text:$(word_piece(<[',' | '}']>, false)) {
-                vec![BraceExpressionMember::Text(text.to_owned())]
+                vec![BraceExpressionMember::Text(text)]
             }
 
         rule brace_sequence_expr() -> BraceExpressionMember =
@@ -594,8 +593,8 @@ peg::parser! {
 
         rule unquoted_text(stop_condition: rule, in_command: bool) -> WordPiece =
             s:double_quoted_sequence() { WordPiece::DoubleQuotedSequence(s) } /
-            s:single_quoted_literal_text() { WordPiece::SingleQuotedText(s.to_owned()) } /
-            s:ansi_c_quoted_text() { WordPiece::AnsiCQuotedText(s.to_owned()) } /
+            s:single_quoted_literal_text() { WordPiece::SingleQuotedText(s) } /
+            s:ansi_c_quoted_text() { WordPiece::AnsiCQuotedText(s) } /
             normal_escape_sequence() /
             unquoted_literal_text(<stop_condition()>, in_command)
 
@@ -611,14 +610,14 @@ peg::parser! {
             }
         }
 
-        rule single_quoted_literal_text() -> &'input str =
+        rule single_quoted_literal_text() -> WordString =
             "\'" inner:$([^'\'']*) "\'" { inner }
 
-        rule ansi_c_quoted_text() -> &'input str =
+        rule ansi_c_quoted_text() -> WordString =
             "$\'" inner:$([^'\'']*) "\'" { inner }
 
         rule unquoted_literal_text(stop_condition: rule, in_command: bool) -> WordPiece =
-            s:$(unquoted_literal_text_piece(<stop_condition()>, in_command)+) { WordPiece::Text(s.to_owned()) }
+            s:$(unquoted_literal_text_piece(<stop_condition()>, in_command)+) { WordPiece::Text(s) }
 
         // TODO: Find a way to remove the special-case logic for extglob + subshell commands
         rule unquoted_literal_text_piece(stop_condition: rule, in_command: bool) =
@@ -638,16 +637,16 @@ peg::parser! {
             "(" command() ")" {}
 
         rule double_quoted_text() -> WordPiece =
-            s:double_quote_body_text() { WordPiece::Text(s.to_owned()) }
+            s:double_quote_body_text() { WordPiece::Text(s) }
 
-        rule double_quote_body_text() -> &'input str =
+        rule double_quote_body_text() -> WordString =
             $((!double_quoted_escape_sequence() [^'$' | '\"'])+)
 
         rule normal_escape_sequence() -> WordPiece =
-            s:$("\\" [c]) { WordPiece::EscapeSequence(s.to_owned()) }
+            s:$("\\" [c]) { WordPiece::EscapeSequence(s) }
 
         rule double_quoted_escape_sequence() -> WordPiece =
-            s:$("\\" ['$' | '`' | '\"' | '\'' | '\\']) { WordPiece::EscapeSequence(s.to_owned()) }
+            s:$("\\" ['$' | '`' | '\"' | '\'' | '\\']) { WordPiece::EscapeSequence(s) }
 
         rule tilde_prefix_with_source() -> WordPieceWithSource =
             start_index:position!() piece:tilde_prefix() end_index:position!() {
@@ -660,7 +659,7 @@ peg::parser! {
 
         // TODO: Handle colon syntax
         rule tilde_prefix() -> WordPiece =
-            tilde_parsing_enabled() "~" cs:$((!['/' | ':' | ';'] [c])*) { WordPiece::TildePrefix(cs.to_owned()) }
+            tilde_parsing_enabled() "~" cs:$((!['/' | ':' | ';'] [c])*) { WordPiece::TildePrefix(cs) }
 
         // TODO: Deal with fact that there may be a quoted word or escaped closing brace chars.
         // TODO: Improve on how we handle a '$' not followed by a valid variable name or parameter.
@@ -672,7 +671,7 @@ peg::parser! {
                 WordPiece::ParameterExpansion(ParameterExpr::Parameter { parameter, indirect: false })
             } /
             "$" !['\''] {
-                WordPiece::Text("$".to_owned())
+                WordPiece::Text("$".into())
             }
 
         rule parameter_expression() -> ParameterExpr =
@@ -720,16 +719,16 @@ peg::parser! {
 
         rule non_posix_parameter_expression() -> ParameterExpr =
             "!" variable_name:variable_name() "[*]" {
-                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: true }
+                ParameterExpr::MemberKeys { variable_name, concatenate: true }
             } /
             "!" variable_name:variable_name() "[@]" {
-                ParameterExpr::MemberKeys { variable_name: variable_name.to_owned(), concatenate: false }
+                ParameterExpr::MemberKeys { variable_name, concatenate: false }
             } /
             "!" prefix:variable_name() "*" {
-                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: true }
+                ParameterExpr::VariableNames { prefix, concatenate: true }
             } /
             "!" prefix:variable_name() "@" {
-                ParameterExpr::VariableNames { prefix: prefix.to_owned(), concatenate: false }
+                ParameterExpr::VariableNames { prefix, concatenate: false }
             } /
             indirect:parameter_indirection() parameter:parameter() ":" offset:substring_offset() length:(":" l:substring_length() { l })? {
                 ParameterExpr::Substring { parameter, indirect, offset, length }
@@ -782,18 +781,18 @@ peg::parser! {
         rule unbraced_parameter() -> Parameter =
             p:unbraced_positional_parameter() { Parameter::Positional(p) } /
             p:special_parameter() { Parameter::Special(p) } /
-            p:variable_name() { Parameter::Named(p.to_owned()) }
+            p:variable_name() { Parameter::Named(p) }
 
         // N.B. The indexing syntax is not a standard sh-ism.
         pub(crate) rule parameter() -> Parameter =
             p:positional_parameter() { Parameter::Positional(p) } /
             p:special_parameter() { Parameter::Special(p) } /
-            non_posix_extensions_enabled() p:variable_name() "[@]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: false } } /
-            non_posix_extensions_enabled() p:variable_name() "[*]" { Parameter::NamedWithAllIndices { name: p.to_owned(), concatenate: true } } /
+            non_posix_extensions_enabled() p:variable_name() "[@]" { Parameter::NamedWithAllIndices { name: p, concatenate: false } } /
+            non_posix_extensions_enabled() p:variable_name() "[*]" { Parameter::NamedWithAllIndices { name: p, concatenate: true } } /
             non_posix_extensions_enabled() p:variable_name() "[" index:$(arithmetic_word(<"]">)) "]" {?
-                Ok(Parameter::NamedWithIndex { name: p.to_owned(), index: index.to_owned() })
+                Ok(Parameter::NamedWithIndex { name: p, index })
             } /
-            p:variable_name() { Parameter::Named(p.to_owned()) }
+            p:variable_name() { Parameter::Named(p) }
 
         rule positional_parameter() -> u32 =
             n:$(['1'..='9'](['0'..='9']*)) {? n.parse().or(Err("u32")) }
@@ -810,21 +809,20 @@ peg::parser! {
             "!" { SpecialParameter::LastBackgroundProcessId } /
             "0" { SpecialParameter::ShellName }
 
-        rule variable_name() -> &'input str =
+        rule variable_name() -> WordString =
             $(!['0'..='9'] ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z']+)
 
         pub(crate) rule command_substitution() -> WordPiece =
-            "$(" c:command() ")" { WordPiece::CommandSubstitution(c.to_owned()) } /
+            "$(" c:command() ")" { WordPiece::CommandSubstitution(c) } /
             "`" c:backquoted_command() "`" { WordPiece::BackquotedCommandSubstitution(c) }
 
-        pub(crate) rule command() -> &'input str =
-            $(command_piece()*)
+        pub(crate) rule command() -> WordString = $(command_piece()*)
 
         pub(crate) rule command_piece() -> () =
             word_piece(<[')']>, true /*in_command*/) {} /
             ([' ' | '\t'])+ {}
 
-        rule backquoted_command() -> String =
+        rule backquoted_command() -> WordString =
             chars:(backquoted_char()*) { chars.into_iter().collect() }
 
         rule backquoted_char() -> char =
@@ -832,22 +830,22 @@ peg::parser! {
             [^'`']
 
         rule arithmetic_expansion() -> WordPiece =
-            "$((" e:$(arithmetic_word(<"))">)) "))" { WordPiece::ArithmeticExpression(ast::UnexpandedArithmeticExpr { value: e.to_owned() } ) }
+            "$((" e:$(arithmetic_word(<"))">)) "))" { WordPiece::ArithmeticExpression(ast::UnexpandedArithmeticExpr { value: e } ) }
 
         rule substring_offset() -> ast::UnexpandedArithmeticExpr =
-            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
+            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s } }
 
         rule substring_length() -> ast::UnexpandedArithmeticExpr =
-            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s.to_owned() } }
+            s:$(arithmetic_word(<[':' | '}']>)) { ast::UnexpandedArithmeticExpr { value: s } }
 
-        rule parameter_replacement_str() -> String =
-            "/" s:$(word(<['}']>)) { s.to_owned() }
+        rule parameter_replacement_str() -> WordString =
+            "/" s:$(word(<['}']>)) { s }
 
-        rule parameter_search_pattern() -> String =
-            s:$(word(<['}' | '/']>)) { s.to_owned() }
+        rule parameter_search_pattern() -> WordString =
+            s:$(word(<['}' | '/']>)) { s }
 
-        rule parameter_expression_word() -> String =
-            s:$(word(<['}']>)) { s.to_owned() }
+        rule parameter_expression_word() -> WordString =
+            s:$(word(<['}']>)) { s }
 
         rule extglob_enabled() -> () =
             &[_] {? if parser_options.enable_extended_globbing { Ok(()) } else { Err("no extglob") } }
@@ -868,12 +866,15 @@ mod tests {
 
     #[test]
     fn parse_command_substitution() -> Result<()> {
-        super::expansion_parser::command_piece("echo", &ParserOptions::default())?;
-        super::expansion_parser::command_piece("hi", &ParserOptions::default())?;
-        super::expansion_parser::command("echo hi", &ParserOptions::default())?;
-        super::expansion_parser::command_substitution("$(echo hi)", &ParserOptions::default())?;
+        super::expansion_parser::command_piece(&"echo".into(), &ParserOptions::default())?;
+        super::expansion_parser::command_piece(&"hi".into(), &ParserOptions::default())?;
+        super::expansion_parser::command(&"echo hi".into(), &ParserOptions::default())?;
+        super::expansion_parser::command_substitution(
+            &"$(echo hi)".into(),
+            &ParserOptions::default(),
+        )?;
 
-        let parsed = super::parse("$(echo hi)", &ParserOptions::default())?;
+        let parsed = super::parse(&"$(echo hi)".into(), &ParserOptions::default())?;
         assert_matches!(
             &parsed[..],
             [WordPieceWithSource { piece: WordPiece::CommandSubstitution(s), .. }] if s.as_str() == "echo hi"
@@ -884,15 +885,15 @@ mod tests {
 
     #[test]
     fn parse_command_substitution_with_embedded_quotes() -> Result<()> {
-        super::expansion_parser::command_piece("echo", &ParserOptions::default())?;
-        super::expansion_parser::command_piece(r#""hi""#, &ParserOptions::default())?;
-        super::expansion_parser::command(r#"echo "hi""#, &ParserOptions::default())?;
+        super::expansion_parser::command_piece(&"echo".into(), &ParserOptions::default())?;
+        super::expansion_parser::command_piece(&r#""hi""#.into(), &ParserOptions::default())?;
+        super::expansion_parser::command(&r#"echo "hi""#.into(), &ParserOptions::default())?;
         super::expansion_parser::command_substitution(
-            r#"$(echo "hi")"#,
+            &r#"$(echo "hi")"#.into(),
             &ParserOptions::default(),
         )?;
 
-        let parsed = super::parse(r#"$(echo "hi")"#, &ParserOptions::default())?;
+        let parsed = super::parse(&r#"$(echo "hi")"#.into(), &ParserOptions::default())?;
         assert_matches!(
             &parsed[..],
             [WordPieceWithSource { piece: WordPiece::CommandSubstitution(s), .. }] if s.as_str() == r#"echo "hi""#
@@ -903,7 +904,7 @@ mod tests {
 
     #[test]
     fn parse_command_substitution_with_embedded_extglob() -> Result<()> {
-        let parsed = super::parse("$(echo !(x))", &ParserOptions::default())?;
+        let parsed = super::parse(&"$(echo !(x))".into(), &ParserOptions::default())?;
         assert_matches!(
             &parsed[..],
             [WordPieceWithSource { piece: WordPiece::CommandSubstitution(s), .. }] if s.as_str() == "echo !(x)"
@@ -914,7 +915,7 @@ mod tests {
 
     #[test]
     fn parse_extglob_with_embedded_parameter() -> Result<()> {
-        let parsed = super::parse("+([$var])", &ParserOptions::default())?;
+        let parsed = super::parse(&"+([$var])".into(), &ParserOptions::default())?;
         assert_matches!(
             &parsed[..],
             [WordPieceWithSource { piece: WordPiece::Text(s1), .. },
diff --git a/fuzz/fuzz_targets/fuzz_arithmetic.rs b/fuzz/fuzz_targets/fuzz_arithmetic.rs
index 1930b837..20b843d7 100644
--- a/fuzz/fuzz_targets/fuzz_arithmetic.rs
+++ b/fuzz/fuzz_targets/fuzz_arithmetic.rs
@@ -28,7 +28,7 @@ async fn eval_arithmetic_async(
     //
     // Instantiate a brush shell with defaults, then try to evaluate the expression.
     //
-    let parsed_expr = brush_parser::arithmetic::parse(input_str.as_str()).ok();
+    let parsed_expr = brush_parser::arithmetic::parse(&input_str.clone().into()).ok();
     let our_eval_result = if let Some(parsed_expr) = parsed_expr {
         shell.eval_arithmetic(parsed_expr).await.ok()
     } else {