diff --git a/src/lexer.rs b/src/lexer.rs index d4d6198..1e8ab4b 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -38,6 +38,10 @@ pub enum Token<'a> { impl<'a> Token<'a> { pub fn tokens_to_string(tokens: &[Token]) -> String { + if tokens.len() >= 2 && (tokens[0] == Self::Object("struct") || tokens[1] == Self::Object("struct")) { + return Self::struct_tokens_to_string(tokens); + } + let space_after = [Token::Comma, Token::Asterisk]; let mut string = String::new(); @@ -69,7 +73,7 @@ impl<'a> Token<'a> { string } - pub fn struct_tokens_to_string(tokens: &[Token]) -> String { + fn struct_tokens_to_string(tokens: &[Token]) -> String { let mut string = String::new(); let mut in_struct = false; @@ -520,6 +524,35 @@ pub fn get_structs<'a>(tokens: &'a Vec) -> Vec<&'a [Token<'a>]> { structs } +pub fn get_defines<'a>(tokens: &'a Vec) -> Vec<&'a [Token<'a>]> { + let mut defines = vec![]; + + let mut idx: usize = 0; + + while idx < tokens.len() { + if tokens[idx] != Token::HashTag { + skip_to(tokens, Token::HashTag, &mut idx); + } + + let start_idx = idx; + + if idx + 1 >= tokens.len() || tokens[idx+1] != Token::Object("define") { + idx += 2; + continue; + } + idx += 1; + + skip_to(tokens, Token::NewLine, &mut idx); + while idx < tokens.len() && tokens[idx-1] == Token::BackSlash { + skip_to(tokens, Token::NewLine, &mut idx); + } + + defines.push(&tokens[start_idx..idx]); + } + + defines +} + fn struct_len(tokens: &[Token]) -> Option { let mut num_brackets = 0; let mut contains_brackets = false; @@ -552,13 +585,18 @@ fn struct_len(tokens: &[Token]) -> Option { None } +/// Updates `idx` to point to the next token specified. If the +/// token does not exist, `idx` will be set equal to tokens.len() fn skip_to(tokens: &[Token], target: Token, idx: &mut usize) { for i in (*idx + 1)..tokens.len() { *idx = i; if tokens[i] == target { - break; + return; } } + + // If the for loop ends, we haven't found it, so set idx appropriatly + *idx = tokens.len(); } /// Ignores values inside the targets, it just skips to the next token @@ -573,3 +611,47 @@ fn skip_to_oneof(tokens: &[Token], targets: &[Token], idx: &mut usize) { } } } + + +#[cfg(test)] +mod lexer_tests { + use std::fs; + + use super::*; + + #[test] + fn test_get_defines() { + let s = fs::read_to_string("tests/lexer-define.c").unwrap(); + let s = clean_source_code(s); + let tokens = tokenize(&s).unwrap(); + + let defines = get_defines(&tokens); + + let mut log_dump = "".to_string(); + for &def in &defines { + let x = format!("{:?}\n\n", def); + log_dump.push_str(&x); + } + + fs::write("tests/lexer-define.log", format!("{}", log_dump)).unwrap(); + + assert_eq!(defines.len(), s.split("#define").collect::>().len() - 1); + } + + #[test] + fn test_get_structs() { + let s = fs::read_to_string("tests/lexer-struct.c").unwrap(); + let s = clean_source_code(s); + let tokens = tokenize(&s).unwrap(); + + let defines = get_structs(&tokens); + + let mut log_dump = "".to_string(); + for &def in &defines { + let x = format!("{:?}\n\n", def); + log_dump.push_str(&x); + } + + fs::write("tests/lexer-struct.log", format!("{}", log_dump)).unwrap(); + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index fc57450..a9442be 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ use valgrind::VgOutput; #[tokio::main(flavor = "current_thread")] async fn main() { + let cli_args: cli::CliCommand; let raw_cli_args = std::env::args().collect::>(); if raw_cli_args.len() < 2 { @@ -96,7 +97,7 @@ async fn main() { process::exit(0); } - if let Err(err) = handle_headers(&config) { + if let Err(err) = handle_gen_headers(&config) { println!("An error occured while generating header files:\n{}", err); process::exit(1); } @@ -331,7 +332,7 @@ fn handle_execution( Ok(()) } -fn handle_headers(config: &Config) -> Result<()> { +fn handle_gen_headers(config: &Config) -> Result<()> { let cwd = env::current_dir()?; let src_dir = config.get_src_dir(); let inc_dir = config.get_include_dir(); @@ -357,9 +358,16 @@ fn handle_headers(config: &Config) -> Result<()> { code_h = lexer::clean_source_code(code_h); let tokens_h = lexer::tokenize(&code_h)?; + let mut defines_h = lexer::get_defines(&tokens_h); + let mut sturcts_h = lexer::get_structs(&tokens_h); + let fn_defs = lexer::get_fn_def(&tokens); let includes = lexer::get_includes(&tokens); - let structs = lexer::get_structs(&tokens_h); + let defines = lexer::get_defines(&tokens); + let structs = lexer::get_structs(&tokens); + + defines_h.extend_from_slice(&defines[1..]); // Skip the first definition to skip the #ifndef NAME_H #define NAME_H + sturcts_h.extend_from_slice(&structs); let mut headers = String::new(); @@ -372,8 +380,16 @@ fn handle_headers(config: &Config) -> Result<()> { headers.push('\n'); } headers.push('\n'); - for &struc in &structs { - headers.push_str(&lexer::Token::struct_tokens_to_string(struc).trim()); + + for &def in &defines_h { + let s = lexer::Token::tokens_to_string(def); + headers.push_str(&s); + headers.push('\n'); + } + headers.push('\n'); + + for &struc in &sturcts_h { + headers.push_str(&lexer::Token::tokens_to_string(struc).trim()); headers.push_str("\n\n"); } for &func in &fn_defs { diff --git a/tests/lexer-define.c b/tests/lexer-define.c new file mode 100644 index 0000000..527e9e3 --- /dev/null +++ b/tests/lexer-define.c @@ -0,0 +1,7 @@ + +#define FOO 982 + +#define PRINT_HELLO_WORLD() do { \ + printf("Hello, "); \ + printf("World!\n"); \ +} while(0) \ No newline at end of file diff --git a/tests/lexer-struct.c b/tests/lexer-struct.c new file mode 100644 index 0000000..7b24c6f --- /dev/null +++ b/tests/lexer-struct.c @@ -0,0 +1,72 @@ +// Test case 1: Simple struct definition +struct Point { + int x; + int y; +}; + +// Test case 2: Typedef struct without a tag +typedef struct { + float real; + float imag; +} Complex; + +// Test case 3: Typedef struct with a tag +typedef struct Rectangle { + int width; + int height; +} Rectangle; + +// Test case 4: Struct with a single member +struct Circle { + float radius; +}; + +// Test case 5: Struct with a nested struct definition +struct Line { + // Nested struct definition inside a struct + struct Point { + int x; + int y; + } start, end; +}; + +// Test case 6: Struct with an anonymous struct member +struct Node { + int value; + struct { + int left; + int right; + } children; +}; + +// Test case 7: Struct representing a linked list node (self-referential) +struct List { + int data; + struct List *next; +}; + +// Test case 8: Struct with bit fields +struct Flags { + unsigned int flag1 : 1; + unsigned int flag2 : 1; +}; + +// Test case 9: Typedef struct with more complex members +typedef struct Employee { + char name[50]; + int id; + float salary; +} Employee; + +// Test case 10: Struct for a binary tree node (self-referential pointers) +struct TreeNode { + int value; + struct TreeNode *left; + struct TreeNode *right; +}; + +// Test case 11: Typedef struct with a different alias +typedef struct Car { + int wheels; + float engine_power; +} Vehicle;