From 4a8630b7f257199e8eb2b3c4f2bab9f4b348b86a Mon Sep 17 00:00:00 2001 From: Pranjal Raihan Date: Fri, 7 Feb 2025 19:37:47 -0800 Subject: [PATCH] Introduce ast:header node Summary: This diff introduces a "header" section which will make it possible to add AST node types that are only allowed to be at the top of files. For example, `{{#import}}` statement in the next diff(s). The header section is already part of the spec. Reviewed By: createdbysk Differential Revision: D68424381 fbshipit-source-id: 154ec34ae964a5a2c4b7076449d130299291537c --- .../rust/lib/metadata/struct.mustache | 4 +- .../thrift/src/thrift/compiler/whisker/ast.h | 8 ++++ .../src/thrift/compiler/whisker/parser.cc | 46 ++++++++++++++++++- .../src/thrift/compiler/whisker/print_ast.cc | 34 ++++++++++---- .../src/thrift/compiler/whisker/render.cc | 23 +++++++--- .../compiler/whisker/test/parser_test.cc | 46 +++++++++++++++---- 6 files changed, 131 insertions(+), 30 deletions(-) diff --git a/third-party/thrift/src/thrift/compiler/generate/templates/rust/lib/metadata/struct.mustache b/third-party/thrift/src/thrift/compiler/generate/templates/rust/lib/metadata/struct.mustache index 8d18f42ee99353..8f302805053ee0 100644 --- a/third-party/thrift/src/thrift/compiler/generate/templates/rust/lib/metadata/struct.mustache +++ b/third-party/thrift/src/thrift/compiler/generate/templates/rust/lib/metadata/struct.mustache @@ -24,7 +24,9 @@ This might be fine in C++, but Rust crates have isolated dependencies, so this doesn't work for us. Disable generating annotations on patch structs for now. -}}{{^struct:generated?}} +}} + +{{^struct:generated?}} impl ::fbthrift::metadata::ThriftAnnotations for {{struct:rust_name}} { fn get_structured_annotation() -> ::std::option::Option { {{! We can't use a match statement as `TypeId::of` must be const, which is blocked by diff --git a/third-party/thrift/src/thrift/compiler/whisker/ast.h b/third-party/thrift/src/thrift/compiler/whisker/ast.h index 34febb82d548d7..3b8d5f3be9e2d6 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/ast.h +++ b/third-party/thrift/src/thrift/compiler/whisker/ast.h @@ -61,6 +61,13 @@ using body = std::variant< macro>; using bodies = std::vector; +/** + * Elements that can appear at the top of a Whisker source file. + * Whisker does not render these kinds of elements. + */ +using header = std::variant; +using headers = std::vector
; + // Defines operator!= in terms of operator== // Remove in C++20 which introduces comparison operator synthesis #define WHISKER_DEFINE_OPERATOR_INEQUALITY(type) \ @@ -73,6 +80,7 @@ using bodies = std::vector; */ struct root { source_location loc; + headers header_elements; bodies body_elements; }; diff --git a/third-party/thrift/src/thrift/compiler/whisker/parser.cc b/third-party/thrift/src/thrift/compiler/whisker/parser.cc index 438d0ecd09b5e3..1ef9825ad7ab0d 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/parser.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/parser.cc @@ -628,11 +628,17 @@ class parser { return std::nullopt; } - // root → { body* } + // root → { header* ~ body* } std::optional parse_root(parser_scan_window scan) { constexpr std::string_view expected_types = "text, template, or comment"; try { auto original_scan = scan; + + ast::headers headers; + while (parse_result header = parse_header(scan)) { + headers.emplace_back(std::move(header).consume_and_advance(&scan)); + } + ast::bodies bodies; while (scan.can_advance()) { if (parse_result maybe_body = parse_body(scan)) { @@ -655,7 +661,10 @@ class parser { // is not valid. return std::nullopt; } - return ast::root{original_scan.start_location(), std::move(bodies)}; + return ast::root{ + original_scan.start_location(), + std::move(headers), + std::move(bodies)}; } catch (const parse_error&) { // the error should already have been reported via the diagnostics // engine @@ -739,6 +748,39 @@ class parser { scan}; } + // Parses header elements that can only appear at the top of the source. + // + // header → { comment | pragma-statement } + parse_result parse_header(parser_scan_window scan) { + assert(scan.empty()); + + while (scan.can_advance()) { + // Because header elements are more "directives" for the Whisker language, + // we ignore whitespace that exist between header elements. + // However, the header is not "greedy". When we encounter the first + // non-header element, we return all consumed whitespace back to the body. + if (parse_result newline = parse_newline(scan)) { + std::ignore = std::move(newline).consume_and_advance(&scan); + continue; + } + if (try_consume_token(&scan, tok::whitespace)) { + scan = scan.make_fresh(); + continue; + } + if (parse_result maybe_comment = parse_comment(scan)) { + auto comment = std::move(maybe_comment).consume_and_advance(&scan); + return {std::move(comment), scan}; + } + if (parse_result maybe_pragma = parse_pragma_statement(scan)) { + auto pragma = std::move(maybe_pragma).consume_and_advance(&scan); + return {pragma, scan}; + } + // Next token is not valid for a header element + break; + } + return no_parse_result(); + } + // Returns an empty parse result if no body was found. // // Returns an empty optional if body was found but consisted diff --git a/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc b/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc index 896af0e579a868..5c1a07a0e3a5f3 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc @@ -197,16 +197,13 @@ struct ast_visitor { } } - // Prevent implicit conversion to ast::body. Otherwise, we can silently - // compile an infinitely recursive visit() chain if there is a missing - // overload for one of the alternatives in the variant. - template < - class T = ast::body, - typename = std::enable_if_t>> - void visit(const T& body, tree_printer::scope scope) const { - // This node is transparent so it does not directly appear in the tree - detail::variant_match( - body, [&](const auto& node) { visit(node, std::move(scope)); }); + void visit( + const ast::headers& header_elements, tree_printer::scope scope) const { + scope.println(" header"); + auto header_scope = scope.open_node(); + for (const auto& header : header_elements) { + visit(header, header_scope); + } } void visit( const ast::bodies& body_elements, tree_printer::scope scope) const { @@ -215,10 +212,27 @@ struct ast_visitor { visit(body, scope); } } + // Prevent implicit conversion to ast::header or ast::body. Otherwise, we can + // silently compile an infinitely recursive visit() chain if there is a + // missing overload for one of the alternatives in the variant. + template < + class T, + std::enable_if_t< + std::is_same_v || std::is_same_v>* = + nullptr> + void visit(const T& elements, tree_printer::scope scope) const { + // This node is transparent so it does not directly appear in the tree + detail::variant_match( + elements, [&](const auto& node) { visit(node, std::move(scope)); }); + } + void visit(const ast::root& root, tree_printer::scope scope) const { scope.println( "root [{}]", resolved_location(root.loc, src_manager).file_name()); auto root_scope = scope.open_node(); + if (!root.header_elements.empty()) { + visit(root.header_elements, root_scope); + } visit(root.body_elements, root_scope); } diff --git a/third-party/thrift/src/thrift/compiler/whisker/render.cc b/third-party/thrift/src/thrift/compiler/whisker/render.cc index f4682c9a6fcf3d..2d9e271e7ef8f6 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/render.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/render.cc @@ -426,6 +426,7 @@ class render_engine { std::move(eval_ctx), nullptr /* the root node is not a partial */, source_location()); + visit(root.header_elements); visit(root.body_elements); return true; } catch (const render_error& err) { @@ -501,19 +502,26 @@ class render_engine { abort_rendering(loc); } + void visit(const ast::headers& headers) { + for (const auto& header : headers) { + visit(header); + } + } void visit(const ast::bodies& bodies) { for (const auto& body : bodies) { visit(body); } } - // Prevent implicit conversion to ast::body. Otherwise, we can silently - // compile an infinitely recursive visit() chain if there is a missing - // overload for one of the alternatives in the variant. + // Prevent implicit conversion to ast::header or ast::body. Otherwise, we can + // silently compile an infinitely recursive visit() chain if there is a + // missing overload for one of the alternatives in the variant. template < - typename T = ast::body, - typename = std::enable_if_t>> - void visit(const T& body) { - detail::variant_match(body, [&](const auto& node) { visit(node); }); + typename T, + std::enable_if_t< + std::is_same_v || std::is_same_v>* = + nullptr> + void visit(const T& elements) { + detail::variant_match(elements, [&](const auto& node) { visit(node); }); } void visit(const ast::text& text) { out_.write(text); } @@ -1179,6 +1187,7 @@ class render_engine { macro.loc.begin); auto indent_guard = out_.make_indent_guard(macro.standalone_indentation_within_line); + visit(resolved_macro->header_elements); visit(resolved_macro->body_elements); } diff --git a/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc b/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc index cdaf0e7616433c..ee9749207029e8 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc @@ -1005,14 +1005,13 @@ TEST_F(ParserTest, partial_block_after_comment) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- comment ''\n" + "|- header\n" + "| |- comment ''\n" "|- partial-block 'foo'\n" "| `- argument 'arg'\n"); } -TEST_F(ParserTest, partial_block_consume_whitespace) { - // Partial blocks appear in the header of a file, so they should consume - // preceding whitespace. +TEST_F(ParserTest, partial_block_do_not_consume_whitespace) { auto ast = parse_ast( "\n" "{{!}}\n" @@ -1024,8 +1023,8 @@ TEST_F(ParserTest, partial_block_consume_whitespace) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- newline '\\n'\n" - "|- comment ''\n" + "|- header\n" + "| |- comment ''\n" "|- newline '\\n'\n" "|- partial-block 'foo'\n" "| `- argument 'arg'\n" @@ -1176,7 +1175,8 @@ TEST_F(ParserTest, partial_block_inside_body) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- comment ' header '\n" + "|- header\n" + "| |- comment ' header '\n" "|- partial-block 'foo'\n" "| `- argument 'arg1'\n" "| `- argument 'arg2'\n" @@ -1510,7 +1510,8 @@ TEST_F(ParserTest, pragma_ignore_newlines) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- pragma-statement 'ignore-newlines' \n"); + "|- header\n" + "| |- pragma-statement 'ignore-newlines' \n"); } TEST_F(ParserTest, pragma_unrecognized) { @@ -1594,7 +1595,8 @@ TEST_F(ParserTest, comment_empty) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- comment ''\n"); + "|- header\n" + "| |- comment ''\n"); } TEST_F(ParserTest, comment_escaped) { @@ -1614,7 +1616,31 @@ TEST_F(ParserTest, comment_escaped_empty) { EXPECT_EQ( to_string(ast), "root [path/to/test-1.whisker]\n" - "|- comment ''\n"); + "|- header\n" + "| |- comment ''\n"); +} + +TEST_F(ParserTest, header_consumes_whitespace) { + auto ast = parse_ast( + "\n" + "{{!}}\n" + " \t\n" + "{{#pragma ignore-newlines}}\t\n" + "\n" + "hello\n" + "{{#pragma ignore-newlines}}\n"); + // Consumes newlines within a header. + // Does not consume newlines after the last header element. + EXPECT_EQ( + to_string(ast), + "root [path/to/test-1.whisker]\n" + "|- header\n" + "| |- comment ''\n" + "| |- pragma-statement 'ignore-newlines' \n" + "|- newline '\\n'\n" + "|- text 'hello'\n" + "|- newline '\\n'\n" + "|- pragma-statement 'ignore-newlines' \n"); } TEST_F(ParserTest, strip_standalone_lines) {