Skip to content

Commit

Permalink
Introduce ast:header node
Browse files Browse the repository at this point in the history
Summary:
This diff introduces a "header" section which will make it possible to add AST node types that are only allowed to be at the top of files. For example, `{{#import}}` statement in the next diff(s).

The header section is already part of the spec.

Reviewed By: createdbysk

Differential Revision: D68424381

fbshipit-source-id: 154ec34ae964a5a2c4b7076449d130299291537c
  • Loading branch information
praihan authored and facebook-github-bot committed Feb 8, 2025
1 parent a96b817 commit 4a8630b
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
This might be fine in C++, but Rust crates have isolated dependencies, so
this doesn't work for us. Disable generating annotations on patch structs
for now.
}}{{^struct:generated?}}
}}
{{^struct:generated?}}
impl ::fbthrift::metadata::ThriftAnnotations for {{struct:rust_name}} {
fn get_structured_annotation<T: Sized + 'static>() -> ::std::option::Option<T> {
{{! We can't use a match statement as `TypeId::of` must be const, which is blocked by
Expand Down
8 changes: 8 additions & 0 deletions third-party/thrift/src/thrift/compiler/whisker/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ using body = std::variant<
macro>;
using bodies = std::vector<body>;

/**
* Elements that can appear at the top of a Whisker source file.
* Whisker does not render these kinds of elements.
*/
using header = std::variant<comment, pragma_statement>;
using headers = std::vector<header>;

// Defines operator!= in terms of operator==
// Remove in C++20 which introduces comparison operator synthesis
#define WHISKER_DEFINE_OPERATOR_INEQUALITY(type) \
Expand All @@ -73,6 +80,7 @@ using bodies = std::vector<body>;
*/
struct root {
source_location loc;
headers header_elements;
bodies body_elements;
};

Expand Down
46 changes: 44 additions & 2 deletions third-party/thrift/src/thrift/compiler/whisker/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -628,11 +628,17 @@ class parser {
return std::nullopt;
}

// root → { body* }
// root → { header* ~ body* }
std::optional<ast::root> parse_root(parser_scan_window scan) {
constexpr std::string_view expected_types = "text, template, or comment";
try {
auto original_scan = scan;

ast::headers headers;
while (parse_result header = parse_header(scan)) {
headers.emplace_back(std::move(header).consume_and_advance(&scan));
}

ast::bodies bodies;
while (scan.can_advance()) {
if (parse_result maybe_body = parse_body(scan)) {
Expand All @@ -655,7 +661,10 @@ class parser {
// is not valid.
return std::nullopt;
}
return ast::root{original_scan.start_location(), std::move(bodies)};
return ast::root{
original_scan.start_location(),
std::move(headers),
std::move(bodies)};
} catch (const parse_error&) {
// the error should already have been reported via the diagnostics
// engine
Expand Down Expand Up @@ -739,6 +748,39 @@ class parser {
scan};
}

// Parses header elements that can only appear at the top of the source.
//
// header → { comment | pragma-statement }
parse_result<ast::header> parse_header(parser_scan_window scan) {
assert(scan.empty());

while (scan.can_advance()) {
// Because header elements are more "directives" for the Whisker language,
// we ignore whitespace that exist between header elements.
// However, the header is not "greedy". When we encounter the first
// non-header element, we return all consumed whitespace back to the body.
if (parse_result newline = parse_newline(scan)) {
std::ignore = std::move(newline).consume_and_advance(&scan);
continue;
}
if (try_consume_token(&scan, tok::whitespace)) {
scan = scan.make_fresh();
continue;
}
if (parse_result maybe_comment = parse_comment(scan)) {
auto comment = std::move(maybe_comment).consume_and_advance(&scan);
return {std::move(comment), scan};
}
if (parse_result maybe_pragma = parse_pragma_statement(scan)) {
auto pragma = std::move(maybe_pragma).consume_and_advance(&scan);
return {pragma, scan};
}
// Next token is not valid for a header element
break;
}
return no_parse_result();
}

// Returns an empty parse result if no body was found.
//
// Returns an empty optional<ast::body> if body was found but consisted
Expand Down
34 changes: 24 additions & 10 deletions third-party/thrift/src/thrift/compiler/whisker/print_ast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,16 +197,13 @@ struct ast_visitor {
}
}

// Prevent implicit conversion to ast::body. Otherwise, we can silently
// compile an infinitely recursive visit() chain if there is a missing
// overload for one of the alternatives in the variant.
template <
class T = ast::body,
typename = std::enable_if_t<std::is_same_v<T, ast::body>>>
void visit(const T& body, tree_printer::scope scope) const {
// This node is transparent so it does not directly appear in the tree
detail::variant_match(
body, [&](const auto& node) { visit(node, std::move(scope)); });
void visit(
const ast::headers& header_elements, tree_printer::scope scope) const {
scope.println(" header");
auto header_scope = scope.open_node();
for (const auto& header : header_elements) {
visit(header, header_scope);
}
}
void visit(
const ast::bodies& body_elements, tree_printer::scope scope) const {
Expand All @@ -215,10 +212,27 @@ struct ast_visitor {
visit(body, scope);
}
}
// Prevent implicit conversion to ast::header or ast::body. Otherwise, we can
// silently compile an infinitely recursive visit() chain if there is a
// missing overload for one of the alternatives in the variant.
template <
class T,
std::enable_if_t<
std::is_same_v<T, ast::header> || std::is_same_v<T, ast::body>>* =
nullptr>
void visit(const T& elements, tree_printer::scope scope) const {
// This node is transparent so it does not directly appear in the tree
detail::variant_match(
elements, [&](const auto& node) { visit(node, std::move(scope)); });
}

void visit(const ast::root& root, tree_printer::scope scope) const {
scope.println(
"root [{}]", resolved_location(root.loc, src_manager).file_name());
auto root_scope = scope.open_node();
if (!root.header_elements.empty()) {
visit(root.header_elements, root_scope);
}
visit(root.body_elements, root_scope);
}

Expand Down
23 changes: 16 additions & 7 deletions third-party/thrift/src/thrift/compiler/whisker/render.cc
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ class render_engine {
std::move(eval_ctx),
nullptr /* the root node is not a partial */,
source_location());
visit(root.header_elements);
visit(root.body_elements);
return true;
} catch (const render_error& err) {
Expand Down Expand Up @@ -501,19 +502,26 @@ class render_engine {
abort_rendering(loc);
}

void visit(const ast::headers& headers) {
for (const auto& header : headers) {
visit(header);
}
}
void visit(const ast::bodies& bodies) {
for (const auto& body : bodies) {
visit(body);
}
}
// Prevent implicit conversion to ast::body. Otherwise, we can silently
// compile an infinitely recursive visit() chain if there is a missing
// overload for one of the alternatives in the variant.
// Prevent implicit conversion to ast::header or ast::body. Otherwise, we can
// silently compile an infinitely recursive visit() chain if there is a
// missing overload for one of the alternatives in the variant.
template <
typename T = ast::body,
typename = std::enable_if_t<std::is_same_v<T, ast::body>>>
void visit(const T& body) {
detail::variant_match(body, [&](const auto& node) { visit(node); });
typename T,
std::enable_if_t<
std::is_same_v<T, ast::header> || std::is_same_v<T, ast::body>>* =
nullptr>
void visit(const T& elements) {
detail::variant_match(elements, [&](const auto& node) { visit(node); });
}

void visit(const ast::text& text) { out_.write(text); }
Expand Down Expand Up @@ -1179,6 +1187,7 @@ class render_engine {
macro.loc.begin);
auto indent_guard =
out_.make_indent_guard(macro.standalone_indentation_within_line);
visit(resolved_macro->header_elements);
visit(resolved_macro->body_elements);
}

Expand Down
46 changes: 36 additions & 10 deletions third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1005,14 +1005,13 @@ TEST_F(ParserTest, partial_block_after_comment) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- comment <line:1:1, col:6> ''\n"
"|- header\n"
"| |- comment <line:1:1, col:6> ''\n"
"|- partial-block <line:1:6, col:48> 'foo'\n"
"| `- argument 'arg'\n");
}

TEST_F(ParserTest, partial_block_consume_whitespace) {
// Partial blocks appear in the header of a file, so they should consume
// preceding whitespace.
TEST_F(ParserTest, partial_block_do_not_consume_whitespace) {
auto ast = parse_ast(
"\n"
"{{!}}\n"
Expand All @@ -1024,8 +1023,8 @@ TEST_F(ParserTest, partial_block_consume_whitespace) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- newline <line:1:1, line:2:1> '\\n'\n"
"|- comment <line:2:1, col:6> ''\n"
"|- header\n"
"| |- comment <line:2:1, col:6> ''\n"
"|- newline <line:3:1, line:4:1> '\\n'\n"
"|- partial-block <line:4:1, line:6:17> 'foo'\n"
"| `- argument 'arg'\n"
Expand Down Expand Up @@ -1176,7 +1175,8 @@ TEST_F(ParserTest, partial_block_inside_body) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- comment <line:1:1, col:14> ' header '\n"
"|- header\n"
"| |- comment <line:1:1, col:14> ' header '\n"
"|- partial-block <line:1:14, line:3:17> 'foo'\n"
"| `- argument 'arg1'\n"
"| `- argument 'arg2'\n"
Expand Down Expand Up @@ -1510,7 +1510,8 @@ TEST_F(ParserTest, pragma_ignore_newlines) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- pragma-statement 'ignore-newlines' <line:1:1, col:28>\n");
"|- header\n"
"| |- pragma-statement 'ignore-newlines' <line:1:1, col:28>\n");
}

TEST_F(ParserTest, pragma_unrecognized) {
Expand Down Expand Up @@ -1594,7 +1595,8 @@ TEST_F(ParserTest, comment_empty) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- comment <line:1:1, col:6> ''\n");
"|- header\n"
"| |- comment <line:1:1, col:6> ''\n");
}

TEST_F(ParserTest, comment_escaped) {
Expand All @@ -1614,7 +1616,31 @@ TEST_F(ParserTest, comment_escaped_empty) {
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- comment <line:1:1, col:10> ''\n");
"|- header\n"
"| |- comment <line:1:1, col:10> ''\n");
}

TEST_F(ParserTest, header_consumes_whitespace) {
auto ast = parse_ast(
"\n"
"{{!}}\n"
" \t\n"
"{{#pragma ignore-newlines}}\t\n"
"\n"
"hello\n"
"{{#pragma ignore-newlines}}\n");
// Consumes newlines within a header.
// Does not consume newlines after the last header element.
EXPECT_EQ(
to_string(ast),
"root [path/to/test-1.whisker]\n"
"|- header\n"
"| |- comment <line:2:1, col:6> ''\n"
"| |- pragma-statement 'ignore-newlines' <line:4:1, col:28>\n"
"|- newline <line:5:1, line:6:1> '\\n'\n"
"|- text <line:6:1, col:6> 'hello'\n"
"|- newline <line:6:6, line:7:1> '\\n'\n"
"|- pragma-statement 'ignore-newlines' <line:7:1, col:28>\n");
}

TEST_F(ParserTest, strip_standalone_lines) {
Expand Down

0 comments on commit 4a8630b

Please sign in to comment.