From 2a37cf00a92a55fe3381ef75369affa0625ce118 Mon Sep 17 00:00:00 2001 From: Aaron Jorbin Date: Tue, 4 Feb 2025 02:47:41 +0000 Subject: [PATCH] HTML API: Fix extensibility of WP_HTML_Processor::next_token(). Break out logic from the next_token() method into a private method which may call itself recursively. This allows for subclasses to override the next_token() method and be assured that each call to next_token() corresponds with the consumption of one single token. This also parallels how WP_HTML_Tag_Processor::next_token() wraps a private base_class_next_token() method. Reviewed by jonsurrell. Merges [59285], [59364], and [59747] to 6.7 branch. Props westonruter, jonsurrell, dmsnell, jorbin. git-svn-id: https://develop.svn.wordpress.org/branches/6.7@59757 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 33 ++++- .../token-counting-html-processor.php | 35 +++++ .../tests/html-api/wpHtmlProcessor.php | 135 ++++++++++++++++++ 3 files changed, 197 insertions(+), 6 deletions(-) create mode 100644 tests/phpunit/data/html-api/token-counting-html-processor.php diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 7bb9a1bab469b..ecb19ee988d80 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -607,6 +607,22 @@ public function next_tag( $query = null ): bool { return false; } + /** + * Finds the next token in the HTML document. + * + * This doesn't currently have a way to represent non-tags and doesn't process + * semantic rules for text nodes. For access to the raw tokens consider using + * WP_HTML_Tag_Processor instead. + * + * @since 6.5.0 Added for internal support; do not use. + * @since 6.7.2 Refactored so subclasses may extend. + * + * @return bool Whether a token was parsed. 
+ */ + public function next_token(): bool { + return $this->next_visitable_token(); + } + /** * Ensures internal accounting is maintained for HTML semantic rules while * the underlying Tag Processor class is seeking to a bookmark. @@ -615,13 +631,18 @@ public function next_tag( $query = null ): bool { * semantic rules for text nodes. For access to the raw tokens consider using * WP_HTML_Tag_Processor instead. * - * @since 6.5.0 Added for internal support; do not use. + * Note that this method may call itself recursively. This is why it is not + * implemented as {@see WP_HTML_Processor::next_token()}, which instead calls + * this method similarly to how {@see WP_HTML_Tag_Processor::next_token()} + * calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method. + * + * @since 6.7.2 Added for internal support. * * @access private * * @return bool */ - public function next_token(): bool { + private function next_visitable_token(): bool { $this->current_element = null; if ( isset( $this->last_error ) ) { @@ -639,7 +660,7 @@ public function next_token(): bool { * tokens works in the meantime and isn't obviously wrong. */ if ( empty( $this->element_queue ) && $this->step() ) { - return $this->next_token(); + return $this->next_visitable_token(); } // Process the next event on the queue. @@ -650,7 +671,7 @@ public function next_token(): bool { continue; } - return empty( $this->element_queue ) ? false : $this->next_token(); + return empty( $this->element_queue ) ? false : $this->next_visitable_token(); } $is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation; @@ -661,7 +682,7 @@ public function next_token(): bool { * the breadcrumbs. */ if ( 'root-node' === $this->current_element->token->bookmark_name ) { - return $this->next_token(); + return $this->next_visitable_token(); } // Adjust the breadcrumbs for this event. @@ -673,7 +694,7 @@ public function next_token(): bool { // Avoid sending close events for elements which don't expect a closing. 
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) { - return $this->next_token(); + return $this->next_visitable_token(); } return true; diff --git a/tests/phpunit/data/html-api/token-counting-html-processor.php b/tests/phpunit/data/html-api/token-counting-html-processor.php new file mode 100644 index 0000000000000..b511acaf8bacb --- /dev/null +++ b/tests/phpunit/data/html-api/token-counting-html-processor.php @@ -0,0 +1,35 @@ + + */ + public $token_seen_count = array(); + + /** + * Gets next token. + * + * @return bool Whether next token was matched. + */ + public function next_token(): bool { + $result = parent::next_token(); + + if ( $this->get_token_type() === '#tag' ) { + $token_name = ( $this->is_tag_closer() ? '-' : '+' ) . $this->get_tag(); + } else { + $token_name = $this->get_token_name(); + } + + if ( ! isset( $this->token_seen_count[ $token_name ] ) ) { + $this->token_seen_count[ $token_name ] = 1; + } else { + ++$this->token_seen_count[ $token_name ]; + } + + return $result; + } + +} diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 54832f27301d3..1ca60e691f03e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -908,6 +908,141 @@ public function test_ensure_form_tag_closer_token_is_reachable() { $this->assertTrue( $processor->is_tag_closer() ); } + /** + * Data provider. + * + * @return array + */ + public function data_html_processor_with_extended_next_token() { + return array( + 'single_instance_per_tag' => array( + 'html' => ' + + + + Hello World + + +

Hello World!

+ +

Each tag should occur only once in this document. +

+ + + ', + 'expected_token_counts' => array( + '+HTML' => 1, + '+HEAD' => 1, + '#text' => 14, + '+META' => 1, + '+TITLE' => 1, + '-HEAD' => 1, + '+BODY' => 1, + '+H1' => 1, + '-H1' => 1, + '+IMG' => 1, + '+P' => 1, + '#comment' => 1, + '-P' => 1, + '+FOOTER' => 1, + '-FOOTER' => 1, + '-BODY' => 1, + '-HTML' => 1, + '' => 1, + ), + ), + + 'multiple_tag_instances' => array( + 'html' => ' + + +

Hello World!

+

First +

Second +

Third +

+ + + ', + 'expected_token_counts' => array( + '+HTML' => 1, + '+HEAD' => 1, + '-HEAD' => 1, + '+BODY' => 1, + '#text' => 13, + '+H1' => 1, + '-H1' => 1, + '+P' => 3, + '-P' => 3, + '+UL' => 1, + '+LI' => 3, + '-LI' => 3, + '-UL' => 1, + '-BODY' => 1, + '-HTML' => 1, + '' => 1, + ), + ), + + 'extreme_nested_formatting' => array( + 'html' => ' + + +

+ FORMAT +

+ + + ', + 'expected_token_counts' => array( + '+HTML' => 1, + '+HEAD' => 1, + '-HEAD' => 1, + '+BODY' => 1, + '#text' => 7, + '+P' => 1, + '+STRONG' => 1, + '+EM' => 1, + '+STRIKE' => 1, + '+I' => 1, + '+B' => 1, + '+U' => 1, + '-U' => 1, + '-B' => 1, + '-I' => 1, + '-STRIKE' => 1, + '-EM' => 1, + '-STRONG' => 1, + '-P' => 1, + '-BODY' => 1, + '-HTML' => 1, + '' => 1, + ), + ), + ); + } + + /** + * Ensures that subclasses of WP_HTML_Processor can do bookkeeping by extending the next_token() method. + * + * @ticket 62269 + * @dataProvider data_html_processor_with_extended_next_token + */ + public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts ) { + require_once DIR_TESTDATA . '/html-api/token-counting-html-processor.php'; + + $processor = Token_Counting_HTML_Processor::create_full_parser( $html ); + while ( $processor->next_tag() ) { + continue; + } + + $this->assertEquals( $expected_token_counts, $processor->token_seen_count, 'Snapshot: ' . var_export( $processor->token_seen_count, true ) ); + } + /** * Ensure that lowercased tag_name query matches tags case-insensitively. *