From 912bb39ab75529a581a6786f24dec41a63365cb5 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:33:34 +0200 Subject: [PATCH 01/28] Help PHPStan (#2) * Help PHPStan * PHP signature has changed --- composer.json | 2 +- src/Parser.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index d5bd2a6bd..f70559bef 100644 --- a/composer.json +++ b/composer.json @@ -32,7 +32,7 @@ "require-dev": { "friendsofphp/php-cs-fixer": "^2.19 || ^3.8", "mf2/mf2": "^0.5.0", - "phpstan/phpstan": "^1.10", + "phpstan/phpstan": "^1.11", "phpunit/phpunit": "^8 || ^9 || ^10", "psr/http-client": "^1.0", "psr/http-factory": "^1.0", diff --git a/src/Parser.php b/src/Parser.php index 9c78492a3..b7c5a29d3 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -144,7 +144,7 @@ public function parse(string &$data, string $encoding, string $url = '') //Parse by chunks not to use too much memory do { $stream_data = fread($stream, 1048576); - if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream))) { + if (!xml_parse($xml, $stream_data == false ? '' : $stream_data, feof($stream))) { $this->error_code = xml_get_error_code($xml); $this->error_string = xml_error_string($this->error_code); $return = false; From 97cfaf779aeaa836228324f1b458747edec06f9b Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:38:48 +0200 Subject: [PATCH 02/28] Disable coding style (#3) Because it is not in sync with SimplePie code at all and make all tests fail --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f2394040d..0663c0a24 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,8 +35,8 @@ jobs: - name: "Install Composer dependencies" uses: "ramsey/composer-install@v2" - - name: "Check coding style" - run: composer cs + # - name: "Check coding style" + # run: composer cs test: runs-on: ubuntu-latest From b2585b7f35283d3fd813bec5eef9b6d982a5e3a0 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:45:08 +0200 Subject: [PATCH 03/28] Fix SimplePie autodiscovery for text/xml HTML pages (#1) * Fix SimplePie autodiscovery for text/xml HTML pages https://github.com/FreshRSS/FreshRSS/pull/1265 https://github.com/FreshRSS/FreshRSS/issues/1264 * Move comment --- src/Content/Type/Sniffer.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Content/Type/Sniffer.php b/src/Content/Type/Sniffer.php index d1da5fd71..0538b9b5f 100644 --- a/src/Content/Type/Sniffer.php +++ b/src/Content/Type/Sniffer.php @@ -78,9 +78,7 @@ public function get_type() if ($official === 'unknown/unknown' || $official === 'application/unknown') { return $this->unknown(); - } elseif (substr($official, -4) === '+xml' - || $official === 'text/xml' - || $official === 'application/xml') { + } elseif (substr($official, -4) === '+xml') { // FreshRSS return $official; } elseif (substr($official, 0, 6) === 'image/') { if ($return = $this->image()) { @@ -88,7 +86,10 @@ public function get_type() } return $official; - } elseif ($official === 'text/html') { + } elseif ($official === 'text/html' + || $official === 'text/xml' // FreshRSS + || $official === 'application/xml' // FreshRSS + ) { return $this->feed_or_html(); } From b5c61d82b4b15a6d26538e99bd588b96636db73d Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:51:14 +0200 Subject: [PATCH 04/28] Trim body (#4) https://github.com/FreshRSS/FreshRSS/pull/1143 https://github.com/FreshRSS/FreshRSS/issues/1142 --- src/SimplePie.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SimplePie.php b/src/SimplePie.php index 115a07999..bd129ebb2 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2081,6 +2081,7 @@ protected function fetch_data(&$cache) } $this->raw_data = $file->get_body_content(); + $this->raw_data = trim($this->raw_data); $this->permanent_url = $file->get_permanent_uri(); $headers = []; From f792768ec21f6163210b84d25a380adb1f394c77 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 02:27:01 +0200 Subject: [PATCH 05/28] Simplify get_build (#6) That function has too much IO and is not up-to-date https://github.com/FreshRSS/FreshRSS/commit/02d1dac0bb07884b79ddea20980bfcf21131f2d7 --- src/Misc.php | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/Misc.php b/src/Misc.php index d5e07586f..145e0d297 100644 --- a/src/Misc.php +++ b/src/Misc.php @@ -2117,28 +2117,8 @@ public static function get_build() return self::$SIMPLEPIE_BUILD; } - $root = dirname(__FILE__, 2); - if (file_exists($root . '/.git/index')) { - self::$SIMPLEPIE_BUILD = filemtime($root . '/.git/index'); - - return self::$SIMPLEPIE_BUILD; - } elseif (file_exists($root . '/SimplePie')) { - $time = 0; - foreach (glob($root . '/SimplePie/*.php') as $file) { - if (($mtime = filemtime($file)) > $time) { - $time = $mtime; - } - } - self::$SIMPLEPIE_BUILD = $time; - - return self::$SIMPLEPIE_BUILD; - } elseif (file_exists(dirname(__FILE__) . '/Core.php')) { - self::$SIMPLEPIE_BUILD = filemtime(dirname(__FILE__) . '/Core.php'); - - return self::$SIMPLEPIE_BUILD; - } - - self::$SIMPLEPIE_BUILD = filemtime(__FILE__); + $mtime = @filemtime(dirname(__FILE__) . '/SimplePie.php'); // FreshRSS + self::$SIMPLEPIE_BUILD = $mtime ?: (filemtime(__FILE__) ?: 0); // FreshRSS return self::$SIMPLEPIE_BUILD; } From d3ee906e2c7cf6ea172d9f7a8c5e96a900a704c8 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 02:39:00 +0200 Subject: [PATCH 06/28] fix for Atom feeds using namespace for type (#7) https://github.com/FreshRSS/FreshRSS/pull/1893 https://github.com/FreshRSS/FreshRSS/issues/1892 --- src/Misc.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Misc.php b/src/Misc.php index 145e0d297..298ace077 100644 --- a/src/Misc.php +++ b/src/Misc.php @@ -1876,8 +1876,14 @@ public static function atom_10_construct_type(array $attribs) */ public static function atom_10_content_construct_type(array $attribs) { + $type = ''; if (isset($attribs['']['type'])) { - $type = strtolower(trim($attribs['']['type'])); + $type = trim($attribs['']['type']); + } elseif (isset($attribs[\SimplePie\SimplePie::NAMESPACE_ATOM_10]['type'])) { // FreshRSS + $type = trim($attribs[\SimplePie\SimplePie::NAMESPACE_ATOM_10]['type']); + } + if ($type != '') { // FreshRSS + $type = strtolower($type); // FreshRSS switch ($type) { case 'text': return \SimplePie\SimplePie::CONSTRUCT_TEXT; From 2edfc227eec039872260f086a13284d63b45903f Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 09:21:32 +0200 Subject: [PATCH 07/28] Fix absolutize URL for several cases (#8) * Fix absolutize URL for several cases There were a number of bugs related to the fact that `Item::get_links()` and `Item::get_base()` call each-other, making a nice mess during initialisation. See tests. Furthermore, the standard Atom `self` link was not supported, wrongly falling back to `alternate`. In the same PR because otherwise the tests from both PRs would fail. * Minor style * Fix PHPStan * Improved comment --- src/Item.php | 56 ++++++++++++++++++++++++------------ src/SimplePie.php | 13 ++++++--- tests/Unit/EnclosureTest.php | 32 +++++++++++++++++++++ tests/Unit/ItemTest.php | 44 ++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 23 deletions(-) diff --git a/src/Item.php b/src/Item.php index 5d75a1f7f..1e7a29f0b 100644 --- a/src/Item.php +++ b/src/Item.php @@ -117,9 +117,27 @@ public function get_item_tags(string $namespace, string $tag) return null; } + /** + * Get base URL of the item itself. + * Returns `` or feed base URL. + * Similar to `Item::get_base()` but can safely be used during initialisation methods + * such as `Item::get_links()` (`Item::get_base()` and `Item::get_links()` call each-other) + * and is not affected by enclosures. + * + * @param array $element + * @see get_base + */ + protected function get_own_base(array $element = []): string + { + if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { + return $element['xml_base']; + } + return $this->feed->get_base(); + } + /** * Get the base URL value. - * Uses ``, or item link, or feed base URL. + * Uses ``, or item link, or enclosure link, or feed base URL. * * @param array $element * @return string @@ -812,27 +830,27 @@ public function get_links(string $rel = 'alternate') foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_10, 'link') as $link) { if (isset($link['attribs']['']['href'])) { $link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate'; - $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); + $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); } } foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_03, 'link') as $link) { if (isset($link['attribs']['']['href'])) { $link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate'; - $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); + $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); } } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_10, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_090, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'guid')) { if (!isset($links[0]['attribs']['']['isPermaLink']) || strtolower(trim($links[0]['attribs']['']['isPermaLink'])) === 'true') { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } } @@ -1199,11 +1217,11 @@ public function get_enclosures() // PLAYER if ($player_parent = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { if (isset($player_parent[0]['attribs']['']['url'])) { - $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); } } elseif ($player_parent = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { if (isset($player_parent[0]['attribs']['']['url'])) { - $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); } } @@ -1323,13 +1341,13 @@ public function get_enclosures() if ($thumbnails = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { foreach ($thumbnails as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } } elseif ($thumbnails = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { foreach ($thumbnails as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } } @@ -1453,7 +1471,7 @@ public function get_enclosures() if (isset($content['attribs']['']['width'])) { $width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); } - $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); // Checking the other optional media: elements. Priority: media:content, media:group, item, channel @@ -1712,9 +1730,9 @@ public function get_enclosures() // PLAYER if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { - $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); } elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { - $player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); } else { $player = $player_parent; } @@ -1804,14 +1822,14 @@ public function get_enclosures() // THUMBNAILS if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } if (is_array($thumbnails)) { $thumbnails = array_values(array_unique($thumbnails)); } } elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } if (is_array($thumbnails)) { $thumbnails = array_values(array_unique($thumbnails)); @@ -1909,7 +1927,7 @@ public function get_enclosures() $width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); } if (isset($content['attribs']['']['url'])) { - $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); } // Checking the other optional media: elements. Priority: media:content, media:group, item, channel @@ -2064,7 +2082,7 @@ public function get_enclosures() // PLAYER if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) { - $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0])); } } else { $player = $player_parent; @@ -2120,7 +2138,7 @@ public function get_enclosures() if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } if (is_array($thumbnails)) { diff --git a/src/SimplePie.php b/src/SimplePie.php index bd129ebb2..7e9d37b3d 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2457,8 +2457,9 @@ public function get_image_tags(string $namespace, string $tag) /** * Get the base URL value from the feed * - * Uses `` if available, otherwise uses the first link in the - * feed, or failing that, the URL of the feed itself. + * Uses `` if available, + * otherwise uses the first 'self' link or the first 'alternate' link of the feed, + * or failing that, the URL of the feed itself. * * @see get_link * @see subscribe_url @@ -2470,8 +2471,12 @@ public function get_base(array $element = []) { if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { return $element['xml_base']; - } elseif ($this->get_link() !== null) { - return $this->get_link(); + } + if (($link = $this->get_link(0, 'self')) !== null) { + return $link; + } + if (($link = $this->get_link(0, 'alternate')) !== null) { + return $link; } return $this->subscribe_url() ?? ''; diff --git a/tests/Unit/EnclosureTest.php b/tests/Unit/EnclosureTest.php index 25fa9df64..bfd8ea463 100644 --- a/tests/Unit/EnclosureTest.php +++ b/tests/Unit/EnclosureTest.php @@ -88,6 +88,38 @@ public static function getLinkProvider(): iterable , 'http://example.net/link?a=%22b%22&c=%3Cd%3E', ]; + + yield 'Test RSS 2.0 with channel link and enclosure' => [ + << + + http://example.net/tests/ + + /tests/3/ + + + + +XML + , + 'http://example.net/images/3.jpg', + ]; + + yield 'Test RSS 2.0 with Atom channel link and enclosure' => [ + << + + + + /tests/4/ + + + + +XML + , + 'http://example.net/images/4.jpg', + ]; } /** diff --git a/tests/Unit/ItemTest.php b/tests/Unit/ItemTest.php index 458172dfe..a46abfd9d 100644 --- a/tests/Unit/ItemTest.php +++ b/tests/Unit/ItemTest.php @@ -3262,6 +3262,50 @@ public static function getPermalinkDataProvider(): array , 'http://example.com/', ], + 'Test RSS 2.0 with channel link and enclosure from another domain' => [ +<< + + http://example.net/tests/ + + /tests/1/ + + + + +XML + , + 'http://example.net/tests/1/', + ], + 'Test RSS 2.0 with Atom channel link and relative enclosure' => [ +<< + + + + /tests/2/ + + + + +XML + , + 'http://example.net/tests/2/', + ], + 'Test RSS 2.0 with xml:base and enclosure from another domain' => [ +<< + + + /tests/3/ + + + + +XML + , + 'http://example.net/tests/3/', + ], 'Test Atom 1.0 xmlbase 1' => [ << From 91917a1634e68341927882937ac425ada5729f09 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 18 Jun 2024 23:06:56 +0200 Subject: [PATCH 08/28] PHPDoc avoid aliases for scalar types (#10) https://php.net/language.types.declarations Also helps PHPStan, which otherwise makes another lookup as it might be in the SimplePie namespace, and which fails in some cases. --- src/HTTP/Parser.php | 2 +- src/Item.php | 6 +++--- src/Misc.php | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/HTTP/Parser.php b/src/HTTP/Parser.php index 35889365e..60a19ca1f 100644 --- a/src/HTTP/Parser.php +++ b/src/HTTP/Parser.php @@ -475,7 +475,7 @@ protected function chunked() * Prepare headers (take care of proxies headers) * * @param string $headers Raw headers - * @param integer $count Redirection count. Default to 1. + * @param int $count Redirection count. Default to 1. * * @return string */ diff --git a/src/Item.php b/src/Item.php index 1e7a29f0b..2187d308b 100644 --- a/src/Item.php +++ b/src/Item.php @@ -193,7 +193,7 @@ public function get_feed() * MD5 hash based on the permalink, title and content. * * @since Beta 2 - * @param boolean $hash Should we force using a hash instead of the supplied ID? + * @param bool $hash Should we force using a hash instead of the supplied ID? * @param string|false $fn User-supplied function to generate an hash * @return string|null */ @@ -270,7 +270,7 @@ public function get_title() * `` * * @since 0.8 - * @param boolean $description_only Should we avoid falling back to the content? + * @param bool $description_only Should we avoid falling back to the content? * @return string|null */ public function get_description(bool $description_only = false) @@ -320,7 +320,7 @@ public function get_description(bool $description_only = false) * Uses `` or `` (RSS 1.0 Content Module) * * @since 1.0 - * @param boolean $content_only Should we avoid falling back to the description? + * @param bool $content_only Should we avoid falling back to the description? * @return string|null */ public function get_content(bool $content_only = false) diff --git a/src/Misc.php b/src/Misc.php index 298ace077..950e3f9bb 100644 --- a/src/Misc.php +++ b/src/Misc.php @@ -283,7 +283,7 @@ public static function windows_1252_to_utf8(string $string) * @param string $data Raw data in $input encoding * @param string $input Encoding of $data * @param string $output Encoding you want - * @return string|boolean False if we can't convert it + * @return string|bool False if we can't convert it */ public static function change_encoding(string $data, string $input, string $output) { From 9d899294a62767b49b6cd6bcb0d13e5f270a0aab Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Wed, 19 Jun 2024 00:07:36 +0200 Subject: [PATCH 09/28] Readme for the fork --- .github/readme.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/readme.md diff --git a/.github/readme.md b/.github/readme.md new file mode 100644 index 000000000..4bcfa4cb4 --- /dev/null +++ b/.github/readme.md @@ -0,0 +1,17 @@ +# SimplePie + +> SimplePie is a very fast and easy-to-use class, written in PHP, that puts the +> *simple* back into *Really Simple Syndication* (RSS). Flexible enough to suit +> beginners and veterans alike, SimplePie is focused on [speed, ease of use, +> compatibility and standards compliance](http://simplepie.org/wiki/faq/what_is_simplepie). + +[Read the rest of the original readme](../README.markdown). + +## FreshRSS fork + +[This repository `FreshRSS/simplepie`](https://github.com/FreshRSS/simplepie) is a fork of the [main repository `simplepie/simplepie`](https://github.com/simplepie/simplepie) +intended to be used for [FreshRSS](https://github.com/FreshRSS/FreshRSS), which is a free, self-hostable news aggregator. + +The branch [`freshrss`](https://github.com/FreshRSS/simplepie/tree/freshrss) is the one used in [the FreshRSS codebase](https://github.com/FreshRSS/FreshRSS/tree/edge/lib/simplepie). + +This allows in particular to use fixes and patches, which have not (yet) been submitted upstream or not (yet) merged upstream. From a676d407a524dcf48dc072a0c09f683fcda93f11 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Wed, 19 Jun 2024 00:11:33 +0200 Subject: [PATCH 10/28] Readme casing --- .github/{readme.md => README.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{readme.md => README.md} (100%) diff --git a/.github/readme.md b/.github/README.md similarity index 100% rename from .github/readme.md rename to .github/README.md From a2f4d55d3b10f30779084c541db2e15fd2f99671 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 8 Sep 2024 19:24:29 +0200 Subject: [PATCH 11/28] Fix CI (#13) Due to SimplePie not pinning `composer.lock`, CI tests are randomly failing with updates of test tools ``` ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Line src/IRI.php ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ :304 Offset 3 on array{0: string, 1: '', scheme: null, 2: string, 3: string, authority: string, 4: string, path: string, ...}|array{0: string, 1: non-empty-string, scheme: string, 2: string, 3: string, authority: string, 4: string, path: string, ...} in isset() always exists and is not nullable. :307 Offset 5 on array{0: string, 1: '', scheme: null, 2: string, 3: string, authority: string|null, 4: string, path: string, ...}|array{0: string, 1: non-empty-string, scheme: string, 2: string, 3: string, authority: string|null, 4: string, path: string, ...} in isset() always exists and is not nullable. ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ``` --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index f70559bef..9c594d343 100644 --- a/composer.json +++ b/composer.json @@ -32,7 +32,7 @@ "require-dev": { "friendsofphp/php-cs-fixer": "^2.19 || ^3.8", "mf2/mf2": "^0.5.0", - "phpstan/phpstan": "^1.11", + "phpstan/phpstan": "~1.11.11", "phpunit/phpunit": "^8 || ^9 || ^10", "psr/http-client": "^1.0", "psr/http-factory": "^1.0", From a169376d06b7504d6a7109f9049dfd6a84ffef62 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 8 Sep 2024 19:29:46 +0200 Subject: [PATCH 12/28] Hash-based caching (#11) * Hash-based caching https://github.com/simplepie/simplepie/pull/401 https://github.com/FreshRSS/FreshRSS/commit/9aab83af115de3b210ea41caae49b70a0f82492a https://github.com/FreshRSS/FreshRSS/commit/00127f07c5fc784130d658e3f26519b0279fc6b8 * Backport fix https://github.com/FreshRSS/FreshRSS/pull/6723 * A few fixes * Reduce changes * Reduce changes * Relax some tests * Fix comment * Fix a few tests * PHP 7.2 compatibility * Behaviour fixes * Simplification * Comment * Fix tests * Remove debug logs * Minor comment * Fix type mess with $this->data['headers'] --- src/Cache/BaseDataCache.php | 9 ++-- src/SimplePie.php | 59 ++++++++++++++++++++++++-- tests/Integration/CachingTest.php | 3 +- tests/Unit/Cache/BaseDataCacheTest.php | 21 ++++----- 4 files changed, 73 insertions(+), 19 deletions(-) diff --git a/src/Cache/BaseDataCache.php b/src/Cache/BaseDataCache.php index 60207b075..bb1b8c5fa 100644 --- a/src/Cache/BaseDataCache.php +++ b/src/Cache/BaseDataCache.php @@ -55,10 +55,11 @@ public function get_data(string $key, $default = null) return $default; } - // ignore data if internal cache expiration time is expired - if ($data['__cache_expiration_time'] < time()) { - return $default; - } + // FreshRSS commented out, to allow HTTP 304 + // // ignore data if internal cache expiration time is expired + // if ($data['__cache_expiration_time'] < time()) { + // return $default; + // } // remove internal cache expiration time unset($data['__cache_expiration_time']); diff --git a/src/SimplePie.php b/src/SimplePie.php index 9cbba0087..72bde2b38 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1634,6 +1634,40 @@ public function enable_exceptions(bool $enable = true) $this->enable_exceptions = $enable; } + /** + * Computes a hash of the raw feed content, + * after having cleaned it from noisy elements such as statistics or comments. + * FreshRSS + * @return string $rss A hash of the cleaned content, or empty string in case of error. + */ + function clean_hash(string $rss): string + { + if ($rss === '') { + return ''; + } + //Process by chunks not to use too much memory + if (($stream = fopen('php://temp', 'r+')) + && fwrite($stream, $rss) + && rewind($stream) + ) { + $ctx = hash_init('sha1'); + while ($stream_data = fread($stream, 1048576)) { + hash_update( + $ctx, preg_replace( + [ + '#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+#', + '#<(media:starRating|media:statistics) [^/<>]+/>#', + '##s', + ], '', $stream_data + ) + ); + } + fclose($stream); + return hash_final($ctx); + } + return ''; + } + /** * Initialize the feed object * @@ -1641,7 +1675,7 @@ public function enable_exceptions(bool $enable = true) * configuration options get processed, feeds are fetched, cached, and * parsed, and all of that other good stuff. * - * @return bool True if successful, false otherwise + * @return bool|int positive integer with modification time if using cache, boolean true if otherwise successful, false otherwise // FreshRSS */ public function init() { @@ -1729,7 +1763,7 @@ public function init() // Fetch the data into $this->raw_data if (($fetched = $this->fetch_data($cache)) === true) { - return true; + return $this->data['mtime'] ?? true; // FreshRSS } elseif ($fetched === false) { return false; } @@ -1803,6 +1837,8 @@ public function init() $this->data['headers'] = $headers; } $this->data['build'] = Misc::get_build(); + $this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS + $this->data['mtime'] = time(); // FreshRSS // Cache the file if caching is enabled $this->data['cache_expiration_time'] = $this->cache_duration + time(); @@ -1850,6 +1886,7 @@ public function init() * * @param Base|DataCache|false $cache Cache handler, or false to not load from the cache * @return array{array, string}|array{}|bool Returns true if the data was loaded from the cache, or an array of HTTP headers and sniffed type + * @phpstan-impure */ protected function fetch_data(&$cache) { @@ -1901,7 +1938,8 @@ protected function fetch_data(&$cache) $this->data = []; } // Check if the cache has been updated - elseif (isset($this->data['cache_expiration_time']) && $this->data['cache_expiration_time'] > time()) { + // elseif (isset($this->data['cache_expiration_time']) && $this->data['cache_expiration_time'] > time()) { // FreshRSS + elseif (empty($this->data['mtime']) || $this->data['mtime'] + $this->cache_duration <= time()) { // Want to know if we tried to send last-modified and/or etag headers // when requesting this file. (Note that it's up to the file to // support this, but we don't always send the headers either.) @@ -1942,6 +1980,18 @@ protected function fetch_data(&$cache) return true; } } + if (isset($file)) { // FreshRSS + $hash = $this->clean_hash($file->get_body_content()); + if (($this->data['hash'] ?? null) === $hash) { + $this->data['headers'] = array_map(function (array $values): string { + return implode(',', $values); + }, $file->get_headers()); + $cache->set_data($cacheKey, $this->data, $this->cache_duration); + return true; // Content unchanged even though server did not send a 304 + } else { + $this->data['hash'] = $hash; + } + } } // If the cache is still valid, just return true else { @@ -2069,6 +2119,8 @@ protected function fetch_data(&$cache) 'feed_url' => $file->get_final_requested_uri(), 'build' => Misc::get_build(), 'cache_expiration_time' => $this->cache_duration + time(), + 'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS + 'mtime' => time(), // FreshRSS ]; if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { @@ -2081,7 +2133,6 @@ protected function fetch_data(&$cache) } $this->raw_data = $file->get_body_content(); - $this->raw_data = trim($this->raw_data); $this->permanent_url = $file->get_permanent_uri(); $headers = []; diff --git a/tests/Integration/CachingTest.php b/tests/Integration/CachingTest.php index 350410061..1caa0c6b8 100644 --- a/tests/Integration/CachingTest.php +++ b/tests/Integration/CachingTest.php @@ -112,7 +112,8 @@ public function testInitWithDifferentCacheStateCallsCacheCorrectly( $expectedDataWritten['cache_expiration_time'] = $writtenData['cache_expiration_time']; } - $this->assertSame($expectedDataWritten, $writtenData); + // Test that $expectedDataWritten is a subset of $writtenData + $this->assertEmpty(array_udiff_assoc($expectedDataWritten, $writtenData, function ($a, $b) { return json_encode($a) <=> json_encode($b); })); // FreshRSS } /** diff --git a/tests/Unit/Cache/BaseDataCacheTest.php b/tests/Unit/Cache/BaseDataCacheTest.php index e38c92f7d..718a1b5f2 100644 --- a/tests/Unit/Cache/BaseDataCacheTest.php +++ b/tests/Unit/Cache/BaseDataCacheTest.php @@ -69,19 +69,20 @@ public function testGetDataWithCacheMissReturnsDefault(): void $this->assertSame($default, $cache->get_data($key, $default)); } - public function testGetDataWithCacheExpiredReturnsDefault(): void - { - $key = 'name'; - $cachedValue = ['__cache_expiration_time' => 0]; - $default = new stdClass(); + // FreshRSS commented out, to allow HTTP 304 + // public function testGetDataWithCacheExpiredReturnsDefault(): void + // { + // $key = 'name'; + // $cachedValue = ['__cache_expiration_time' => 0]; + // $default = new stdClass(); - $baseCache = $this->createMock(Base::class); - $baseCache->expects($this->once())->method('load')->willReturn($cachedValue); + // $baseCache = $this->createMock(Base::class); + // $baseCache->expects($this->once())->method('load')->willReturn($cachedValue); - $cache = new BaseDataCache($baseCache); + // $cache = new BaseDataCache($baseCache); - $this->assertSame($default, $cache->get_data($key, $default)); - } + // $this->assertSame($default, $cache->get_data($key, $default)); + // } public function testGetDataWithCacheCorruptionReturnsDefault(): void { From ce4cd9f2f98222083f143b17aa160d04288f7e18 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 8 Sep 2024 19:48:39 +0200 Subject: [PATCH 13/28] Fix CI for PHPStan 1.12 CI was broken due to not pinning `composer.lock` AND allowing updates at `1.x.x` level, which may contain breaking changes, e.g. for PHPStan. ``` ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Line src/IRI.php ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ :304 Offset 3 on array{0: string, 1: '', scheme: null, 2: string, 3: string, authority: string, 4: string, path: string, ...}|array{0: string, 1: non-empty-string, scheme: string, 2: string, 3: string, authority: string, 4: string, path: string, ...} in isset() always exists and is not nullable. :307 Offset 5 on array{0: string, 1: '', scheme: null, 2: string, 3: string, authority: string|null, 4: string, path: string, ...}|array{0: string, 1: non-empty-string, scheme: string, 2: string, 3: string, authority: string|null, 4: string, path: string, ...} in isset() always exists and is not nullable. ------ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ``` --- composer.json | 2 +- src/IRI.php | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/composer.json b/composer.json index f70559bef..cc24b51af 100644 --- a/composer.json +++ b/composer.json @@ -32,7 +32,7 @@ "require-dev": { "friendsofphp/php-cs-fixer": "^2.19 || ^3.8", "mf2/mf2": "^0.5.0", - "phpstan/phpstan": "^1.11", + "phpstan/phpstan": "~1.12.2", "phpunit/phpunit": "^8 || ^9 || ^10", "psr/http-client": "^1.0", "psr/http-factory": "^1.0", diff --git a/src/IRI.php b/src/IRI.php index fd906c1ab..4b35b6e94 100644 --- a/src/IRI.php +++ b/src/IRI.php @@ -301,12 +301,10 @@ protected function parse_iri(string $iri) if ($match[1] === '') { $match['scheme'] = null; } - if (!isset($match[3]) || $match[3] === '') { + if ($match[3] === '') { $match['authority'] = null; } - if (!isset($match[5])) { - $match['path'] = ''; - } + $match['path'] = ''; if (!isset($match[6]) || $match[6] === '') { $match['query'] = null; } From f3dcd45f23a5d4df57e03c7e804eec3b845df35a Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 8 Sep 2024 20:19:01 +0200 Subject: [PATCH 14/28] Fix regression --- src/IRI.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IRI.php b/src/IRI.php index 4b35b6e94..60f8fbac2 100644 --- a/src/IRI.php +++ b/src/IRI.php @@ -304,7 +304,6 @@ protected function parse_iri(string $iri) if ($match[3] === '') { $match['authority'] = null; } - $match['path'] = ''; if (!isset($match[6]) || $match[6] === '') { $match['query'] = null; } From 91055d230c3db6e436ada1954c7c1d2e88729fca Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 8 Sep 2024 21:23:08 +0200 Subject: [PATCH 15/28] Add phpcs.xml (#15) https://github.com/simplepie/simplepie/pull/879 --- .editorconfig | 3 +++ phpcs.xml | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 phpcs.xml diff --git a/.editorconfig b/.editorconfig index a760cdf84..7694fb7f7 100644 --- a/.editorconfig +++ b/.editorconfig @@ -26,3 +26,6 @@ indent_style = space [*.{yaml,yml}] indent_size = 2 indent_style = space + +[*.xml] +indent_style = tab diff --git a/phpcs.xml b/phpcs.xml new file mode 100644 index 000000000..f77624289 --- /dev/null +++ b/phpcs.xml @@ -0,0 +1,19 @@ + + + + ./.git/ + ./compatibility_test/ + ./vendor/ + + + + + + + + + + + + + From 0861907a861b493503e81f4c5e0a994021507e23 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 9 Sep 2024 11:44:37 +0200 Subject: [PATCH 16/28] Readme link to differences (#16) --- .github/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/README.md b/.github/README.md index 4bcfa4cb4..6dfdc260f 100644 --- a/.github/README.md +++ b/.github/README.md @@ -12,6 +12,10 @@ [This repository `FreshRSS/simplepie`](https://github.com/FreshRSS/simplepie) is a fork of the [main repository `simplepie/simplepie`](https://github.com/simplepie/simplepie) intended to be used for [FreshRSS](https://github.com/FreshRSS/FreshRSS), which is a free, self-hostable news aggregator. -The branch [`freshrss`](https://github.com/FreshRSS/simplepie/tree/freshrss) is the one used in [the FreshRSS codebase](https://github.com/FreshRSS/FreshRSS/tree/edge/lib/simplepie). +The default branch [`freshrss`](https://github.com/FreshRSS/simplepie/tree/freshrss) is the one used in [the FreshRSS codebase](https://github.com/FreshRSS/FreshRSS/tree/edge/lib/simplepie). This allows in particular to use fixes and patches, which have not (yet) been submitted upstream or not (yet) merged upstream. + +### Differences + +[See the differences](https://github.com/simplepie/simplepie/compare/master...FreshRSS:simplepie:freshrss). From adebff47a058125173996aea5e3befdf89270217 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Mon, 9 Sep 2024 12:51:01 +0200 Subject: [PATCH 17/28] clean_hash visibility (#17) --- src/SimplePie.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SimplePie.php b/src/SimplePie.php index 72bde2b38..445344c7d 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1640,7 +1640,7 @@ public function enable_exceptions(bool $enable = true) * FreshRSS * @return string $rss A hash of the cleaned content, or empty string in case of error. */ - function clean_hash(string $rss): string + private function clean_hash(string $rss): string { if ($rss === '') { return ''; From 6bfc34890b2ec00dd7e07a3b5361ed5fade43da5 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 10 Sep 2024 12:31:33 +0200 Subject: [PATCH 18/28] CI re-enable php-cs-fixer (#18) --- .github/workflows/ci.yml | 4 ++-- src/SimplePie.php | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0663c0a24..f2394040d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,8 +35,8 @@ jobs: - name: "Install Composer dependencies" uses: "ramsey/composer-install@v2" - # - name: "Check coding style" - # run: composer cs + - name: "Check coding style" + run: composer cs test: runs-on: ubuntu-latest diff --git a/src/SimplePie.php b/src/SimplePie.php index 445344c7d..295a9dbcd 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1653,12 +1653,15 @@ private function clean_hash(string $rss): string $ctx = hash_init('sha1'); while ($stream_data = fread($stream, 1048576)) { hash_update( - $ctx, preg_replace( + $ctx, + preg_replace( [ '#<(lastBuildDate|pubDate|updated|feedDate|dc:date|slash:comments)>[^<]+#', '#<(media:starRating|media:statistics) [^/<>]+/>#', '##s', - ], '', $stream_data + ], + '', + $stream_data ) ); } From d71972fbb73779b9a6b66c7b32dfec3d7765854d Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Tue, 10 Sep 2024 22:48:36 +0200 Subject: [PATCH 19/28] on_http_response (#19) * on_http_response * Comment --- src/File.php | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/File.php b/src/File.php index 4bb0f5507..2a6177f6b 100644 --- a/src/File.php +++ b/src/File.php @@ -133,6 +133,10 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, $responseHeaders = curl_exec($fp); if (curl_errno($fp) === 23 || curl_errno($fp) === 61) { + $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); // FreshRSS + $this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE); // FreshRSS + $this->on_http_response(); + $this->error = null; // FreshRSS curl_setopt($fp, CURLOPT_ENCODING, 'none'); $responseHeaders = curl_exec($fp); } @@ -140,7 +144,9 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, if (curl_errno($fp)) { $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp); $this->success = false; + $this->on_http_response(); } else { + $this->on_http_response(); // Use the updated url provided by curl_getinfo after any redirects. if ($info = curl_getinfo($fp)) { $this->url = $info['url']; @@ -176,6 +182,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, if (!$fp) { $this->error = 'fsockopen error: ' . $errstr; $this->success = false; + $this->on_http_response(); } else { stream_set_timeout($fp, $timeout); if (isset($url_parts['path'])) { @@ -216,6 +223,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, $this->set_headers($parser->headers); $this->body = $parser->body; $this->status_code = $parser->status_code; + $this->on_http_response(); if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) { $this->redirects++; $location = \SimplePie\Misc::absolutize_url($locationHeader, $url); @@ -255,10 +263,15 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, $this->success = false; } } + } else { + $this->error = 'Could not parse'; // FreshRSS + $this->success = false; // FreshRSS + $this->on_http_response(); } } else { $this->error = 'fsocket timed out'; $this->success = false; + $this->on_http_response(); } fclose($fp); } @@ -273,9 +286,19 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, $this->body = trim($filebody); $this->status_code = 200; } + $this->on_http_response(); } } + /** + * Event to allow inheriting classes to e.g. log the HTTP responses. + * Triggered just after an HTTP response is received. + * FreshRSS. + */ + protected function on_http_response(): void + { + } + public function get_permanent_uri(): string { return (string) $this->permanent_url; From 44aea920b6792096f7fb1d8ac93026889a5149ee Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Wed, 11 Sep 2024 14:55:50 +0200 Subject: [PATCH 20/28] Better update cache metadata (#20) * Better update cache metadata * Minor optimisation --- src/SimplePie.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/SimplePie.php b/src/SimplePie.php index 295a9dbcd..cdf6bfdb6 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1978,6 +1978,13 @@ protected function fetch_data(&$cache) // Set raw_data to false here too, to signify that the cache // is still valid. $this->raw_data = false; + if (isset($file)) { // FreshRSS + // Update cache metadata + $this->data['mtime'] = time(); + $this->data['headers'] = array_map(function (array $values): string { + return implode(',', $values); + }, $file->get_headers()); + } $cache->set_data($cacheKey, $this->data, $this->cache_duration); return true; @@ -1986,10 +1993,13 @@ protected function fetch_data(&$cache) if (isset($file)) { // FreshRSS $hash = $this->clean_hash($file->get_body_content()); if (($this->data['hash'] ?? null) === $hash) { + // Update cache metadata + $this->data['mtime'] = time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); $cache->set_data($cacheKey, $this->data, $this->cache_duration); + return true; // Content unchanged even though server did not send a 304 } else { $this->data['hash'] = $hash; From f99d21bc542836e45eeb69aab506464a8d1c783e Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 13:46:13 +0200 Subject: [PATCH 21/28] Get_hash (#21) * New function get_hash For easier FreshRSS patches * PHPDoc see --- src/SimplePie.php | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/SimplePie.php b/src/SimplePie.php index cdf6bfdb6..18f274e65 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1639,6 +1639,7 @@ public function enable_exceptions(bool $enable = true) * after having cleaned it from noisy elements such as statistics or comments. * FreshRSS * @return string $rss A hash of the cleaned content, or empty string in case of error. + * @see SimplePie::get_hash() */ private function clean_hash(string $rss): string { @@ -1678,7 +1679,7 @@ private function clean_hash(string $rss): string * configuration options get processed, feeds are fetched, cached, and * parsed, and all of that other good stuff. * - * @return bool|int positive integer with modification time if using cache, boolean true if otherwise successful, false otherwise // FreshRSS + * @return bool True if successful, false otherwise */ public function init() { @@ -1766,7 +1767,7 @@ public function init() // Fetch the data into $this->raw_data if (($fetched = $this->fetch_data($cache)) === true) { - return $this->data['mtime'] ?? true; // FreshRSS + return true; } elseif ($fetched === false) { return false; } @@ -1841,7 +1842,6 @@ public function init() } $this->data['build'] = Misc::get_build(); $this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS - $this->data['mtime'] = time(); // FreshRSS // Cache the file if caching is enabled $this->data['cache_expiration_time'] = $this->cache_duration + time(); @@ -1889,7 +1889,6 @@ public function init() * * @param Base|DataCache|false $cache Cache handler, or false to not load from the cache * @return array{array, string}|array{}|bool Returns true if the data was loaded from the cache, or an array of HTTP headers and sniffed type - * @phpstan-impure */ protected function fetch_data(&$cache) { @@ -1941,8 +1940,7 @@ protected function fetch_data(&$cache) $this->data = []; } // Check if the cache has been updated - // elseif (isset($this->data['cache_expiration_time']) && $this->data['cache_expiration_time'] > time()) { // FreshRSS - elseif (empty($this->data['mtime']) || $this->data['mtime'] + $this->cache_duration <= time()) { + elseif (empty($this->data['cache_expiration_time']) || $this->data['cache_expiration_time'] > time()) { // FreshRSS, https://github.com/simplepie/simplepie/pull/846 // Want to know if we tried to send last-modified and/or etag headers // when requesting this file. (Note that it's up to the file to // support this, but we don't always send the headers either.) @@ -1980,7 +1978,6 @@ protected function fetch_data(&$cache) $this->raw_data = false; if (isset($file)) { // FreshRSS // Update cache metadata - $this->data['mtime'] = time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); @@ -1994,7 +1991,6 @@ protected function fetch_data(&$cache) $hash = $this->clean_hash($file->get_body_content()); if (($this->data['hash'] ?? null) === $hash) { // Update cache metadata - $this->data['mtime'] = time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); @@ -2133,7 +2129,6 @@ protected function fetch_data(&$cache) 'build' => Misc::get_build(), 'cache_expiration_time' => $this->cache_duration + time(), 'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS - 'mtime' => time(), // FreshRSS ]; if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { @@ -2179,6 +2174,18 @@ public function status_code() return $this->status_code; } + /** + * Get the last hash of the feed + * + * @return string Hash of the content of the feed + * @see SimplePie::clean_hash() + * FreshRSS + */ + public function get_hash(): string + { + return $this->data['hash'] ?? ''; + } + /** * Get the raw XML * From cc0e6e1ca2de29171d85f8a6a8d66c5602d9000f Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 15:15:39 +0200 Subject: [PATCH 22/28] Cache_version (#22) * Introduce a cache version To avoid destroying the cache at every minor update of the SimplePie file, which is an unnacceptable waste of resources https://github.com/FreshRSS/FreshRSS/pull/4374 * Add FreshRSS comment --- src/SimplePie.php | 14 ++++++++++++-- tests/Integration/CachingTest.php | 13 +++++++++++-- tests/Integration/SimplePieTest.php | 2 ++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/SimplePie.php b/src/SimplePie.php index 18f274e65..5302f8f1f 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -41,6 +41,13 @@ class SimplePie */ public const VERSION = '1.8.0'; + /** + * SimplePie cache version. + * To be updated when the cache format changes. + * FreshRSS + */ + public const CACHE_VERSION = 1; + /** * SimplePie Website URL */ @@ -1841,6 +1848,7 @@ public function init() $this->data['headers'] = $headers; } $this->data['build'] = Misc::get_build(); + $this->data['cache_version'] = self::CACHE_VERSION; // FreshRSS $this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS // Cache the file if caching is enabled @@ -1915,7 +1923,8 @@ protected function fetch_data(&$cache) if (!empty($this->data)) { // If the cache is for an outdated build of SimplePie - if (!isset($this->data['build']) || $this->data['build'] !== Misc::get_build()) { + // if (!isset($this->data['build']) || $this->data['build'] !== Misc::get_build()) { + if (($this->data['cache_version'] ?? null) !== self::CACHE_VERSION) { // FreshRSS $cache->delete_data($cacheKey); $this->data = []; } @@ -1940,7 +1949,7 @@ protected function fetch_data(&$cache) $this->data = []; } // Check if the cache has been updated - elseif (empty($this->data['cache_expiration_time']) || $this->data['cache_expiration_time'] > time()) { // FreshRSS, https://github.com/simplepie/simplepie/pull/846 + elseif (empty($this->data['cache_expiration_time']) || $this->data['cache_expiration_time'] > time()) { // FreshRSS // Want to know if we tried to send last-modified and/or etag headers // when requesting this file. (Note that it's up to the file to // support this, but we don't always send the headers either.) @@ -2128,6 +2137,7 @@ protected function fetch_data(&$cache) 'feed_url' => $file->get_final_requested_uri(), 'build' => Misc::get_build(), 'cache_expiration_time' => $this->cache_duration + time(), + 'cache_version' => self::CACHE_VERSION, // FreshRSS 'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS ]; diff --git a/tests/Integration/CachingTest.php b/tests/Integration/CachingTest.php index 1caa0c6b8..8391390b1 100644 --- a/tests/Integration/CachingTest.php +++ b/tests/Integration/CachingTest.php @@ -111,9 +111,11 @@ public function testInitWithDifferentCacheStateCallsCacheCorrectly( if (array_key_exists('cache_expiration_time', $writtenData)) { $expectedDataWritten['cache_expiration_time'] = $writtenData['cache_expiration_time']; } + if (isset($writtenData['hash'])) { // FreshRSS + $expectedDataWritten['hash'] = $writtenData['hash']; + } - // Test that $expectedDataWritten is a subset of $writtenData - $this->assertEmpty(array_udiff_assoc($expectedDataWritten, $writtenData, function ($a, $b) { return json_encode($a) <=> json_encode($b); })); // FreshRSS + $this->assertEqualsCanonicalizing($expectedDataWritten, $writtenData); // FreshRSS } /** @@ -144,6 +146,7 @@ public static function provideSavedCacheData(): array ], 'build' => Misc::get_build(), 'cache_expiration_time' => 0, // Needs to be adjust in test case + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $expectNoDataWritten = []; @@ -153,6 +156,7 @@ public static function provideSavedCacheData(): array 'feed_url' => 'http://example.com/feed.xml/', 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultExpirationTime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $currentlyCachedDataIsUpdated = [ @@ -175,6 +179,7 @@ public static function provideSavedCacheData(): array ], 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultExpirationTime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $currentlyCachedDataIsValid = [ @@ -197,6 +202,7 @@ public static function provideSavedCacheData(): array ], 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultMtime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $currentlyNoDataIsCached = []; @@ -209,6 +215,7 @@ public static function provideSavedCacheData(): array 'url' => 'http://example.com/some-different-url', 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultExpirationTime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $currentlyCachedDataWithFeedUrl = [ @@ -216,6 +223,7 @@ public static function provideSavedCacheData(): array 'feed_url' => 'http://example.com/feed/', 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultExpirationTime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; $currentlyCachedDataWithNonFeedUrl = [ @@ -223,6 +231,7 @@ public static function provideSavedCacheData(): array 'feed_url' => 'http://example.com/feed.xml/', 'build' => Misc::get_build(), 'cache_expiration_time' => $defaultExpirationTime, + 'cache_version' => \SimplePie\SimplePie::CACHE_VERSION, // FreshRSS ]; return [ diff --git a/tests/Integration/SimplePieTest.php b/tests/Integration/SimplePieTest.php index bd20adc58..6e00827d7 100644 --- a/tests/Integration/SimplePieTest.php +++ b/tests/Integration/SimplePieTest.php @@ -147,6 +147,7 @@ public function testRequestingAFeedFromCacheWorks(): void // Fix build in cache $data['build'] = \SimplePie\Misc::get_build(); + $data['cache_version'] = \SimplePie\SimplePie::CACHE_VERSION; // FreshRSS return $data; }); @@ -196,6 +197,7 @@ public function testRequestingAFeedFromPsr16CacheWorks(): void // Fix build in cache $data['build'] = \SimplePie\Misc::get_build(); + $data['cache_version'] = \SimplePie\SimplePie::CACHE_VERSION; // FreshRSS return $data; }); From 8458f8dbf9ac4c92a646bd958a85ab81a4dd86f6 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 16:15:53 +0200 Subject: [PATCH 23/28] Simplepie846 (#23) * Merge SimplePie fix 846 https://github.com/simplepie/simplepie/pull/846 * Fix tests running in the same second --- src/SimplePie.php | 5 ++++- tests/Integration/CachingTest.php | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/SimplePie.php b/src/SimplePie.php index 5302f8f1f..475085e73 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -1949,7 +1949,7 @@ protected function fetch_data(&$cache) $this->data = []; } // Check if the cache has been updated - elseif (empty($this->data['cache_expiration_time']) || $this->data['cache_expiration_time'] > time()) { // FreshRSS + elseif (empty($this->data['cache_expiration_time']) || $this->data['cache_expiration_time'] < time()) { // FreshRSS https://github.com/simplepie/simplepie/pull/846 // Want to know if we tried to send last-modified and/or etag headers // when requesting this file. (Note that it's up to the file to // support this, but we don't always send the headers either.) @@ -1973,6 +1973,7 @@ protected function fetch_data(&$cache) $this->status_code = 0; if ($this->force_cache_fallback) { + $this->data['cache_expiration_time'] = $this->cache_duration + time(); // FreshRSS $cache->set_data($cacheKey, $this->data, $this->cache_duration); return true; @@ -1987,6 +1988,7 @@ protected function fetch_data(&$cache) $this->raw_data = false; if (isset($file)) { // FreshRSS // Update cache metadata + $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); @@ -2000,6 +2002,7 @@ protected function fetch_data(&$cache) $hash = $this->clean_hash($file->get_body_content()); if (($this->data['hash'] ?? null) === $hash) { // Update cache metadata + $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); diff --git a/tests/Integration/CachingTest.php b/tests/Integration/CachingTest.php index 8391390b1..c6f532e69 100644 --- a/tests/Integration/CachingTest.php +++ b/tests/Integration/CachingTest.php @@ -123,7 +123,7 @@ public function testInitWithDifferentCacheStateCallsCacheCorrectly( */ public static function provideSavedCacheData(): array { - $defaultMtime = time(); + $defaultMtime = time() - 1; // FreshRSS: -1 to account for tests running in the same second $defaultExpirationTime = $defaultMtime + 3600; $expectDefaultDataWritten = [ @@ -256,10 +256,10 @@ public static function provideSavedCacheData(): array [CacheInterface::class, $currentlyCachedDataWithNonFeedUrl, $expectDataWithNewFeedUrl, $defaultMtime], // Check if the cache has been updated [Base::class, $currentlyCachedDataIsUpdated, $expectDefaultDataWritten, $defaultMtime], - [CacheInterface::class, $currentlyCachedDataIsUpdated, $expectDefaultDataWritten, $defaultMtime], + [CacheInterface::class, $currentlyCachedDataIsUpdated, $expectNoDataWritten, $defaultMtime], // FreshRSS https://github.com/simplepie/simplepie/pull/846 // If the cache is still valid, just return true [Base::class, $currentlyCachedDataIsValid, $expectDefaultDataWritten, $defaultMtime], - [CacheInterface::class, $currentlyCachedDataIsValid, $expectNoDataWritten, $defaultMtime], + [CacheInterface::class, $currentlyCachedDataIsValid, $expectDefaultDataWritten, $defaultMtime], // FreshRSS https://github.com/simplepie/simplepie/pull/846 ]; } } From f699bf8ebd3ba2b3cfadf38160e4cfa7233cf219 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 22:32:31 +0200 Subject: [PATCH 24/28] PHP 8.4 | Fix implicitly nullable parameters (#24) PHP 8.4 deprecates implicitly nullable parameters, i.e. typed parameter with a `null` default value, which are not explicitly declared as nullable. Includes updating the documentation to match. Ref: https://wiki.php.net/rfc/deprecate-implicitly-nullable-types Co-authored-by: jrfnl --- src/IRI.php | 4 ++-- src/Sanitize.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IRI.php b/src/IRI.php index 60f8fbac2..25b342a85 100644 --- a/src/IRI.php +++ b/src/IRI.php @@ -203,9 +203,9 @@ public function __unset(string $name) /** * Create a new IRI object, from a specified string * - * @param string $iri + * @param string|null $iri */ - public function __construct(string $iri = null) + public function __construct(?string $iri = null) { $this->set_iri($iri); } diff --git a/src/Sanitize.php b/src/Sanitize.php index 4d4c1fdd5..1050b2266 100644 --- a/src/Sanitize.php +++ b/src/Sanitize.php @@ -148,7 +148,7 @@ public function set_registry(\SimplePie\Registry $registry) * @param class-string $cache_class * @return void */ - public function pass_cache_data(bool $enable_cache = true, string $cache_location = './cache', $cache_name_function = 'md5', string $cache_class = Cache::class, DataCache $cache = null) + public function pass_cache_data(bool $enable_cache = true, string $cache_location = './cache', $cache_name_function = 'md5', string $cache_class = Cache::class, ?DataCache $cache = null) { $this->enable_cache = $enable_cache; From 1cabd55aee050a0a665685d8ec700f1edd5c5160 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 14 Sep 2024 22:33:15 +0200 Subject: [PATCH 25/28] Feature/php 8.4 fix trigger user error (#25) * PHP 8.4 | Fix passing E_USER_ERROR to trigger_error() [1] PHP 8.4 deprecates passing `E_USER_ERROR` to `trigger_error()`, with the recommendation being to replace these type of calls with either an Exception or an `exit` statement. This commit fixes an instance found in the autoloader by changing it to a call to `exit`, which considering it is an error related to the autoloader not being registered seems the most appropriate. Ref: https://wiki.php.net/rfc/deprecations_php_8_4#deprecate_passing_e_user_error_to_trigger_error * PHP 8.4 | Fix passing E_USER_ERROR to trigger_error() [2] PHP 8.4 deprecates passing `E_USER_ERROR` to `trigger_error()`, with the recommendation being to replace these type of calls with either an Exception or an `exit` statement. This commit fixes another instance of this issue. In this case, the call to `trigger_error()` is replaced by throwing a generic `SimplePie\Exception` for the same. Ref: https://wiki.php.net/rfc/deprecations_php_8_4#deprecate_passing_e_user_error_to_trigger_error * PHP 8.4 | Fix passing E_USER_ERROR to trigger_error() [3] PHP 8.4 deprecates passing `E_USER_ERROR` to `trigger_error()`, with the recommendation being to replace these type of calls with either an Exception or an `exit` statement. This commit fixes the last instance of this issue. In this case, the call to `trigger_error()` is also replaced by throwing a generic `SimplePie\Exception` for the same. Ref: https://wiki.php.net/rfc/deprecations_php_8_4#deprecate_passing_e_user_error_to_trigger_error * QA: replace another call to `trigger_error()` with `exit` ... the call to `trigger_error()` would now throw a PHP notice, but with the `die()` after it, with still kill the program anyhow, so we may as well, exit with the error message and remove the `trigger_error()` function call completely. --------- Co-authored-by: jrfnl --- autoloader.php | 2 +- src/Gzdecode.php | 2 +- src/SimplePie.php | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/autoloader.php b/autoloader.php index e25ec4420..4356b456f 100644 --- a/autoloader.php +++ b/autoloader.php @@ -49,7 +49,7 @@ if (!class_exists('SimplePie')) { - trigger_error('Autoloader not registered properly', E_USER_ERROR); + exit('Autoloader not registered properly'); } /** diff --git a/src/Gzdecode.php b/src/Gzdecode.php index 64de6d135..6adfc05cc 100644 --- a/src/Gzdecode.php +++ b/src/Gzdecode.php @@ -142,7 +142,7 @@ class Gzdecode */ public function __set(string $name, $value) { - trigger_error("Cannot write property $name", E_USER_ERROR); + throw new Exception("Cannot write property $name"); } /** diff --git a/src/SimplePie.php b/src/SimplePie.php index 475085e73..cc4adc3df 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -678,8 +678,7 @@ class SimplePie public function __construct() { if (version_compare(PHP_VERSION, '7.2', '<')) { - trigger_error('Please upgrade to PHP 7.2 or newer.'); - die(); + exit('Please upgrade to PHP 7.2 or newer.'); } $this->set_useragent(); @@ -3387,7 +3386,7 @@ public function __call(string $method, array $args) $trace = debug_backtrace(); $file = $trace[0]['file']; $line = $trace[0]['line']; - trigger_error("Call to undefined method $class::$method() in $file on line $line", E_USER_ERROR); + throw new SimplePieException("Call to undefined method $class::$method() in $file on line $line"); } /** From 7090eedb1358d95c002282a645ef915d5ce55c56 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 20 Sep 2024 23:19:42 +0200 Subject: [PATCH 26/28] Negotiate cache_expiration_time (#26) * Add support for HTTP cache headers - `Cache-Control: max-age` minus `Age`, extendable by `$simplepie_cache_duration` - `Cache-Control: must-revalidate` will prevent `$simplepie_cache_duration` from extending past the `max-age` - `Cache-Control: no-cache` will return the current time - `Cache-Control: no-store` will return `0` - `Expires` but only if `Cache-Control: max-age` is absent * Refined logic * More doc * Add support for `s-maxage` * Typos * Pragmatism? * Typos * Remove support of s-maxage Seems to only make sense for CDNs * Move order of headers * Remove superfluous comments * Remove wrong comment * Workaround for buggy servers returning wrong cache-control headers for 304 responses * fix logic for 304 workaround * Fix whitespace * Minor simplification --- src/HTTP/Utils.php | 79 ++++++++++++++++++++++++++++++++++++++++++++++ src/SimplePie.php | 73 +++++++++++++++++++++++++++++++++++------- 2 files changed, 140 insertions(+), 12 deletions(-) create mode 100644 src/HTTP/Utils.php diff --git a/src/HTTP/Utils.php b/src/HTTP/Utils.php new file mode 100644 index 000000000..b2e05e04b --- /dev/null +++ b/src/HTTP/Utils.php @@ -0,0 +1,79 @@ + $http_headers HTTP headers of the response + * @return int|null The `max-age` value or `null` if not found + * + * FreshRSS + */ + public static function get_http_max_age(array $http_headers): ?int + { + $cache_control = $http_headers['cache-control'] ?? null; + if (is_string($cache_control) && preg_match('/\bmax-age=(\d+)\b/', $cache_control, $matches)) { + return (int) $matches[1]; + } + return null; + } + + /** + * Negotiate the cache expiration time based on the HTTP response headers. + * Return the cache duration time in number of seconds since the Unix Epoch, accounting for: + * - `Cache-Control: max-age` minus `Age`, bounded by `$cache_duration_min` and `$cache_duration_max` + * - `Cache-Control: must-revalidate` will set `$cache_duration` to `$cache_duration_min` + * - `Cache-Control: no-cache` will return `time() + $cache_duration_min` + * - `Cache-Control: no-store` will return `time() + $cache_duration_min - 3` + * - `Expires` like `Cache-Control: max-age` but only if it is absent + * + * @param array $http_headers HTTP headers of the response + * @param int $cache_duration Desired cache duration in seconds, potentially overridden by HTTP response headers + * @param int $cache_duration_min Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`, + * @param int $cache_duration_max Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control: max-age` and `Expires`, + * @return int The negotiated cache expiration time in seconds since the Unix Epoch + * + * FreshRSS + */ + public static function negociate_cache_expiration_time(array $http_headers, int $cache_duration, int $cache_duration_min, int $cache_duration_max): int + { + $cache_control = $http_headers['cache-control'] ?? ''; + if ($cache_control !== '') { + if (preg_match('/\bno-store\b/', $cache_control)) { + return time() + $cache_duration_min - 3; // -3 to distinguish from no-cache if needed + } + if (preg_match('/\bno-cache\b/', $cache_control)) { + return time() + $cache_duration_min; + } + if (preg_match('/\bmust-revalidate\b/', $cache_control)) { + $cache_duration = $cache_duration_min; + } + if (preg_match('/\bmax-age=(\d+)\b/', $cache_control, $matches)) { + $max_age = (int) $matches[1]; + $age = $http_headers['age'] ?? ''; + if (is_numeric($age)) { + $max_age -= (int) $age; + } + return time() + min(max($max_age, $cache_duration), $cache_duration_max); + } + } + $expires = $http_headers['expires'] ?? ''; + if ($expires !== '') { + $expire_date = \SimplePie\Misc::parse_date($expires); + if ($expire_date !== false) { + return min(max($expire_date, time() + $cache_duration), time() + $cache_duration_max); + } + } + return time() + $cache_duration; + } +} diff --git a/src/SimplePie.php b/src/SimplePie.php index cc4adc3df..455a90a2f 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -509,12 +509,28 @@ class SimplePie public $force_cache_fallback = false; /** - * @var int Cache duration (in seconds) + * @var int Cache duration (in seconds), but may be overridden by HTTP response headers (FreshRSS) * @see SimplePie::set_cache_duration() * @access private */ public $cache_duration = 3600; + /** + * @var int Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires` + * @see SimplePie::set_cache_duration() + * @access private + * FreshRSS + */ + public $cache_duration_min = 60; + + /** + * @var int Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires` + * @see SimplePie::set_cache_duration() + * @access private + * FreshRSS + */ + public $cache_duration_max = 86400; + /** * @var int Auto-discovery cache duration (in seconds) * @see SimplePie::set_autodiscovery_cache_duration() @@ -989,12 +1005,26 @@ public function force_cache_fallback(bool $enable = false) * Set the length of time (in seconds) that the contents of a feed will be * cached * - * @param int $seconds The feed content cache duration + * FreshRSS: The cache is (partially) HTTP compliant, with the following rules: + * + * @param int $seconds The feed content cache duration, which may be overridden by HTTP response headers) + * @param int $min The minimum cache duration (default: 60s), overriding HTTP response headers `Cache-Control` and `Expires` + * @param int $max The maximum cache duration (default: 24h), overriding HTTP response headers `Cache-Control` and `Expires` * @return void */ - public function set_cache_duration(int $seconds = 3600) + public function set_cache_duration(int $seconds = 3600, ?int $min = null, ?int $max = null) { - $this->cache_duration = $seconds; + $this->cache_duration = max(0, $seconds); + if (is_int($min)) { // FreshRSS + $this->cache_duration_min = min(max(0, $min), $seconds); + } elseif ($this->cache_duration_min > $seconds) { + $this->cache_duration_min = $seconds; + } + if (is_int($max)) { // FreshRSS + $this->cache_duration_max = max($seconds, $max); + } elseif ($this->cache_duration_max < $seconds) { + $this->cache_duration_max = $seconds; + } } /** @@ -1851,7 +1881,7 @@ public function init() $this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS // Cache the file if caching is enabled - $this->data['cache_expiration_time'] = $this->cache_duration + time(); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); if ($cache && !$cache->set_data($this->get_cache_filename($this->feed_url), $this->data, $this->cache_duration)) { trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); @@ -1972,8 +2002,10 @@ protected function fetch_data(&$cache) $this->status_code = 0; if ($this->force_cache_fallback) { - $this->data['cache_expiration_time'] = $this->cache_duration + time(); // FreshRSS - $cache->set_data($cacheKey, $this->data, $this->cache_duration); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); // FreshRSS + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS + trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); + } return true; } @@ -1986,13 +2018,28 @@ protected function fetch_data(&$cache) // is still valid. $this->raw_data = false; if (isset($file)) { // FreshRSS + $old_cache_control = $this->data['headers']['cache-control'] ?? ''; + $old_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']); + // Update cache metadata - $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); + + // Workaround for buggy servers returning wrong cache-control headers for 304 responses + if ($old_max_age !== null) { + $new_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']); + if ($new_max_age === null || $new_max_age > $old_max_age) { + // Allow servers to return a shorter cache duration for 304 responses, but not longer + $this->data['headers']['cache-control'] = $old_cache_control; + } + } + + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); + } + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS + trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); } - $cache->set_data($cacheKey, $this->data, $this->cache_duration); return true; } @@ -2001,11 +2048,13 @@ protected function fetch_data(&$cache) $hash = $this->clean_hash($file->get_body_content()); if (($this->data['hash'] ?? null) === $hash) { // Update cache metadata - $this->data['cache_expiration_time'] = $this->cache_duration + time(); $this->data['headers'] = array_map(function (array $values): string { return implode(',', $values); }, $file->get_headers()); - $cache->set_data($cacheKey, $this->data, $this->cache_duration); + $this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); + if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { + trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING); + } return true; // Content unchanged even though server did not send a 304 } else { @@ -2138,7 +2187,7 @@ protected function fetch_data(&$cache) 'url' => $this->feed_url, 'feed_url' => $file->get_final_requested_uri(), 'build' => Misc::get_build(), - 'cache_expiration_time' => $this->cache_duration + time(), + 'cache_expiration_time' => \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max), // FreshRSS 'cache_version' => self::CACHE_VERSION, // FreshRSS 'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS ]; From e041dacffc6e29979437d1545773a3e0f716c486 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 21 Sep 2024 15:37:17 +0200 Subject: [PATCH 27/28] Remove HTTP Referer (#27) https://github.com/FreshRSS/FreshRSS/pull/6523 https://github.com/FreshRSS/FreshRSS/issues/6811 --- src/File.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/File.php b/src/File.php index 2a6177f6b..b56fac342 100644 --- a/src/File.php +++ b/src/File.php @@ -124,7 +124,7 @@ public function __construct(string $url, int $timeout = 10, int $redirects = 5, curl_setopt($fp, CURLOPT_FAILONERROR, 1); curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); - curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); + // curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url)); // FreshRSS removed curl_setopt($fp, CURLOPT_USERAGENT, $useragent); curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2); foreach ($curl_options as $curl_param => $curl_value) { From 5e92ce767597aa9f5cbb00d88c7591b2e604dc5f Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sat, 21 Sep 2024 23:01:43 +0200 Subject: [PATCH 28/28] More PHPCS (#28) https://github.com/FreshRSS/simplepie/pull/15 --- phpcs.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/phpcs.xml b/phpcs.xml index f77624289..6ec1797bf 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -7,6 +7,7 @@ + @@ -16,4 +17,13 @@ + + ./demo/ + + + ./library/ + + + ./library/ +