diff options
Diffstat (limited to 'classes')
-rw-r--r-- | classes/FeedParser.php | 130 | ||||
-rw-r--r-- | classes/Feeds.php | 7 | ||||
-rw-r--r-- | classes/RSSUtils.php | 6 |
3 files changed, 79 insertions, 64 deletions
diff --git a/classes/FeedParser.php b/classes/FeedParser.php index 8a117bde4..e0b16010c 100644 --- a/classes/FeedParser.php +++ b/classes/FeedParser.php @@ -14,8 +14,8 @@ class FeedParser { private ?string $title = null; - /** @var FeedParser::FEED_*|null */ - private ?int $type = null; + /** @var FeedParser::FEED_* */ + private int $type; private ?DOMXPath $xpath = null; @@ -27,6 +27,9 @@ class FeedParser { function __construct(string $data) { libxml_use_internal_errors(true); libxml_clear_errors(); + + $this->type = $this::FEED_UNKNOWN; + $this->doc = new DOMDocument(); $this->doc->loadXML($data); @@ -43,71 +46,54 @@ class FeedParser { } } } + libxml_clear_errors(); - } - function init() : void { - $xpath = new DOMXPath($this->doc); - $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); - $xpath->registerNamespace('atom03', 'http://purl.org/atom/ns#'); - $xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/'); - $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); - $xpath->registerNamespace('slash', 'http://purl.org/rss/1.0/modules/slash/'); - $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/'); - $xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/'); - $xpath->registerNamespace('thread', 'http://purl.org/syndication/thread/1.0'); + if ($this->error) + return; - $this->xpath = $xpath; + $this->xpath = new DOMXPath($this->doc); + $this->xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); + $this->xpath->registerNamespace('atom03', 'http://purl.org/atom/ns#'); + $this->xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/'); + $this->xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); + $this->xpath->registerNamespace('slash', 'http://purl.org/rss/1.0/modules/slash/'); + $this->xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/'); + $this->xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/'); + $this->xpath->registerNamespace('thread', 'http://purl.org/syndication/thread/1.0'); + } - $root_list = $xpath->query("(//atom03:feed|//atom:feed|//channel|//rdf:rdf|//rdf:RDF)"); + /** + * @return bool false if initialization couldn't occur (e.g. parsing error or unrecognized feed type), otherwise true + */ + function init(): bool { + if ($this->error) + return false; - if (!empty($root_list) && $root_list->length > 0) { + $type = $this->get_type(); - /** @var DOMElement|null $root */ - $root = $root_list->item(0); + if ($type === self::FEED_UNKNOWN) + return false; - if ($root) { - $this->type = match (mb_strtolower($root->tagName)) { - 'rdf:rdf' => $this::FEED_RDF, - 'channel' => $this::FEED_RSS, - 'feed', 'atom:feed' => $this::FEED_ATOM, - default => $this::FEED_UNKNOWN, - }; - - if ($this->type === $this::FEED_UNKNOWN) { - $this->error ??= 'Unknown/unsupported feed type'; - return; - } - } + $xpath = $this->xpath; - switch ($this->type) { + switch ($type) { case $this::FEED_ATOM: - - $title = $xpath->query("//atom:feed/atom:title")->item(0); - - if (!$title) - $title = $xpath->query("//atom03:feed/atom03:title")->item(0); - + $title = $xpath->query('//atom:feed/atom:title')->item(0) + ?? $xpath->query('//atom03:feed/atom03:title')->item(0); if ($title) { $this->title = $title->nodeValue; } - $link = $xpath->query("//atom:feed/atom:link[not(@rel)]")->item(0); - - if (!$link) - $link = $xpath->query("//atom:feed/atom:link[@rel='alternate']")->item(0); - - if (!$link) - $link = $xpath->query("//atom03:feed/atom03:link[not(@rel)]")->item(0); - - if (!$link) - $link = $xpath->query("//atom03:feed/atom03:link[@rel='alternate']")->item(0); - /** @var DOMElement|null $link */ - if ($link && $link->hasAttributes()) { - $this->link = $link->getAttribute("href"); - } + $link = $xpath->query('//atom:feed/atom:link[not(@rel)]')->item(0) + ?? $xpath->query("//atom:feed/atom:link[@rel='alternate']")->item(0) + ?? $xpath->query('//atom03:feed/atom03:link[not(@rel)]')->item(0) + ?? $xpath->query("//atom03:feed/atom03:link[@rel='alternate']")->item(0); + + if ($link?->getAttribute('href')) + $this->link = $link->getAttribute('href'); $articles = $xpath->query("//atom:entry"); @@ -165,16 +151,15 @@ class FeedParser { } break; + } - } + if ($this->title) + $this->title = trim($this->title); - if ($this->title) $this->title = trim($this->title); - if ($this->link) $this->link = trim($this->link); + if ($this->link) + $this->link = trim($this->link); - } else { - $this->error ??= "Unknown/unsupported feed type"; - return; - } + return true; } /** @deprecated use Errors::format_libxml_error() instead */ @@ -192,6 +177,33 @@ class FeedParser { return $this->libxml_errors; } + /** + * @return FeedParser::FEED_* + */ + function get_type(): int { + if ($this->type !== self::FEED_UNKNOWN || $this->error) + return $this->type; + + $root_list = $this->xpath->query('(//atom03:feed|//atom:feed|//channel|//rdf:rdf|//rdf:RDF)'); + + if ($root_list && $root_list->length > 0) { + /** @var DOMElement $root */ + $root = $root_list->item(0); + + $this->type = match (mb_strtolower($root->tagName)) { + 'rdf:rdf' => self::FEED_RDF, + 'channel' => self::FEED_RSS, + 'feed', 'atom:feed' => self::FEED_ATOM, + default => self::FEED_UNKNOWN, + }; + } + + if ($this->type === self::FEED_UNKNOWN) + $this->error ??= 'Unknown/unsupported feed type'; + + return $this->type; + } + function get_link() : string { return clean($this->link ?? ''); } diff --git a/classes/Feeds.php b/classes/Feeds.php index 1900b40f1..e76044060 100644 --- a/classes/Feeds.php +++ b/classes/Feeds.php @@ -999,7 +999,7 @@ class Feeds extends Handler_Protected { * Here you should call extractfeedurls in rpc-backend * to get all possible feeds. * 5 - Couldn't download the URL content. - * 6 - currently unused + * 6 - Feed parsing failure (invalid content) * 7 - Error while creating feed database entry. * 8 - Permission denied (ACCESS_LEVEL_READONLY). */ @@ -1059,6 +1059,11 @@ class Feeds extends Handler_Protected { $url = key($feedUrls); } + // Don't allow subscribing if the content is invalid + $fp = new FeedParser($contents); + if ($fp->error() || $fp->get_type() === FeedParser::FEED_UNKNOWN) + return ['code' => 6, 'message' => truncate_string(clean($contents), 250, '…')]; + $feed = ORM::for_table('ttrss_feeds') ->where('feed_url', $url) ->where('owner_uid', $_SESSION['uid']) diff --git a/classes/RSSUtils.php b/classes/RSSUtils.php index bfa79de71..540069b6a 100644 --- a/classes/RSSUtils.php +++ b/classes/RSSUtils.php @@ -305,9 +305,8 @@ class RSSUtils { if ($feed_data) { $rss = new FeedParser($feed_data); - $rss->init(); - if (!$rss->error()) { + if ($rss->init()) { $basic_info = [ 'title' => mb_substr(clean($rss->get_title()), 0, 199), 'site_url' => mb_substr(UrlHelper::rewrite_relative($feed->feed_url, clean($rss->get_link())), 0, 245), @@ -590,9 +589,8 @@ class RSSUtils { } $rss = new FeedParser($feed_data); - $rss->init(); - if (!$rss->error()) { + if ($rss->init()) { Debug::log("running HOOK_FEED_PARSED handlers...", Debug::LOG_VERBOSE); |