fossar / selfoss

multipurpose rss reader, live stream, mashup, aggregation web application
https://selfoss.aditu.de
GNU General Public License v3.0
2.38k stars 345 forks source link

Dynamic OPML #1316

Open amazighdotmobi opened 2 years ago

amazighdotmobi commented 2 years ago

Hi, would it be possible to work with dynamic OPMLs available online? For example: https://diigo2dynopml.e-formation.mobi/conversion.php?url=https%3A%2F%2Fwww.diigo.com%2Frss%2Fuser%2Falexdup44%2FRSS_EDU Thank you in advance for your answer.

jtojnar commented 2 years ago

How do you imagine this should work? Would a single source aggregating all of those feeds be okay? Or do you want a single source for each feed in the OPML file?

amazighdotmobi commented 2 years ago

Thank you very much for the answer. What would be very useful is to aggregate all the feeds from an OPML source and, in addition to the web view, make an aggregated feed available in RSS format.

jtojnar commented 2 years ago

Copying the following code to src/spouts/rss/DynOpml.php file should work with selfoss 2.19:

<?php

namespace spouts\rss;

use helpers\WebClient;
use Monolog\Logger;
use SimpleXMLElement;

/**
 * Spout for fetching feeds linked in a dynamic (remotely hosted) OPML file.
 *
 * The OPML document is downloaded on every load(), the xmlUrl attribute of
 * each leaf outline element is collected, and the items of all those feeds
 * are exposed through a single aggregated source.
 */
class DynOpml extends \spouts\spout {
    /** @var string name of source */
    public $name = 'Dynamic OPML';

    /** @var string description of this source type */
    public $description = 'Get posts from plain RSS/Atom feeds listed in an OPML file.';

    /** @var array configurable parameters */
    public $params = [
        'url' => [
            'title' => 'URL',
            'type' => 'url',
            'default' => '',
            'required' => true,
            'validation' => ['notempty'],
        ],
    ];

    /** @var ?string URL of the source */
    protected $htmlUrl = null;

    /** @var Logger */
    private $logger;

    /** @var feed spout used to load each feed listed in the OPML document */
    private $feed;

    /** @var WebClient */
    private $webClient;

    /** @var ?\Generator<string> feed URLs found by the last load(), null when nothing is loaded */
    private $feedUrls;

    public function __construct(feed $feed, Logger $logger, WebClient $webClient) {
        $this->logger = $logger;
        $this->feed = $feed;
        $this->webClient = $webClient;
    }

    /**
     * Download the OPML document and prepare the list of feed URLs it contains.
     *
     * The sub-feeds themselves are not fetched here; that happens lazily
     * while iterating getItems().
     *
     * @param array $params source parameters, the OPML location is in 'url'
     *
     * @throws \Exception when SimpleXML is unavailable, or the response is not
     *                    a well-formed OPML document with a body element
     */
    public function load(array $params) {
        if (!function_exists('simplexml_load_string')) {
            throw new \Exception('Missing SimpleXML PHP extension. Please install/enable it as described on https://www.php.net/manual/en/simplexml.installation.php');
        }

        // Decode the parameter the same way getXmlUrl() does, so the document
        // that is fetched is the one reported as this source's XML URL.
        $url = html_entity_decode($params['url']);

        $http = $this->webClient->getHttpClient();
        $this->logger->debug('DynOpml: Loading outline: ' . $url);
        $response = $http->get($url);

        // simplexml_load_string() returns false for malformed XML; fail with
        // a clear message instead of dereferencing a boolean below.
        $opml = simplexml_load_string((string) $response->getBody());
        if ($opml === false) {
            throw new \Exception('DynOpml: Unable to parse OPML document from ' . $url);
        }

        // Without a body element there is no node to search for outlines.
        if (!isset($opml->body)) {
            throw new \Exception('DynOpml: OPML document from ' . $url . ' has no body element');
        }

        $this->feedUrls = $this->collectFeeds($opml->body);

        // NOTE(review): kept as the raw parameter value, exactly as before — confirm
        // whether consumers of getHtmlUrl() expect the decoded form instead.
        $this->htmlUrl = $params['url'];
        $this->spoutTitle = isset($opml->head->title) ? (string) $opml->head->title : null;
    }

    /**
     * Recursively yield the xmlUrl attribute of every leaf outline element.
     *
     * @param SimpleXMLElement $xml element whose direct outline children are inspected
     *
     * @return \Generator<string> feed URLs in document order
     */
    private function collectFeeds(SimpleXMLElement $xml) {
        // Only un-namespaced outline elements that are direct children are matched.
        $outlines = $xml->xpath('outline');
        if (!is_array($outlines)) {
            // xpath() reports failure with a non-array value; treat as "no outlines".
            return;
        }

        foreach ($outlines as $outline) {
            if (count($outline->children()) > 0) {
                // outline element has children, recurse into it
                $feeds = $this->collectFeeds($outline);
                // Do not use `yield from`, it re-uses keys.
                foreach ($feeds as $feed) {
                    yield $feed;
                }
            } else {
                $attrs = $outline->attributes(null);
                // RSS URL
                if (isset($attrs->xmlUrl)) {
                    yield (string) $attrs->xmlUrl;
                }
            }
        }
    }

    /**
     * @param array $params source parameters
     *
     * @return ?string entity-decoded OPML URL, or null when no URL is configured
     */
    public function getXmlUrl(array $params) {
        return isset($params['url']) ? html_entity_decode($params['url']) : null;
    }

    /**
     * @return ?string value stored by the last load(), null before any load
     */
    public function getHtmlUrl() {
        return $this->htmlUrl;
    }

    /**
     * @return null this spout provides no icon of its own
     */
    public function getIcon() {
        return null;
    }

    /**
     * Fetch every sub-feed in turn and yield its items.
     *
     * A sub-feed that fails to load is logged and skipped so that the
     * remaining feeds are still processed.
     *
     * @return \Generator<Item<SimplePie\Item>> list of items
     */
    public function getItems() {
        // Nothing has been loaded (or destroy() was already called): no items.
        if ($this->feedUrls === null) {
            return;
        }

        foreach ($this->feedUrls as $feedUrl) {
            $this->logger->debug('DynOpml: Fetching subfeed: ' . $feedUrl);
            try {
                $this->feed->load([
                    'url' => $feedUrl,
                ]);

                foreach ($this->feed->getItems() as $item) {
                    // Do not use `yield from`, it re-uses keys.
                    yield $item;
                }
            } catch (\Exception $exception) {
                $this->logger->error('DynOpml: Fetching subfeed ' . $feedUrl . ' failed', ['exception' => $exception]);
            }
        }
    }

    /**
     * Release the wrapped feed spout and forget the collected feed URLs.
     */
    public function destroy() {
        $this->feed->destroy();
        $this->feedUrls = null;
    }
}
amazighdotmobi commented 2 years ago

Thank you very much, I will test and get back to you as soon as possible.