From b63ada6deaf465e094a01d950d9ea3c7cae0466e Mon Sep 17 00:00:00 2001 From: Lewis Dale Date: Wed, 8 Mar 2023 21:27:47 +0000 Subject: [PATCH] Finish implementing endpoint discovery for Webmentions --- composer.json | 3 +- composer.lock | 226 ++++++++++++++++++++++++++++++++++- src/EndpointParser.php | 54 ++++++--- src/Webmention.php | 5 +- tests/EndpointParserTest.php | 44 ++++++- 5 files changed, 312 insertions(+), 20 deletions(-) diff --git a/composer.json b/composer.json index 321dc4e..dc3bcfe 100644 --- a/composer.json +++ b/composer.json @@ -18,6 +18,7 @@ "require": { "symfony/dom-crawler": "^6.2", "symfony/css-selector": "^6.2", - "symfony/http-client": "^6.2" + "symfony/http-client": "^6.2", + "league/uri": "^6.8" } } diff --git a/composer.lock b/composer.lock index 9f5ee9e..142251e 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,179 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "d432732c93eebfe24b583d70e2f89661", + "content-hash": "352975aee49cdf2b7ee0c1f9a6ced635", "packages": [ + { + "name": "league/uri", + "version": "6.8.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/uri.git", + "reference": "a700b4656e4c54371b799ac61e300ab25a2d1d39" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/uri/zipball/a700b4656e4c54371b799ac61e300ab25a2d1d39", + "reference": "a700b4656e4c54371b799ac61e300ab25a2d1d39", + "shasum": "" + }, + "require": { + "ext-json": "*", + "league/uri-interfaces": "^2.3", + "php": "^8.1", + "psr/http-message": "^1.0.1" + }, + "conflict": { + "league/uri-schemes": "^1.0" + }, + "require-dev": { + "friendsofphp/php-cs-fixer": "^v3.9.5", + "nyholm/psr7": "^1.5.1", + "php-http/psr7-integration-tests": "^1.1.1", + "phpbench/phpbench": "^1.2.6", + "phpstan/phpstan": "^1.8.5", + "phpstan/phpstan-deprecation-rules": "^1.0", + "phpstan/phpstan-phpunit": "^1.1.1", + "phpstan/phpstan-strict-rules": "^1.4.3", + "phpunit/phpunit": "^9.5.24", + "psr/http-factory": "^1.0.1" + }, + "suggest": { + "ext-fileinfo": "Needed to create Data URI from a filepath", + "ext-intl": "Needed to improve host validation", + "league/uri-components": "Needed to easily manipulate URI objects", + "psr/http-factory": "Needed to use the URI factory" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "6.x-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Uri\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://nyamsprod.com" + } + ], + "description": "URI manipulation library", + "homepage": "https://uri.thephpleague.com", + "keywords": [ + "data-uri", + "file-uri", + "ftp", + "hostname", + "http", + "https", + "middleware", + "parse_str", + "parse_url", + "psr-7", + "query-string", + "querystring", + "rfc3986", + "rfc3987", + "rfc6570", + "uri", + "uri-template", + "url", + "ws" + ], + "support": { + "docs": "https://uri.thephpleague.com", + "forum": "https://thephpleague.slack.com", + "issues": "https://github.com/thephpleague/uri/issues", + "source": "https://github.com/thephpleague/uri/tree/6.8.0" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2022-09-13T19:58:47+00:00" + }, + { + "name": "league/uri-interfaces", + "version": "2.3.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/uri-interfaces.git", + "reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/00e7e2943f76d8cb50c7dfdc2f6dee356e15e383", + "reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383", + "shasum": "" + }, + "require": { + "ext-json": "*", + "php": "^7.2 || ^8.0" + }, + "require-dev": { + "friendsofphp/php-cs-fixer": "^2.19", + "phpstan/phpstan": "^0.12.90", + "phpstan/phpstan-phpunit": "^0.12.19", + "phpstan/phpstan-strict-rules": "^0.12.9", + "phpunit/phpunit": "^8.5.15 || ^9.5" + }, + "suggest": { + "ext-intl": "to use the IDNA feature", + "symfony/intl": "to use the IDNA feature via Symfony Polyfill" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.x-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Uri\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://nyamsprod.com" + } + ], + "description": "Common interface for URI representation", + "homepage": "http://github.com/thephpleague/uri-interfaces", + "keywords": [ + "rfc3986", + "rfc3987", + "uri", + "url" + ], + "support": { + "issues": "https://github.com/thephpleague/uri-interfaces/issues", + "source": "https://github.com/thephpleague/uri-interfaces/tree/2.3.0" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2021-06-28T04:27:21+00:00" + }, { "name": "masterminds/html5", "version": "2.7.6", @@ -128,6 +299,59 @@ }, "time": "2021-11-05T16:47:00+00:00" }, + { + "name": "psr/http-message", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-message.git", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363", + "reference": "f6561bf28d520154e4b0ec72be95418abe6d9363", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Message\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for HTTP messages", + "homepage": "https://github.com/php-fig/http-message", + "keywords": [ + "http", + "http-message", + "psr", + "psr-7", + "request", + "response" + ], + "support": { + "source": "https://github.com/php-fig/http-message/tree/master" + }, + "time": "2016-08-06T14:39:51+00:00" + }, { "name": "psr/log", "version": "3.0.0", diff --git a/src/EndpointParser.php b/src/EndpointParser.php index 8960548..ea6fa0a 100644 --- a/src/EndpointParser.php +++ b/src/EndpointParser.php @@ -4,34 +4,60 @@ namespace Lewisdale\Webmentions; use Symfony\Contracts\HttpClient\ResponseInterface; use Symfony\Component\DomCrawler\Crawler; +use League\Uri\Uri; +use League\Uri\UriInfo; +use League\Uri\UriResolver; class EndpointParser { public static function parse(ResponseInterface $response) : string | null { + $endpoint = self::parseHeaders($response) ?? self::parseBody($response); + return self::absoluteURL($response, $endpoint); + } + + private static function parseHeaders(ResponseInterface $response) : string | null { $headers = $response->getHeaders(); - $endpoint = null; if (isset($headers["link"])) { - $link = $headers["link"][0]; - if (preg_match('/rel=("?)webmention("?)/', $link)) { - $matches = []; - preg_match('/\<(..*?)\>/', $link, $matches); + foreach($headers["link"] as $link) { + if (preg_match('/rel=("?)webmention("?)/', $link)) { + $matches = []; + preg_match('/\<(..*?)\>/', $link, $matches); - if (count($matches) > 1) { - $endpoint = $matches[1]; + if (count($matches) > 1) { + return $matches[1]; + } } - } + } } + return null; + } + + private static function absoluteURL(ResponseInterface $response, ?string $endpoint) : string | null { if (!$endpoint) { - $doc = new Crawler($response->getContent()); - $webmention = $doc->filter('rel="webmention"')->first(); + return null; } - if ($endpoint && !str_contains($endpoint, "https://")) { - $res = parse_url($response->getInfo('url')); - $endpoint = $res["scheme"] . "://" . $res["host"] . $endpoint; + $url = $response->getInfo("redirect_url") ?? $response->getInfo('url'); + $uri = Uri::createFromString($url); + $endpoint = Uri::createFromString($endpoint); + + if (!UriInfo::isAbsolute($endpoint)) { + return (string) UriResolver::resolve($endpoint, $uri); } - return $endpoint; + + return (string) $endpoint; + } + + private static function parseBody(ResponseInterface $response) : string | null { + $doc = new Crawler($response->getContent()); + $webmentions = $doc->filter('[rel="webmention"][href]'); + + if ($webmentions->count()) { + return $webmentions->first()->attr('href'); + } + + return null; } } \ No newline at end of file diff --git a/src/Webmention.php b/src/Webmention.php index b738e11..fc104bb 100644 --- a/src/Webmention.php +++ b/src/Webmention.php @@ -2,7 +2,6 @@ namespace Lewisdale\Webmentions; -use DOMDocument; use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\HttpClient\HttpClient; use Symfony\Contracts\HttpClient\HttpClientInterface; @@ -22,14 +21,14 @@ class Webmention { // $this->sendWebmention($source, $target); // } - $this->sendWebmention($source, "https://webmention.rocks/test/1"); + $this->sendWebmention($source, "https://webmention.rocks/test/23/page"); } private function getUrls(string $url) : array { $page = file_get_contents($url); $doc = new Crawler($page); - $urls = []; + $urls = []; foreach ($doc->filter('.h-entry a') as $anchor) { $target_url = $anchor->attributes->getNamedItem('href')->textContent; diff --git a/tests/EndpointParserTest.php b/tests/EndpointParserTest.php index 75255d4..e745008 100644 --- a/tests/EndpointParserTest.php +++ b/tests/EndpointParserTest.php @@ -4,7 +4,6 @@ use PHPUnit\Framework\TestCase; use Symfony\Contracts\HttpClient\ResponseInterface; use Lewisdale\Webmentions\EndpointParser; - class EndpointParserTest extends TestCase { public function testParsesRelativeEndpointFromResponseHeaders() { $response = $this->createStub(ResponseInterface::class); @@ -68,6 +67,49 @@ class EndpointParserTest extends TestCase { ->willReturn('https://webmention.rocks/test/4'); $this->assertSame("https://webmention.rocks/test/4/webmention", EndpointParser::parse($response)); + } + public function testSkipsParsingEndpointWithNoHref() { + $content = << + + + + +

Some content

+ The real webmention + + + XML; + + $response = $this->createStub(ResponseInterface::class); + $response->method('getHeaders')->willReturn([]); + $response->method('getContent')->willReturn($content); + $response->method('getInfo') + ->willReturn('https://webmention.rocks/test/6'); + + $this->assertSame("https://webmention.rocks/test/6/webmention", EndpointParser::parse($response)); + } + + public function testParseRelativeToPath() { + $content = << + + + + +

Some content

+ The real webmention + + + XML; + + $response = $this->createStub(ResponseInterface::class); + $response->method('getHeaders')->willReturn([]); + $response->method('getContent')->willReturn($content); + $response->method('getInfo') + ->willReturn('https://webmention.rocks/test/153'); + + $this->assertSame("https://webmention.rocks/test/153/webmention", EndpointParser::parse($response)); } } \ No newline at end of file