Finish implementing endpoint discovery for Webmentions

This commit is contained in:
Lewis Dale 2023-03-08 21:27:47 +00:00
parent e5f7595321
commit b63ada6dea
5 changed files with 312 additions and 20 deletions

View File

@ -18,6 +18,7 @@
"require": {
"symfony/dom-crawler": "^6.2",
"symfony/css-selector": "^6.2",
"symfony/http-client": "^6.2"
"symfony/http-client": "^6.2",
"league/uri": "^6.8"
}
}

226
composer.lock generated
View File

@ -4,8 +4,179 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "d432732c93eebfe24b583d70e2f89661",
"content-hash": "352975aee49cdf2b7ee0c1f9a6ced635",
"packages": [
{
"name": "league/uri",
"version": "6.8.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri.git",
"reference": "a700b4656e4c54371b799ac61e300ab25a2d1d39"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri/zipball/a700b4656e4c54371b799ac61e300ab25a2d1d39",
"reference": "a700b4656e4c54371b799ac61e300ab25a2d1d39",
"shasum": ""
},
"require": {
"ext-json": "*",
"league/uri-interfaces": "^2.3",
"php": "^8.1",
"psr/http-message": "^1.0.1"
},
"conflict": {
"league/uri-schemes": "^1.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^v3.9.5",
"nyholm/psr7": "^1.5.1",
"php-http/psr7-integration-tests": "^1.1.1",
"phpbench/phpbench": "^1.2.6",
"phpstan/phpstan": "^1.8.5",
"phpstan/phpstan-deprecation-rules": "^1.0",
"phpstan/phpstan-phpunit": "^1.1.1",
"phpstan/phpstan-strict-rules": "^1.4.3",
"phpunit/phpunit": "^9.5.24",
"psr/http-factory": "^1.0.1"
},
"suggest": {
"ext-fileinfo": "Needed to create Data URI from a filepath",
"ext-intl": "Needed to improve host validation",
"league/uri-components": "Needed to easily manipulate URI objects",
"psr/http-factory": "Needed to use the URI factory"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "6.x-dev"
}
},
"autoload": {
"psr-4": {
"League\\Uri\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "URI manipulation library",
"homepage": "https://uri.thephpleague.com",
"keywords": [
"data-uri",
"file-uri",
"ftp",
"hostname",
"http",
"https",
"middleware",
"parse_str",
"parse_url",
"psr-7",
"query-string",
"querystring",
"rfc3986",
"rfc3987",
"rfc6570",
"uri",
"uri-template",
"url",
"ws"
],
"support": {
"docs": "https://uri.thephpleague.com",
"forum": "https://thephpleague.slack.com",
"issues": "https://github.com/thephpleague/uri/issues",
"source": "https://github.com/thephpleague/uri/tree/6.8.0"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"time": "2022-09-13T19:58:47+00:00"
},
{
"name": "league/uri-interfaces",
"version": "2.3.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/uri-interfaces.git",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"reference": "00e7e2943f76d8cb50c7dfdc2f6dee356e15e383",
"shasum": ""
},
"require": {
"ext-json": "*",
"php": "^7.2 || ^8.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^2.19",
"phpstan/phpstan": "^0.12.90",
"phpstan/phpstan-phpunit": "^0.12.19",
"phpstan/phpstan-strict-rules": "^0.12.9",
"phpunit/phpunit": "^8.5.15 || ^9.5"
},
"suggest": {
"ext-intl": "to use the IDNA feature",
"symfony/intl": "to use the IDNA feature via Symfony Polyfill"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.x-dev"
}
},
"autoload": {
"psr-4": {
"League\\Uri\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://nyamsprod.com"
}
],
"description": "Common interface for URI representation",
"homepage": "http://github.com/thephpleague/uri-interfaces",
"keywords": [
"rfc3986",
"rfc3987",
"uri",
"url"
],
"support": {
"issues": "https://github.com/thephpleague/uri-interfaces/issues",
"source": "https://github.com/thephpleague/uri-interfaces/tree/2.3.0"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"time": "2021-06-28T04:27:21+00:00"
},
{
"name": "masterminds/html5",
"version": "2.7.6",
@ -128,6 +299,59 @@
},
"time": "2021-11-05T16:47:00+00:00"
},
{
"name": "psr/http-message",
"version": "1.0.1",
"source": {
"type": "git",
"url": "https://github.com/php-fig/http-message.git",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-fig/http-message/zipball/f6561bf28d520154e4b0ec72be95418abe6d9363",
"reference": "f6561bf28d520154e4b0ec72be95418abe6d9363",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Psr\\Http\\Message\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "PHP-FIG",
"homepage": "http://www.php-fig.org/"
}
],
"description": "Common interface for HTTP messages",
"homepage": "https://github.com/php-fig/http-message",
"keywords": [
"http",
"http-message",
"psr",
"psr-7",
"request",
"response"
],
"support": {
"source": "https://github.com/php-fig/http-message/tree/master"
},
"time": "2016-08-06T14:39:51+00:00"
},
{
"name": "psr/log",
"version": "3.0.0",

View File

@ -4,34 +4,60 @@ namespace Lewisdale\Webmentions;
use Symfony\Contracts\HttpClient\ResponseInterface;
use Symfony\Component\DomCrawler\Crawler;
use League\Uri\Uri;
use League\Uri\UriInfo;
use League\Uri\UriResolver;
class EndpointParser {
/**
 * Discovers the Webmention endpoint advertised by a fetched page.
 *
 * The response headers are consulted first; the body is only parsed when
 * the header lookup yields null. Whatever was found (possibly null) is then
 * handed to absoluteURL() together with the response.
 *
 * @param ResponseInterface $response Response for the page being inspected.
 *
 * @return string|null Whatever absoluteURL() returns for the discovered endpoint.
 */
public static function parse(ResponseInterface $response) : string | null
{
    $fromHeaders = self::parseHeaders($response);
    $candidate = $fromHeaders !== null ? $fromHeaders : self::parseBody($response);

    return self::absoluteURL($response, $candidate);
}
/**
 * Looks for a Webmention endpoint in the response's "link" headers.
 *
 * For a header value that mentions rel=webmention, the text found between
 * the first "<" and ">" of that value is the endpoint; null is returned when
 * no header value qualifies.
 *
 * NOTE(review): this listing looks like a rendered diff in which the removed
 * lines (single lookup of $headers["link"][0], assignment to $endpoint) and
 * the added lines (foreach over every value, early return) are interleaved;
 * the braces do not balance as shown. Confirm against the committed file.
 *
 * @param ResponseInterface $response Response whose headers are inspected.
 *
 * @return string|null The captured endpoint exactly as written in the header, or null.
 */
private static function parseHeaders(ResponseInterface $response) : string | null {
$headers = $response->getHeaders();
$endpoint = null;
if (isset($headers["link"])) {
// Presumably the pre-change variant: only the first "link" value is examined.
$link = $headers["link"][0];
if (preg_match('/rel=("?)webmention("?)/', $link)) {
$matches = [];
preg_match('/\<(..*?)\>/', $link, $matches);
// Presumably the post-change variant: every "link" value is examined in turn.
foreach($headers["link"] as $link) {
// Requires "rel=" to be followed directly by an optional double quote and
// then "webmention".
// NOTE(review): a value such as rel="other webmention" would not match.
if (preg_match('/rel=("?)webmention("?)/', $link)) {
$matches = [];
// Captures the first <...> pair in the value (at least one character inside).
// NOTE(review): if one header value carries several comma-separated links,
// the captured target may belong to a different link than the webmention
// one; verify.
preg_match('/\<(..*?)\>/', $link, $matches);
if (count($matches) > 1) {
$endpoint = $matches[1];
if (count($matches) > 1) {
return $matches[1];
}
}
}
}
return null;
}
/**
 * Turns a discovered endpoint into an absolute URL string.
 *
 * A falsy endpoint yields null. Otherwise the endpoint is parsed as a URI;
 * when it is not absolute it is resolved via UriResolver::resolve() against
 * the response's "redirect_url" info, falling back to its "url" info when
 * that is null.
 *
 * NOTE(review): this listing looks like a rendered diff with removed and
 * added lines interleaved (the Crawler lookup, the str_contains()/parse_url()
 * branch and the bare "return $endpoint;" appear to be the removed ones).
 * Confirm against the committed file.
 *
 * @param ResponseInterface $response Response the endpoint was discovered in.
 * @param string|null       $endpoint Endpoint as written in the header or markup.
 *
 * @return string|null Absolute endpoint URL, or null when no endpoint was given.
 */
private static function absoluteURL(ResponseInterface $response, ?string $endpoint) : string | null {
// NOTE(review): "!" is a truthiness test, so "" and "0" are rejected along
// with null; confirm whether an empty href should instead resolve to the
// page's own URL.
if (!$endpoint) {
$doc = new Crawler($response->getContent());
$webmention = $doc->filter('rel="webmention"')->first();
return null;
}
if ($endpoint && !str_contains($endpoint, "https://")) {
$res = parse_url($response->getInfo('url'));
$endpoint = $res["scheme"] . "://" . $res["host"] . $endpoint;
// Base URL for resolution: "redirect_url" when it is not null, else "url".
$url = $response->getInfo("redirect_url") ?? $response->getInfo('url');
$uri = Uri::createFromString($url);
$endpoint = Uri::createFromString($endpoint);
// Only non-absolute endpoints are resolved against the base URL.
if (!UriInfo::isAbsolute($endpoint)) {
return (string) UriResolver::resolve($endpoint, $uri);
}
return $endpoint;
return (string) $endpoint;
}
/**
 * Looks for a Webmention endpoint in the response body.
 *
 * The body is loaded into a Crawler and searched for elements that carry
 * both an href attribute and a rel attribute containing the "webmention"
 * token. The href of the first such element is returned as written; it is
 * not resolved to an absolute URL here.
 *
 * @param ResponseInterface $response Response whose content is parsed.
 *
 * @return string|null The first matching element's href, or null when none matches.
 */
private static function parseBody(ResponseInterface $response) : string | null {
    $doc = new Crawler($response->getContent());

    // rel holds a space-separated list of link types, so "~=" is used to find
    // the webmention token anywhere in that list (e.g. rel="webmention nofollow");
    // an exact "=" match would miss such elements. Elements without an href
    // are excluded by the selector itself.
    $webmentions = $doc->filter('[rel~="webmention"][href]');

    if ($webmentions->count() === 0) {
        return null;
    }

    return $webmentions->first()->attr('href');
}
}

View File

@ -2,7 +2,6 @@
namespace Lewisdale\Webmentions;
use DOMDocument;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Contracts\HttpClient\HttpClientInterface;
@ -22,14 +21,14 @@ class Webmention {
// $this->sendWebmention($source, $target);
// }
$this->sendWebmention($source, "https://webmention.rocks/test/1");
$this->sendWebmention($source, "https://webmention.rocks/test/23/page");
}
private function getUrls(string $url) : array {
$page = file_get_contents($url);
$doc = new Crawler($page);
$urls = [];
$urls = [];
foreach ($doc->filter('.h-entry a') as $anchor) {
$target_url = $anchor->attributes->getNamedItem('href')->textContent;

View File

@ -4,7 +4,6 @@ use PHPUnit\Framework\TestCase;
use Symfony\Contracts\HttpClient\ResponseInterface;
use Lewisdale\Webmentions\EndpointParser;
class EndpointParserTest extends TestCase {
public function testParsesRelativeEndpointFromResponseHeaders() {
$response = $this->createStub(ResponseInterface::class);
@ -68,6 +67,49 @@ class EndpointParserTest extends TestCase {
->willReturn('https://webmention.rocks/test/4');
$this->assertSame("https://webmention.rocks/test/4/webmention", EndpointParser::parse($response));
}
/**
 * A <link rel="webmention"> without an href is passed over, and the later
 * <a rel="webmention"> supplies the endpoint, which comes back as an
 * absolute URL on the page's host.
 */
public function testSkipsParsingEndpointWithNoHref() {
    $markup = <<<MARKUP
<html>
<head>
<link rel="webmention" />
</head>
<body>
<h1>Some content</h1>
<a href="/test/6/webmention" rel="webmention">The real webmention</a>
</body>
</html>
MARKUP;

    $stub = $this->createStub(ResponseInterface::class);
    $stub->method('getHeaders')->willReturn([]);
    $stub->method('getContent')->willReturn($markup);
    // The stub answers every getInfo() key with the page URL.
    $stub->method('getInfo')->willReturn('https://webmention.rocks/test/6');

    $expected = "https://webmention.rocks/test/6/webmention";

    self::assertSame($expected, EndpointParser::parse($stub));
}
/**
 * An href with no leading slash ("153/webmention") is resolved relative to
 * the page's path rather than to the host root.
 */
public function testParseRelativeToPath() {
    $markup = <<<MARKUP
<html>
<head>
<link rel="webmention" />
</head>
<body>
<h1>Some content</h1>
<a href="153/webmention" rel="webmention">The real webmention</a>
</body>
</html>
MARKUP;

    $stub = $this->createStub(ResponseInterface::class);
    $stub->method('getHeaders')->willReturn([]);
    $stub->method('getContent')->willReturn($markup);
    // The stub answers every getInfo() key with the page URL.
    $stub->method('getInfo')->willReturn('https://webmention.rocks/test/153');

    $expected = "https://webmention.rocks/test/153/webmention";

    self::assertSame($expected, EndpointParser::parse($stub));
}
}