Work on parsing authors and content from mentions

This commit is contained in:
Lewis Dale 2023-03-15 09:15:55 +00:00
parent a5e0902366
commit a429befc94
6 changed files with 282 additions and 77 deletions

View File

@ -3,44 +3,46 @@
namespace Lewisdale\Webmentions; namespace Lewisdale\Webmentions;
use League\Uri\Exceptions\SyntaxError; use League\Uri\Exceptions\SyntaxError;
use League\Uri\Uri;
use Lewisdale\Webmentions\Exceptions\InvalidTargetException; use Lewisdale\Webmentions\Exceptions\InvalidTargetException;
use Lewisdale\Webmentions\Exceptions\InvalidUrlException; use Lewisdale\Webmentions\Exceptions\InvalidUrlException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\Webmention;
use Symfony\Component\HttpClient\HttpClient;
use League\Uri\Uri;
use Lewisdale\Webmentions\Exceptions\SourceNotFoundException; use Lewisdale\Webmentions\Exceptions\SourceNotFoundException;
use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException; use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\Author;
use Lewisdale\Webmentions\Models\MentionType; use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention;
use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
class Endpoint { class Endpoint
{
function __construct( function __construct(
private readonly HttpClientInterface $httpClient, private readonly HttpClientInterface $httpClient,
private readonly WebmentionGatewayInterface $gateway private readonly WebmentionGatewayInterface $gateway,
) )
{} {
}
public function validateUrl(string $url) : bool { public function validateUrl(string $url): bool
{
try { try {
$uri = Uri::createFromString($url); $uri = Uri::createFromString($url);
$scheme = $uri->getScheme(); $scheme = $uri->getScheme();
$schemeValid = in_array($scheme, ["http", "https"]); $schemeValid = in_array($scheme, ["http", "https"]);
return $schemeValid && !!filter_var($url, FILTER_VALIDATE_URL); return $schemeValid && !!filter_var($url, FILTER_VALIDATE_URL);
} catch (SyntaxError $e) } catch (SyntaxError $e) {
{
return false; return false;
} }
} }
public function receiveWebmention(string $source, string $target) : void
public function receiveWebmention(string $source, string $target): void
{ {
// Validate that both source and target are actual domains // Validate that both source and target are actual domains
if (!$this->validateUrl($source) || !$this->validateUrl($target)) if (!$this->validateUrl($source) || !$this->validateUrl($target)) {
{
throw new InvalidUrlException(); throw new InvalidUrlException();
} }
@ -52,12 +54,10 @@ class Endpoint {
// Parse content from the source // Parse content from the source
$response = $this->httpClient->request('GET', $source); $response = $this->httpClient->request('GET', $source);
if ($response->getStatusCode() < 400) if ($response->getStatusCode() < 400) {
{
$document = new Crawler($response->getContent()); $document = new Crawler($response->getContent());
if (!$this->hasMention($target, $document)) if (!$this->hasMention($target, $document)) {
{
throw new TargetNotMentionedException(); throw new TargetNotMentionedException();
} }
@ -66,26 +66,26 @@ class Endpoint {
$content = $this->parseContent($target, $container, $type); $content = $this->parseContent($target, $container, $type);
$author = $this->parseAuthor($container); $author = $this->parseAuthor($container);
$webmention = new Webmention(null, $target, $source, $type, null, $author); $webmention = new Webmention(null, $target, $source, $type, $content, $author);
$this->gateway->save($webmention); $this->gateway->save($webmention);
} else { } else {
throw new SourceNotFoundException(); throw new SourceNotFoundException();
} }
} }
private function hasMention(string $target, Crawler $document) : bool private function hasMention(string $target, Crawler $document): bool
{ {
return $document->filter('a[href="' . $target . '"]')->count() > 0; return $document->filter('a[href="' . $target . '"]')->count() > 0;
} }
private function getContainer(string $target, Crawler $document) : Crawler private function getContainer(string $target, Crawler $document): Crawler
{ {
return $document->filter('a[href="' . $target . '"]')->closest('.h-entry') ?? $document; return $document->filter('a[href="' . $target . '"]')->closest('.h-entry') ?? $document;
} }
private function parseMentionType(string $target, Crawler $document) : MentionType private function parseMentionType(string $target, Crawler $document): MentionType
{ {
$class = $document->filter('a[href="'. $target . '"]')->attr('class'); $class = $document->filter('a[href="' . $target . '"]')->attr('class');
if (str_contains($class, "u-like-of")) { if (str_contains($class, "u-like-of")) {
return MentionType::Like; return MentionType::Like;
@ -98,28 +98,30 @@ class Endpoint {
return MentionType::Mention; return MentionType::Mention;
} }
private function parseContent(string $target, Crawler $document, MentionType $type) : ?string private function parseContent(string $target, Crawler $document, MentionType $type): ?string
{ {
return match ($type) { return match ($type) {
MentionType::Like => "Liked this post", MentionType::Like => "Liked this post",
MentionType::Reply => $document->innerText(), MentionType::Reply, MentionType::Mention => $document->text(),
MentionType::Repost => "Reposted this post", MentionType::Repost => "Reposted this post",
MentionType::Mention => $document->innerText(),
}; };
} }
private function parseAuthor(Crawler $document) : ?string private function parseAuthor(Crawler $document): Author
{ {
$card = $document->filter('.p-author.h-card')->eq(0); $card = $document->filter('.h-card');
if ($card->count()) if (!$card->count()) {
{ $card = $document->closest('.h-card');
}
if ($card && $card->count()) {
$name = $card->filter('.p-name')?->text(""); $name = $card->filter('.p-name')?->text("");
$url = $card->filter('.u-url')?->text(""); $url = $card->filter('.u-url')->count() ? $card->filter('.u-url')->attr('href') : "";
$photo = $card->filter('.u-photo')->count() ? $card->filter('.u-photo')->attr('src') : ""; $photo = $card->filter('.u-photo')->count() ? $card->filter('.u-photo')->attr('src') : "";
return implode(", ", [$name, $url, $photo]); return new Author(null, $name, $url, $photo);
} }
return null; return new Author();
} }
} }

View File

@ -2,12 +2,13 @@
namespace Lewisdale\Webmentions\Gateways; namespace Lewisdale\Webmentions\Gateways;
use Exception; use Lewisdale\Webmentions\Models\Author;
use Lewisdale\Webmentions\Models\MentionType; use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention; use Lewisdale\Webmentions\Models\Webmention;
use PDO; use PDO;
class SqliteGateway extends WebmentionGatewayInterface { class SqliteGateway extends WebmentionGatewayInterface
{
private readonly PDO $connection; private readonly PDO $connection;
function __construct(string $name) function __construct(string $name)
@ -16,7 +17,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$this->up(); $this->up();
} }
protected function up() : void protected function up(): void
{ {
// Create Webmention table // Create Webmention table
$sql = <<<SQL $sql = <<<SQL
@ -26,7 +27,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
source TEXT NOT NULL, source TEXT NOT NULL,
type TEXT NOT NULL, type TEXT NOT NULL,
content TEXT, content TEXT,
author TEXT author INTEGER
); );
SQL; SQL;
@ -48,7 +49,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$row["source"], $row["source"],
MentionType::from($row["type"]), MentionType::from($row["type"]),
$row["content"], $row["content"],
$row["author"] new Author((int)$row["author"])
); );
} }
@ -70,7 +71,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$row["source"], $row["source"],
MentionType::from($row["type"]), MentionType::from($row["type"]),
$row["content"], $row["content"],
$row["author"] new Author((int)$row["author"])
); );
} }
$statement->closeCursor(); $statement->closeCursor();
@ -94,11 +95,11 @@ class SqliteGateway extends WebmentionGatewayInterface {
"source" => $webmention->source, "source" => $webmention->source,
"type" => $webmention->type->toString(), "type" => $webmention->type->toString(),
"content" => $webmention->content, "content" => $webmention->content,
"author" => $webmention->author, "author" => $webmention->author->id,
]); ]);
$statement->closeCursor(); $statement->closeCursor();
return $success ? (int) $this->connection->lastInsertId() : null; return $success ? (int)$this->connection->lastInsertId() : null;
} }
public function delete(Webmention $webmention): void public function delete(Webmention $webmention): void
@ -109,9 +110,9 @@ class SqliteGateway extends WebmentionGatewayInterface {
$statement->closeCursor(); $statement->closeCursor();
} }
public function find(array $values) : array public function find(array $values): array
{ {
$keys = implode(" AND ", array_map(function($v) { $keys = implode(" AND ", array_map(function ($v) {
return "$v=:$v"; return "$v=:$v";
}, array_keys($values))); }, array_keys($values)));
$sql = <<<SQL $sql = <<<SQL
@ -125,4 +126,5 @@ class SqliteGateway extends WebmentionGatewayInterface {
return $statement->fetchAll(); return $statement->fetchAll();
} }
} }
?> ?>

15
src/Models/Author.php Normal file
View File

@ -0,0 +1,15 @@
<?php declare(strict_types=1);
namespace Lewisdale\Webmentions\Models;
/**
 * Plain value holder for the author attached to a webmention.
 *
 * Every field is optional: constructing it without arguments yields an
 * author with a null id and empty-string name, url and photo.
 */
class Author
{
    /**
     * @param int|null    $id    Identifier of the author; null by default.
     *                           NOTE(review): presumably the storage row id - confirm.
     * @param string|null $name  Display name; empty string by default.
     * @param string|null $url   Address of the author's page; empty string by default.
     * @param string|null $photo Address of the author's picture; empty string by default.
     */
    public function __construct(
        public ?int $id = null,
        public ?string $name = "",
        public ?string $url = "",
        public ?string $photo = "",
    ) {}
}

View File

@ -2,19 +2,23 @@
namespace Lewisdale\Webmentions\Models; namespace Lewisdale\Webmentions\Models;
class Webmention { class Webmention
{
function __construct( function __construct(
public ?int $id, public ?int $id,
public string $target, // The target post public string $target, // The target post
public string $source, public string $source,
public MentionType $type, public MentionType $type,
public ?string $content, public ?string $content,
public ?string $author, // TODO: Should be reference to another model public Author $author, // TODO: Should be reference to another model
) )
{} {
}
public function __toString() { public function __toString()
return "Webmention (id: {$this->id}, target: {$this->target}, source: {$this->source}, content: {$this->content}, author: {$this->author})"; {
return "Webmention (id: {$this->id}, target: {$this->target}, source: {$this->source}, content: {$this->content}, author: {$this->author->name})";
} }
} }
?> ?>

View File

@ -7,15 +7,25 @@ use Lewisdale\Webmentions\Exceptions\SourceNotFoundException;
use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException; use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface; use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\MentionType; use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention;
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\TestWith; use PHPUnit\Framework\Attributes\TestWith;
use PHPUnit\Framework\TestCase;
use Symfony\Contracts\HttpClient\HttpClientInterface; use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface; use Symfony\Contracts\HttpClient\ResponseInterface;
class EndpointTest extends TestCase { class EndpointTest extends TestCase
private function objectContains(string $key, mixed $value) { {
return $this->callback(fn(object $obj) => $obj->$key === $value); private function objectContains(string $key, mixed $expected)
{
return $this->callback(function (object $obj) use ($expected, $key) {
$val = $obj->$key;
$type = gettype($val);
return match ($type) {
"object" => $val == $expected,
"array" => count(array_diff($val, $expected)) === 0,
default => $val === $expected
};
});
} }
#[TestWith(["https://my.url.com", true])] #[TestWith(["https://my.url.com", true])]
@ -148,7 +158,7 @@ class EndpointTest extends TestCase {
->with($this->objectContains('type', MentionType::Like)); ->with($this->objectContains('type', MentionType::Like));
$endpoint = new Endpoint($mockClient, $mockGateway); $endpoint = new Endpoint($mockClient, $mockGateway);
$endpoint->receiveWebmention($source, $target); $endpoint->receiveWebmention($source, $target);
} }
@ -277,5 +287,176 @@ class EndpointTest extends TestCase {
$endpoint = new Endpoint($mockClient, $mockGateway); $endpoint = new Endpoint($mockClient, $mockGateway);
$endpoint->receiveWebmention($source, $target); $endpoint->receiveWebmention($source, $target);
} }
}
/**
 * A source page whose link to the target carries the `u-repost-of` class
 * must be saved with the fixed content "Reposted this post".
 */
public function testItShouldParseARepostsContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    // The closing marker's indentation is stripped from every heredoc line.
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <h1>Some content</h1>
        <p>Here's some body content. It <a href="/another/page">contains a url</a>.</p>
        <p>I'm writing about <a href="$target" class="u-repost-of">this post</a>.</p>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source page exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    // The saved webmention must carry the repost placeholder as its content.
    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "Reposted this post"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * A source page whose link to the target carries the `u-like-of` class
 * must be saved with the fixed content "Liked this post".
 */
public function testItShouldParseALikeContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    // The closing marker's indentation is stripped from every heredoc line.
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <span class="h-entry">
        <a class="u-like-of" href="$target">A Cool Post</a><a class="u-url" href="/"></a>
        </span>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source page exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    // The saved webmention must carry the like placeholder as its content.
    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "Liked this post"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * A source page whose link to the target carries the `u-in-reply-to` class
 * must be saved with the text of the surrounding entry as its content.
 */
public function testItShouldParseAReplyContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    // The closing marker's indentation is stripped from every heredoc line.
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <a class="u-in-reply-to" rel="in-reply-to" href="$target">@post</a>: That's a great idea!<a class="u-url" href="/"></a>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source page exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    // The saved content is the entry's text, without any markup.
    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "@post: That's a great idea!"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * An `h-card` inside the entry that provides `p-name`, `u-url` and `u-photo`
 * must be turned into an Author holding that name, link target and image
 * source, with no id assigned.
 */
public function testItShouldParseAnAuthorCardWithANameUrlAndPhoto(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    // The closing marker's indentation is stripped from every heredoc line.
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <a class="u-in-reply-to" rel="in-reply-to" href="$target">@post</a>: That's a great idea!<a class="u-url" href="/"></a>
        <div class="h-card">
        <p class="p-name">Anne Author</p> who can be found at <a class="u-url" href="https://my-blog.com">my-blog.com</a>.
        <img src="https://dummyimage.com/100x100/fff/aaa" class="u-photo" alt="My profile picture" />
        </div>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source page exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    $expected = new \Lewisdale\Webmentions\Models\Author(
        null,
        "Anne Author",
        "https://my-blog.com",
        "https://dummyimage.com/100x100/fff/aaa",
    );

    // objectContains compares objects loosely, so an equal Author is enough.
    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('author', $expected));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
}

View File

@ -1,7 +1,8 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
error_reporting(E_ALL); ini_set('display_errors',1); error_reporting(E_ALL);
ini_set('display_errors', 1);
use Lewisdale\Webmentions\Gateways\SqliteGateway; use Lewisdale\Webmentions\Gateways\SqliteGateway;
@ -27,7 +28,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url", "https://a-source.url",
MentionType::Like, MentionType::Like,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author(),
); );
$webmention->id = $this->gateway->save($webmention); $webmention->id = $this->gateway->save($webmention);
@ -42,7 +43,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url", "https://a-source.url",
MentionType::Like, MentionType::Like,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author(),
); );
$webmention->id = $this->gateway->save($webmention); $webmention->id = $this->gateway->save($webmention);
@ -60,7 +61,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url", "https://a-source.url",
MentionType::Like, MentionType::Like,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
); );
$webmention->id = $this->gateway->save($webmention); $webmention->id = $this->gateway->save($webmention);
@ -73,25 +74,25 @@ class SqliteGatewayTest extends TestCase
public function testCanGetByPost() public function testCanGetByPost()
{ {
foreach(range(0, 4) as $_) { foreach (range(0, 4) as $_) {
$this->gateway->save(new Webmention( $this->gateway->save(new Webmention(
null, null,
"https://lewisdale.dev/post/a-new-post", "https://lewisdale.dev/post/a-new-post",
"https://a-source.url", "https://a-source.url",
MentionType::Reply, MentionType::Reply,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
)); ));
} }
foreach(range(0, 4) as $_) { foreach (range(0, 4) as $_) {
$this->gateway->save(new Webmention( $this->gateway->save(new Webmention(
null, null,
"https://lewisdale.dev/post/a-different-post", "https://lewisdale.dev/post/a-different-post",
"https://a-source.url", "https://a-source.url",
MentionType::Like, MentionType::Like,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
)); ));
} }
@ -108,7 +109,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url", "https://a-source.url",
MentionType::Reply, MentionType::Reply,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
)); ));
$this->gateway->save(new Webmention( $this->gateway->save(new Webmention(
@ -117,7 +118,7 @@ class SqliteGatewayTest extends TestCase
"https://a-different-source.url", "https://a-different-source.url",
MentionType::Like, MentionType::Like,
"No content", "No content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
)); ));
$this->gateway->save(new Webmention( $this->gateway->save(new Webmention(
@ -126,14 +127,14 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url", "https://a-source.url",
MentionType::Reply, MentionType::Reply,
"Some content", "Some content",
"Some Author Name" new \Lewisdale\Webmentions\Models\Author()
)); ));
$this->assertCount( $this->assertCount(
2, 2,
$this->gateway->find([ $this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post", "target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-source.url" "source" => "https://a-source.url",
]) ])
); );
@ -141,7 +142,7 @@ class SqliteGatewayTest extends TestCase
1, 1,
$this->gateway->find([ $this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post", "target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-different-source.url" "source" => "https://a-different-source.url",
]) ])
); );
@ -150,7 +151,7 @@ class SqliteGatewayTest extends TestCase
$this->gateway->find([ $this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post", "target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-source.url", "source" => "https://a-source.url",
"content" => "Some content" "content" => "Some content",
]) ])
); );
} }