Work on parsing authors and content from mentions

This commit is contained in:
Lewis Dale 2023-03-15 09:15:55 +00:00
parent a5e0902366
commit a429befc94
6 changed files with 282 additions and 77 deletions

View File

@ -3,27 +3,30 @@
namespace Lewisdale\Webmentions;
use League\Uri\Exceptions\SyntaxError;
use League\Uri\Uri;
use Lewisdale\Webmentions\Exceptions\InvalidTargetException;
use Lewisdale\Webmentions\Exceptions\InvalidUrlException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\Webmention;
use Symfony\Component\HttpClient\HttpClient;
use League\Uri\Uri;
use Lewisdale\Webmentions\Exceptions\SourceNotFoundException;
use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\Author;
use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
class Endpoint {
class Endpoint
{
function __construct(
private readonly HttpClientInterface $httpClient,
private readonly WebmentionGatewayInterface $gateway
private readonly HttpClientInterface $httpClient,
private readonly WebmentionGatewayInterface $gateway,
)
{}
{
}
public function validateUrl(string $url) : bool {
public function validateUrl(string $url): bool
{
try {
$uri = Uri::createFromString($url);
$scheme = $uri->getScheme();
@ -31,16 +34,15 @@ class Endpoint {
return $schemeValid && !!filter_var($url, FILTER_VALIDATE_URL);
} catch (SyntaxError $e)
{
} catch (SyntaxError $e) {
return false;
}
}
public function receiveWebmention(string $source, string $target) : void
public function receiveWebmention(string $source, string $target): void
{
// Validate that both source and target are well-formed URLs
if (!$this->validateUrl($source) || !$this->validateUrl($target))
{
if (!$this->validateUrl($source) || !$this->validateUrl($target)) {
throw new InvalidUrlException();
}
@ -52,12 +54,10 @@ class Endpoint {
// Parse content from the source
$response = $this->httpClient->request('GET', $source);
if ($response->getStatusCode() < 400)
{
if ($response->getStatusCode() < 400) {
$document = new Crawler($response->getContent());
if (!$this->hasMention($target, $document))
{
if (!$this->hasMention($target, $document)) {
throw new TargetNotMentionedException();
}
@ -66,26 +66,26 @@ class Endpoint {
$content = $this->parseContent($target, $container, $type);
$author = $this->parseAuthor($container);
$webmention = new Webmention(null, $target, $source, $type, null, $author);
$webmention = new Webmention(null, $target, $source, $type, $content, $author);
$this->gateway->save($webmention);
} else {
throw new SourceNotFoundException();
}
}
private function hasMention(string $target, Crawler $document) : bool
/**
 * Reports whether the source document contains a link to the target.
 *
 * @param string  $target   URL the source is expected to link to.
 * @param Crawler $document Crawler over the fetched source document.
 *
 * @return bool True when at least one anchor has an href equal to $target.
 */
private function hasMention(string $target, Crawler $document): bool
{
    // Attribute selector matching anchors whose href is exactly the target.
    $selector = sprintf('a[href="%s"]', $target);
    $matchingLinks = $document->filter($selector);

    return $matchingLinks->count() > 0;
}
private function getContainer(string $target, Crawler $document) : Crawler
/**
 * Finds the element that should be treated as the mention's container.
 *
 * @param string  $target   URL the source links to.
 * @param Crawler $document Crawler over the fetched source document.
 *
 * @return Crawler The closest `.h-entry` around the link to $target, or the
 *                 document itself when closest() yields null.
 */
private function getContainer(string $target, Crawler $document): Crawler
{
    $selector = sprintf('a[href="%s"]', $target);
    $mentionLink = $document->filter($selector);
    $entry = $mentionLink->closest('.h-entry');

    // No enclosing .h-entry: fall back to the whole document.
    return $entry ?? $document;
}
private function parseMentionType(string $target, Crawler $document) : MentionType
private function parseMentionType(string $target, Crawler $document): MentionType
{
$class = $document->filter('a[href="'. $target . '"]')->attr('class');
$class = $document->filter('a[href="' . $target . '"]')->attr('class');
if (str_contains($class, "u-like-of")) {
return MentionType::Like;
@ -98,28 +98,30 @@ class Endpoint {
return MentionType::Mention;
}
private function parseContent(string $target, Crawler $document, MentionType $type) : ?string
private function parseContent(string $target, Crawler $document, MentionType $type): ?string
{
return match ($type) {
MentionType::Like => "Liked this post",
MentionType::Reply => $document->innerText(),
MentionType::Reply, MentionType::Mention => $document->text(),
MentionType::Repost => "Reposted this post",
MentionType::Mention => $document->innerText(),
};
}
private function parseAuthor(Crawler $document) : ?string
private function parseAuthor(Crawler $document): Author
{
$card = $document->filter('.p-author.h-card')->eq(0);
$card = $document->filter('.h-card');
if ($card->count())
{
if (!$card->count()) {
$card = $document->closest('.h-card');
}
if ($card && $card->count()) {
$name = $card->filter('.p-name')?->text("");
$url = $card->filter('.u-url')?->text("");
$url = $card->filter('.u-url')->count() ? $card->filter('.u-url')->attr('href') : "";
$photo = $card->filter('.u-photo')->count() ? $card->filter('.u-photo')->attr('src') : "";
return implode(", ", [$name, $url, $photo]);
return new Author(null, $name, $url, $photo);
}
return null;
return new Author();
}
}

View File

@ -2,12 +2,13 @@
namespace Lewisdale\Webmentions\Gateways;
use Exception;
use Lewisdale\Webmentions\Models\Author;
use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention;
use PDO;
class SqliteGateway extends WebmentionGatewayInterface {
class SqliteGateway extends WebmentionGatewayInterface
{
private readonly PDO $connection;
function __construct(string $name)
@ -16,7 +17,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$this->up();
}
protected function up() : void
protected function up(): void
{
// Create Webmention table
$sql = <<<SQL
@ -26,7 +27,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
source TEXT NOT NULL,
type TEXT NOT NULL,
content TEXT,
author TEXT
author INTEGER
);
SQL;
@ -48,7 +49,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$row["source"],
MentionType::from($row["type"]),
$row["content"],
$row["author"]
new Author((int)$row["author"])
);
}
@ -70,7 +71,7 @@ class SqliteGateway extends WebmentionGatewayInterface {
$row["source"],
MentionType::from($row["type"]),
$row["content"],
$row["author"]
new Author((int)$row["author"])
);
}
$statement->closeCursor();
@ -94,11 +95,11 @@ class SqliteGateway extends WebmentionGatewayInterface {
"source" => $webmention->source,
"type" => $webmention->type->toString(),
"content" => $webmention->content,
"author" => $webmention->author,
"author" => $webmention->author->id,
]);
$statement->closeCursor();
return $success ? (int) $this->connection->lastInsertId() : null;
return $success ? (int)$this->connection->lastInsertId() : null;
}
public function delete(Webmention $webmention): void
@ -109,9 +110,9 @@ class SqliteGateway extends WebmentionGatewayInterface {
$statement->closeCursor();
}
public function find(array $values) : array
public function find(array $values): array
{
$keys = implode(" AND ", array_map(function($v) {
$keys = implode(" AND ", array_map(function ($v) {
return "$v=:$v";
}, array_keys($values)));
$sql = <<<SQL
@ -125,4 +126,5 @@ class SqliteGateway extends WebmentionGatewayInterface {
return $statement->fetchAll();
}
}
?>

15
src/Models/Author.php Normal file
View File

@ -0,0 +1,15 @@
<?php declare(strict_types=1);
namespace Lewisdale\Webmentions\Models;
/**
 * Plain data holder describing the author of a webmention.
 *
 * Every field is optional: with no arguments the id is null and the
 * name, url and photo are empty strings.
 */
class Author
{
    /**
     * @param int|null    $id    Identifier of the author, or null when none has been assigned.
     * @param string|null $name  Author's name.
     * @param string|null $url   Author's URL.
     * @param string|null $photo URL of the author's photo.
     */
    public function __construct(
        public ?int $id = null,
        public ?string $name = "",
        public ?string $url = "",
        public ?string $photo = "",
    ) {
    }
}

View File

@ -2,19 +2,23 @@
namespace Lewisdale\Webmentions\Models;
class Webmention {
class Webmention
{
function __construct(
public ?int $id,
public string $target, // The target post
public string $source,
public ?int $id,
public string $target, // The target post
public string $source,
public MentionType $type,
public ?string $content,
public ?string $author, // TODO: Should be reference to another model
public ?string $content,
public Author $author, // TODO: Should be reference to another model
)
{}
{
}
public function __toString() {
return "Webmention (id: {$this->id}, target: {$this->target}, source: {$this->source}, content: {$this->content}, author: {$this->author})";
public function __toString()
{
return "Webmention (id: {$this->id}, target: {$this->target}, source: {$this->source}, content: {$this->content}, author: {$this->author->name})";
}
}
?>

View File

@ -7,15 +7,25 @@ use Lewisdale\Webmentions\Exceptions\SourceNotFoundException;
use Lewisdale\Webmentions\Exceptions\TargetNotMentionedException;
use Lewisdale\Webmentions\Gateways\WebmentionGatewayInterface;
use Lewisdale\Webmentions\Models\MentionType;
use Lewisdale\Webmentions\Models\Webmention;
use PHPUnit\Framework\TestCase;
use PHPUnit\Framework\Attributes\TestWith;
use PHPUnit\Framework\TestCase;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
class EndpointTest extends TestCase {
private function objectContains(string $key, mixed $value) {
return $this->callback(fn(object $obj) => $obj->$key === $value);
class EndpointTest extends TestCase
{
/**
 * Builds a PHPUnit callback constraint that inspects one public property of
 * the object passed to a mocked method.
 *
 * Comparison depends on the type of the property's value:
 *  - objects are compared loosely (==), i.e. by property values;
 *  - arrays must have the same length and the same values as $expected
 *    (values are compared the way array_diff() compares them; keys and
 *    order are ignored);
 *  - anything else must be identical (===).
 *
 * @param string $key      Name of the property to read from the object.
 * @param mixed  $expected Value the property is expected to hold.
 */
private function objectContains(string $key, mixed $expected)
{
    return $this->callback(static function (object $obj) use ($expected, $key): bool {
        $actual = $obj->$key;

        return match (true) {
            is_object($actual) => $actual == $expected,
            // Check both directions: a one-way array_diff() would accept an
            // $actual that is merely a subset of $expected. The is_array()
            // guard turns a non-array expectation into a plain mismatch
            // instead of a TypeError from array_diff().
            is_array($actual) => is_array($expected)
                && count($actual) === count($expected)
                && array_diff($actual, $expected) === []
                && array_diff($expected, $actual) === [],
            default => $actual === $expected,
        };
    });
}
#[TestWith(["https://my.url.com", true])]
@ -277,5 +287,176 @@ class EndpointTest extends TestCase {
$endpoint = new Endpoint($mockClient, $mockGateway);
$endpoint->receiveWebmention($source, $target);
}
/**
 * A source whose link to the target carries the `u-repost-of` class must be
 * saved with the content "Reposted this post".
 */
public function testItShouldParseARepostsContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";

    // The article needs a real class attribute; `<article="h-entry">` is
    // malformed markup and leaves no element carrying the h-entry class.
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <h1>Some content</h1>
        <p>Here's some body content. It <a href="/another/page">contains a url</a>.</p>
        <p>I'm writing about <a href="$target" class="u-repost-of">this post</a>.</p>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "Reposted this post"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * A source whose link to the target carries the `u-like-of` class must be
 * saved with the content "Liked this post".
 */
public function testItShouldParseALikeContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <span class="h-entry">
        <a class="u-like-of" href="$target">A Cool Post</a><a class="u-url" href="/"></a>
        </span>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "Liked this post"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * A source whose link to the target carries the `u-in-reply-to` class must
 * be saved with the content "@post: That's a great idea!".
 */
public function testItShouldParseAReplyContent(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <a class="u-in-reply-to" rel="in-reply-to" href="$target">@post</a>: That's a great idea!<a class="u-url" href="/"></a>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('content', "@post: That's a great idea!"));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
/**
 * An `.h-card` inside the entry, holding a `.p-name`, a `.u-url` link and a
 * `.u-photo` image, must be saved as an Author with that name, href and src.
 */
public function testItShouldParseAnAuthorCardWithANameUrlAndPhoto(): void
{
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    $content = <<<HTML
        <html>
        <head>
        </head>
        <body>
        <article class="h-entry">
        <a class="u-in-reply-to" rel="in-reply-to" href="$target">@post</a>: That's a great idea!<a class="u-url" href="/"></a>
        <div class="h-card">
        <p class="p-name">Anne Author</p> who can be found at <a class="u-url" href="https://my-blog.com">my-blog.com</a>.
        <img src="https://dummyimage.com/100x100/fff/aaa" class="u-photo" alt="My profile picture" />
        </div>
        </article>
        </body>
        </html>
        HTML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint must fetch the source exactly once.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    // The id is null: the parsed author has not been given an identifier.
    $expected = new \Lewisdale\Webmentions\Models\Author(
        null,
        "Anne Author",
        "https://my-blog.com",
        "https://dummyimage.com/100x100/fff/aaa",
    );

    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('author', $expected));

    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
}
?>

View File

@ -1,7 +1,8 @@
<?php
declare(strict_types=1);
error_reporting(E_ALL); ini_set('display_errors',1);
error_reporting(E_ALL);
ini_set('display_errors', 1);
use Lewisdale\Webmentions\Gateways\SqliteGateway;
@ -27,7 +28,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url",
MentionType::Like,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author(),
);
$webmention->id = $this->gateway->save($webmention);
@ -42,7 +43,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url",
MentionType::Like,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author(),
);
$webmention->id = $this->gateway->save($webmention);
@ -60,7 +61,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url",
MentionType::Like,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
);
$webmention->id = $this->gateway->save($webmention);
@ -73,25 +74,25 @@ class SqliteGatewayTest extends TestCase
public function testCanGetByPost()
{
foreach(range(0, 4) as $_) {
foreach (range(0, 4) as $_) {
$this->gateway->save(new Webmention(
null,
"https://lewisdale.dev/post/a-new-post",
"https://a-source.url",
MentionType::Reply,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
));
}
foreach(range(0, 4) as $_) {
foreach (range(0, 4) as $_) {
$this->gateway->save(new Webmention(
null,
"https://lewisdale.dev/post/a-different-post",
"https://a-source.url",
MentionType::Like,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
));
}
@ -108,7 +109,7 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url",
MentionType::Reply,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
));
$this->gateway->save(new Webmention(
@ -117,7 +118,7 @@ class SqliteGatewayTest extends TestCase
"https://a-different-source.url",
MentionType::Like,
"No content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
));
$this->gateway->save(new Webmention(
@ -126,14 +127,14 @@ class SqliteGatewayTest extends TestCase
"https://a-source.url",
MentionType::Reply,
"Some content",
"Some Author Name"
new \Lewisdale\Webmentions\Models\Author()
));
$this->assertCount(
2,
$this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-source.url"
"source" => "https://a-source.url",
])
);
@ -141,7 +142,7 @@ class SqliteGatewayTest extends TestCase
1,
$this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-different-source.url"
"source" => "https://a-different-source.url",
])
);
@ -150,7 +151,7 @@ class SqliteGatewayTest extends TestCase
$this->gateway->find([
"target" => "https://lewisdale.dev/post/a-new-post",
"source" => "https://a-source.url",
"content" => "Some content"
"content" => "Some content",
])
);
}