More work on parsing microformats

This commit is contained in:
Lewis Dale 2023-03-14 13:54:05 +00:00
parent 4179ee10e9
commit 2735abde64
4 changed files with 103 additions and 54 deletions

View File

@ -54,42 +54,72 @@ class Endpoint {
if ($response->getStatusCode() < 400) if ($response->getStatusCode() < 400)
{ {
[$type, $content] = $this->parseContent($response->getContent(), $target); $document = new Crawler($response->getContent());
$author = $this->parseAuthor($response->getContent());
$webmention = new Webmention(null, $target, $source, $type, $content, $author); if (!$this->hasMention($target, $document))
{
throw new TargetNotMentionedException();
}
$container = $this->getContainer($target, $document);
$type = $this->parseMentionType($target, $container);
$content = $this->parseContent($target, $container, $type);
$author = $this->parseAuthor($container);
$webmention = new Webmention(null, $target, $source, $type, null, $author);
$this->gateway->save($webmention); $this->gateway->save($webmention);
} else { } else {
throw new SourceNotFoundException(); throw new SourceNotFoundException();
} }
} }
/**
 * Reports whether the source document contains a link to the target URL.
 *
 * The href comparison is performed in PHP instead of concatenating $target
 * into a CSS attribute selector: $target is caller-supplied, and a quote or
 * backslash inside it would otherwise produce a malformed selector.
 *
 * @param string  $target   URL that the source document is expected to link to.
 * @param Crawler $document Parsed source document.
 *
 * @return bool True when at least one anchor has an href exactly equal to $target.
 */
private function hasMention(string $target, Crawler $document) : bool
{
    return $document
        ->filter('a[href]')
        ->reduce(static fn (Crawler $anchor) : bool => $anchor->attr('href') === $target)
        ->count() > 0;
}
/**
 * Selects the element that the rest of the parsing should be scoped to.
 *
 * Starting from the first anchor whose href equals $target, the nearest
 * enclosing `.h-entry` is returned; when there is none, the whole document
 * is used instead.
 *
 * NOTE(review): closest() needs a non-empty node list, so this assumes the
 * caller has already confirmed that the document links to $target.
 *
 * @param string  $target   URL the source document links to.
 * @param Crawler $document Parsed source document.
 *
 * @return Crawler The enclosing h-entry, or $document as a fallback.
 */
private function getContainer(string $target, Crawler $document) : Crawler
{
    // Match the href in PHP so that special characters in $target cannot
    // break a hand-built CSS selector.
    $anchors = $document
        ->filter('a[href]')
        ->reduce(static fn (Crawler $anchor) : bool => $anchor->attr('href') === $target);

    return $anchors->closest('.h-entry') ?? $document;
}

/**
 * Derives the mention type from the class attribute of the link to $target.
 *
 * The class attribute is split into whitespace-separated tokens and checked
 * for `u-like-of`, `u-in-reply-to` and `u-repost-of`, in that order; a link
 * carrying none of them is a plain mention.
 *
 * @param string  $target   URL the source document links to.
 * @param Crawler $document Element (or document) containing the link.
 *
 * @return MentionType The detected type, MentionType::Mention by default.
 */
private function parseMentionType(string $target, Crawler $document) : MentionType
{
    $anchors = $document
        ->filter('a[href]')
        ->reduce(static fn (Crawler $anchor) : bool => $anchor->attr('href') === $target);

    // attr() yields null for an anchor without a class attribute; normalise
    // that to an empty string before tokenising.
    $class = $anchors->attr('class') ?? '';
    $tokens = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY) ?: [];

    return match (true) {
        in_array('u-like-of', $tokens, true) => MentionType::Like,
        in_array('u-in-reply-to', $tokens, true) => MentionType::Reply,
        in_array('u-repost-of', $tokens, true) => MentionType::Repost,
        default => MentionType::Mention,
    };
}
/**
 * Produces the text stored as the webmention's content.
 *
 * Likes and reposts get a fixed phrase, replies use the container's own
 * text, and plain mentions use the text of the link that points at $target.
 *
 * @param string      $target   URL the source document links to.
 * @param Crawler     $document Element (or document) containing the link.
 * @param MentionType $type     Mention type already detected for the link.
 *
 * @return ?string The content text, or null when a plain mention's link
 *                 cannot be found inside $document.
 */
private function parseContent(string $target, Crawler $document, MentionType $type) : ?string
{
    if ($type === MentionType::Mention) {
        // The link lives inside the container, so it has to be searched for
        // among descendants; closest() only walks up through ancestors.
        $anchors = $document
            ->filter('a[href]')
            ->reduce(static fn (Crawler $anchor) : bool => $anchor->attr('href') === $target);

        return $anchors->count() > 0 ? $anchors->innerText() : null;
    }

    return match ($type) {
        MentionType::Like => "Liked this post",
        MentionType::Reply => $document->innerText(),
        MentionType::Repost => "Reposted this post",
    };
}

/**
 * Extracts author details from the first `.p-author.h-card` in $document.
 *
 * @param Crawler $document Element (or document) to search.
 *
 * @return ?string "name, url, photo" joined with ", " (missing parts are left
 *                 empty), or null when there is no author card at all.
 */
private function parseAuthor(Crawler $document) : ?string
{
    $card = $document->filter('.p-author.h-card')->first();

    // A Crawler object is always truthy, so emptiness must be tested with count().
    if ($card->count() === 0) {
        return null;
    }

    $nameNode = $card->filter('.p-name');
    $urlNode = $card->filter('.u-url');
    $photoNode = $card->filter('.u-photo');

    // text()/attr() throw on an empty node list, so each lookup is guarded.
    $name = $nameNode->count() > 0 ? $nameNode->text() : '';
    // A u-url is normally carried by the href attribute; fall back to the
    // element text for markup that spells the URL out as content.
    $url = $urlNode->count() > 0 ? ($urlNode->attr('href') ?? $urlNode->text()) : '';
    $photo = $photoNode->count() > 0 ? ($photoNode->attr('src') ?? '') : '';

    return implode(", ", [$name, $url, $photo]);
}
} }

View File

@ -4,26 +4,28 @@ namespace Lewisdale\Webmentions\Models;
/**
 * The kinds of webmention that can be recorded.
 */
enum MentionType
{
    case Like;
    case Reply;
    case Mention;
    case Repost;

    /**
     * Returns the lowercase string form of this mention type.
     */
    public function toString() : string
    {
        return match ($this) {
            self::Like => "like",
            self::Reply => "reply",
            self::Mention => "mention",
            self::Repost => "repost",
        };
    }

    /**
     * Maps a string form back to its mention type.
     *
     * Any string that is not the string form of a case yields
     * MentionType::Mention.
     */
    public static function from(string $string) : MentionType
    {
        // Reuse toString() so the two directions of the mapping cannot drift apart.
        foreach (self::cases() as $candidate) {
            if ($candidate->toString() === $string) {
                return $candidate;
            }
        }

        return self::Mention;
    }
}
?> ?>

View File

@ -14,6 +14,10 @@ use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface; use Symfony\Contracts\HttpClient\ResponseInterface;
class EndpointTest extends TestCase { class EndpointTest extends TestCase {
/**
 * Builds an argument constraint that passes when the property named $key on
 * the inspected object is identical (===) to $value.
 */
private function objectContains(string $key, mixed $value)
{
    $matchesProperty = static function (object $candidate) use ($key, $value) {
        return $candidate->{$key} === $value;
    };

    return $this->callback($matchesProperty);
}
#[TestWith(["https://my.url.com", true])] #[TestWith(["https://my.url.com", true])]
#[TestWith(["my.url.com", false])] #[TestWith(["my.url.com", false])]
public function testValidatesUrls(string $url, bool $expected) public function testValidatesUrls(string $url, bool $expected)
@ -139,16 +143,7 @@ class EndpointTest extends TestCase {
$mockGateway->expects($this->once()) $mockGateway->expects($this->once())
->method('save') ->method('save')
->with($this->equalTo( ->with($this->objectContains('type', MentionType::Like));
new Webmention(
null,
$target,
$source,
MentionType::Like,
null,
null
))
);
$endpoint = new Endpoint($mockClient, $mockGateway); $endpoint = new Endpoint($mockClient, $mockGateway);
@ -189,16 +184,7 @@ class EndpointTest extends TestCase {
$mockGateway->expects($this->once()) $mockGateway->expects($this->once())
->method('save') ->method('save')
->with($this->equalTo( ->with($this->objectContains('type', MentionType::Mention));
new Webmention(
null,
$target,
$source,
MentionType::Mention,
null,
null
))
);
$endpoint = new Endpoint($mockClient, $mockGateway); $endpoint = new Endpoint($mockClient, $mockGateway);
$endpoint->receiveWebmention($source, $target); $endpoint->receiveWebmention($source, $target);
@ -238,19 +224,50 @@ class EndpointTest extends TestCase {
$mockGateway->expects($this->once()) $mockGateway->expects($this->once())
->method('save') ->method('save')
->with($this->equalTo( ->with($this->objectContains('type', MentionType::Reply));
new Webmention(
null,
$target,
$source,
MentionType::Reply,
null,
null
))
);
$endpoint = new Endpoint($mockClient, $mockGateway); $endpoint = new Endpoint($mockClient, $mockGateway);
$endpoint->receiveWebmention($source, $target); $endpoint->receiveWebmention($source, $target);
} }
/**
 * A source page whose link to the target carries the `u-repost-of` class
 * must be saved with MentionType::Repost.
 */
public function testItShouldParseAWebmentionAsARepost()
{
    // Arrange: a source document that links to the target as a repost.
    $source = "https://my-valid-source-url.com";
    $target = "https://lewisdale.dev/post/a-post-page";
    $content = <<<XML
<html>
<head>
</head>
<body>
<h1>Some content</h1>
<p>Here's some body content. It <a href="/another/page">contains a url</a>.</p>
<p>I'm writing about <a href="$target" class="u-repost-of">this post</a>.</p>
</body>
</html>
XML;

    $mockClient = $this->createMock(HttpClientInterface::class);
    $mockResponse = $this->createMock(ResponseInterface::class);
    $mockGateway = $this->createMock(WebmentionGatewayInterface::class);

    // The endpoint is expected to fetch the source exactly once with a GET.
    $mockClient->expects($this->once())
        ->method('request')
        ->with($this->identicalTo('GET'), $this->identicalTo($source))
        ->willReturn($mockResponse);

    $mockResponse->method('getStatusCode')
        ->willReturn(200);
    $mockResponse->method('getContent')
        ->willReturn($content);

    // Assert (declared up front): exactly one save, typed as a repost.
    $mockGateway->expects($this->once())
        ->method('save')
        ->with($this->objectContains('type', MentionType::Repost));

    // Act.
    $endpoint = new Endpoint($mockClient, $mockGateway);
    $endpoint->receiveWebmention($source, $target);
}
} }
?> ?>

View File

@ -78,7 +78,7 @@ class SqliteGatewayTest extends TestCase
null, null,
"https://lewisdale.dev/post/a-new-post", "https://lewisdale.dev/post/a-new-post",
"https://a-source.url", "https://a-source.url",
MentionType::Comment, MentionType::Reply,
"No content", "No content",
"Some Author Name" "Some Author Name"
)); ));
@ -124,7 +124,7 @@ class SqliteGatewayTest extends TestCase
null, null,
"https://lewisdale.dev/post/a-new-post", "https://lewisdale.dev/post/a-new-post",
"https://a-source.url", "https://a-source.url",
MentionType::Comment, MentionType::Reply,
"Some content", "Some content",
"Some Author Name" "Some Author Name"
)); ));