feat(scraping): implémenter le health check de tous les scrapers

- Commande CheckAllScrapersHealth + handler avec ports dédiés
- Value Object ContentSourceHealthCheckData
- Resource API Platform et State Processor
- Adapters InMemory et tests unitaires + fonctionnels
This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2026-03-16 00:08:57 +01:00
parent 734dea569c
commit ae7a485195
11 changed files with 453 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
<?php
namespace App\Domain\Scraping\Application\Command;
readonly class CheckAllScrapersHealth
{
}

View File

@@ -0,0 +1,64 @@
<?php
namespace App\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
use App\Domain\Scraping\Application\Command\TestScraperConfiguration;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
use Psr\Log\LoggerInterface;
readonly class CheckAllScrapersHealthHandler
{
public function __construct(
private ContentSourceForHealthCheckInterface $contentSourceForHealthCheckRepo,
private ContentSourceHealthRepositoryInterface $contentSourceHealthRepo,
private TestScraperConfigurationHandler $testScraperConfigurationHandler,
private LoggerInterface $logger,
) {
}
public function handle(CheckAllScrapersHealth $command): void
{
$sources = $this->contentSourceForHealthCheckRepo->getAll();
foreach ($sources as $source) {
if ($source->testSlug === null || $source->testChapterNumber === null) {
$this->logger->warning('ContentSource {id} has no test config, skipping health check.', ['id' => $source->id]);
continue;
}
try {
$this->contentSourceHealthRepo->markAsTesting($source->id);
$testUrl = str_replace(
['{slug}', '{chapterNumber}'],
[$source->testSlug, $source->testChapterNumber],
$source->chapterUrlFormat
);
$testCommand = new TestScraperConfiguration(
baseUrl: $source->baseUrl,
chapterUrlFormat: $source->chapterUrlFormat,
scrapingType: $source->scrapingType,
testUrl: $testUrl,
mangaSlug: $source->testSlug,
chapterNumber: $source->testChapterNumber,
imageSelector: $source->imageSelector,
nextPageSelector: $source->nextPageSelector,
chapterSelector: $source->chapterSelector,
);
$response = $this->testScraperConfigurationHandler->handle($testCommand);
if ($response->success) {
$this->contentSourceHealthRepo->markAsHealthy($source->id, new \DateTimeImmutable());
} else {
$firstError = $response->errors[0]['message'] ?? 'Erreur inconnue';
$this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $firstError);
}
} catch (\Exception $e) {
$this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $e->getMessage());
}
}
}
}

View File

@@ -0,0 +1,11 @@
<?php
namespace App\Domain\Scraping\Domain\Contract\Repository;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
interface ContentSourceForHealthCheckInterface
{
/** @return ContentSourceHealthCheckData[] */
public function getAll(): array;
}

View File

@@ -0,0 +1,12 @@
<?php
namespace App\Domain\Scraping\Domain\Contract\Repository;
interface ContentSourceHealthRepositoryInterface
{
public function markAsTesting(int $sourceId): void;
public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void;
public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void;
}

View File

@@ -0,0 +1,19 @@
<?php
namespace App\Domain\Scraping\Domain\Model\ValueObject;
readonly class ContentSourceHealthCheckData
{
public function __construct(
public int $id,
public string $baseUrl,
public string $chapterUrlFormat,
public string $scrapingType,
public ?string $imageSelector,
public ?string $nextPageSelector,
public ?string $chapterSelector,
public ?string $testSlug,
public ?float $testChapterNumber,
) {
}
}

View File

@@ -0,0 +1,23 @@
<?php
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Resource;
use ApiPlatform\Metadata\ApiResource;
use ApiPlatform\Metadata\Post;
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor\CheckAllScrapersHealthStateProcessor;
#[ApiResource(
shortName: 'Scraping',
operations: [
new Post(
uriTemplate: '/scraping/check-all-health',
processor: CheckAllScrapersHealthStateProcessor::class,
output: false,
status: 202,
description: 'Déclenche le test de santé de tous les scrapers configurés avec testSlug',
),
]
)]
class CheckAllScrapersHealthResource
{
}

View File

@@ -0,0 +1,23 @@
<?php
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor;
use ApiPlatform\Metadata\Operation;
use ApiPlatform\State\ProcessorInterface;
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
readonly class CheckAllScrapersHealthStateProcessor implements ProcessorInterface
{
public function __construct(
private CheckAllScrapersHealthHandler $handler,
) {
}
public function process(mixed $data, Operation $operation, array $uriVariables = [], array $context = []): null
{
$this->handler->handle(new CheckAllScrapersHealth());
return null;
}
}

View File

@@ -0,0 +1,27 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
class InMemoryContentSourceForHealthCheckRepository implements ContentSourceForHealthCheckInterface
{
/** @var ContentSourceHealthCheckData[] */
private array $sources = [];
public function add(ContentSourceHealthCheckData $data): void
{
$this->sources[] = $data;
}
public function getAll(): array
{
return $this->sources;
}
public function clear(): void
{
$this->sources = [];
}
}

View File

@@ -0,0 +1,41 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
class InMemoryContentSourceHealthRepository implements ContentSourceHealthRepositoryInterface
{
/** @var array<int, array{status: string, testedAt: ?\DateTimeImmutable, error: ?string}> */
private array $statuses = [];
public function markAsTesting(int $sourceId): void
{
$this->statuses[$sourceId] = ['status' => 'testing', 'testedAt' => null, 'error' => null];
}
public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void
{
$this->statuses[$sourceId] = ['status' => 'ok', 'testedAt' => $testedAt, 'error' => null];
}
public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void
{
$this->statuses[$sourceId] = ['status' => 'ko', 'testedAt' => $testedAt, 'error' => $error];
}
public function getStatus(int $sourceId): ?string
{
return $this->statuses[$sourceId]['status'] ?? null;
}
public function getError(int $sourceId): ?string
{
return $this->statuses[$sourceId]['error'] ?? null;
}
public function clear(): void
{
$this->statuses = [];
}
}

View File

@@ -0,0 +1,154 @@
<?php
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
use App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceForHealthCheckRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceHealthRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperFactory;
use PHPUnit\Framework\TestCase;
use Psr\Log\NullLogger;
class CheckAllScrapersHealthHandlerTest extends TestCase
{
private InMemoryContentSourceForHealthCheckRepository $sourceRepo;
private InMemoryContentSourceHealthRepository $healthRepo;
private InMemoryScraperFactory $scraperFactory;
private CheckAllScrapersHealthHandler $handler;
protected function setUp(): void
{
$this->sourceRepo = new InMemoryContentSourceForHealthCheckRepository();
$this->healthRepo = new InMemoryContentSourceHealthRepository();
$this->scraperFactory = new InMemoryScraperFactory();
$this->scraperFactory->addScraper('html', new InMemoryScraperAdapter());
$testScraperHandler = new TestScraperConfigurationHandler($this->scraperFactory);
$this->handler = new CheckAllScrapersHealthHandler(
$this->sourceRepo,
$this->healthRepo,
$testScraperHandler,
new NullLogger(),
);
}
public function testSourceWithoutTestSlugIsSkipped(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 1,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertNull($this->healthRepo->getStatus(1));
}
public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 2,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(2));
$this->assertNull($this->healthRepo->getError(2));
}
public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void
{
$failingScraper = new InMemoryScraperAdapter();
$failingScraper->simulateError(new \RuntimeException('Connexion refusée'));
$this->scraperFactory->addScraper('html', $failingScraper);
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 3,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ko', $this->healthRepo->getStatus(3));
$this->assertNotNull($this->healthRepo->getError(3));
}
public function testMultipleSourcesAreAllProcessed(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 10,
baseUrl: 'https://siteA.com',
chapterUrlFormat: 'https://siteA.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-a',
testChapterNumber: 1.0,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 11,
baseUrl: 'https://siteB.com',
chapterUrlFormat: 'https://siteB.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 12,
baseUrl: 'https://siteC.com',
chapterUrlFormat: 'https://siteC.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-c',
testChapterNumber: 3.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(10));
$this->assertNull($this->healthRepo->getStatus(11)); // skippée
$this->assertSame('ok', $this->healthRepo->getStatus(12));
}
protected function tearDown(): void
{
$this->sourceRepo->clear();
$this->healthRepo->clear();
$this->scraperFactory->clear();
}
}

View File

@@ -0,0 +1,72 @@
<?php
declare(strict_types=1);
namespace App\Tests\Feature\Scraping;
use App\Entity\ContentSource;
use App\Tests\Feature\AbstractApiTestCase;
use Symfony\Component\HttpFoundation\Response;
use Zenstruck\Foundry\Test\ResetDatabase;
final class CheckAllScrapersHealthTest extends AbstractApiTestCase
{
use ResetDatabase;
private function post(): void
{
static::createClient()->request('POST', '/api/scraping/check-all-health', [
'json' => new \stdClass(),
]);
}
public function testItReturns202WithNoSources(): void
{
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
}
public function testItReturns202WithSourcesHavingNoTestConfig(): void
{
$source = new ContentSource();
$source->setBaseUrl('https://example.com')
->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
->setScrapingType('html');
$this->entityManager->persist($source);
$this->entityManager->flush();
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
// La source sans testSlug ne doit pas avoir son statut modifié
$this->entityManager->clear();
$reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
$this->assertSame('unknown', $reloaded->getHealthStatus());
}
public function testHealthStatusIsUpdatedForSourcesWithTestConfig(): void
{
$source = new ContentSource();
$source->setBaseUrl('https://example.com')
->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
->setScrapingType('html')
->setTestSlug('one-piece')
->setTestChapterNumber(1.0);
$this->entityManager->persist($source);
$this->entityManager->flush();
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
// Le statut ne doit plus être 'unknown' après le test
$this->entityManager->clear();
$reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
$this->assertNotSame('unknown', $reloaded->getHealthStatus());
$this->assertNotSame('testing', $reloaded->getHealthStatus()); // doit être terminé
}
}