feat(scraping): implémenter le health check de tous les scrapers

- Commande CheckAllScrapersHealth + handler avec ports dédiés
- Value Object ContentSourceHealthCheckData
- Resource API Platform et State Processor
- Adapters InMemory et tests unitaires + fonctionnels
This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2026-03-16 00:08:57 +01:00
parent 795cbeccc3
commit 01474c264b
11 changed files with 453 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
class InMemoryContentSourceForHealthCheckRepository implements ContentSourceForHealthCheckInterface
{
/** @var ContentSourceHealthCheckData[] */
private array $sources = [];
public function add(ContentSourceHealthCheckData $data): void
{
$this->sources[] = $data;
}
public function getAll(): array
{
return $this->sources;
}
public function clear(): void
{
$this->sources = [];
}
}

View File

@@ -0,0 +1,41 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
class InMemoryContentSourceHealthRepository implements ContentSourceHealthRepositoryInterface
{
/** @var array<int, array{status: string, testedAt: ?\DateTimeImmutable, error: ?string}> */
private array $statuses = [];
public function markAsTesting(int $sourceId): void
{
$this->statuses[$sourceId] = ['status' => 'testing', 'testedAt' => null, 'error' => null];
}
public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void
{
$this->statuses[$sourceId] = ['status' => 'ok', 'testedAt' => $testedAt, 'error' => null];
}
public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void
{
$this->statuses[$sourceId] = ['status' => 'ko', 'testedAt' => $testedAt, 'error' => $error];
}
public function getStatus(int $sourceId): ?string
{
return $this->statuses[$sourceId]['status'] ?? null;
}
public function getError(int $sourceId): ?string
{
return $this->statuses[$sourceId]['error'] ?? null;
}
public function clear(): void
{
$this->statuses = [];
}
}

View File

@@ -0,0 +1,154 @@
<?php
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
use App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceForHealthCheckRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceHealthRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperFactory;
use PHPUnit\Framework\TestCase;
use Psr\Log\NullLogger;
class CheckAllScrapersHealthHandlerTest extends TestCase
{
private InMemoryContentSourceForHealthCheckRepository $sourceRepo;
private InMemoryContentSourceHealthRepository $healthRepo;
private InMemoryScraperFactory $scraperFactory;
private CheckAllScrapersHealthHandler $handler;
protected function setUp(): void
{
$this->sourceRepo = new InMemoryContentSourceForHealthCheckRepository();
$this->healthRepo = new InMemoryContentSourceHealthRepository();
$this->scraperFactory = new InMemoryScraperFactory();
$this->scraperFactory->addScraper('html', new InMemoryScraperAdapter());
$testScraperHandler = new TestScraperConfigurationHandler($this->scraperFactory);
$this->handler = new CheckAllScrapersHealthHandler(
$this->sourceRepo,
$this->healthRepo,
$testScraperHandler,
new NullLogger(),
);
}
public function testSourceWithoutTestSlugIsSkipped(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 1,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertNull($this->healthRepo->getStatus(1));
}
public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 2,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(2));
$this->assertNull($this->healthRepo->getError(2));
}
public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void
{
$failingScraper = new InMemoryScraperAdapter();
$failingScraper->simulateError(new \RuntimeException('Connexion refusée'));
$this->scraperFactory->addScraper('html', $failingScraper);
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 3,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ko', $this->healthRepo->getStatus(3));
$this->assertNotNull($this->healthRepo->getError(3));
}
public function testMultipleSourcesAreAllProcessed(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 10,
baseUrl: 'https://siteA.com',
chapterUrlFormat: 'https://siteA.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-a',
testChapterNumber: 1.0,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 11,
baseUrl: 'https://siteB.com',
chapterUrlFormat: 'https://siteB.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 12,
baseUrl: 'https://siteC.com',
chapterUrlFormat: 'https://siteC.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-c',
testChapterNumber: 3.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(10));
$this->assertNull($this->healthRepo->getStatus(11)); // skippée
$this->assertSame('ok', $this->healthRepo->getStatus(12));
}
protected function tearDown(): void
{
$this->sourceRepo->clear();
$this->healthRepo->clear();
$this->scraperFactory->clear();
}
}

View File

@@ -0,0 +1,72 @@
<?php
declare(strict_types=1);
namespace App\Tests\Feature\Scraping;
use App\Entity\ContentSource;
use App\Tests\Feature\AbstractApiTestCase;
use Symfony\Component\HttpFoundation\Response;
use Zenstruck\Foundry\Test\ResetDatabase;
final class CheckAllScrapersHealthTest extends AbstractApiTestCase
{
use ResetDatabase;
private function post(): void
{
static::createClient()->request('POST', '/api/scraping/check-all-health', [
'json' => new \stdClass(),
]);
}
public function testItReturns202WithNoSources(): void
{
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
}
public function testItReturns202WithSourcesHavingNoTestConfig(): void
{
$source = new ContentSource();
$source->setBaseUrl('https://example.com')
->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
->setScrapingType('html');
$this->entityManager->persist($source);
$this->entityManager->flush();
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
// La source sans testSlug ne doit pas avoir son statut modifié
$this->entityManager->clear();
$reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
$this->assertSame('unknown', $reloaded->getHealthStatus());
}
public function testHealthStatusIsUpdatedForSourcesWithTestConfig(): void
{
$source = new ContentSource();
$source->setBaseUrl('https://example.com')
->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
->setScrapingType('html')
->setTestSlug('one-piece')
->setTestChapterNumber(1.0);
$this->entityManager->persist($source);
$this->entityManager->flush();
$this->post();
$this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
// Le statut ne doit plus être 'unknown' après le test
$this->entityManager->clear();
$reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
$this->assertNotSame('unknown', $reloaded->getHealthStatus());
$this->assertNotSame('testing', $reloaded->getHealthStatus()); // doit être terminé
}
}