feat(scraping): implémenter le health check de tous les scrapers

- Commande CheckAllScrapersHealth + handler avec ports dédiés
- Value Object ContentSourceHealthCheckData
- Resource API Platform et State Processor
- Adapters InMemory et tests unitaires + fonctionnels
This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2026-03-16 00:08:57 +01:00
parent 734dea569c
commit ae7a485195
11 changed files with 453 additions and 0 deletions

View File

@@ -0,0 +1,154 @@
<?php
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
use App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler;
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceForHealthCheckRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceHealthRepository;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperFactory;
use PHPUnit\Framework\TestCase;
use Psr\Log\NullLogger;
class CheckAllScrapersHealthHandlerTest extends TestCase
{
private InMemoryContentSourceForHealthCheckRepository $sourceRepo;
private InMemoryContentSourceHealthRepository $healthRepo;
private InMemoryScraperFactory $scraperFactory;
private CheckAllScrapersHealthHandler $handler;
protected function setUp(): void
{
$this->sourceRepo = new InMemoryContentSourceForHealthCheckRepository();
$this->healthRepo = new InMemoryContentSourceHealthRepository();
$this->scraperFactory = new InMemoryScraperFactory();
$this->scraperFactory->addScraper('html', new InMemoryScraperAdapter());
$testScraperHandler = new TestScraperConfigurationHandler($this->scraperFactory);
$this->handler = new CheckAllScrapersHealthHandler(
$this->sourceRepo,
$this->healthRepo,
$testScraperHandler,
new NullLogger(),
);
}
public function testSourceWithoutTestSlugIsSkipped(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 1,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertNull($this->healthRepo->getStatus(1));
}
public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 2,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(2));
$this->assertNull($this->healthRepo->getError(2));
}
public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void
{
$failingScraper = new InMemoryScraperAdapter();
$failingScraper->simulateError(new \RuntimeException('Connexion refusée'));
$this->scraperFactory->addScraper('html', $failingScraper);
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 3,
baseUrl: 'https://example.com',
chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'one-piece',
testChapterNumber: 1.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ko', $this->healthRepo->getStatus(3));
$this->assertNotNull($this->healthRepo->getError(3));
}
public function testMultipleSourcesAreAllProcessed(): void
{
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 10,
baseUrl: 'https://siteA.com',
chapterUrlFormat: 'https://siteA.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-a',
testChapterNumber: 1.0,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 11,
baseUrl: 'https://siteB.com',
chapterUrlFormat: 'https://siteB.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: null,
testChapterNumber: null,
));
$this->sourceRepo->add(new ContentSourceHealthCheckData(
id: 12,
baseUrl: 'https://siteC.com',
chapterUrlFormat: 'https://siteC.com/{slug}/{chapterNumber}',
scrapingType: 'html',
imageSelector: 'img',
nextPageSelector: null,
chapterSelector: null,
testSlug: 'manga-c',
testChapterNumber: 3.0,
));
$this->handler->handle(new CheckAllScrapersHealth());
$this->assertSame('ok', $this->healthRepo->getStatus(10));
$this->assertNull($this->healthRepo->getStatus(11)); // skippée
$this->assertSame('ok', $this->healthRepo->getStatus(12));
}
protected function tearDown(): void
{
$this->sourceRepo->clear();
$this->healthRepo->clear();
$this->scraperFactory->clear();
}
}