feat(scraping): implémenter le health check de tous les scrapers
- Commande CheckAllScrapersHealth + handler avec ports dédiés - Value Object ContentSourceHealthCheckData - Resource API Platform et State Processor - Adapters InMemory et tests unitaires + fonctionnels
This commit is contained in:
parent
795cbeccc3
commit
01474c264b
@@ -0,0 +1,7 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Application\Command;
|
||||
|
||||
/**
 * Command asking for a health check of every scraper.
 *
 * It carries no payload; CheckAllScrapersHealthHandler::handle() receives it
 * purely as a trigger.
 */
readonly class CheckAllScrapersHealth {}
|
||||
@@ -0,0 +1,64 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Application\CommandHandler;
|
||||
|
||||
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||
use App\Domain\Scraping\Application\Command\TestScraperConfiguration;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Runs the scraper configuration test against every content source and
 * records the outcome through the health repository.
 *
 * Sources without a test slug or test chapter number are skipped with a
 * warning and their health status is left untouched.
 */
readonly class CheckAllScrapersHealthHandler
{
    public function __construct(
        private ContentSourceForHealthCheckInterface $contentSourceForHealthCheckRepo,
        private ContentSourceHealthRepositoryInterface $contentSourceHealthRepo,
        private TestScraperConfigurationHandler $testScraperConfigurationHandler,
        private LoggerInterface $logger,
    ) {
    }

    /**
     * Checks each source in turn; a failure on one source never prevents the
     * following ones from being checked.
     *
     * @param CheckAllScrapersHealth $command Payload-less trigger command.
     */
    public function handle(CheckAllScrapersHealth $command): void
    {
        foreach ($this->contentSourceForHealthCheckRepo->getAll() as $source) {
            if ($source->testSlug === null || $source->testChapterNumber === null) {
                $this->logger->warning('ContentSource {id} has no test config, skipping health check.', ['id' => $source->id]);

                continue;
            }

            try {
                $this->contentSourceHealthRepo->markAsTesting($source->id);

                // Build the concrete chapter URL from the source's URL template.
                $testUrl = str_replace(
                    ['{slug}', '{chapterNumber}'],
                    [$source->testSlug, $source->testChapterNumber],
                    $source->chapterUrlFormat
                );

                $testCommand = new TestScraperConfiguration(
                    baseUrl: $source->baseUrl,
                    chapterUrlFormat: $source->chapterUrlFormat,
                    scrapingType: $source->scrapingType,
                    testUrl: $testUrl,
                    mangaSlug: $source->testSlug,
                    chapterNumber: $source->testChapterNumber,
                    imageSelector: $source->imageSelector,
                    nextPageSelector: $source->nextPageSelector,
                    chapterSelector: $source->chapterSelector,
                );

                $response = $this->testScraperConfigurationHandler->handle($testCommand);

                if ($response->success) {
                    $this->contentSourceHealthRepo->markAsHealthy($source->id, new \DateTimeImmutable());
                } else {
                    // Only the first reported error is persisted.
                    $firstError = $response->errors[0]['message'] ?? 'Erreur inconnue';
                    $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $firstError);
                }
            } catch (\Throwable $e) {
                // \Throwable rather than \Exception: an \Error (e.g. a TypeError)
                // raised after markAsTesting() would otherwise abort the whole
                // batch and leave this source stuck in the "testing" state.
                $this->logger->error('Health check failed for ContentSource {id}: {message}', [
                    'id' => $source->id,
                    'message' => $e->getMessage(),
                    'exception' => $e,
                ]);
                $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $e->getMessage());
            }
        }
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||
|
||||
/**
 * Read port supplying the content sources whose scrapers are to be
 * health-checked.
 */
interface ContentSourceForHealthCheckInterface
{
    /**
     * Returns the health-check data of every content source.
     *
     * @return ContentSourceHealthCheckData[]
     */
    public function getAll(): array;
}
|
||||
@@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||
|
||||
/**
 * Write port used to persist the health state of a content source.
 */
interface ContentSourceHealthRepositoryInterface
{
    /**
     * Flags the source as having a health check in progress.
     *
     * @param int $sourceId Identifier of the content source.
     */
    public function markAsTesting(int $sourceId): void;

    /**
     * Records a successful health check.
     *
     * @param int                $sourceId Identifier of the content source.
     * @param \DateTimeImmutable $testedAt Moment the check was evaluated.
     */
    public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void;

    /**
     * Records a failed health check together with its error message.
     *
     * @param int                $sourceId Identifier of the content source.
     * @param \DateTimeImmutable $testedAt Moment the check was evaluated.
     * @param string             $error    Message describing the failure.
     */
    public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void;
}
|
||||
@@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model\ValueObject;
|
||||
|
||||
/**
 * Immutable snapshot of the content-source fields needed to run a scraper
 * health check.
 *
 * $chapterUrlFormat is a URL template in which the health-check handler
 * substitutes the `{slug}` and `{chapterNumber}` placeholders. A source whose
 * $testSlug or $testChapterNumber is null is skipped by that handler.
 */
readonly class ContentSourceHealthCheckData
{
    public function __construct(
        public int $id,
        public string $baseUrl,
        public string $chapterUrlFormat,
        public string $scrapingType,
        public ?string $imageSelector,
        public ?string $nextPageSelector,
        public ?string $chapterSelector,
        public ?string $testSlug,
        public ?float $testChapterNumber,
    ) {}
}
|
||||
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Resource;
|
||||
|
||||
use ApiPlatform\Metadata\ApiResource;
|
||||
use ApiPlatform\Metadata\Post;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor\CheckAllScrapersHealthStateProcessor;
|
||||
|
||||
/**
 * API Platform resource exposing a single POST operation that triggers the
 * health check of all scrapers. The operation has no output and is declared
 * with HTTP status 202; the work is delegated to
 * CheckAllScrapersHealthStateProcessor.
 */
#[ApiResource(
    shortName: 'Scraping',
    operations: [
        new Post(
            uriTemplate: '/scraping/check-all-health',
            status: 202,
            description: 'Déclenche le test de santé de tous les scrapers configurés avec testSlug',
            output: false,
            processor: CheckAllScrapersHealthStateProcessor::class,
        ),
    ],
)]
class CheckAllScrapersHealthResource {}
|
||||
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor;
|
||||
|
||||
use ApiPlatform\Metadata\Operation;
|
||||
use ApiPlatform\State\ProcessorInterface;
|
||||
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
|
||||
|
||||
/**
 * API Platform state processor that forwards the request to
 * CheckAllScrapersHealthHandler.
 */
readonly class CheckAllScrapersHealthStateProcessor implements ProcessorInterface
{
    public function __construct(private CheckAllScrapersHealthHandler $handler) {}

    /**
     * Dispatches a fresh CheckAllScrapersHealth command; none of the incoming
     * arguments are read.
     *
     * @return null Always null, the operation produces no output.
     */
    public function process(mixed $data, Operation $operation, array $uriVariables = [], array $context = []): null
    {
        $command = new CheckAllScrapersHealth();
        $this->handler->handle($command);

        return null;
    }
}
|
||||
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
namespace App\Tests\Domain\Scraping\Adapter;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||
|
||||
/**
 * In-memory test double for ContentSourceForHealthCheckInterface, backed by a
 * plain list.
 */
class InMemoryContentSourceForHealthCheckRepository implements ContentSourceForHealthCheckInterface
{
    /** @var ContentSourceHealthCheckData[] */
    private array $sources = [];

    /**
     * Appends a source to the list returned by getAll().
     */
    public function add(ContentSourceHealthCheckData $data): void
    {
        $this->sources = [...$this->sources, $data];
    }

    /**
     * @return ContentSourceHealthCheckData[] Sources in insertion order.
     */
    public function getAll(): array
    {
        return $this->sources;
    }

    /**
     * Drops every stored source.
     */
    public function clear(): void
    {
        $this->sources = [];
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
namespace App\Tests\Domain\Scraping\Adapter;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
|
||||
|
||||
/**
 * In-memory test double for ContentSourceHealthRepositoryInterface.
 *
 * Keeps only the latest state written for each source id, using the status
 * strings 'testing', 'ok' and 'ko'.
 */
class InMemoryContentSourceHealthRepository implements ContentSourceHealthRepositoryInterface
{
    /** @var array<int, array{status: string, testedAt: ?\DateTimeImmutable, error: ?string}> */
    private array $statuses = [];

    public function markAsTesting(int $sourceId): void
    {
        $this->store($sourceId, 'testing', null, null);
    }

    public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void
    {
        $this->store($sourceId, 'ok', $testedAt, null);
    }

    public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void
    {
        $this->store($sourceId, 'ko', $testedAt, $error);
    }

    /**
     * @return string|null Last recorded status, or null when the source was never marked.
     */
    public function getStatus(int $sourceId): ?string
    {
        $entry = $this->statuses[$sourceId] ?? null;

        return $entry === null ? null : $entry['status'];
    }

    /**
     * @return string|null Last recorded error, or null when there is none.
     */
    public function getError(int $sourceId): ?string
    {
        $entry = $this->statuses[$sourceId] ?? null;

        return $entry === null ? null : $entry['error'];
    }

    /**
     * Forgets every recorded state.
     */
    public function clear(): void
    {
        $this->statuses = [];
    }

    /**
     * Overwrites the state held for the given source.
     */
    private function store(int $sourceId, string $status, ?\DateTimeImmutable $testedAt, ?string $error): void
    {
        $this->statuses[$sourceId] = ['status' => $status, 'testedAt' => $testedAt, 'error' => $error];
    }
}
|
||||
@@ -0,0 +1,154 @@
|
||||
<?php
|
||||
|
||||
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
|
||||
|
||||
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
|
||||
use App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceForHealthCheckRepository;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceHealthRepository;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperFactory;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Psr\Log\NullLogger;
|
||||
|
||||
/**
 * Unit tests for CheckAllScrapersHealthHandler, wired entirely with in-memory
 * adapters and a NullLogger.
 */
class CheckAllScrapersHealthHandlerTest extends TestCase
{
    private InMemoryContentSourceForHealthCheckRepository $sourceRepo;
    private InMemoryContentSourceHealthRepository $healthRepo;
    private InMemoryScraperFactory $scraperFactory;
    private CheckAllScrapersHealthHandler $handler;

    protected function setUp(): void
    {
        $this->sourceRepo = new InMemoryContentSourceForHealthCheckRepository();
        $this->healthRepo = new InMemoryContentSourceHealthRepository();

        $this->scraperFactory = new InMemoryScraperFactory();
        $this->scraperFactory->addScraper('html', new InMemoryScraperAdapter());

        $this->handler = new CheckAllScrapersHealthHandler(
            $this->sourceRepo,
            $this->healthRepo,
            new TestScraperConfigurationHandler($this->scraperFactory),
            new NullLogger(),
        );
    }

    public function testSourceWithoutTestSlugIsSkipped(): void
    {
        $this->sourceRepo->add($this->htmlSource(1, 'https://example.com', null, null));

        $this->handler->handle(new CheckAllScrapersHealth());

        self::assertNull($this->healthRepo->getStatus(1));
    }

    public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void
    {
        $this->sourceRepo->add($this->htmlSource(2, 'https://example.com', 'one-piece', 1.0));

        $this->handler->handle(new CheckAllScrapersHealth());

        self::assertSame('ok', $this->healthRepo->getStatus(2));
        self::assertNull($this->healthRepo->getError(2));
    }

    public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void
    {
        // Register a scraper that raises for the 'html' type.
        $brokenScraper = new InMemoryScraperAdapter();
        $brokenScraper->simulateError(new \RuntimeException('Connexion refusée'));
        $this->scraperFactory->addScraper('html', $brokenScraper);

        $this->sourceRepo->add($this->htmlSource(3, 'https://example.com', 'one-piece', 1.0));

        $this->handler->handle(new CheckAllScrapersHealth());

        self::assertSame('ko', $this->healthRepo->getStatus(3));
        self::assertNotNull($this->healthRepo->getError(3));
    }

    public function testMultipleSourcesAreAllProcessed(): void
    {
        $this->sourceRepo->add($this->htmlSource(10, 'https://siteA.com', 'manga-a', 1.0));
        $this->sourceRepo->add($this->htmlSource(11, 'https://siteB.com', null, null));
        $this->sourceRepo->add($this->htmlSource(12, 'https://siteC.com', 'manga-c', 3.0));

        $this->handler->handle(new CheckAllScrapersHealth());

        self::assertSame('ok', $this->healthRepo->getStatus(10));
        self::assertNull($this->healthRepo->getStatus(11)); // skipped
        self::assertSame('ok', $this->healthRepo->getStatus(12));
    }

    protected function tearDown(): void
    {
        $this->sourceRepo->clear();
        $this->healthRepo->clear();
        $this->scraperFactory->clear();
    }

    /**
     * Builds an 'html' source fixture whose chapter URL template is
     * "<baseUrl>/{slug}/{chapterNumber}" and whose only selector is 'img'.
     */
    private function htmlSource(int $id, string $baseUrl, ?string $testSlug, ?float $testChapterNumber): ContentSourceHealthCheckData
    {
        return new ContentSourceHealthCheckData(
            id: $id,
            baseUrl: $baseUrl,
            chapterUrlFormat: $baseUrl . '/{slug}/{chapterNumber}',
            scrapingType: 'html',
            imageSelector: 'img',
            nextPageSelector: null,
            chapterSelector: null,
            testSlug: $testSlug,
            testChapterNumber: $testChapterNumber,
        );
    }
}
|
||||
72
tests/Feature/Scraping/CheckAllScrapersHealthTest.php
Normal file
72
tests/Feature/Scraping/CheckAllScrapersHealthTest.php
Normal file
@@ -0,0 +1,72 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Tests\Feature\Scraping;
|
||||
|
||||
use App\Entity\ContentSource;
|
||||
use App\Tests\Feature\AbstractApiTestCase;
|
||||
use Symfony\Component\HttpFoundation\Response;
|
||||
use Zenstruck\Foundry\Test\ResetDatabase;
|
||||
|
||||
/**
 * Functional tests for the POST /api/scraping/check-all-health endpoint.
 */
final class CheckAllScrapersHealthTest extends AbstractApiTestCase
{
    use ResetDatabase;

    /**
     * Sends the health-check request with an empty JSON object as body.
     */
    private function post(): void
    {
        static::createClient()->request('POST', '/api/scraping/check-all-health', [
            'json' => new \stdClass(),
        ]);
    }

    public function testItReturns202WithNoSources(): void
    {
        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
    }

    public function testItReturns202WithSourcesHavingNoTestConfig(): void
    {
        $source = new ContentSource();
        $source->setBaseUrl('https://example.com')
            ->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
            ->setScrapingType('html');

        $this->entityManager->persist($source);
        $this->entityManager->flush();

        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);

        // A source without a testSlug must keep its health status unchanged.
        $this->entityManager->clear();
        $reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
        // find() is nullable: fail with a clear assertion rather than an Error on null.
        $this->assertInstanceOf(ContentSource::class, $reloaded);
        $this->assertSame('unknown', $reloaded->getHealthStatus());
    }

    public function testHealthStatusIsUpdatedForSourcesWithTestConfig(): void
    {
        $source = new ContentSource();
        $source->setBaseUrl('https://example.com')
            ->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
            ->setScrapingType('html')
            ->setTestSlug('one-piece')
            ->setTestChapterNumber(1.0);

        $this->entityManager->persist($source);
        $this->entityManager->flush();

        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);

        // After the check the status must no longer be 'unknown'.
        $this->entityManager->clear();
        $reloaded = $this->entityManager->find(ContentSource::class, $source->getId());
        // find() is nullable: fail with a clear assertion rather than an Error on null.
        $this->assertInstanceOf(ContentSource::class, $reloaded);
        $this->assertNotSame('unknown', $reloaded->getHealthStatus());
        $this->assertNotSame('testing', $reloaded->getHealthStatus()); // the check must have finished
    }
}
|
||||
Reference in New Issue
Block a user