feat(scraping): implémenter le health check de tous les scrapers
- Commande CheckAllScrapersHealth + handler avec ports dédiés - Value Object ContentSourceHealthCheckData - Resource API Platform et State Processor - Adapters InMemory et tests unitaires + fonctionnels
This commit is contained in:
parent
795cbeccc3
commit
01474c264b
@@ -0,0 +1,7 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Application\Command;
|
||||||
|
|
||||||
|
/**
 * Command requesting a health check of every scraper.
 *
 * It carries no payload: the handler decides which content sources to test.
 */
readonly class CheckAllScrapersHealth
{
}
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Application\CommandHandler;
|
||||||
|
|
||||||
|
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||||
|
use App\Domain\Scraping\Application\Command\TestScraperConfiguration;
|
||||||
|
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
|
||||||
|
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
|
||||||
|
use Psr\Log\LoggerInterface;
|
||||||
|
|
||||||
|
/**
 * Runs a scraper health check against every content source and records the
 * outcome through the health repository.
 *
 * Sources lacking a test slug or a test chapter number are skipped with a
 * warning and their health status is left untouched.
 */
readonly class CheckAllScrapersHealthHandler
{
    public function __construct(
        private ContentSourceForHealthCheckInterface $contentSourceForHealthCheckRepo,
        private ContentSourceHealthRepositoryInterface $contentSourceHealthRepo,
        private TestScraperConfigurationHandler $testScraperConfigurationHandler,
        private LoggerInterface $logger,
    ) {
    }

    /**
     * Checks each source in turn; a failure on one source never prevents the
     * following sources from being checked.
     *
     * @param CheckAllScrapersHealth $command Payload-less trigger command.
     */
    public function handle(CheckAllScrapersHealth $command): void
    {
        $sources = $this->contentSourceForHealthCheckRepo->getAll();

        foreach ($sources as $source) {
            if ($source->testSlug === null || $source->testChapterNumber === null) {
                $this->logger->warning('ContentSource {id} has no test config, skipping health check.', ['id' => $source->id]);

                continue;
            }

            try {
                $this->contentSourceHealthRepo->markAsTesting($source->id);

                // Build the concrete chapter URL from the source's URL template.
                $testUrl = str_replace(
                    ['{slug}', '{chapterNumber}'],
                    [$source->testSlug, $source->testChapterNumber],
                    $source->chapterUrlFormat
                );

                $testCommand = new TestScraperConfiguration(
                    baseUrl: $source->baseUrl,
                    chapterUrlFormat: $source->chapterUrlFormat,
                    scrapingType: $source->scrapingType,
                    testUrl: $testUrl,
                    mangaSlug: $source->testSlug,
                    chapterNumber: $source->testChapterNumber,
                    imageSelector: $source->imageSelector,
                    nextPageSelector: $source->nextPageSelector,
                    chapterSelector: $source->chapterSelector,
                );

                $response = $this->testScraperConfigurationHandler->handle($testCommand);

                if ($response->success) {
                    $this->contentSourceHealthRepo->markAsHealthy($source->id, new \DateTimeImmutable());
                } else {
                    $firstError = $response->errors[0]['message'] ?? 'Erreur inconnue';
                    $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $firstError);
                }
            } catch (\Throwable $e) {
                // \Throwable rather than \Exception: an \Error (e.g. a TypeError)
                // would otherwise abort the whole run and leave this source
                // flagged as "testing" forever.
                $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $e->getMessage());

                $this->logger->error('Health check failed for ContentSource {id}: {message}', [
                    'id' => $source->id,
                    'message' => $e->getMessage(),
                    'exception' => $e,
                ]);
            }
        }
    }
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||||
|
|
||||||
|
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||||
|
|
||||||
|
/**
 * Read port supplying the content sources that a health check iterates over.
 */
interface ContentSourceForHealthCheckInterface
{
    /**
     * Returns the health-check data of every known content source.
     *
     * @return ContentSourceHealthCheckData[]
     */
    public function getAll(): array;
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||||
|
|
||||||
|
/**
 * Write port used to record the health state of a content source.
 */
interface ContentSourceHealthRepositoryInterface
{
    /**
     * Flags the source as currently being tested.
     */
    public function markAsTesting(int $sourceId): void;

    /**
     * Records a successful check performed at $testedAt.
     */
    public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void;

    /**
     * Records a failed check performed at $testedAt together with its error message.
     */
    public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void;
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Domain\Model\ValueObject;
|
||||||
|
|
||||||
|
/**
 * Immutable snapshot of the content-source fields needed to run a health check.
 *
 * The selectors and the test slug / test chapter number are optional.
 * CheckAllScrapersHealthHandler skips a source when either test value is null,
 * and substitutes the `{slug}` and `{chapterNumber}` placeholders of
 * $chapterUrlFormat with the test values.
 */
readonly class ContentSourceHealthCheckData
{
    public function __construct(
        public int $id,
        public string $baseUrl,
        public string $chapterUrlFormat,
        public string $scrapingType,
        public ?string $imageSelector,
        public ?string $nextPageSelector,
        public ?string $chapterSelector,
        public ?string $testSlug,
        public ?float $testChapterNumber,
    ) {
    }
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Resource;
|
||||||
|
|
||||||
|
use ApiPlatform\Metadata\ApiResource;
|
||||||
|
use ApiPlatform\Metadata\Post;
|
||||||
|
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor\CheckAllScrapersHealthStateProcessor;
|
||||||
|
|
||||||
|
/**
 * API Platform resource exposing `POST /scraping/check-all-health`.
 *
 * The operation produces no output body and answers with HTTP 202; the work is
 * delegated to CheckAllScrapersHealthStateProcessor.
 */
#[ApiResource(
    shortName: 'Scraping',
    operations: [
        new Post(
            uriTemplate: '/scraping/check-all-health',
            status: 202,
            description: 'Déclenche le test de santé de tous les scrapers configurés avec testSlug',
            output: false,
            processor: CheckAllScrapersHealthStateProcessor::class,
        ),
    ],
)]
class CheckAllScrapersHealthResource
{
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor;
|
||||||
|
|
||||||
|
use ApiPlatform\Metadata\Operation;
|
||||||
|
use ApiPlatform\State\ProcessorInterface;
|
||||||
|
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||||
|
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
|
||||||
|
|
||||||
|
/**
 * API Platform state processor that triggers the health check of all scrapers.
 */
readonly class CheckAllScrapersHealthStateProcessor implements ProcessorInterface
{
    public function __construct(
        private CheckAllScrapersHealthHandler $handler,
    ) {
    }

    /**
     * Dispatches a fresh CheckAllScrapersHealth command to the handler.
     *
     * The incoming $data, $operation, $uriVariables and $context are ignored.
     *
     * @return null Always null: the operation has no output.
     */
    public function process(mixed $data, Operation $operation, array $uriVariables = [], array $context = []): null
    {
        $command = new CheckAllScrapersHealth();
        $this->handler->handle($command);

        return null;
    }
}
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Tests\Domain\Scraping\Adapter;
|
||||||
|
|
||||||
|
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceForHealthCheckInterface;
|
||||||
|
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||||
|
|
||||||
|
/**
 * In-memory test double for ContentSourceForHealthCheckInterface.
 *
 * Sources are returned in the order they were added.
 */
class InMemoryContentSourceForHealthCheckRepository implements ContentSourceForHealthCheckInterface
{
    /** @var ContentSourceHealthCheckData[] */
    private array $sources = [];

    /**
     * Appends a source to the list returned by getAll().
     */
    public function add(ContentSourceHealthCheckData $data): void
    {
        array_push($this->sources, $data);
    }

    /**
     * @return ContentSourceHealthCheckData[]
     */
    public function getAll(): array
    {
        return $this->sources;
    }

    /**
     * Forgets every registered source.
     */
    public function clear(): void
    {
        $this->sources = [];
    }
}
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Tests\Domain\Scraping\Adapter;
|
||||||
|
|
||||||
|
use App\Domain\Scraping\Domain\Contract\Repository\ContentSourceHealthRepositoryInterface;
|
||||||
|
|
||||||
|
/**
 * In-memory test double for ContentSourceHealthRepositoryInterface.
 *
 * Keeps only the latest recorded state per source id, using the status labels
 * 'testing', 'ok' and 'ko'.
 */
class InMemoryContentSourceHealthRepository implements ContentSourceHealthRepositoryInterface
{
    /** @var array<int, array{status: string, testedAt: ?\DateTimeImmutable, error: ?string}> */
    private array $statuses = [];

    public function markAsTesting(int $sourceId): void
    {
        $this->record($sourceId, 'testing', null, null);
    }

    public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void
    {
        $this->record($sourceId, 'ok', $testedAt, null);
    }

    public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void
    {
        $this->record($sourceId, 'ko', $testedAt, $error);
    }

    /**
     * Returns the last recorded status label, or null when the source was never marked.
     */
    public function getStatus(int $sourceId): ?string
    {
        $entry = $this->statuses[$sourceId] ?? null;

        return $entry === null ? null : $entry['status'];
    }

    /**
     * Returns the last recorded error, or null when there is none.
     */
    public function getError(int $sourceId): ?string
    {
        $entry = $this->statuses[$sourceId] ?? null;

        return $entry === null ? null : $entry['error'];
    }

    /**
     * Forgets every recorded state.
     */
    public function clear(): void
    {
        $this->statuses = [];
    }

    /**
     * Overwrites the stored state of a source.
     */
    private function record(int $sourceId, string $status, ?\DateTimeImmutable $testedAt, ?string $error): void
    {
        $this->statuses[$sourceId] = [
            'status' => $status,
            'testedAt' => $testedAt,
            'error' => $error,
        ];
    }
}
|
||||||
@@ -0,0 +1,154 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
|
||||||
|
|
||||||
|
use App\Domain\Scraping\Application\Command\CheckAllScrapersHealth;
|
||||||
|
use App\Domain\Scraping\Application\CommandHandler\CheckAllScrapersHealthHandler;
|
||||||
|
use App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler;
|
||||||
|
use App\Domain\Scraping\Domain\Model\ValueObject\ContentSourceHealthCheckData;
|
||||||
|
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceForHealthCheckRepository;
|
||||||
|
use App\Tests\Domain\Scraping\Adapter\InMemoryContentSourceHealthRepository;
|
||||||
|
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
|
||||||
|
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperFactory;
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
use Psr\Log\NullLogger;
|
||||||
|
|
||||||
|
/**
 * Unit tests for CheckAllScrapersHealthHandler, wired with in-memory adapters.
 */
class CheckAllScrapersHealthHandlerTest extends TestCase
{
    private InMemoryContentSourceForHealthCheckRepository $sourceRepo;
    private InMemoryContentSourceHealthRepository $healthRepo;
    private InMemoryScraperFactory $scraperFactory;
    private CheckAllScrapersHealthHandler $handler;

    protected function setUp(): void
    {
        $this->sourceRepo = new InMemoryContentSourceForHealthCheckRepository();
        $this->healthRepo = new InMemoryContentSourceHealthRepository();

        $this->scraperFactory = new InMemoryScraperFactory();
        $this->scraperFactory->addScraper('html', new InMemoryScraperAdapter());

        $this->handler = new CheckAllScrapersHealthHandler(
            $this->sourceRepo,
            $this->healthRepo,
            new TestScraperConfigurationHandler($this->scraperFactory),
            new NullLogger(),
        );
    }

    protected function tearDown(): void
    {
        $this->sourceRepo->clear();
        $this->healthRepo->clear();
        $this->scraperFactory->clear();
    }

    public function testSourceWithoutTestSlugIsSkipped(): void
    {
        $this->registerSource(1, 'https://example.com');

        $this->runHealthCheck();

        $this->assertNull($this->healthRepo->getStatus(1));
    }

    public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void
    {
        $this->registerSource(2, 'https://example.com', 'one-piece', 1.0);

        $this->runHealthCheck();

        $this->assertSame('ok', $this->healthRepo->getStatus(2));
        $this->assertNull($this->healthRepo->getError(2));
    }

    public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void
    {
        // Register a scraper that raises under the 'html' type used by the source.
        $failingScraper = new InMemoryScraperAdapter();
        $failingScraper->simulateError(new \RuntimeException('Connexion refusée'));
        $this->scraperFactory->addScraper('html', $failingScraper);

        $this->registerSource(3, 'https://example.com', 'one-piece', 1.0);

        $this->runHealthCheck();

        $this->assertSame('ko', $this->healthRepo->getStatus(3));
        $this->assertNotNull($this->healthRepo->getError(3));
    }

    public function testMultipleSourcesAreAllProcessed(): void
    {
        $this->registerSource(10, 'https://siteA.com', 'manga-a', 1.0);
        $this->registerSource(11, 'https://siteB.com');
        $this->registerSource(12, 'https://siteC.com', 'manga-c', 3.0);

        $this->runHealthCheck();

        $this->assertSame('ok', $this->healthRepo->getStatus(10));
        $this->assertNull($this->healthRepo->getStatus(11)); // skipped: no test config
        $this->assertSame('ok', $this->healthRepo->getStatus(12));
    }

    /**
     * Adds an 'html' source whose chapter URL format is
     * "<baseUrl>/{slug}/{chapterNumber}", with an 'img' image selector and no
     * other selectors.
     */
    private function registerSource(
        int $id,
        string $baseUrl,
        ?string $testSlug = null,
        ?float $testChapterNumber = null,
    ): void {
        $this->sourceRepo->add(new ContentSourceHealthCheckData(
            id: $id,
            baseUrl: $baseUrl,
            chapterUrlFormat: $baseUrl . '/{slug}/{chapterNumber}',
            scrapingType: 'html',
            imageSelector: 'img',
            nextPageSelector: null,
            chapterSelector: null,
            testSlug: $testSlug,
            testChapterNumber: $testChapterNumber,
        ));
    }

    /**
     * Runs the handler under test with a fresh command.
     */
    private function runHealthCheck(): void
    {
        $this->handler->handle(new CheckAllScrapersHealth());
    }
}
|
||||||
72
tests/Feature/Scraping/CheckAllScrapersHealthTest.php
Normal file
72
tests/Feature/Scraping/CheckAllScrapersHealthTest.php
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Tests\Feature\Scraping;
|
||||||
|
|
||||||
|
use App\Entity\ContentSource;
|
||||||
|
use App\Tests\Feature\AbstractApiTestCase;
|
||||||
|
use Symfony\Component\HttpFoundation\Response;
|
||||||
|
use Zenstruck\Foundry\Test\ResetDatabase;
|
||||||
|
|
||||||
|
/**
 * Functional tests for the `POST /api/scraping/check-all-health` endpoint.
 */
final class CheckAllScrapersHealthTest extends AbstractApiTestCase
{
    use ResetDatabase;

    public function testItReturns202WithNoSources(): void
    {
        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);
    }

    public function testItReturns202WithSourcesHavingNoTestConfig(): void
    {
        $source = $this->newHtmlSource();
        $this->save($source);

        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);

        // A source without a testSlug must keep its health status unchanged.
        $reloaded = $this->reload($source);
        $this->assertSame('unknown', $reloaded->getHealthStatus());
    }

    public function testHealthStatusIsUpdatedForSourcesWithTestConfig(): void
    {
        $source = $this->newHtmlSource();
        $source->setTestSlug('one-piece')
            ->setTestChapterNumber(1.0);
        $this->save($source);

        $this->post();

        $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED);

        // After the check the status must no longer be 'unknown'...
        $status = $this->reload($source)->getHealthStatus();
        $this->assertNotSame('unknown', $status);
        // ...and the check must have finished.
        $this->assertNotSame('testing', $status);
    }

    /**
     * Sends the POST request with an empty JSON object as body.
     */
    private function post(): void
    {
        $client = static::createClient();
        $client->request('POST', '/api/scraping/check-all-health', [
            'json' => new \stdClass(),
        ]);
    }

    /**
     * Builds an unsaved 'html' content source pointing at example.com.
     */
    private function newHtmlSource(): ContentSource
    {
        $source = new ContentSource();
        $source->setBaseUrl('https://example.com')
            ->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}')
            ->setScrapingType('html');

        return $source;
    }

    /**
     * Persists and flushes the given source.
     */
    private function save(ContentSource $source): void
    {
        $this->entityManager->persist($source);
        $this->entityManager->flush();
    }

    /**
     * Clears the entity manager and fetches the source again by its id.
     */
    private function reload(ContentSource $source): ?ContentSource
    {
        $this->entityManager->clear();

        return $this->entityManager->find(ContentSource::class, $source->getId());
    }
}
|
||||||
Reference in New Issue
Block a user