From 01474c264b8c43b96b6865f5446c687534477afd Mon Sep 17 00:00:00 2001 From: "ext.jeremy.guillot@maxicoffee.domains" Date: Mon, 16 Mar 2026 00:08:57 +0100 Subject: [PATCH] =?UTF-8?q?feat(scraping):=20impl=C3=A9menter=20le=20healt?= =?UTF-8?q?h=20check=20de=20tous=20les=20scrapers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Commande CheckAllScrapersHealth + handler avec ports dédiés - Value Object ContentSourceHealthCheckData - Resource API Platform et State Processor - Adapters InMemory et tests unitaires + fonctionnels --- .../Command/CheckAllScrapersHealth.php | 7 + .../CheckAllScrapersHealthHandler.php | 64 ++++++++ .../ContentSourceForHealthCheckInterface.php | 11 ++ ...ContentSourceHealthRepositoryInterface.php | 12 ++ .../ContentSourceHealthCheckData.php | 19 +++ .../CheckAllScrapersHealthResource.php | 23 +++ .../CheckAllScrapersHealthStateProcessor.php | 23 +++ ...yContentSourceForHealthCheckRepository.php | 27 +++ .../InMemoryContentSourceHealthRepository.php | 41 +++++ .../CheckAllScrapersHealthHandlerTest.php | 154 ++++++++++++++++++ .../Scraping/CheckAllScrapersHealthTest.php | 72 ++++++++ 11 files changed, 453 insertions(+) create mode 100644 src/Domain/Scraping/Application/Command/CheckAllScrapersHealth.php create mode 100644 src/Domain/Scraping/Application/CommandHandler/CheckAllScrapersHealthHandler.php create mode 100644 src/Domain/Scraping/Domain/Contract/Repository/ContentSourceForHealthCheckInterface.php create mode 100644 src/Domain/Scraping/Domain/Contract/Repository/ContentSourceHealthRepositoryInterface.php create mode 100644 src/Domain/Scraping/Domain/Model/ValueObject/ContentSourceHealthCheckData.php create mode 100644 src/Domain/Scraping/Infrastructure/ApiPlatform/Resource/CheckAllScrapersHealthResource.php create mode 100644 src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/CheckAllScrapersHealthStateProcessor.php create mode 100644 tests/Domain/Scraping/Adapter/InMemoryContentSourceForHealthCheckRepository.php create mode 100644 tests/Domain/Scraping/Adapter/InMemoryContentSourceHealthRepository.php create mode 100644 tests/Domain/Scraping/Application/CommandHandler/CheckAllScrapersHealthHandlerTest.php create mode 100644 tests/Feature/Scraping/CheckAllScrapersHealthTest.php diff --git a/src/Domain/Scraping/Application/Command/CheckAllScrapersHealth.php b/src/Domain/Scraping/Application/Command/CheckAllScrapersHealth.php new file mode 100644 index 0000000..2f74bd5 --- /dev/null +++ b/src/Domain/Scraping/Application/Command/CheckAllScrapersHealth.php @@ -0,0 +1,7 @@ +contentSourceForHealthCheckRepo->getAll(); + + foreach ($sources as $source) { + if ($source->testSlug === null || $source->testChapterNumber === null) { + $this->logger->warning('ContentSource {id} has no test config, skipping health check.', ['id' => $source->id]); + continue; + } + + try { + $this->contentSourceHealthRepo->markAsTesting($source->id); + $testUrl = str_replace( + ['{slug}', '{chapterNumber}'], + [$source->testSlug, $source->testChapterNumber], + $source->chapterUrlFormat + ); + + $testCommand = new TestScraperConfiguration( + baseUrl: $source->baseUrl, + chapterUrlFormat: $source->chapterUrlFormat, + scrapingType: $source->scrapingType, + testUrl: $testUrl, + mangaSlug: $source->testSlug, + chapterNumber: $source->testChapterNumber, + imageSelector: $source->imageSelector, + nextPageSelector: $source->nextPageSelector, + chapterSelector: $source->chapterSelector, + ); + + $response = $this->testScraperConfigurationHandler->handle($testCommand); + + if ($response->success) { + $this->contentSourceHealthRepo->markAsHealthy($source->id, new \DateTimeImmutable()); + } else { + $firstError = $response->errors[0]['message'] ?? 'Erreur inconnue'; + $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $firstError); + } + } catch (\Exception $e) { + $this->contentSourceHealthRepo->markAsUnhealthy($source->id, new \DateTimeImmutable(), $e->getMessage()); + } + } + } +} diff --git a/src/Domain/Scraping/Domain/Contract/Repository/ContentSourceForHealthCheckInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/ContentSourceForHealthCheckInterface.php new file mode 100644 index 0000000..6bf1d3a --- /dev/null +++ b/src/Domain/Scraping/Domain/Contract/Repository/ContentSourceForHealthCheckInterface.php @@ -0,0 +1,11 @@ +handler->handle(new CheckAllScrapersHealth()); + + return null; + } +} diff --git a/tests/Domain/Scraping/Adapter/InMemoryContentSourceForHealthCheckRepository.php b/tests/Domain/Scraping/Adapter/InMemoryContentSourceForHealthCheckRepository.php new file mode 100644 index 0000000..e3c6158 --- /dev/null +++ b/tests/Domain/Scraping/Adapter/InMemoryContentSourceForHealthCheckRepository.php @@ -0,0 +1,27 @@ +sources[] = $data; + } + + public function getAll(): array + { + return $this->sources; + } + + public function clear(): void + { + $this->sources = []; + } +} diff --git a/tests/Domain/Scraping/Adapter/InMemoryContentSourceHealthRepository.php b/tests/Domain/Scraping/Adapter/InMemoryContentSourceHealthRepository.php new file mode 100644 index 0000000..6bd827d --- /dev/null +++ b/tests/Domain/Scraping/Adapter/InMemoryContentSourceHealthRepository.php @@ -0,0 +1,41 @@ + */ + private array $statuses = []; + + public function markAsTesting(int $sourceId): void + { + $this->statuses[$sourceId] = ['status' => 'testing', 'testedAt' => null, 'error' => null]; + } + + public function markAsHealthy(int $sourceId, \DateTimeImmutable $testedAt): void + { + $this->statuses[$sourceId] = ['status' => 'ok', 'testedAt' => $testedAt, 'error' => null]; + } + + public function markAsUnhealthy(int $sourceId, \DateTimeImmutable $testedAt, string $error): void + { + $this->statuses[$sourceId] = ['status' => 'ko', 'testedAt' => $testedAt, 'error' => $error]; + } + + public function getStatus(int $sourceId): ?string + { + return $this->statuses[$sourceId]['status'] ?? null; + } + + public function getError(int $sourceId): ?string + { + return $this->statuses[$sourceId]['error'] ?? null; + } + + public function clear(): void + { + $this->statuses = []; + } +} diff --git a/tests/Domain/Scraping/Application/CommandHandler/CheckAllScrapersHealthHandlerTest.php b/tests/Domain/Scraping/Application/CommandHandler/CheckAllScrapersHealthHandlerTest.php new file mode 100644 index 0000000..d75ff39 --- /dev/null +++ b/tests/Domain/Scraping/Application/CommandHandler/CheckAllScrapersHealthHandlerTest.php @@ -0,0 +1,154 @@ +sourceRepo = new InMemoryContentSourceForHealthCheckRepository(); + $this->healthRepo = new InMemoryContentSourceHealthRepository(); + $this->scraperFactory = new InMemoryScraperFactory(); + $this->scraperFactory->addScraper('html', new InMemoryScraperAdapter()); + + $testScraperHandler = new TestScraperConfigurationHandler($this->scraperFactory); + + $this->handler = new CheckAllScrapersHealthHandler( + $this->sourceRepo, + $this->healthRepo, + $testScraperHandler, + new NullLogger(), + ); + } + + public function testSourceWithoutTestSlugIsSkipped(): void + { + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 1, + baseUrl: 'https://example.com', + chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: null, + testChapterNumber: null, + )); + + $this->handler->handle(new CheckAllScrapersHealth()); + + $this->assertNull($this->healthRepo->getStatus(1)); + } + + public function testSourceWithTestSlugIsMarkedAsHealthyOnSuccess(): void + { + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 2, + baseUrl: 'https://example.com', + chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: 'one-piece', + testChapterNumber: 1.0, + )); + + $this->handler->handle(new CheckAllScrapersHealth()); + + $this->assertSame('ok', $this->healthRepo->getStatus(2)); + $this->assertNull($this->healthRepo->getError(2)); + } + + public function testSourceIsMarkedAsUnhealthyWhenScraperThrows(): void + { + $failingScraper = new InMemoryScraperAdapter(); + $failingScraper->simulateError(new \RuntimeException('Connexion refusée')); + $this->scraperFactory->addScraper('html', $failingScraper); + + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 3, + baseUrl: 'https://example.com', + chapterUrlFormat: 'https://example.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: 'one-piece', + testChapterNumber: 1.0, + )); + + $this->handler->handle(new CheckAllScrapersHealth()); + + $this->assertSame('ko', $this->healthRepo->getStatus(3)); + $this->assertNotNull($this->healthRepo->getError(3)); + } + + public function testMultipleSourcesAreAllProcessed(): void + { + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 10, + baseUrl: 'https://siteA.com', + chapterUrlFormat: 'https://siteA.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: 'manga-a', + testChapterNumber: 1.0, + )); + + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 11, + baseUrl: 'https://siteB.com', + chapterUrlFormat: 'https://siteB.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: null, + testChapterNumber: null, + )); + + $this->sourceRepo->add(new ContentSourceHealthCheckData( + id: 12, + baseUrl: 'https://siteC.com', + chapterUrlFormat: 'https://siteC.com/{slug}/{chapterNumber}', + scrapingType: 'html', + imageSelector: 'img', + nextPageSelector: null, + chapterSelector: null, + testSlug: 'manga-c', + testChapterNumber: 3.0, + )); + + $this->handler->handle(new CheckAllScrapersHealth()); + + $this->assertSame('ok', $this->healthRepo->getStatus(10)); + $this->assertNull($this->healthRepo->getStatus(11)); // skippée + $this->assertSame('ok', $this->healthRepo->getStatus(12)); + } + + protected function tearDown(): void + { + $this->sourceRepo->clear(); + $this->healthRepo->clear(); + $this->scraperFactory->clear(); + } +} diff --git a/tests/Feature/Scraping/CheckAllScrapersHealthTest.php b/tests/Feature/Scraping/CheckAllScrapersHealthTest.php new file mode 100644 index 0000000..547d293 --- /dev/null +++ b/tests/Feature/Scraping/CheckAllScrapersHealthTest.php @@ -0,0 +1,72 @@ +request('POST', '/api/scraping/check-all-health', [ + 'json' => new \stdClass(), + ]); + } + + public function testItReturns202WithNoSources(): void + { + $this->post(); + + $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED); + } + + public function testItReturns202WithSourcesHavingNoTestConfig(): void + { + $source = new ContentSource(); + $source->setBaseUrl('https://example.com') + ->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}') + ->setScrapingType('html'); + + $this->entityManager->persist($source); + $this->entityManager->flush(); + + $this->post(); + + $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED); + + // La source sans testSlug ne doit pas avoir son statut modifié + $this->entityManager->clear(); + $reloaded = $this->entityManager->find(ContentSource::class, $source->getId()); + $this->assertSame('unknown', $reloaded->getHealthStatus()); + } + + public function testHealthStatusIsUpdatedForSourcesWithTestConfig(): void + { + $source = new ContentSource(); + $source->setBaseUrl('https://example.com') + ->setChapterUrlFormat('https://example.com/{slug}/{chapterNumber}') + ->setScrapingType('html') + ->setTestSlug('one-piece') + ->setTestChapterNumber(1.0); + + $this->entityManager->persist($source); + $this->entityManager->flush(); + + $this->post(); + + $this->assertResponseStatusCodeSame(Response::HTTP_ACCEPTED); + + // Le statut ne doit plus être 'unknown' après le test + $this->entityManager->clear(); + $reloaded = $this->entityManager->find(ContentSource::class, $source->getId()); + $this->assertNotSame('unknown', $reloaded->getHealthStatus()); + $this->assertNotSame('testing', $reloaded->getHealthStatus()); // doit être terminé + } +}