From c9f17715225619cdc6f9991eb6809b068d4bc028 Mon Sep 17 00:00:00 2001 From: "ext.jeremy.guillot@maxicoffee.domains" Date: Thu, 3 Apr 2025 16:34:30 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20refactorisation=20de=20la=20gestion=20d?= =?UTF-8?q?u=20scraping=20des=20chapitres=20en=20rempla=C3=A7ant=20les=20i?= =?UTF-8?q?dentifiants=20de=20manga=20et=20de=20chapitre=20par=20un=20iden?= =?UTF-8?q?tifiant=20de=20chapitre=20unique,=20am=C3=A9lioration=20de=20la?= =?UTF-8?q?=20r=C3=A9cup=C3=A9ration=20des=20sources=20pr=C3=A9f=C3=A9r?= =?UTF-8?q?=C3=A9es=20et=20ajout=20de=20la=20gestion=20des=20erreurs=20pou?= =?UTF-8?q?r=20les=20=C3=A9checs=20de=20scraping.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Application/Command/ScrapeChapter.php | 4 +- .../CommandHandler/ScrapeChapterHandler.php | 180 ++++++++++++------ .../Repository/ChapterRepositoryInterface.php | 1 + .../Repository/SourceRepositoryInterface.php | 5 + src/Domain/Scraping/Domain/Model/Manga.php | 20 ++ .../ApiPlatform/Dto/ScrapeChapterRequest.php | 10 +- .../Processor/ScrapeChapterStateProcessor.php | 4 +- .../Persistence/LegacyChapterRepository.php | 21 ++ .../Persistence/LegacyMangaRepository.php | 21 +- .../Persistence/LegacySourceRepository.php | 39 ++++ .../Adapter/InMemoryChapterRepository.php | 6 + .../Adapter/InMemoryMangaRepository.php | 24 ++- .../Adapter/InMemorySourceRepository.php | 18 +- .../ScrapeChapterHandlerTest.php | 10 +- tests/Feature/Scraping/ScrapeChapterTest.php | 11 +- 15 files changed, 270 insertions(+), 104 deletions(-) diff --git a/src/Domain/Scraping/Application/Command/ScrapeChapter.php b/src/Domain/Scraping/Application/Command/ScrapeChapter.php index ff4ed18..fe483f4 100644 --- a/src/Domain/Scraping/Application/Command/ScrapeChapter.php +++ b/src/Domain/Scraping/Application/Command/ScrapeChapter.php @@ -5,9 +5,7 @@ namespace App\Domain\Scraping\Application\Command; readonly class ScrapeChapter { public function __construct( - public string $mangaId, - public string $chapterNumber, - public string $sourceId + public string $chapterId ) { } } diff --git a/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php b/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php index 3f695f1..4589833 100644 --- a/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php +++ b/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php @@ -12,6 +12,7 @@ use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface; use App\Domain\Scraping\Domain\Event\ChapterScraped; use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed; use App\Domain\Scraping\Domain\Model\ScrapingJob; +use App\Domain\Scraping\Domain\Model\Source; use App\Domain\Scraping\Domain\Model\ValueObject\CbzGenerationRequest; use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest; use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory; @@ -39,76 +40,141 @@ readonly class ScrapeChapterHandler { $job = null; try { - // 1. Création du job dans sa propre transaction - $job = new ScrapingJob( - Uuid::uuid4()->toString(), - $command->mangaId, - $command->chapterNumber, - $command->sourceId - ); - $job->start(); - $this->jobRepository->save($job); + // 1. Récupération du chapitre + $chapter = $this->chapterRepository->getById($command->chapterId); + if (!$chapter) { + throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}"); + } - // 2. Nouvelle transaction pour le reste des opérations - $this->entityManager->beginTransaction(); - try { - // Préparation des données - $manga = $this->mangaRepository->getById($command->mangaId); - $chapter = $this->chapterRepository->getByMangaIdAndChapterNumber($command->mangaId, $command->chapterNumber); - $source = $this->sourceRepository->getById($command->sourceId); + // 2. Récupération du manga + $manga = $this->mangaRepository->getById($chapter->mangaId); + if (!$manga) { + throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}"); + } - // 3. Scraping des URLs - $scrapingRequest = new ScrapingRequest( - 'html', - $source->buildChapterUrl($manga->getSlug(), $command->chapterNumber), - $source->getScrappingParameters() + // 3. Détermination des sources à utiliser + $sources = $this->getSourcesToTry($manga); + if (empty($sources)) { + throw new \InvalidArgumentException("No sources available for scraping"); + } + + // 4. Essai de scraping sur chaque source jusqu'à succès + $success = false; + $lastException = null; + + foreach ($sources as $source) { + $job = new ScrapingJob( + Uuid::uuid4()->toString(), + $chapter->mangaId, + $chapter->chapterNumber, + $source->getId()->getValue() ); - - $scrapingResult = $this->scraper->scrape($scrapingRequest); - - // 4. Téléchargement des images - $tempDir = new TempDirectory(); - $downloadResults = $this->imageDownloader->downloadBatch( - $scrapingResult->getImageUrls(), - $tempDir, - $job->id - ); - - // 5. Génération du CBZ - $cbzRequest = new CbzGenerationRequest( - $manga->getTitle(), - $manga->getPublicationYear(), - $chapter->volumeNumber, - $command->chapterNumber, - $tempDir, - array_map(fn($r) => $r->getLocalPath(), $downloadResults) - ); - - $cbzPath = $this->cbzGenerator->generate($cbzRequest); - - // 6. Mise à jour et sauvegarde - $chapter->cbzPath = $cbzPath->getPath(); - $this->chapterRepository->save($chapter); - - $job->complete(); + $job->start(); $this->jobRepository->save($job); - $this->entityManager->commit(); + try { + $this->entityManager->beginTransaction(); - $this->eventBus->dispatch(new ChapterScraped($job->id)); + // 5. Scraping des URLs + $scrapingRequest = new ScrapingRequest( + 'html', + $source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber), + $source->getScrappingParameters() + ); - // 7. Nettoyage - $tempDir->cleanup(); - } catch (\Exception $e) { - $this->entityManager->rollback(); - throw $e; + $scrapingResult = $this->scraper->scrape($scrapingRequest); + + // 6. Téléchargement des images + $tempDir = new TempDirectory(); + $downloadResults = $this->imageDownloader->downloadBatch( + $scrapingResult->getImageUrls(), + $tempDir, + $job->id + ); + + // 7. Génération du CBZ + $cbzRequest = new CbzGenerationRequest( + $manga->getTitle(), + $manga->getPublicationYear(), + $chapter->volumeNumber, + $chapter->chapterNumber, + $tempDir, + array_map(fn($r) => $r->getLocalPath(), $downloadResults) + ); + + $cbzPath = $this->cbzGenerator->generate($cbzRequest); + + // 8. Mise à jour et sauvegarde + $chapter->cbzPath = $cbzPath->getPath(); + $this->chapterRepository->save($chapter); + + $job->complete(); + $this->jobRepository->save($job); + + $this->entityManager->commit(); + + $this->eventBus->dispatch(new ChapterScraped($job->id)); + + // 9. Nettoyage + $tempDir->cleanup(); + + // Scraping réussi, pas besoin d'essayer d'autres sources + $success = true; + break; + + } catch (\Exception $e) { + $this->entityManager->rollback(); + + if (isset($job)) { + $job->fail($e->getMessage()); + $this->jobRepository->save($job); + } + + $lastException = $e; + + // Continuer avec la source suivante + } } + + // Si toutes les sources ont échoué + if (!$success) { + $errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources"; + $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage)); + } + } catch (\Exception $e) { if (isset($job)) { $job->fail($e->getMessage()); $this->jobRepository->save($job); } - $this->eventBus->dispatch(new ChapterScrapingFailed($command->mangaId, $command->chapterNumber, $e->getMessage())); + $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage())); } } + + /** + * Détermine les sources à utiliser pour le scraping en fonction des préférences du manga + * + * @param \App\Domain\Scraping\Domain\Model\Manga $manga + * @return Source[] + */ + private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array + { + // Si le manga a des sources préférées, les utiliser + if ($manga->hasPreferredSources()) { + $preferredSources = []; + foreach ($manga->getPreferredSources() as $sourceId) { + $source = $this->sourceRepository->getById($sourceId); + if ($source) { + $preferredSources[] = $source; + } + } + + if (!empty($preferredSources)) { + return $preferredSources; + } + } + + // Sinon, utiliser toutes les sources disponibles + return $this->sourceRepository->getAll(); + } } diff --git a/src/Domain/Scraping/Domain/Contract/Repository/ChapterRepositoryInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/ChapterRepositoryInterface.php index 7ef79e5..cfd0591 100644 --- a/src/Domain/Scraping/Domain/Contract/Repository/ChapterRepositoryInterface.php +++ b/src/Domain/Scraping/Domain/Contract/Repository/ChapterRepositoryInterface.php @@ -6,6 +6,7 @@ use App\Domain\Scraping\Domain\Model\Chapter; interface ChapterRepositoryInterface { + public function getById(string $id): ?Chapter; public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter; public function save(Chapter $chapter): void; } diff --git a/src/Domain/Scraping/Domain/Contract/Repository/SourceRepositoryInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/SourceRepositoryInterface.php index ccbafe0..af7365a 100644 --- a/src/Domain/Scraping/Domain/Contract/Repository/SourceRepositoryInterface.php +++ b/src/Domain/Scraping/Domain/Contract/Repository/SourceRepositoryInterface.php @@ -7,4 +7,9 @@ use App\Domain\Scraping\Domain\Model\Source; interface SourceRepositoryInterface { public function getById(string $id): ?Source; + + /** + * @return Source[] + */ + public function getAll(): array; } diff --git a/src/Domain/Scraping/Domain/Model/Manga.php b/src/Domain/Scraping/Domain/Model/Manga.php index 9ca048b..bb2f347 100644 --- a/src/Domain/Scraping/Domain/Model/Manga.php +++ b/src/Domain/Scraping/Domain/Model/Manga.php @@ -4,6 +4,9 @@ namespace App\Domain\Scraping\Domain\Model; class Manga { + /** + * @param string[] $preferredSources + */ public function __construct( private readonly string $id, private readonly string $title, @@ -11,6 +14,7 @@ class Manga private readonly string $description, private readonly string $author, private readonly string $publicationYear, + private readonly array $preferredSources = [], ) { } @@ -43,4 +47,20 @@ class Manga { return $this->publicationYear; } + + /** + * @return string[] + */ + public function getPreferredSources(): array + { + return $this->preferredSources; + } + + /** + * @return bool + */ + public function hasPreferredSources(): bool + { + return !empty($this->preferredSources); + } } diff --git a/src/Domain/Scraping/Infrastructure/ApiPlatform/Dto/ScrapeChapterRequest.php b/src/Domain/Scraping/Infrastructure/ApiPlatform/Dto/ScrapeChapterRequest.php index c9d28dd..7359c35 100644 --- a/src/Domain/Scraping/Infrastructure/ApiPlatform/Dto/ScrapeChapterRequest.php +++ b/src/Domain/Scraping/Infrastructure/ApiPlatform/Dto/ScrapeChapterRequest.php @@ -21,15 +21,9 @@ use Symfony\Component\Validator\Constraints as Assert; readonly class ScrapeChapterRequest { public function __construct( - #[ApiProperty(description: 'ID du manga')] + #[ApiProperty(description: 'ID du chapitre à scraper')] #[Assert\NotBlank] - public string $mangaId, - #[ApiProperty(description: 'Numéro du chapitre')] - #[Assert\NotBlank] - public string $chapterNumber, - #[ApiProperty(description: 'ID de la source')] - #[Assert\NotBlank] - public string $sourceId, + public string $chapterId, ) { } } diff --git a/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php b/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php index 368588e..603ba6c 100644 --- a/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php +++ b/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php @@ -22,9 +22,7 @@ final class ScrapeChapterStateProcessor implements ProcessorInterface { $this->commandBus->dispatch( new ScrapeChapter( - $data->mangaId, - $data->chapterNumber, - $data->sourceId + $data->chapterId ) ); } diff --git a/src/Domain/Scraping/Infrastructure/Persistence/LegacyChapterRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/LegacyChapterRepository.php index 3104e03..00c2f5e 100644 --- a/src/Domain/Scraping/Infrastructure/Persistence/LegacyChapterRepository.php +++ b/src/Domain/Scraping/Infrastructure/Persistence/LegacyChapterRepository.php @@ -13,6 +13,26 @@ readonly class LegacyChapterRepository implements ChapterRepositoryInterface private EntityManagerInterface $entityManager, ) {} + /** + * Récupère un chapitre par son identifiant + */ + public function getById(string $id): ?Chapter + { + $chapterEntity = $this->entityManager->getRepository(EntityChapter::class)->find($id); + + if (!$chapterEntity) { + return null; + } + + return new Chapter( + id: $chapterEntity->getId(), + mangaId: $chapterEntity->getManga()->getId(), + chapterNumber: $chapterEntity->getNumber(), + volumeNumber: $chapterEntity->getVolume(), + cbzPath: $chapterEntity->getCbzPath(), + ); + } + /** * @throws ChapterNotFoundException */ @@ -32,6 +52,7 @@ readonly class LegacyChapterRepository implements ChapterRepositoryInterface mangaId: $chapterEntity->getManga()->getId(), chapterNumber: $chapterEntity->getNumber(), volumeNumber: $chapterEntity->getVolume(), + cbzPath: $chapterEntity->getCbzPath(), ); } diff --git a/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php index 66675f7..1457995 100644 --- a/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php +++ b/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php @@ -19,13 +19,24 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface /** @var EntityManga|null $mangaEntity */ $mangaEntity = $this->entityManager->getRepository(EntityManga::class)->find($id); - return $mangaEntity ? new Manga( + if (!$mangaEntity) { + return null; + } + + // Récupération des sources préférées + $preferredSourceIds = []; + foreach ($mangaEntity->getPreferredSources() as $source) { + $preferredSourceIds[] = $source->getId(); + } + + return new Manga( $mangaEntity->getId(), $mangaEntity->getTitle(), $mangaEntity->getSlug(), - $mangaEntity->getDescription(), - $mangaEntity->getAuthor(), - $mangaEntity->getPublicationYear(), - ) : null; + $mangaEntity->getDescription() ?? '', + $mangaEntity->getAuthor() ?? '', + $mangaEntity->getPublicationYear() ?? '', + $preferredSourceIds, + ); } } diff --git a/src/Domain/Scraping/Infrastructure/Persistence/LegacySourceRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/LegacySourceRepository.php index bf3132a..791e13a 100644 --- a/src/Domain/Scraping/Infrastructure/Persistence/LegacySourceRepository.php +++ b/src/Domain/Scraping/Infrastructure/Persistence/LegacySourceRepository.php @@ -46,4 +46,43 @@ readonly class LegacySourceRepository implements SourceRepositoryInterface updatedAt: new DateTimeImmutable() ); } + + /** + * @return Source[] + */ + public function getAll(): array + { + /** @var ContentSource[] $sourceEntities */ + $sourceEntities = $this->entityManager->getRepository(ContentSource::class)->findAll(); + + $sources = []; + foreach ($sourceEntities as $sourceEntity) { + $sources[] = $this->convertEntityToModel($sourceEntity); + } + + return $sources; + } + + /** + * Convertit une entité ContentSource en modèle Source + */ + private function convertEntityToModel(ContentSource $source): Source + { + return new Source( + id: new SourceId($source->getId()), + name: $source->getCleanBaseUrl(), + description: 'Legacy Source: ' . $source->getBaseUrl(), + baseUrl: $source->getBaseUrl(), + scrappingParameters: [ + 'imageSelector' => $source->getImageSelector(), + 'nextPageSelector' => $source->getNextPageSelector(), + 'chapterUrlFormat' => $source->getChapterUrlFormat(), + 'scrapingType' => $source->getScrapingType(), + 'chapterSelector' => $source->getChapterSelector() + ], + isActive: true, + createdAt: new DateTimeImmutable(), + updatedAt: new DateTimeImmutable() + ); + } } diff --git a/tests/Domain/Scraping/Adapter/InMemoryChapterRepository.php b/tests/Domain/Scraping/Adapter/InMemoryChapterRepository.php index fb7c87c..cf91f74 100644 --- a/tests/Domain/Scraping/Adapter/InMemoryChapterRepository.php +++ b/tests/Domain/Scraping/Adapter/InMemoryChapterRepository.php @@ -8,6 +8,12 @@ use App\Domain\Scraping\Domain\Model\Chapter; class InMemoryChapterRepository implements ChapterRepositoryInterface { private array $chapters = []; + + public function getById(string $id): ?Chapter + { + return $this->chapters[$id] ?? null; + } + public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter { foreach ($this->chapters as $chapter) { diff --git a/tests/Domain/Scraping/Adapter/InMemoryMangaRepository.php b/tests/Domain/Scraping/Adapter/InMemoryMangaRepository.php index 95384fc..62ec6c2 100644 --- a/tests/Domain/Scraping/Adapter/InMemoryMangaRepository.php +++ b/tests/Domain/Scraping/Adapter/InMemoryMangaRepository.php @@ -16,19 +16,27 @@ class InMemoryMangaRepository implements MangaRepositoryInterface 'test-manga', 'Test Manga', 'test-manga', - '2024', + 'A test manga description', 'Test Author', - 'A test manga description' + '2024', + [] // Pas de sources préférées par défaut + ); + + // Ajoute un manga avec des sources préférées pour les tests + $this->mangas['test-manga-with-sources'] = new Manga( + 'test-manga-with-sources', + 'Test Manga With Sources', + 'test-manga-with-sources', + 'A test manga with preferred sources', + 'Test Author', + '2024', + ['test-source'] // Une source préférée ); } - public function getById(string $id): Manga + public function getById(string $id): ?Manga { - if (!isset($this->mangas[$id])) { - throw new \RuntimeException('Manga not found'); - } - - return $this->mangas[$id]; + return $this->mangas[$id] ?? null; } public function save(Manga $manga): void diff --git a/tests/Domain/Scraping/Adapter/InMemorySourceRepository.php b/tests/Domain/Scraping/Adapter/InMemorySourceRepository.php index cf55dae..6c4156a 100644 --- a/tests/Domain/Scraping/Adapter/InMemorySourceRepository.php +++ b/tests/Domain/Scraping/Adapter/InMemorySourceRepository.php @@ -22,7 +22,9 @@ class InMemorySourceRepository implements SourceRepositoryInterface [ 'imageSelector' => 'img.manga-image', 'nextPageSelector' => null, - 'chapterUrlFormat' => 'https://example.com/manga/{slug}/chapter-{chapterNumber}' + 'chapterUrlFormat' => 'https://example.com/manga/{slug}/chapter-{chapterNumber}', + 'scrapingType' => 'html', + 'chapterSelector' => '.chapter-item' ], true, new DateTimeImmutable(), @@ -30,13 +32,17 @@ class InMemorySourceRepository implements SourceRepositoryInterface ); } - public function getById(string $id): Source + public function getById(string $id): ?Source { - if (!isset($this->sources[$id])) { - throw new \RuntimeException('Source not found'); - } + return $this->sources[$id] ?? null; + } - return $this->sources[$id]; + /** + * @return Source[] + */ + public function getAll(): array + { + return array_values($this->sources); } public function save(Source $source): void diff --git a/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php index 010d367..28acfb6 100644 --- a/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php +++ b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php @@ -74,9 +74,7 @@ class ScrapeChapterHandlerTest extends TestCase public function testHandleSuccessfully(): void { $command = new ScrapeChapter( - mangaId: 'test-manga', - chapterNumber: '2', - sourceId: 'test-source' + chapterId: '1' ); $this->handler->handle($command); @@ -90,16 +88,14 @@ class ScrapeChapterHandlerTest extends TestCase $this->assertInstanceOf(ChapterScraped::class, $dispatchedMessages[0]); $this->assertEquals($job->id, $dispatchedMessages[0]->getJobId()); - $chapter = $this->chapterRepository->getByMangaIdAndChapterNumber('test-manga', 2); + $chapter = $this->chapterRepository->getById('1'); $this->assertNotNull($chapter->cbzPath); } public function testHandleThrowsException(): void { $command = new ScrapeChapter( - mangaId: 'test-manga', - chapterNumber: '2', - sourceId: 'test-source' + chapterId: '1' ); $exception = new \Exception('Scraping failed'); diff --git a/tests/Feature/Scraping/ScrapeChapterTest.php b/tests/Feature/Scraping/ScrapeChapterTest.php index 07a890c..3f2e884 100644 --- a/tests/Feature/Scraping/ScrapeChapterTest.php +++ b/tests/Feature/Scraping/ScrapeChapterTest.php @@ -23,9 +23,7 @@ class ScrapeChapterTest extends AbstractApiTestCase { // Given $payload = [ - 'chapterNumber' => 'chapter-123', - 'sourceId' => 'source-456', - 'mangaId' => 'manga-789', + 'chapterId' => 'chapter-123', ]; // When @@ -43,15 +41,14 @@ class ScrapeChapterTest extends AbstractApiTestCase /** @var ScrapeChapter $message */ $message = $messages[0]; $this->assertInstanceOf(ScrapeChapter::class, $message); + $this->assertEquals('chapter-123', $message->chapterId); } public function testInitiateChapterScrapingWithInvalidPayload(): void { // Given $payload = [ - 'chapterNumber' => '', - 'sourceId' => 'source-456', - 'mangaId' => 'manga-789', + 'chapterId' => '', ]; // When @@ -65,7 +62,7 @@ class ScrapeChapterTest extends AbstractApiTestCase $this->assertJsonContains([ 'violations' => [ [ - 'propertyPath' => 'chapterNumber', + 'propertyPath' => 'chapterId', 'message' => 'This value should not be blank.', ], ],