feat: ajout de la gestion des slugs alternatifs pour le scraping des chapitres, mise à jour du service de scraping pour essayer plusieurs slugs, et amélioration de la configuration des services pour le dépôt de chapitres et le service de fichiers.

2025-07-03 18:41:13 +02:00
parent 9255509042
commit a6ca8a2c9a
4 changed files with 94 additions and 59 deletions
--- a/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
+++ b/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
@@ -65,80 +65,91 @@ readonly class ScrapeChapterHandler
            $lastException = null;

            foreach ($sources as $source) {
-                $job = new ScrapingJob(
-                    Uuid::uuid4()->toString(),
-                    $chapter->mangaId,
-                    $chapter->chapterNumber,
-                    $source->getId()->getValue()
-                );
+                // Préparer la liste des slugs à essayer : slug principal + slugs alternatifs
+                $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());

-                // Ajout de l'ID du chapitre dans le contexte du job
-                $job->context['chapterId'] = $command->chapterId;
-
-                $job->start();
-                $this->jobRepository->save($job);
-
-                try {
-                    $this->entityManager->beginTransaction();
-
-                    // 5. Scraping des URLs
-                    $scrapingRequest = new ScrapingRequest(
-                        'html',
-                        $source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
-                        $source->getScrappingParameters()
-                    );
-
-                    $scrapingResult = $this->scraper->scrape($scrapingRequest);
-
-                    // 6. Téléchargement des images
-                    $tempDir = new TempDirectory();
-                    $downloadResults = $this->imageDownloader->downloadBatch(
-                        $scrapingResult->getImageUrls(),
-                        $tempDir,
-                        $job->id
-                    );
-
-                    // 7. Génération du CBZ
-                    $cbzRequest = new CbzGenerationRequest(
-                        $manga->getTitle(),
-                        $manga->getPublicationYear(),
-                        $chapter->volumeNumber,
+                foreach ($slugsToTry as $slug) {
+                    $job = new ScrapingJob(
+                        Uuid::uuid4()->toString(),
+                        $chapter->mangaId,
                        $chapter->chapterNumber,
-                        $tempDir,
-                        array_map(fn($r) => $r->getLocalPath(), $downloadResults)
+                        $source->getId()->getValue()
                    );

-                    $cbzPath = $this->cbzGenerator->generate($cbzRequest);
+                    // Ajout de l'ID du chapitre et du slug dans le contexte du job
+                    $job->context['chapterId'] = $command->chapterId;
+                    $job->context['slug'] = $slug;

-                    // 8. Mise à jour et sauvegarde
-                    $chapter->cbzPath = $cbzPath->getPath();
-                    $this->chapterRepository->save($chapter);
-
-                    $job->complete();
+                    $job->start();
                    $this->jobRepository->save($job);

-                    $this->entityManager->commit();
+                    try {
+                        $this->entityManager->beginTransaction();

-                    $this->eventBus->dispatch(new ChapterScraped($job->id));
+                        // 5. Scraping des URLs avec le slug courant
+                        $scrapingRequest = new ScrapingRequest(
+                            'html',
+                            $source->buildChapterUrl($slug, $chapter->chapterNumber),
+                            $source->getScrappingParameters()
+                        );

-                    // 9. Nettoyage
-                    $tempDir->cleanup();
+                        $scrapingResult = $this->scraper->scrape($scrapingRequest);

-                    // Scraping réussi, pas besoin d'essayer d'autres sources
-                    $success = true;
-                    break;
+                        // 6. Téléchargement des images
+                        $tempDir = new TempDirectory();
+                        $downloadResults = $this->imageDownloader->downloadBatch(
+                            $scrapingResult->getImageUrls(),
+                            $tempDir,
+                            $job->id
+                        );

-                } catch (\Exception $e) {
-                    $this->entityManager->rollback();
+                        // 7. Génération du CBZ
+                        $cbzRequest = new CbzGenerationRequest(
+                            $manga->getTitle(),
+                            $manga->getPublicationYear(),
+                            $chapter->volumeNumber,
+                            $chapter->chapterNumber,
+                            $tempDir,
+                            array_map(fn($r) => $r->getLocalPath(), $downloadResults)
+                        );

-                    if (isset($job)) {
-                        $job->fail($e->getMessage());
+                        $cbzPath = $this->cbzGenerator->generate($cbzRequest);
+
+                        // 8. Mise à jour et sauvegarde
+                        $chapter->cbzPath = $cbzPath->getPath();
+                        $this->chapterRepository->save($chapter);
+
+                        $job->complete();
                        $this->jobRepository->save($job);
+
+                        $this->entityManager->commit();
+
+                        $this->eventBus->dispatch(new ChapterScraped($job->id));
+
+                        // 9. Nettoyage
+                        $tempDir->cleanup();
+
+                        // Scraping réussi, pas besoin d'essayer d'autres slugs ni d'autres sources
+                        $success = true;
+                        break;
+
+                    } catch (\Exception $e) {
+                        $this->entityManager->rollback();
+
+                        if (isset($job)) {
+                            $job->fail($e->getMessage());
+                            $this->jobRepository->save($job);
+                        }
+
+                        $lastException = $e;
+
+                        // Continuer avec le slug suivant pour cette source
                    }
+                }

-                    $lastException = $e;
-
-                    // Continuer avec la source suivante
+                // Si le scraping a réussi avec un des slugs, sortir de la boucle des sources
+                if ($success) {
+                    break;
                }
            }

--- a/src/Domain/Scraping/Domain/Model/Manga.php
+++ b/src/Domain/Scraping/Domain/Model/Manga.php
@@ -6,6 +6,7 @@ class Manga
 {
    /**
     * @param string[] $preferredSources
+     * @param string[] $alternativeSlugs
     */
    public function __construct(
        private readonly string $id,
@@ -15,6 +16,7 @@ class Manga
        private readonly string $author,
        private readonly string $publicationYear,
        private readonly array $preferredSources = [],
+        private readonly array $alternativeSlugs = [],
    ) {
    }

@@ -63,4 +65,12 @@ class Manga
    {
        return !empty($this->preferredSources);
    }
+
+    /**
+     * @return string[]
+     */
+    public function getAlternativeSlugs(): array
+    {
+        return $this->alternativeSlugs;
+    }
 }
--- a/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php
+++ b/src/Domain/Scraping/Infrastructure/Persistence/LegacyMangaRepository.php
@@ -42,6 +42,7 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface
            $mangaEntity->getAuthor() ?? '',
            (string) ($mangaEntity->getPublicationYear() ?? ''),
            $preferredSourceIds,
+            $mangaEntity->getAlternativeSlugs() ?? []
        );
    }