Mangarr/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php

<?php

namespace App\Domain\Scraping\Application\CommandHandler;

use App\Domain\Scraping\Application\Command\ScrapeChapter;
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
use App\Domain\Shared\Domain\Contract\ImageStorageInterface;
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
use App\Domain\Shared\Domain\Event\ChapterScraped;
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
use App\Domain\Scraping\Domain\Model\Chapter;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
use Ramsey\Uuid\Uuid;
use Symfony\Component\Messenger\MessageBusInterface;
use Doctrine\ORM\EntityManagerInterface;

/**
 * Handles the ScrapeChapter command.
 *
 * For the requested chapter, every candidate source is tried with every known
 * slug of the manga (main slug first, then the alternative ones) until one
 * attempt succeeds. Each attempt is tracked by its own ScrapingJob. The outcome
 * is reported through ChapterScrapingStarted, ChapterScraped and
 * ChapterScrapingFailed messages dispatched on the event bus.
 */
readonly class ScrapeChapterHandler
{
    public function __construct(
        private ScraperFactoryInterface $scraperFactory,
        private ImageDownloaderInterface $imageDownloader,
        private ImageStorageInterface $imageStorage,
        private JobRepositoryInterface $jobRepository,
        private ChapterRepositoryInterface $chapterRepository,
        private MangaRepositoryInterface $mangaRepository,
        private SourceRepositoryInterface $sourceRepository,
        private MessageBusInterface $eventBus,
        private EntityManagerInterface $entityManager
    ) {
    }

    /**
     * Scrapes, downloads and stores the pages of the chapter named by the command.
     *
     * Exceptions (\Exception and subclasses) are not propagated to the caller:
     * they are converted into a ChapterScrapingFailed message. An exception
     * raised while reporting that failure (saving the job, dispatching the
     * message) does propagate.
     *
     * @param ScrapeChapter $command Carries the identifier of the chapter to scrape.
     */
    public function handle(ScrapeChapter $command): void
    {
        $job = null;
        try {
            // 1. Load the chapter.
            /** @var Chapter|null $chapter */
            $chapter = $this->chapterRepository->getById($command->chapterId);
            if (!$chapter) {
                throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
            }

            // 2. Load the manga the chapter belongs to.
            $manga = $this->mangaRepository->getById($chapter->mangaId);
            if (!$manga) {
                throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
            }

            // 3. Announce that scraping has started.
            $this->eventBus->dispatch(new ChapterScrapingStarted($manga->getTitle(), $chapter->chapterNumber));

            // 4. Work out which sources to try.
            $sources = $this->getSourcesToTry($manga);
            if (empty($sources)) {
                throw new \InvalidArgumentException("No sources available for scraping");
            }

            // The slug list (main slug + alternative slugs) is the same for every source.
            $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());

            // 5. Try each source / slug combination until one succeeds.
            $success = false;
            $lastException = null;

            foreach ($sources as $source) {
                foreach ($slugsToTry as $slug) {
                    $job = new ScrapingJob(
                        Uuid::uuid4()->toString(),
                        $chapter->mangaId,
                        $chapter->chapterNumber,
                        $source->getId()->getValue()
                    );

                    // Record the chapter id, slug and manga title in the job context.
                    $job->context['chapterId'] = $command->chapterId;
                    $job->context['slug'] = $slug;
                    $job->context['mangaTitle'] = $manga->getTitle();

                    $job->start();
                    $this->jobRepository->save($job);

                    $tempDir = null;
                    $transactionOpen = false;
                    $committed = false;

                    try {
                        $this->entityManager->beginTransaction();
                        $transactionOpen = true;

                        // 5a. Scrape the image URLs using the current slug.
                        $scrapingParameters = $source->getScrappingParameters();
                        $scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
                        $scrapingType = $scrapingParameters['scrapingType'] ?? 'html';

                        $scrapingRequest = new ScrapingRequest(
                            $scrapingType,
                            $source->buildChapterUrl($slug, $chapter->chapterNumber),
                            $scrapingParameters
                        );

                        // Pick the scraper matching the scraping type.
                        $scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
                        $scrapingResult = $scraper->scrape($scrapingRequest);

                        // 5b. Download the images into a temporary directory.
                        $tempDir = new TempDirectory();
                        $downloadResults = $this->imageDownloader->downloadBatch(
                            $scrapingResult->getImageUrls(),
                            $tempDir,
                            $job->id
                        );

                        // 5c. Store the individual page images.
                        $localPaths = array_map(static fn ($r) => $r->getLocalPath(), $downloadResults);
                        $pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
                        $pageCount = count($downloadResults);

                        $job->complete();
                        $this->jobRepository->save($job);

                        $this->entityManager->commit();
                        $transactionOpen = false;
                        $committed = true;

                        $this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));

                        // Success: no need to try further slugs or sources.
                        $success = true;
                        break 2;
                    } catch (\Exception $e) {
                        if ($committed) {
                            // The chapter is already stored and committed, so
                            // retrying with another slug would scrape it twice.
                            // Let the outer handler report the failure instead.
                            throw $e;
                        }

                        // Only roll back a transaction this attempt actually opened;
                        // rolling back without one would raise a second exception
                        // and hide the real cause.
                        if ($transactionOpen) {
                            $this->entityManager->rollback();
                        }

                        $job->fail($e->getMessage());
                        $this->jobRepository->save($job);

                        $lastException = $e;

                        // Fall through to the next slug (then the next source).
                    } finally {
                        // 5d. Remove the temporary download directory on success and on failure.
                        $tempDir?->cleanup();
                    }
                }
            }

            // Every source / slug combination failed.
            if (!$success) {
                $errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
                $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
            }

        } catch (\Exception $e) {
            // $job is null when the failure happened before any attempt was started.
            if ($job !== null) {
                $job->fail($e->getMessage());
                $this->jobRepository->save($job);
            }
            // $chapter may be unset or null here, hence the fallbacks.
            $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
        }
    }

    /**
     * Selects the sources to scrape from, based on the manga's preferences.
     *
     * When the manga declares preferred sources, the ones that can be resolved
     * through the source repository are returned, in declaration order and
     * capped at three. If none can be resolved, or the manga has no preference,
     * every source known to the repository is returned.
     *
     * @param \App\Domain\Scraping\Domain\Model\Manga $manga
     * @return Source[]
     */
    private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
    {
        if ($manga->hasPreferredSources()) {
            $preferredSources = [];
            foreach ($manga->getPreferredSources() as $sourceId) {
                $source = $this->sourceRepository->getById($sourceId);
                if ($source) {
                    $preferredSources[] = $source;
                }

                // Keep at most 3 preferred sources.
                if (count($preferredSources) >= 3) {
                    break;
                }
            }

            if (!empty($preferredSources)) {
                return $preferredSources;
            }
        }

        // No usable preference: fall back to every available source.
        return $this->sourceRepository->getAll();
    }
}