refactor(scraping): job PENDING dès le POST HTTP, handler sans Doctrine

- ScrapingJob: mangaId/chapterNumber/sourceId optionnels (nullable) pour permettre la création en PENDING sans lookup DB dans le StateProcessor
- ScrapeChapter: ajoute jobId (pré-généré par le StateProcessor)
- ScrapeChapterStateProcessor: crée et persiste le job PENDING avant dispatch; injecte uniquement JobRepositoryInterface en plus du bus de commandes
- ScrapeChapterHandler: supprime EntityManagerInterface et les appels beginTransaction/commit/rollback; charge le job existant via jobId, complete() sur succès seulement, fail() si toutes les sources échouent
- ScrapeChapterHandlerTest: pré-crée le job, passe jobId dans la commande, supprime le mock EntityManagerInterface
- ScrapeChapterTest: accès aux messages via static InMemoryMessageBus, vérifie la présence du jobId dans la commande dispatchée
2026-03-17 15:33:20 +01:00
parent ec4a8be934
commit fa035bfbfa
10 changed files with 252 additions and 356 deletions
--- a/src/Domain/Scraping/Application/Command/ScrapeChapter.php
+++ b/src/Domain/Scraping/Application/Command/ScrapeChapter.php
@@ -5,7 +5,8 @@ namespace App\Domain\Scraping\Application\Command;
 readonly class ScrapeChapter
 {
    public function __construct(
-        public string $chapterId
+        public string $chapterId,
+        public string $jobId
    ) {
    }
 }
--- a/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
+++ b/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
@@ -13,14 +13,11 @@ use App\Domain\Shared\Domain\Event\ChapterScraped;
 use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
 use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
 use App\Domain\Scraping\Domain\Model\Chapter;
-use App\Domain\Scraping\Domain\Model\ScrapingJob;
 use App\Domain\Scraping\Domain\Model\Source;
 use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
 use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
 use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
-use Ramsey\Uuid\Uuid;
 use Symfony\Component\Messenger\MessageBusInterface;
-use Doctrine\ORM\EntityManagerInterface;

 readonly class ScrapeChapterHandler
 {
@@ -33,151 +30,92 @@ readonly class ScrapeChapterHandler
        private MangaRepositoryInterface $mangaRepository,
        private SourceRepositoryInterface $sourceRepository,
        private MessageBusInterface $eventBus,
-        private EntityManagerInterface $entityManager
    ) {
    }

    public function handle(ScrapeChapter $command): void
    {
-        $job = null;
-        try {
-            // 1. Récupération du chapitre
-            /**@var Chapter $chapter */
-            $chapter = $this->chapterRepository->getById($command->chapterId);
-            if (!$chapter) {
-                throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
-            }
+        /** @var Chapter $chapter */
+        $chapter = $this->chapterRepository->getById($command->chapterId);
+        $manga = $this->mangaRepository->getById($chapter->mangaId);

-            // 2. Récupération du manga
-            $manga = $this->mangaRepository->getById($chapter->mangaId);
-            if (!$manga) {
-                throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
-            }
+        $job = $this->jobRepository->get($command->jobId);
+        $job->context['chapterId'] = $command->chapterId;
+        $job->context['mangaTitle'] = $manga->getTitle();
+        $job->start();
+        $this->jobRepository->save($job);

-            // 3. Dispatch de l'événement de démarrage
-            $this->eventBus->dispatch(new ChapterScrapingStarted($manga->getTitle(), $chapter->chapterNumber));
+        $this->eventBus->dispatch(new ChapterScrapingStarted($manga->getTitle(), $chapter->chapterNumber));

-            // 4. Détermination des sources à utiliser
-            $sources = $this->getSourcesToTry($manga);
-            if (empty($sources)) {
-                throw new \InvalidArgumentException("No sources available for scraping");
-            }
+        $sources = $this->getSourcesToTry($manga);
+        $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
+        $success = false;
+        $lastException = null;

-            // 5. Essai de scraping sur chaque source jusqu'à succès
-            $success = false;
-            $lastException = null;
-
-            foreach ($sources as $source) {
-                // Préparer la liste des slugs à essayer : slug principal + slugs alternatifs
-                $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
-
-                foreach ($slugsToTry as $slug) {
-                    $job = new ScrapingJob(
-                        Uuid::uuid4()->toString(),
-                        $chapter->mangaId,
-                        $chapter->chapterNumber,
-                        $source->getId()->getValue()
-                    );
-
-                    // Ajout de l'ID du chapitre et du slug dans le contexte du job
-                    $job->context['chapterId'] = $command->chapterId;
+        foreach ($sources as $source) {
+            foreach ($slugsToTry as $slug) {
+                try {
+                    $job->context['sourceId'] = $source->getId()->getValue();
                    $job->context['slug'] = $slug;
-                    $job->context['mangaTitle'] = $manga->getTitle();
-
-                    $job->start();
                    $this->jobRepository->save($job);

-                    try {
-                        $this->entityManager->beginTransaction();
+                    $scrapingParameters = $source->getScrappingParameters();
+                    $scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
+                    $scrapingType = $scrapingParameters['scrapingType'] ?? 'html';

-                        // 5. Scraping des URLs avec le slug courant
-                        $scrapingParameters = $source->getScrappingParameters();
-                        $scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
-                        $scrapingType = $scrapingParameters['scrapingType'] ?? 'html';
+                    $scrapingRequest = new ScrapingRequest(
+                        $scrapingType,
+                        $source->buildChapterUrl($slug, $chapter->chapterNumber),
+                        $scrapingParameters
+                    );

-                        $scrapingRequest = new ScrapingRequest(
-                            $scrapingType,
-                            $source->buildChapterUrl($slug, $chapter->chapterNumber),
-                            $scrapingParameters
-                        );
+                    $scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
+                    $scrapingResult = $scraper->scrape($scrapingRequest);

-                        // Sélection du scraper approprié selon le type
-                        $scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
-                        $scrapingResult = $scraper->scrape($scrapingRequest);
+                    $tempDir = new TempDirectory();
+                    $downloadResults = $this->imageDownloader->downloadBatch(
+                        $scrapingResult->getImageUrls(),
+                        $tempDir,
+                        $job->id
+                    );

-                        // 6. Téléchargement des images
-                        $tempDir = new TempDirectory();
-                        $downloadResults = $this->imageDownloader->downloadBatch(
-                            $scrapingResult->getImageUrls(),
-                            $tempDir,
-                            $job->id
-                        );
+                    $localPaths = array_map(fn ($r) => $r->getLocalPath(), $downloadResults);
+                    $pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
+                    $pageCount = count($downloadResults);

-                        // 7. Stockage des images individuelles
-                        $localPaths = array_map(fn ($r) => $r->getLocalPath(), $downloadResults);
-                        $pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
-                        $pageCount = count($downloadResults);
+                    $job->complete();
+                    $this->jobRepository->save($job);

-                        $job->complete();
-                        $this->jobRepository->save($job);
+                    $this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));
+                    $tempDir->cleanup();

-                        $this->entityManager->commit();
-
-                        $this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));
-
-                        // 8. Nettoyage
-                        $tempDir->cleanup();
-
-                        // Scraping réussi, pas besoin d'essayer d'autres slugs ni d'autres sources
-                        $success = true;
-                        break;
-
-                    } catch (\Exception $e) {
-                        dump('EXCEPTION for source ' . $source->getName() . ' with slug ' . $slug . ': ' . $e->getMessage());
-
-                        $this->entityManager->rollback();
-
-                        if (isset($job)) {
-                            $job->fail($e->getMessage());
-                            $this->jobRepository->save($job);
-                        }
-
-                        $lastException = $e;
-
-                        // Continuer avec le slug suivant pour cette source
-                    }
-                }
-
-                // Si le scraping a réussi avec un des slugs, sortir de la boucle des sources
-                if ($success) {
+                    $success = true;
                    break;
+
+                } catch (\Exception $e) {
+                    $lastException = $e;
                }
            }

-            // Si toutes les sources ont échoué
-            if (!$success) {
-                $errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
-                $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
+            if ($success) {
+                break;
            }
+        }

-        } catch (\Exception $e) {
-            if (isset($job)) {
-                $job->fail($e->getMessage());
-                $this->jobRepository->save($job);
-            }
-            $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
+        if (!$success) {
+            $errorMessage = $lastException?->getMessage() ?? 'Failed to scrape chapter from all available sources';
+            $job->fail($errorMessage);
+            $this->jobRepository->save($job);
+            $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
        }
    }

    /**
-     * Détermine les sources à utiliser pour le scraping en fonction des préférences du manga
-     *
     * @param \App\Domain\Scraping\Domain\Model\Manga $manga
     * @return Source[]
     */
    private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
    {
-        // Si le manga a des sources préférées, les utiliser
        if ($manga->hasPreferredSources()) {
            $preferredSources = [];
            foreach ($manga->getPreferredSources() as $sourceId) {
@@ -186,7 +124,6 @@ readonly class ScrapeChapterHandler
                    $preferredSources[] = $source;
                }

-                // Limiter à 3 sources préférées maximum
                if (count($preferredSources) >= 3) {
                    break;
                }
@@ -197,7 +134,6 @@ readonly class ScrapeChapterHandler
            }
        }

-        // Sinon, utiliser toutes les sources disponibles
        return $this->sourceRepository->getAll();
    }
 }
--- a/src/Domain/Scraping/Domain/Model/ScrapingJob.php
+++ b/src/Domain/Scraping/Domain/Model/ScrapingJob.php
@@ -8,9 +8,9 @@ class ScrapingJob extends Job
 {
    public function __construct(
        string $id,
-        string $mangaId,
-        float $chapterNumber,
-        string $sourceId
+        ?string $mangaId = null,
+        ?float $chapterNumber = null,
+        ?string $sourceId = null
    ) {
        parent::__construct($id, 'scraping_job');
        $this->maxAttempts = 1;
--- a/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php
+++ b/src/Domain/Scraping/Infrastructure/ApiPlatform/State/Processor/ScrapeChapterStateProcessor.php
@@ -5,13 +5,17 @@ namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor;
 use ApiPlatform\Metadata\Operation;
 use ApiPlatform\State\ProcessorInterface;
 use App\Domain\Scraping\Application\Command\ScrapeChapter;
+use App\Domain\Scraping\Domain\Model\ScrapingJob;
 use App\Domain\Scraping\Infrastructure\ApiPlatform\Dto\ScrapeChapterRequest;
+use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
+use Ramsey\Uuid\Uuid;
 use Symfony\Component\Messenger\MessageBusInterface;

 final class ScrapeChapterStateProcessor implements ProcessorInterface
 {
    public function __construct(
-        private readonly MessageBusInterface $commandBus
+        private readonly MessageBusInterface $commandBus,
+        private readonly JobRepositoryInterface $jobRepository,
    ) {
    }

@@ -20,10 +24,11 @@ final class ScrapeChapterStateProcessor implements ProcessorInterface
     */
    public function process(mixed $data, Operation $operation, array $uriVariables = [], array $context = []): void
    {
-        $this->commandBus->dispatch(
-            new ScrapeChapter(
-                $data->chapterId
-            )
-        );
+        $jobId = Uuid::uuid4()->toString();
+        $job = new ScrapingJob($jobId);
+        $job->context['chapterId'] = $data->chapterId;
+        $this->jobRepository->save($job);
+
+        $this->commandBus->dispatch(new ScrapeChapter($data->chapterId, $jobId));
    }
 }
--- a/src/Domain/Scraping/Infrastructure/EventSubscriber/ScrapingEventSubscriber.php
+++ b/src/Domain/Scraping/Infrastructure/EventSubscriber/ScrapingEventSubscriber.php
@@ -5,6 +5,7 @@ namespace App\Domain\Scraping\Infrastructure\EventSubscriber;
 use App\Domain\Shared\Domain\Event\ChapterScraped;
 use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
 use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
+use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
 use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
 use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
 use App\Domain\Shared\Domain\Contract\NotificationInterface;
@@ -30,6 +31,22 @@ class ScrapingEventSubscriber implements EventSubscriberInterface
        return [];
    }

+    #[AsMessageHandler]
+    public function onPageScrapingProgressed(PageScrapingProgressed $event): void
+    {
+        $progress = (int) round($event->getProgress()->getPercentage());
+
+        $update = new Update(
+            'jobs/activity',
+            json_encode([
+                'type'     => 'job.progress_updated',
+                'jobId'    => $event->getJobId(),
+                'progress' => $progress,
+            ])
+        );
+        $this->hub->publish($update);
+    }
+
    #[AsMessageHandler]
    public function onChapterScrapingStarted(ChapterScrapingStarted $event): void
    {