Files
Mangarr/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php

185 lines
7.1 KiB
PHP

<?php
namespace App\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\ScrapeChapter;
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Event\ChapterScraped;
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\CbzGenerationRequest;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
use Ramsey\Uuid\Uuid;
use Symfony\Component\Messenger\MessageBusInterface;
use Doctrine\ORM\EntityManagerInterface;
readonly class ScrapeChapterHandler
{
public function __construct(
private ScraperInterface $scraper,
private ImageDownloaderInterface $imageDownloader,
private CbzGeneratorInterface $cbzGenerator,
private JobRepositoryInterface $jobRepository,
private ChapterRepositoryInterface $chapterRepository,
private MangaRepositoryInterface $mangaRepository,
private SourceRepositoryInterface $sourceRepository,
private MessageBusInterface $eventBus,
private EntityManagerInterface $entityManager
) {
}
public function handle(ScrapeChapter $command): void
{
$job = null;
try {
// 1. Récupération du chapitre
$chapter = $this->chapterRepository->getById($command->chapterId);
if (!$chapter) {
throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
}
// 2. Récupération du manga
$manga = $this->mangaRepository->getById($chapter->mangaId);
if (!$manga) {
throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
}
// 3. Détermination des sources à utiliser
$sources = $this->getSourcesToTry($manga);
if (empty($sources)) {
throw new \InvalidArgumentException("No sources available for scraping");
}
// 4. Essai de scraping sur chaque source jusqu'à succès
$success = false;
$lastException = null;
foreach ($sources as $source) {
$job = new ScrapingJob(
Uuid::uuid4()->toString(),
$chapter->mangaId,
$chapter->chapterNumber,
$source->getId()->getValue()
);
// Ajout de l'ID du chapitre dans le contexte du job
$job->context['chapterId'] = $command->chapterId;
$job->start();
$this->jobRepository->save($job);
try {
$this->entityManager->beginTransaction();
// 5. Scraping des URLs
$scrapingRequest = new ScrapingRequest(
'html',
$source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
$source->getScrappingParameters()
);
$scrapingResult = $this->scraper->scrape($scrapingRequest);
// 6. Téléchargement des images
$tempDir = new TempDirectory();
$downloadResults = $this->imageDownloader->downloadBatch(
$scrapingResult->getImageUrls(),
$tempDir,
$job->id
);
// 7. Génération du CBZ
$cbzRequest = new CbzGenerationRequest(
$manga->getTitle(),
$manga->getPublicationYear(),
$chapter->volumeNumber,
$chapter->chapterNumber,
$tempDir,
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
);
$cbzPath = $this->cbzGenerator->generate($cbzRequest);
// 8. Mise à jour et sauvegarde
$chapter->cbzPath = $cbzPath->getPath();
$this->chapterRepository->save($chapter);
$job->complete();
$this->jobRepository->save($job);
$this->entityManager->commit();
$this->eventBus->dispatch(new ChapterScraped($job->id));
// 9. Nettoyage
$tempDir->cleanup();
// Scraping réussi, pas besoin d'essayer d'autres sources
$success = true;
break;
} catch (\Exception $e) {
$this->entityManager->rollback();
if (isset($job)) {
$job->fail($e->getMessage());
$this->jobRepository->save($job);
}
$lastException = $e;
// Continuer avec la source suivante
}
}
// Si toutes les sources ont échoué
if (!$success) {
$errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
}
} catch (\Exception $e) {
if (isset($job)) {
$job->fail($e->getMessage());
$this->jobRepository->save($job);
}
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
}
}
/**
* Détermine les sources à utiliser pour le scraping en fonction des préférences du manga
*
* @param \App\Domain\Scraping\Domain\Model\Manga $manga
* @return Source[]
*/
private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
{
// Si le manga a des sources préférées, les utiliser
if ($manga->hasPreferredSources()) {
$preferredSources = [];
foreach ($manga->getPreferredSources() as $sourceId) {
$source = $this->sourceRepository->getById($sourceId);
if ($source) {
$preferredSources[] = $source;
}
}
if (!empty($preferredSources)) {
return $preferredSources;
}
}
// Sinon, utiliser toutes les sources disponibles
return $this->sourceRepository->getAll();
}
}