185 lines
7.1 KiB
PHP
185 lines
7.1 KiB
PHP
<?php
|
|
|
|
namespace App\Domain\Scraping\Application\CommandHandler;
|
|
|
|
use App\Domain\Scraping\Application\Command\ScrapeChapter;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
|
use App\Domain\Scraping\Domain\Event\ChapterScraped;
|
|
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
|
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
|
use App\Domain\Scraping\Domain\Model\Source;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\CbzGenerationRequest;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
|
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
|
use Ramsey\Uuid\Uuid;
|
|
use Symfony\Component\Messenger\MessageBusInterface;
|
|
use Doctrine\ORM\EntityManagerInterface;
|
|
|
|
/**
 * Handles the ScrapeChapter command.
 *
 * For the requested chapter, each candidate source is tried in turn: the
 * chapter page is scraped, its images are downloaded into a temporary
 * directory, a CBZ archive is generated and the resulting path is stored on
 * the chapter. The first source that succeeds ends the process.
 */
readonly class ScrapeChapterHandler
{
    public function __construct(
        private ScraperInterface $scraper,
        private ImageDownloaderInterface $imageDownloader,
        private CbzGeneratorInterface $cbzGenerator,
        private JobRepositoryInterface $jobRepository,
        private ChapterRepositoryInterface $chapterRepository,
        private MangaRepositoryInterface $mangaRepository,
        private SourceRepositoryInterface $sourceRepository,
        private MessageBusInterface $eventBus,
        private EntityManagerInterface $entityManager
    ) {
    }

    /**
     * Scrapes the chapter referenced by the command, trying every available
     * source until one succeeds.
     *
     * A new ScrapingJob is created and saved for every source attempt. On
     * success a ChapterScraped event is dispatched; when every source fails, or
     * when an exception escapes the per-source handling, a ChapterScrapingFailed
     * event is dispatched instead. Exceptions are caught here and reported
     * through that event rather than rethrown.
     *
     * @param ScrapeChapter $command Command carrying the ID of the chapter to scrape.
     */
    public function handle(ScrapeChapter $command): void
    {
        $job = null;
        // Initialised up front so the outer catch can safely fall back to
        // 'unknown' when the chapter lookup itself fails.
        $chapter = null;

        try {
            // 1. Load the chapter
            $chapter = $this->chapterRepository->getById($command->chapterId);
            if (!$chapter) {
                throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
            }

            // 2. Load the manga the chapter belongs to
            $manga = $this->mangaRepository->getById($chapter->mangaId);
            if (!$manga) {
                throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
            }

            // 3. Determine which sources to try
            $sources = $this->getSourcesToTry($manga);
            if (empty($sources)) {
                throw new \InvalidArgumentException("No sources available for scraping");
            }

            // 4. Try each source until one succeeds
            $success = false;
            $lastException = null;

            foreach ($sources as $source) {
                $job = new ScrapingJob(
                    Uuid::uuid4()->toString(),
                    $chapter->mangaId,
                    $chapter->chapterNumber,
                    $source->getId()->getValue()
                );

                // Record the chapter ID in the job context
                $job->context['chapterId'] = $command->chapterId;

                $job->start();
                $this->jobRepository->save($job);

                $tempDir = null;
                $committed = false;

                try {
                    $this->entityManager->beginTransaction();

                    // 5. Scrape the image URLs from the chapter page
                    $scrapingRequest = new ScrapingRequest(
                        'html',
                        $source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
                        $source->getScrappingParameters()
                    );

                    $scrapingResult = $this->scraper->scrape($scrapingRequest);

                    // 6. Download the images into a temporary directory
                    $tempDir = new TempDirectory();
                    $downloadResults = $this->imageDownloader->downloadBatch(
                        $scrapingResult->getImageUrls(),
                        $tempDir,
                        $job->id
                    );

                    // 7. Generate the CBZ archive
                    $cbzRequest = new CbzGenerationRequest(
                        $manga->getTitle(),
                        $manga->getPublicationYear(),
                        $chapter->volumeNumber,
                        $chapter->chapterNumber,
                        $tempDir,
                        array_map(fn($r) => $r->getLocalPath(), $downloadResults)
                    );

                    $cbzPath = $this->cbzGenerator->generate($cbzRequest);

                    // 8. Persist the chapter and the completed job
                    $chapter->cbzPath = $cbzPath->getPath();
                    $this->chapterRepository->save($chapter);

                    $job->complete();
                    $this->jobRepository->save($job);

                    $this->entityManager->commit();
                    $committed = true;

                    $this->eventBus->dispatch(new ChapterScraped($job->id));

                    $success = true;
                } catch (\Exception $e) {
                    if ($committed) {
                        // The chapter is already committed: this is not a
                        // scraping failure, so do not retry with another
                        // source. Let the outer handler report it.
                        throw $e;
                    }

                    // Roll back only when a transaction is actually open;
                    // otherwise (e.g. beginTransaction() itself failed) the
                    // rollback would raise and hide the original error.
                    if ($this->entityManager->getConnection()->isTransactionActive()) {
                        $this->entityManager->rollback();
                    }

                    $job->fail($e->getMessage());
                    $this->jobRepository->save($job);

                    // Remember the error and continue with the next source
                    $lastException = $e;
                } finally {
                    // 9. Cleanup: always remove the temporary directory, on
                    // failure as well as on success.
                    $tempDir?->cleanup();
                }

                if ($success) {
                    // Scraping succeeded, no need to try the remaining sources
                    break;
                }
            }

            // Every source failed
            if (!$success) {
                $errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
                $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
            }
        } catch (\Exception $e) {
            if ($job !== null) {
                $job->fail($e->getMessage());
                $this->jobRepository->save($job);
            }

            $this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
        }
    }

    /**
     * Determines which sources to use for scraping, based on the manga's
     * preferences.
     *
     * Preferred sources that can be resolved through the source repository are
     * returned when there is at least one; otherwise all sources known to the
     * repository are returned.
     *
     * @param \App\Domain\Scraping\Domain\Model\Manga $manga
     * @return Source[]
     */
    private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
    {
        // If the manga has preferred sources, use those
        if ($manga->hasPreferredSources()) {
            $preferredSources = [];

            foreach ($manga->getPreferredSources() as $sourceId) {
                $source = $this->sourceRepository->getById($sourceId);
                // Skip preferred IDs the repository cannot resolve
                if ($source) {
                    $preferredSources[] = $source;
                }
            }

            if (!empty($preferredSources)) {
                return $preferredSources;
            }
        }

        // Otherwise fall back to every available source
        return $this->sourceRepository->getAll();
    }
}
|