- Ajoute jobId dans ChapterScrapingStarted et ChapterScrapingFailed
- Publie job.created (PENDING) depuis ScrapeChapterStateProcessor
- Publie job.status_changed (in_progress/completed/failed) depuis ScrapingEventSubscriber
- Gère job.created et job.status_changed dans activityStore : ajout instantané et suppression différée (1.5s)
140 lines
5.5 KiB
PHP
140 lines
5.5 KiB
PHP
<?php
|
|
|
|
namespace App\Domain\Scraping\Application\CommandHandler;
|
|
|
|
use App\Domain\Scraping\Application\Command\ScrapeChapter;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
|
use App\Domain\Shared\Domain\Contract\ImageStorageInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
|
use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
|
|
use App\Domain\Shared\Domain\Event\ChapterScraped;
|
|
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
|
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
|
|
use App\Domain\Scraping\Domain\Model\Chapter;
|
|
use App\Domain\Scraping\Domain\Model\Source;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
|
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
|
use Symfony\Component\Messenger\MessageBusInterface;
|
|
|
|
/**
 * Handles the ScrapeChapter command.
 *
 * Marks the job as started, then tries every candidate source / slug
 * combination until one attempt scrapes, downloads and stores the chapter
 * images. The job is then marked completed (ChapterScraped is dispatched) or,
 * when every attempt failed, marked failed (ChapterScrapingFailed is dispatched).
 */
readonly class ScrapeChapterHandler
{
    /** Maximum number of a manga's preferred sources that will be tried. */
    private const MAX_PREFERRED_SOURCES = 3;

    public function __construct(
        private ScraperFactoryInterface $scraperFactory,
        private ImageDownloaderInterface $imageDownloader,
        private ImageStorageInterface $imageStorage,
        private JobRepositoryInterface $jobRepository,
        private ChapterRepositoryInterface $chapterRepository,
        private MangaRepositoryInterface $mangaRepository,
        private SourceRepositoryInterface $sourceRepository,
        private MessageBusInterface $eventBus,
    ) {
    }

    /**
     * Scrapes one chapter and records the outcome on the associated job.
     *
     * Exceptions (\Exception and subclasses) raised during a single
     * source/slug attempt are swallowed so the next combination can be tried;
     * only the message of the last one is reported on failure.
     *
     * @param ScrapeChapter $command Carries the chapter id and the job id to update.
     */
    public function handle(ScrapeChapter $command): void
    {
        /** @var Chapter $chapter */
        $chapter = $this->chapterRepository->getById($command->chapterId);
        $manga = $this->mangaRepository->getById($chapter->mangaId);

        // Enrich the job with display context before flagging it as started.
        $job = $this->jobRepository->get($command->jobId);
        $job->context['chapterId'] = $command->chapterId;
        $job->context['mangaTitle'] = $manga->getTitle();
        $job->start();
        $this->jobRepository->save($job);

        $this->eventBus->dispatch(new ChapterScrapingStarted($job->id, $manga->getTitle(), $chapter->chapterNumber));

        $sources = $this->getSourcesToTry($manga);
        // The primary slug is tried first, then the alternatives.
        $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
        $lastException = null;

        foreach ($sources as $source) {
            foreach ($slugsToTry as $slug) {
                try {
                    // Persist which source/slug is being attempted.
                    $job->context['sourceId'] = $source->getId()->getValue();
                    $job->context['slug'] = $slug;
                    $this->jobRepository->save($job);

                    $scrapingParameters = $source->getScrappingParameters();
                    $scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
                    $scrapingType = $scrapingParameters['scrapingType'] ?? 'html';

                    $scrapingRequest = new ScrapingRequest(
                        $scrapingType,
                        $source->buildChapterUrl($slug, $chapter->chapterNumber),
                        $scrapingParameters
                    );

                    $scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
                    $scrapingResult = $scraper->scrape($scrapingRequest);

                    $tempDir = new TempDirectory();

                    try {
                        $downloadResults = $this->imageDownloader->downloadBatch(
                            $scrapingResult->getImageUrls(),
                            $tempDir,
                            $job->id
                        );

                        $localPaths = array_map(fn ($r) => $r->getLocalPath(), $downloadResults);
                        $pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
                        $pageCount = count($downloadResults);

                        $job->complete();
                        $this->jobRepository->save($job);

                        // NOTE(review): a failure here is caught below and triggers another
                        // attempt even though the job was already saved as completed - confirm
                        // that this is intended.
                        $this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));
                    } finally {
                        // Always release the temporary directory, including when the download
                        // or storage step throws; previously it was only cleaned up on success.
                        $tempDir->cleanup();
                    }

                    // First successful attempt wins; nothing else to do.
                    return;
                } catch (\Exception $e) {
                    // Remember the failure and move on to the next slug/source.
                    $lastException = $e;
                }
            }
        }

        // Every source/slug combination failed (or there was none to try).
        $errorMessage = $lastException?->getMessage() ?? 'Failed to scrape chapter from all available sources';
        $job->fail($errorMessage);
        $this->jobRepository->save($job);
        $this->eventBus->dispatch(new ChapterScrapingFailed($job->id, $chapter->mangaId, $chapter->chapterNumber, $errorMessage));
    }

    /**
     * Selects the sources to try for a manga.
     *
     * When the manga declares preferred sources, the ones the repository can
     * resolve are used, capped at MAX_PREFERRED_SOURCES. If there are none, or
     * none resolve, every source known to the repository is returned.
     *
     * @return Source[]
     */
    private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
    {
        if ($manga->hasPreferredSources()) {
            $preferredSources = [];

            foreach ($manga->getPreferredSources() as $sourceId) {
                $source = $this->sourceRepository->getById($sourceId);

                // Skip ids the repository cannot resolve.
                if ($source) {
                    $preferredSources[] = $source;
                }

                if (count($preferredSources) >= self::MAX_PREFERRED_SOURCES) {
                    break;
                }
            }

            if ($preferredSources !== []) {
                return $preferredSources;
            }
        }

        return $this->sourceRepository->getAll();
    }
}
|