refactor(scraping): job PENDING dès le POST HTTP, handler sans Doctrine
- ScrapingJob: mangaId/chapterNumber/sourceId optionnels (nullable) pour permettre la création en PENDING sans lookup DB dans le StateProcessor
- ScrapeChapter: ajoute jobId (pré-généré par le StateProcessor)
- ScrapeChapterStateProcessor: crée et persiste le job PENDING avant dispatch; n'injecte que JobRepositoryInterface en plus du bus de commandes
- ScrapeChapterHandler: supprime EntityManagerInterface et les appels beginTransaction/commit/rollback; charge le job existant via jobId, complete() sur succès seulement, fail() si toutes les sources échouent
- ScrapeChapterHandlerTest: pré-crée le job, passe jobId dans la commande, supprime le mock EntityManagerInterface
- ScrapeChapterTest: accès aux messages via static InMemoryMessageBus, vérifie la présence du jobId dans la commande dispatchée
This commit is contained in:
parent
ec4a8be934
commit
fa035bfbfa
@@ -5,7 +5,8 @@ namespace App\Domain\Scraping\Application\Command;
|
||||
readonly class ScrapeChapter
|
||||
{
|
||||
public function __construct(
|
||||
public string $chapterId
|
||||
public string $chapterId,
|
||||
public string $jobId
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,14 +13,11 @@ use App\Domain\Shared\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
|
||||
use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
|
||||
readonly class ScrapeChapterHandler
|
||||
{
|
||||
@@ -33,151 +30,92 @@ readonly class ScrapeChapterHandler
|
||||
private MangaRepositoryInterface $mangaRepository,
|
||||
private SourceRepositoryInterface $sourceRepository,
|
||||
private MessageBusInterface $eventBus,
|
||||
private EntityManagerInterface $entityManager
|
||||
) {
|
||||
}
|
||||
|
||||
public function handle(ScrapeChapter $command): void
|
||||
{
|
||||
$job = null;
|
||||
try {
|
||||
// 1. Récupération du chapitre
|
||||
/**@var Chapter $chapter */
|
||||
$chapter = $this->chapterRepository->getById($command->chapterId);
|
||||
if (!$chapter) {
|
||||
throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
|
||||
}
|
||||
/** @var Chapter $chapter */
|
||||
$chapter = $this->chapterRepository->getById($command->chapterId);
|
||||
$manga = $this->mangaRepository->getById($chapter->mangaId);
|
||||
|
||||
// 2. Récupération du manga
|
||||
$manga = $this->mangaRepository->getById($chapter->mangaId);
|
||||
if (!$manga) {
|
||||
throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
|
||||
}
|
||||
$job = $this->jobRepository->get($command->jobId);
|
||||
$job->context['chapterId'] = $command->chapterId;
|
||||
$job->context['mangaTitle'] = $manga->getTitle();
|
||||
$job->start();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
// 3. Dispatch de l'événement de démarrage
|
||||
$this->eventBus->dispatch(new ChapterScrapingStarted($manga->getTitle(), $chapter->chapterNumber));
|
||||
$this->eventBus->dispatch(new ChapterScrapingStarted($manga->getTitle(), $chapter->chapterNumber));
|
||||
|
||||
// 4. Détermination des sources à utiliser
|
||||
$sources = $this->getSourcesToTry($manga);
|
||||
if (empty($sources)) {
|
||||
throw new \InvalidArgumentException("No sources available for scraping");
|
||||
}
|
||||
$sources = $this->getSourcesToTry($manga);
|
||||
$slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
|
||||
$success = false;
|
||||
$lastException = null;
|
||||
|
||||
// 5. Essai de scraping sur chaque source jusqu'à succès
|
||||
$success = false;
|
||||
$lastException = null;
|
||||
|
||||
foreach ($sources as $source) {
|
||||
// Préparer la liste des slugs à essayer : slug principal + slugs alternatifs
|
||||
$slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
|
||||
|
||||
foreach ($slugsToTry as $slug) {
|
||||
$job = new ScrapingJob(
|
||||
Uuid::uuid4()->toString(),
|
||||
$chapter->mangaId,
|
||||
$chapter->chapterNumber,
|
||||
$source->getId()->getValue()
|
||||
);
|
||||
|
||||
// Ajout de l'ID du chapitre et du slug dans le contexte du job
|
||||
$job->context['chapterId'] = $command->chapterId;
|
||||
foreach ($sources as $source) {
|
||||
foreach ($slugsToTry as $slug) {
|
||||
try {
|
||||
$job->context['sourceId'] = $source->getId()->getValue();
|
||||
$job->context['slug'] = $slug;
|
||||
$job->context['mangaTitle'] = $manga->getTitle();
|
||||
|
||||
$job->start();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
try {
|
||||
$this->entityManager->beginTransaction();
|
||||
$scrapingParameters = $source->getScrappingParameters();
|
||||
$scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
|
||||
$scrapingType = $scrapingParameters['scrapingType'] ?? 'html';
|
||||
|
||||
// 5. Scraping des URLs avec le slug courant
|
||||
$scrapingParameters = $source->getScrappingParameters();
|
||||
$scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
|
||||
$scrapingType = $scrapingParameters['scrapingType'] ?? 'html';
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
$scrapingType,
|
||||
$source->buildChapterUrl($slug, $chapter->chapterNumber),
|
||||
$scrapingParameters
|
||||
);
|
||||
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
$scrapingType,
|
||||
$source->buildChapterUrl($slug, $chapter->chapterNumber),
|
||||
$scrapingParameters
|
||||
);
|
||||
$scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
|
||||
$scrapingResult = $scraper->scrape($scrapingRequest);
|
||||
|
||||
// Sélection du scraper approprié selon le type
|
||||
$scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
|
||||
$scrapingResult = $scraper->scrape($scrapingRequest);
|
||||
$tempDir = new TempDirectory();
|
||||
$downloadResults = $this->imageDownloader->downloadBatch(
|
||||
$scrapingResult->getImageUrls(),
|
||||
$tempDir,
|
||||
$job->id
|
||||
);
|
||||
|
||||
// 6. Téléchargement des images
|
||||
$tempDir = new TempDirectory();
|
||||
$downloadResults = $this->imageDownloader->downloadBatch(
|
||||
$scrapingResult->getImageUrls(),
|
||||
$tempDir,
|
||||
$job->id
|
||||
);
|
||||
$localPaths = array_map(fn ($r) => $r->getLocalPath(), $downloadResults);
|
||||
$pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
|
||||
$pageCount = count($downloadResults);
|
||||
|
||||
// 7. Stockage des images individuelles
|
||||
$localPaths = array_map(fn ($r) => $r->getLocalPath(), $downloadResults);
|
||||
$pagesDirectory = $this->imageStorage->storeChapterImages($command->chapterId, $localPaths);
|
||||
$pageCount = count($downloadResults);
|
||||
$job->complete();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
$job->complete();
|
||||
$this->jobRepository->save($job);
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));
|
||||
$tempDir->cleanup();
|
||||
|
||||
$this->entityManager->commit();
|
||||
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->id, $command->chapterId, $pagesDirectory, $pageCount));
|
||||
|
||||
// 8. Nettoyage
|
||||
$tempDir->cleanup();
|
||||
|
||||
// Scraping réussi, pas besoin d'essayer d'autres slugs ni d'autres sources
|
||||
$success = true;
|
||||
break;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
dump('EXCEPTION for source ' . $source->getName() . ' with slug ' . $slug . ': ' . $e->getMessage());
|
||||
|
||||
$this->entityManager->rollback();
|
||||
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$this->jobRepository->save($job);
|
||||
}
|
||||
|
||||
$lastException = $e;
|
||||
|
||||
// Continuer avec le slug suivant pour cette source
|
||||
}
|
||||
}
|
||||
|
||||
// Si le scraping a réussi avec un des slugs, sortir de la boucle des sources
|
||||
if ($success) {
|
||||
$success = true;
|
||||
break;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
$lastException = $e;
|
||||
}
|
||||
}
|
||||
|
||||
// Si toutes les sources ont échoué
|
||||
if (!$success) {
|
||||
$errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
|
||||
if ($success) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} catch (\Exception $e) {
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$this->jobRepository->save($job);
|
||||
}
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
|
||||
if (!$success) {
|
||||
$errorMessage = $lastException?->getMessage() ?? 'Failed to scrape chapter from all available sources';
|
||||
$job->fail($errorMessage);
|
||||
$this->jobRepository->save($job);
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Détermine les sources à utiliser pour le scraping en fonction des préférences du manga
|
||||
*
|
||||
* @param \App\Domain\Scraping\Domain\Model\Manga $manga
|
||||
* @return Source[]
|
||||
*/
|
||||
private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
|
||||
{
|
||||
// Si le manga a des sources préférées, les utiliser
|
||||
if ($manga->hasPreferredSources()) {
|
||||
$preferredSources = [];
|
||||
foreach ($manga->getPreferredSources() as $sourceId) {
|
||||
@@ -186,7 +124,6 @@ readonly class ScrapeChapterHandler
|
||||
$preferredSources[] = $source;
|
||||
}
|
||||
|
||||
// Limiter à 3 sources préférées maximum
|
||||
if (count($preferredSources) >= 3) {
|
||||
break;
|
||||
}
|
||||
@@ -197,7 +134,6 @@ readonly class ScrapeChapterHandler
|
||||
}
|
||||
}
|
||||
|
||||
// Sinon, utiliser toutes les sources disponibles
|
||||
return $this->sourceRepository->getAll();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,9 +8,9 @@ class ScrapingJob extends Job
|
||||
{
|
||||
public function __construct(
|
||||
string $id,
|
||||
string $mangaId,
|
||||
float $chapterNumber,
|
||||
string $sourceId
|
||||
?string $mangaId = null,
|
||||
?float $chapterNumber = null,
|
||||
?string $sourceId = null
|
||||
) {
|
||||
parent::__construct($id, 'scraping_job');
|
||||
$this->maxAttempts = 1;
|
||||
|
||||
@@ -5,13 +5,17 @@ namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Processor;
|
||||
use ApiPlatform\Metadata\Operation;
|
||||
use ApiPlatform\State\ProcessorInterface;
|
||||
use App\Domain\Scraping\Application\Command\ScrapeChapter;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\Dto\ScrapeChapterRequest;
|
||||
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
|
||||
final class ScrapeChapterStateProcessor implements ProcessorInterface
|
||||
{
|
||||
public function __construct(
|
||||
private readonly MessageBusInterface $commandBus
|
||||
private readonly MessageBusInterface $commandBus,
|
||||
private readonly JobRepositoryInterface $jobRepository,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -20,10 +24,11 @@ final class ScrapeChapterStateProcessor implements ProcessorInterface
|
||||
*/
|
||||
public function process(mixed $data, Operation $operation, array $uriVariables = [], array $context = []): void
|
||||
{
|
||||
$this->commandBus->dispatch(
|
||||
new ScrapeChapter(
|
||||
$data->chapterId
|
||||
)
|
||||
);
|
||||
$jobId = Uuid::uuid4()->toString();
|
||||
$job = new ScrapingJob($jobId);
|
||||
$job->context['chapterId'] = $data->chapterId;
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
$this->commandBus->dispatch(new ScrapeChapter($data->chapterId, $jobId));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ namespace App\Domain\Scraping\Infrastructure\EventSubscriber;
|
||||
use App\Domain\Shared\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
|
||||
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
||||
use App\Domain\Shared\Domain\Contract\NotificationInterface;
|
||||
@@ -30,6 +31,22 @@ class ScrapingEventSubscriber implements EventSubscriberInterface
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
 * Relays a page-scraping progress event to the hub as a job progress update.
 *
 * The percentage carried by the event is rounded to the nearest integer and
 * published on the "jobs/activity" topic as a "job.progress_updated" payload
 * together with the job identifier.
 *
 * @param PageScrapingProgressed $event Progress event for a running job.
 *
 * @throws \JsonException If the payload cannot be encoded as JSON.
 */
#[AsMessageHandler]
public function onPageScrapingProgressed(PageScrapingProgressed $event): void
{
    // Subscribers receive a whole-number percentage, not the raw float.
    $progress = (int) round($event->getProgress()->getPercentage());

    // JSON_THROW_ON_ERROR: without it json_encode() returns false on failure
    // and that value would be handed to Update as the message body.
    $payload = json_encode(
        [
            'type' => 'job.progress_updated',
            'jobId' => $event->getJobId(),
            'progress' => $progress,
        ],
        JSON_THROW_ON_ERROR
    );

    $this->hub->publish(new Update('jobs/activity', $payload));
}
|
||||
|
||||
#[AsMessageHandler]
|
||||
public function onChapterScrapingStarted(ChapterScrapingStarted $event): void
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user