feat: ajout de la gestion des slugs alternatifs pour le scraping des chapitres, mise à jour du service de scraping pour essayer plusieurs slugs, et amélioration de la configuration des services pour le dépôt de chapitres et le service de fichiers.

This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2025-07-03 18:41:13 +02:00
parent 9255509042
commit a6ca8a2c9a
4 changed files with 94 additions and 59 deletions

View File

@@ -120,3 +120,16 @@ services:
App\Domain\Manga\Infrastructure\EventListener\MangaCreatedListener: App\Domain\Manga\Infrastructure\EventListener\MangaCreatedListener:
tags: tags:
- { name: messenger.message_handler } - { name: messenger.message_handler }
# Chapter repository: the domain-level ChapterRepositoryInterface contract is
# aliased to the LegacyChapterRepository implementation.
App\Domain\Manga\Domain\Contract\Repository\ChapterRepositoryInterface:
alias: App\Domain\Manga\Infrastructure\Persistence\Repository\LegacyChapterRepository
# File service: the FileServiceInterface contract is aliased to the concrete
# infrastructure FileService.
App\Domain\Manga\Domain\Contract\Service\FileServiceInterface:
alias: App\Domain\Manga\Infrastructure\Service\FileService
# File service configuration: supplies the $cbzStoragePath argument, pointing at
# the public/cbz directory under the project root (%kernel.project_dir%).
App\Domain\Manga\Infrastructure\Service\FileService:
arguments:
$cbzStoragePath: '%kernel.project_dir%/public/cbz'

View File

@@ -65,80 +65,91 @@ readonly class ScrapeChapterHandler
$lastException = null; $lastException = null;
foreach ($sources as $source) { foreach ($sources as $source) {
$job = new ScrapingJob( // Préparer la liste des slugs à essayer : slug principal + slugs alternatifs
Uuid::uuid4()->toString(), $slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
$chapter->mangaId,
$chapter->chapterNumber,
$source->getId()->getValue()
);
// Ajout de l'ID du chapitre dans le contexte du job foreach ($slugsToTry as $slug) {
$job->context['chapterId'] = $command->chapterId; $job = new ScrapingJob(
Uuid::uuid4()->toString(),
$job->start(); $chapter->mangaId,
$this->jobRepository->save($job);
try {
$this->entityManager->beginTransaction();
// 5. Scraping des URLs
$scrapingRequest = new ScrapingRequest(
'html',
$source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
$source->getScrappingParameters()
);
$scrapingResult = $this->scraper->scrape($scrapingRequest);
// 6. Téléchargement des images
$tempDir = new TempDirectory();
$downloadResults = $this->imageDownloader->downloadBatch(
$scrapingResult->getImageUrls(),
$tempDir,
$job->id
);
// 7. Génération du CBZ
$cbzRequest = new CbzGenerationRequest(
$manga->getTitle(),
$manga->getPublicationYear(),
$chapter->volumeNumber,
$chapter->chapterNumber, $chapter->chapterNumber,
$tempDir, $source->getId()->getValue()
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
); );
$cbzPath = $this->cbzGenerator->generate($cbzRequest); // Ajout de l'ID du chapitre et du slug dans le contexte du job
$job->context['chapterId'] = $command->chapterId;
$job->context['slug'] = $slug;
// 8. Mise à jour et sauvegarde $job->start();
$chapter->cbzPath = $cbzPath->getPath();
$this->chapterRepository->save($chapter);
$job->complete();
$this->jobRepository->save($job); $this->jobRepository->save($job);
$this->entityManager->commit(); try {
$this->entityManager->beginTransaction();
$this->eventBus->dispatch(new ChapterScraped($job->id)); // 5. Scraping des URLs avec le slug courant
$scrapingRequest = new ScrapingRequest(
'html',
$source->buildChapterUrl($slug, $chapter->chapterNumber),
$source->getScrappingParameters()
);
// 9. Nettoyage $scrapingResult = $this->scraper->scrape($scrapingRequest);
$tempDir->cleanup();
// Scraping réussi, pas besoin d'essayer d'autres sources // 6. Téléchargement des images
$success = true; $tempDir = new TempDirectory();
break; $downloadResults = $this->imageDownloader->downloadBatch(
$scrapingResult->getImageUrls(),
$tempDir,
$job->id
);
} catch (\Exception $e) { // 7. Génération du CBZ
$this->entityManager->rollback(); $cbzRequest = new CbzGenerationRequest(
$manga->getTitle(),
$manga->getPublicationYear(),
$chapter->volumeNumber,
$chapter->chapterNumber,
$tempDir,
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
);
if (isset($job)) { $cbzPath = $this->cbzGenerator->generate($cbzRequest);
$job->fail($e->getMessage());
// 8. Mise à jour et sauvegarde
$chapter->cbzPath = $cbzPath->getPath();
$this->chapterRepository->save($chapter);
$job->complete();
$this->jobRepository->save($job); $this->jobRepository->save($job);
$this->entityManager->commit();
$this->eventBus->dispatch(new ChapterScraped($job->id));
// 9. Nettoyage
$tempDir->cleanup();
// Scraping réussi, pas besoin d'essayer d'autres slugs ni d'autres sources
$success = true;
break;
} catch (\Exception $e) {
$this->entityManager->rollback();
if (isset($job)) {
$job->fail($e->getMessage());
$this->jobRepository->save($job);
}
$lastException = $e;
// Continuer avec le slug suivant pour cette source
} }
}
$lastException = $e; // Si le scraping a réussi avec un des slugs, sortir de la boucle des sources
if ($success) {
// Continuer avec la source suivante break;
} }
} }

View File

@@ -6,6 +6,7 @@ class Manga
{ {
/** /**
* @param string[] $preferredSources * @param string[] $preferredSources
* @param string[] $alternativeSlugs
*/ */
public function __construct( public function __construct(
private readonly string $id, private readonly string $id,
@@ -15,6 +16,7 @@ class Manga
private readonly string $author, private readonly string $author,
private readonly string $publicationYear, private readonly string $publicationYear,
private readonly array $preferredSources = [], private readonly array $preferredSources = [],
private readonly array $alternativeSlugs = [],
) { ) {
} }
@@ -63,4 +65,12 @@ class Manga
{ {
return !empty($this->preferredSources); return !empty($this->preferredSources);
} }
/**
* Returns the alternative slugs given to the constructor for this manga.
*
* The list is empty when none were supplied, since the constructor
* parameter defaults to [].
*
* @return string[]
*/
public function getAlternativeSlugs(): array
{
return $this->alternativeSlugs;
}
} }

View File

@@ -42,6 +42,7 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface
$mangaEntity->getAuthor() ?? '', $mangaEntity->getAuthor() ?? '',
(string) ($mangaEntity->getPublicationYear() ?? ''), (string) ($mangaEntity->getPublicationYear() ?? ''),
$preferredSourceIds, $preferredSourceIds,
$mangaEntity->getAlternativeSlugs() ?? []
); );
} }