feat: ajout de la gestion des slugs alternatifs pour le scraping des chapitres, mise à jour du service de scraping pour essayer plusieurs slugs, et amélioration de la configuration des services pour le dépôt de chapitres et le service de fichiers.
This commit is contained in:
parent
9255509042
commit
a6ca8a2c9a
@@ -65,80 +65,91 @@ readonly class ScrapeChapterHandler
|
||||
$lastException = null;
|
||||
|
||||
foreach ($sources as $source) {
|
||||
$job = new ScrapingJob(
|
||||
Uuid::uuid4()->toString(),
|
||||
$chapter->mangaId,
|
||||
$chapter->chapterNumber,
|
||||
$source->getId()->getValue()
|
||||
);
|
||||
// Préparer la liste des slugs à essayer : slug principal + slugs alternatifs
|
||||
$slugsToTry = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs());
|
||||
|
||||
// Ajout de l'ID du chapitre dans le contexte du job
|
||||
$job->context['chapterId'] = $command->chapterId;
|
||||
|
||||
$job->start();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
try {
|
||||
$this->entityManager->beginTransaction();
|
||||
|
||||
// 5. Scraping des URLs
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
'html',
|
||||
$source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
|
||||
$source->getScrappingParameters()
|
||||
);
|
||||
|
||||
$scrapingResult = $this->scraper->scrape($scrapingRequest);
|
||||
|
||||
// 6. Téléchargement des images
|
||||
$tempDir = new TempDirectory();
|
||||
$downloadResults = $this->imageDownloader->downloadBatch(
|
||||
$scrapingResult->getImageUrls(),
|
||||
$tempDir,
|
||||
$job->id
|
||||
);
|
||||
|
||||
// 7. Génération du CBZ
|
||||
$cbzRequest = new CbzGenerationRequest(
|
||||
$manga->getTitle(),
|
||||
$manga->getPublicationYear(),
|
||||
$chapter->volumeNumber,
|
||||
foreach ($slugsToTry as $slug) {
|
||||
$job = new ScrapingJob(
|
||||
Uuid::uuid4()->toString(),
|
||||
$chapter->mangaId,
|
||||
$chapter->chapterNumber,
|
||||
$tempDir,
|
||||
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
|
||||
$source->getId()->getValue()
|
||||
);
|
||||
|
||||
$cbzPath = $this->cbzGenerator->generate($cbzRequest);
|
||||
// Ajout de l'ID du chapitre et du slug dans le contexte du job
|
||||
$job->context['chapterId'] = $command->chapterId;
|
||||
$job->context['slug'] = $slug;
|
||||
|
||||
// 8. Mise à jour et sauvegarde
|
||||
$chapter->cbzPath = $cbzPath->getPath();
|
||||
$this->chapterRepository->save($chapter);
|
||||
|
||||
$job->complete();
|
||||
$job->start();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
$this->entityManager->commit();
|
||||
try {
|
||||
$this->entityManager->beginTransaction();
|
||||
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->id));
|
||||
// 5. Scraping des URLs avec le slug courant
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
'html',
|
||||
$source->buildChapterUrl($slug, $chapter->chapterNumber),
|
||||
$source->getScrappingParameters()
|
||||
);
|
||||
|
||||
// 9. Nettoyage
|
||||
$tempDir->cleanup();
|
||||
$scrapingResult = $this->scraper->scrape($scrapingRequest);
|
||||
|
||||
// Scraping réussi, pas besoin d'essayer d'autres sources
|
||||
$success = true;
|
||||
break;
|
||||
// 6. Téléchargement des images
|
||||
$tempDir = new TempDirectory();
|
||||
$downloadResults = $this->imageDownloader->downloadBatch(
|
||||
$scrapingResult->getImageUrls(),
|
||||
$tempDir,
|
||||
$job->id
|
||||
);
|
||||
|
||||
} catch (\Exception $e) {
|
||||
$this->entityManager->rollback();
|
||||
// 7. Génération du CBZ
|
||||
$cbzRequest = new CbzGenerationRequest(
|
||||
$manga->getTitle(),
|
||||
$manga->getPublicationYear(),
|
||||
$chapter->volumeNumber,
|
||||
$chapter->chapterNumber,
|
||||
$tempDir,
|
||||
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
|
||||
);
|
||||
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$cbzPath = $this->cbzGenerator->generate($cbzRequest);
|
||||
|
||||
// 8. Mise à jour et sauvegarde
|
||||
$chapter->cbzPath = $cbzPath->getPath();
|
||||
$this->chapterRepository->save($chapter);
|
||||
|
||||
$job->complete();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
$this->entityManager->commit();
|
||||
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->id));
|
||||
|
||||
// 9. Nettoyage
|
||||
$tempDir->cleanup();
|
||||
|
||||
// Scraping réussi, pas besoin d'essayer d'autres slugs ni d'autres sources
|
||||
$success = true;
|
||||
break;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
$this->entityManager->rollback();
|
||||
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$this->jobRepository->save($job);
|
||||
}
|
||||
|
||||
$lastException = $e;
|
||||
|
||||
// Continuer avec le slug suivant pour cette source
|
||||
}
|
||||
}
|
||||
|
||||
$lastException = $e;
|
||||
|
||||
// Continuer avec la source suivante
|
||||
// Si le scraping a réussi avec un des slugs, sortir de la boucle des sources
|
||||
if ($success) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ class Manga
|
||||
{
|
||||
/**
|
||||
* @param string[] $preferredSources
|
||||
* @param string[] $alternativeSlugs
|
||||
*/
|
||||
public function __construct(
|
||||
private readonly string $id,
|
||||
@@ -15,6 +16,7 @@ class Manga
|
||||
private readonly string $author,
|
||||
private readonly string $publicationYear,
|
||||
private readonly array $preferredSources = [],
|
||||
private readonly array $alternativeSlugs = [],
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -63,4 +65,12 @@ class Manga
|
||||
{
|
||||
return !empty($this->preferredSources);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the alternative slugs held by this manga, exactly as stored in the
* `$alternativeSlugs` constructor property (an empty array by default).
*
* @return string[]
|
||||
*/
|
||||
public function getAlternativeSlugs(): array
|
||||
{
|
||||
return $this->alternativeSlugs;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface
|
||||
$mangaEntity->getAuthor() ?? '',
|
||||
(string) ($mangaEntity->getPublicationYear() ?? ''),
|
||||
$preferredSourceIds,
|
||||
$mangaEntity->getAlternativeSlugs() ?? []
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user