feat: refactorisation de la gestion du scraping des chapitres en remplaçant les identifiants de manga et de chapitre par un identifiant de chapitre unique, amélioration de la récupération des sources préférées et ajout de la gestion des erreurs pour les échecs de scraping.
This commit is contained in:
parent
e29433bb0c
commit
c9f1771522
@@ -5,9 +5,7 @@ namespace App\Domain\Scraping\Application\Command;
|
||||
readonly class ScrapeChapter
|
||||
{
|
||||
public function __construct(
|
||||
public string $mangaId,
|
||||
public string $chapterNumber,
|
||||
public string $sourceId
|
||||
public string $chapterId
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzGenerationRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
@@ -39,34 +40,51 @@ readonly class ScrapeChapterHandler
|
||||
{
|
||||
$job = null;
|
||||
try {
|
||||
// 1. Création du job dans sa propre transaction
|
||||
// 1. Récupération du chapitre
|
||||
$chapter = $this->chapterRepository->getById($command->chapterId);
|
||||
if (!$chapter) {
|
||||
throw new \InvalidArgumentException("Chapter not found with ID: {$command->chapterId}");
|
||||
}
|
||||
|
||||
// 2. Récupération du manga
|
||||
$manga = $this->mangaRepository->getById($chapter->mangaId);
|
||||
if (!$manga) {
|
||||
throw new \InvalidArgumentException("Manga not found with ID: {$chapter->mangaId}");
|
||||
}
|
||||
|
||||
// 3. Détermination des sources à utiliser
|
||||
$sources = $this->getSourcesToTry($manga);
|
||||
if (empty($sources)) {
|
||||
throw new \InvalidArgumentException("No sources available for scraping");
|
||||
}
|
||||
|
||||
// 4. Essai de scraping sur chaque source jusqu'à succès
|
||||
$success = false;
|
||||
$lastException = null;
|
||||
|
||||
foreach ($sources as $source) {
|
||||
$job = new ScrapingJob(
|
||||
Uuid::uuid4()->toString(),
|
||||
$command->mangaId,
|
||||
$command->chapterNumber,
|
||||
$command->sourceId
|
||||
$chapter->mangaId,
|
||||
$chapter->chapterNumber,
|
||||
$source->getId()->getValue()
|
||||
);
|
||||
$job->start();
|
||||
$this->jobRepository->save($job);
|
||||
|
||||
// 2. Nouvelle transaction pour le reste des opérations
|
||||
$this->entityManager->beginTransaction();
|
||||
try {
|
||||
// Préparation des données
|
||||
$manga = $this->mangaRepository->getById($command->mangaId);
|
||||
$chapter = $this->chapterRepository->getByMangaIdAndChapterNumber($command->mangaId, $command->chapterNumber);
|
||||
$source = $this->sourceRepository->getById($command->sourceId);
|
||||
$this->entityManager->beginTransaction();
|
||||
|
||||
// 3. Scraping des URLs
|
||||
// 5. Scraping des URLs
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
'html',
|
||||
$source->buildChapterUrl($manga->getSlug(), $command->chapterNumber),
|
||||
$source->buildChapterUrl($manga->getSlug(), $chapter->chapterNumber),
|
||||
$source->getScrappingParameters()
|
||||
);
|
||||
|
||||
$scrapingResult = $this->scraper->scrape($scrapingRequest);
|
||||
|
||||
// 4. Téléchargement des images
|
||||
// 6. Téléchargement des images
|
||||
$tempDir = new TempDirectory();
|
||||
$downloadResults = $this->imageDownloader->downloadBatch(
|
||||
$scrapingResult->getImageUrls(),
|
||||
@@ -74,19 +92,19 @@ readonly class ScrapeChapterHandler
|
||||
$job->id
|
||||
);
|
||||
|
||||
// 5. Génération du CBZ
|
||||
// 7. Génération du CBZ
|
||||
$cbzRequest = new CbzGenerationRequest(
|
||||
$manga->getTitle(),
|
||||
$manga->getPublicationYear(),
|
||||
$chapter->volumeNumber,
|
||||
$command->chapterNumber,
|
||||
$chapter->chapterNumber,
|
||||
$tempDir,
|
||||
array_map(fn($r) => $r->getLocalPath(), $downloadResults)
|
||||
);
|
||||
|
||||
$cbzPath = $this->cbzGenerator->generate($cbzRequest);
|
||||
|
||||
// 6. Mise à jour et sauvegarde
|
||||
// 8. Mise à jour et sauvegarde
|
||||
$chapter->cbzPath = $cbzPath->getPath();
|
||||
$this->chapterRepository->save($chapter);
|
||||
|
||||
@@ -97,18 +115,66 @@ readonly class ScrapeChapterHandler
|
||||
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->id));
|
||||
|
||||
// 7. Nettoyage
|
||||
// 9. Nettoyage
|
||||
$tempDir->cleanup();
|
||||
|
||||
// Scraping réussi, pas besoin d'essayer d'autres sources
|
||||
$success = true;
|
||||
break;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
$this->entityManager->rollback();
|
||||
throw $e;
|
||||
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$this->jobRepository->save($job);
|
||||
}
|
||||
|
||||
$lastException = $e;
|
||||
|
||||
// Continuer avec la source suivante
|
||||
}
|
||||
}
|
||||
|
||||
// Si toutes les sources ont échoué
|
||||
if (!$success) {
|
||||
$errorMessage = $lastException ? $lastException->getMessage() : "Failed to scrape chapter from all available sources";
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId, $chapter->chapterNumber, $errorMessage));
|
||||
}
|
||||
|
||||
} catch (\Exception $e) {
|
||||
if (isset($job)) {
|
||||
$job->fail($e->getMessage());
|
||||
$this->jobRepository->save($job);
|
||||
}
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($command->mangaId, $command->chapterNumber, $e->getMessage()));
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($chapter->mangaId ?? 'unknown', $chapter->chapterNumber ?? 'unknown', $e->getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Picks the sources the handler should attempt for a manga.
 *
 * When the manga declares preferred source IDs, each ID is looked up through
 * the source repository and IDs that resolve to nothing are discarded. If at
 * least one preferred source resolves, only those are returned; otherwise
 * every source returned by the repository is used.
 *
 * @param \App\Domain\Scraping\Domain\Model\Manga $manga
 * @return Source[]
 */
private function getSourcesToTry(\App\Domain\Scraping\Domain\Model\Manga $manga): array
{
    if ($manga->hasPreferredSources()) {
        // Resolve every preferred ID; array_filter() drops the failed (null) lookups
        // and array_values() restores a plain 0-based list.
        $resolved = array_values(array_filter(array_map(
            fn ($preferredId) => $this->sourceRepository->getById($preferredId),
            $manga->getPreferredSources()
        )));

        if ($resolved !== []) {
            return $resolved;
        }
    }

    // No usable preference: fall back to everything the repository knows about.
    return $this->sourceRepository->getAll();
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
|
||||
/**
 * Contract for looking up and storing chapters in the scraping domain.
 */
interface ChapterRepositoryInterface
{
    /**
     * Looks a chapter up by its identifier.
     *
     * The nullable return type lets an implementation report a missing
     * chapter as null instead of throwing.
     */
    public function getById(string $id): ?Chapter;

    /**
     * Looks a chapter up by the manga it belongs to and its chapter number.
     *
     * The return type is not nullable, so an implementation cannot signal
     * "not found" through the return value.
     */
    public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter;

    /**
     * Stores the given chapter; how it is stored is up to the implementation.
     */
    public function save(Chapter $chapter): void;
}
|
||||
|
||||
@@ -7,4 +7,9 @@ use App\Domain\Scraping\Domain\Model\Source;
|
||||
/**
 * Contract for reading scraping sources.
 */
interface SourceRepositoryInterface
{
    /**
     * Looks a source up by its identifier.
     *
     * The nullable return type lets an implementation report an unknown
     * identifier as null.
     */
    public function getById(string $id): ?Source;

    /**
     * Lists the sources exposed by the implementation.
     *
     * @return Source[]
     */
    public function getAll(): array;
}
|
||||
|
||||
@@ -4,6 +4,9 @@ namespace App\Domain\Scraping\Domain\Model;
|
||||
|
||||
class Manga
|
||||
{
|
||||
/**
|
||||
* @param string[] $preferredSources
|
||||
*/
|
||||
public function __construct(
|
||||
private readonly string $id,
|
||||
private readonly string $title,
|
||||
@@ -11,6 +14,7 @@ class Manga
|
||||
private readonly string $description,
|
||||
private readonly string $author,
|
||||
private readonly string $publicationYear,
|
||||
private readonly array $preferredSources = [],
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -43,4 +47,20 @@ class Manga
|
||||
{
|
||||
return $this->publicationYear;
|
||||
}
|
||||
|
||||
/**
 * Exposes the preferred-source list exactly as it was given to the constructor.
 *
 * @return string[]
 */
public function getPreferredSources(): array
{
    return $this->preferredSources;
}
|
||||
|
||||
/**
 * Tells whether the preferred-source list holds at least one entry.
 *
 * @return bool
 */
public function hasPreferredSources(): bool
{
    // The property is typed as array, so "not the empty array" is the same test as !empty().
    return $this->preferredSources !== [];
}
|
||||
}
|
||||
|
||||
@@ -21,15 +21,9 @@ use Symfony\Component\Validator\Constraints as Assert;
|
||||
readonly class ScrapeChapterRequest
|
||||
{
|
||||
public function __construct(
|
||||
#[ApiProperty(description: 'ID du manga')]
|
||||
#[ApiProperty(description: 'ID du chapitre à scraper')]
|
||||
#[Assert\NotBlank]
|
||||
public string $mangaId,
|
||||
#[ApiProperty(description: 'Numéro du chapitre')]
|
||||
#[Assert\NotBlank]
|
||||
public string $chapterNumber,
|
||||
#[ApiProperty(description: 'ID de la source')]
|
||||
#[Assert\NotBlank]
|
||||
public string $sourceId,
|
||||
public string $chapterId,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,9 +22,7 @@ final class ScrapeChapterStateProcessor implements ProcessorInterface
|
||||
{
|
||||
$this->commandBus->dispatch(
|
||||
new ScrapeChapter(
|
||||
$data->mangaId,
|
||||
$data->chapterNumber,
|
||||
$data->sourceId
|
||||
$data->chapterId
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -13,6 +13,26 @@ readonly class LegacyChapterRepository implements ChapterRepositoryInterface
|
||||
private EntityManagerInterface $entityManager,
|
||||
) {}
|
||||
|
||||
/**
 * Loads the chapter entity with the given identifier and maps it onto the
 * domain Chapter model.
 *
 * @return Chapter|null null when the entity manager finds no entity for $id
 */
public function getById(string $id): ?Chapter
{
    $entity = $this->entityManager
        ->getRepository(EntityChapter::class)
        ->find($id);

    return $entity
        ? new Chapter(
            id: $entity->getId(),
            // The domain model only carries the owning manga's identifier.
            mangaId: $entity->getManga()->getId(),
            chapterNumber: $entity->getNumber(),
            volumeNumber: $entity->getVolume(),
            cbzPath: $entity->getCbzPath(),
        )
        : null;
}
|
||||
|
||||
/**
|
||||
* @throws ChapterNotFoundException
|
||||
*/
|
||||
@@ -32,6 +52,7 @@ readonly class LegacyChapterRepository implements ChapterRepositoryInterface
|
||||
mangaId: $chapterEntity->getManga()->getId(),
|
||||
chapterNumber: $chapterEntity->getNumber(),
|
||||
volumeNumber: $chapterEntity->getVolume(),
|
||||
cbzPath: $chapterEntity->getCbzPath(),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,13 +19,24 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface
|
||||
/** @var EntityManga|null $mangaEntity */
|
||||
$mangaEntity = $this->entityManager->getRepository(EntityManga::class)->find($id);
|
||||
|
||||
return $mangaEntity ? new Manga(
|
||||
if (!$mangaEntity) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Récupération des sources préférées
|
||||
$preferredSourceIds = [];
|
||||
foreach ($mangaEntity->getPreferredSources() as $source) {
|
||||
$preferredSourceIds[] = $source->getId();
|
||||
}
|
||||
|
||||
return new Manga(
|
||||
$mangaEntity->getId(),
|
||||
$mangaEntity->getTitle(),
|
||||
$mangaEntity->getSlug(),
|
||||
$mangaEntity->getDescription(),
|
||||
$mangaEntity->getAuthor(),
|
||||
$mangaEntity->getPublicationYear(),
|
||||
) : null;
|
||||
$mangaEntity->getDescription() ?? '',
|
||||
$mangaEntity->getAuthor() ?? '',
|
||||
$mangaEntity->getPublicationYear() ?? '',
|
||||
$preferredSourceIds,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,4 +46,43 @@ readonly class LegacySourceRepository implements SourceRepositoryInterface
|
||||
updatedAt: new DateTimeImmutable()
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Fetches every ContentSource entity and converts each one to a domain Source.
 *
 * @return Source[]
 */
public function getAll(): array
{
    /** @var ContentSource[] $entities */
    $entities = $this->entityManager->getRepository(ContentSource::class)->findAll();

    // array_values() guarantees a 0-based list regardless of the keys findAll() returns.
    return array_values(array_map(
        fn (ContentSource $entity): Source => $this->convertEntityToModel($entity),
        $entities
    ));
}
|
||||
|
||||
/**
 * Maps a ContentSource entity onto the domain Source model.
 *
 * The resulting source is always flagged active, and both timestamps are set
 * to the moment of conversion rather than read from the entity.
 */
private function convertEntityToModel(ContentSource $source): Source
{
    $sourceId = new SourceId($source->getId());
    $displayName = $source->getCleanBaseUrl();
    $summary = 'Legacy Source: ' . $source->getBaseUrl();
    $rootUrl = $source->getBaseUrl();

    // Scraper settings copied one-to-one from the entity.
    $parameters = [
        'imageSelector' => $source->getImageSelector(),
        'nextPageSelector' => $source->getNextPageSelector(),
        'chapterUrlFormat' => $source->getChapterUrlFormat(),
        'scrapingType' => $source->getScrapingType(),
        'chapterSelector' => $source->getChapterSelector(),
    ];

    return new Source(
        id: $sourceId,
        name: $displayName,
        description: $summary,
        baseUrl: $rootUrl,
        scrappingParameters: $parameters,
        isActive: true,
        createdAt: new DateTimeImmutable(),
        updatedAt: new DateTimeImmutable()
    );
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,12 @@ use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
class InMemoryChapterRepository implements ChapterRepositoryInterface
|
||||
{
|
||||
private array $chapters = [];
|
||||
|
||||
/**
 * Returns the chapter held under the given key in the in-memory store,
 * or null when that key is absent.
 */
public function getById(string $id): ?Chapter
{
    if (!isset($this->chapters[$id])) {
        return null;
    }

    return $this->chapters[$id];
}
|
||||
|
||||
public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter
|
||||
{
|
||||
foreach ($this->chapters as $chapter) {
|
||||
|
||||
@@ -16,19 +16,27 @@ class InMemoryMangaRepository implements MangaRepositoryInterface
|
||||
'test-manga',
|
||||
'Test Manga',
|
||||
'test-manga',
|
||||
'2024',
|
||||
'A test manga description',
|
||||
'Test Author',
|
||||
'A test manga description'
|
||||
'2024',
|
||||
[] // Pas de sources préférées par défaut
|
||||
);
|
||||
|
||||
// Ajoute un manga avec des sources préférées pour les tests
|
||||
$this->mangas['test-manga-with-sources'] = new Manga(
|
||||
'test-manga-with-sources',
|
||||
'Test Manga With Sources',
|
||||
'test-manga-with-sources',
|
||||
'A test manga with preferred sources',
|
||||
'Test Author',
|
||||
'2024',
|
||||
['test-source'] // Une source préférée
|
||||
);
|
||||
}
|
||||
|
||||
public function getById(string $id): Manga
|
||||
public function getById(string $id): ?Manga
|
||||
{
|
||||
if (!isset($this->mangas[$id])) {
|
||||
throw new \RuntimeException('Manga not found');
|
||||
}
|
||||
|
||||
return $this->mangas[$id];
|
||||
return $this->mangas[$id] ?? null;
|
||||
}
|
||||
|
||||
public function save(Manga $manga): void
|
||||
|
||||
@@ -22,7 +22,9 @@ class InMemorySourceRepository implements SourceRepositoryInterface
|
||||
[
|
||||
'imageSelector' => 'img.manga-image',
|
||||
'nextPageSelector' => null,
|
||||
'chapterUrlFormat' => 'https://example.com/manga/{slug}/chapter-{chapterNumber}'
|
||||
'chapterUrlFormat' => 'https://example.com/manga/{slug}/chapter-{chapterNumber}',
|
||||
'scrapingType' => 'html',
|
||||
'chapterSelector' => '.chapter-item'
|
||||
],
|
||||
true,
|
||||
new DateTimeImmutable(),
|
||||
@@ -30,13 +32,17 @@ class InMemorySourceRepository implements SourceRepositoryInterface
|
||||
);
|
||||
}
|
||||
|
||||
public function getById(string $id): Source
|
||||
public function getById(string $id): ?Source
|
||||
{
|
||||
if (!isset($this->sources[$id])) {
|
||||
throw new \RuntimeException('Source not found');
|
||||
return $this->sources[$id] ?? null;
|
||||
}
|
||||
|
||||
return $this->sources[$id];
|
||||
/**
|
||||
* @return Source[]
|
||||
*/
|
||||
public function getAll(): array
|
||||
{
|
||||
return array_values($this->sources);
|
||||
}
|
||||
|
||||
public function save(Source $source): void
|
||||
|
||||
@@ -74,9 +74,7 @@ class ScrapeChapterHandlerTest extends TestCase
|
||||
public function testHandleSuccessfully(): void
|
||||
{
|
||||
$command = new ScrapeChapter(
|
||||
mangaId: 'test-manga',
|
||||
chapterNumber: '2',
|
||||
sourceId: 'test-source'
|
||||
chapterId: '1'
|
||||
);
|
||||
|
||||
$this->handler->handle($command);
|
||||
@@ -90,16 +88,14 @@ class ScrapeChapterHandlerTest extends TestCase
|
||||
$this->assertInstanceOf(ChapterScraped::class, $dispatchedMessages[0]);
|
||||
$this->assertEquals($job->id, $dispatchedMessages[0]->getJobId());
|
||||
|
||||
$chapter = $this->chapterRepository->getByMangaIdAndChapterNumber('test-manga', 2);
|
||||
$chapter = $this->chapterRepository->getById('1');
|
||||
$this->assertNotNull($chapter->cbzPath);
|
||||
}
|
||||
|
||||
public function testHandleThrowsException(): void
|
||||
{
|
||||
$command = new ScrapeChapter(
|
||||
mangaId: 'test-manga',
|
||||
chapterNumber: '2',
|
||||
sourceId: 'test-source'
|
||||
chapterId: '1'
|
||||
);
|
||||
|
||||
$exception = new \Exception('Scraping failed');
|
||||
|
||||
@@ -23,9 +23,7 @@ class ScrapeChapterTest extends AbstractApiTestCase
|
||||
{
|
||||
// Given
|
||||
$payload = [
|
||||
'chapterNumber' => 'chapter-123',
|
||||
'sourceId' => 'source-456',
|
||||
'mangaId' => 'manga-789',
|
||||
'chapterId' => 'chapter-123',
|
||||
];
|
||||
|
||||
// When
|
||||
@@ -43,15 +41,14 @@ class ScrapeChapterTest extends AbstractApiTestCase
|
||||
/** @var ScrapeChapter $message */
|
||||
$message = $messages[0];
|
||||
$this->assertInstanceOf(ScrapeChapter::class, $message);
|
||||
$this->assertEquals('chapter-123', $message->chapterId);
|
||||
}
|
||||
|
||||
public function testInitiateChapterScrapingWithInvalidPayload(): void
|
||||
{
|
||||
// Given
|
||||
$payload = [
|
||||
'chapterNumber' => '',
|
||||
'sourceId' => 'source-456',
|
||||
'mangaId' => 'manga-789',
|
||||
'chapterId' => '',
|
||||
];
|
||||
|
||||
// When
|
||||
@@ -65,7 +62,7 @@ class ScrapeChapterTest extends AbstractApiTestCase
|
||||
$this->assertJsonContains([
|
||||
'violations' => [
|
||||
[
|
||||
'propertyPath' => 'chapterNumber',
|
||||
'propertyPath' => 'chapterId',
|
||||
'message' => 'This value should not be blank.',
|
||||
],
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user