feat: finalizing Scraping endpoint
This commit is contained in:
parent
0374ab0e46
commit
073439163b
@@ -96,3 +96,7 @@ services:
|
||||
App\Domain\Scraping\Infrastructure\Handler\SymfonyScrapeChapterHandler:
|
||||
tags:
|
||||
- { name: messenger.message_handler, bus: command.bus }
|
||||
|
||||
App\Domain\Scraping\Infrastructure\Service\CbzGenerator:
|
||||
arguments:
|
||||
$projectDir: '%kernel.project_dir%'
|
||||
|
||||
@@ -12,4 +12,14 @@ services:
|
||||
class: 'App\Tests\Domain\Scraping\Adapter\InMemoryScrapingJobRepository'
|
||||
public: true
|
||||
|
||||
App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface:
|
||||
class: 'App\Tests\Domain\Scraping\Adapter\InMemoryImageDownloader'
|
||||
public: true
|
||||
|
||||
App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface:
|
||||
class: 'App\Tests\Domain\Scraping\Adapter\InMemoryCbzGenerator'
|
||||
arguments:
|
||||
$projectDir: '%kernel.project_dir%'
|
||||
public: true
|
||||
|
||||
|
||||
|
||||
34
migrations/Version20250210154832.php
Normal file
34
migrations/Version20250210154832.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace DoctrineMigrations;
|
||||
|
||||
use Doctrine\DBAL\Schema\Schema;
|
||||
use Doctrine\Migrations\AbstractMigration;
|
||||
|
||||
/**
 * Adds the columns that record the outcome of a scraping job:
 * the path of the generated CBZ archive and the failure reason.
 */
final class Version20250210154832 extends AbstractMigration
{
    public function getDescription(): string
    {
        return 'Add cbz_path and failure_reason columns to scraping_jobs';
    }

    /**
     * Adds the two nullable columns to scraping_jobs.
     */
    public function up(Schema $schema): void
    {
        $this->addSql('ALTER TABLE scraping_jobs ADD cbz_path VARCHAR(255) DEFAULT NULL');
        $this->addSql('ALTER TABLE scraping_jobs ADD failure_reason VARCHAR(255) DEFAULT NULL');
    }

    /**
     * Drops the two columns added by up().
     */
    public function down(Schema $schema): void
    {
        // The auto-generated "CREATE SCHEMA public" statement was removed:
        // up() never drops that schema, so re-creating it here makes the
        // rollback fail on PostgreSQL where "public" already exists.
        $this->addSql('ALTER TABLE scraping_jobs DROP cbz_path');
        $this->addSql('ALTER TABLE scraping_jobs DROP failure_reason');
    }
}
|
||||
@@ -5,9 +5,11 @@ namespace App\Domain\Scraping\Application\CommandHandler;
|
||||
use App\Domain\Scraping\Application\Command\ScrapeChapter;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
|
||||
@@ -34,7 +36,15 @@ readonly class ScrapeChapterHandler
|
||||
|
||||
$this->eventBus->dispatch(new ChapterScrapingStarted($job->getId()));
|
||||
|
||||
$this->scraper->scrape($job);
|
||||
$job = $this->scraper->scrape($job);
|
||||
|
||||
if($job->status === ScrapingStatus::FAILED) {
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($command->mangaId, $command->chapterNumber, $job->failureReason));
|
||||
}elseif ($job->status === ScrapingStatus::COMPLETED) {
|
||||
$this->eventBus->dispatch(new ChapterScraped($job->getId()));
|
||||
}
|
||||
|
||||
$this->scrapingJobRepository->save($job);
|
||||
} catch (\Exception $e) {
|
||||
$this->eventBus->dispatch(new ChapterScrapingFailed($command->mangaId, $command->chapterNumber, $e->getMessage()));
|
||||
throw $e;
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
|
||||
/**
 * Read access to chapters for the scraping domain.
 */
interface ChapterRepositoryInterface
{
    /**
     * Looks up a chapter by the manga it belongs to and its chapter number.
     *
     * NOTE(review): the Doctrine-backed legacy implementation throws
     * \App\Domain\Scraping\Domain\Exception\ChapterNotFoundException when no
     * chapter matches; confirm other implementations follow the same contract.
     */
    public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter;
}
|
||||
@@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Service;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzPath;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
|
||||
/**
 * Contract for services that turn the files of a scraping job into a CBZ archive.
 */
interface CbzGeneratorInterface
{
    /**
     * Generates the archive for $job using $tempDirectory as input.
     *
     * @return CbzPath Location of the generated archive.
     */
    public function generate(ScrapingJob $job, TempDirectory $tempDirectory): CbzPath;
}
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Service;
|
||||
|
||||
interface ImageDownloader
|
||||
interface ImageDownloaderInterface
|
||||
{
|
||||
public function download(string $url, string $destination): void;
|
||||
}
|
||||
@@ -6,6 +6,6 @@ use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
|
||||
interface ScraperInterface
|
||||
{
|
||||
public function scrape(ScrapingJob $job): void;
|
||||
public function scrape(ScrapingJob $job): ScrapingJob;
|
||||
public function supports(string $sourceType): bool;
|
||||
}
|
||||
|
||||
14
src/Domain/Scraping/Domain/Event/ChapterScraped.php
Normal file
14
src/Domain/Scraping/Domain/Event/ChapterScraped.php
Normal file
@@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Event;
|
||||
|
||||
/**
 * Event dispatched when a scraping job finished with the COMPLETED status.
 */
class ChapterScraped
{
    /** Identifier of the scraping job that completed. */
    private readonly string $jobId;

    /**
     * @param string $getId Identifier of the completed scraping job. The
     *                      parameter name is kept unchanged so existing
     *                      named-argument callers keep working.
     */
    public function __construct(string $getId)
    {
        // The identifier used to be discarded, leaving listeners with an
        // event that carried no information about which job completed.
        $this->jobId = $getId;
    }

    /**
     * Returns the identifier of the scraping job that completed.
     */
    public function getJobId(): string
    {
        return $this->jobId;
    }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Exception;
|
||||
|
||||
/**
 * Raised when a step of the CBZ archive generation fails.
 *
 * Instances are meant to be created through the named constructors below,
 * each of which embeds the offending path in the message.
 */
class CbzGenerationException extends \RuntimeException
{
    /**
     * The target directory could not be created.
     */
    public static function unableToCreateDirectory(string $path): self
    {
        $message = \sprintf('Impossible de créer le répertoire : %s', $path);

        return new self($message);
    }

    /**
     * The CBZ file itself could not be created.
     */
    public static function unableToCreateCbz(string $path): self
    {
        $message = \sprintf('Impossible de créer le fichier CBZ : %s', $path);

        return new self($message);
    }

    /**
     * A file could not be added to the archive.
     */
    public static function unableToAddFileToArchive(string $filePath): self
    {
        $message = \sprintf('Impossible d\'ajouter le fichier à l\'archive : %s', $filePath);

        return new self($message);
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Exception;
|
||||
|
||||
/**
 * Thrown when a chapter lookup finds no matching chapter.
 */
class ChapterNotFoundException extends \Exception
{
    /** Fixed message carried by every instance. */
    private const MESSAGE = 'Chapter not found';

    public function __construct()
    {
        parent::__construct(self::MESSAGE);
    }
}
|
||||
13
src/Domain/Scraping/Domain/Model/Chapter.php
Normal file
13
src/Domain/Scraping/Domain/Model/Chapter.php
Normal file
@@ -0,0 +1,13 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model;
|
||||
|
||||
/**
 * Immutable chapter data used by the scraping domain.
 *
 * All values are exposed as public read-only properties.
 */
class Chapter
{
    /**
     * @param string $id            Chapter identifier.
     * @param string $mangaId       Identifier of the manga the chapter belongs to.
     * @param int    $chapterNumber Number of the chapter.
     * @param int    $volumeNumber  Number of the volume containing the chapter.
     */
    public function __construct(
        public readonly string $id,
        public readonly string $mangaId,
        public readonly int $chapterNumber,
        public readonly int $volumeNumber,
    ) {
    }
}
|
||||
@@ -10,6 +10,7 @@ class Manga
|
||||
private readonly string $slug,
|
||||
private readonly string $description,
|
||||
private readonly string $author,
|
||||
private readonly string $publicationYear,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -37,4 +38,9 @@ class Manga
|
||||
{
|
||||
return $this->author;
|
||||
}
|
||||
|
||||
/**
 * Returns the publication year exactly as stored on the manga (a string).
 */
public function getPublicationYear(): string
{
    return $this->publicationYear;
}
|
||||
}
|
||||
|
||||
@@ -2,16 +2,19 @@
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzPath;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
|
||||
class ScrapingJob
|
||||
{
|
||||
private array $pages = [];
|
||||
private int $totalPages = 0;
|
||||
private ScrapingStatus $status;
|
||||
private \DateTimeImmutable $createdAt;
|
||||
private ?\DateTimeImmutable $completedAt = null;
|
||||
public array $pages = [];
|
||||
public int $totalPages = 0;
|
||||
public ScrapingStatus $status;
|
||||
public ?CbzPath $cbzPath = null;
|
||||
public string $failureReason = '';
|
||||
public \DateTimeImmutable $createdAt;
|
||||
public ?\DateTimeImmutable $completedAt = null;
|
||||
|
||||
public function __construct(
|
||||
private readonly string $id,
|
||||
@@ -37,8 +40,9 @@ class ScrapingJob
|
||||
$this->completedAt = new \DateTimeImmutable();
|
||||
}
|
||||
|
||||
public function fail(): void
|
||||
public function fail(string $exceptionMessage): void
|
||||
{
|
||||
$this->failureReason = $exceptionMessage;
|
||||
$this->status = ScrapingStatus::FAILED;
|
||||
$this->completedAt = new \DateTimeImmutable();
|
||||
}
|
||||
@@ -63,28 +67,8 @@ class ScrapingJob
|
||||
return $this->sourceId;
|
||||
}
|
||||
|
||||
public function getPages(): array
|
||||
public function setStatus(ScrapingStatus $status): void
|
||||
{
|
||||
return $this->pages;
|
||||
}
|
||||
|
||||
public function getTotalPages(): int
|
||||
{
|
||||
return $this->totalPages;
|
||||
}
|
||||
|
||||
public function getStatus(): ScrapingStatus
|
||||
{
|
||||
return $this->status;
|
||||
}
|
||||
|
||||
public function getCreatedAt(): \DateTimeImmutable
|
||||
{
|
||||
return $this->createdAt;
|
||||
}
|
||||
|
||||
public function getCompletedAt(): ?\DateTimeImmutable
|
||||
{
|
||||
return $this->completedAt;
|
||||
$this->status = $status;
|
||||
}
|
||||
}
|
||||
|
||||
18
src/Domain/Scraping/Domain/Model/ValueObject/CbzPath.php
Normal file
18
src/Domain/Scraping/Domain/Model/ValueObject/CbzPath.php
Normal file
@@ -0,0 +1,18 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model\ValueObject;
|
||||
|
||||
/**
 * Value object wrapping the path of a CBZ archive.
 */
class CbzPath
{
    /**
     * @param string $path Path of the CBZ file; must not be an empty string.
     *
     * @throws \InvalidArgumentException When $path is the empty string.
     */
    public function __construct(private readonly string $path)
    {
        // Strict comparison instead of empty(): empty('0') is true, which
        // wrongly rejected the (valid) path "0".
        if ($path === '') {
            throw new \InvalidArgumentException('Le chemin du fichier CBZ ne peut pas être vide');
        }
    }

    /**
     * Returns the wrapped path unchanged.
     */
    public function getPath(): string
    {
        return $this->path;
    }
}
|
||||
@@ -24,13 +24,13 @@ final readonly class ScrapingStatusStateProvider implements ProviderInterface
|
||||
}
|
||||
|
||||
$progress = 0;
|
||||
if ($job->getTotalPages() > 0) {
|
||||
$progress = (count($job->getPages()) / $job->getTotalPages()) * 100;
|
||||
if ($job->totalPages > 0) {
|
||||
$progress = (count($job->pages) / $job->totalPages) * 100;
|
||||
}
|
||||
|
||||
return new ScrapingStatusResponse(
|
||||
jobId: $job->getId(),
|
||||
status: $job->getStatus()->value,
|
||||
status: $job->status->value,
|
||||
progress: $progress
|
||||
);
|
||||
}
|
||||
|
||||
@@ -17,8 +17,19 @@ readonly class DoctrineScrapingJobRepository implements ScrapingJobRepositoryInt
|
||||
|
||||
public function save(ScrapingJob $job): void
|
||||
{
|
||||
$entity = ScrapingJobEntity::fromDomain($job);
|
||||
$this->entityManager->persist($entity);
|
||||
$existingEntity = $this->entityManager->getRepository(ScrapingJobEntity::class)->find($job->getId());
|
||||
|
||||
if ($existingEntity) {
|
||||
$existingEntity->setStatus($job->status->value);
|
||||
$existingEntity->setPages($job->pages);
|
||||
$existingEntity->setCompletedAt($job->completedAt);
|
||||
$existingEntity->setCbzPath($job->cbzPath?->getPath());
|
||||
$existingEntity->setFailureReason($job->failureReason);
|
||||
} else {
|
||||
$entity = ScrapingJobEntity::fromDomain($job);
|
||||
$this->entityManager->persist($entity);
|
||||
}
|
||||
|
||||
$this->entityManager->flush();
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use Doctrine\ORM\Mapping as ORM;
|
||||
|
||||
#[ORM\Entity]
|
||||
@@ -28,6 +29,12 @@ class ScrapingJobEntity
|
||||
#[ORM\Column(type: 'string')]
|
||||
private string $status;
|
||||
|
||||
#[ORM\Column(type: 'string', nullable: true)]
|
||||
private ?string $cbzPath = null;
|
||||
|
||||
#[ORM\Column(type: 'string', nullable: true)]
|
||||
private ?string $failureReason = '';
|
||||
|
||||
#[ORM\Column(type: 'datetime_immutable')]
|
||||
private \DateTimeImmutable $createdAt;
|
||||
|
||||
@@ -41,11 +48,12 @@ class ScrapingJobEntity
|
||||
$entity->chapterNumber = $job->getChapterNumber();
|
||||
$entity->mangaId = $job->getMangaId();
|
||||
$entity->sourceId = $job->getSourceId();
|
||||
$entity->pages = $job->getPages();
|
||||
$entity->status = $job->getStatus()->value;
|
||||
$entity->createdAt = $job->getCreatedAt();
|
||||
$entity->completedAt = $job->getCompletedAt();
|
||||
|
||||
$entity->pages = $job->pages;
|
||||
$entity->status = $job->status->value;
|
||||
$entity->createdAt = $job->createdAt;
|
||||
$entity->completedAt = $job->completedAt;
|
||||
$entity->cbzPath = $job->cbzPath?->getPath();
|
||||
$entity->failureReason = $job->failureReason;
|
||||
return $entity;
|
||||
}
|
||||
|
||||
@@ -58,6 +66,38 @@ class ScrapingJobEntity
|
||||
sourceId: $this->sourceId
|
||||
);
|
||||
|
||||
$job->status = ScrapingStatus::from($this->status);
|
||||
$job->pages = $this->pages;
|
||||
$job->createdAt = $this->createdAt;
|
||||
$job->completedAt = $this->completedAt;
|
||||
$job->cbzPath = $this->cbzPath;
|
||||
$job->failureReason = $this->failureReason;
|
||||
|
||||
return $job;
|
||||
}
|
||||
|
||||
/**
 * Replaces the persisted status with the given string value.
 */
public function setStatus(string $status): void
{
    $this->status = $status;
}
|
||||
|
||||
/**
 * Replaces the persisted list of pages.
 */
public function setPages(array $pages): void
{
    $this->pages = $pages;
}
|
||||
|
||||
/**
 * Updates the completion timestamp.
 *
 * Null is accepted because the domain job's completedAt is nullable and is
 * passed through as-is when an existing entity is updated; a non-nullable
 * parameter raised a TypeError for jobs that have not finished yet.
 */
public function setCompletedAt(?\DateTimeImmutable $completedAt): void
{
    $this->completedAt = $completedAt;
}
|
||||
|
||||
/**
 * Sets the path of the generated CBZ archive; null (the default) clears it.
 */
public function setCbzPath(?string $cbzPath = null): void
{
    $this->cbzPath = $cbzPath;
}
|
||||
|
||||
/**
 * Stores the reason the job failed.
 */
public function setFailureReason(string $failureReason): void
{
    $this->failureReason = $failureReason;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Exception\ChapterNotFoundException;
|
||||
use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
use App\Entity\Chapter as EntityChapter;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
/**
 * Chapter repository backed by the legacy Doctrine Chapter entity.
 */
class LegacyChapterRepository implements ChapterRepositoryInterface
{
    public function __construct(
        private readonly EntityManagerInterface $entityManager,
    ) {
    }

    /**
     * Finds the legacy chapter matching the manga and number and maps it to
     * the scraping domain's Chapter model.
     *
     * @throws ChapterNotFoundException When no legacy chapter matches.
     */
    public function getByMangaIdAndChapterNumber(string $mangaId, int $chapterNumber): Chapter
    {
        $criteria = [
            'manga' => $mangaId,
            'number' => $chapterNumber,
        ];

        $legacyChapter = $this->entityManager
            ->getRepository(EntityChapter::class)
            ->findOneBy($criteria);

        if ($legacyChapter) {
            return new Chapter(
                id: $legacyChapter->getId(),
                mangaId: $legacyChapter->getManga()->getId(),
                chapterNumber: $legacyChapter->getNumber(),
                volumeNumber: $legacyChapter->getVolume(),
            );
        }

        throw new ChapterNotFoundException();
    }
}
|
||||
@@ -25,6 +25,7 @@ readonly class LegacyMangaRepository implements MangaRepositoryInterface
|
||||
$mangaEntity->getSlug(),
|
||||
$mangaEntity->getDescription(),
|
||||
$mangaEntity->getAuthor(),
|
||||
$mangaEntity->getPublicationYear(),
|
||||
) : null;
|
||||
}
|
||||
}
|
||||
|
||||
104
src/Domain/Scraping/Infrastructure/Service/CbzGenerator.php
Normal file
104
src/Domain/Scraping/Infrastructure/Service/CbzGenerator.php
Normal file
@@ -0,0 +1,104 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ChapterRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Model\Manga;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzPath;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Scraping\Domain\Exception\CbzGenerationException;
|
||||
use Exception;
|
||||
|
||||
/**
 * Packs the files of a scraping job's temporary directory into a CBZ (zip)
 * archive stored under "<projectDir>/public/cbz/<title> (<year>)/volume_NN".
 */
readonly class CbzGenerator implements CbzGeneratorInterface
{
    public function __construct(
        private string $projectDir,
        private MangaRepositoryInterface $mangaRepository,
        private ChapterRepositoryInterface $chapterRepository,
    ) {}

    /**
     * Builds the archive for $job from the files found in $tempDirectory.
     *
     * @return CbzPath Path of the archive that was written.
     *
     * @throws CbzGenerationException When the directory or the archive cannot be created.
     * @throws Exception              Any error raised by the repositories or while iterating files.
     */
    public function generate(ScrapingJob $job, TempDirectory $tempDirectory): CbzPath
    {
        $cbzPath = $this->generateCbzPath($job);
        $this->createCbzArchive($tempDirectory->getPath(), $cbzPath);

        return new CbzPath($cbzPath);
    }

    /**
     * Computes the destination path of the archive and makes sure its
     * directory exists.
     *
     * @throws CbzGenerationException When the manga is unknown or the directory cannot be created.
     */
    private function generateCbzPath(ScrapingJob $job): string
    {
        $manga = $this->mangaRepository->getById($job->getMangaId());
        // NOTE(review): the legacy manga repository appears to return null for
        // an unknown manga; fail with a clear error instead of a null
        // dereference. Confirm getById()'s declared contract.
        if ($manga === null) {
            throw new CbzGenerationException(sprintf('Manga introuvable : %s', $job->getMangaId()));
        }

        $chapter = $this->chapterRepository->getByMangaIdAndChapterNumber($job->getMangaId(), $job->getChapterNumber());

        // The title becomes a single path segment, so directory separators in
        // it (e.g. "Fate/stay night") must not create extra directory levels.
        $safeTitle = $this->sanitizePathSegment($manga->getTitle());

        $baseDir = sprintf(
            '%s/public/cbz/%s/%s',
            $this->projectDir,
            $safeTitle . ' (' . $manga->getPublicationYear() . ')',
            sprintf('volume_%02d', $chapter->volumeNumber)
        );

        $this->ensureDirectoryExists($baseDir);

        // Whole chapter numbers are rendered as "05", fractional ones as "05.5".
        $chapterNumber = $job->getChapterNumber();
        $formattedNumber = $chapterNumber == floor($chapterNumber)
            ? sprintf('%02d', (int) $chapterNumber)
            : sprintf('%04.1f', $chapterNumber);

        return sprintf(
            '%s/%s_vol%s_ch%s.cbz',
            $baseDir,
            strtolower($safeTitle),
            sprintf('%02d', $chapter->volumeNumber),
            $formattedNumber
        );
    }

    /**
     * Replaces characters that would split or terminate a path segment.
     * Values without such characters are returned unchanged.
     */
    private function sanitizePathSegment(string $segment): string
    {
        return str_replace(['/', '\\', "\0"], '_', $segment);
    }

    /**
     * Creates $directory (recursively) when it does not exist yet.
     *
     * @throws CbzGenerationException When the directory is still missing afterwards.
     */
    private function ensureDirectoryExists(string $directory): void
    {
        if (is_dir($directory)) {
            return;
        }

        try {
            $created = mkdir($directory, 0755, true);
        } catch (Exception) {
            // mkdir() only throws when an error handler converts its warning
            // into an exception; treat that the same as a false return.
            $created = false;
        }

        // Re-check is_dir(): another process may have created the directory
        // between the first check and mkdir(), which is not a failure.
        if (!$created && !is_dir($directory)) {
            throw CbzGenerationException::unableToCreateDirectory($directory);
        }
    }

    /**
     * Writes every file below $sourceDirectory into a new zip archive at
     * $destinationPath, keeping paths relative to $sourceDirectory.
     *
     * @throws CbzGenerationException When the archive cannot be opened, a file cannot be added, or the archive cannot be written.
     * @throws Exception              Any error raised while iterating the directory.
     */
    private function createCbzArchive(string $sourceDirectory, string $destinationPath): void
    {
        $zip = new \ZipArchive();

        // OVERWRITE so that re-generating a chapter never keeps stale entries
        // from an archive left by a previous run.
        if ($zip->open($destinationPath, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== true) {
            throw CbzGenerationException::unableToCreateCbz($destinationPath);
        }

        try {
            $files = new \RecursiveIteratorIterator(
                new \RecursiveDirectoryIterator($sourceDirectory, \FilesystemIterator::SKIP_DOTS),
                \RecursiveIteratorIterator::LEAVES_ONLY
            );

            /** @var \SplFileInfo $file */
            foreach ($files as $file) {
                if ($file->isDir()) {
                    continue;
                }

                $filePath = $file->getPathname();
                // Use the iterator's own sub-path rather than slicing a
                // realpath by strlen($sourceDirectory): the two differ when
                // the source directory is a symlink or not normalised.
                $relativePath = str_replace(DIRECTORY_SEPARATOR, '/', $files->getSubPathname());

                if (!$zip->addFile($filePath, $relativePath)) {
                    throw CbzGenerationException::unableToAddFileToArchive($filePath);
                }
            }
        } catch (Exception $e) {
            // Drop the queued entries so close() does not write a partial archive.
            $zip->unchangeAll();
            $zip->close();
            throw $e;
        }

        // The archive is only written to disk by close(); a false return
        // means the CBZ was not produced.
        if (!$zip->close()) {
            throw CbzGenerationException::unableToCreateCbz($destinationPath);
        }
    }
}
|
||||
@@ -3,7 +3,7 @@
|
||||
namespace App\Domain\Scraping\Infrastructure\Service;
|
||||
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloader as ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
|
||||
readonly class ImageDownloader implements ImageDownloaderInterface
|
||||
{
|
||||
|
||||
@@ -2,25 +2,25 @@
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
|
||||
abstract class AbstractScraper implements ScraperInterface
|
||||
{
|
||||
public function __construct(
|
||||
protected readonly ImageDownloader $imageDownloader,
|
||||
protected readonly MessageBusInterface $eventBus
|
||||
protected ImageDownloaderInterface $imageDownloader,
|
||||
protected MessageBusInterface $eventBus
|
||||
) {
|
||||
}
|
||||
|
||||
abstract public function scrape(ScrapingJob $job): void;
|
||||
abstract public function scrape(ScrapingJob $job): ScrapingJob;
|
||||
|
||||
abstract protected function scrapePages(ScrapingJob $job, Source $source): array;
|
||||
|
||||
|
||||
@@ -9,26 +9,27 @@ use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ChapterUrl;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
|
||||
|
||||
class HtmlScraper extends AbstractScraper
|
||||
{
|
||||
public function __construct(
|
||||
ImageDownloader $imageDownloader,
|
||||
MessageBusInterface $eventBus,
|
||||
private readonly HttpClientInterface $httpClient,
|
||||
ImageDownloaderInterface $imageDownloader,
|
||||
MessageBusInterface $eventBus,
|
||||
private readonly CbzGeneratorInterface $cbzGenerator,
|
||||
private readonly HttpClientInterface $httpClient,
|
||||
private readonly SourceRepositoryInterface $sourceRepository,
|
||||
private readonly MangaRepositoryInterface $mangaRepository,
|
||||
private readonly ScrapingJobRepositoryInterface $scrapingJobRepository,
|
||||
private readonly MangaRepositoryInterface $mangaRepository,
|
||||
) {
|
||||
parent::__construct($imageDownloader, $eventBus);
|
||||
}
|
||||
|
||||
public function scrape(ScrapingJob $job): void
|
||||
public function scrape(ScrapingJob $job): ScrapingJob
|
||||
{
|
||||
$sourceConfig = $this->sourceRepository->getById($job->getSourceId());
|
||||
$tempDir = $this->createTempDirectory();
|
||||
@@ -52,12 +53,14 @@ class HtmlScraper extends AbstractScraper
|
||||
$this->dispatchProgressEvent($job, $index + 1, count($pages));
|
||||
}
|
||||
|
||||
$cbzPath = $this->cbzGenerator->generate($job, $tempDir);
|
||||
|
||||
$job->cbzPath = $cbzPath;
|
||||
$job->complete();
|
||||
$this->scrapingJobRepository->save($job);
|
||||
return $job;
|
||||
} catch (\Exception $e) {
|
||||
$job->fail();
|
||||
$this->scrapingJobRepository->save($job);
|
||||
throw $e;
|
||||
$job->fail($e->getMessage());
|
||||
return $job;
|
||||
} finally {
|
||||
$this->cleanupTempFiles($tempDir);
|
||||
}
|
||||
|
||||
20
tests/Domain/Scraping/Adapter/InMemoryCbzGenerator.php
Normal file
20
tests/Domain/Scraping/Adapter/InMemoryCbzGenerator.php
Normal file
@@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
namespace App\Tests\Domain\Scraping\Adapter;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzPath;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
|
||||
/**
 * Test double for the CBZ generator: it never touches the filesystem and
 * always reports the same archive path.
 */
readonly class InMemoryCbzGenerator implements CbzGeneratorInterface
{
    /**
     * @param string $projectDir Accepted so the service can be wired like the
     *                           real generator; not read by this class.
     */
    public function __construct(private string $projectDir)
    {
    }

    /**
     * Ignores both arguments and returns a CbzPath for "test.cbz".
     */
    public function generate(ScrapingJob $job, TempDirectory $tempDirectory): CbzPath
    {
        $fakeArchive = 'test.cbz';

        return new CbzPath($fakeArchive);
    }
}
|
||||
@@ -10,11 +10,15 @@ class InMemoryScraperAdapter implements ScraperInterface
|
||||
{
|
||||
private ?\Exception $shouldThrowException = null;
|
||||
|
||||
public function scrape(ScrapingJob $job): void
|
||||
public function scrape(ScrapingJob $job): ScrapingJob
|
||||
{
|
||||
if ($this->shouldThrowException) {
|
||||
throw $this->shouldThrowException;
|
||||
$job->fail($this->shouldThrowException->getMessage());
|
||||
return $job;
|
||||
}
|
||||
|
||||
$job->complete();
|
||||
return $job;
|
||||
}
|
||||
|
||||
public function simulateError(\Exception $exception): void
|
||||
|
||||
@@ -12,23 +12,17 @@ class InMemoryScrapingJobRepository implements ScrapingJobRepositoryInterface
|
||||
|
||||
public function save(ScrapingJob $job): void
|
||||
{
|
||||
self::$jobs[] = $job;
|
||||
self::$jobs[$job->getId()] = $job;
|
||||
}
|
||||
|
||||
public function getJobs(): array
|
||||
{
|
||||
return self::$jobs;
|
||||
return array_values(self::$jobs);
|
||||
}
|
||||
|
||||
public function findById(string $id): ?ScrapingJob
|
||||
{
|
||||
foreach (self::$jobs as $job) {
|
||||
if ($job->getId() === $id) {
|
||||
return $job;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return self::$jobs[$id] ?? null;
|
||||
}
|
||||
|
||||
public function findByChapterId(string $chapterId): ?ScrapingJob
|
||||
|
||||
@@ -4,8 +4,10 @@ namespace App\Tests\Domain\Scraping\Application\CommandHandler;
|
||||
|
||||
use App\Domain\Scraping\Application\Command\ScrapeChapter;
|
||||
use App\Domain\Scraping\Application\CommandHandler\ScrapeChapterHandler;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryEventBus;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
|
||||
use App\Tests\Domain\Scraping\Adapter\InMemoryScrapingJobRepository;
|
||||
@@ -41,17 +43,18 @@ class ScrapeChapterHandlerTest extends TestCase
|
||||
$this->handler->handle($command);
|
||||
|
||||
$scrapingJobs = $this->repository->getJobs();
|
||||
|
||||
|
||||
$this->assertCount(1, $scrapingJobs);
|
||||
$job = $scrapingJobs[0];
|
||||
|
||||
$savedJobs = $this->repository->getJobs();
|
||||
$this->assertCount(1, $savedJobs);
|
||||
$this->assertSame($job, $savedJobs[0]);
|
||||
|
||||
$dispatchedMessages = $this->eventBus->getDispatchedMessages();
|
||||
$this->assertCount(1, $dispatchedMessages);
|
||||
$this->assertCount(2, $dispatchedMessages);
|
||||
$this->assertInstanceOf(ChapterScrapingStarted::class, $dispatchedMessages[0]);
|
||||
$this->assertInstanceOf(ChapterScraped::class, $dispatchedMessages[1]);
|
||||
$this->assertEquals($job->getId(), $dispatchedMessages[0]->getJobId());
|
||||
|
||||
$this->repository->clear();
|
||||
}
|
||||
|
||||
public function testHandleThrowsException(): void
|
||||
@@ -65,18 +68,18 @@ class ScrapeChapterHandlerTest extends TestCase
|
||||
$exception = new \Exception('Scraping failed');
|
||||
$this->scraper->simulateError($exception);
|
||||
|
||||
$this->expectException(\Exception::class);
|
||||
$this->expectExceptionMessage('Scraping failed');
|
||||
$this->handler->handle($command);
|
||||
|
||||
try {
|
||||
$this->handler->handle($command);
|
||||
} finally {
|
||||
$dispatchedMessages = $this->eventBus->getDispatchedMessages();
|
||||
$this->assertCount(2, $dispatchedMessages);
|
||||
$this->assertInstanceOf(ChapterScrapingStarted::class, $dispatchedMessages[0]);
|
||||
$this->assertInstanceOf(ChapterScrapingFailed::class, $dispatchedMessages[1]);
|
||||
$this->assertEquals(2, $dispatchedMessages[1]->getChapterNumber());
|
||||
$this->assertEquals('Scraping failed', $dispatchedMessages[1]->getReason());
|
||||
}
|
||||
$dispatchedMessages = $this->eventBus->getDispatchedMessages();
|
||||
$this->assertCount(2, $dispatchedMessages);
|
||||
$this->assertInstanceOf(ChapterScrapingStarted::class, $dispatchedMessages[0]);
|
||||
$this->assertInstanceOf(ChapterScrapingFailed::class, $dispatchedMessages[1]);
|
||||
$this->assertEquals(2, $dispatchedMessages[1]->getChapterNumber());
|
||||
$this->assertEquals('Scraping failed', $dispatchedMessages[1]->getReason());
|
||||
|
||||
$jobs = $this->repository->getJobs();
|
||||
$this->assertCount(1, $jobs);
|
||||
$this->assertEquals(ScrapingStatus::FAILED, $jobs[0]->status);
|
||||
$this->assertEquals('Scraping failed', $jobs[0]->failureReason);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user