feat: ajout de la gestion des jobs avec création, récupération et filtrage via l'API, incluant des entités et des mappers pour les échecs et les jobs
This commit is contained in:
parent
d7088b14c2
commit
d7ccc1e603
@@ -17,8 +17,10 @@ use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzGenerationRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Shared\Domain\Contract\JobRepositoryInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
|
||||
readonly class ScrapeChapterHandler
|
||||
{
|
||||
@@ -26,84 +28,91 @@ readonly class ScrapeChapterHandler
|
||||
private ScraperInterface $scraper,
|
||||
private ImageDownloaderInterface $imageDownloader,
|
||||
private CbzGeneratorInterface $cbzGenerator,
|
||||
private ScrapingJobRepositoryInterface $scrapingJobRepository,
|
||||
private JobRepositoryInterface $jobRepository,
|
||||
private ChapterRepositoryInterface $chapterRepository,
|
||||
private MangaRepositoryInterface $mangaRepository,
|
||||
private SourceRepositoryInterface $sourceRepository,
|
||||
private MessageBusInterface $eventBus
|
||||
private MessageBusInterface $eventBus,
|
||||
private EntityManagerInterface $entityManager
|
||||
) {
|
||||
}
|
||||
|
||||
/**
 * Runs the whole scraping pipeline for one chapter: creates and starts a
 * job, scrapes the image URLs, downloads the images, builds the CBZ archive,
 * stores the archive path on the chapter and marks the job as completed.
 *
 * The job is saved before the main transaction is opened, so rolling that
 * transaction back presumably leaves the job row available for the failure
 * update below (confirm against the JobRepositoryInterface implementation).
 *
 * @param ScrapeChapter $command Carries mangaId, chapterNumber and sourceId.
 *
 * @throws \Throwable Any failure is recorded on the job (when one exists),
 *                    announced with ChapterScrapingFailed and rethrown as is.
 */
public function handle(ScrapeChapter $command): void
{
    $job = null;
    $tempDir = null;

    try {
        // 1. Create the job and persist it ahead of the main transaction.
        $job = new ScrapingJob(
            Uuid::uuid4()->toString(),
            $command->mangaId,
            $command->chapterNumber,
            $command->sourceId
        );
        $job->start();
        $this->jobRepository->save($job);

        // 2. Everything else runs inside a dedicated transaction.
        $this->entityManager->beginTransaction();
        try {
            // Load the data needed by the pipeline.
            $manga = $this->mangaRepository->getById($command->mangaId);
            $chapter = $this->chapterRepository->getByMangaIdAndChapterNumber(
                $command->mangaId,
                $command->chapterNumber
            );
            $source = $this->sourceRepository->getById($command->sourceId);

            // Uses the job's id property, like the other call sites below.
            $this->eventBus->dispatch(new ChapterScrapingStarted($job->id));

            // 3. Scrape the image URLs.
            $scrapingRequest = new ScrapingRequest(
                'html',
                $source->buildChapterUrl($manga->getSlug(), $command->chapterNumber),
                $source->getScrappingParameters()
            );
            $scrapingResult = $this->scraper->scrape($scrapingRequest);

            // 4. Download the images into a temporary directory.
            $tempDir = new TempDirectory();
            $downloadResults = $this->imageDownloader->downloadBatch(
                $scrapingResult->getImageUrls(),
                $tempDir,
                $job->id
            );

            // 5. Generate the CBZ archive.
            $cbzRequest = new CbzGenerationRequest(
                $manga->getTitle(),
                $manga->getPublicationYear(),
                $chapter->volumeNumber,
                $command->chapterNumber,
                $tempDir,
                array_map(fn($r) => $r->getLocalPath(), $downloadResults)
            );
            $cbzPath = $this->cbzGenerator->generate($cbzRequest);

            // 6. Persist the result on the chapter, then on the job.
            $chapter->cbzPath = $cbzPath->getPath();
            $this->chapterRepository->save($chapter);

            $job->complete();
            $this->jobRepository->save($job);

            $this->entityManager->commit();
        } catch (\Throwable $e) {
            // \Throwable so that PHP errors also release the transaction.
            $this->entityManager->rollback();
            throw $e;
        }

        // Dispatched after the rollback-guarded section: a failure here must
        // not trigger a rollback on an already committed transaction.
        $this->eventBus->dispatch(new ChapterScraped($job->id));
    } catch (\Throwable $e) {
        if ($job !== null) {
            $job->fail($e->getMessage());
            $this->jobRepository->save($job);
        }
        $this->eventBus->dispatch(
            new ChapterScrapingFailed($command->mangaId, $command->chapterNumber, $e->getMessage())
        );
        throw $e;
    } finally {
        // 7. Clean up the temporary directory on success and on failure.
        $tempDir?->cleanup();
    }
}
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Contract\Repository;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
|
||||
/**
 * Persistence contract for ScrapingJob aggregates.
 */
interface ScrapingJobRepositoryInterface
{
    /**
     * Persists the given job.
     */
    public function save(ScrapingJob $job): void;

    /**
     * Looks a job up by its identifier.
     *
     * @return ScrapingJob|null The job, or null when none matches.
     */
    public function findById(string $id): ?ScrapingJob;

    /**
     * Looks a job up by chapter identifier.
     *
     * @return ScrapingJob|null The job, or null when none matches.
     */
    public function findByChapterId(string $chapterId): ?ScrapingJob;
}
|
||||
@@ -2,73 +2,22 @@
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\CbzPath;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
use App\Domain\Shared\Domain\Model\Job;
|
||||
|
||||
/**
 * Job specialisation describing the scraping of one chapter.
 *
 * The job is registered with the parent Job under the type 'scraping_job',
 * allows a single attempt, and carries the manga, chapter number and source
 * in its context array.
 */
class ScrapingJob extends Job
{
    /**
     * @param string $id            Job identifier, forwarded to the parent Job.
     * @param string $mangaId       Stored in the context under 'mangaId'.
     * @param float  $chapterNumber Stored in the context under 'chapterNumber'.
     * @param string $sourceId      Stored in the context under 'sourceId'.
     */
    public function __construct(
        string $id,
        string $mangaId,
        float $chapterNumber,
        string $sourceId
    ) {
        parent::__construct($id, 'scraping_job');

        // NOTE(review): presumably disables retries; confirm how Job uses maxAttempts.
        $this->maxAttempts = 1;
        $this->context = [
            'mangaId' => $mangaId,
            'chapterNumber' => $chapterNumber,
            'sourceId' => $sourceId
        ];
    }
}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Domain\Model;
|
||||
|
||||
/**
 * Lifecycle states of a scraping job, backed by the string stored for each state.
 */
enum ScrapingStatus: string
{
    /** Initial state of a job. */
    case PENDING = 'pending';

    /** The job has started but has not finished. */
    case IN_PROGRESS = 'in_progress';

    /** The job finished successfully. */
    case COMPLETED = 'completed';

    /** The job ended with an error. */
    case FAILED = 'failed';
}
|
||||
@@ -8,7 +8,6 @@ readonly class ScrapingRequest
|
||||
private string $sourceType,
|
||||
private string $chapterUrl,
|
||||
private array $scrapingParameters,
|
||||
private string $jobId
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -26,9 +25,4 @@ readonly class ScrapingRequest
|
||||
{
|
||||
return $this->scrapingParameters;
|
||||
}
|
||||
|
||||
/**
 * Returns the identifier of the job this request belongs to.
 */
public function getJobId(): string
{
    return $this->jobId;
}
|
||||
}
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Dto;
|
||||
|
||||
use ApiPlatform\Metadata\ApiProperty;
|
||||
use ApiPlatform\Metadata\ApiResource;
|
||||
use ApiPlatform\Metadata\Get;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider\ScrapingStatusStateProvider;
|
||||
use ApiPlatform\Metadata\Link;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
|
||||
/**
 * Read-only API resource exposing the status of a scraping job at
 * GET /scraping/jobs/{jobId}/status.
 */
#[ApiResource(
    shortName: 'Scraping',
    operations: [
        new Get(
            uriTemplate: '/scraping/jobs/{jobId}/status',
            uriVariables: [
                // Links the {jobId} URI variable to the job's id property.
                'jobId' => new Link(
                    fromClass: ScrapingStatusResponse::class,
                    fromProperty: 'jobId',
                    toClass: ScrapingJob::class,
                    toProperty: 'id',
                ),
            ],
            provider: ScrapingStatusStateProvider::class,
        ),
    ],
)]
readonly class ScrapingStatusResponse
{
    /**
     * @param string      $jobId    Resource identifier.
     * @param string      $status   Status of the job as a string.
     * @param float|null  $progress Progress value, null when not provided.
     * @param string|null $error    Error text, null when not provided.
     */
    public function __construct(
        #[ApiProperty(identifier: true)]
        public string $jobId,

        #[ApiProperty]
        public string $status,

        #[ApiProperty]
        public ?float $progress = null,

        #[ApiProperty]
        public ?string $error = null,
    ) {
    }
}
|
||||
@@ -1,37 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider;
|
||||
|
||||
use ApiPlatform\Metadata\Operation;
|
||||
use ApiPlatform\State\ProviderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\Dto\ScrapingStatusResponse;
|
||||
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
|
||||
|
||||
/**
 * API Platform state provider that turns a stored scraping job into a
 * ScrapingStatusResponse.
 */
final readonly class ScrapingStatusStateProvider implements ProviderInterface
{
    public function __construct(
        private ScrapingJobRepositoryInterface $scrapingJobRepository
    ) {
    }

    /**
     * Builds the status response for the job named by the 'jobId' URI variable.
     *
     * @param Operation $operation    Unused.
     * @param array     $uriVariables Must contain 'jobId'.
     * @param array     $context      Unused.
     *
     * @throws NotFoundHttpException When 'jobId' is missing or no job matches it.
     */
    public function provide(Operation $operation, array $uriVariables = [], array $context = []): ScrapingStatusResponse
    {
        // A missing URI variable is answered with the same 404 as an unknown
        // job instead of an undefined-index warning.
        $jobId = $uriVariables['jobId'] ?? null;
        $job = $jobId !== null
            ? $this->scrapingJobRepository->findById((string) $jobId)
            : null;

        if ($job === null) {
            throw new NotFoundHttpException('Job de scraping non trouvé');
        }

        // Percentage of known pages already recorded on the job; stays at 0
        // while the total is unknown, which also avoids a division by zero.
        $progress = 0.0;
        if ($job->totalPages > 0) {
            $progress = (count($job->pages) / $job->totalPages) * 100;
        }

        return new ScrapingStatusResponse(
            jobId: $job->getId(),
            status: $job->status->value,
            progress: $progress,
            // The job stores '' when nothing failed; expose that as null.
            error: $job->failureReason !== '' ? $job->failureReason : null
        );
    }
}
|
||||
@@ -1,76 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use App\Domain\Scraping\Infrastructure\Persistence\Entity\ScrapingJobEntity;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
|
||||
/**
 * Doctrine-backed implementation of ScrapingJobRepositoryInterface that maps
 * between ScrapingJob domain objects and ScrapingJobEntity rows.
 */
readonly class DoctrineScrapingJobRepository implements ScrapingJobRepositoryInterface
{
    public function __construct(
        private EntityManagerInterface $entityManager
    ) {
    }

    /**
     * Inserts the job when its id is unknown, otherwise copies its mutable
     * fields onto the stored entity, then flushes.
     */
    public function save(ScrapingJob $job): void
    {
        /** @var ScrapingJobEntity|null $existingEntity */
        $existingEntity = $this->entityManager->getRepository(ScrapingJobEntity::class)->find($job->getId());

        if ($existingEntity !== null) {
            $existingEntity->setStatus($job->status->value);
            $existingEntity->setPages($job->pages);
            $existingEntity->setCompletedAt($job->completedAt);
            $existingEntity->setCbzPath($job->cbzPath?->getPath());
            $existingEntity->setFailureReason($job->failureReason);
        } else {
            $this->entityManager->persist(ScrapingJobEntity::fromDomain($job));
        }

        $this->entityManager->flush();
    }

    /**
     * @return ScrapingJob|null The job with the given id, or null when none exists.
     */
    public function findById(string $id): ?ScrapingJob
    {
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->find($id);

        return $entity?->toDomain();
    }

    /**
     * NOTE(review): this filters on a 'chapterId' field, but ScrapingJobEntity
     * as shown in this change set maps no such field (it has chapterNumber and
     * mangaId). Doctrine rejects unknown criteria fields, so this call likely
     * fails at runtime. Confirm the intended lookup before relying on it.
     *
     * @return ScrapingJob|null The matching job, or null when none exists.
     */
    public function findByChapterId(string $chapterId): ?ScrapingJob
    {
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->findOneBy(['chapterId' => $chapterId]);

        return $entity?->toDomain();
    }

    /**
     * @return ScrapingJob[] Jobs whose status is PENDING.
     */
    public function findPendingJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::PENDING);
    }

    /**
     * @return ScrapingJob[] Jobs whose status is IN_PROGRESS.
     */
    public function findInProgressJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::IN_PROGRESS);
    }

    /**
     * Shared query behind the status-specific finders.
     *
     * @return ScrapingJob[]
     */
    private function findByStatus(ScrapingStatus $status): array
    {
        $entities = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->createQueryBuilder('sj')
            ->where('sj.status = :status')
            ->setParameter('status', $status->value)
            ->getQuery()
            ->getResult();

        return array_map(fn (ScrapingJobEntity $entity) => $entity->toDomain(), $entities);
    }
}
|
||||
@@ -1,103 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use Doctrine\ORM\Mapping as ORM;
|
||||
|
||||
/**
 * Doctrine entity for the 'scraping_jobs' table, with converters to and from
 * the ScrapingJob domain model.
 *
 * NOTE(review): no column stores the job's totalPages, so a job rebuilt with
 * toDomain() keeps whatever default ScrapingJob gives that property.
 */
#[ORM\Entity]
#[ORM\Table(name: 'scraping_jobs')]
class ScrapingJobEntity
{
    #[ORM\Id]
    #[ORM\Column(type: 'string', length: 36)]
    private string $id;

    // Stored as a string although the domain model exposes a float.
    #[ORM\Column(type: 'string')]
    private string $chapterNumber;

    #[ORM\Column(type: 'string')]
    private string $mangaId;

    #[ORM\Column(type: 'string')]
    private string $sourceId;

    #[ORM\Column(type: 'json')]
    private array $pages = [];

    #[ORM\Column(type: 'string')]
    private string $status;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $cbzPath = null;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $failureReason = '';

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    #[ORM\Column(type: 'datetime_immutable', nullable: true)]
    private ?\DateTimeImmutable $completedAt = null;

    /**
     * Builds a new entity carrying the state of the given domain job.
     */
    public static function fromDomain(ScrapingJob $job): self
    {
        $entity = new self();
        $entity->id = $job->getId();
        // Explicit conversion so the assignment does not depend on weak typing.
        $entity->chapterNumber = (string) $job->getChapterNumber();
        $entity->mangaId = $job->getMangaId();
        $entity->sourceId = $job->getSourceId();
        $entity->pages = $job->pages;
        $entity->status = $job->status->value;
        $entity->createdAt = $job->createdAt;
        $entity->completedAt = $job->completedAt;
        $entity->cbzPath = $job->cbzPath?->getPath();
        $entity->failureReason = $job->failureReason;

        return $entity;
    }

    /**
     * Rebuilds the domain job from the stored columns.
     */
    public function toDomain(): ScrapingJob
    {
        $job = new ScrapingJob(
            id: $this->id,
            mangaId: $this->mangaId,
            chapterNumber: (float) $this->chapterNumber,
            sourceId: $this->sourceId
        );

        $job->status = ScrapingStatus::from($this->status);
        $job->pages = $this->pages;
        $job->createdAt = $this->createdAt;
        $job->completedAt = $this->completedAt;
        // The domain property is typed ?CbzPath, so the stored string must be
        // wrapped; assigning the raw string raises a TypeError.
        // NOTE(review): assumes CbzPath's constructor takes the path string; confirm.
        $job->cbzPath = $this->cbzPath !== null
            ? new \App\Domain\Scraping\Domain\Model\ValueObject\CbzPath($this->cbzPath)
            : null;
        // The column is nullable while the domain property is a plain string.
        $job->failureReason = $this->failureReason ?? '';

        return $job;
    }

    public function setStatus(string $status): void
    {
        $this->status = $status;
    }

    public function setPages(array $pages): void
    {
        $this->pages = $pages;
    }

    public function setCompletedAt(?\DateTimeImmutable $completedAt): void
    {
        $this->completedAt = $completedAt;
    }

    public function setCbzPath(?string $cbzPath = null): void
    {
        $this->cbzPath = $cbzPath;
    }

    public function setFailureReason(string $failureReason): void
    {
        $this->failureReason = $failureReason;
    }
}
|
||||
@@ -1,76 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
|
||||
/**
 * Base class for scrapers: holds the image downloader and the event bus and
 * offers helpers for temporary directories, downloads and progress events.
 */
abstract class AbstractScraper implements ScraperInterface
{
    public function __construct(
        protected ImageDownloaderInterface $imageDownloader,
        protected MessageBusInterface $eventBus
    ) {
    }

    abstract public function scrape(ScrapingRequest $request): ScrapingResult;

    abstract protected function scrapePages(ScrapingJob $job, Source $source): array;

    /**
     * Recursively deletes a directory and everything below it. Does nothing
     * when the path is not a directory.
     */
    protected function cleanupTempDirectory(string $tempDir): void
    {
        if (!is_dir($tempDir)) {
            return;
        }

        $entries = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($tempDir, \RecursiveDirectoryIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($entries as $entry) {
            // getPathname() keeps the entry's own path. getRealPath() would
            // resolve symlinks (deleting the link target instead of the link)
            // and can return false.
            $path = $entry->getPathname();
            if ($entry->isDir() && !$entry->isLink()) {
                rmdir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($tempDir);
    }

    /**
     * Publishes a PageScrapingProgressed event for the given job.
     */
    protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void
    {
        $progress = new ScrapingProgress($currentPage, $totalPages);
        $this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress));
    }

    /**
     * Downloads one image through the injected downloader.
     */
    protected function downloadImage(string $imageUrl, string $destination): void
    {
        $this->imageDownloader->download($imageUrl, $destination);
    }

    /**
     * Creates a TempDirectory for a uniquely named path under the system
     * temporary directory.
     */
    protected function createTempDirectory(): TempDirectory
    {
        return new TempDirectory(sys_get_temp_dir() . '/' . uniqid('manga_scraper_'));
    }

    /**
     * Deletes the regular files directly inside the temporary directory, then
     * the directory itself.
     */
    protected function cleanupTempFiles(TempDirectory $tempDirectory): void
    {
        $directory = $tempDirectory->getPath();

        // glob() returns false on error; treat that as "nothing to delete".
        foreach (glob($directory . '/*') ?: [] as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }

        if (is_dir($directory)) {
            rmdir($directory);
        }
    }

    abstract public function supports(string $sourceType): bool;
}
|
||||
@@ -2,16 +2,7 @@
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ChapterUrl;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
@@ -19,7 +10,6 @@ use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
||||
|
||||
class HtmlScraper implements ScraperInterface
|
||||
@@ -90,8 +80,6 @@ class HtmlScraper implements ScraperInterface
|
||||
|
||||
$nextLink = $crawler->filter($params['nextPageSelector']);
|
||||
$currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
|
||||
|
||||
$this->dispatchProgressEvent($request->getJobId(), count($pages), count($pages));
|
||||
}
|
||||
|
||||
return $pages;
|
||||
@@ -117,12 +105,4 @@ class HtmlScraper implements ScraperInterface
|
||||
{
|
||||
return preg_replace('/[\x00-\x1F\x7F]/', '', trim($url));
|
||||
}
|
||||
|
||||
/**
 * Publishes a PageScrapingProgressed event for the given job on the event bus.
 */
private function dispatchProgressEvent(string $jobId, int $currentPage, int $totalPages): void
{
    $progress = new ScrapingProgress($currentPage, $totalPages);
    $event = new PageScrapingProgressed($jobId, $progress);

    $this->eventBus->dispatch($event);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user