feat: ajout de la gestion des jobs avec création, récupération et filtrage via l'API, incluant des entités et des mappers pour les échecs et les jobs

This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2025-03-29 15:15:14 +01:00
parent d7088b14c2
commit d7ccc1e603
33 changed files with 1113 additions and 595 deletions

View File

@@ -1,42 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Dto;
use ApiPlatform\Metadata\ApiProperty;
use ApiPlatform\Metadata\ApiResource;
use ApiPlatform\Metadata\Get;
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider\ScrapingStatusStateProvider;
use ApiPlatform\Metadata\Link;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
/**
 * Immutable API resource describing the current state of a scraping job.
 *
 * Exposed through a single GET operation at `/scraping/jobs/{jobId}/status`;
 * the payload is built by ScrapingStatusStateProvider.
 */
#[ApiResource(
    shortName: 'Scraping',
    operations: [
        new Get(
            uriTemplate: '/scraping/jobs/{jobId}/status',
            uriVariables: [
                'jobId' => new Link(
                    fromClass: ScrapingStatusResponse::class,
                    fromProperty: 'jobId',
                    toClass: ScrapingJob::class,
                    toProperty: 'id',
                ),
            ],
            provider: ScrapingStatusStateProvider::class,
        ),
    ],
)]
readonly class ScrapingStatusResponse
{
    /**
     * @param string      $jobId    Identifier of the job; doubles as the resource identifier.
     * @param string      $status   Textual status of the job.
     * @param float|null  $progress Progress figure, or null when none is supplied.
     * @param string|null $error    Error description, or null when none is supplied.
     */
    public function __construct(
        #[ApiProperty(identifier: true)]
        public string $jobId,
        #[ApiProperty]
        public string $status,
        #[ApiProperty]
        public ?float $progress = null,
        #[ApiProperty]
        public ?string $error = null,
    ) {
    }
}

View File

@@ -1,37 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider;
use ApiPlatform\Metadata\Operation;
use ApiPlatform\State\ProviderInterface;
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Infrastructure\ApiPlatform\Dto\ScrapingStatusResponse;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
/**
 * State provider that builds the status representation of a scraping job
 * from the job stored in the repository.
 */
final readonly class ScrapingStatusStateProvider implements ProviderInterface
{
    public function __construct(
        private ScrapingJobRepositoryInterface $scrapingJobRepository
    ) {
    }

    /**
     * Loads the job referenced by the `jobId` URI variable and converts it
     * into a ScrapingStatusResponse.
     *
     * @param Operation            $operation    Operation being served (unused).
     * @param array<string, mixed> $uriVariables Must contain the `jobId` entry.
     * @param array<string, mixed> $context      Provider context (unused).
     *
     * @throws NotFoundHttpException When `jobId` is absent or no job matches it.
     */
    public function provide(Operation $operation, array $uriVariables = [], array $context = []): ScrapingStatusResponse
    {
        // A missing URI variable is answered with a 404 rather than a
        // warning followed by a TypeError in the repository call.
        $jobId = $uriVariables['jobId'] ?? null;
        $job = $jobId === null ? null : $this->scrapingJobRepository->findById($jobId);

        if ($job === null) {
            throw new NotFoundHttpException('Job de scraping non trouvé');
        }

        // Percentage of pages collected so far; stays at 0 while the total
        // is unknown so we never divide by zero.
        $progress = 0.0;
        if ($job->totalPages > 0) {
            $progress = (count($job->pages) / $job->totalPages) * 100;
        }

        // Surface the stored failure reason; an empty reason is reported as
        // "no error" (null) instead of an empty string.
        $failureReason = $job->failureReason;
        $error = ($failureReason === null || $failureReason === '') ? null : $failureReason;

        return new ScrapingStatusResponse(
            jobId: $job->getId(),
            status: $job->status->value,
            progress: $progress,
            error: $error,
        );
    }
}

View File

@@ -1,76 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence;
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
use App\Domain\Scraping\Infrastructure\Persistence\Entity\ScrapingJobEntity;
use Doctrine\ORM\EntityManagerInterface;
/**
 * Doctrine-backed scraping job repository.
 *
 * Converts between the ScrapingJob domain model and the ScrapingJobEntity
 * persistence entity on every read and write.
 */
readonly class DoctrineScrapingJobRepository implements ScrapingJobRepositoryInterface
{
    public function __construct(
        private EntityManagerInterface $entityManager
    ) {
    }

    /**
     * Inserts the job, or updates the mutable fields of the already stored
     * entity with the same id, then flushes the entity manager.
     */
    public function save(ScrapingJob $job): void
    {
        /** @var ScrapingJobEntity|null $existingEntity */
        $existingEntity = $this->entityManager->getRepository(ScrapingJobEntity::class)->find($job->getId());

        if ($existingEntity !== null) {
            // Only the fields that change during a job's life are synced.
            $existingEntity->setStatus($job->status->value);
            $existingEntity->setPages($job->pages);
            $existingEntity->setCompletedAt($job->completedAt);
            $existingEntity->setCbzPath($job->cbzPath?->getPath());
            $existingEntity->setFailureReason($job->failureReason);
        } else {
            $this->entityManager->persist(ScrapingJobEntity::fromDomain($job));
        }

        $this->entityManager->flush();
    }

    /**
     * @return ScrapingJob|null The job with the given id, or null if none is stored.
     */
    public function findById(string $id): ?ScrapingJob
    {
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->find($id);

        return $entity?->toDomain();
    }

    /**
     * @return ScrapingJob|null The first job matching the chapter, or null if none is stored.
     */
    public function findByChapterId(string $chapterId): ?ScrapingJob
    {
        // ScrapingJobEntity maps no `chapterId` field, so filtering on it is
        // rejected by Doctrine; `chapterNumber` is the only chapter column.
        // NOTE(review): confirm callers really pass a chapter number here.
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->findOneBy(['chapterNumber' => $chapterId]);

        return $entity?->toDomain();
    }

    /**
     * @return ScrapingJob[] Jobs whose status is PENDING.
     */
    public function findPendingJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::PENDING);
    }

    /**
     * @return ScrapingJob[] Jobs whose status is IN_PROGRESS.
     */
    public function findInProgressJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::IN_PROGRESS);
    }

    /**
     * Fetches every stored job in the given status as domain objects.
     *
     * @return ScrapingJob[]
     */
    private function findByStatus(ScrapingStatus $status): array
    {
        $entities = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->createQueryBuilder('sj')
            ->where('sj.status = :status')
            ->setParameter('status', $status->value)
            ->getQuery()
            ->getResult();

        return array_map(
            static fn (ScrapingJobEntity $entity): ScrapingJob => $entity->toDomain(),
            $entities
        );
    }
}

View File

@@ -1,103 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
use Doctrine\ORM\Mapping as ORM;
/**
 * Doctrine entity persisting a scraping job in the `scraping_jobs` table.
 *
 * Instances are built from the domain model with fromDomain() and turned
 * back into it with toDomain().
 */
#[ORM\Entity]
#[ORM\Table(name: 'scraping_jobs')]
class ScrapingJobEntity
{
    #[ORM\Id]
    #[ORM\Column(type: 'string', length: 36)]
    private string $id;

    #[ORM\Column(type: 'string')]
    private string $chapterNumber;

    #[ORM\Column(type: 'string')]
    private string $mangaId;

    #[ORM\Column(type: 'string')]
    private string $sourceId;

    #[ORM\Column(type: 'json')]
    private array $pages = [];

    /** Backing value of the job's ScrapingStatus enum case. */
    #[ORM\Column(type: 'string')]
    private string $status;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $cbzPath = null;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $failureReason = '';

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    #[ORM\Column(type: 'datetime_immutable', nullable: true)]
    private ?\DateTimeImmutable $completedAt = null;

    /**
     * Creates a new entity carrying a copy of the domain job's state.
     */
    public static function fromDomain(ScrapingJob $job): self
    {
        $entity = new self();
        $entity->id = $job->getId();
        $entity->chapterNumber = $job->getChapterNumber();
        $entity->mangaId = $job->getMangaId();
        $entity->sourceId = $job->getSourceId();
        $entity->pages = $job->pages;
        $entity->status = $job->status->value;
        $entity->createdAt = $job->createdAt;
        $entity->completedAt = $job->completedAt;
        $entity->cbzPath = $job->cbzPath?->getPath();
        $entity->failureReason = $job->failureReason;

        return $entity;
    }

    /**
     * Rebuilds the domain job from the persisted state.
     *
     * @throws \ValueError When the stored status matches no ScrapingStatus case.
     */
    public function toDomain(): ScrapingJob
    {
        $job = new ScrapingJob(
            id: $this->id,
            mangaId: $this->mangaId,
            chapterNumber: $this->chapterNumber,
            sourceId: $this->sourceId
        );
        $job->status = ScrapingStatus::from($this->status);
        $job->pages = $this->pages;
        $job->createdAt = $this->createdAt;
        $job->completedAt = $this->completedAt;
        // NOTE(review): fromDomain() reads cbzPath through ->getPath(), yet a
        // plain string is assigned here — confirm the domain property's type.
        $job->cbzPath = $this->cbzPath;
        $job->failureReason = $this->failureReason;

        return $job;
    }

    public function setStatus(string $status): void
    {
        $this->status = $status;
    }

    public function setPages(array $pages): void
    {
        $this->pages = $pages;
    }

    public function setCompletedAt(?\DateTimeImmutable $completedAt): void
    {
        $this->completedAt = $completedAt;
    }

    public function setCbzPath(?string $cbzPath = null): void
    {
        $this->cbzPath = $cbzPath;
    }

    /**
     * Accepts null because the column and property are nullable and
     * toDomain() hands the nullable value to the domain model, which may
     * pass it straight back on the next save.
     */
    public function setFailureReason(?string $failureReason): void
    {
        $this->failureReason = $failureReason;
    }
}

View File

@@ -1,76 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use Symfony\Component\Messenger\MessageBusInterface;
use Ramsey\Uuid\Uuid;
/**
 * Base class for scrapers: shared helpers for temporary storage, image
 * download and progress notification. Concrete scrapers supply scrape(),
 * scrapePages() and supports().
 */
abstract class AbstractScraper implements ScraperInterface
{
    public function __construct(
        protected ImageDownloaderInterface $imageDownloader,
        protected MessageBusInterface $eventBus
    ) {
    }

    abstract public function scrape(ScrapingRequest $request): ScrapingResult;

    abstract protected function scrapePages(ScrapingJob $job, Source $source): array;

    /**
     * Recursively deletes a directory and everything inside it.
     * Does nothing when the path is not a directory.
     */
    protected function cleanupTempDirectory(string $tempDir): void
    {
        if (!is_dir($tempDir)) {
            return;
        }

        // CHILD_FIRST yields a directory's content before the directory
        // itself, so every directory is empty by the time it is removed.
        $entries = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($tempDir, \RecursiveDirectoryIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($entries as $entry) {
            // Use the entry's own path, not its resolved real path: a
            // symlink must be removed itself, never the target it points to
            // (which may live outside the temporary directory).
            $path = $entry->getPathname();

            if ($entry->isDir() && !$entry->isLink()) {
                rmdir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($tempDir);
    }

    /**
     * Dispatches a PageScrapingProgressed event for the job on the event bus.
     */
    protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void
    {
        $progress = new ScrapingProgress($currentPage, $totalPages);
        $this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress));
    }

    /**
     * Delegates the download of one image to the injected downloader.
     */
    protected function downloadImage(string $imageUrl, string $destination): void
    {
        $this->imageDownloader->download($imageUrl, $destination);
    }

    /**
     * Builds a TempDirectory for a uniquely named path under the system
     * temporary directory.
     */
    protected function createTempDirectory(): TempDirectory
    {
        return new TempDirectory(sys_get_temp_dir() . '/' . uniqid('manga_scraper_'));
    }

    /**
     * Deletes the regular files directly inside the temporary directory,
     * then the directory itself.
     */
    protected function cleanupTempFiles(TempDirectory $tempDirectory): void
    {
        $directory = $tempDirectory->getPath();

        // glob() returns false on error; treat that as "nothing to delete".
        $files = glob($directory . '/*');
        if ($files === false) {
            $files = [];
        }

        foreach ($files as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }

        // Skip the removal when the directory was never created or is
        // already gone, instead of emitting a warning.
        if (is_dir($directory)) {
            rmdir($directory);
        }
    }

    abstract public function supports(string $sourceType): bool;
}

View File

@@ -2,16 +2,7 @@
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
use App\Domain\Scraping\Domain\Model\ValueObject\ChapterUrl;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Component\Messenger\MessageBusInterface;
@@ -19,7 +10,6 @@ use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
class HtmlScraper implements ScraperInterface
@@ -90,8 +80,6 @@ class HtmlScraper implements ScraperInterface
$nextLink = $crawler->filter($params['nextPageSelector']);
$currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
$this->dispatchProgressEvent($request->getJobId(), count($pages), count($pages));
}
return $pages;
@@ -117,12 +105,4 @@ class HtmlScraper implements ScraperInterface
{
return preg_replace('/[\x00-\x1F\x7F]/', '', trim($url));
}
/**
 * Publishes a PageScrapingProgressed event for the given job id on the
 * event bus.
 */
private function dispatchProgressEvent(string $jobId, int $currentPage, int $totalPages): void
{
    $progress = new ScrapingProgress($currentPage, $totalPages);
    $event = new PageScrapingProgressed($jobId, $progress);

    $this->eventBus->dispatch($event);
}
}