feat: ajout de la gestion des jobs avec création, récupération et filtrage via l'API, incluant des entités et des mappers pour les échecs et les jobs
This commit is contained in:
parent
d7088b14c2
commit
d7ccc1e603
@@ -1,42 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\Dto;
|
||||
|
||||
use ApiPlatform\Metadata\ApiProperty;
|
||||
use ApiPlatform\Metadata\ApiResource;
|
||||
use ApiPlatform\Metadata\Get;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider\ScrapingStatusStateProvider;
|
||||
use ApiPlatform\Metadata\Link;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
|
||||
/**
 * Immutable API resource describing the current state of a scraping job.
 *
 * Declares a single GET operation on /scraping/jobs/{jobId}/status whose data
 * is supplied by ScrapingStatusStateProvider.
 */
#[ApiResource(
    shortName: 'Scraping',
    operations: [
        new Get(
            uriTemplate: '/scraping/jobs/{jobId}/status',
            uriVariables: [
                'jobId' => new Link(
                    toProperty: 'id',
                    fromProperty: 'jobId',
                    toClass: ScrapingJob::class,
                    fromClass: ScrapingStatusResponse::class,
                ),
            ],
            provider: ScrapingStatusStateProvider::class,
        ),
    ],
)]
readonly class ScrapingStatusResponse
{
    /**
     * @param string      $jobId    Identifier of the job; doubles as the resource identifier.
     * @param string      $status   Status of the job as a plain string.
     * @param float|null  $progress Progress value, or null when not provided.
     * @param string|null $error    Error description, or null when not provided.
     */
    public function __construct(
        #[ApiProperty(identifier: true)]
        public string $jobId,
        #[ApiProperty]
        public string $status,
        #[ApiProperty]
        public ?float $progress = null,
        #[ApiProperty]
        public ?string $error = null,
    ) {}
}
|
||||
@@ -1,37 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\ApiPlatform\State\Provider;
|
||||
|
||||
use ApiPlatform\Metadata\Operation;
|
||||
use ApiPlatform\State\ProviderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Infrastructure\ApiPlatform\Dto\ScrapingStatusResponse;
|
||||
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
|
||||
|
||||
/**
 * State provider that turns a persisted scraping job into a
 * ScrapingStatusResponse for the job status endpoint.
 */
final readonly class ScrapingStatusStateProvider implements ProviderInterface
{
    public function __construct(
        private ScrapingJobRepositoryInterface $scrapingJobRepository
    ) {
    }

    /**
     * Loads the job referenced by the `jobId` URI variable and maps it to the status DTO.
     *
     * @param Operation            $operation    Operation being executed (unused here).
     * @param array<string, mixed> $uriVariables Resolved URI variables; `jobId` is read.
     * @param array<string, mixed> $context      Provider context (unused here).
     *
     * @throws NotFoundHttpException when `jobId` is missing/empty or no job matches it
     */
    public function provide(Operation $operation, array $uriVariables = [], array $context = []): ScrapingStatusResponse
    {
        // A missing or empty identifier is treated like an unknown job instead
        // of triggering an "undefined array key" warning.
        $jobId = $uriVariables['jobId'] ?? null;
        $job = ($jobId === null || $jobId === '')
            ? null
            : $this->scrapingJobRepository->findById((string) $jobId);

        if ($job === null) {
            throw new NotFoundHttpException('Job de scraping non trouvé');
        }

        // Percentage of pages collected so far; stays at 0 while the total is
        // unknown and is capped at 100 so an underestimated total never
        // produces a value above 100%.
        $progress = 0.0;
        if ($job->totalPages > 0) {
            $progress = min(100.0, (float) (count($job->pages) / $job->totalPages * 100));
        }

        // Surface the stored failure reason through the DTO's `error` field;
        // an empty string is reported as "no error" (null).
        $failureReason = $job->failureReason ?? null;

        return new ScrapingStatusResponse(
            jobId: $job->getId(),
            status: $job->status->value,
            progress: $progress,
            error: $failureReason === '' ? null : $failureReason,
        );
    }
}
|
||||
@@ -1,76 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use App\Domain\Scraping\Infrastructure\Persistence\Entity\ScrapingJobEntity;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
|
||||
/**
 * Doctrine-backed implementation of ScrapingJobRepositoryInterface.
 *
 * Jobs are stored as ScrapingJobEntity rows and converted to/from the
 * ScrapingJob domain model at the repository boundary.
 */
readonly class DoctrineScrapingJobRepository implements ScrapingJobRepositoryInterface
{
    public function __construct(
        private EntityManagerInterface $entityManager
    ) {
    }

    /**
     * Inserts the job, or updates the mutable fields of the row that already
     * has the same id, then flushes the entity manager.
     */
    public function save(ScrapingJob $job): void
    {
        /** @var ScrapingJobEntity|null $existingEntity */
        $existingEntity = $this->entityManager->getRepository(ScrapingJobEntity::class)->find($job->getId());

        if ($existingEntity !== null) {
            // Only the fields below are copied on update; the remaining
            // columns keep the values written at insert time.
            $existingEntity->setStatus($job->status->value);
            $existingEntity->setPages($job->pages);
            $existingEntity->setCompletedAt($job->completedAt);
            $existingEntity->setCbzPath($job->cbzPath?->getPath());
            $existingEntity->setFailureReason($job->failureReason);
        } else {
            $this->entityManager->persist(ScrapingJobEntity::fromDomain($job));
        }

        $this->entityManager->flush();
    }

    /**
     * Returns the job with the given id, or null when none exists.
     */
    public function findById(string $id): ?ScrapingJob
    {
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->find($id);

        return $entity?->toDomain();
    }

    /**
     * Returns the first job matching the given chapter id, or null.
     *
     * NOTE(review): the ScrapingJobEntity mapping shown alongside this class
     * declares no `chapterId` field (only `chapterNumber`, `mangaId`,
     * `sourceId`, ...). If that is the current mapping, this criteria will be
     * rejected by Doctrine at runtime — confirm the intended field.
     */
    public function findByChapterId(string $chapterId): ?ScrapingJob
    {
        $entity = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->findOneBy(['chapterId' => $chapterId]);

        return $entity?->toDomain();
    }

    /**
     * @return array<int, ScrapingJob> jobs whose status is PENDING
     */
    public function findPendingJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::PENDING);
    }

    /**
     * @return array<int, ScrapingJob> jobs whose status is IN_PROGRESS
     */
    public function findInProgressJobs(): array
    {
        return $this->findByStatus(ScrapingStatus::IN_PROGRESS);
    }

    /**
     * Fetches every job stored with the given status and maps it to the domain model.
     *
     * @return array<int, ScrapingJob>
     */
    private function findByStatus(ScrapingStatus $status): array
    {
        $entities = $this->entityManager->getRepository(ScrapingJobEntity::class)
            ->createQueryBuilder('sj')
            ->where('sj.status = :status')
            ->setParameter('status', $status->value)
            ->getQuery()
            ->getResult();

        return array_map(
            static fn (ScrapingJobEntity $entity): ScrapingJob => $entity->toDomain(),
            $entities
        );
    }
}
|
||||
@@ -1,103 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
|
||||
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
|
||||
use Doctrine\ORM\Mapping as ORM;
|
||||
|
||||
/**
 * Doctrine persistence model for a scraping job (table `scraping_jobs`).
 *
 * Converts to and from the ScrapingJob domain model via fromDomain() and
 * toDomain(); the setters cover the fields the repository updates in place.
 */
#[ORM\Entity]
#[ORM\Table(name: 'scraping_jobs')]
class ScrapingJobEntity
{
    #[ORM\Id]
    #[ORM\Column(type: 'string', length: 36)]
    private string $id;

    #[ORM\Column(type: 'string')]
    private string $chapterNumber;

    #[ORM\Column(type: 'string')]
    private string $mangaId;

    #[ORM\Column(type: 'string')]
    private string $sourceId;

    #[ORM\Column(type: 'json')]
    private array $pages = [];

    #[ORM\Column(type: 'string')]
    private string $status;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $cbzPath = null;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $failureReason = '';

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    #[ORM\Column(type: 'datetime_immutable', nullable: true)]
    private ?\DateTimeImmutable $completedAt = null;

    /**
     * Builds a new entity carrying the state of the given domain job.
     */
    public static function fromDomain(ScrapingJob $job): self
    {
        $entity = new self();
        $entity->id = $job->getId();
        $entity->chapterNumber = $job->getChapterNumber();
        $entity->mangaId = $job->getMangaId();
        $entity->sourceId = $job->getSourceId();
        $entity->pages = $job->pages;
        $entity->status = $job->status->value;
        $entity->createdAt = $job->createdAt;
        $entity->completedAt = $job->completedAt;
        $entity->cbzPath = $job->cbzPath?->getPath();
        $entity->failureReason = $job->failureReason;

        return $entity;
    }

    /**
     * Rebuilds the domain job from the persisted state.
     */
    public function toDomain(): ScrapingJob
    {
        $job = new ScrapingJob(
            id: $this->id,
            mangaId: $this->mangaId,
            chapterNumber: $this->chapterNumber,
            sourceId: $this->sourceId
        );

        $job->status = ScrapingStatus::from($this->status);
        $job->pages = $this->pages;
        $job->createdAt = $this->createdAt;
        $job->completedAt = $this->completedAt;
        // NOTE(review): the raw string path is assigned here, whereas
        // fromDomain() reads the same property with `?->getPath()`.
        // Presumably the domain property expects a value object — confirm.
        $job->cbzPath = $this->cbzPath;
        $job->failureReason = $this->failureReason;

        return $job;
    }

    public function setStatus(string $status): void
    {
        $this->status = $status;
    }

    public function setPages(array $pages): void
    {
        $this->pages = $pages;
    }

    public function setCompletedAt(?\DateTimeImmutable $completedAt): void
    {
        $this->completedAt = $completedAt;
    }

    public function setCbzPath(?string $cbzPath = null): void
    {
        $this->cbzPath = $cbzPath;
    }

    /**
     * Stores the failure reason.
     *
     * Null is accepted because both the property and its column are nullable
     * and toDomain() can hand a null reason back to the job; a non-nullable
     * parameter made re-saving such a job throw a TypeError.
     */
    public function setFailureReason(?string $failureReason): void
    {
        $this->failureReason = $failureReason;
    }
}
|
||||
@@ -1,76 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
use Ramsey\Uuid\Uuid;
|
||||
|
||||
/**
 * Base class for scrapers: holds the image downloader and event bus and
 * offers helpers for temp-directory handling, image download and progress
 * events. Concrete scrapers implement scrape(), scrapePages() and supports().
 */
abstract class AbstractScraper implements ScraperInterface
{
    public function __construct(
        protected ImageDownloaderInterface $imageDownloader,
        protected MessageBusInterface $eventBus
    ) {
    }

    abstract public function scrape(ScrapingRequest $request): ScrapingResult;

    abstract protected function scrapePages(ScrapingJob $job, Source $source): array;

    /**
     * Recursively deletes the given directory and everything inside it.
     * Does nothing when the path is not a directory.
     */
    protected function cleanupTempDirectory(string $tempDir): void
    {
        if (!is_dir($tempDir)) {
            return;
        }

        // CHILD_FIRST yields a directory's content before the directory
        // itself, so every directory is empty by the time it is removed.
        $entries = new \RecursiveIteratorIterator(
            new \RecursiveDirectoryIterator($tempDir, \RecursiveDirectoryIterator::SKIP_DOTS),
            \RecursiveIteratorIterator::CHILD_FIRST
        );

        foreach ($entries as $entry) {
            // Use the entry's own path, not getRealPath(): the real path of a
            // symlink is its target (which must not be deleted) and is false
            // for a broken link.
            $path = $entry->getPathname();

            if ($entry->isDir() && !$entry->isLink()) {
                rmdir($path);
            } else {
                unlink($path);
            }
        }

        rmdir($tempDir);
    }

    /**
     * Dispatches a PageScrapingProgressed event for the job on the event bus.
     */
    protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void
    {
        $progress = new ScrapingProgress($currentPage, $totalPages);
        $this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress));
    }

    /**
     * Downloads one image to the given destination through the injected downloader.
     */
    protected function downloadImage(string $imageUrl, string $destination): void
    {
        $this->imageDownloader->download($imageUrl, $destination);
    }

    /**
     * Returns a TempDirectory pointing at a uniquely named path under the system temp dir.
     */
    protected function createTempDirectory(): TempDirectory
    {
        return new TempDirectory(sys_get_temp_dir() . '/' . uniqid('manga_scraper_'));
    }

    /**
     * Removes the temp directory together with its content.
     *
     * Delegates to cleanupTempDirectory() so hidden files and sub-directories
     * are removed too (a top-level glob() skipped them and left the final
     * rmdir() failing) and a missing directory is a no-op.
     */
    protected function cleanupTempFiles(TempDirectory $tempDirectory): void
    {
        $this->cleanupTempDirectory($tempDirectory->getPath());
    }

    abstract public function supports(string $sourceType): bool;
}
|
||||
@@ -2,16 +2,7 @@
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\Source;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ChapterUrl;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
use Symfony\Component\Messenger\MessageBusInterface;
|
||||
@@ -19,7 +10,6 @@ use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
||||
|
||||
class HtmlScraper implements ScraperInterface
|
||||
@@ -90,8 +80,6 @@ class HtmlScraper implements ScraperInterface
|
||||
|
||||
$nextLink = $crawler->filter($params['nextPageSelector']);
|
||||
$currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
|
||||
|
||||
$this->dispatchProgressEvent($request->getJobId(), count($pages), count($pages));
|
||||
}
|
||||
|
||||
return $pages;
|
||||
@@ -117,12 +105,4 @@ class HtmlScraper implements ScraperInterface
|
||||
{
|
||||
return preg_replace('/[\x00-\x1F\x7F]/', '', trim($url));
|
||||
}
|
||||
|
||||
/**
 * Publishes a PageScrapingProgressed event for the given job id on the event bus.
 */
private function dispatchProgressEvent(string $jobId, int $currentPage, int $totalPages): void
{
    $progress = new ScrapingProgress($currentPage, $totalPages);
    $event = new PageScrapingProgressed($jobId, $progress);

    $this->eventBus->dispatch($event);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user