Files
Mangarr/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
ext.jeremy.guillot@maxicoffee.domains 0e3d72cc5e feat: debut rerefonte DDD CQRS
2025-02-01 17:03:28 +01:00

61 lines
2.1 KiB
PHP

<?php
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Contract\ScraperInterface as ContractScraperInterface;
use App\Domain\Scraping\Domain\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Scraper for sources identified by the 'html' source type.
 *
 * Downloads a page over HTTP, extracts the `img.manga-page` elements and
 * records each one as a numbered page on the scraping job, dispatching
 * progress and completion events along the way.
 */
class HtmlScraper implements ContractScraperInterface
{
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        private readonly EventDispatcherInterface $eventDispatcher
    ) {}

    /**
     * Creates a new scraping job for the given chapter and source.
     *
     * The job identifier is generated with uniqid() using the 'scraping_' prefix.
     */
    public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob
    {
        return new ScrapingJob(
            uniqid('scraping_'),
            $chapterId,
            $sourceId
        );
    }

    /**
     * Fetches the job's page, registers every matching image on the job and
     * dispatches events.
     *
     * A PageScrapingProgressed event is dispatched after each image is added,
     * and a single ChapterScrapingCompleted event once all images have been
     * processed (also when no image matched).
     */
    public function scrape(ScrapingJob $job): void
    {
        // NOTE(review): buildUrl() is not defined in this class yet; it has to
        // be implemented according to the target site's URL logic.
        $url = $this->buildUrl($job);
        $response = $this->httpClient->request('GET', $url);
        $crawler = new Crawler($response->getContent());

        // The selector must be adapted to the target site.
        $images = $crawler->filter('img.manga-page');

        // Crawler::each() hands the zero-based position of the node to the
        // callback as its second argument. Deriving the page number from it
        // replaces the former by-value captured counter, whose increment was
        // lost after every call so that all pages were numbered 1.
        $images->each(function (Crawler $image, int $index) use ($job): void {
            // NOTE(review): attr() may return null when the element has no src
            // attribute; how ImageUrl reacts to that is not visible here.
            $imageUrl = new ImageUrl($image->attr('src'));
            $job->addPage(new PageNumber($index + 1), $imageUrl);

            $this->eventDispatcher->dispatch(
                new PageScrapingProgressed($job->getId(), $job->getProgress())
            );
        });

        $this->eventDispatcher->dispatch(
            new ChapterScrapingCompleted($job->getId(), $job->getPages())
        );
    }

    /**
     * Tells whether this scraper handles the given source type.
     *
     * @return bool true only for the exact string 'html'
     */
    public function supports(string $sourceType): bool
    {
        return $sourceType === 'html';
    }
}