61 lines
2.1 KiB
PHP
61 lines
2.1 KiB
PHP
<?php
|
|
|
|
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
|
|
|
use App\Domain\Scraping\Domain\Contract\ScraperInterface as ContractScraperInterface;
|
|
use App\Domain\Scraping\Domain\Service\ScraperInterface;
|
|
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
|
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
|
use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
|
|
|
/**
 * Scraper for HTML sources.
 *
 * Fetches a page over HTTP, collects every image matching the page selector,
 * registers each one on the scraping job and dispatches progress/completion
 * events along the way.
 */
class HtmlScraper implements ContractScraperInterface
{
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        private readonly EventDispatcherInterface $eventDispatcher,
    ) {}

    /**
     * Creates a new scraping job for the given chapter and source.
     *
     * The job identifier is generated with uniqid() using the "scraping_" prefix.
     *
     * @param string $chapterId Identifier of the chapter to scrape.
     * @param string $sourceId  Identifier of the source the chapter comes from.
     */
    public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob
    {
        return new ScrapingJob(
            uniqid('scraping_'),
            $chapterId,
            $sourceId
        );
    }

    /**
     * Downloads the job's page, adds every matching image to the job as a
     * numbered page, and dispatches events.
     *
     * A PageScrapingProgressed event is dispatched after each page is added,
     * and a single ChapterScrapingCompleted event once all images are processed.
     *
     * @param ScrapingJob $job Job that receives the scraped pages.
     */
    public function scrape(ScrapingJob $job): void
    {
        // To be implemented according to your own logic.
        // NOTE(review): buildUrl() is not defined in this class as shown here;
        // it has to be provided before scrape() can run.
        $url = $this->buildUrl($job);
        $response = $this->httpClient->request('GET', $url);

        $crawler = new Crawler($response->getContent());
        $images = $crawler->filter('img.manga-page'); // Adapt to the target site.

        // Crawler::each() passes the zero-based node position as the second
        // argument. The page number is derived from it rather than from a counter
        // captured by value, which would never advance past 1 between iterations.
        $images->each(function (Crawler $image, int $index) use ($job) {
            // NOTE(review): attr('src') returns null when the attribute is missing;
            // confirm how ImageUrl is expected to handle that case.
            $imageUrl = new ImageUrl($image->attr('src'));
            $job->addPage(new PageNumber($index + 1), $imageUrl);

            $this->eventDispatcher->dispatch(
                new PageScrapingProgressed($job->getId(), $job->getProgress())
            );
        });

        $this->eventDispatcher->dispatch(
            new ChapterScrapingCompleted($job->getId(), $job->getPages())
        );
    }

    /**
     * Tells whether this scraper handles the given source type.
     *
     * @param string $sourceType Source type to check.
     *
     * @return bool True only when the source type is exactly 'html'.
     */
    public function supports(string $sourceType): bool
    {
        return $sourceType === 'html';
    }
}
|