Files
Mangarr/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
ext.jeremy.guillot@maxicoffee.domains 97d7bcf061 feat: suite du passage en DDD de Scraping
2025-02-01 17:03:28 +01:00

63 lines
2.0 KiB
PHP

<?php
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Scraper for sources whose chapter pages are exposed as <img> tags in an HTML document.
 *
 * It fetches the chapter URL, collects the image sources matching a CSS selector,
 * then downloads each image into a temporary directory and records it on the job.
 *
 * NOTE(review): `$this->httpClient` and `dispatchProgressEvent()` are not defined in this
 * class; presumably they are provided by AbstractScraper — confirm.
 */
class HtmlScraper extends AbstractScraper
{
    /**
     * Number of pages found by the most recent scrapePages() call.
     *
     * Kept so downloadPage() can report a total to the progress event: the page list
     * itself is local to scrapePages() and is not passed to downloadPage().
     */
    private int $totalPages = 0;

    /**
     * Fetches the chapter HTML and lists the page images it contains.
     *
     * Images without a usable `src` attribute are skipped so that no empty URL is
     * handed to downloadPage(); the remaining pages are numbered sequentially from 1.
     *
     * @return array<int, array{url: string, number: int}>
     */
    protected function scrapePages(ScrapingJob $job): array
    {
        $url = $this->buildUrl($job);
        $response = $this->httpClient->request('GET', $url);
        $crawler = new Crawler($response->getContent());

        // Adjust the selector to match the markup of the target site.
        $sources = $crawler->filter('img.manga-page')->each(
            static function (Crawler $image): ?string {
                return $image->attr('src');
            }
        );

        $pages = [];
        foreach ($sources as $source) {
            $source = trim((string) $source);
            if ($source === '') {
                // No src attribute (or an empty one): nothing to download for this tag.
                continue;
            }

            $pages[] = [
                'url' => $source,
                'number' => count($pages) + 1,
            ];
        }

        // Remember the total for the progress events emitted while downloading.
        $this->totalPages = count($pages);

        return $pages;
    }

    /**
     * Downloads one page image into $tempDir and registers it on the job.
     *
     * The file is named with the zero-padded page number and the extension reported
     * by the ImageUrl value object (e.g. "<tempDir>/001.<ext>").
     *
     * @param array{url: string, number: int} $page One entry as produced by scrapePages()
     *
     * @throws \RuntimeException When the downloaded image cannot be written to disk
     */
    protected function downloadPage(ScrapingJob $job, array $page, string $tempDir): void
    {
        $imageUrl = new ImageUrl($page['url']);
        $pageNumber = new PageNumber($page['number']);

        $fileName = sprintf(
            '%s/%03d.%s',
            $tempDir,
            $pageNumber->getValue(),
            $imageUrl->getExtension()
        );

        $response = $this->httpClient->request('GET', $imageUrl->getValue());

        // Fail loudly instead of registering a page whose file was never written.
        if (file_put_contents($fileName, $response->getContent()) === false) {
            throw new \RuntimeException(
                sprintf('Unable to write page %d to "%s".', $page['number'], $fileName)
            );
        }

        $job->addPage($pageNumber, $imageUrl);

        // The total comes from the last scrapePages() run; never report a total that is
        // lower than the page being processed (e.g. if scrapePages() was not called first).
        $total = max($this->totalPages, (int) $page['number']);
        $this->dispatchProgressEvent($job, $page['number'], $total);
    }

    /**
     * Tells whether this scraper handles the given source type.
     *
     * @return bool True only for the exact string 'html'
     */
    public function supports(string $sourceType): bool
    {
        return $sourceType === 'html';
    }

    /**
     * Builds the URL of the chapter to scrape from the job's manga and chapter ids.
     *
     * TODO: implement according to the real URL-building logic. A service will probably
     * have to be injected to retrieve the chapter information; the example.com URL
     * below is a placeholder.
     */
    private function buildUrl(ScrapingJob $job): string
    {
        return sprintf(
            'https://example.com/manga/%s/chapter/%s',
            $job->getMangaId(),
            $job->getChapterId()
        );
    }
}