feat: suite du passage en DDD de Scraping
This commit is contained in:
committed by
ThysTips
parent
0e3d72cc5e
commit
97d7bcf061
@@ -2,60 +2,62 @@
|
||||
|
||||
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
|
||||
|
||||
use App\Domain\Scraping\Domain\Contract\ScraperInterface as ContractScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Model\ScrapingJob;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
|
||||
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
|
||||
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
/**
 * Scraper for sources that expose chapter pages as <img> elements in an HTML document.
 *
 * The overall workflow and dispatchProgressEvent() come from AbstractScraper, which is
 * not visible in this file. Presumably the parent calls scrapePages() first and then
 * downloadPage() once per returned entry — confirm against AbstractScraper.
 */
class HtmlScraper extends AbstractScraper
{
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        private readonly EventDispatcherInterface $eventDispatcher
    ) {}

    /**
     * Fetches the chapter page and lists the images it contains.
     *
     * Each entry carries the image URL, its 1-based position among the retained images,
     * and the total number of retained images so that progress can be reported later
     * without access to the full list.
     *
     * @return list<array{url: string, number: int, total: int}>
     */
    protected function scrapePages(ScrapingJob $job): array
    {
        $url = $this->buildUrl($job);
        $response = $this->httpClient->request('GET', $url);

        $crawler = new Crawler($response->getContent());
        $images = $crawler->filter('img.manga-page'); // Adjust the selector to the target site

        $pages = [];
        $images->each(function (Crawler $image) use (&$pages) {
            $src = $image->attr('src');

            // attr() returns null when the attribute is absent; such an image cannot be
            // downloaded, so it is left out and does not consume a page number.
            if ($src === null || $src === '') {
                return;
            }

            $pages[] = [
                'url' => $src,
                'number' => count($pages) + 1
            ];
        });

        // The total is only known once every image has been visited.
        $total = count($pages);

        return array_map(
            static fn (array $page): array => $page + ['total' => $total],
            $pages
        );
    }

    /**
     * Downloads one page image into $tempDir, registers it on the job and reports progress.
     *
     * The file is named "<tempDir>/<zero-padded page number>.<extension>".
     *
     * @param array{url: string, number: int, total?: int} $page entry produced by scrapePages()
     *
     * @throws \RuntimeException when the image cannot be written to disk
     */
    protected function downloadPage(ScrapingJob $job, array $page, string $tempDir): void
    {
        $imageUrl = new ImageUrl($page['url']);
        $pageNumber = new PageNumber($page['number']);

        $fileName = sprintf('%s/%03d.%s',
            $tempDir,
            $pageNumber->getValue(),
            $imageUrl->getExtension()
        );

        $response = $this->httpClient->request('GET', $imageUrl->getValue());

        // A page that could not be saved must not be registered on the job.
        if (file_put_contents($fileName, $response->getContent()) === false) {
            throw new \RuntimeException(
                sprintf('Unable to write page %d to "%s".', $page['number'], $fileName)
            );
        }

        $job->addPage($pageNumber, $imageUrl);

        // The full page list is not in scope here; use the total recorded on the entry and
        // fall back to the current page number when the entry does not carry one.
        $totalPages = $page['total'] ?? $page['number'];
        $this->dispatchProgressEvent($job, $page['number'], $totalPages);
    }

    /**
     * Tells whether this scraper handles the given source type; only 'html' is accepted.
     */
    public function supports(string $sourceType): bool
    {
        return $sourceType === 'html';
    }

    /**
     * Builds the URL of the chapter page to scrape from the job's manga and chapter ids.
     */
    private function buildUrl(ScrapingJob $job): string
    {
        // To be implemented according to the real URL-building logic.
        // A service will probably have to be injected to look up the chapter information.
        return sprintf('https://example.com/manga/%s/chapter/%s',
            $job->getMangaId(),
            $job->getChapterId()
        );
    }
}
|
||||
Reference in New Issue
Block a user