Files
Mangarr/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php
ext.jeremy.guillot@maxicoffee.domains 874003eb35 fix(scraping): corriger les 403 sur les images avec protection anti-hotlink
- Ajouter le header Referer (origin de l'image) dans ImageDownloader pour les téléchargements backend
- Ajouter referrerpolicy="no-referrer" sur les <img> de la modale de test pour les previews navigateur
2026-03-16 00:11:17 +01:00

169 lines
5.3 KiB
PHP

<?php
namespace App\Domain\Scraping\Infrastructure\Service;
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
use App\Domain\Scraping\Domain\Model\ValueObject\DownloadResult;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use Symfony\Component\Messenger\MessageBusInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Downloads remote images over HTTP and stores them on disk as JPEG files.
 *
 * The real image format is detected from the file's magic bytes rather than
 * from the Content-Type header, then the image is re-encoded with GD.
 */
readonly class ImageDownloader implements ImageDownloaderInterface
{
    public function __construct(
        private HttpClientInterface $httpClient,
        private MessageBusInterface $eventBus
    ) {
    }

    /**
     * Downloads a single image and writes it to $destination as a JPEG.
     *
     * The destination extension is always forced to ".jpg".
     *
     * @param string $url         URL of the image to fetch
     * @param string $destination Target path; its extension is replaced by ".jpg"
     *
     * @throws \RuntimeException When the response is not an image, the format is
     *                           unsupported, or the JPEG cannot be written
     */
    public function download(string $url, string $destination): void
    {
        // Some hosts reject requests without a Referer (anti-hotlink protection),
        // so the image's own origin is sent when it can be determined.
        $options = [];
        $referer = $this->buildReferer($url);
        if (null !== $referer) {
            $options['headers'] = ['Referer' => $referer];
        }

        $response = $this->httpClient->request('GET', $url, $options);

        $contentType = $response->getHeaders()['content-type'][0] ?? '';
        if (!str_starts_with($contentType, 'image/')) {
            throw new \RuntimeException('Invalid content type: ' . $contentType);
        }

        $tempFilePath = $this->saveTempFile($response->getContent());

        try {
            $image = $this->createImageResource($tempFilePath, $contentType);
            if (false === $image) {
                throw new \RuntimeException('Failed to create image resource');
            }

            // No explicit imagedestroy(): GdImage objects are released automatically
            // once they go out of scope (the function is a no-op since PHP 8.0).
            if (!imagejpeg($image, $this->ensureJpgExtension($destination))) {
                throw new \RuntimeException('Failed to save image as JPG');
            }
        } finally {
            // Single cleanup point for the temporary file, on success and on failure.
            if (file_exists($tempFilePath)) {
                unlink($tempFilePath);
            }
        }
    }

    /**
     * Downloads every URL into the temporary directory as 001.jpg, 002.jpg, ...
     *
     * A failing image is logged and skipped; a progress event is dispatched after
     * each successful download.
     *
     * @param array         $urls    Image URLs; the array key + 1 is used as the page
     *                               number, so integer keys are assumed
     * @param TempDirectory $tempDir Directory receiving the converted files
     * @param string        $jobId   Identifier forwarded in progress events
     *
     * @return DownloadResult[] One entry per successfully downloaded image
     *
     * @throws \RuntimeException When no image at all could be downloaded
     */
    public function downloadBatch(array $urls, TempDirectory $tempDir, string $jobId): array
    {
        $results = [];
        $totalUrls = count($urls);

        foreach ($urls as $index => $url) {
            try {
                $destination = sprintf(
                    '%s/%03d.jpg',
                    $tempDir->getPath(),
                    $index + 1
                );
                $this->download($url, $destination);
                $results[] = new DownloadResult($destination, $url);
                $this->dispatchProgressEvent($jobId, $index + 1, $totalUrls);
            } catch (\Exception $e) {
                // Log the error but carry on with the remaining images.
                error_log("Failed to download image {$url}: " . $e->getMessage());
            }
        }

        if ([] === $results) {
            throw new \RuntimeException('Failed to download any images');
        }

        return $results;
    }

    /**
     * Publishes a PageScrapingProgressed event on the event bus.
     */
    private function dispatchProgressEvent(string $jobId, int $currentPage, int $totalPages): void
    {
        $this->eventBus->dispatch(new PageScrapingProgressed(
            $jobId,
            new ScrapingProgress($currentPage, $totalPages)
        ));
    }

    /**
     * Builds the Referer value ("scheme://host") from the image URL.
     *
     * @return string|null The origin, or null when the URL has no parseable host
     */
    private function buildReferer(string $url): ?string
    {
        $parts = parse_url($url);
        if (!is_array($parts) || '' === ($parts['host'] ?? '')) {
            return null;
        }

        return ($parts['scheme'] ?? 'https') . '://' . $parts['host'];
    }

    /**
     * Writes raw image bytes to a fresh temporary file and returns its path.
     *
     * @throws \RuntimeException When the temporary file cannot be created or written
     */
    private function saveTempFile(string $data): string
    {
        $tempFilePath = tempnam(sys_get_temp_dir(), 'manga_img_');
        if (false === $tempFilePath) {
            throw new \RuntimeException('Failed to create temporary file');
        }

        if (false === file_put_contents($tempFilePath, $data)) {
            // Do not leave an empty temp file behind when the write fails.
            if (file_exists($tempFilePath)) {
                unlink($tempFilePath);
            }

            throw new \RuntimeException('Failed to write temporary file');
        }

        return $tempFilePath;
    }

    /**
     * Loads the file into a GD image using the loader matching its detected format.
     *
     * @param string $filePath    Path of the downloaded file
     * @param string $contentType Content-Type announced by the server (used in the error message only)
     *
     * @return \GdImage|false The GD image, or false when GD fails to decode the file
     *
     * @throws \RuntimeException When the format is unknown or unsupported
     */
    private function createImageResource(string $filePath, string $contentType): \GdImage|false
    {
        $realFormat = $this->detectImageFormat($filePath);

        return match ($realFormat) {
            'webp' => imagecreatefromwebp($filePath),
            'png' => imagecreatefrompng($filePath),
            'jpeg' => imagecreatefromjpeg($filePath),
            'gif' => imagecreatefromgif($filePath),
            // Windows bitmap ("BM" signature); imagecreatefromwbmp() reads WBMP, a different format.
            'bmp' => imagecreatefrombmp($filePath),
            default => throw new \RuntimeException('Unsupported image format: ' . $realFormat . ' (content-type: ' . $contentType . ')'),
        };
    }

    /**
     * Identifies the image format from the first bytes of the file.
     *
     * @return string One of "jpeg", "png", "webp", "gif" or "bmp"
     *
     * @throws \RuntimeException When the file cannot be read or the signature is unknown
     */
    private function detectImageFormat(string $filePath): string
    {
        $handle = fopen($filePath, 'rb');
        if (!$handle) {
            throw new \RuntimeException('Cannot open file for format detection');
        }

        $header = fread($handle, 12);
        fclose($handle);

        if (false === $header) {
            throw new \RuntimeException('Cannot read file for format detection');
        }

        // JPEG: starts with FF D8 FF
        if (str_starts_with($header, "\xFF\xD8\xFF")) {
            return 'jpeg';
        }

        // PNG: starts with 89 50 4E 47 0D 0A 1A 0A
        if (str_starts_with($header, "\x89PNG\r\n\x1A\n")) {
            return 'png';
        }

        // WebP: starts with RIFF....WEBP
        // substr() is safe on short headers, whereas strpos() with an offset
        // beyond the string length throws a ValueError.
        if (str_starts_with($header, 'RIFF') && 'WEBP' === substr($header, 8, 4)) {
            return 'webp';
        }

        // GIF: starts with GIF87a or GIF89a
        if (str_starts_with($header, 'GIF87a') || str_starts_with($header, 'GIF89a')) {
            return 'gif';
        }

        // BMP: starts with BM
        if (str_starts_with($header, 'BM')) {
            return 'bmp';
        }

        throw new \RuntimeException('Unknown image format. Header: ' . bin2hex(substr($header, 0, 8)));
    }

    /**
     * Returns $path with its extension replaced by ".jpg".
     */
    private function ensureJpgExtension(string $path): string
    {
        $info = pathinfo($path);

        return $info['dirname'] . '/' . $info['filename'] . '.jpg';
    }
}