- Ajouter le header Referer (origin de l'image) dans ImageDownloader pour les téléchargements backend - Ajouter referrerpolicy="no-referrer" sur les <img> de la modale de test pour les previews navigateur
169 lines
5.3 KiB
PHP
169 lines
5.3 KiB
PHP
<?php
|
|
|
|
namespace App\Domain\Scraping\Infrastructure\Service;
|
|
|
|
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
|
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
|
|
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\DownloadResult;
|
|
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
|
|
use Symfony\Component\Messenger\MessageBusInterface;
|
|
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
|
|
|
/**
 * Downloads remote images over HTTP and stores them on disk as JPEG files.
 *
 * Each image is fetched with a Referer header derived from the image URL's own
 * origin, its real format is detected from the file signature (not from the
 * Content-Type header), and it is re-encoded to JPEG with GD.
 */
readonly class ImageDownloader implements ImageDownloaderInterface
{
    public function __construct(
        private HttpClientInterface $httpClient,
        private MessageBusInterface $eventBus
    ) {
    }

    /**
     * Downloads a single image and writes it as a JPEG file.
     *
     * The destination's extension is always forced to ".jpg". Exceptions raised
     * by the HTTP client propagate unchanged.
     *
     * @param string $url         URL of the image to fetch
     * @param string $destination Target path; its extension is replaced by ".jpg"
     *
     * @throws \RuntimeException When the response is not an image, the format is
     *                           unsupported, or the image cannot be decoded or saved
     */
    public function download(string $url, string $destination): void
    {
        $options = [];
        $referer = $this->buildReferer($url);
        if (null !== $referer) {
            $options['headers'] = ['Referer' => $referer];
        }

        $response = $this->httpClient->request('GET', $url, $options);
        $contentType = $response->getHeaders()['content-type'][0] ?? '';

        // Media types are case-insensitive, so normalise before comparing.
        if (!str_starts_with(strtolower($contentType), 'image/')) {
            throw new \RuntimeException('Invalid content type: ' . $contentType);
        }

        $tempFilePath = $this->saveTempFile($response->getContent());

        try {
            $image = $this->createImageResource($tempFilePath, $contentType);
            if (false === $image) {
                throw new \RuntimeException('Failed to create image resource');
            }

            $destination = $this->ensureJpgExtension($destination);
            if (!imagejpeg($image, $destination)) {
                throw new \RuntimeException('Failed to save image as JPG');
            }
        } finally {
            // Single cleanup point for both the success and the failure paths.
            // No imagedestroy() here: it has been a no-op since PHP 8.0, the
            // GdImage object is released when it goes out of scope.
            if (file_exists($tempFilePath)) {
                unlink($tempFilePath);
            }
        }
    }

    /**
     * Downloads a list of images into a temporary directory.
     *
     * Files are named by their 1-based position in $urls ("001.jpg", "002.jpg", ...).
     * A failing image is logged and skipped; a progress event is dispatched after
     * each successful download.
     *
     * @param array         $urls    Image URLs, processed in iteration order
     * @param TempDirectory $tempDir Directory receiving the downloaded files
     * @param string        $jobId   Identifier forwarded in the progress events
     *
     * @return DownloadResult[] One entry per successfully downloaded image
     *
     * @throws \RuntimeException When not a single image could be downloaded
     */
    public function downloadBatch(array $urls, TempDirectory $tempDir, string $jobId): array
    {
        $results = [];
        $totalUrls = count($urls);
        // Running 1-based position, independent of the array keys, so that
        // string or sparse keys cannot break file naming or progress counts.
        $position = 0;

        foreach ($urls as $url) {
            ++$position;

            try {
                $destination = sprintf('%s/%03d.jpg', $tempDir->getPath(), $position);

                $this->download($url, $destination);
                $results[] = new DownloadResult($destination, $url);

                $this->dispatchProgressEvent($jobId, $position, $totalUrls);
            } catch (\Exception $e) {
                // Log the error but carry on with the remaining images.
                error_log("Failed to download image {$url}: " . $e->getMessage());
            }
        }

        if ([] === $results) {
            throw new \RuntimeException('Failed to download any images');
        }

        return $results;
    }

    /**
     * Builds the Referer value (scheme://host[:port]) from the image URL.
     *
     * The scheme defaults to "https" when the URL does not carry one.
     *
     * @return string|null The origin, or null when the URL has no host
     */
    private function buildReferer(string $url): ?string
    {
        $parts = parse_url($url);
        if (!is_array($parts) || '' === ($parts['host'] ?? '')) {
            return null;
        }

        $origin = ($parts['scheme'] ?? 'https') . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }

        return $origin;
    }

    /**
     * Dispatches a PageScrapingProgressed event on the event bus.
     */
    private function dispatchProgressEvent(string $jobId, int $currentPage, int $totalPages): void
    {
        $this->eventBus->dispatch(new PageScrapingProgressed(
            $jobId,
            new ScrapingProgress($currentPage, $totalPages)
        ));
    }

    /**
     * Writes raw image bytes to a new temporary file and returns its path.
     *
     * @throws \RuntimeException When the file cannot be created or written
     */
    private function saveTempFile(string $data): string
    {
        $tempFilePath = tempnam(sys_get_temp_dir(), 'manga_img_');
        if (false === $tempFilePath) {
            throw new \RuntimeException('Failed to create temporary file');
        }

        if (false === file_put_contents($tempFilePath, $data)) {
            // Do not leave an empty temp file behind when the write fails.
            if (file_exists($tempFilePath)) {
                unlink($tempFilePath);
            }

            throw new \RuntimeException('Failed to write temporary file');
        }

        return $tempFilePath;
    }

    /**
     * Decodes the file with the GD loader matching its detected format.
     *
     * @param string $filePath    Path of the file to decode
     * @param string $contentType Content-Type reported by the server, only used
     *                            in the error message
     *
     * @return \GdImage|false The decoded image, or false when GD fails to load it
     *
     * @throws \RuntimeException When the format is unknown or unsupported
     */
    private function createImageResource(string $filePath, string $contentType): \GdImage|false
    {
        $realFormat = $this->detectImageFormat($filePath);

        return match ($realFormat) {
            'webp' => imagecreatefromwebp($filePath),
            'png' => imagecreatefrompng($filePath),
            'jpeg' => imagecreatefromjpeg($filePath),
            'gif' => imagecreatefromgif($filePath),
            // The "BM" signature identifies Windows BMP, which is decoded by
            // imagecreatefrombmp(); imagecreatefromwbmp() is for Wireless Bitmap.
            'bmp' => imagecreatefrombmp($filePath),
            default => throw new \RuntimeException('Unsupported image format: ' . $realFormat . ' (content-type: ' . $contentType . ')'),
        };
    }

    /**
     * Detects the image format from the first bytes of the file.
     *
     * @return string One of "jpeg", "png", "webp", "gif" or "bmp"
     *
     * @throws \RuntimeException When the file cannot be read or the signature is unknown
     */
    private function detectImageFormat(string $filePath): string
    {
        $handle = fopen($filePath, 'rb');
        if (!$handle) {
            throw new \RuntimeException('Cannot open file for format detection');
        }

        $header = fread($handle, 12);
        fclose($handle);

        if (false === $header) {
            throw new \RuntimeException('Cannot read file for format detection');
        }

        // JPEG: starts with FF D8 FF
        if (str_starts_with($header, "\xFF\xD8\xFF")) {
            return 'jpeg';
        }

        // PNG: starts with 89 50 4E 47 0D 0A 1A 0A
        if (str_starts_with($header, "\x89PNG\r\n\x1A\n")) {
            return 'png';
        }

        // WebP: starts with RIFF....WEBP. substr() is used instead of strpos()
        // with an offset, which throws a ValueError on headers shorter than 8 bytes.
        if (str_starts_with($header, 'RIFF') && 'WEBP' === substr($header, 8, 4)) {
            return 'webp';
        }

        // GIF: starts with GIF87a or GIF89a
        if (str_starts_with($header, 'GIF87a') || str_starts_with($header, 'GIF89a')) {
            return 'gif';
        }

        // BMP: starts with BM
        if (str_starts_with($header, 'BM')) {
            return 'bmp';
        }

        throw new \RuntimeException('Unknown image format. Header: ' . bin2hex(substr($header, 0, 8)));
    }

    /**
     * Returns the given path with its extension replaced by ".jpg".
     */
    private function ensureJpgExtension(string $path): string
    {
        $info = pathinfo($path);

        return $info['dirname'] . '/' . $info['filename'] . '.jpg';
    }
}
|