/**
 * Downloads the image at $url and stores it as a JPEG file.
 *
 * The response body is written to a temporary file, decoded through
 * createImageResource() and re-encoded with imagejpeg(). The destination's
 * extension is replaced by ".jpg" via ensureJpgExtension(), so the file that
 * is actually written may differ from the path passed in.
 *
 * @param string $url         Address requested with a GET call.
 * @param string $destination Target path; its extension is forced to ".jpg".
 *
 * @throws \RuntimeException When the content type is not image/*, or the
 *                           image cannot be decoded or written as JPEG.
 */
public function download(string $url, string $destination): void
{
    $response = $this->httpClient->request('GET', $url);
    $contentType = $response->getHeaders()['content-type'][0] ?? '';

    // Media types are case-insensitive, so compare a lowercased copy while
    // reporting the header value exactly as it was received.
    if (!str_starts_with(strtolower($contentType), 'image/')) {
        throw new \RuntimeException('Invalid content type: ' . $contentType);
    }

    $tempFilePath = $this->saveTempFile($response->getContent());

    try {
        $image = $this->createImageResource($tempFilePath, $contentType);
        if (false === $image) {
            throw new \RuntimeException('Failed to create image resource');
        }

        try {
            if (!imagejpeg($image, $this->ensureJpgExtension($destination))) {
                throw new \RuntimeException('Failed to save image as JPG');
            }
        } finally {
            // Release the decoded image on success and on failure alike.
            imagedestroy($image);
        }
    } finally {
        // Single cleanup point for the temporary file, whatever happened above.
        if (file_exists($tempFilePath)) {
            unlink($tempFilePath);
        }
    }
}
/**
 * Writes raw bytes to a freshly created temporary file.
 *
 * The caller owns the returned file and is responsible for deleting it.
 *
 * @param string $data Bytes to store.
 *
 * @return string Path of the temporary file.
 *
 * @throws \RuntimeException When the file cannot be created or written.
 */
private function saveTempFile(string $data): string
{
    $tempFilePath = tempnam(sys_get_temp_dir(), 'manga_img_');
    if (false === $tempFilePath) {
        throw new \RuntimeException('Failed to create temporary file');
    }

    if (false === file_put_contents($tempFilePath, $data)) {
        // tempnam() already created the file; do not leave it behind.
        unlink($tempFilePath);

        throw new \RuntimeException('Failed to write temporary file');
    }

    return $tempFilePath;
}

/**
 * Decodes the image stored at $filePath into a GD image.
 *
 * The decoder is chosen from the file's leading bytes (detectImageFormat()),
 * not from the HTTP content type; $contentType only appears in the error
 * message.
 *
 * @param string $filePath    Path of the file to decode.
 * @param string $contentType Content type reported by the server.
 *
 * @return \GdImage|false The decoded image, or false when the GD decoder
 *                        rejects the file.
 *
 * @throws \RuntimeException When the format is unknown or unsupported.
 */
private function createImageResource(string $filePath, string $contentType): \GdImage|false
{
    $realFormat = $this->detectImageFormat($filePath);

    return match ($realFormat) {
        'webp' => imagecreatefromwebp($filePath),
        'png' => imagecreatefrompng($filePath),
        'jpeg' => imagecreatefromjpeg($filePath),
        'gif' => imagecreatefromgif($filePath),
        // "BM" files are Windows bitmaps. imagecreatefromwbmp() reads the
        // unrelated WBMP (Wireless Bitmap) format and cannot decode them.
        'bmp' => imagecreatefrombmp($filePath),
        default => throw new \RuntimeException('Unsupported image format: ' . $realFormat . ' (content-type: ' . $contentType . ')'),
    };
}

/**
 * Identifies an image format from the first bytes of a file.
 *
 * @param string $filePath Path of the file to inspect.
 *
 * @return string One of 'jpeg', 'png', 'webp', 'gif' or 'bmp'.
 *
 * @throws \RuntimeException When the file cannot be opened or read, or its
 *                           signature matches none of the known formats.
 */
private function detectImageFormat(string $filePath): string
{
    $handle = fopen($filePath, 'rb');
    if (false === $handle) {
        throw new \RuntimeException('Cannot open file for format detection');
    }

    try {
        $header = fread($handle, 12);
    } finally {
        fclose($handle);
    }

    if (false === $header) {
        throw new \RuntimeException('Cannot read file for format detection');
    }

    return match (true) {
        // JPEG: starts with FF D8 FF
        str_starts_with($header, "\xFF\xD8\xFF") => 'jpeg',
        // PNG: starts with 89 50 4E 47 0D 0A 1A 0A
        str_starts_with($header, "\x89PNG\r\n\x1A\n") => 'png',
        // WebP: RIFF, four size bytes, then WEBP. substr() yields '' for a
        // header shorter than 8 bytes, where strpos() with offset 8 would
        // throw a ValueError.
        str_starts_with($header, 'RIFF') && 'WEBP' === substr($header, 8, 4) => 'webp',
        // GIF: starts with GIF87a or GIF89a
        str_starts_with($header, 'GIF87a'), str_starts_with($header, 'GIF89a') => 'gif',
        // BMP: starts with BM
        str_starts_with($header, 'BM') => 'bmp',
        default => throw new \RuntimeException('Unknown image format. Header: ' . bin2hex(substr($header, 0, 8))),
    };
}

/**
 * Returns $path with its extension replaced by ".jpg".
 *
 * The directory part comes from pathinfo(), so a bare file name is returned
 * prefixed with "./".
 *
 * @param string $path Path whose extension should be normalised.
 *
 * @return string The same location with a ".jpg" extension.
 */
private function ensureJpgExtension(string $path): string
{
    $info = pathinfo($path);

    return $info['dirname'] . '/' . $info['filename'] . '.jpg';
}