Conversion des images webp et png vers jpeg
This commit is contained in:
parent
21b2adfa07
commit
5f15d14ae1
@@ -8,7 +8,6 @@ use App\Entity\Manga;
|
||||
use App\Event\PageScrappingProgressEvent;
|
||||
use App\Manager\FileSystemManager;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Exception;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
@@ -19,11 +18,10 @@ abstract class AbstractScraper implements ScraperInterface
|
||||
protected Client $httpClient;
|
||||
|
||||
public function __construct(
|
||||
protected FileSystemManager $fileSystemManager,
|
||||
protected FileSystemManager $fileSystemManager,
|
||||
protected EventDispatcherInterface $eventDispatcher,
|
||||
protected EntityManagerInterface $entityManager
|
||||
)
|
||||
{
|
||||
protected EntityManagerInterface $entityManager
|
||||
) {
|
||||
$this->httpClient = new Client();
|
||||
}
|
||||
|
||||
@@ -45,7 +43,8 @@ abstract class AbstractScraper implements ScraperInterface
|
||||
{
|
||||
try {
|
||||
$response = $this->httpClient->head($url);
|
||||
return $response->getStatusCode() === 200;
|
||||
|
||||
return 200 === $response->getStatusCode();
|
||||
} catch (RequestException $e) {
|
||||
return false;
|
||||
}
|
||||
@@ -60,14 +59,15 @@ abstract class AbstractScraper implements ScraperInterface
|
||||
$chapter->getVolume(),
|
||||
$chapter->getNumber()
|
||||
);
|
||||
return $volumeDir . '/' . $fileName;
|
||||
|
||||
return $volumeDir.'/'.$fileName;
|
||||
}
|
||||
|
||||
protected function createCbzFile(array $pageData, string $cbzFilePath): void
|
||||
{
|
||||
$zip = new \ZipArchive();
|
||||
|
||||
if ($zip->open($cbzFilePath, \ZipArchive::CREATE) === TRUE) {
|
||||
if (true === $zip->open($cbzFilePath, \ZipArchive::CREATE)) {
|
||||
foreach ($pageData as $page) {
|
||||
$zip->addFile($page['local_image_url'], basename($page['local_image_url']));
|
||||
}
|
||||
@@ -93,21 +93,67 @@ abstract class AbstractScraper implements ScraperInterface
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
* @throws Exception
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
|
||||
protected function downloadAndSaveImage(string $imageUrl, string $destinationPath): string
|
||||
{
|
||||
try {
|
||||
$response = $this->httpClient->get($imageUrl);
|
||||
$contentType = $response->getHeaderLine('Content-Type');
|
||||
|
||||
if (str_starts_with($contentType, 'image/')) {
|
||||
file_put_contents($destinationPath, $response->getBody()->getContents());
|
||||
} else {
|
||||
throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
|
||||
if (!str_starts_with($contentType, 'image/')) {
|
||||
throw new \Exception('Le contenu récupéré n\'est pas une image. Type de contenu : '.$contentType);
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage());
|
||||
|
||||
$imageData = $response->getBody()->getContents();
|
||||
$tempFilePath = $this->saveTempFile($imageData);
|
||||
|
||||
$image = $this->createImageResource($tempFilePath, $contentType);
|
||||
if (false === $image) {
|
||||
throw new \Exception('Échec de la création de la ressource image.');
|
||||
}
|
||||
|
||||
$destinationPath = $this->ensureJpgExtension($destinationPath);
|
||||
if (!imagejpeg($image, $destinationPath)) {
|
||||
imagedestroy($image);
|
||||
unlink($tempFilePath);
|
||||
throw new \Exception('Échec de la sauvegarde de l\'image en JPG.');
|
||||
}
|
||||
|
||||
imagedestroy($image);
|
||||
unlink($tempFilePath);
|
||||
|
||||
return $destinationPath;
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception('Erreur lors de la récupération de l\'image : '.$e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private function saveTempFile(string $data): string
|
||||
{
|
||||
$tempFilePath = tempnam(sys_get_temp_dir(), 'manga_img_');
|
||||
file_put_contents($tempFilePath, $data);
|
||||
|
||||
return $tempFilePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws \Exception
|
||||
*/
|
||||
private function createImageResource(string $filePath, string $contentType)
|
||||
{
|
||||
return match ($contentType) {
|
||||
'image/webp' => imagecreatefromwebp($filePath),
|
||||
'image/png' => imagecreatefrompng($filePath),
|
||||
'image/jpeg', 'image/jpg' => imagecreatefromjpeg($filePath),
|
||||
default => throw new \Exception('Format d\'image non pris en charge : '.$contentType),
|
||||
};
|
||||
}
|
||||
|
||||
private function ensureJpgExtension(string $path): string
|
||||
{
|
||||
$info = pathinfo($path);
|
||||
|
||||
return $info['dirname'].'/'.$info['filename'].'.jpg';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,17 +4,13 @@ namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Exception;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class HtmlScraper extends AbstractScraper
|
||||
{
|
||||
/**
|
||||
* @throws Exception
|
||||
* @throws \Exception
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
|
||||
@@ -23,15 +19,15 @@ class HtmlScraper extends AbstractScraper
|
||||
$chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());
|
||||
|
||||
if (!$chapterUrl) {
|
||||
throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
|
||||
throw new \Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
|
||||
}
|
||||
|
||||
$tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
|
||||
$tempDir = sys_get_temp_dir().'/'.uniqid('manga_scraper_');
|
||||
mkdir($tempDir);
|
||||
|
||||
$pageData = [];
|
||||
|
||||
if ($contentSource->getNextPageSelector() === null) {
|
||||
if (null === $contentSource->getNextPageSelector()) {
|
||||
// Lecteur vertical
|
||||
$html = $this->fetchHtml($chapterUrl);
|
||||
$pageData = $this->scrapeVerticalReader($html, $contentSource);
|
||||
@@ -43,13 +39,13 @@ class HtmlScraper extends AbstractScraper
|
||||
// Télécharger et sauvegarder les images
|
||||
foreach ($pageData as $index => &$page) {
|
||||
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
|
||||
$imagePath = $tempDir . '/' . $imageName;
|
||||
$imagePath = $tempDir.'/'.$imageName;
|
||||
|
||||
$this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
$destinationPath = $this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
|
||||
$this->dispatchProgressEvent($chapter, $index + 1, count($pageData));
|
||||
|
||||
$page['local_image_url'] = $imagePath;
|
||||
$page['local_image_url'] = $destinationPath;
|
||||
}
|
||||
|
||||
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
|
||||
@@ -59,26 +55,25 @@ class HtmlScraper extends AbstractScraper
|
||||
$this->entityManager->persist($chapter);
|
||||
$this->entityManager->flush();
|
||||
|
||||
// Nettoyage du répertoire temporaire
|
||||
$this->cleanupTempFiles($tempDir);
|
||||
|
||||
return $pageData;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws Exception
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
|
||||
{
|
||||
$chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);
|
||||
|
||||
if (!$this->isChapterUrlValid($chapterUrl)) {
|
||||
throw new \Exception("Invalid URL, check format and slug");
|
||||
throw new \Exception('Invalid URL, check format and slug');
|
||||
}
|
||||
|
||||
$html = $this->fetchHtml($chapterUrl);
|
||||
|
||||
if ($contentSource->getNextPageSelector() === null) {
|
||||
if (null === $contentSource->getNextPageSelector()) {
|
||||
return $this->scrapeVerticalReader($html, $contentSource);
|
||||
} else {
|
||||
return $this->scrapeHorizontalReader($chapterUrl, $contentSource);
|
||||
@@ -87,7 +82,7 @@ class HtmlScraper extends AbstractScraper
|
||||
|
||||
public function supports(string $scrapingType): bool
|
||||
{
|
||||
return $scrapingType === 'html';
|
||||
return 'html' === $scrapingType;
|
||||
}
|
||||
|
||||
private function scrapeVerticalReader(string $html, ContentSource $contentSource): array
|
||||
@@ -108,7 +103,7 @@ class HtmlScraper extends AbstractScraper
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws Exception
|
||||
* @throws \Exception
|
||||
*/
|
||||
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
|
||||
{
|
||||
@@ -135,18 +130,18 @@ class HtmlScraper extends AbstractScraper
|
||||
try {
|
||||
$response = $this->httpClient->get($url, [
|
||||
'http_errors' => true,
|
||||
'allow_redirects' => false
|
||||
'allow_redirects' => false,
|
||||
]);
|
||||
|
||||
$statusCode = $response->getStatusCode();
|
||||
|
||||
if ($statusCode >= 300 && $statusCode < 400 || $statusCode == 404) {
|
||||
throw new Exception('Chapter Not Found at ' . $url);
|
||||
if ($statusCode >= 300 && $statusCode < 400 || 404 == $statusCode) {
|
||||
throw new \Exception('Chapter Not Found at '.$url);
|
||||
}
|
||||
|
||||
return (string)$response->getBody();
|
||||
} catch (Exception $e) {
|
||||
throw new Exception('Bad Request: ' . $e->getMessage());
|
||||
return (string) $response->getBody();
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception('Bad Request: '.$e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,7 +159,7 @@ class HtmlScraper extends AbstractScraper
|
||||
$urlComponents = parse_url($mangaSource->getBaseUrl());
|
||||
$scheme = $urlComponents['scheme'];
|
||||
$host = $urlComponents['host'];
|
||||
$imgUrl = $scheme . '://' . $host . '/' . ltrim($imgUrl, '/');
|
||||
$imgUrl = $scheme.'://'.$host.'/'.ltrim($imgUrl, '/');
|
||||
}
|
||||
|
||||
return [
|
||||
|
||||
@@ -40,10 +40,10 @@ class JavascriptScraper extends AbstractScraper
|
||||
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
|
||||
$imagePath = $tempDir . '/' . $imageName;
|
||||
|
||||
$this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
$destinationPath = $this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
$this->dispatchProgressEvent($chapter, $index + 1, count($pageData));
|
||||
|
||||
$page['local_image_url'] = $imagePath;
|
||||
$page['local_image_url'] = $destinationPath;
|
||||
}
|
||||
|
||||
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
|
||||
|
||||
@@ -20,6 +20,6 @@ class ScraperFactory
|
||||
return $scraper;
|
||||
}
|
||||
}
|
||||
throw new \InvalidArgumentException('Unsupported scraping type: ' . $contentSource->getScrapingType());
|
||||
throw new \InvalidArgumentException('Unsupported scraping type: '.$contentSource->getScrapingType());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user