Conversion des images webp et png vers jpeg

This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2024-09-30 22:16:20 +02:00
parent 21b2adfa07
commit 5f15d14ae1
13 changed files with 226 additions and 158 deletions

View File

@@ -4,17 +4,13 @@ namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\DomCrawler\Crawler;
class HtmlScraper extends AbstractScraper
{
/**
* @throws Exception
* @throws \Exception
* @throws GuzzleException
*/
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
@@ -23,15 +19,15 @@ class HtmlScraper extends AbstractScraper
$chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());
if (!$chapterUrl) {
throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
throw new \Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
}
$tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
$tempDir = sys_get_temp_dir().'/'.uniqid('manga_scraper_');
mkdir($tempDir);
$pageData = [];
if ($contentSource->getNextPageSelector() === null) {
if (null === $contentSource->getNextPageSelector()) {
// Lecteur vertical
$html = $this->fetchHtml($chapterUrl);
$pageData = $this->scrapeVerticalReader($html, $contentSource);
@@ -43,13 +39,13 @@ class HtmlScraper extends AbstractScraper
// Télécharger et sauvegarder les images
foreach ($pageData as $index => &$page) {
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
$imagePath = $tempDir . '/' . $imageName;
$imagePath = $tempDir.'/'.$imageName;
$this->downloadAndSaveImage($page['image_url'], $imagePath);
$destinationPath = $this->downloadAndSaveImage($page['image_url'], $imagePath);
$this->dispatchProgressEvent($chapter, $index + 1, count($pageData));
$page['local_image_url'] = $imagePath;
$page['local_image_url'] = $destinationPath;
}
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
@@ -59,26 +55,25 @@ class HtmlScraper extends AbstractScraper
$this->entityManager->persist($chapter);
$this->entityManager->flush();
// Nettoyage du répertoire temporaire
$this->cleanupTempFiles($tempDir);
return $pageData;
}
/**
* @throws Exception
* @throws \Exception
*/
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
$chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);
if (!$this->isChapterUrlValid($chapterUrl)) {
throw new \Exception("Invalid URL, check format and slug");
throw new \Exception('Invalid URL, check format and slug');
}
$html = $this->fetchHtml($chapterUrl);
if ($contentSource->getNextPageSelector() === null) {
if (null === $contentSource->getNextPageSelector()) {
return $this->scrapeVerticalReader($html, $contentSource);
} else {
return $this->scrapeHorizontalReader($chapterUrl, $contentSource);
@@ -87,7 +82,7 @@ class HtmlScraper extends AbstractScraper
public function supports(string $scrapingType): bool
{
return $scrapingType === 'html';
return 'html' === $scrapingType;
}
private function scrapeVerticalReader(string $html, ContentSource $contentSource): array
@@ -108,7 +103,7 @@ class HtmlScraper extends AbstractScraper
}
/**
* @throws Exception
* @throws \Exception
*/
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
{
@@ -135,18 +130,18 @@ class HtmlScraper extends AbstractScraper
try {
$response = $this->httpClient->get($url, [
'http_errors' => true,
'allow_redirects' => false
'allow_redirects' => false,
]);
$statusCode = $response->getStatusCode();
if ($statusCode >= 300 && $statusCode < 400 || $statusCode == 404) {
throw new Exception('Chapter Not Found at ' . $url);
if ($statusCode >= 300 && $statusCode < 400 || 404 == $statusCode) {
throw new \Exception('Chapter Not Found at '.$url);
}
return (string)$response->getBody();
} catch (Exception $e) {
throw new Exception('Bad Request: ' . $e->getMessage());
return (string) $response->getBody();
} catch (\Exception $e) {
throw new \Exception('Bad Request: '.$e->getMessage());
}
}
@@ -164,7 +159,7 @@ class HtmlScraper extends AbstractScraper
$urlComponents = parse_url($mangaSource->getBaseUrl());
$scheme = $urlComponents['scheme'];
$host = $urlComponents['host'];
$imgUrl = $scheme . '://' . $host . '/' . ltrim($imgUrl, '/');
$imgUrl = $scheme.'://'.$host.'/'.ltrim($imgUrl, '/');
}
return [