- Updated Reader
- fix image download for JavascriptScraper.php
This commit is contained in:
Jérémy Guillot
2024-07-23 15:30:05 +02:00
parent c56f72b813
commit 4484be4d4e
11 changed files with 356 additions and 62 deletions

View File

@@ -7,6 +7,7 @@ use App\Entity\ContentSource;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\DomCrawler\Crawler;
@@ -25,6 +26,7 @@ class HtmlScraper extends AbstractScraper
/**
* @throws Exception
* @throws GuzzleException
*/
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
{
@@ -116,6 +118,9 @@ class HtmlScraper extends AbstractScraper
return $pageData;
}
/**
* @throws Exception
*/
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
{
$pageData = [];
@@ -156,22 +161,6 @@ class HtmlScraper extends AbstractScraper
}
}
/**
 * Fetches $imageUrl through $this->client and writes the response body to $destinationPath.
 *
 * The body is written only when the response's Content-Type header starts with "image/"
 * (compared case-insensitively). Any failure inside the method - the HTTP call, a non-image
 * Content-Type, or a failed write - is rethrown as an Exception whose message starts with
 * "Erreur lors de la récupération de l'image : ", with the original exception attached as
 * its previous exception so the root cause stays available to callers and logs.
 *
 * @param string $imageUrl        URL passed to the client's get() call.
 * @param string $destinationPath Path handed to file_put_contents().
 *
 * @throws Exception When the download fails, the content is not an image, or the file cannot be written.
 */
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    try {
        $response = $this->client->get($imageUrl);
        $contentType = $response->getHeaderLine('Content-Type');

        // Media types are case-insensitive, so compare against a lowercased copy while
        // keeping the header value as received for the error message.
        if (!str_starts_with(strtolower($contentType), 'image/')) {
            throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
        }

        // file_put_contents() signals failure by returning false (plus a warning), not by
        // throwing; without this check an unwritable path would look like a success.
        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new Exception('Impossible d\'écrire l\'image dans le fichier : ' . $destinationPath);
        }
    } catch (Exception $e) {
        // Chain the caught exception so its type and stack trace are not lost by the wrapping.
        throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
    }
}
private function extractMangaPageData(string $html, ContentSource $mangaSource): array
{
$crawler = new Crawler($html);