- Update Reader
- Fix image download in JavascriptScraper.php
This commit is contained in:
Jérémy Guillot
2024-07-23 15:30:05 +02:00
parent c56f72b813
commit 4484be4d4e
11 changed files with 356 additions and 62 deletions

View File

@@ -7,6 +7,7 @@ use App\Entity\ContentSource;
use App\Entity\Manga;
use App\Event\PageScrappingProgressEvent;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
@@ -107,4 +108,24 @@ abstract class AbstractScraper implements ScraperInterface
$event = new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages);
$this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
}
/**
 * Downloads the resource at $imageUrl and writes it to $destinationPath.
 *
 * The body is written only when the response's Content-Type header starts
 * with "image/". Every Exception raised along the way (HTTP failure,
 * non-image content, failed write) is rethrown as an Exception whose message
 * carries the download-error prefix, with the original exception attached as
 * `previous` so the root cause and its stack trace stay available.
 *
 * @param string $imageUrl        URL requested through the scraper's HTTP client.
 * @param string $destinationPath Path handed to file_put_contents().
 *
 * @throws GuzzleException Kept from the original signature; exceptions that
 *                         extend Exception are caught and wrapped below.
 * @throws Exception       On any download, validation or write failure.
 */
protected function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    try {
        $response = $this->httpClient->get($imageUrl);
        $contentType = $response->getHeaderLine('Content-Type');

        // Guard clause: refuse anything the server does not label as an image.
        if (!str_starts_with($contentType, 'image/')) {
            throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
        }

        // file_put_contents() returns false on failure; surface it instead of
        // leaving a missing or partial file behind unnoticed.
        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new Exception('Impossible d\'écrire l\'image dans : ' . $destinationPath);
        }
    } catch (Exception $e) {
        // Same message as before; the third argument chains the original cause.
        throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
    }
}
}

View File

@@ -7,6 +7,7 @@ use App\Entity\ContentSource;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\DomCrawler\Crawler;
@@ -25,6 +26,7 @@ class HtmlScraper extends AbstractScraper
/**
* @throws Exception
* @throws GuzzleException
*/
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
{
@@ -116,6 +118,9 @@ class HtmlScraper extends AbstractScraper
return $pageData;
}
/**
* @throws Exception
*/
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
{
$pageData = [];
@@ -156,22 +161,6 @@ class HtmlScraper extends AbstractScraper
}
}
/**
 * Downloads the resource at $imageUrl and writes it to $destinationPath.
 *
 * The body is written only when the response's Content-Type header starts
 * with "image/". Every Exception raised along the way is rethrown with the
 * download-error prefix and the original exception attached as `previous`.
 *
 * @param string $imageUrl        URL requested through $this->client.
 * @param string $destinationPath Path handed to file_put_contents().
 *
 * @throws Exception On any download, validation or write failure.
 */
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    try {
        $response = $this->client->get($imageUrl);
        $contentType = $response->getHeaderLine('Content-Type');

        // Guard clause: refuse anything the server does not label as an image.
        if (!str_starts_with($contentType, 'image/')) {
            throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
        }

        // file_put_contents() returns false on failure; do not ignore it.
        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new Exception('Impossible d\'écrire l\'image dans : ' . $destinationPath);
        }
    } catch (Exception $e) {
        // Same message as before; the third argument chains the original cause.
        throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
    }
}
private function extractMangaPageData(string $html, ContentSource $mangaSource): array
{
$crawler = new Crawler($html);

View File

@@ -5,10 +5,14 @@ namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use Exception;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\Panther\Client as PantherClient;
class JavascriptScraper extends AbstractScraper
{
/**
* @throws Exception
*/
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
{
$manga = $chapter->getManga();
@@ -36,7 +40,7 @@ class JavascriptScraper extends AbstractScraper
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
$imagePath = $tempDir . '/' . $imageName;
file_put_contents($imagePath, file_get_contents($page['image_url']));
$this->downloadAndSaveImage($page['image_url'], $imagePath);
$this->dispatchProgressEvent($chapter, $index + 1, count($pageData));
$page['local_image_url'] = $imagePath;
@@ -52,9 +56,6 @@ class JavascriptScraper extends AbstractScraper
$this->cleanupTempFiles($tempDir);
return $pageData;
} catch (Exception $e) {
// Log the error
return false;
} finally {
$pantherClient->close();
}

View File

@@ -80,10 +80,4 @@ class MangadexScraper extends AbstractScraper
{
return $scrapingType === 'mangadex';
}
/**
 * Fetches $imageUrl through $this->client and writes the response body to
 * $destinationPath.
 *
 * No content-type validation or error wrapping happens here: whatever the
 * client's get() throws propagates to the caller unchanged.
 *
 * @param string $imageUrl        URL to request.
 * @param string $destinationPath Path handed to file_put_contents().
 */
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    $imageBytes = $this->client->get($imageUrl)->getBody()->getContents();

    file_put_contents($destinationPath, $imageBytes);
}
}