Files
Mangarr/src/Service/Scraper/AbstractScraper.php
Jérémy Guillot 4484be4d4e Added:
- Updated Reader
- fix image download for JavascriptScraper.php
2024-07-23 15:30:05 +02:00

132 lines
4.2 KiB
PHP

<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use App\Entity\Manga;
use App\Event\PageScrappingProgressEvent;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
/**
 * Shared plumbing for chapter scrapers: chapter URL probing, image download,
 * CBZ archive creation, temporary file cleanup and progress events.
 */
abstract class AbstractScraper implements ScraperInterface
{
    /** Path, appended to the project directory, under which CBZ archives are stored. */
    public const string PUBLIC_CBZ = '/public/cbz';

    /** HTTP client used for chapter URL probing and image downloads. */
    protected Client $httpClient;

    /**
     * @param string                   $projectDir      Base directory that PUBLIC_CBZ is appended to.
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used for page scraping progress events.
     * @param EntityManagerInterface   $entityManager   Not used by this base class; available to subclasses.
     */
    public function __construct(
        protected string $projectDir,
        protected EventDispatcherInterface $eventDispatcher,
        protected EntityManagerInterface $entityManager
    ) {
        $this->httpClient = new Client();
    }

    /**
     * Returns the first chapter URL that answers a HEAD request with HTTP 200.
     *
     * The manga's main slug is tried first, followed by its alternative slugs
     * in the order they are returned.
     *
     * @return string|null The first valid URL, or null when no slug yields one.
     */
    protected function getValidChapterUrl(ContentSource $contentSource, Manga $manga, float $chapterNumber): ?string
    {
        $slugs = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs() ?? []);

        foreach ($slugs as $slug) {
            $url = $contentSource->getChapterUrl($slug, $chapterNumber);
            if ($this->isChapterUrlValid($url)) {
                return $url;
            }
        }

        return null;
    }

    /**
     * Tells whether a HEAD request on the given URL returns exactly HTTP 200.
     *
     * Any Guzzle failure is reported as "not valid" rather than propagated.
     *
     * @return bool True only for a 200 response.
     */
    protected function isChapterUrlValid(string $url): bool
    {
        try {
            return $this->httpClient->head($url)->getStatusCode() === 200;
        } catch (GuzzleException) {
            // GuzzleException is the common interface of all Guzzle errors. Catching
            // only RequestException would let connection-level failures escape,
            // because in Guzzle 7 ConnectException is not a RequestException.
            return false;
        }
    }

    /**
     * Builds the CBZ file path for a chapter, creating the volume directory if needed.
     *
     * @return string Path of the form "<volume dir>/<slug>_vol<volume>_ch<number>.cbz".
     *
     * @throws \RuntimeException When the volume directory cannot be created.
     */
    protected function generateCbzPath(Manga $manga, Chapter $chapter): string
    {
        $volumeDir = $this->createDirectories($manga, $chapter->getVolume());
        $fileName = sprintf(
            '%s_vol%d_ch%s.cbz',
            $manga->getSlug(),
            $chapter->getVolume(),
            $chapter->getNumber()
        );

        return $volumeDir . '/' . $fileName;
    }

    /**
     * Packs the downloaded page images into a CBZ (zip) archive.
     *
     * Each page is stored at the archive root under the base name of its local path.
     *
     * @param string                               $tempDir     Unused here; kept for signature compatibility.
     * @param array<array-key, array<string, mixed>> $pageData    Page entries; each must provide 'local_image_url'.
     * @param string                               $cbzFilePath Destination archive path.
     *
     * @throws \RuntimeException When the archive cannot be opened or written.
     */
    protected function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
    {
        $zip = new \ZipArchive();

        // open() returns true on success and an integer error code otherwise.
        $status = $zip->open($cbzFilePath, \ZipArchive::CREATE);
        if ($status !== true) {
            throw new \RuntimeException(
                sprintf("Impossible d'ouvrir l'archive CBZ %s (code ZipArchive : %d)", $cbzFilePath, $status)
            );
        }

        foreach ($pageData as $page) {
            // A page that cannot be added is skipped, as before: the archive is built best-effort.
            $zip->addFile($page['local_image_url'], basename($page['local_image_url']));
        }

        // The archive is only written to disk on close(), so a failure here means no usable file.
        if (!$zip->close()) {
            throw new \RuntimeException(
                sprintf("Impossible d'écrire l'archive CBZ %s", $cbzFilePath)
            );
        }
    }

    /**
     * Deletes the regular files directly inside a directory, then removes the directory.
     *
     * Sub-directories are not descended into. Does nothing when the directory does not exist.
     */
    protected function cleanupTempFiles(string $directory): void
    {
        if (!is_dir($directory)) {
            return;
        }

        // glob() returns false on error; treat that as "nothing to delete".
        $files = glob($directory . '/*') ?: [];
        foreach ($files as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }

        rmdir($directory);
    }

    /**
     * Ensures the storage directory for a manga volume exists and returns its path.
     *
     * Layout: "<projectDir>/public/cbz/<Slug> (<year|unknown>)/volume_<n>".
     *
     * @return string Path of the volume directory.
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    protected function createDirectories(Manga $manga, int $volume): string
    {
        $mangaYear = $manga->getPublicationYear() ?? 'unknown';
        $mangaDir = sprintf(
            '%s/%s (%s)',
            $this->projectDir . self::PUBLIC_CBZ,
            ucfirst($manga->getSlug()),
            $mangaYear
        );

        // The volume number is intentionally NOT zero-padded. The previous code padded it
        // and then formatted the result with %d, which dropped the padding again, so
        // existing archives live under "volume_5", not "volume_05".
        $volumeDir = sprintf('%s/volume_%d', $mangaDir, $volume);

        // The second is_dir() covers a concurrent process creating the directory
        // between the first check and mkdir().
        if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
            throw new \RuntimeException(
                sprintf('Impossible de créer le répertoire %s', $volumeDir)
            );
        }

        return $volumeDir;
    }

    /**
     * Trims an image URL and strips ASCII control characters (0x00-0x1F and 0x7F) from it.
     */
    protected function cleanImageUrl(string $url): string
    {
        $trimmed = trim($url);

        // preg_replace() returns null on a PCRE error; fall back to the trimmed URL
        // so the declared string return type always holds.
        return preg_replace('/[\x00-\x1F\x7F]/', '', $trimmed) ?? $trimmed;
    }

    /**
     * Dispatches a progress event carrying the chapter id, current page and total pages.
     */
    protected function dispatchProgressEvent(Chapter $chapter, int $currentPage, int $totalPages): void
    {
        $event = new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages);
        $this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
    }

    /**
     * Downloads an image and writes it to the given path.
     *
     * The response is only saved when its Content-Type starts with "image/"
     * (compared case-insensitively).
     *
     * @throws Exception On any failure (HTTP error, non-image content, write error).
     *                   The original exception is available through getPrevious().
     */
    protected function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        try {
            $response = $this->httpClient->get($imageUrl);
            $contentType = $response->getHeaderLine('Content-Type');

            // Media types are case-insensitive, so normalise before comparing.
            if (!str_starts_with(strtolower($contentType), 'image/')) {
                throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
            }

            if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
                throw new Exception('Impossible d\'écrire le fichier : ' . $destinationPath);
            }
        } catch (Exception $e) {
            // Chain the cause so the original type and stack trace are not lost.
            throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), previous: $e);
        }
    }
}