132 lines
4.2 KiB
PHP
132 lines
4.2 KiB
PHP
<?php
|
|
|
|
namespace App\Service\Scraper;
|
|
|
|
use App\Entity\Chapter;
|
|
use App\Entity\ContentSource;
|
|
use App\Entity\Manga;
|
|
use App\Event\PageScrappingProgressEvent;
|
|
use Doctrine\ORM\EntityManagerInterface;
|
|
use Exception;
|
|
use GuzzleHttp\Client;
|
|
use GuzzleHttp\Exception\GuzzleException;
|
|
use GuzzleHttp\Exception\RequestException;
|
|
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
|
|
|
/**
 * Shared base for scrapers: probes chapter URLs, downloads page images,
 * packages them into CBZ archives and dispatches scraping-progress events.
 */
abstract class AbstractScraper implements ScraperInterface
{
    /** Directory, relative to the project directory, under which CBZ archives are stored. */
    public const string PUBLIC_CBZ = '/public/cbz';

    protected Client $httpClient;

    public function __construct(
        protected string $projectDir,
        protected EventDispatcherInterface $eventDispatcher,
        protected EntityManagerInterface $entityManager,
    ) {
        $this->httpClient = new Client();
    }

    /**
     * Finds the first chapter URL that responds with HTTP 200.
     *
     * The manga's main slug is tried first, then each alternative slug in order.
     *
     * @return string|null the first valid URL, or null when no slug yields one
     */
    protected function getValidChapterUrl(ContentSource $contentSource, Manga $manga, float $chapterNumber): ?string
    {
        $slugs = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs() ?? []);

        foreach ($slugs as $slug) {
            $url = $contentSource->getChapterUrl($slug, $chapterNumber);

            if ($this->isChapterUrlValid($url)) {
                return $url;
            }
        }

        return null;
    }

    /**
     * Tells whether a HEAD request to the URL answers with status 200.
     *
     * Any Guzzle failure is treated as "not valid" instead of being propagated.
     */
    protected function isChapterUrlValid(string $url): bool
    {
        try {
            return $this->httpClient->head($url)->getStatusCode() === 200;
        } catch (GuzzleException) {
            // Catch the whole Guzzle hierarchy: in Guzzle 7 connection failures
            // (ConnectException) are not RequestExceptions, so catching only
            // RequestException would let DNS/timeout errors abort the slug probing.
            return false;
        }
    }

    /**
     * Builds the CBZ file path for a chapter, creating the volume directory if needed.
     *
     * @throws \RuntimeException when the volume directory cannot be created
     */
    protected function generateCbzPath(Manga $manga, Chapter $chapter): string
    {
        $volumeDir = $this->createDirectories($manga, $chapter->getVolume());
        $fileName = sprintf(
            '%s_vol%d_ch%s.cbz',
            $manga->getSlug(),
            $chapter->getVolume(),
            $chapter->getNumber(),
        );

        return $volumeDir . '/' . $fileName;
    }

    /**
     * Packs the downloaded page images into a CBZ (zip) archive.
     *
     * @param string $tempDir     not used by this implementation; kept for signature compatibility
     * @param array  $pageData    list of pages, each with a 'local_image_url' key holding the image path on disk
     * @param string $cbzFilePath destination archive path
     *
     * @throws \RuntimeException when the archive cannot be opened or written
     */
    protected function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
    {
        $zip = new \ZipArchive();

        $status = $zip->open($cbzFilePath, \ZipArchive::CREATE);
        if ($status !== true) {
            // Failing silently here would leave the caller believing the archive exists.
            throw new \RuntimeException(sprintf(
                'Impossible d\'ouvrir l\'archive CBZ "%s" (code ZipArchive : %d).',
                $cbzFilePath,
                (int) $status,
            ));
        }

        foreach ($pageData as $page) {
            // NOTE(review): a page that cannot be added is skipped, as before;
            // confirm whether an incomplete archive should be an error instead.
            $zip->addFile($page['local_image_url'], basename($page['local_image_url']));
        }

        // The archive is only written to disk on close(), so its result must be checked.
        if (!$zip->close()) {
            throw new \RuntimeException(sprintf('Impossible de finaliser l\'archive CBZ "%s".', $cbzFilePath));
        }
    }

    /**
     * Deletes the regular files directly inside the directory, then the directory itself.
     *
     * Does nothing when the directory does not exist. Sub-directories are not
     * removed, so rmdir() still fails (with a warning) if any are present.
     */
    protected function cleanupTempFiles(string $directory): void
    {
        if (!is_dir($directory)) {
            return;
        }

        // glob() returns false on error; treat that as "nothing to delete".
        foreach (glob($directory . '/*') ?: [] as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }

        rmdir($directory);
    }

    /**
     * Ensures the "<project>/public/cbz/<Slug> (<year>)/volume_<n>" directory exists.
     *
     * @return string the volume directory path
     *
     * @throws \RuntimeException when the directory cannot be created
     */
    protected function createDirectories(Manga $manga, int $volume): string
    {
        $mangaYear = $manga->getPublicationYear() ?? 'unknown';
        $mangaDir = sprintf(
            '%s/%s (%s)',
            $this->projectDir . self::PUBLIC_CBZ,
            ucfirst($manga->getSlug()),
            $mangaYear,
        );
        // Volume numbers are not zero-padded (e.g. volume_1); keep this format
        // stable, since changing it would no longer match directories created earlier.
        $volumeDir = sprintf('%s/volume_%d', $mangaDir, $volume);

        // The second is_dir() covers the race where another process creates the
        // directory between the first check and mkdir().
        if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
            throw new \RuntimeException(sprintf('Impossible de créer le répertoire "%s".', $volumeDir));
        }

        return $volumeDir;
    }

    /**
     * Removes ASCII control characters from the URL and trims surrounding whitespace.
     */
    protected function cleanImageUrl(string $url): string
    {
        // Strip control characters before trimming, so whitespace that sat next to a
        // leading/trailing control character is removed as well. preg_replace()
        // returns null on a PCRE error; fall back to the raw input in that case.
        return trim(preg_replace('/[\x00-\x1F\x7F]/', '', $url) ?? $url);
    }

    /**
     * Dispatches a progress event carrying the chapter id and the current/total page counts.
     */
    protected function dispatchProgressEvent(Chapter $chapter, int $currentPage, int $totalPages): void
    {
        $event = new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages);
        $this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
    }

    /**
     * Downloads an image and writes it to the destination path.
     *
     * Every failure inside the method that is an Exception (HTTP error, non-image
     * response, write error) is rethrown as a generic Exception with the original
     * attached as "previous".
     *
     * @throws Exception when the download fails, the response is not an image,
     *                   or the file cannot be written
     */
    protected function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        try {
            $response = $this->httpClient->get($imageUrl);
            $contentType = $response->getHeaderLine('Content-Type');

            // Media types are case-insensitive, so compare on a lower-cased copy.
            if (!str_starts_with(strtolower($contentType), 'image/')) {
                throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
            }

            if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
                throw new Exception('Impossible d\'écrire l\'image dans le fichier : ' . $destinationPath);
            }
        } catch (Exception $e) {
            // Keep the original exception as "previous" so its type and stack trace are not lost.
            throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
        }
    }
}
|