Added:
- Refactored MangaScraperService (not used everywhere now) - Added JavascriptScraper.php - Added alternative slugs in Manga.php - Improved the manga edit form
This commit is contained in:
110
src/Service/Scraper/AbstractScraper.php
Normal file
110
src/Service/Scraper/AbstractScraper.php
Normal file
@@ -0,0 +1,110 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use App\Entity\Manga;
|
||||
use App\Event\PageScrappingProgressEvent;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
/**
 * Base class for chapter scrapers.
 *
 * Provides the helpers the concrete scrapers share: probing candidate chapter
 * URLs, building the CBZ destination path, packing downloaded pages into a
 * CBZ archive, removing the temporary download directory and dispatching
 * per-page progress events.
 */
abstract class AbstractScraper implements ScraperInterface
{
    /** Path, appended to the project directory, under which CBZ archives are stored. */
    public const string PUBLIC_CBZ = '/public/cbz';

    /** HTTP client used to probe chapter URLs; also available to subclasses. */
    protected Client $httpClient;

    /**
     * @param string                   $projectDir      Project root; CBZ files go under $projectDir . self::PUBLIC_CBZ.
     * @param EventDispatcherInterface $eventDispatcher Receives the page progress events.
     * @param EntityManagerInterface   $entityManager   Used by subclasses to persist the chapter's CBZ path.
     */
    public function __construct(
        protected string $projectDir,
        protected EventDispatcherInterface $eventDispatcher,
        protected EntityManagerInterface $entityManager
    ) {
        $this->httpClient = new Client();
    }

    /**
     * Finds the first chapter URL that answers, trying the manga's main slug
     * and then each alternative slug.
     *
     * @return string|null The first URL accepted by isChapterUrlValid(), or null when none is.
     */
    protected function getValidChapterUrl(ContentSource $contentSource, Manga $manga, float $chapterNumber): ?string
    {
        // array_unique keeps the first occurrence, so the main slug is still tried first
        // while a slug repeated in the alternatives is not probed twice.
        $slugs = array_unique(array_merge([$manga->getSlug()], $manga->getAlternativeSlugs() ?? []));

        foreach ($slugs as $slug) {
            $url = $contentSource->getChapterUrl($slug, $chapterNumber);
            if ($this->isChapterUrlValid($url)) {
                return $url;
            }
        }

        return null;
    }

    /**
     * Tells whether a HEAD request on the URL ends with a 200 status.
     *
     * Any Guzzle failure (HTTP error status, connection error, too many
     * redirects, ...) is treated as "not valid" instead of being propagated.
     */
    protected function isChapterUrlValid(string $url): bool
    {
        try {
            return $this->httpClient->head($url)->getStatusCode() === 200;
        } catch (GuzzleException $e) {
            // RequestException alone does not cover connection failures; GuzzleException
            // is the interface implemented by every exception Guzzle throws.
            return false;
        }
    }

    /**
     * Builds the destination path of a chapter's CBZ file.
     *
     * Side effect: the volume directory is created if it does not exist yet.
     */
    protected function generateCbzPath(Manga $manga, Chapter $chapter): string
    {
        $volumeDir = $this->createDirectories($manga, $chapter->getVolume());
        $fileName = sprintf(
            '%s_vol%d_ch%s.cbz',
            $manga->getSlug(),
            $chapter->getVolume(),
            $chapter->getNumber()
        );

        return $volumeDir . '/' . $fileName;
    }

    /**
     * Packs the downloaded pages into a CBZ (zip) archive.
     *
     * @param string $tempDir     Unused here; kept so existing callers keep working.
     * @param array  $pageData    Entries must carry a 'local_image_url' key pointing to a local file.
     * @param string $cbzFilePath Destination archive; an existing file is replaced.
     *
     * @throws \RuntimeException When a page file is missing or the archive cannot be written.
     */
    protected function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
    {
        // Validate every page before touching the archive so a failure cannot
        // leave a partially written CBZ behind.
        foreach ($pageData as $page) {
            $localPath = $page['local_image_url'] ?? null;
            if (!is_string($localPath) || !is_file($localPath)) {
                throw new \RuntimeException(sprintf(
                    'Missing page file for CBZ archive "%s": %s',
                    $cbzFilePath,
                    var_export($localPath, true)
                ));
            }
        }

        $zip = new \ZipArchive();

        // OVERWRITE starts from an empty archive: re-scraping a chapter must not
        // keep pages from a previous run.
        $status = $zip->open($cbzFilePath, \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
        if ($status !== true) {
            throw new \RuntimeException(sprintf(
                'Unable to open CBZ archive "%s" (ZipArchive error code %s)',
                $cbzFilePath,
                var_export($status, true)
            ));
        }

        foreach ($pageData as $page) {
            if (!$zip->addFile($page['local_image_url'], basename($page['local_image_url']))) {
                throw new \RuntimeException(sprintf(
                    'Unable to add "%s" to CBZ archive "%s"',
                    $page['local_image_url'],
                    $cbzFilePath
                ));
            }
        }

        if (!$zip->close()) {
            throw new \RuntimeException(sprintf('Unable to write CBZ archive "%s"', $cbzFilePath));
        }
    }

    /**
     * Deletes the regular files directly inside a directory, then the directory itself.
     *
     * Sub-directories are not removed; rmdir() fails with a warning if any remain.
     */
    protected function cleanupTempFiles(string $directory): void
    {
        if (!is_dir($directory)) {
            return;
        }

        // glob() returns false on error; treat that as "nothing to delete".
        foreach (glob($directory . '/*') ?: [] as $file) {
            if (is_file($file)) {
                unlink($file);
            }
        }

        rmdir($directory);
    }

    /**
     * Ensures the "<project>/public/cbz/<Slug> (<year>)/volume_<n>" directory exists.
     *
     * @return string The volume directory path.
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    protected function createDirectories(Manga $manga, int $volume): string
    {
        $mangaYear = $manga->getPublicationYear() ?? 'unknown';
        $mangaDir = sprintf(
            '%s/%s (%s)',
            $this->projectDir . self::PUBLIC_CBZ,
            ucfirst($manga->getSlug()),
            $mangaYear
        );

        // NOTE(review): the previous code zero-padded the volume with '%02d' and then
        // formatted that string with '%d', which drops the padding again. The effective
        // directory name has therefore always been "volume_<n>"; it is kept unchanged so
        // the on-disk layout stays the same.
        $volumeDir = sprintf('%s/volume_%d', $mangaDir, $volume);

        // The second is_dir() covers the race where another process creates the
        // directory between the check and mkdir().
        if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
            throw new \RuntimeException(sprintf('Unable to create directory "%s"', $volumeDir));
        }

        return $volumeDir;
    }

    /**
     * Trims the URL and strips ASCII control characters (0x00-0x1F and 0x7F) from it.
     */
    protected function cleanImageUrl(string $url): string
    {
        // preg_replace() returns null on a PCRE error; fall back to an empty string
        // to honour the declared return type.
        return preg_replace('/[\x00-\x1F\x7F]/', '', trim($url)) ?? '';
    }

    /**
     * Dispatches a PageScrappingProgressEvent carrying the chapter id and page counters.
     */
    protected function dispatchProgressEvent(Chapter $chapter, int $currentPage, int $totalPages): void
    {
        $event = new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages);
        $this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
    }
}
|
||||
197
src/Service/Scraper/HtmlScraper.php
Normal file
197
src/Service/Scraper/HtmlScraper.php
Normal file
@@ -0,0 +1,197 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Exception;
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Scraper for sources whose reader is plain HTML fetched over HTTP.
 *
 * Two reader layouts are handled, chosen by the content source's next-page
 * selector: when it is null every image is read from a single page
 * ("vertical" reader); otherwise one image is read per page and the next-page
 * link is followed ("horizontal" reader).
 *
 * The HTTP client inherited from AbstractScraper is used for all requests, so
 * no constructor override is needed; the inherited constructor keeps the same
 * arguments.
 */
class HtmlScraper extends AbstractScraper
{
    /**
     * Downloads every page of the chapter, packs them into a CBZ archive and
     * stores the archive path on the chapter.
     *
     * @return array|bool The page data, each entry holding 'image_url',
     *                    'page_number' and 'local_image_url'. This
     *                    implementation throws instead of returning false.
     *
     * @throws Exception When no chapter URL answers, no image is found, or a download fails.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        $manga = $chapter->getManga();
        $chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());

        if (!$chapterUrl) {
            throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
        }

        if ($contentSource->getNextPageSelector() === null) {
            // Vertical reader: all images are on one page.
            $pageData = $this->scrapeVerticalReader($this->fetchHtml($chapterUrl), $contentSource);
        } else {
            // Horizontal (paginated) reader.
            $pageData = $this->scrapeHorizontalReader($chapterUrl, $contentSource);
        }

        // Without this guard an empty archive path would be recorded on the chapter.
        if ($pageData === []) {
            throw new Exception('No image found at ' . $chapterUrl);
        }

        $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
        if (!mkdir($tempDir) && !is_dir($tempDir)) {
            throw new Exception('Unable to create temporary directory ' . $tempDir);
        }

        try {
            $totalPages = count($pageData);

            // Download and save the images.
            foreach ($pageData as $index => &$page) {
                // Fall back to "jpg" when the URL path carries no extension, so the
                // archived file never ends with a bare dot.
                $extension = pathinfo((string) parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION) ?: 'jpg';
                $imagePath = $tempDir . '/' . sprintf('%03d.%s', $index + 1, $extension);

                $this->downloadAndSaveImage($page['image_url'], $imagePath);
                $this->dispatchProgressEvent($chapter, $index + 1, $totalPages);

                $page['local_image_url'] = $imagePath;
            }
            unset($page); // break the reference left behind by the by-reference loop

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();
        } finally {
            // Remove the temporary directory on success and on failure alike.
            $this->cleanupTempFiles($tempDir);
        }

        return $pageData;
    }

    /**
     * Lists the pages found for a slug/chapter number without downloading any image.
     *
     * @return array Entries holding 'image_url' and 'page_number'.
     *
     * @throws Exception When the chapter URL does not answer or a page cannot be read.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);

        if (!$this->isChapterUrlValid($chapterUrl)) {
            throw new \Exception("Invalid URL, check format and slug");
        }

        if ($contentSource->getNextPageSelector() === null) {
            return $this->scrapeVerticalReader($this->fetchHtml($chapterUrl), $contentSource);
        }

        // The paginated reader fetches each page itself, starting with $chapterUrl,
        // so the first page is not fetched here as well.
        return $this->scrapeHorizontalReader($chapterUrl, $contentSource);
    }

    public function supports(string $scrapingType): bool
    {
        return $scrapingType === 'html';
    }

    /**
     * Collects every image matched by the image selector on a single page.
     *
     * The 'src' attribute is used, falling back to 'data-src' when it is empty.
     * Elements with neither are skipped; relative URLs are made absolute.
     */
    private function scrapeVerticalReader(string $html, ContentSource $contentSource): array
    {
        $crawler = new Crawler($html);

        $pageData = [];
        foreach ($crawler->filter($contentSource->getImageSelector()) as $image) {
            $imgUrl = $this->cleanImageUrl((string) ($image->getAttribute('src') ?: $image->getAttribute('data-src')));
            if ($imgUrl === '') {
                continue;
            }

            $pageData[] = [
                'image_url' => $this->toAbsoluteUrl($imgUrl, $contentSource),
                'page_number' => count($pageData) + 1,
            ];
        }

        return $pageData;
    }

    /**
     * Walks a paginated reader: one image per page, following the next-page
     * link until there is none or it leads back to a page already visited.
     *
     * @throws Exception When a page cannot be fetched or holds no image.
     */
    private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
    {
        $pageData = [];
        $visited = [];
        $currentPageUrl = $chapterUrl;

        // The visited set stops the walk when next-page links form a cycle.
        while ($currentPageUrl && !isset($visited[$currentPageUrl])) {
            $visited[$currentPageUrl] = true;

            $page = $this->extractMangaPageData($this->fetchHtml($currentPageUrl), $contentSource);

            $pageData[] = [
                'image_url' => $this->cleanImageUrl($page['image_url']),
                'page_number' => count($pageData) + 1,
            ];

            $currentPageUrl = $page['next_page_url'];
        }

        return $pageData;
    }

    /**
     * Fetches a page body. Redirects are not followed and are reported as a
     * missing chapter.
     *
     * @throws Exception On any request failure (the cause is kept as "previous") or on a 3xx answer.
     */
    private function fetchHtml(string $url): string
    {
        try {
            // http_errors makes Guzzle throw on 4xx/5xx, so only 2xx/3xx reach the check below.
            $response = $this->httpClient->get($url, [
                'http_errors' => true,
                'allow_redirects' => false,
            ]);
            $body = (string) $response->getBody();
        } catch (Exception $e) {
            throw new Exception('Bad Request: ' . $e->getMessage(), 0, $e);
        }

        $statusCode = $response->getStatusCode();
        if ($statusCode >= 300 && $statusCode < 400) {
            throw new Exception('Bad Request: Chapter Not Found at ' . $url);
        }

        return $body;
    }

    /**
     * Downloads an image and writes it to the destination path.
     *
     * @throws Exception When the request fails, the response is not an image, or the file cannot be written.
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        try {
            $response = $this->httpClient->get($imageUrl);
            $contentType = $response->getHeaderLine('Content-Type');

            // Media types are case-insensitive.
            if (!str_starts_with(strtolower($contentType), 'image/')) {
                throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
            }

            if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
                throw new Exception('Unable to write ' . $destinationPath);
            }
        } catch (Exception $e) {
            throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Reads the image URL and the next-page URL from one page of a paginated reader.
     *
     * @return array{image_url: string, next_page_url: ?string}
     *
     * @throws Exception When the image selector matches nothing or the image has no URL.
     */
    private function extractMangaPageData(string $html, ContentSource $mangaSource): array
    {
        $crawler = new Crawler($html);

        $image = $crawler->filter($mangaSource->getImageSelector());
        if ($image->count() === 0) {
            throw new Exception('No image matches the configured image selector');
        }

        // "?:" rather than "??" so an empty src placeholder still falls back to data-src.
        $imgUrl = $this->cleanImageUrl((string) ($image->attr('src') ?: $image->attr('data-src')));
        if ($imgUrl === '') {
            throw new Exception('The matched image has neither a src nor a data-src attribute');
        }

        $nextLink = $crawler->filter($mangaSource->getNextPageSelector());
        $nextHref = $nextLink->count() > 0 ? trim((string) $nextLink->attr('href')) : '';

        // Empty, fragment-only and javascript: links do not lead to another page.
        $nextUrl = null;
        if ($nextHref !== '' && !str_starts_with($nextHref, '#') && stripos($nextHref, 'javascript:') !== 0) {
            $nextUrl = $this->toAbsoluteUrl($nextHref, $mangaSource);
        }

        return [
            'image_url' => $this->toAbsoluteUrl($imgUrl, $mangaSource),
            'next_page_url' => $nextUrl,
        ];
    }

    /**
     * Converts a relative URL to an absolute one using the content source's base URL.
     *
     * http(s) URLs are returned untouched, protocol-relative URLs ("//host/path")
     * receive the base scheme, and anything else is resolved against the root of
     * the base host. The URL is returned unchanged when the base URL has no
     * scheme or host.
     */
    private function toAbsoluteUrl(string $url, ContentSource $contentSource): string
    {
        if (preg_match('/^https?:\/\//i', $url) === 1) {
            return $url;
        }

        $base = parse_url((string) $contentSource->getBaseUrl()) ?: [];
        if (!isset($base['scheme'], $base['host'])) {
            return $url;
        }

        if (str_starts_with($url, '//')) {
            return $base['scheme'] . ':' . $url;
        }

        $port = isset($base['port']) ? ':' . $base['port'] : '';

        return $base['scheme'] . '://' . $base['host'] . $port . '/' . ltrim($url, '/');
    }
}
|
||||
188
src/Service/Scraper/JavascriptScraper.php
Normal file
188
src/Service/Scraper/JavascriptScraper.php
Normal file
@@ -0,0 +1,188 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use Exception;
|
||||
use Symfony\Component\Panther\Client as PantherClient;
|
||||
|
||||
/**
 * Scraper for sources whose reader needs JavaScript, driven through a
 * Chrome browser started with Symfony Panther.
 *
 * Only the single-page ("vertical") reader is implemented; the paginated
 * reader is a stub that returns no pages.
 */
class JavascriptScraper extends AbstractScraper
{
    /**
     * Opens the chapter in a browser, downloads its images, packs them into a
     * CBZ archive and stores the archive path on the chapter.
     *
     * @return array|bool The page data on success, false when any step after
     *                    the URL check fails (the failure is written to the PHP error log).
     *
     * @throws Exception When no chapter URL answers.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        $manga = $chapter->getManga();
        $chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());

        if (!$chapterUrl) {
            throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
        }

        // Start the browser only once there is something to load, so the early
        // exit above cannot leave a browser behind.
        $pantherClient = PantherClient::createChromeClient();
        $tempDir = null;

        try {
            $pantherClient->request('GET', $chapterUrl);

            $this->selectChapter($pantherClient, $chapter, $contentSource);

            $pageData = $contentSource->getNextPageSelector() === null
                ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
                : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);

            // Without this guard an empty archive path would be recorded on the chapter.
            if ($pageData === []) {
                throw new Exception('No image found at ' . $chapterUrl);
            }

            $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
            if (!mkdir($tempDir) && !is_dir($tempDir)) {
                throw new Exception('Unable to create temporary directory ' . $tempDir);
            }

            $totalPages = count($pageData);

            // Download and save the images.
            foreach ($pageData as $index => &$page) {
                $extension = pathinfo((string) parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION);
                $imagePath = $tempDir . '/' . sprintf('%03d.%s', $index + 1, $extension);

                // file_get_contents() returns false on failure; writing that would
                // silently produce an empty page file.
                $imageContent = file_get_contents($page['image_url']);
                if ($imageContent === false || file_put_contents($imagePath, $imageContent) === false) {
                    throw new Exception('Unable to download image ' . $page['image_url']);
                }

                // Same progress reporting as the other scrapers.
                $this->dispatchProgressEvent($chapter, $index + 1, $totalPages);

                $page['local_image_url'] = $imagePath;
            }
            unset($page); // break the reference left behind by the by-reference loop

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();

            return $pageData;
        } catch (Exception $e) {
            // No logger is injected in this class, so record the failure in the PHP error log.
            error_log(sprintf(
                'JavascriptScraper: failed to scrape chapter %s from %s: %s',
                $chapter->getNumber(),
                $chapterUrl,
                $e->getMessage()
            ));

            return false;
        } finally {
            if ($tempDir !== null) {
                $this->cleanupTempFiles($tempDir);
            }
            $pantherClient->close();
        }
    }

    /**
     * Lists the pages found for a slug/chapter number without downloading any image.
     *
     * @return array Entries holding 'image_url' and 'page_number'.
     *
     * @throws Exception When the chapter URL does not answer or the page cannot be read.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);

        if (!$this->isChapterUrlValid($chapterUrl)) {
            throw new \Exception("Invalid URL, check format and slug");
        }

        $pantherClient = PantherClient::createChromeClient();

        try {
            $pantherClient->request('GET', $chapterUrl);

            // Throw-away chapter, only used to carry the number to selectChapter().
            $chapter = new Chapter();
            $chapter->setNumber((float) $chapterNumber);

            $this->selectChapter($pantherClient, $chapter, $contentSource);

            return $contentSource->getNextPageSelector() === null
                ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
                : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);
        } finally {
            $pantherClient->close();
        }
    }

    public function supports(string $scrapingType): bool
    {
        return $scrapingType === 'javascript';
    }

    /**
     * Picks the chapter in the reader's <select> when the content source
     * defines a chapter selector, then waits for the images to load.
     *
     * @throws Exception When no option mentions the chapter number.
     */
    private function selectChapter(PantherClient $pantherClient, Chapter $chapter, ContentSource $contentSource): void
    {
        $chapterSelector = $contentSource->getChapterSelector();
        if (!$chapterSelector) {
            return; // no selector configured: nothing to do
        }

        $crawler = $pantherClient->waitFor($chapterSelector);
        $select = $crawler->filter($chapterSelector);

        if ($select->count() === 0) {
            return;
        }

        $chapterNumber = $chapter->getNumber();

        // The number is quoted (its "." must be literal) and must stand alone:
        // not glued to other digits and not the integer or decimal part of a
        // longer number, so chapter 10 matches neither "110" nor "10.5".
        $pattern = '/(?<!\d)(?<!\d\.)' . preg_quote((string) $chapterNumber, '/') . '(?!\.?\d)/';

        $targetIndex = null;
        foreach ($select->filter('option') as $index => $option) {
            if (preg_match($pattern, $option->getText()) === 1) {
                $targetIndex = $index;
                break;
            }
        }

        if ($targetIndex === null) {
            throw new Exception("Chapitre $chapterNumber non trouvé dans le menu déroulant");
        }

        // The selector and index are passed as script arguments rather than
        // interpolated, so a quote in the selector cannot break the script.
        $script = <<<'JS'
            var select = document.querySelector(arguments[0]);
            select.selectedIndex = arguments[1];
            select.dispatchEvent(new Event('change', { bubbles: true }));
            JS;
        $pantherClient->executeScript($script, [$chapterSelector, $targetIndex]);

        $this->waitForImagesLoaded($pantherClient, $contentSource);
    }

    /**
     * Waits (up to 30 seconds) until the number of loaded images matched by
     * the image selector has stayed the same for 10 consecutive checks made
     * 200 ms apart.
     */
    private function waitForImagesLoaded(PantherClient $pantherClient, ContentSource $contentSource): void
    {
        $imageSelector = $contentSource->getImageSelector();

        // The selector is read from arguments[0] at the top of the script:
        // checkImages() is a regular function and has its own "arguments".
        $script = <<<'JS'
            const selector = arguments[0];
            return new Promise((resolve) => {
                let lastImageCount = 0;
                let stableCount = 0;
                const stableThreshold = 10;

                function checkImages() {
                    const images = document.querySelectorAll(selector);
                    const loadedImages = Array.from(images).filter(img => img.complete && img.naturalWidth > 0);

                    if (loadedImages.length === lastImageCount) {
                        stableCount++;
                    } else {
                        stableCount = 0;
                        lastImageCount = loadedImages.length;
                    }

                    if (stableCount >= stableThreshold) {
                        resolve(true);
                    } else {
                        setTimeout(checkImages, 200);
                    }
                }

                checkImages();
            });
            JS;

        $pantherClient->wait(30)->until(
            static function ($driver) use ($script, $imageSelector) {
                return $driver->executeScript($script, [$imageSelector]);
            }
        );
    }

    /**
     * Collects every image matched by the image selector on the loaded page.
     *
     * The 'src' attribute is used, falling back to 'data-src' when it is empty;
     * elements with neither are skipped.
     *
     * @param Chapter $chapter Unused; kept for symmetry with the paginated variant.
     */
    private function scrapeVerticalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
    {
        $pageData = [];
        $crawler = $pantherClient->waitFor($contentSource->getImageSelector());

        foreach ($crawler->filter($contentSource->getImageSelector()) as $image) {
            $imageUrl = $this->cleanImageUrl((string) ($image->getAttribute('src') ?: $image->getAttribute('data-src')));
            if ($imageUrl === '') {
                continue;
            }

            $pageData[] = [
                'image_url' => $imageUrl,
                'page_number' => count($pageData) + 1,
            ];
        }

        return $pageData;
    }

    /**
     * Paginated reader support is not implemented yet: always returns an empty list.
     *
     * TODO: implement page-by-page navigation for JavaScript readers.
     */
    private function scrapeHorizontalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
    {
        return [];
    }
}
|
||||
28
src/Service/Scraper/MangaScraperService.php
Normal file
28
src/Service/Scraper/MangaScraperService.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
|
||||
/**
 * Entry point for scraping: asks the factory for the scraper matching the
 * content source and delegates the call to it.
 */
class MangaScraperService
{
    public function __construct(private ScraperFactory $scraperFactory)
    {
    }

    /**
     * Delegates to the scrapeChapter() method of the scraper created for the content source.
     *
     * @return array|bool Whatever the selected scraper returns.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        return $this->scraperFactory
            ->createScraper($contentSource)
            ->scrapeChapter($chapter, $contentSource);
    }

    /**
     * Delegates to the testScraping() method of the scraper created for the content source.
     *
     * @return array Whatever the selected scraper returns.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        return $this->scraperFactory
            ->createScraper($contentSource)
            ->testScraping($mangaSlug, $chapterNumber, $contentSource);
    }
}
|
||||
89
src/Service/Scraper/MangadexScraper.php
Normal file
89
src/Service/Scraper/MangadexScraper.php
Normal file
@@ -0,0 +1,89 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use GuzzleHttp\Client;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
/**
 * Scraper for the Mangadex API: reads the chapter's "data-saver" page list
 * from a JSON endpoint and downloads each page.
 *
 * The HTTP client inherited from AbstractScraper is used for all requests, so
 * no constructor override is needed; the inherited constructor keeps the same
 * arguments.
 */
class MangadexScraper extends AbstractScraper
{
    /**
     * Downloads every page of the chapter, packs them into a CBZ archive and
     * stores the archive path on the chapter.
     *
     * The endpoint URL is the content source's base URL followed by its
     * chapter URL format filled with the chapter's external id.
     *
     * @return array|bool The page data on success, false on any failure
     *                    (the failure is written to the PHP error log).
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        $chapterUrl = $contentSource->getBaseUrl() . sprintf($contentSource->getChapterUrlFormat(), $chapter->getExternalId());
        $manga = $chapter->getManga();
        $pageData = [];
        $tempDir = null;

        try {
            $response = $this->httpClient->get($chapterUrl);

            // JSON_THROW_ON_ERROR turns a malformed body into a JsonException,
            // which the catch below handles like any other failure.
            $results = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);
            if (!is_array($results)) {
                $results = [];
            }

            $pages = $results['chapter']['dataSaver'] ?? [];
            $hash = $results['chapter']['hash'] ?? null;
            $baseUrl = $results['baseUrl'] ?? null;

            // Missing keys are treated as a failed fetch instead of raising a
            // TypeError, which catch (\Exception) would not intercept.
            if (
                ($results['result'] ?? null) !== 'ok'
                || !is_array($pages)
                || $pages === []
                || !is_string($hash)
                || !is_string($baseUrl)
            ) {
                throw new \Exception('Error while fetching chapter data from Mangadex ' . $manga->getTitle() . ' ' . $chapter->getNumber());
            }

            $pages = array_values($pages);
            $totalPages = count($pages);

            $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
            if (!mkdir($tempDir) && !is_dir($tempDir)) {
                throw new \Exception('Unable to create temporary directory ' . $tempDir);
            }

            foreach ($pages as $index => $page) {
                $pageUrl = $baseUrl . '/data-saver/' . $hash . '/' . $page;
                $imagePath = $tempDir . '/' . sprintf('%03d.%s', $index + 1, pathinfo($page, PATHINFO_EXTENSION));

                $this->downloadAndSaveImage($pageUrl, $imagePath);

                $this->dispatchProgressEvent($chapter, $index + 1, $totalPages);

                $pageData[] = [
                    'image_url' => $pageUrl,
                    'local_image_url' => $imagePath,
                    'page_number' => $index + 1,
                ];
            }

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();

            return $pageData;
        } catch (\Exception $e) {
            // No logger is injected in this class, so record the failure in the PHP error log.
            error_log(sprintf(
                'MangadexScraper: failed to scrape chapter %s from %s: %s',
                $chapter->getNumber(),
                $chapterUrl,
                $e->getMessage()
            ));

            return false;
        } finally {
            // Remove the temporary directory on success and on failure alike.
            if ($tempDir !== null) {
                $this->cleanupTempFiles($tempDir);
            }
        }
    }

    /**
     * Not supported for Mangadex: the chapter's external id is required and is
     * not available from a slug and chapter number, so an empty list is returned.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        return [];
    }

    public function supports(string $scrapingType): bool
    {
        return $scrapingType === 'mangadex';
    }

    /**
     * Downloads an image and writes it to the destination path.
     *
     * @throws \RuntimeException When the file cannot be written. Request
     *                           failures surface as Guzzle exceptions.
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        $response = $this->httpClient->get($imageUrl);

        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new \RuntimeException('Unable to write ' . $destinationPath);
        }
    }
}
|
||||
25
src/Service/Scraper/ScraperFactory.php
Normal file
25
src/Service/Scraper/ScraperFactory.php
Normal file
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\ContentSource;
|
||||
|
||||
/**
 * Chooses, among the scrapers it was given, the one that supports a content
 * source's scraping type.
 */
class ScraperFactory
{
    /** Scrapers to choose from, in the order they were supplied. */
    private array $scrapers;

    /**
     * @param iterable $scrapers Candidate scrapers; each must expose supports().
     */
    public function __construct(iterable $scrapers)
    {
        $this->scrapers = iterator_to_array($scrapers);
    }

    /**
     * Returns the first scraper whose supports() accepts the content source's scraping type.
     *
     * @throws \InvalidArgumentException When no scraper supports that type.
     */
    public function createScraper(ContentSource $contentSource): ScraperInterface
    {
        foreach ($this->scrapers as $candidate) {
            if (!$candidate->supports($contentSource->getScrapingType())) {
                continue;
            }

            return $candidate;
        }

        $message = 'Unsupported scraping type: ' . $contentSource->getScrapingType();

        throw new \InvalidArgumentException($message);
    }
}
|
||||
13
src/Service/Scraper/ScraperInterface.php
Normal file
13
src/Service/Scraper/ScraperInterface.php
Normal file
@@ -0,0 +1,13 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
|
||||
/**
 * Contract shared by the chapter scrapers that ScraperFactory chooses from.
 */
interface ScraperInterface
{
    /**
     * Scrapes the pages of a chapter from the given content source.
     *
     * @return array|bool Page data on success. On failure, existing
     *                    implementations either return false or throw.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool;

    /**
     * Trial run for a manga slug and chapter number against a content source.
     *
     * @return array The page data found; may be empty.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array;

    /**
     * Tells whether this scraper handles the given scraping type identifier.
     */
    public function supports(string $scrapingType): bool;
}
|
||||
Reference in New Issue
Block a user