Added:
- manga import - read from cbz - save cbz from scraping - menu interactions
This commit is contained in:
@@ -7,6 +7,7 @@ use App\Entity\Manga;
|
||||
use App\Entity\ContentSource;
|
||||
use App\Event\PageScrappingProgressEvent;
|
||||
use App\EventSubscriber\MangaScrapedEvent;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Exception;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
@@ -23,15 +24,14 @@ use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
class MangaScraperService
|
||||
{
|
||||
const string IMG_BASE_DIR = '/public/manga-images';
|
||||
private string $projectDir;
|
||||
private EventDispatcherInterface $eventDispatcher;
|
||||
private string $scrapingType = '';
|
||||
const string PUBLIC_CBZ = '/public/cbz';
|
||||
|
||||
public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
|
||||
public function __construct(
|
||||
private readonly string $projectDir,
|
||||
private readonly EventDispatcherInterface $eventDispatcher,
|
||||
private readonly EntityManagerInterface $entityManager
|
||||
)
|
||||
{
|
||||
$this->projectDir = $projectDir;
|
||||
$this->eventDispatcher = $eventDispatcher;
|
||||
}
|
||||
|
||||
private function extractMangaPageData(string $html, ContentSource $mangaSource): array
|
||||
@@ -94,71 +94,54 @@ class MangaScraperService
|
||||
};
|
||||
}
|
||||
|
||||
// private function scrapeChapterHtml(Manga $manga, Chapter $chapter, MangaSource $mangaSource): array|bool
|
||||
// {
|
||||
// $chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getChapterNumber());
|
||||
// $html = $this->fetchHtml($chapterUrl);
|
||||
// $imgUrls = $this->extractMangaPageData($html);
|
||||
//
|
||||
// return $this->saveChapterImages($manga, $chapter, $imgUrls);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
* @throws Exception
|
||||
*/
|
||||
private function scrapeChapterMangadex(Chapter $chapter, ContentSource $mangaSource): array|bool
|
||||
private function scrapeChapterMangadex(Chapter $chapter, ContentSource $mangaSource): bool
|
||||
{
|
||||
$this->scrapingType = 'mangadex';
|
||||
$client = new Client();
|
||||
$chapterUrl = $mangaSource->getBaseUrl() . sprintf($mangaSource->getChapterUrlFormat(), $chapter->getExternalId());
|
||||
$mangaTitle = $chapter->getManga()->getTitle();
|
||||
$chapterNumber = $chapter->getNumber();
|
||||
$manga = $chapter->getManga();
|
||||
$pageData = [];
|
||||
|
||||
$response = $client->get($chapterUrl);
|
||||
$results = json_decode($response->getBody()->getContents(), true);
|
||||
|
||||
$mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
|
||||
if (!is_dir($mangaDir)) {
|
||||
mkdir($mangaDir, 0755, true);
|
||||
}
|
||||
$chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
|
||||
if (!is_dir($chapterDir)) {
|
||||
mkdir($chapterDir, 0755, true);
|
||||
if ($results['result'] !== 'ok' || count($results['chapter']['dataSaver']) === 0) {
|
||||
throw new Exception('Error while fetching chapter data from Mangadex ' . $manga->getTitle() . ' ' . $chapter->getNumber());
|
||||
}
|
||||
|
||||
if(count($results['chapter']['dataSaver']) === 0){
|
||||
throw new Exception('Error while fetching chapter data from Mangadex ' . $chapter->getManga()->getTitle() . ' ' . $chapter->getNumber());
|
||||
$tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
|
||||
mkdir($tempDir);
|
||||
|
||||
foreach ($results['chapter']['dataSaver'] as $index => $page) {
|
||||
$pageUrl = $results['baseUrl'] . '/data-saver/' . $results['chapter']['hash'] . '/' . $page;
|
||||
$imagePath = $tempDir . '/' . sprintf('%03d.%s', $index + 1, pathinfo($page, PATHINFO_EXTENSION));
|
||||
|
||||
$this->downloadAndSaveImage($pageUrl, $imagePath);
|
||||
|
||||
$event = new PageScrappingProgressEvent($chapter->getId(), $index + 1, count($results['chapter']['dataSaver']));
|
||||
$this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
|
||||
|
||||
$pageData[] = [
|
||||
'image_url' => $pageUrl,
|
||||
'local_image_url' => $imagePath,
|
||||
'page_number' => $index + 1,
|
||||
];
|
||||
}
|
||||
|
||||
if ($results['result'] === 'ok') {
|
||||
foreach ($results['chapter']['dataSaver'] as $page) {
|
||||
$pageUrl = $results['baseUrl'] . '/data-saver/' . $results['chapter']['hash'] . '/' . $page;
|
||||
// Déterminer l'extension de l'image
|
||||
$imageExtension = pathinfo(parse_url($pageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
|
||||
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
|
||||
$this->createCbzFile($tempDir, $pageData, $cbzFilePath);
|
||||
|
||||
// Construire le nom de fichier de l'image
|
||||
$imageName = sprintf('%03d.%s', count($pageData) + 1, $imageExtension);
|
||||
$imagePath = sprintf('%s/%s', $chapterDir, $imageName);
|
||||
$chapter->setCbzPath($cbzFilePath);
|
||||
$this->entityManager->persist($chapter);
|
||||
$this->entityManager->flush();
|
||||
|
||||
$this->downloadAndSaveImage($pageUrl, $imagePath);
|
||||
// Nettoyage du répertoire temporaire
|
||||
$this->cleanupTempFiles($tempDir);
|
||||
|
||||
$event = new PageScrappingProgressEvent($chapter->getId(), count($pageData) + 1, count($results['chapter']['dataSaver']));
|
||||
$this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
|
||||
|
||||
$pageData[] = [
|
||||
'image_url' => $pageUrl,
|
||||
'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
|
||||
'page_number' => count($pageData) + 1,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
$event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData, $chapterDir);
|
||||
$this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);
|
||||
|
||||
return $pageData;
|
||||
return true;
|
||||
}
|
||||
|
||||
private function scrapeChapterJavaScript(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
|
||||
@@ -166,7 +149,7 @@ class MangaScraperService
|
||||
$chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getNumber());
|
||||
$imgUrls = $this->fetchImagesUsingPuppeteer($chapterUrl, $mangaSource->getImageSelector(), $mangaSource->getNextPageSelector());
|
||||
|
||||
return $this->saveChapterImages($manga, $chapter, $imgUrls);
|
||||
return false;
|
||||
}
|
||||
|
||||
private function fetchImagesUsingPuppeteer(string $url, string $imageSelector, string $nextButtonSelector): array
|
||||
@@ -188,34 +171,20 @@ class MangaScraperService
|
||||
*/
|
||||
private function scrapeChapterHtml(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
|
||||
{
|
||||
$this->scrapingType = 'html';
|
||||
$chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
|
||||
|
||||
$pageData = [];
|
||||
$currentPageUrl = $chapterUrl;
|
||||
$mangaTitle = $manga->getTitle();
|
||||
$chapterNumber = $chapter->getNumber();
|
||||
|
||||
$mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
|
||||
if (!is_dir($mangaDir)) {
|
||||
mkdir($mangaDir, 0755, true);
|
||||
}
|
||||
|
||||
$chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
|
||||
if (!is_dir($chapterDir)) {
|
||||
mkdir($chapterDir, 0755, true);
|
||||
}
|
||||
$tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
|
||||
mkdir($tempDir);
|
||||
|
||||
do {
|
||||
$html = $this->fetchHtml($currentPageUrl);
|
||||
$page = $this->extractMangaPageData($html, $mangaSource);
|
||||
|
||||
// Déterminer l'extension de l'image
|
||||
$imageExtension = pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION);
|
||||
|
||||
// Construire le nom de fichier de l'image
|
||||
$imageName = sprintf('%03d.%s', count($pageData) + 1, $imageExtension);
|
||||
$imagePath = sprintf('%s/%s', $chapterDir, $imageName);
|
||||
$imageName = sprintf('%03d.%s', count($pageData) + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
|
||||
$imagePath = $tempDir . '/' . $imageName;
|
||||
|
||||
$this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
|
||||
@@ -224,17 +193,24 @@ class MangaScraperService
|
||||
|
||||
$pageData[] = [
|
||||
'image_url' => $page['image_url'],
|
||||
'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
|
||||
'local_image_url' => $imagePath,
|
||||
'page_number' => count($pageData) + 1,
|
||||
];
|
||||
|
||||
$currentPageUrl = $page['next_page_url'];
|
||||
} while ($currentPageUrl);
|
||||
|
||||
$event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData, $chapterDir);
|
||||
$this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);
|
||||
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
|
||||
$this->createCbzFile($tempDir, $pageData, $cbzFilePath);
|
||||
|
||||
return $pageData;
|
||||
$chapter->setCbzPath($cbzFilePath);
|
||||
$this->entityManager->persist($chapter);
|
||||
$this->entityManager->flush();
|
||||
|
||||
// Nettoyage du répertoire temporaire
|
||||
$this->cleanupTempFiles($tempDir);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -283,13 +259,13 @@ class MangaScraperService
|
||||
|
||||
if (str_starts_with($contentType, 'image/')) {
|
||||
file_put_contents($destinationPath, $response->getBody()->getContents());
|
||||
if ($this->scrapingType === 'mangadex') {
|
||||
$this->sendReport($imageUrl, true, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
|
||||
}
|
||||
// if ($this->scrapingType === 'mangadex') {
|
||||
// $this->sendReport($imageUrl, true, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
|
||||
// }
|
||||
} else {
|
||||
if ($this->scrapingType === 'mangadex') {
|
||||
$this->sendReport($imageUrl, false, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
|
||||
}
|
||||
// if ($this->scrapingType === 'mangadex') {
|
||||
// $this->sendReport($imageUrl, false, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
|
||||
// }
|
||||
throw new \Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
|
||||
}
|
||||
} catch
|
||||
@@ -298,41 +274,6 @@ class MangaScraperService
|
||||
}
|
||||
}
|
||||
|
||||
private function saveChapterImages(Manga $manga, Chapter $chapter, array $imgUrls): array
|
||||
{
|
||||
$mangaTitle = $manga->getTitle();
|
||||
$chapterNumber = $chapter->getNumber();
|
||||
|
||||
$mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
|
||||
if (!is_dir($mangaDir)) {
|
||||
mkdir($mangaDir, 0755, true);
|
||||
}
|
||||
|
||||
$chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
|
||||
if (!is_dir($chapterDir)) {
|
||||
mkdir($chapterDir, 0755, true);
|
||||
}
|
||||
|
||||
$pageData = [];
|
||||
foreach ($imgUrls as $index => $imgUrl) {
|
||||
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($imgUrl, PHP_URL_PATH), PATHINFO_EXTENSION));
|
||||
$imagePath = sprintf('%s/%s', $chapterDir, $imageName);
|
||||
|
||||
$this->downloadAndSaveImage($imgUrl, $imagePath);
|
||||
|
||||
$pageData[] = [
|
||||
'image_url' => $imgUrl,
|
||||
'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
|
||||
'page_number' => $index + 1,
|
||||
];
|
||||
}
|
||||
|
||||
$event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData, $chapterDir);
|
||||
$this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);
|
||||
|
||||
return $pageData;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
@@ -379,4 +320,51 @@ class MangaScraperService
|
||||
throw new \Exception('Erreur lors de l\'envoi du rapport : ' . $e->getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pack the downloaded page images into a CBZ (zip) archive.
 *
 * Each image is stored at the archive root under its base file name.
 *
 * @param string $tempDir     Directory the images were downloaded to. Not read here:
 *                            every page entry already carries its own file path.
 * @param array  $pageData    Page entries; each must provide 'local_image_url',
 *                            the path of the image file on disk.
 * @param string $cbzFilePath Destination path of the archive.
 *
 * @throws \RuntimeException When the archive cannot be opened, a page cannot be
 *                           added, or the archive cannot be written on close.
 */
private function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
{
    $zip = new \ZipArchive();

    // OVERWRITE: re-scraping a chapter must not keep stale pages from an older archive.
    $openResult = $zip->open($cbzFilePath, \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
    if ($openResult !== true) {
        throw new \RuntimeException(
            sprintf('Unable to open CBZ archive "%s" (ZipArchive error code %d).', $cbzFilePath, $openResult)
        );
    }

    foreach ($pageData as $page) {
        $imagePath = $page['local_image_url'];

        if (!$zip->addFile($imagePath, basename($imagePath))) {
            // Discard the pending entries before releasing the handle.
            $zip->unchangeAll();
            $zip->close();

            throw new \RuntimeException(
                sprintf('Unable to add "%s" to CBZ archive "%s".', $imagePath, $cbzFilePath)
            );
        }
    }

    // The archive is only written on close(), so this is where write errors surface.
    if (!$zip->close()) {
        throw new \RuntimeException(sprintf('Unable to write CBZ archive "%s".', $cbzFilePath));
    }
}
|
||||
|
||||
/**
 * Build the full path of the CBZ archive for a chapter.
 *
 * The volume directory is resolved through createDirectories() (which creates
 * it when missing); the file name is "<slug>_vol<volume>_ch<number>.cbz".
 *
 * @param Manga   $manga   Manga the chapter belongs to; its slug prefixes the file name.
 * @param Chapter $chapter Chapter whose volume and number identify the archive.
 *
 * @return string Path of the archive inside the volume directory.
 */
private function generateCbzPath(Manga $manga, Chapter $chapter): string
{
    // Resolve the target directory first, as it may need to be created.
    $targetDir = $this->createDirectories($manga, $chapter->getVolume());

    $archiveName = sprintf(
        '%s_vol%d_ch%s.cbz',
        $manga->getSlug(),
        $chapter->getVolume(),
        $chapter->getNumber()
    );

    return sprintf('%s/%s', $targetDir, $archiveName);
}
|
||||
|
||||
/**
 * Resolve the storage directory for one volume of a manga, creating it when missing.
 *
 * Layout: <projectDir><PUBLIC_CBZ>/<Slug> (<publication year or "unknown">)/volume_<NN>
 *
 * @param Manga $manga  Manga whose slug and publication year name the parent directory.
 * @param int   $volume Volume number, rendered zero-padded to two digits.
 *
 * @return string Path of the volume directory.
 *
 * @throws \RuntimeException When the directory does not exist and cannot be created.
 */
private function createDirectories(Manga $manga, int $volume): string
{
    $mangaYear = $manga->getPublicationYear() ?? 'unknown';
    $mangaDir = sprintf(
        '%s/%s (%s)',
        $this->projectDir . self::PUBLIC_CBZ,
        ucfirst($manga->getSlug()),
        $mangaYear
    );

    // Pad in this format string: feeding a pre-padded "01" through %d casts it
    // back to an integer and drops the leading zero.
    $volumeDir = sprintf('%s/volume_%02d', $mangaDir, $volume);

    // The trailing is_dir() tolerates another process creating the directory concurrently.
    if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
        throw new \RuntimeException(sprintf('Unable to create directory "%s".', $volumeDir));
    }

    return $volumeDir;
}
|
||||
|
||||
/**
 * Delete the regular files directly inside a temporary directory, then the directory itself.
 *
 * Does nothing when the directory does not exist. Sub-directories and dot-files
 * are not matched by the glob pattern and are left in place, in which case the
 * final rmdir() cannot remove the directory.
 *
 * @param string $directory Path of the temporary directory to remove.
 */
private function cleanupTempFiles(string $directory): void
{
    if (!is_dir($directory)) {
        return;
    }

    // glob() returns false on error; treat that as "nothing to delete".
    $entries = glob($directory . '/*') ?: [];

    foreach ($entries as $entry) {
        if (is_file($entry)) {
            unlink($entry);
        }
    }

    rmdir($directory);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user