- ContentSource handling in message
- ContentSource list, add/update UI
- nextPageSelector and imageSelector can be null
- cleanup
This commit is contained in:
Jérémy Guillot
2024-06-30 20:47:27 +02:00
parent ba30d3102d
commit 3012adfee7
24 changed files with 762 additions and 707 deletions

View File

@@ -1,67 +0,0 @@
<?php
namespace App\Service;
use App\Entity\Manga;
use App\Interface\ContentProviderInterface;
use Symfony\Component\BrowserKit\HttpBrowser as Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Content provider backed by the lelscans.net website.
 *
 * Both listings are read from the <option> entries of the <select> elements found on the
 * fetched page: labels that are purely numeric (e.g. "12", "12.5") are treated as chapter
 * numbers, every other label as a manga title.
 */
class LelScansProviderService implements ContentProviderInterface
{
    const PROVIDER_URL = 'https://lelscans.net/';
    const MANGA_SLUG = '/{manga}/{chapter}/{page}';

    /** Matches an option label that is a chapter number, such as "12" or "12.5". */
    private const CHAPTER_LABEL_PATTERN = '/^\d+(\.\d+)?$/';

    private Client $client;

    public function __construct()
    {
        $this->client = new Client();
    }

    /**
     * Lists the mangas offered on the provider's landing page.
     *
     * @return array<int, array{name: string, url: string}>
     */
    public function getMangaList(): array
    {
        $crawler = $this->client->request('GET', self::PROVIDER_URL);
        $mangaList = [];

        $crawler->filter('select > option')->each(function (Crawler $node) use (&$mangaList) {
            $mangaName = $node->text();
            $mangaUrl = $node->attr('value');

            // Numeric labels belong to the chapter selector, not to the manga selector.
            if ($mangaName && $mangaUrl && !preg_match(self::CHAPTER_LABEL_PATTERN, $mangaName)) {
                $mangaList[] = [
                    'name' => $mangaName,
                    'url' => $mangaUrl,
                ];
            }
        });

        return $mangaList;
    }

    /**
     * Lists the chapter numbers available for one manga.
     *
     * @param mixed $mangaSlug Slug inserted into the "lecture-en-ligne-<slug>.php" page name.
     *
     * @return array<int, array{number: string}>
     */
    public function getChapterList($mangaSlug): array
    {
        $crawler = $this->client->request('GET', self::PROVIDER_URL . 'lecture-en-ligne-' . $mangaSlug . '.php');
        $chapterList = [];

        $crawler->filter('select > option')->each(function (Crawler $node) use (&$chapterList) {
            $chapterName = $node->text();
            $chapterUrl = $node->attr('value');

            // Only options that carry a value and a numeric label are chapters.
            if ($chapterName && $chapterUrl && preg_match(self::CHAPTER_LABEL_PATTERN, $chapterName)) {
                $chapterList[] = [
                    'number' => $chapterName,
                ];
            }
        });

        return $chapterList;
    }

    /**
     * Not implemented yet.
     *
     * Fails with an explicit exception instead of falling off the end of a method that
     * declares an array return type (which would surface as an opaque TypeError).
     *
     * @throws \LogicException Always.
     */
    #[\Override] public function getAvailableContent(Manga $manga): array
    {
        throw new \LogicException(sprintf('%s is not implemented yet.', __METHOD__));
    }

    /**
     * Not implemented yet.
     *
     * @throws \LogicException Always.
     */
    #[\Override] public function getContent(Manga $manga): array
    {
        throw new \LogicException(sprintf('%s is not implemented yet.', __METHOD__));
    }
}

View File

@@ -1,100 +0,0 @@
<?php
namespace App\Service;
use Symfony\Component\Filesystem\Filesystem;
use Symfony\Component\HttpFoundation\BinaryFileResponse;
use Symfony\Component\HttpFoundation\ResponseHeaderBag;
use ZipArchive;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
/**
 * Packages downloaded chapter images into .cbz archives and serves them.
 *
 * Images are read from <projectDir>/public/manga-images/<title>/<chapter> and archives are
 * written to <projectDir>/public/manga-export/<title>/<chapter>.cbz.
 */
class MangaExportService
{
    const IMG_BASE_DIR = '/public/manga-images';
    const EXPORT_BASE_DIR = '/public/manga-export';

    private string $projectDir;

    /**
     * @param string $projectDir Project root that both base directories are appended to.
     */
    public function __construct($projectDir)
    {
        $this->projectDir = $projectDir;
    }

    /**
     * Builds the .cbz archive of one chapter from its image directory.
     *
     * @return bool|string false when the image directory is missing or the archive could not
     *                     be produced, 'already_exported' when the archive already exists,
     *                     true when it was created.
     */
    public function exportMangaChapter(string $mangaTitle, int $chapterNumber): bool|string
    {
        $chapterDir = $this->getMangaDir($mangaTitle, $chapterNumber);
        if (!is_dir($chapterDir)) {
            return false;
        }

        $cbzFilePath = $this->getExportDir($mangaTitle, $chapterNumber);
        $cbzDirectory = dirname($cbzFilePath);

        // mkdir() can fail; re-check is_dir() so a directory that appeared meanwhile is accepted.
        if (!is_dir($cbzDirectory) && !mkdir($cbzDirectory, 0755, true) && !is_dir($cbzDirectory)) {
            return false;
        }

        $fileSystem = new Filesystem();
        if ($fileSystem->exists($cbzFilePath)) {
            return 'already_exported';
        }

        return $this->createCbzFromDirectory($chapterDir, $cbzFilePath);
    }

    /**
     * Returns a download response for a chapter archive, exporting it first if needed.
     *
     * @return BinaryFileResponse|bool The response, or false when no archive could be provided.
     */
    public function downloadCbz(string $mangaTitle, int $chapterNumber): BinaryFileResponse|bool
    {
        $filePathCbz = $this->getExportDir($mangaTitle, $chapterNumber);

        $fileSystem = new Filesystem();
        if ($fileSystem->exists($filePathCbz)) {
            return new BinaryFileResponse($filePathCbz);
        }

        $chapterDir = $this->getMangaDir($mangaTitle, $chapterNumber);
        if (is_dir($chapterDir) && $this->exportMangaChapter($mangaTitle, $chapterNumber)) {
            return new BinaryFileResponse($filePathCbz);
        }

        return false;
    }

    /**
     * Zips every file found under $sourceDirectory into $cbzFilePath.
     *
     * @return bool true only when the archive was opened and successfully written.
     */
    private function createCbzFromDirectory(string $sourceDirectory, string $cbzFilePath): bool
    {
        // Entries are named relative to the source directory. getRealPath() below returns
        // canonical paths, so the prefix being stripped must be canonical as well.
        $rootPath = realpath($sourceDirectory);
        if ($rootPath === false) {
            return false;
        }

        $zip = new ZipArchive();
        // Open the .cbz file for writing.
        if ($zip->open($cbzFilePath, ZipArchive::CREATE | ZipArchive::OVERWRITE) !== true) {
            return false;
        }

        $files = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($rootPath),
            RecursiveIteratorIterator::LEAVES_ONLY
        );

        // Add the image files to the .cbz archive.
        foreach ($files as $file) {
            if ($file->isDir()) {
                continue;
            }

            $filePath = $file->getRealPath();
            if ($filePath === false) {
                // Unresolvable entry (e.g. a dangling symlink): nothing to add.
                continue;
            }

            $relativePath = substr($filePath, strlen($rootPath) + 1);
            $zip->addFile($filePath, $relativePath);
        }

        // The archive is only written on close(); report its outcome instead of assuming success.
        return $zip->close();
    }

    /**
     * Path of the directory holding the images of one chapter.
     */
    private function getMangaDir(string $mangaTitle, int $chapterNumber): string
    {
        return sprintf('%s/%s/%d', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
    }

    /**
     * Path of the .cbz archive of one chapter.
     */
    private function getExportDir(string $mangaTitle, int $chapterNumber): string
    {
        return sprintf('%s/%s/%d', $this->projectDir . self::EXPORT_BASE_DIR, $mangaTitle, $chapterNumber) . '.cbz';
    }
}

View File

@@ -1,17 +0,0 @@
<?php
namespace App\Service;
use App\Interface\ContentProviderInterface;
/**
 * Maps a provider name to its content-provider implementation.
 */
class MangaProviderFactory
{
    /**
     * Instantiates the provider registered under the given name.
     *
     * @param mixed $providerName Provider identifier, compared strictly against the supported names.
     *
     * @throws \InvalidArgumentException When no provider is registered under that name.
     */
    public static function create($providerName): ContentProviderInterface
    {
        return match ($providerName) {
            'LelScans' => new LelScansProviderService(),
            'AutreManga' => new AutreMangaProviderService(),
            // InvalidArgumentException extends \Exception, so existing catch blocks keep working.
            default => throw new \InvalidArgumentException("Provider {$providerName} non supporté."),
        };
    }
}

View File

@@ -166,6 +166,21 @@ class MangaScraperService
return json_decode(implode("", $output), true);
}
/**
 * Scrapes the page list of a chapter for the given content source.
 *
 * Only the page data is collected here; this method itself downloads no image. A source
 * without a next-page selector is read as a single page containing every image, any other
 * source is followed page by page.
 *
 * @return array Page entries as produced by the vertical or horizontal reader scraper.
 *
 * @throws GuzzleException
 */
public function testScrapingHtml(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
    $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);

    if ($contentSource->getNextPageSelector() === null) {
        // The HTML is fetched only here: the horizontal reader fetches each page itself,
        // so fetching up front would request the first page twice.
        return $this->scrapeVerticalReader($this->fetchHtml($chapterUrl), $contentSource);
    }

    return $this->scrapeHorizontalReader($chapterUrl, $contentSource);
}
/**
* @throws GuzzleException
*/
@@ -173,32 +188,32 @@ class MangaScraperService
{
$chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
$pageData = [];
$currentPageUrl = $chapterUrl;
$tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
mkdir($tempDir);
do {
$html = $this->fetchHtml($currentPageUrl);
$page = $this->extractMangaPageData($html, $mangaSource);
$pageData = [];
$imageName = sprintf('%03d.%s', count($pageData) + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
if ($mangaSource->getNextPageSelector() === null) {
// Lecteur vertical
$html = $this->fetchHtml($chapterUrl);
$pageData = $this->scrapeVerticalReader($html, $mangaSource);
} else {
// Lecteur horizontal (paginé)
$pageData = $this->scrapeHorizontalReader($chapterUrl, $mangaSource);
}
// Télécharger et sauvegarder les images
foreach ($pageData as $index => &$page) {
$imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
$imagePath = $tempDir . '/' . $imageName;
$this->downloadAndSaveImage($page['image_url'], $imagePath);
$event = new PageScrappingProgressEvent($chapter->getId(), count($pageData) + 1, 0);
$event = new PageScrappingProgressEvent($chapter->getId(), $index + 1, count($pageData));
$this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
$pageData[] = [
'image_url' => $page['image_url'],
'local_image_url' => $imagePath,
'page_number' => count($pageData) + 1,
];
$currentPageUrl = $page['next_page_url'];
} while ($currentPageUrl);
$page['local_image_url'] = $imagePath;
}
$cbzFilePath = $this->generateCbzPath($manga, $chapter);
$this->createCbzFile($tempDir, $pageData, $cbzFilePath);
@@ -210,7 +225,78 @@ class MangaScraperService
// Nettoyage du répertoire temporaire
$this->cleanupTempFiles($tempDir);
return true;
return $pageData;
}
/**
 * Collects every image of a single-page ("vertical") reader.
 *
 * Each node matched by the source's image selector yields one entry; the URL comes from
 * the "src" attribute, or from "data-src" when "src" is empty.
 *
 * @return array<int, array{image_url: string, page_number: int}>
 */
private function scrapeVerticalReader(string $html, ContentSource $contentSource): array
{
    $imageNodes = (new Crawler($html))->filter($contentSource->getImageSelector());

    $pages = [];
    foreach ($imageNodes as $position => $node) {
        $source = $node->getAttribute('src');
        if ($source === '') {
            $source = $node->getAttribute('data-src');
        }

        $pages[] = [
            'image_url' => $this->cleanImageUrl($source),
            'page_number' => $position + 1,
        ];
    }

    return $pages;
}
/**
 * Collects the images of a paginated ("horizontal") reader by following its next-page links.
 *
 * Starting at the chapter URL, each page is fetched and its image URL recorded, then the
 * extracted next-page URL is followed until there is none or it was already visited.
 *
 * @return array<int, array{image_url: string, page_number: int}>
 *
 * @throws GuzzleException
 */
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
{
    $pageData = [];
    $visitedUrls = [];
    $currentPageUrl = $chapterUrl;

    do {
        $visitedUrls[$currentPageUrl] = true;

        $html = $this->fetchHtml($currentPageUrl);
        $page = $this->extractMangaPageData($html, $contentSource);

        $pageData[] = [
            'image_url' => $this->cleanImageUrl($page['image_url']),
            'page_number' => count($pageData) + 1,
        ];

        $currentPageUrl = $page['next_page_url'];
        // Stop on a link back to a page already scraped; it would otherwise loop forever.
    } while ($currentPageUrl && !isset($visitedUrls[$currentPageUrl]));

    return $pageData;
}
/**
 * Downloads one image into the temporary directory and appends it to the page list.
 *
 * The local file is named after the 1-based page number, zero-padded to three digits,
 * followed by the extension taken from the path of the image URL.
 *
 * @param array $pageData Page list, passed by reference; receives the new entry.
 * @param int   $index    Zero-based position of the image in the chapter.
 *
 * @throws GuzzleException
 */
private function processImage(string $imgUrl, string $tempDir, array &$pageData, int $index, Chapter $chapter): void
{
    $pageNumber = $index + 1;
    $cleanUrl = $this->cleanImageUrl($imgUrl);

    $extension = pathinfo(parse_url($cleanUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
    $localPath = $tempDir . '/' . sprintf('%03d.%s', $pageNumber, $extension);

    $this->downloadAndSaveImage($cleanUrl, $localPath);

    // No progress event is dispatched from here, so $chapter is currently unused.
    $pageData[] = [
        'image_url' => $cleanUrl,
        'local_image_url' => $localPath,
        'page_number' => $pageNumber,
    ];
}
/**
 * Normalises a scraped image URL: surrounding whitespace is trimmed, then every ASCII
 * control character (0x00-0x1F and 0x7F) is removed.
 */
private function cleanImageUrl(string $url): string
{
    $trimmedUrl = trim($url);

    return preg_replace('/[\x00-\x1F\x7F]/', '', $trimmedUrl);
}
/**

View File

@@ -1,157 +0,0 @@
<?php
namespace App\Service;
use App\EventSubscriber\MangaScrapedEvent;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Routing\Matcher\UrlMatcher;
use Symfony\Component\Routing\RequestContext;
use Symfony\Component\Routing\Route;
use Symfony\Component\Routing\RouteCollection;
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
/**
 * Older scraper implementation hard-wired to lelscans.net.
 *
 * It walks a chapter page by page through the "Suivant" (next) link, stores every image
 * under <projectDir>/public/manga-images/<title>/<chapter>/ and dispatches a
 * MangaScrapedEvent once the whole chapter has been saved.
 */
class MangaScraperServiceOld
{
    const string IMG_BASE_DIR = '/public/manga-images';

    private string $projectDir;
    private EventDispatcherInterface $eventDispatcher;

    /**
     * @param string $projectDir Project root that IMG_BASE_DIR is appended to.
     */
    public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
    {
        $this->projectDir = $projectDir;
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Extracts the image URL and the next-page URL from one reader page.
     *
     * @return array{image_url: string, next_page_url: ?string} next_page_url is null when
     *                                                          the page has no "Suivant" link.
     */
    public function extractMangaPageData(string $html): array
    {
        $baseUrl = 'https://lelscans.net';
        // Kept in a variable so the IDE does not complain about the selector.
        $selector = 'img';

        $crawler = new Crawler($html);
        $imgUrl = $crawler->filter($selector)->attr('src');
        $nextLink = $crawler->filter('a[title="Suivant"]');

        if (!preg_match('/^https?:\/\//', $imgUrl)) {
            $urlComponents = parse_url($baseUrl);
            $scheme = $urlComponents['scheme'];
            $host = $urlComponents['host'];

            // Build the absolute image URL.
            $imgUrl = $scheme . '://' . $host . '/' . ltrim($imgUrl, '/');
        }

        $nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;

        return [
            'image_url' => $imgUrl,
            'next_page_url' => $nextUrl,
        ];
    }

    /**
     * Downloads every page of a chapter and dispatches a MangaScrapedEvent.
     *
     * @return array|bool The list of scraped pages, or false when the chapter is not available.
     *
     * @throws GuzzleException
     */
    public function scrapeMangaChapter(string $chapterUrl, string $mangaTitle, float $chapterNumber): array|bool
    {
        if (!$this->isChapterAvailable($chapterUrl, $chapterNumber)) {
            return false;
        }

        $pageData = [];
        $currentPageUrl = $chapterUrl;

        $mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
        if (!is_dir($mangaDir)) {
            mkdir($mangaDir, 0755, true);
        }

        // Create the chapter directory if it does not exist yet.
        $chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
        if (!is_dir($chapterDir)) {
            mkdir($chapterDir, 0755, true);
        }

        do {
            $html = $this->fetchHtml($currentPageUrl);
            $page = $this->extractMangaPageData($html);
            $currentPageUrl = $page['next_page_url'];

            $pageNumber = count($pageData) + 1;

            // Image file name and destination path.
            $imageName = sprintf('%03d.jpg', $pageNumber);
            $imagePath = sprintf('%s/%s', $chapterDir, $imageName);

            // Download and store the image.
            $this->downloadAndSaveImage($page['image_url'], $imagePath);

            // Enrich the page entry with the URL of the locally stored image.
            $page['local_image_url'] = sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName);
            $page['page_number'] = $pageNumber;
            $pageData[] = $page;
        } while ($currentPageUrl);

        $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
        $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

        return $pageData;
    }

    /**
     * Fetches a URL and returns the response body.
     *
     * @throws GuzzleException
     */
    private function fetchHtml(string $url): string
    {
        $client = new Client();
        $response = $client->get($url);

        return (string) $response->getBody();
    }

    /**
     * Downloads an image and writes it to the destination path.
     *
     * @throws GuzzleException
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        $client = new Client();
        $response = $client->get($imageUrl);
        file_put_contents($destinationPath, $response->getBody()->getContents());
    }

    /**
     * Tells whether the chapter page really belongs to the requested chapter.
     *
     * The "Suivant" link of the page is matched against /scan-{manga}/{chapter}/{page}; the
     * chapter is available only when that link exists, matches the pattern and carries the
     * requested chapter number.
     *
     * @throws GuzzleException
     */
    private function isChapterAvailable(string $chapterUrl, float $chapterNumber): bool
    {
        $html = $this->fetchHtml($chapterUrl);
        $crawler = new Crawler($html);

        $nextLink = $crawler->filter('a[title="Suivant"]');
        if ($nextLink->count() === 0) {
            return false;
        }
        $nextUrl = $nextLink->attr('href');

        $routeCollection = new RouteCollection();
        $routeCollection->add('manga_chapter', new Route('/scan-{manga}/{chapter}/{page}'));
        $matcher = new UrlMatcher($routeCollection, new RequestContext('/'));

        $path = is_string($nextUrl) ? parse_url($nextUrl, PHP_URL_PATH) : null;
        if (!is_string($path)) {
            // No usable path in the link: it cannot identify the chapter.
            return false;
        }

        try {
            $parameters = $matcher->match($path);
        } catch (\Symfony\Component\Routing\Exception\ExceptionInterface) {
            // A link that does not follow the expected pattern means "not available",
            // not a failure of the whole scrape.
            return false;
        }

        return (float) $parameters['chapter'] === $chapterNumber;
    }
}

View File

@@ -125,12 +125,19 @@ readonly class MangadexProvider implements MetadataProviderInterface
/**
 * Fetches one page of the chapter feed of a manga.
 *
 * Pages hold 500 entries, restricted to English and French translations and ordered by
 * ascending chapter. When the request fails, an error update is sent through the
 * notification service and an empty array is returned.
 *
 * @param int $page Zero-based page index; the request offset is $page * 500.
 */
private function getFeedWithPagination(string $externalId, int $page): array
{
    // The request must live inside the try block: returning before it left the
    // error handling below unreachable.
    try {
        return $this->client->get('/manga/' . $externalId . '/feed', [
            'limit' => 500,
            'translatedLanguage' => ['en', 'fr'],
            'order' => ['chapter' => 'asc'],
            'offset' => $page * 500,
        ]);
    } catch (\Exception $e) {
        $this->notificationService->sendUpdate(['status' => 'error', 'message' => 'An error occurred while fetching data from Mangadex.']);

        return [];
    }
}
public function getMangaAggregate(Manga $manga): array
@@ -139,7 +146,12 @@ readonly class MangadexProvider implements MetadataProviderInterface
return [];
}
$response = $this->client->get('/manga/' . $manga->getExternalId() . '/aggregate');
try {
$response = $this->client->get('/manga/' . $manga->getExternalId() . '/aggregate');
}catch(\Exception $e){
// $this->notificationService->sendUpdate(['status' => 'error', 'message' => 'An error occurred while fetching data from Mangadex.']);
return [];
}
$chapterEntities = [];
if($response['result'] === 'ok'){

View File

@@ -1,73 +0,0 @@
<?php
namespace App\Service;
use App\Entity\Manga;
use App\Interface\ContentProviderInterface;
use Symfony\Component\BrowserKit\HttpBrowser;
use Symfony\Component\BrowserKit\HttpBrowser as Client;
//use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\HttpClient\HttpClient;
/**
 * Work-in-progress provider for sushiscan.net.
 *
 * Catalogue pages are requested through the FlareSolverr endpoint at
 * http://flaresolverr:8191/v1 rather than directly.
 */
class SushiScanProviderService
{
    const PROVIDER_URL = 'https://sushiscan.net/catalogue/';
    const MANGA_SLUG = '/{manga}/{chapter}/{page}';
    const CONTENT_TYPE = ['volume', 'chapitre'];

    private Client $client;

    public function __construct()
    {
        $httpClient = HttpClient::create(['timeout' => 60]);
        $this->client = new HttpBrowser($httpClient);
    }

    /**
     * Lists the content entries found on the catalogue page of a manga.
     *
     * @return array<int, array{name: string, url: string}>
     *
     * @throws \RuntimeException When the proxy request fails.
     */
    public function getAvailableContent(Manga $manga)
    {
        $url = 'http://flaresolverr:8191/v1';
        $jsonContent = json_encode([
            'cmd' => 'request.get',
            'url' => self::PROVIDER_URL . $manga->getSlug(),
            'maxTimeout' => 90000,
        ]);

        try {
            $crawler = $this->client->request('POST', $url, [], [], [
                'HTTP_CONTENT_TYPE' => 'application/json',
            ], $jsonContent);
        } catch (\Exception $e) {
            // Surface the failure to the caller instead of dumping it and killing the process.
            throw new \RuntimeException('Unable to fetch the SushiScan catalogue page.', 0, $e);
        }

        $contentList = [];

        // NOTE(review): the selector targets <li> nodes, which usually carry no "href", and the
        // proxy presumably wraps the page HTML in a JSON envelope - confirm before relying on
        // this listing being non-empty.
        $crawler->filter('#chapterList ul > li')->each(function (Crawler $node) use (&$contentList) {
            $contentName = $node->text();
            $contentUrl = $node->attr('href');

            if ($contentName && $contentUrl) {
                $contentList[] = [
                    'name' => $contentName,
                    'url' => $contentUrl,
                ];
            }
        });

        return $contentList;
    }

    /**
     * Not implemented yet.
     *
     * Fails with an explicit exception instead of falling off the end of a method that
     * declares an array return type.
     *
     * @param string $mangaSlug
     * @return array
     *
     * @throws \LogicException Always.
     */
    public function getChapterList(string $mangaSlug): array
    {
        throw new \LogicException(sprintf('%s is not implemented yet.', __METHOD__));
    }
}