- trop de trucs d'un coup... je vais faire attention ensuite ^^'
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
<?php
|
||||
namespace App\Service;
|
||||
|
||||
use App\Entity\Manga;
|
||||
use App\Interface\ContentProviderInterface;
|
||||
use Symfony\Component\BrowserKit\HttpBrowser as Client;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class LelScansProviderService implements MangaProviderInterface
|
||||
class LelScansProviderService implements ContentProviderInterface
|
||||
{
|
||||
const PROVIDER_URL = 'https://lelscans.net/';
|
||||
const MANGA_SLUG = '/{manga}/{chapter}/{page}';
|
||||
@@ -53,4 +55,13 @@ class LelScansProviderService implements MangaProviderInterface
|
||||
return $chapterList;
|
||||
}
|
||||
|
||||
/**
 * Placeholder for the lookup of the content available for a manga.
 *
 * The method is declared to return an array but has no implementation yet;
 * it now fails with an explicit exception instead of the engine-level
 * TypeError that an empty body with an `array` return type produces.
 *
 * @param Manga $manga Manga whose content should be listed.
 *
 * @return array Never returned for now.
 *
 * @throws \LogicException Always, until the method is implemented.
 */
#[\Override] public function getAvailableContent(Manga $manga): array
{
    // TODO: Implement getAvailableContent() method.
    throw new \LogicException(sprintf('%s() is not implemented yet.', __METHOD__));
}
|
||||
|
||||
/**
 * Placeholder for the retrieval of a manga's content.
 *
 * The method is declared to return an array but has no implementation yet;
 * it now fails with an explicit exception instead of the engine-level
 * TypeError that an empty body with an `array` return type produces.
 *
 * @param Manga $manga Manga whose content should be fetched.
 *
 * @return array Never returned for now.
 *
 * @throws \LogicException Always, until the method is implemented.
 */
#[\Override] public function getContent(Manga $manga): array
{
    // TODO: Implement getContent() method.
    throw new \LogicException(sprintf('%s() is not implemented yet.', __METHOD__));
}
|
||||
}
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use Doctrine\Common\Collections\Collection;
|
||||
|
||||
/**
 * Contract for a service that can be searched by title.
 *
 * NOTE(review): presumably implemented by manga database/metadata back-ends;
 * confirm against the implementing classes.
 */
interface MangaDbProviderInterface
|
||||
{
|
||||
/**
 * Searches for entries matching the given title.
 *
 * @param string $title Title to look for.
 *
 * @return Collection Matching entries; the element type is chosen by the implementation.
 */
public function search(string $title): Collection;
|
||||
}
|
||||
@@ -2,9 +2,11 @@
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use App\Interface\ContentProviderInterface;
|
||||
|
||||
class MangaProviderFactory
|
||||
{
|
||||
public static function create($providerName): MangaProviderInterface
|
||||
public static function create($providerName): ContentProviderInterface
|
||||
{
|
||||
return match ($providerName) {
|
||||
'LelScans' => new LelScansProviderService(),
|
||||
@@ -12,4 +14,4 @@ class MangaProviderFactory
|
||||
default => throw new \Exception("Provider {$providerName} non supporté."),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
/**
 * Contract for a provider exposing a list of mangas and, per manga, a list of chapters.
 */
interface MangaProviderInterface
|
||||
{
|
||||
/**
 * Returns the mangas known to the provider.
 *
 * @return array List of mangas; the element shape is defined by the implementation.
 */
public function getMangaList(): array;
|
||||
/**
 * Returns the chapters of one manga.
 *
 * @param string $mangaSlug Slug identifying the manga.
 *
 * @return array List of chapters; the element shape is defined by the implementation.
 */
public function getChapterList(string $mangaSlug): array;
|
||||
}
|
||||
@@ -2,6 +2,9 @@
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\Manga;
|
||||
use App\Entity\ContentSource;
|
||||
use App\EventSubscriber\MangaScrapedEvent;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
@@ -14,144 +17,256 @@ use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
class MangaScraperService
|
||||
{
|
||||
const string IMG_BASE_DIR = '/public/manga-images';
|
||||
private string $projectDir;
|
||||
private EventDispatcherInterface $eventDispatcher;
|
||||
const IMG_BASE_DIR = '/public/manga-images';
|
||||
private string $projectDir;
|
||||
private EventDispatcherInterface $eventDispatcher;
|
||||
|
||||
public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
|
||||
{
|
||||
$this->projectDir = $projectDir;
|
||||
$this->eventDispatcher = $eventDispatcher;
|
||||
}
|
||||
public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
|
||||
{
|
||||
$this->projectDir = $projectDir;
|
||||
$this->eventDispatcher = $eventDispatcher;
|
||||
}
|
||||
|
||||
public function extractMangaPageData(string $html): array
|
||||
{
|
||||
$baseUrl = 'https://lelscans.net';
|
||||
//pour éviter à PhpStorm de gueuler...
|
||||
$selector = 'img';
|
||||
$crawler = new Crawler($html);
|
||||
$imgUrl = $crawler->filter($selector)->attr('src');
|
||||
$nextLink = $crawler->filter('a[title="Suivant"]');
|
||||
public function extractMangaPageData(string $html, ContentSource $mangaSource): array
|
||||
{
|
||||
$crawler = new Crawler($html);
|
||||
$imgUrls = [];
|
||||
|
||||
if (!preg_match('/^https?:\/\//', $imgUrl)) {
|
||||
$urlComponents = parse_url($baseUrl);
|
||||
$scheme = $urlComponents['scheme'];
|
||||
$host = $urlComponents['host'];
|
||||
// Search for images with different extensions
|
||||
foreach (['img[src$=".jpg"]', 'img[src$=".jpeg"]', 'img[src$=".png"]', 'img'] as $selector) {
|
||||
$crawler->filter($selector)->each(function (Crawler $node) use (&$imgUrls) {
|
||||
$src = $node->attr('src') ?? $node->attr('data-src');
|
||||
if ($src) {
|
||||
$imgUrls[] = $src;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Construit l'URL absolue de l'image
|
||||
$imgUrl = $scheme . '://' . $host . '/' . ltrim($imgUrl, '/');
|
||||
}
|
||||
if (empty($imgUrls)) {
|
||||
throw new \Exception('No valid image found on the page.');
|
||||
}
|
||||
|
||||
if($nextLink->count() > 0){
|
||||
$nextUrl = $nextLink->attr('href');
|
||||
}else{
|
||||
$nextUrl = null;
|
||||
}
|
||||
$nextLink = $crawler->filter($mangaSource->getNextPageSelector());
|
||||
$nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
|
||||
|
||||
return [
|
||||
'image_url' => $imgUrl,
|
||||
'next_page_url' => $nextUrl,
|
||||
];
|
||||
}
|
||||
// Convert relative URLs to absolute URLs
|
||||
$baseUrl = $mangaSource->getBaseUrl();
|
||||
$imgUrls = array_map(function ($imgUrl) use ($baseUrl) {
|
||||
if (!preg_match('/^https?:\/\//', $imgUrl)) {
|
||||
$urlComponents = parse_url($baseUrl);
|
||||
$scheme = $urlComponents['scheme'];
|
||||
$host = $urlComponents['host'];
|
||||
$imgUrl = $scheme . '://' . $host . '/' . ltrim($imgUrl, '/');
|
||||
}
|
||||
return $imgUrl;
|
||||
}, $imgUrls);
|
||||
|
||||
return [
|
||||
'image_urls' => $imgUrls,
|
||||
'next_page_url' => $nextUrl,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
public function scrapeMangaChapter(string $chapterUrl, string $mangaTitle, float $chapterNumber): array|bool
|
||||
{
|
||||
if(!$this->isChapterAvailable($chapterUrl, $chapterNumber)){
|
||||
return false;
|
||||
}
|
||||
public function scrapeManga(Manga $manga, ContentSource $mangaSource): array
|
||||
{
|
||||
$allChaptersData = [];
|
||||
|
||||
$pageData = [];
|
||||
$currentPageUrl = $chapterUrl;
|
||||
foreach ($manga->getChapters() as $chapter) {
|
||||
$chapterData = $this->scrapeChapter($manga, $chapter, $mangaSource);
|
||||
if ($chapterData !== false) {
|
||||
$allChaptersData[$chapter->getNumber()] = $chapterData;
|
||||
}
|
||||
}
|
||||
|
||||
$mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
|
||||
if (!is_dir($mangaDir)) {
|
||||
mkdir($mangaDir, 0755, true);
|
||||
}
|
||||
return $allChaptersData;
|
||||
}
|
||||
|
||||
// Créez le dossier du chapitre s'il n'existe pas
|
||||
$chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
|
||||
if (!is_dir($chapterDir)) {
|
||||
mkdir($chapterDir, 0755, true);
|
||||
}
|
||||
/**
 * Routes the scraping of one chapter to the strategy named by the content source.
 *
 * @param Manga         $manga       Manga the chapter belongs to.
 * @param Chapter       $chapter     Chapter to scrape.
 * @param ContentSource $mangaSource Source whose scraping type selects the strategy.
 *
 * @return array|bool Whatever the selected strategy returns.
 *
 * @throws \Exception When the source declares a scraping type that has no strategy.
 */
private function scrapeChapter(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
    $scrapingType = $mangaSource->getScrapingType();

    return match ($scrapingType) {
        'html' => $this->scrapeChapterHtml($manga, $chapter, $mangaSource),
        'javascript' => $this->scrapeChapterJavaScript($manga, $chapter, $mangaSource),
        // An 'api' strategy (scrapeChapterApi) is planned but not wired in yet.
        default => throw new \Exception('Unsupported scraping type: ' . $scrapingType),
    };
}
|
||||
|
||||
do {
|
||||
$html = $this->fetchHtml($currentPageUrl);
|
||||
$page = $this->extractMangaPageData($html);
|
||||
$pageData[] = $page;
|
||||
$currentPageUrl = $page['next_page_url'];
|
||||
// private function scrapeChapterHtml(Manga $manga, Chapter $chapter, MangaSource $mangaSource): array|bool
|
||||
// {
|
||||
// $chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getChapterNumber());
|
||||
// $html = $this->fetchHtml($chapterUrl);
|
||||
// $imgUrls = $this->extractMangaPageData($html);
|
||||
//
|
||||
// return $this->saveChapterImages($manga, $chapter, $imgUrls);
|
||||
// }
|
||||
|
||||
// Construisez le nom de fichier de l'image
|
||||
$imageName = sprintf('%03d.jpg', count($pageData));
|
||||
private function scrapeChapterJavaScript(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
|
||||
{
|
||||
$chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getNumber());
|
||||
$imgUrls = $this->fetchImagesUsingPuppeteer($chapterUrl, $mangaSource->getImageSelector(), $mangaSource->getNextPageSelector());
|
||||
|
||||
// Construisez le chemin du fichier de l'image
|
||||
$imagePath = sprintf('%s/%s', $chapterDir, $imageName);
|
||||
return $this->saveChapterImages($manga, $chapter, $imgUrls);
|
||||
}
|
||||
|
||||
// Téléchargez et enregistrez l'image
|
||||
$this->downloadAndSaveImage($page['image_url'], $imagePath);
|
||||
private function fetchImagesUsingPuppeteer(string $url, string $imageSelector, string $nextButtonSelector): array
|
||||
{
|
||||
// Appeler le script Puppeteer avec les paramètres nécessaires
|
||||
$output = [];
|
||||
$command = sprintf('node puppeteer-script.js "%s" "%s" "%s" 2>&1', $url, $imageSelector, $nextButtonSelector); // Redirect stderr to stdout
|
||||
dump($command);
|
||||
// exec($command, $output, $return_var);
|
||||
|
||||
// Modifiez les données de la page pour inclure l'URL de l'image stockée localement
|
||||
$pageData[count($pageData) - 1]['local_image_url'] = sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName);
|
||||
$pageData[count($pageData) - 1]['page_number'] = count($pageData);
|
||||
dd($command, $output);
|
||||
|
||||
} while ($currentPageUrl);
|
||||
// Convertir la sortie JSON en tableau PHP
|
||||
return json_decode(implode("", $output), true);
|
||||
}
|
||||
|
||||
$event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
|
||||
$this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
private function scrapeChapterHtml(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
|
||||
{
|
||||
$chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
|
||||
|
||||
return $pageData;
|
||||
}
|
||||
$pageData = [];
|
||||
$currentPageUrl = $chapterUrl;
|
||||
$mangaTitle = $manga->getTitle();
|
||||
$chapterNumber = $chapter->getNumber();
|
||||
|
||||
$mangaDir = sprintf('%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle);
|
||||
if (!is_dir($mangaDir)) {
|
||||
mkdir($mangaDir, 0755, true);
|
||||
}
|
||||
|
||||
$chapterDir = sprintf('%s/%s', $mangaDir, $chapterNumber);
|
||||
if (!is_dir($chapterDir)) {
|
||||
mkdir($chapterDir, 0755, true);
|
||||
}
|
||||
|
||||
do {
|
||||
$html = $this->fetchHtml($currentPageUrl);
|
||||
$page = $this->extractMangaPageData($html, $mangaSource);
|
||||
|
||||
foreach ($page['image_urls'] as $imgUrl) {
|
||||
dump($imgUrl);
|
||||
dump(base64_decode($imgUrl));
|
||||
// Déterminer l'extension de l'image
|
||||
$imageExtension = pathinfo(parse_url($imgUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
|
||||
|
||||
// Construire le nom de fichier de l'image
|
||||
$imageName = sprintf('%03d.%s', count($pageData) + 1, $imageExtension);
|
||||
$imagePath = sprintf('%s/%s', $chapterDir, $imageName);
|
||||
|
||||
$this->downloadAndSaveImage($imgUrl, $imagePath);
|
||||
|
||||
$pageData[] = [
|
||||
'image_url' => $imgUrl,
|
||||
'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
|
||||
'page_number' => count($pageData) + 1,
|
||||
];
|
||||
}
|
||||
|
||||
// Si plus d'une image a été trouvée, ne pas chercher la page suivante
|
||||
if (count($page['image_urls']) > 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
$currentPageUrl = $page['next_page_url'];
|
||||
} while ($currentPageUrl);
|
||||
|
||||
$event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
|
||||
$this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);
|
||||
|
||||
return $pageData;
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
private function fetchHtml(string $url): string
|
||||
{
|
||||
$client = new Client();
|
||||
$response = $client->get($url);
|
||||
{
|
||||
$client = new Client();
|
||||
$response = $client->get($url);
|
||||
|
||||
return (string) $response->getBody();
|
||||
}
|
||||
return (string)$response->getBody();
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
|
||||
{
|
||||
$client = new Client();
|
||||
$response = $client->get($imageUrl);
|
||||
{
|
||||
$client = new Client();
|
||||
$response = $client->get($imageUrl);
|
||||
|
||||
file_put_contents($destinationPath, $response->getBody()->getContents());
|
||||
}
|
||||
file_put_contents($destinationPath, $response->getBody()->getContents());
|
||||
}
|
||||
|
||||
/**
 * Downloads the images of a chapter, stores them on disk and announces the result.
 *
 * Files are written to <projectDir>/public/manga-images/<manga title>/<chapter number>/
 * and named after their 1-based position in $imgUrls ("001.jpg", "002.png", ...).
 * A MangaScrapedEvent carrying the page data is dispatched once every image is saved.
 *
 * @param Manga   $manga   Manga whose title names the target directory.
 * @param Chapter $chapter Chapter whose number names the sub-directory.
 * @param array   $imgUrls Image URLs, in reading order.
 *
 * @return array One entry per image with the keys image_url, local_image_url and page_number.
 *
 * @throws \RuntimeException When the chapter directory cannot be created.
 * @throws GuzzleException   When an image cannot be downloaded.
 */
private function saveChapterImages(Manga $manga, Chapter $chapter, array $imgUrls): array
{
    $mangaTitle = $manga->getTitle();
    $chapterNumber = $chapter->getNumber();

    // One recursive mkdir creates the manga directory and the chapter directory together.
    // The trailing is_dir() tolerates another process creating the directory concurrently.
    $chapterDir = sprintf('%s/%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
    if (!is_dir($chapterDir) && !mkdir($chapterDir, 0755, true) && !is_dir($chapterDir)) {
        throw new \RuntimeException(sprintf('Directory "%s" could not be created.', $chapterDir));
    }

    $pageData = [];
    // array_values(): page numbers follow the order of the URLs, whatever keys the caller used.
    foreach (array_values($imgUrls) as $index => $imgUrl) {
        $pageNumber = $index + 1;

        // parse_url() yields null/false for URLs without a usable path; never hand that to pathinfo().
        $urlPath = parse_url($imgUrl, PHP_URL_PATH);
        $extension = is_string($urlPath) ? pathinfo($urlPath, PATHINFO_EXTENSION) : '';
        if ($extension === '') {
            // Avoid file names such as "001." when the URL carries no extension.
            $extension = 'jpg';
        }

        $imageName = sprintf('%03d.%s', $pageNumber, $extension);
        $this->downloadAndSaveImage($imgUrl, sprintf('%s/%s', $chapterDir, $imageName));

        $pageData[] = [
            'image_url' => $imgUrl,
            'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
            'page_number' => $pageNumber,
        ];
    }

    $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
    $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

    return $pageData;
}
|
||||
|
||||
/**
|
||||
* @throws GuzzleException
|
||||
*/
|
||||
private function isChapterAvailable(string $chapterUrl, float $chapterNumber): bool
|
||||
{
|
||||
$html = $this->fetchHtml($chapterUrl);
|
||||
$crawler = new Crawler($html);
|
||||
$nextLink = $crawler->filter('a[title="Suivant"]');
|
||||
private function isChapterAvailable(string $chapterUrl, float $chapterNumber, ContentSource $mangaSource): bool
|
||||
{
|
||||
$html = $this->fetchHtml($chapterUrl);
|
||||
$crawler = new Crawler($html);
|
||||
$nextLink = $crawler->filter($mangaSource->getNextPageSelector());
|
||||
|
||||
if($nextLink->count() === 0){
|
||||
return false;
|
||||
}else{
|
||||
$nextUrl = $nextLink->attr('href');
|
||||
}
|
||||
if ($nextLink->count() === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$routeCollection = new RouteCollection();
|
||||
$routeCollection->add('manga_chapter', new Route('/scan-{manga}/{chapter}/{page}'));
|
||||
$context = new RequestContext('/');
|
||||
$matcher = new UrlMatcher($routeCollection, $context);
|
||||
$path = parse_url($nextUrl, PHP_URL_PATH);
|
||||
$parameters = $matcher->match($path);
|
||||
$nextUrl = $nextLink->attr('href');
|
||||
$routeCollection = new RouteCollection();
|
||||
$routeCollection->add('manga_chapter', new Route('/scan-{manga}/{chapter}/{page}'));
|
||||
$context = new RequestContext('/');
|
||||
$matcher = new UrlMatcher($routeCollection, $context);
|
||||
$path = parse_url($nextUrl, PHP_URL_PATH);
|
||||
$parameters = $matcher->match($path);
|
||||
|
||||
if((float) $parameters['chapter'] !== $chapterNumber){
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return (float)$parameters['chapter'] === $chapterNumber;
|
||||
}
|
||||
}
|
||||
|
||||
157
src/Service/MangaScraperServiceOld.php
Normal file
157
src/Service/MangaScraperServiceOld.php
Normal file
@@ -0,0 +1,157 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use App\EventSubscriber\MangaScrapedEvent;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Component\Routing\Matcher\UrlMatcher;
|
||||
use Symfony\Component\Routing\RequestContext;
|
||||
use Symfony\Component\Routing\Route;
|
||||
use Symfony\Component\Routing\RouteCollection;
|
||||
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
/**
 * Previous, single-site version of the manga scraper (hard-wired to lelscans.net).
 *
 * It walks a chapter page by page by following the "Suivant" link, stores each
 * page image below IMG_BASE_DIR and dispatches a MangaScrapedEvent when the walk ends.
 */
class MangaScraperServiceOld
{
    /** Image root, relative to the project directory. */
    const string IMG_BASE_DIR = '/public/manga-images';

    private string $projectDir;
    private EventDispatcherInterface $eventDispatcher;

    /** HTTP client, created on first use and then shared by every request of this instance. */
    private ?Client $httpClient = null;

    /**
     * @param string                   $projectDir      Absolute path of the project root.
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used to announce scraped chapters.
     */
    public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
    {
        $this->projectDir = $projectDir;
        $this->eventDispatcher = $eventDispatcher;
    }

    /**
     * Extracts the page image URL and the "next page" URL from a reader page.
     *
     * @param string $html HTML of one reader page.
     *
     * @return array Keys: image_url (absolute URL of the first <img>) and
     *               next_page_url (href of the "Suivant" link, or null when absent).
     *
     * @throws \InvalidArgumentException When the page has no <img> with a usable src.
     */
    public function extractMangaPageData(string $html): array
    {
        $baseUrl = 'https://lelscans.net';
        $crawler = new Crawler($html);

        // Crawler::attr() throws on an empty node list and returns null for a missing
        // attribute; report both cases explicitly instead of building a bogus URL.
        $images = $crawler->filter('img');
        $imgUrl = $images->count() > 0 ? $images->attr('src') : null;
        if ($imgUrl === null || $imgUrl === '') {
            throw new \InvalidArgumentException('No image with a "src" attribute was found on the page.');
        }

        if (!preg_match('/^https?:\/\//', $imgUrl)) {
            $urlComponents = parse_url($baseUrl);
            $scheme = $urlComponents['scheme'];

            if (str_starts_with($imgUrl, '//')) {
                // Protocol-relative URL: it already names its host, only the scheme is missing.
                $imgUrl = $scheme . ':' . $imgUrl;
            } else {
                // Site-relative path: build the absolute image URL.
                $imgUrl = $scheme . '://' . $urlComponents['host'] . '/' . ltrim($imgUrl, '/');
            }
        }

        $nextLink = $crawler->filter('a[title="Suivant"]');
        $nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;

        return [
            'image_url' => $imgUrl,
            'next_page_url' => $nextUrl,
        ];
    }

    /**
     * Downloads every page of a chapter and dispatches a MangaScrapedEvent.
     *
     * @param string $chapterUrl    URL of the first page of the chapter.
     * @param string $mangaTitle    Title, used as directory name.
     * @param float  $chapterNumber Chapter number, used as sub-directory name.
     *
     * @return array|bool False when the chapter is not available, otherwise one entry per
     *                    page with the keys image_url, next_page_url, local_image_url and page_number.
     *
     * @throws GuzzleException   When a page or an image cannot be fetched.
     * @throws \RuntimeException When the chapter directory or an image file cannot be written.
     */
    public function scrapeMangaChapter(string $chapterUrl, string $mangaTitle, float $chapterNumber): array|bool
    {
        if (!$this->isChapterAvailable($chapterUrl, $chapterNumber)) {
            return false;
        }

        // One recursive mkdir creates the manga directory and the chapter directory together.
        $chapterDir = sprintf('%s/%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
        if (!is_dir($chapterDir) && !mkdir($chapterDir, 0755, true) && !is_dir($chapterDir)) {
            throw new \RuntimeException(sprintf('Directory "%s" could not be created.', $chapterDir));
        }

        $pageData = [];
        $visitedUrls = [];
        $currentPageUrl = $chapterUrl;

        do {
            // Remember visited pages so that a "next" link pointing backwards cannot loop forever.
            $visitedUrls[$currentPageUrl] = true;

            $page = $this->extractMangaPageData($this->fetchHtml($currentPageUrl));
            $pageNumber = count($pageData) + 1;
            $imageName = sprintf('%03d.jpg', $pageNumber);

            $this->downloadAndSaveImage($page['image_url'], sprintf('%s/%s', $chapterDir, $imageName));

            // Expose where the image was stored locally, next to the scraped URLs.
            $page['local_image_url'] = sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName);
            $page['page_number'] = $pageNumber;
            $pageData[] = $page;

            $currentPageUrl = $page['next_page_url'];
        } while ($currentPageUrl && !isset($visitedUrls[$currentPageUrl]));

        $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
        $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

        return $pageData;
    }

    /**
     * Returns the shared HTTP client, creating it on first use.
     */
    private function httpClient(): Client
    {
        return $this->httpClient ??= new Client();
    }

    /**
     * Fetches a URL and returns the response body.
     *
     * @throws GuzzleException
     */
    private function fetchHtml(string $url): string
    {
        return (string) $this->httpClient()->get($url)->getBody();
    }

    /**
     * Downloads an image and writes it to the given path.
     *
     * @throws GuzzleException
     * @throws \RuntimeException When the file cannot be written.
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        $response = $this->httpClient()->get($imageUrl);

        if (file_put_contents($destinationPath, (string) $response->getBody()) === false) {
            throw new \RuntimeException(sprintf('Unable to write image to "%s".', $destinationPath));
        }
    }

    /**
     * Tells whether the chapter page exists, judged by its "Suivant" link.
     *
     * The chapter counts as available when the page has a "Suivant" link whose path
     * matches /scan-{manga}/{chapter}/{page} with {chapter} equal to $chapterNumber.
     *
     * @throws GuzzleException
     */
    private function isChapterAvailable(string $chapterUrl, float $chapterNumber): bool
    {
        $crawler = new Crawler($this->fetchHtml($chapterUrl));
        $nextLink = $crawler->filter('a[title="Suivant"]');

        if ($nextLink->count() === 0) {
            return false;
        }

        $path = parse_url((string) $nextLink->attr('href'), PHP_URL_PATH);
        if (!is_string($path)) {
            return false;
        }

        $routeCollection = new RouteCollection();
        $routeCollection->add('manga_chapter', new Route('/scan-{manga}/{chapter}/{page}'));
        $matcher = new UrlMatcher($routeCollection, new RequestContext('/'));

        try {
            $parameters = $matcher->match($path);
        } catch (\Symfony\Component\Routing\Exception\ResourceNotFoundException) {
            // A link that does not look like a reader page cannot confirm the chapter.
            return false;
        }

        return (float) $parameters['chapter'] === $chapterNumber;
    }
}
|
||||
@@ -3,20 +3,19 @@
|
||||
namespace App\Service;
|
||||
|
||||
use App\Entity\Manga;
|
||||
use App\Interface\MetadataProviderInterface;
|
||||
use Doctrine\Common\Collections\ArrayCollection;
|
||||
use Doctrine\Common\Collections\Collection;
|
||||
use Exception;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use Symfony\Component\BrowserKit\HttpBrowser;
|
||||
use Symfony\Component\String\Slugger\SluggerInterface;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
class MangaUpdatesDbProvider implements MangaDbProviderInterface
|
||||
class MangaUpdatesMetadataProvider implements MetadataProviderInterface
|
||||
{
|
||||
private Client $client;
|
||||
|
||||
public function __construct(private SluggerInterface $slugger)
|
||||
public function __construct(private readonly SluggerInterface $slugger)
|
||||
{
|
||||
$this->client = new Client();
|
||||
}
|
||||
@@ -40,6 +39,9 @@ class MangaUpdatesDbProvider implements MangaDbProviderInterface
|
||||
$results = $this->client->request('POST', 'https://api.mangaupdates.com/v1/series/search', [
|
||||
'json' => [
|
||||
'search' => $title,
|
||||
'licensed' => 'yes',
|
||||
'type' => ['Manga'],
|
||||
'exclude_genre' => ['Doujinshi', 'Adult', 'Hentai', 'Ecchi', 'Yaoi', 'Yuri', 'Josei', 'Smut', 'Gender Bender'],
|
||||
'orderby' => 'score',
|
||||
]
|
||||
])->withHeader('Authorization', 'Bearer ' . $jwt)
|
||||
@@ -50,13 +52,21 @@ class MangaUpdatesDbProvider implements MangaDbProviderInterface
|
||||
$mangas = [];
|
||||
foreach (json_decode($results, true)['results'] as $record) {
|
||||
$record = $record['record'];
|
||||
|
||||
$genres = [];
|
||||
foreach ($record['genres'] as $genre) {
|
||||
$genres[] = $genre['genre'];
|
||||
}
|
||||
|
||||
$mangas[] = (new Manga())
|
||||
->setTitle($record['title'])
|
||||
->setSlug($this->slugger->slug($record['title'])->lower())
|
||||
->setDescription($record['description'])
|
||||
->setImageUrl($record['image']['url']['original'])
|
||||
->setGenres($record['genres'])
|
||||
->setPublicationYear((int)$record['year']);
|
||||
->setGenres($genres)
|
||||
->setPublicationYear((int)$record['year'])
|
||||
->setRating((float)$record['bayesian_rating'])
|
||||
;
|
||||
}
|
||||
|
||||
return new ArrayCollection($mangas);
|
||||
123
src/Service/MangadexProvider.php
Normal file
123
src/Service/MangadexProvider.php
Normal file
@@ -0,0 +1,123 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\Manga;
|
||||
use App\Interface\ClientInterface;
|
||||
use App\Interface\MetadataProviderInterface;
|
||||
use Doctrine\Common\Collections\ArrayCollection;
|
||||
use Doctrine\Common\Collections\Collection;
|
||||
use Symfony\Component\String\Slugger\SluggerInterface;
|
||||
|
||||
/**
 * Metadata provider that reads manga information and chapter feeds through
 * the injected API client (MangaDex endpoints: /manga, /statistics/manga, /manga/{id}/feed).
 */
readonly class MangadexProvider implements MetadataProviderInterface
{
    public function __construct(private ClientInterface $client, private SluggerInterface $slugger)
    {
    }

    /**
     * Searches mangas by title and returns them as unsaved Manga entities.
     *
     * Each result carries its external id, title, slug, description (French when
     * available, else English), publication year, genres, author, cover URL and,
     * when the statistics endpoint reports one, its average rating.
     *
     * @param string|null $title Title to search for; null yields an empty collection.
     *
     * @return Collection Collection of Manga entities, possibly empty.
     */
    public function search(?string $title): Collection
    {
        if ($title === null) {
            return new ArrayCollection();
        }

        $results = $this->client->get('/manga', [
            'title' => $title,
            'contentRating' => ['safe'],
            'includes' => ['cover_art', 'author'],
        ]);

        $mangas = [];
        foreach ($results['data'] ?? [] as $result) {
            $attributes = $result['attributes'];

            // Titles are keyed by language and 'en' is not guaranteed:
            // fall back to the first available title, skip entries without any.
            $titles = $attributes['title'] ?? [];
            $mangaTitle = $titles['en'] ?? (array_values($titles)[0] ?? null);
            if ($mangaTitle === null) {
                continue;
            }

            $manga = (new Manga())
                ->setExternalId($result['id'])
                ->setTitle($mangaTitle)
                ->setSlug($this->slugger->slug($mangaTitle)->lower())
                ->setDescription($attributes['description']['fr'] ?? $attributes['description']['en'] ?? '')
                ->setPublicationYear($attributes['year'])
            ;

            $manga->setGenres(array_map(
                static fn (array $tag) => $tag['attributes']['name']['en'],
                $attributes['tags'],
            ));

            foreach ($result['relationships'] ?? [] as $relationship) {
                // Relationship attributes are only present when the API expanded them.
                $relationAttributes = $relationship['attributes'] ?? [];

                if ($relationship['type'] === 'author' && isset($relationAttributes['name'])) {
                    $manga->setAuthor($relationAttributes['name']);
                }

                if ($relationship['type'] === 'cover_art' && isset($relationAttributes['fileName'])) {
                    $manga->setImageUrl('https://mangadex.org/covers/' . $result['id'] . '/' . $relationAttributes['fileName']);
                }
            }

            $mangas[] = $manga;
        }

        // Nothing found: do not query statistics with an empty id list.
        if ($mangas === []) {
            return new ArrayCollection();
        }

        $ratings = $this->client->get('/statistics/manga', [
            'manga' => array_map(static fn (Manga $manga) => $manga->getExternalId(), $mangas),
        ]);

        foreach ($mangas as $manga) {
            // The average may be missing or null; leave the rating untouched in that case.
            $average = $ratings['statistics'][$manga->getExternalId()]['rating']['average'] ?? null;
            if ($average !== null) {
                $manga->setRating($average);
            }
        }

        return new ArrayCollection($mangas);
    }

    /**
     * Adds to the manga every chapter of its feed that it does not hold yet.
     *
     * Chapters are matched on their number; a manga without external id is returned untouched.
     *
     * @param Manga $manga Manga to complete.
     *
     * @return Manga The same instance, with the missing chapters added.
     */
    public function getFeed(Manga $manga): Manga
    {
        $externalId = $manga->getExternalId();
        if ($externalId === null) {
            return $manga;
        }

        $chapters = [];
        $page = 0;

        do {
            $results = $this->getFeedWithPagination($externalId, $page);
            $batch = $results['data'] ?? [];

            // Stop on a missing or empty page: looping on `total` alone would never
            // terminate if the API stops returning entries before reaching it.
            if ($batch === []) {
                break;
            }

            $chapters = array_merge($chapters, $batch);
            $page++;
        } while (count($chapters) < ($results['total'] ?? 0));

        foreach ($chapters as $result) {
            $attributes = $result['attributes'];
            $chapterNumber = (float) $attributes['chapter'];

            // Skip chapters whose number is already attached to the manga.
            $chapterExists = $manga->getChapters()->exists(
                static fn ($key, $existingChapter): bool => (float) $existingChapter->getNumber() === $chapterNumber
            );
            if ($chapterExists) {
                continue;
            }

            // `(int) $x ?? null` casts first, so the null branch was unreachable:
            // resolve the missing volume before casting.
            $volume = $attributes['volume'] ?? null;

            $chapter = new Chapter();
            $chapter->setNumber($chapterNumber)
                ->setTitle($attributes['title'])
                ->setVolume($volume !== null ? (int) $volume : null);

            $manga->addChapter($chapter);
        }

        return $manga;
    }

    /**
     * Fetches one page (500 entries) of the English chapter feed, ordered by chapter.
     *
     * @param string $externalId External id of the manga.
     * @param int    $page       Zero-based page index.
     *
     * @return mixed Decoded response of the client; read as an array with `data` and `total` keys.
     */
    private function getFeedWithPagination(string $externalId, int $page)
    {
        return $this->client->get('/manga/' . $externalId . '/feed', [
            'limit' => 500,
            'translatedLanguage' => ['en'],
            'order' => ['chapter' => 'asc'],
            'offset' => $page * 500,
        ]);
    }
}
|
||||
@@ -2,33 +2,72 @@
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use Goutte\Client;
|
||||
use App\Entity\Manga;
|
||||
use App\Interface\ContentProviderInterface;
|
||||
use Symfony\Component\BrowserKit\HttpBrowser;
|
||||
use Symfony\Component\BrowserKit\HttpBrowser as Client;
|
||||
//use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Component\HttpClient\HttpClient;
|
||||
|
||||
class SushiScanProviderService implements MangaProviderInterface
|
||||
class SushiScanProviderService
|
||||
{
|
||||
const PROVIDER_URL = 'https://sushiscan.com/';
|
||||
const MANGA_SLUG = '/{manga}/{chapter}/{page}';
|
||||
private Client $client;
|
||||
const PROVIDER_URL = 'https://sushiscan.net/catalogue/';
|
||||
const MANGA_SLUG = '/{manga}/{chapter}/{page}';
|
||||
|
||||
public function __construct()
|
||||
{
|
||||
$this->client = new Client();
|
||||
}
|
||||
const CONTENT_TYPE = ['volume', 'chapitre'];
|
||||
private Client $client;
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function getMangaList(): array
|
||||
{
|
||||
// TODO: Implement getMangaList() method.
|
||||
}
|
||||
public function __construct()
|
||||
{
|
||||
$httpClient = HttpClient::create(['timeout' => 60]);
|
||||
$this->client = new HttpBrowser($httpClient);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $mangaSlug
|
||||
* @return array
|
||||
*/
|
||||
public function getChapterList(string $mangaSlug): array
|
||||
{
|
||||
// TODO: Implement getChapterList() method.
|
||||
}
|
||||
}
|
||||
public function getAvailableContent(Manga $manga)
|
||||
{
|
||||
$url = 'http://flaresolverr:8191/v1';
|
||||
$jsonContent = json_encode([
|
||||
'cmd' => 'request.get',
|
||||
'url' => self::PROVIDER_URL . $manga->getSlug(),
|
||||
'maxTimeout' => 90000,
|
||||
]);
|
||||
|
||||
|
||||
try{
|
||||
$crawler = $this->client->request('POST', $url, [], [], [
|
||||
'HTTP_CONTENT_TYPE' => 'application/json',
|
||||
], $jsonContent);
|
||||
|
||||
}catch (\Exception $e) {
|
||||
dd($e);
|
||||
}
|
||||
$contentList = [];
|
||||
|
||||
dd($crawler);
|
||||
|
||||
$crawler->filter('#chapterList ul > li')->each(function (Crawler $node) use (&$contentList) {
|
||||
dump($node);
|
||||
// $contentName = $node->text();
|
||||
// $contentUrl = $node->attr('href');
|
||||
// if ($contentName && $contentUrl) {
|
||||
// $contentList[] = [
|
||||
// 'name' => $contentName,
|
||||
// 'url' => $contentUrl,
|
||||
// ];
|
||||
// }
|
||||
});
|
||||
|
||||
return $contentList;
|
||||
}
|
||||
|
||||
/**
 * Placeholder for the chapter listing of a manga.
 *
 * The method is declared to return an array but has no implementation yet;
 * it now fails with an explicit exception instead of the engine-level
 * TypeError that an empty body with an `array` return type produces.
 *
 * @param string $mangaSlug Slug identifying the manga.
 *
 * @return array Never returned for now.
 *
 * @throws \LogicException Always, until the method is implemented.
 */
public function getChapterList(string $mangaSlug): array
{
    // TODO: Implement getChapterList() method.
    throw new \LogicException(sprintf('%s() is not implemented yet.', __METHOD__));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user