- Portage des fonctionnalités de la branche main

- Ajout de node et npm dans le Dockerfile

- Ajout des Factories et Fixtures

- Ajout de npm-install dans Make install
This commit is contained in:
Jérémy Guillot
2024-06-03 19:41:24 +02:00
parent 41a1a8c44c
commit 291e85338a
53 changed files with 11825 additions and 18 deletions

View File

@@ -0,0 +1,145 @@
<?php
namespace App\Service;
use App\Event\MangaScrapedEvent;
use GuzzleHttp\Client;
use PHPUnit\Util\PHP\AbstractPhpProcess;
use Psr\Container\ContainerInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Routing\Matcher\UrlMatcher;
use Symfony\Component\Routing\RequestContext;
use Symfony\Component\Routing\Route;
use Symfony\Component\Routing\RouteCollection;
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
/**
 * Scrapes the pages of a manga chapter from lelscans.net, stores every page
 * image under the project's public directory and dispatches a
 * MangaScrapedEvent once the chapter has been downloaded.
 */
class MangaScraperService
{
    /** Directory, relative to the project root, that receives the downloaded images. */
    public const IMG_BASE_DIR = '/public/manga-images';

    /** Origin used to turn relative image paths into absolute URLs. */
    private const BASE_URL = 'https://lelscans.net';

    /** CSS selector of the "next page" link on a reader page. */
    private const NEXT_LINK_SELECTOR = 'a[title="Suivant"]';

    /** Lazily created HTTP client, shared by every request made by this service. */
    private ?Client $httpClient = null;

    /**
     * @param string                   $projectDir      Root directory of the project; IMG_BASE_DIR is appended to it.
     * @param EventDispatcherInterface $eventDispatcher Dispatcher notified after a chapter has been scraped.
     */
    public function __construct(
        private string $projectDir,
        private EventDispatcherInterface $eventDispatcher,
    ) {
    }

    /**
     * Extracts the page image URL and the "next page" link from a reader page.
     *
     * The image URL is taken from the first <img> element and made absolute.
     * The crawler raises an exception when the document contains no <img>.
     *
     * @param string $html Raw HTML of a reader page.
     *
     * @return array{image_url: string, next_page_url: ?string} next_page_url is null
     *                                                          when the page has no "Suivant" link.
     */
    public function extractMangaPageData(string $html): array
    {
        $crawler = new Crawler($html);

        // attr() yields null when the element has no "src"; normalise it to an
        // empty string so the string functions below never receive null.
        $imageUrl = self::toAbsoluteUrl($crawler->filter('img')->attr('src') ?? '');

        $nextLink = $crawler->filter(self::NEXT_LINK_SELECTOR);
        $nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;

        return [
            'image_url' => $imageUrl,
            'next_page_url' => $nextUrl,
        ];
    }

    /**
     * Downloads every page reachable from $chapterUrl by following the
     * "Suivant" links, saving the images as 001.jpg, 002.jpg, ... in
     * <projectDir>/public/manga-images/<mangaTitle>/<chapterNumber>/.
     *
     * NOTE(review): $mangaTitle is interpolated into the filesystem path
     * unchanged; confirm that callers only pass trusted or sanitised titles.
     *
     * @param string $chapterUrl    URL of the first page of the chapter.
     * @param string $mangaTitle    Title used as directory name and in the local image URLs.
     * @param float  $chapterNumber Chapter number expected at $chapterUrl.
     *
     * @return array|bool List of page descriptors (image_url, next_page_url,
     *                    local_image_url, page_number), or false when the
     *                    chapter is not available.
     *
     * @throws \RuntimeException When the target directory or an image file cannot be written.
     */
    public function scrapeMangaChapter(string $chapterUrl, string $mangaTitle, float $chapterNumber): array|bool
    {
        if (!$this->isChapterAvailable($chapterUrl, $chapterNumber)) {
            return false;
        }

        // A single recursive mkdir creates the manga directory and the chapter directory.
        $chapterDir = sprintf('%s%s/%s/%s', $this->projectDir, self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
        $this->ensureDirectory($chapterDir);

        $pageData = [];
        $visitedUrls = [];
        $currentPageUrl = $chapterUrl;

        do {
            $visitedUrls[$currentPageUrl] = true;

            $page = $this->extractMangaPageData($this->fetchHtml($currentPageUrl));

            $pageNumber = count($pageData) + 1;
            $imageName = sprintf('%03d.jpg', $pageNumber);
            $this->downloadAndSaveImage($page['image_url'], $chapterDir . '/' . $imageName);

            // Expose where the image can be found locally, next to the scraped data.
            $page['local_image_url'] = sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName);
            $page['page_number'] = $pageNumber;
            $pageData[] = $page;

            $currentPageUrl = $page['next_page_url'];
            // Stop on the last page, and also when a "next" link points back to a
            // page that was already scraped, which would otherwise loop forever.
        } while ($currentPageUrl !== null && $currentPageUrl !== '' && !isset($visitedUrls[$currentPageUrl]));

        $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData);
        $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

        return $pageData;
    }

    /**
     * Returns the body of the HTTP response obtained for $url.
     */
    private function fetchHtml(string $url): string
    {
        return (string) $this->httpClient()->get($url)->getBody();
    }

    /**
     * Downloads $imageUrl and writes the response body to $destinationPath.
     *
     * @throws \RuntimeException When the file cannot be written.
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        $response = $this->httpClient()->get($imageUrl);

        if (file_put_contents($destinationPath, (string) $response->getBody()) === false) {
            throw new \RuntimeException(sprintf('Unable to write image to "%s".', $destinationPath));
        }
    }

    /**
     * Tells whether the page at $chapterUrl links, through its "Suivant"
     * link, to a page of chapter $chapterNumber.
     *
     * Returns false when there is no such link, when its path does not have
     * the /scan-{manga}/{chapter}/{page} shape, or when the chapter found in
     * the path differs from $chapterNumber.
     */
    private function isChapterAvailable(string $chapterUrl, float $chapterNumber): bool
    {
        $crawler = new Crawler($this->fetchHtml($chapterUrl));
        $nextLink = $crawler->filter(self::NEXT_LINK_SELECTOR);
        if ($nextLink->count() === 0) {
            return false;
        }

        // parse_url() gives null/false when the href has no usable path.
        $path = parse_url((string) $nextLink->attr('href'), PHP_URL_PATH);
        if (!is_string($path) || $path === '') {
            return false;
        }

        $routes = new RouteCollection();
        $routes->add('manga_chapter', new Route('/scan-{manga}/{chapter}/{page}'));
        $matcher = new UrlMatcher($routes, new RequestContext('/'));

        try {
            $parameters = $matcher->match($path);
        } catch (\Symfony\Component\Routing\Exception\ResourceNotFoundException) {
            // The link does not point to a reader page, so the chapter is not available.
            return false;
        }

        return (float) $parameters['chapter'] === $chapterNumber;
    }

    /**
     * Makes an image URL absolute.
     *
     * http(s) URLs are returned unchanged, protocol-relative URLs
     * ("//host/path") get the https scheme, and anything else is treated as a
     * path relative to the root of BASE_URL.
     */
    private static function toAbsoluteUrl(string $url): string
    {
        if (preg_match('/^https?:\/\//i', $url) === 1) {
            return $url;
        }

        if (str_starts_with($url, '//')) {
            return 'https:' . $url;
        }

        return self::BASE_URL . '/' . ltrim($url, '/');
    }

    /**
     * Creates $directory (and its parents) when it does not exist yet.
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    private function ensureDirectory(string $directory): void
    {
        // The trailing is_dir() covers the case where another process created
        // the directory between the first check and mkdir().
        if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
            throw new \RuntimeException(sprintf('Directory "%s" could not be created.', $directory));
        }
    }

    /**
     * Returns the shared HTTP client, creating it on first use.
     */
    private function httpClient(): Client
    {
        return $this->httpClient ??= new Client();
    }
}