- Refactor MangaScraperService (not used everywhere now)
- Added JavascriptScraper.php
- Added alternative slugs in Manga.php
- Improvement in manga edit form
This commit is contained in:
Jérémy Guillot
2024-07-21 19:08:46 +02:00
parent ff59aa5d77
commit fafff5014c
21 changed files with 1180 additions and 28 deletions

View File

@@ -0,0 +1,26 @@
import {Controller} from '@hotwired/stimulus';
/*
* The following line makes this controller "lazy": it won't be downloaded until needed
* See https://github.com/symfony/stimulus-bridge#lazy-controllers
*/
/* stimulusFetch: 'lazy' */
/**
 * Manages a dynamic list of form rows built from an HTML prototype.
 *
 * Targets:
 *  - container: element that receives newly added rows
 *  - template:  element whose innerHTML is the row prototype; every `__name__`
 *               placeholder in it is replaced by the next row index
 *  - item:      rows already present when the controller connects
 */
export default class extends Controller {
    static targets = ['container', 'template', 'item'];

    connect() {
        // Continue numbering after the rows that are already rendered.
        this.index = this.itemTargets.length;
    }

    /** Appends a new row built from the prototype and bumps the row index. */
    add(event) {
        event.preventDefault();

        const markup = this.templateTarget.innerHTML.replace(/__name__/g, this.index);
        this.containerTarget.insertAdjacentHTML('beforeend', markup);
        this.index++;
    }

    /** Removes the `.collection-item` row that contains the clicked element. */
    remove(event) {
        event.preventDefault();

        // closest() returns null when the action is wired outside of a row;
        // optional chaining turns that case into a no-op instead of a TypeError.
        event.target.closest('.collection-item')?.remove();
    }
}

View File

@@ -76,3 +76,29 @@ services:
App\Service\MangadexProvider:
arguments:
$client: '@App\Client\MangadexClient'
# Scrapers
App\Service\Scraper\HtmlScraper:
arguments:
$projectDir: '%kernel.project_dir%'
tags: [ 'app.scraper' ]
App\Service\Scraper\JavascriptScraper:
arguments:
$projectDir: '%kernel.project_dir%'
tags: [ 'app.scraper' ]
App\Service\Scraper\MangadexScraper:
arguments:
$projectDir: '%kernel.project_dir%'
tags: [ 'app.scraper' ]
# Scraper Factory
App\Service\Scraper\ScraperFactory:
arguments:
$scrapers: !tagged_iterator app.scraper
# Manga Scraper Service
App\Service\Scraper\MangaScraperService:
arguments:
$scraperFactory: '@App\Service\Scraper\ScraperFactory'

View File

@@ -0,0 +1,34 @@
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Adds the nullable `content_source.chapter_selector` column and drops the
 * column default of `manga.monitored`.
 */
final class Version20240721142304 extends AbstractMigration
{
    public function getDescription(): string
    {
        return 'Add content_source.chapter_selector and drop the default of manga.monitored';
    }

    public function up(Schema $schema): void
    {
        $this->addSql('ALTER TABLE content_source ADD chapter_selector VARCHAR(255) DEFAULT NULL');
        $this->addSql('ALTER TABLE manga ALTER monitored DROP DEFAULT');
    }

    public function down(Schema $schema): void
    {
        // The auto-generated "CREATE SCHEMA public" statement was removed on purpose:
        // up() never drops that schema, so recreating it here makes the rollback fail
        // whenever the schema already exists.
        $this->addSql('ALTER TABLE manga ALTER monitored SET DEFAULT false');
        $this->addSql('ALTER TABLE content_source DROP chapter_selector');
    }
}

View File

@@ -0,0 +1,32 @@
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Adds the nullable JSON column `manga.alternative_slugs`.
 */
final class Version20240721145225 extends AbstractMigration
{
    public function getDescription(): string
    {
        return 'Add manga.alternative_slugs';
    }

    public function up(Schema $schema): void
    {
        $this->addSql('ALTER TABLE manga ADD alternative_slugs JSON DEFAULT NULL');
    }

    public function down(Schema $schema): void
    {
        // The auto-generated "CREATE SCHEMA public" statement was removed on purpose:
        // up() never drops that schema, so recreating it here makes the rollback fail
        // whenever the schema already exists.
        $this->addSql('ALTER TABLE manga DROP alternative_slugs');
    }
}

View File

@@ -4,6 +4,7 @@ namespace App\Controller;
use App\Entity\Chapter;
use App\Entity\Manga;
use App\Form\MangaEditType;
use App\Manager\Toolbar\Factory\ToolbarFactory;
use App\Message\DownloadChapter;
use App\Message\RefreshMetadata;
@@ -79,9 +80,12 @@ class MangaController extends AbstractController
throw new NotFoundHttpException("Le manga demandé n'existe pas.");
}
$form = $this->createForm(MangaEditType::class, $manga);
return $this->render('manga/show_chapters.html.twig', [
'manga' => $manga,
'toolbar' => $this->toolbarFactory->createToolbar('chapter_list', ['mangaId' => $manga->getId(), 'isMonitored' => (int) $manga->isMonitored()])->getGroups(),
'form' => $form->createView(),
]);
}
@@ -101,6 +105,25 @@ class MangaController extends AbstractController
}
}
/**
 * Handles the submitted manga edit form.
 *
 * A valid submission is flushed and answered with a redirect to the manga page;
 * anything else is answered with HTTP 400 and the list of form error messages.
 */
#[Route('/manga/{id}/edit', name: 'app_manga_edit', methods: ['POST'])]
public function edit(Request $request, Manga $manga, EntityManagerInterface $entityManager): JsonResponse|Response
{
    $form = $this->createForm(MangaEditType::class, $manga);
    $form->handleRequest($request);

    if (!$form->isSubmitted() || !$form->isValid()) {
        // Collect the messages of the form and of all its children.
        $messages = [];
        foreach ($form->getErrors(true) as $formError) {
            $messages[] = $formError->getMessage();
        }

        return new JsonResponse(['errors' => $messages], 400);
    }

    $entityManager->flush();

    return $this->redirectToRoute('app_manga_show', ['mangaSlug' => $manga->getSlug()]);
}
public function _chaptersByManga(int $id): Response
{

View File

@@ -5,8 +5,9 @@ namespace App\Controller;
use App\Entity\ContentSource;
use App\Form\ContentSourceType;
use App\Repository\ContentSourceRepository;
use App\Service\MangaScraperService;
use App\Service\NotificationService;
use App\Service\Scraper\MangaScraperService;
use Doctrine\ORM\EntityManagerInterface;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
@@ -97,7 +98,7 @@ class SettingsController extends AbstractController
$chapterNumber = $request->request->get('chapterNumber');
try {
$scrapedData = $this->mangaScraperService->testScrapingHtml($mangaSlug, $chapterNumber, $contentSource);
$scrapedData = $this->mangaScraperService->testScraping($mangaSlug, $chapterNumber, $contentSource);
}catch (\Exception $e){
$this->notificationService->sendUpdate(['status' => 'error', 'message' => $e->getMessage()]);
return new JsonResponse([

View File

@@ -33,6 +33,9 @@ class ContentSource
#[ORM\Column(length: 255)]
private ?string $scrapingType = null;
#[ORM\Column(length: 255, nullable: true)]
private ?string $ChapterSelector = null;
public function getId(): ?int
{
return $this->id;
@@ -103,4 +106,16 @@ class ContentSource
return $this;
}
/**
 * Returns the chapter selector stored for this source, or null when none is set.
 */
public function getChapterSelector(): ?string
{
return $this->ChapterSelector;
}
/**
 * Stores the chapter selector (null clears it) and returns the entity for chaining.
 */
public function setChapterSelector(?string $ChapterSelector): static
{
$this->ChapterSelector = $ChapterSelector;
return $this;
}
}

View File

@@ -59,6 +59,9 @@ class Manga
#[ORM\Column]
private ?bool $monitored = null;
#[ORM\Column(type: Types::JSON, nullable: true)]
private ?array $AlternativeSlugs = null;
public function __construct()
{
$this->chapters = new ArrayCollection();
@@ -265,4 +268,16 @@ class Manga
return $this;
}
/**
 * Returns the alternative slugs stored for this manga, or null when none were set.
 */
public function getAlternativeSlugs(): ?array
{
return $this->AlternativeSlugs;
}
/**
 * Replaces the alternative slugs (null clears them) and returns the entity for chaining.
 */
public function setAlternativeSlugs(?array $AlternativeSlugs): static
{
$this->AlternativeSlugs = $AlternativeSlugs;
return $this;
}
}

View File

@@ -28,6 +28,10 @@ class ContentSourceType extends AbstractType
'label' => 'Next Page Selector (let empty if vertical reader)',
'required' => false,
])
->add('ChapterSelector', TextType::class, [
'label' => 'Chapter Selector (required for Javascript scraping)',
'required' => false,
])
->add('scrapingType', ChoiceType::class, [
'label' => 'Scraping Type',
'choices' => [

View File

@@ -0,0 +1,95 @@
<?php
namespace App\Form;
use App\Entity\Manga;
use Symfony\Component\Form\AbstractType;
use Symfony\Component\Form\Extension\Core\Type\CollectionType;
use Symfony\Component\Form\Extension\Core\Type\DateTimeType;
use Symfony\Component\Form\Extension\Core\Type\NumberType;
use Symfony\Component\Form\Extension\Core\Type\TextareaType;
use Symfony\Component\Form\Extension\Core\Type\TextType;
use Symfony\Component\Form\FormBuilderInterface;
use Symfony\Component\Form\FormEvent;
use Symfony\Component\Form\FormEvents;
use Symfony\Component\OptionsResolver\OptionsResolver;
/**
 * Edit form for a Manga entity.
 *
 * The slug field is rendered read-only and, on submit, is always reset to the
 * slug already stored on the entity so that it cannot be changed from the form.
 */
class MangaEditType extends AbstractType
{
    /** CSS classes shared by every text-like input of the form. */
    private const INPUT_CLASS = 'w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-indigo-500 focus:border-indigo-500';

    public function buildForm(FormBuilderInterface $builder, array $options): void
    {
        $inputAttr = ['class' => self::INPUT_CLASS];

        $builder->add('title', TextType::class, [
            'label' => 'Titre',
            'attr' => $inputAttr,
        ]);

        $builder->add('slug', TextType::class, [
            'label' => 'Slug',
            'attr' => [
                'readonly' => true,
                'class' => 'bg-gray-100 ' . self::INPUT_CLASS,
            ],
        ]);

        $builder->add('alternativeSlugs', CollectionType::class, [
            'entry_type' => TextType::class,
            'allow_add' => true,
            'allow_delete' => true,
            'by_reference' => false,
            'label' => false,
            'prototype' => true,
            'entry_options' => ['attr' => $inputAttr, 'label' => false],
            'required' => false,
        ]);

        $builder->add('publicationYear', NumberType::class, [
            'label' => 'Année de publication',
            'attr' => $inputAttr,
        ]);

        $builder->add('description', TextareaType::class, [
            'label' => 'Description',
            'attr' => ['class' => self::INPUT_CLASS, 'rows' => 8],
        ]);

        $builder->add('genres', CollectionType::class, [
            'entry_type' => TextType::class,
            'allow_add' => true,
            'allow_delete' => true,
            'by_reference' => false,
            'label' => 'Genres',
            'entry_options' => ['attr' => $inputAttr],
            'required' => false,
        ]);

        // Optional single-line fields that share the same configuration shape.
        $builder->add('rating', NumberType::class, [
            'label' => 'Note',
            'attr' => $inputAttr,
            'required' => false,
        ]);
        $builder->add('author', TextType::class, [
            'label' => 'Auteur',
            'attr' => $inputAttr,
            'required' => false,
        ]);
        $builder->add('status', TextType::class, [
            'label' => 'Statut',
            'attr' => $inputAttr,
            'required' => false,
        ]);

        // Overwrite whatever slug was submitted with the one stored on the entity.
        $builder->addEventListener(FormEvents::PRE_SUBMIT, static function (FormEvent $event): void {
            $submitted = $event->getData();
            $storedSlug = $event->getForm()->getData()?->getSlug();

            if ($storedSlug) {
                $submitted['slug'] = $storedSlug;
            }

            $event->setData($submitted);
        });
    }

    public function configureOptions(OptionsResolver $resolver): void
    {
        $resolver->setDefaults(['data_class' => Manga::class]);
    }
}

View File

@@ -26,7 +26,7 @@ class ChapterUrlGenerator
/**
 * Ensures that a chapter URL format contains the mandatory {slug} placeholder.
 *
 * The {chapterNumber} placeholder is no longer required by this check
 * (presumably for sources where the chapter is picked on the page itself —
 * confirm), so the error message only mentions {slug}.
 *
 * @throws InvalidArgumentException when {slug} is missing from the format
 */
private function validateUrlFormat(string $format): void
{
    if (!str_contains($format, '{slug}')) {
        throw new InvalidArgumentException('The URL format must contain the {slug} placeholder.');
    }
}

View File

@@ -6,8 +6,12 @@ use App\Entity\Chapter;
use App\Entity\Manga;
use App\Entity\ContentSource;
use App\Event\PageScrappingProgressEvent;
use App\Repository\ChapterRepository;
use App\Repository\MangaRepository;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use Facebook\WebDriver\Remote\RemoteWebElement;
use Facebook\WebDriver\WebDriverExpectedCondition;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
@@ -18,6 +22,8 @@ use Symfony\Component\Routing\Route;
use Symfony\Component\Routing\RouteCollection;
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\Panther\Client as PantherClient;
class MangaScraperService
{
const string PUBLIC_CBZ = '/public/cbz';
@@ -25,7 +31,8 @@ class MangaScraperService
public function __construct(
private readonly string $projectDir,
private readonly EventDispatcherInterface $eventDispatcher,
private readonly EntityManagerInterface $entityManager
private readonly EntityManagerInterface $entityManager,
private readonly MangaRepository $mangaRepository,
)
{
}
@@ -140,14 +147,162 @@ class MangaScraperService
return true;
}
/**
 * Scrapes the image URLs of one chapter through a Panther (Chrome) session.
 *
 * When the page contains a `#selectChapitres` dropdown, the option whose text
 * contains the chapter number is selected and the method waits until the
 * images inside `#scansPlacement` are loaded before scraping.
 *
 * @return array|bool the page data, or false when the chapter could not be selected
 *
 * @throws \Exception when loading the page or scraping the reader fails
 */
private function scrapeChapterJavascript(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
    $pantherClient = PantherClient::createChromeClient();

    // The outer try/finally guarantees that the browser is closed on every exit path,
    // including a failing page request.
    try {
        $chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
        $pantherClient->request('GET', $chapterUrl);

        // Select the chapter in the dropdown, when there is one.
        try {
            $crawler = $pantherClient->waitFor('body');
            $select = $crawler->filter('#selectChapitres');

            if ($select->count() > 0) {
                $chapterNumber = $chapter->getNumber();
                // Quote the number so that the "." of e.g. 10.5 is matched literally.
                $pattern = '/\b' . preg_quote((string) $chapterNumber, '/') . '\b/';

                $targetIndex = null;
                /** @var RemoteWebElement $option */
                foreach ($select->filter('option')->getIterator() as $index => $option) {
                    if (preg_match($pattern, $option->getText())) {
                        $targetIndex = $index;
                        break;
                    }
                }

                if ($targetIndex === null) {
                    throw new \Exception("Chapitre $chapterNumber non trouvé dans le menu déroulant");
                }

                $pantherClient->executeScript("
                    var select = document.querySelector('#selectChapitres');
                    select.selectedIndex = $targetIndex;
                    select.dispatchEvent(new Event('change'));
                ");

                // Wait for the page to update after the selection.
                // Panther's wait() takes its timeout in seconds: 60 seconds here.
                $pantherClient->wait(60)->until(
                    function ($driver) {
                        return $driver->executeScript("
                            var scansPlacement = document.querySelector('#scansPlacement');
                            if (!scansPlacement) return false;
                            var lazyImages = scansPlacement.querySelectorAll('img.lazy');
                            var loadingGif = scansPlacement.querySelector('img[src*=\"loading_scans.gif\"]');
                            // Vérifier que toutes les images lazy sont chargées et que le GIF de chargement n'est plus présent
                            var allImagesLoaded = Array.from(lazyImages).every(img => img.complete && img.naturalWidth > 0);
                            return lazyImages.length > 0 && allImagesLoaded && !loadingGif;
                        ");
                    }
                );
            }
        } catch (\Exception $e) {
            // NOTE(review): selection failures are reported as false without being logged.
            return false;
        }

        // No next-page selector means a vertical reader, otherwise a paginated one.
        return $mangaSource->getNextPageSelector() === null
            ? $this->scrapeVerticalReaderJavascript($pantherClient, $mangaSource, $chapter)
            : $this->scrapeHorizontalReaderJavascript($pantherClient, $mangaSource, $chapter);
    } finally {
        $pantherClient->close();
    }
}
/**
 * Collects every image of a vertical reader once the image selector is present.
 *
 * A progress event is dispatched for each collected page.
 *
 * @return array list of ['image_url' => string, 'page_number' => int]
 */
private function scrapeVerticalReaderJavascript(PantherClient $pantherClient, ContentSource $mangaSource, Chapter $chapter): array
{
    $crawler = $pantherClient->waitFor($mangaSource->getImageSelector());
    $images = $crawler->filter($mangaSource->getImageSelector());

    $pages = [];
    foreach ($images->getIterator() as $image) {
        $position = count($pages) + 1;

        // Fall back to data-src when src is empty.
        $source = $image->getAttribute('src');
        if (!$source) {
            $source = $image->getAttribute('data-src');
        }

        $pages[] = [
            'image_url' => $this->cleanImageUrl($source),
            'page_number' => $position,
        ];

        $this->eventDispatcher->dispatch(
            new PageScrappingProgressEvent($chapter->getId(), $position, $images->count()),
            PageScrappingProgressEvent::NAME
        );
    }

    return $pages;
}
/**
 * Walks a paginated reader: records the image of the current page, clicks the
 * "next page" element and repeats until no image or no next element is found.
 *
 * A progress event is dispatched for each page; the total is reported as 0
 * because it is not known in advance.
 *
 * @return array list of ['image_url' => string, 'page_number' => int]
 *
 * @throws \Exception when waiting for or reading a page fails
 */
private function scrapeHorizontalReaderJavascript(PantherClient $pantherClient, ContentSource $mangaSource, Chapter $chapter): array
{
    $imageSelector = $mangaSource->getImageSelector();
    $nextPageSelector = $mangaSource->getNextPageSelector();

    $pageData = [];
    $pageNumber = 1;

    while (true) {
        // waitFor() returns a crawler for the current state of the page.
        $crawler = $pantherClient->waitFor($imageSelector);
        $imageElement = $crawler->filter($imageSelector)->first();
        if ($imageElement->count() === 0) {
            break; // End of the chapter.
        }

        $imageUrl = $imageElement->attr('src') ?: $imageElement->attr('data-src');
        $pageData[] = [
            'image_url' => $this->cleanImageUrl($imageUrl),
            'page_number' => $pageNumber,
        ];

        $event = new PageScrappingProgressEvent($chapter->getId(), $pageNumber, 0);
        $this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);

        // Look the button up on the crawler obtained above; the previous code read
        // a variable that was still undefined during the first iteration.
        $nextButton = $crawler->filter($nextPageSelector);
        if ($nextButton->count() === 0) {
            break; // No next button: end of the chapter.
        }
        $nextButton->click();

        // Give the next page up to 10 seconds to expose its image.
        $pantherClient->waitFor($imageSelector, 10);
        $pageNumber++;
    }

    return $pageData;
}
private function fetchImagesUsingPuppeteer(string $url, string $imageSelector, string $nextButtonSelector): array
{
// Appeler le script Puppeteer avec les paramètres nécessaires
@@ -162,6 +317,26 @@ class MangaScraperService
return json_decode(implode("", $output), true);
}
/**
 * Runs a test scrape with the strategy matching the source's scraping type.
 *
 * @throws Exception when the scraping type is neither 'html' nor 'javascript'
 */
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
    $scrapingType = $contentSource->getScrapingType();

    return match ($scrapingType) {
        'html' => $this->testScrapingHtml($mangaSlug, $chapterNumber, $contentSource),
        'javascript' => $this->testScrapingJavascript($mangaSlug, $chapterNumber, $contentSource),
        default => throw new Exception('Unsupported scraping type: ' . $contentSource->getScrapingType()),
    };
}
/**
 * Test-scrapes one chapter of a stored manga with the Javascript strategy.
 *
 * @throws Exception when the manga or the chapter cannot be found, or when the
 *                   scraper reports a failure
 */
public function testScrapingJavascript(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
    $manga = $this->mangaRepository->findOneBy(['slug' => $mangaSlug]);
    if ($manga === null) {
        throw new Exception('Manga not found for slug: ' . $mangaSlug);
    }

    $chapter = $manga->getChapterByNumber($chapterNumber);
    if (!$chapter) {
        throw new Exception('Chapter ' . $chapterNumber . ' not found for manga: ' . $mangaSlug);
    }

    // scrapeChapterJavascript() may return false; returning it as-is would violate
    // the array return type, so turn it into a catchable exception.
    $pageData = $this->scrapeChapterJavascript($manga, $chapter, $contentSource);
    if (!is_array($pageData)) {
        throw new Exception('Javascript scraping failed for chapter ' . $chapterNumber . ' of manga: ' . $mangaSlug);
    }

    return $pageData;
}
/**
* @throws GuzzleException
*/

View File

@@ -0,0 +1,110 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use App\Entity\Manga;
use App\Event\PageScrappingProgressEvent;
use Doctrine\ORM\EntityManagerInterface;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
abstract class AbstractScraper implements ScraperInterface
{
const string PUBLIC_CBZ = '/public/cbz';
protected Client $httpClient;
/**
 * Stores the shared dependencies as protected promoted properties and creates
 * the Guzzle client used by this class for its HTTP requests.
 */
public function __construct(
protected string $projectDir,
protected EventDispatcherInterface $eventDispatcher,
protected EntityManagerInterface $entityManager
)
{
$this->httpClient = new Client();
}
/**
 * Returns the first chapter URL that passes isChapterUrlValid().
 *
 * The manga's main slug is tried first, then its alternative slugs in order.
 *
 * @return string|null the first valid URL, or null when none is valid
 */
protected function getValidChapterUrl(ContentSource $contentSource, Manga $manga, float $chapterNumber): ?string
{
    $candidates = array_merge([$manga->getSlug()], $manga->getAlternativeSlugs() ?? []);

    foreach ($candidates as $candidate) {
        $candidateUrl = $contentSource->getChapterUrl($candidate, $chapterNumber);
        if (!$this->isChapterUrlValid($candidateUrl)) {
            continue;
        }

        return $candidateUrl;
    }

    return null;
}
/**
 * Tells whether a HEAD request on the URL is answered with HTTP 200.
 *
 * Every Guzzle failure (HTTP error status, connection error, ...) is
 * reported as "not valid" instead of being thrown.
 */
protected function isChapterUrlValid(string $url): bool
{
    try {
        return $this->httpClient->head($url)->getStatusCode() === 200;
    } catch (GuzzleException $e) {
        // GuzzleException is the common interface of all Guzzle exceptions, so
        // failures that are not a RequestException are covered as well.
        return false;
    }
}
/**
 * Builds the CBZ path "<volume dir>/<slug>_vol<volume>_ch<number>.cbz".
 *
 * The volume directory is obtained from createDirectories().
 */
protected function generateCbzPath(Manga $manga, Chapter $chapter): string
{
    $directory = $this->createDirectories($manga, $chapter->getVolume());

    return sprintf(
        '%s/%s_vol%d_ch%s.cbz',
        $directory,
        $manga->getSlug(),
        $chapter->getVolume(),
        $chapter->getNumber()
    );
}
/**
 * Packs the downloaded pages into a CBZ (zip) archive.
 *
 * @param string $tempDir     directory holding the pages (not read here; kept for the signature)
 * @param array  $pageData    pages; each entry must provide 'local_image_url'
 * @param string $cbzFilePath destination archive path
 *
 * @throws \RuntimeException when the archive cannot be opened or written
 */
protected function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
{
    $zip = new \ZipArchive();

    // OVERWRITE starts from an empty archive, so pages left over from an earlier
    // download of the same chapter cannot survive in the new file.
    $status = $zip->open($cbzFilePath, \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
    if ($status !== true) {
        throw new \RuntimeException(sprintf('Unable to open CBZ archive "%s" (ZipArchive error %d).', $cbzFilePath, $status));
    }

    foreach ($pageData as $page) {
        $zip->addFile($page['local_image_url'], basename($page['local_image_url']));
    }

    if (!$zip->close()) {
        throw new \RuntimeException(sprintf('Unable to write CBZ archive "%s".', $cbzFilePath));
    }
}
/**
 * Deletes the regular files directly inside the directory, then the directory itself.
 *
 * Nothing happens when the directory does not exist.
 */
protected function cleanupTempFiles(string $directory): void
{
    if (!is_dir($directory)) {
        return;
    }

    // glob() returns false on error; treat that like "no files".
    $files = glob($directory . '/*') ?: [];
    foreach ($files as $file) {
        if (is_file($file)) {
            unlink($file);
        }
    }

    rmdir($directory);
}
/**
 * Ensures that "<project>/public/cbz/<Slug> (<year>)/volume_<n>" exists and returns it.
 *
 * @throws \RuntimeException when the directory cannot be created
 */
protected function createDirectories(Manga $manga, int $volume): string
{
    $mangaYear = $manga->getPublicationYear() ?? 'unknown';
    $mangaDir = sprintf('%s/%s (%s)', $this->projectDir . self::PUBLIC_CBZ, ucfirst($manga->getSlug()), $mangaYear);

    // NOTE(review): the previous code zero-padded the volume with '%02d' and then
    // formatted the result with '%d' again, which dropped the padding. The effective
    // "volume_<n>" name is kept so existing folders stay valid — confirm whether
    // padded names were intended.
    $volumeDir = sprintf('%s/volume_%d', $mangaDir, $volume);

    // The second is_dir() covers another process creating the directory concurrently.
    if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
        throw new \RuntimeException(sprintf('Unable to create directory "%s".', $volumeDir));
    }

    return $volumeDir;
}
/**
 * Trims the URL and strips ASCII control characters (0x00-0x1F and 0x7F) from it.
 */
protected function cleanImageUrl(string $url): string
{
    $trimmed = trim($url);

    return preg_replace('/[\x00-\x1F\x7F]/', '', $trimmed);
}
/**
 * Dispatches a PageScrappingProgressEvent for the chapter with the given page counters.
 */
protected function dispatchProgressEvent(Chapter $chapter, int $currentPage, int $totalPages): void
{
    $this->eventDispatcher->dispatch(
        new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages),
        PageScrappingProgressEvent::NAME
    );
}
}

View File

@@ -0,0 +1,197 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use Doctrine\ORM\EntityManagerInterface;
use Exception;
use GuzzleHttp\Client;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\DomCrawler\Crawler;
class HtmlScraper extends AbstractScraper
{
private Client $client;
/**
 * Forwards the shared dependencies to the parent and creates this scraper's
 * own Guzzle client.
 *
 * NOTE(review): AbstractScraper already creates a client in $httpClient — confirm
 * whether a second instance is needed.
 */
public function __construct(
string $projectDir,
EventDispatcherInterface $eventDispatcher,
EntityManagerInterface $entityManager
) {
parent::__construct($projectDir, $eventDispatcher, $entityManager);
$this->client = new Client();
}
/**
 * Scrapes a chapter from a plain HTML source, downloads its pages, packs them
 * into a CBZ archive and stores the archive path on the chapter.
 *
 * The temporary download directory is removed on success and on failure.
 *
 * @return array|bool the page data (image URL, local path, page number)
 *
 * @throws Exception when no valid URL is found or when scraping/downloading fails
 */
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
{
    $manga = $chapter->getManga();
    $chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());
    if (!$chapterUrl) {
        throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
    }

    $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
    if (!mkdir($tempDir) && !is_dir($tempDir)) {
        throw new Exception('Unable to create temporary directory ' . $tempDir);
    }

    try {
        // No next-page selector means a vertical reader, otherwise a paginated one.
        $pageData = $contentSource->getNextPageSelector() === null
            ? $this->scrapeVerticalReader($this->fetchHtml($chapterUrl), $contentSource)
            : $this->scrapeHorizontalReader($chapterUrl, $contentSource);

        // Download and store the images. Entries are updated by key rather than
        // through a foreach reference, which would stay bound after the loop.
        $totalPages = count($pageData);
        foreach ($pageData as $index => $page) {
            $imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
            $imagePath = $tempDir . '/' . $imageName;
            $this->downloadAndSaveImage($page['image_url'], $imagePath);
            $this->dispatchProgressEvent($chapter, $index + 1, $totalPages);
            $pageData[$index]['local_image_url'] = $imagePath;
        }

        $cbzFilePath = $this->generateCbzPath($manga, $chapter);
        $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

        $chapter->setCbzPath($cbzFilePath);
        $this->entityManager->persist($chapter);
        $this->entityManager->flush();

        return $pageData;
    } finally {
        // Remove the temporary directory even when a step above throws.
        $this->cleanupTempFiles($tempDir);
    }
}
/**
 * Scrapes the page list of a chapter without downloading anything.
 *
 * @throws Exception when the URL is not reachable or the page cannot be scraped
 */
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
    $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);
    if (!$this->isChapterUrlValid($chapterUrl)) {
        throw new \Exception("Invalid URL, check format and slug");
    }

    if ($contentSource->getNextPageSelector() !== null) {
        // The paginated reader fetches each page itself, starting with this URL,
        // so no separate fetch is needed here.
        return $this->scrapeHorizontalReader($chapterUrl, $contentSource);
    }

    return $this->scrapeVerticalReader($this->fetchHtml($chapterUrl), $contentSource);
}
/**
 * Tells whether this scraper handles the given scraping type ('html' only).
 */
public function supports(string $scrapingType): bool
{
    return 'html' === $scrapingType;
}
/**
 * Extracts every image matched by the source's image selector from one HTML page.
 *
 * @return array list of ['image_url' => string, 'page_number' => int]
 */
private function scrapeVerticalReader(string $html, ContentSource $contentSource): array
{
    $nodes = (new Crawler($html))->filter($contentSource->getImageSelector());

    $pages = [];
    foreach ($nodes as $position => $node) {
        // Fall back to data-src when src is empty.
        $source = $node->getAttribute('src');
        if (!$source) {
            $source = $node->getAttribute('data-src');
        }

        $pages[] = [
            'image_url' => $this->cleanImageUrl($source),
            'page_number' => $position + 1,
        ];
    }

    return $pages;
}
/**
 * Follows the "next page" links of a paginated reader, collecting one image per page.
 *
 * The walk stops when a page has no next link or when the next link points to a
 * page that was already visited.
 *
 * @return array list of ['image_url' => string, 'page_number' => int]
 */
private function scrapeHorizontalReader(string $chapterUrl, ContentSource $contentSource): array
{
    $pageData = [];
    $visited = [];
    $currentPageUrl = $chapterUrl;

    do {
        $visited[$currentPageUrl] = true;

        $html = $this->fetchHtml($currentPageUrl);
        $page = $this->extractMangaPageData($html, $contentSource);
        $pageData[] = [
            'image_url' => $this->cleanImageUrl($page['image_url']),
            'page_number' => count($pageData) + 1,
        ];

        $currentPageUrl = $page['next_page_url'];
        // A next link leading back to a visited page would otherwise loop forever.
    } while ($currentPageUrl && !isset($visited[$currentPageUrl]));

    return $pageData;
}
/**
 * Downloads a page and returns its body, without following redirects.
 *
 * @throws Exception 'Bad Request: …' when the HTTP request itself fails, or
 *                   'Chapter Not Found at …' when the answer is a redirect or a 404
 */
private function fetchHtml(string $url): string
{
    try {
        $response = $this->client->get($url, [
            'http_errors' => true,
            'allow_redirects' => false,
        ]);
    } catch (Exception $e) {
        // Keep the original exception as the cause.
        throw new Exception('Bad Request: ' . $e->getMessage(), 0, $e);
    }

    // Checked outside of the try block so that this exception is not caught and
    // re-labelled as a "Bad Request" by the handler above.
    $statusCode = $response->getStatusCode();
    if (($statusCode >= 300 && $statusCode < 400) || $statusCode === 404) {
        throw new Exception('Chapter Not Found at ' . $url);
    }

    return (string) $response->getBody();
}
/**
 * Downloads an image and writes it to the destination path.
 *
 * @throws Exception when the request fails, the response is not an image, or
 *                   the file cannot be written
 */
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    try {
        $response = $this->client->get($imageUrl);

        $contentType = $response->getHeaderLine('Content-Type');
        if (!str_starts_with($contentType, 'image/')) {
            throw new Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
        }

        // file_put_contents() returns false on failure instead of throwing.
        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new Exception('Impossible d\'écrire le fichier ' . $destinationPath);
        }
    } catch (Exception $e) {
        // Keep the original exception as the cause.
        throw new Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
    }
}
/**
 * Extracts the image URL and the next-page URL from one page of a paginated reader.
 *
 * Both URLs are made absolute against the scheme and host of the source's base URL.
 *
 * @return array ['image_url' => string, 'next_page_url' => string|null]
 *
 * @throws Exception when the page has no usable image or the base URL is not absolute
 */
private function extractMangaPageData(string $html, ContentSource $mangaSource): array
{
    $crawler = new Crawler($html);

    $imageNode = $crawler->filter($mangaSource->getImageSelector());
    if ($imageNode->count() === 0) {
        throw new Exception('No image found with selector ' . $mangaSource->getImageSelector());
    }

    // "?:" (not "??") so that an empty src also falls back to data-src, as the
    // vertical reader does.
    $imgUrl = $imageNode->attr('src') ?: $imageNode->attr('data-src');
    if (!$imgUrl) {
        throw new Exception('The image found has neither a src nor a data-src attribute');
    }

    $nextLink = $crawler->filter($mangaSource->getNextPageSelector());
    $nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;

    return [
        'image_url' => $this->toAbsoluteUrl($imgUrl, $mangaSource),
        'next_page_url' => $nextUrl ? $this->toAbsoluteUrl($nextUrl, $mangaSource) : null,
    ];
}

/**
 * Returns the URL unchanged when it already starts with http(s)://, otherwise
 * prefixes it with the scheme and host of the source's base URL.
 *
 * @throws Exception when the base URL has no scheme or host
 */
private function toAbsoluteUrl(string $url, ContentSource $mangaSource): string
{
    if (preg_match('/^https?:\/\//', $url)) {
        return $url;
    }

    $urlComponents = parse_url((string) $mangaSource->getBaseUrl());
    if (!is_array($urlComponents) || !isset($urlComponents['scheme'], $urlComponents['host'])) {
        throw new Exception('The base URL of the content source must contain a scheme and a host');
    }

    return $urlComponents['scheme'] . '://' . $urlComponents['host'] . '/' . ltrim($url, '/');
}
}

View File

@@ -0,0 +1,188 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use Exception;
use Symfony\Component\Panther\Client as PantherClient;
class JavascriptScraper extends AbstractScraper
{
/**
 * Scrapes a chapter through a Panther (Chrome) session, downloads its pages,
 * packs them into a CBZ archive and stores the archive path on the chapter.
 *
 * @return array|bool the page data, or false when scraping or downloading failed
 *
 * @throws Exception when no valid chapter URL is found or the page cannot be requested
 */
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
{
    $manga = $chapter->getManga();

    // Resolve the URL before starting Chrome, so that no browser is left running
    // when no valid URL exists.
    $chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());
    if (!$chapterUrl) {
        throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
    }

    $pantherClient = PantherClient::createChromeClient();
    $tempDir = null;

    try {
        $pantherClient->request('GET', $chapterUrl);

        try {
            $this->selectChapter($pantherClient, $chapter, $contentSource);

            $pageData = $contentSource->getNextPageSelector() === null
                ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
                : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);

            $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
            if (!mkdir($tempDir) && !is_dir($tempDir)) {
                throw new Exception('Unable to create temporary directory ' . $tempDir);
            }

            // Download and store the images. Entries are updated by key rather than
            // through a foreach reference, which would stay bound after the loop.
            $totalPages = count($pageData);
            foreach ($pageData as $index => $page) {
                $imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
                $imagePath = $tempDir . '/' . $imageName;

                // Both calls return false on failure; do not archive a missing or empty page.
                $imageContent = file_get_contents($page['image_url']);
                if ($imageContent === false || file_put_contents($imagePath, $imageContent) === false) {
                    throw new Exception('Unable to download image ' . $page['image_url']);
                }

                $this->dispatchProgressEvent($chapter, $index + 1, $totalPages);
                $pageData[$index]['local_image_url'] = $imagePath;
            }

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();

            return $pageData;
        } catch (Exception $e) {
            // NOTE(review): failures are reported as false without being logged;
            // no logger is available in this class.
            return false;
        }
    } finally {
        // Runs on every exit path: remove the temporary files and close the browser.
        if ($tempDir !== null && is_dir($tempDir)) {
            $this->cleanupTempFiles($tempDir);
        }
        $pantherClient->close();
    }
}
/**
 * Scrapes the page list of a chapter through a browser session without
 * downloading anything.
 *
 * @throws Exception when the URL is not reachable or scraping fails
 */
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
{
    $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);
    if (!$this->isChapterUrlValid($chapterUrl)) {
        throw new \Exception("Invalid URL, check format and slug");
    }

    $pantherClient = PantherClient::createChromeClient();

    try {
        // The request is inside the try block so that the browser is closed even
        // when loading the page fails.
        $pantherClient->request('GET', $chapterUrl);

        // A transient chapter only carries the number needed by selectChapter().
        $chapter = new Chapter();
        $chapter->setNumber((float) $chapterNumber);

        $this->selectChapter($pantherClient, $chapter, $contentSource);

        return $contentSource->getNextPageSelector() === null
            ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
            : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);
    } finally {
        $pantherClient->close();
    }
}
/**
 * Tells whether this scraper handles the given scraping type ('javascript' only).
 */
public function supports(string $scrapingType): bool
{
    return 'javascript' === $scrapingType;
}
/**
 * Selects the chapter in the source's chapter dropdown, when one is configured.
 *
 * The option whose text contains the chapter number is selected, a "change"
 * event is fired on the dropdown and the method waits for the images to load.
 *
 * @throws Exception when no option matches the chapter number
 */
private function selectChapter(PantherClient $pantherClient, Chapter $chapter, ContentSource $contentSource): void
{
    $chapterSelector = $contentSource->getChapterSelector();
    if (!$chapterSelector) {
        return; // No selector configured: nothing to do.
    }

    $select = $pantherClient->waitFor($chapterSelector)->filter($chapterSelector);
    if ($select->count() === 0) {
        return;
    }

    $chapterNumber = $chapter->getNumber();
    // Quote the number so that the "." of e.g. 10.5 is matched literally.
    $pattern = '/\b' . preg_quote((string) $chapterNumber, '/') . '\b/';

    $targetIndex = null;
    foreach ($select->filter('option') as $index => $option) {
        if (preg_match($pattern, $option->getText())) {
            $targetIndex = $index;
            break;
        }
    }

    if ($targetIndex === null) {
        throw new Exception("Chapitre $chapterNumber non trouvé dans le menu déroulant");
    }

    // The selector and index are passed as script arguments rather than being
    // interpolated into the source, so a selector containing quotes (for example
    // select[name='chapter']) cannot break the script.
    $pantherClient->executeScript(
        "var select = document.querySelector(arguments[0]);
        select.selectedIndex = arguments[1];
        select.dispatchEvent(new Event('change'));",
        [$chapterSelector, $targetIndex]
    );

    $this->waitForImagesLoaded($pantherClient, $contentSource);
}
/**
 * Waits (up to 30 seconds) until the number of loaded images matching the
 * source's image selector has stopped changing.
 *
 * The in-page script polls every 200 ms and resolves once the count of fully
 * loaded images was identical for 10 consecutive polls.
 */
private function waitForImagesLoaded(PantherClient $pantherClient, ContentSource $contentSource): void
{
    $imageSelector = $contentSource->getImageSelector();

    // The selector is passed as a script argument rather than being interpolated,
    // so quotes inside it cannot break the script. It is captured in a constant at
    // the top because "arguments" means something else inside checkImages().
    $script = <<<'JS'
        const selector = arguments[0];
        return new Promise((resolve) => {
            let lastImageCount = 0;
            let stableCount = 0;
            const stableThreshold = 10;
            function checkImages() {
                const images = document.querySelectorAll(selector);
                const loadedImages = Array.from(images).filter(img => img.complete && img.naturalWidth > 0);
                if (loadedImages.length === lastImageCount) {
                    stableCount++;
                } else {
                    stableCount = 0;
                    lastImageCount = loadedImages.length;
                }
                if (stableCount >= stableThreshold) {
                    resolve(true);
                } else {
                    setTimeout(checkImages, 200);
                }
            }
            checkImages();
        });
        JS;

    $pantherClient->wait(30)->until(
        static function ($driver) use ($script, $imageSelector) {
            return $driver->executeScript($script, [$imageSelector]);
        }
    );
}
/**
 * Collects every image of a vertical reader once the image selector is present.
 *
 * @return array list of ['image_url' => string, 'page_number' => int]
 */
private function scrapeVerticalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
{
    $crawler = $pantherClient->waitFor($contentSource->getImageSelector());
    $nodes = $crawler->filter($contentSource->getImageSelector());

    $pages = [];
    foreach ($nodes as $position => $node) {
        // Fall back to data-src when src is empty.
        $source = $node->getAttribute('src');
        if (!$source) {
            $source = $node->getAttribute('data-src');
        }

        $pages[] = [
            'image_url' => $this->cleanImageUrl($source),
            'page_number' => $position + 1,
        ];
    }

    return $pages;
}
/**
 * Placeholder for paginated (horizontal) readers, which this scraper does not
 * handle yet.
 *
 * It fails explicitly: returning an empty page list would let the caller build
 * an archive without pages and record its path on the chapter.
 *
 * @throws Exception always
 */
private function scrapeHorizontalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
{
    throw new Exception('Paginated (horizontal) readers are not supported by the Javascript scraper yet.');
}
}

View File

@@ -0,0 +1,28 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
/**
 * Facade over the scrapers: asks the factory for the scraper matching a content
 * source and delegates the call to it.
 */
class MangaScraperService
{
    public function __construct(private ScraperFactory $scraperFactory)
    {
    }

    /**
     * Scrapes a chapter with the scraper created for the content source.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        return $this->scraperFactory
            ->createScraper($contentSource)
            ->scrapeChapter($chapter, $contentSource);
    }

    /**
     * Runs a test scrape with the scraper created for the content source.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        return $this->scraperFactory
            ->createScraper($contentSource)
            ->testScraping($mangaSlug, $chapterNumber, $contentSource);
    }
}

View File

@@ -0,0 +1,89 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
use Doctrine\ORM\EntityManagerInterface;
use GuzzleHttp\Client;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
/**
 * Scraper for content sources whose scraping type is "mangadex".
 *
 * Fetches a chapter description as JSON, downloads its "data-saver" page
 * images into a temporary directory, packs them into a CBZ file and stores
 * the CBZ path on the chapter.
 */
class MangadexScraper extends AbstractScraper
{
    private Client $client;

    public function __construct(
        string $projectDir,
        EventDispatcherInterface $eventDispatcher,
        EntityManagerInterface $entityManager
    ) {
        parent::__construct($projectDir, $eventDispatcher, $entityManager);
        $this->client = new Client();
    }

    /**
     * Downloads every page of the chapter and builds its CBZ file.
     *
     * @param Chapter       $chapter       Chapter to scrape; its external ID is inserted into the URL format.
     * @param ContentSource $contentSource Supplies the base URL and the chapter URL format.
     *
     * @return array|bool List of ['image_url', 'local_image_url', 'page_number'] entries on success,
     *                    false when any step fails with an exception.
     */
    public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool
    {
        $chapterUrl = $contentSource->getBaseUrl() . sprintf($contentSource->getChapterUrlFormat(), $chapter->getExternalId());
        $manga = $chapter->getManga();
        $pageData = [];
        // Stays null until the directory is about to be created, so the
        // cleanup below never touches a path this call did not own.
        $tempDir = null;

        try {
            $response = $this->client->get($chapterUrl);
            $results = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);

            // Validate the payload shape up front: a missing key would otherwise
            // reach count() as null and raise a TypeError that bypasses the
            // \Exception handler below.
            $pages = $results['chapter']['dataSaver'] ?? null;
            $hash = $results['chapter']['hash'] ?? null;
            $baseUrl = $results['baseUrl'] ?? null;
            if (
                !is_array($results)
                || ($results['result'] ?? null) !== 'ok'
                || !is_array($pages)
                || $pages === []
                || !is_string($hash)
                || !is_string($baseUrl)
            ) {
                throw new \Exception('Error while fetching chapter data from Mangadex ' . $manga->getTitle() . ' ' . $chapter->getNumber());
            }

            // Re-index so page numbers are always 1..n regardless of the keys received.
            $pages = array_values($pages);
            $totalPages = count($pages);

            $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
            if (!mkdir($tempDir) && !is_dir($tempDir)) {
                throw new \RuntimeException(sprintf('Unable to create temporary directory "%s"', $tempDir));
            }

            foreach ($pages as $index => $page) {
                $pageNumber = $index + 1;
                $pageUrl = $baseUrl . '/data-saver/' . $hash . '/' . $page;
                $imagePath = $tempDir . '/' . sprintf('%03d.%s', $pageNumber, pathinfo($page, PATHINFO_EXTENSION));

                $this->downloadAndSaveImage($pageUrl, $imagePath);
                $this->dispatchProgressEvent($chapter, $pageNumber, $totalPages);

                $pageData[] = [
                    'image_url' => $pageUrl,
                    'local_image_url' => $imagePath,
                    'page_number' => $pageNumber,
                ];
            }

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($tempDir, $pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();

            return $pageData;
        } catch (\Exception $e) {
            // TODO: log the error; no logger is available in this class as written.
            return false;
        } finally {
            // Remove downloaded files on both the success and the failure path;
            // previously a failure after mkdir() left the directory behind.
            if ($tempDir !== null && is_dir($tempDir)) {
                try {
                    $this->cleanupTempFiles($tempDir);
                } catch (\Exception) {
                    // Best-effort cleanup: a leftover temp directory must not
                    // change the outcome reported to the caller.
                }
            }
        }
    }

    /**
     * Not implemented for this scraper.
     *
     * Scraping needs the chapter's external ID, which cannot be derived from
     * the slug and chapter number passed here.
     *
     * @return array Always an empty array.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        return [];
    }

    /**
     * @return bool True only for the scraping type "mangadex".
     */
    public function supports(string $scrapingType): bool
    {
        return $scrapingType === 'mangadex';
    }

    /**
     * Downloads one image and writes it to the given path.
     *
     * @throws \RuntimeException If the file cannot be written.
     */
    private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
    {
        $response = $this->client->get($imageUrl);

        if (file_put_contents($destinationPath, $response->getBody()->getContents()) === false) {
            throw new \RuntimeException(sprintf('Unable to write image to "%s"', $destinationPath));
        }
    }
}

View File

@@ -0,0 +1,25 @@
<?php
namespace App\Service\Scraper;
use App\Entity\ContentSource;
/**
 * Picks the scraper that supports a content source's scraping type.
 */
class ScraperFactory
{
    /**
     * Registered scrapers as a 0-indexed list, in the order they were supplied.
     * Entries are expected to implement ScraperInterface.
     */
    private array $scrapers;

    /**
     * @param iterable $scrapers Candidate scrapers, as an array or any Traversable.
     */
    public function __construct(iterable $scrapers)
    {
        // Arrays are handled separately because iterator_to_array() only accepts
        // them from PHP 8.2 on. Keys are discarded in both cases so that entries
        // yielded under the same key do not overwrite each other.
        $this->scrapers = is_array($scrapers)
            ? array_values($scrapers)
            : iterator_to_array($scrapers, false);
    }

    /**
     * Returns the first registered scraper whose supports() accepts the
     * content source's scraping type.
     *
     * @throws \InvalidArgumentException When no registered scraper supports the type.
     */
    public function createScraper(ContentSource $contentSource): ScraperInterface
    {
        $scrapingType = $contentSource->getScrapingType();

        foreach ($this->scrapers as $scraper) {
            if ($scraper->supports($scrapingType)) {
                return $scraper;
            }
        }

        throw new \InvalidArgumentException('Unsupported scraping type: ' . $scrapingType);
    }
}

View File

@@ -0,0 +1,13 @@
<?php
namespace App\Service\Scraper;
use App\Entity\Chapter;
use App\Entity\ContentSource;
/**
 * Contract shared by the chapter scrapers. ScraperFactory selects an
 * implementation by calling supports() with the content source's scraping type.
 */
interface ScraperInterface
{
/**
 * Scrapes the pages of one chapter from the given content source.
 *
 * @return array|bool In the Mangadex implementation: the list of page entries
 *                    on success, false on failure. Presumably other
 *                    implementations follow the same convention (to confirm).
 */
public function scrapeChapter(Chapter $chapter, ContentSource $contentSource): array|bool;
/**
 * Trial scrape identified by manga slug and chapter number rather than by a
 * Chapter entity.
 *
 * @return array Result of the trial; the Mangadex implementation always returns an empty array.
 */
public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array;
/**
 * Tells whether this scraper handles the given scraping type.
 */
public function supports(string $scrapingType): bool;
}

View File

@@ -15,7 +15,7 @@
<span class="hidden sm:inline-block sm:align-middle sm:h-screen" aria-hidden="true">&#8203;</span>
{# Modal panel #}
<div class="inline-block align-bottom bg-white rounded-sm text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full">
<div class="inline-block align-bottom bg-white rounded-sm text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle {{ modalClass|default('sm:max-w-lg') }} sm:w-full">
<div class="bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4">
<h3 class="text-lg leading-6 font-medium text-gray-900" id="modal-title">
{{ title }}

View File

@@ -55,33 +55,89 @@
openTrigger="openEditModal"
closeTrigger="closeEditModal"
title="Edit Manga"
modalClass="w-full max-w-4xl"
>
{% block content %}
<form id="editForm" method="post" action="">
<div class="mb-4">
<label for="title" class="block text-gray-700 text-sm font-bold mb-2">Title:</label>
<input type="text" id="title" name="title" value="{{ manga.title }}"
class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
{{ form_start(form, {'action': path('app_manga_edit', {'id': manga.id}), 'attr': {'id': 'editForm', 'data-turbo-form': 'true'}}) }}
{% do form.alternativeSlugs.setRendered() %}
<div class="space-y-4 overflow-y-auto px-4">
{{ form_row(form.title, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
{{ form_row(form.slug, {
'label_attr': {'class': 'block text-sm font-medium text-gray-700'},
'row_attr': {'class': 'mt-1'},
'attr': {
'class': 'w-full px-3 py-2 bg-gray-100 border border-gray-300 rounded-md focus:outline-none text-gray-500',
'readonly': true
}
}) }}
{{ form_row(form.publicationYear, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
{{ form_row(form.description, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
{{ form_row(form.author, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
{{ form_row(form.status, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
{{ form_row(form.rating, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}, 'row_attr': {'class': 'mt-1'}}) }}
<div {{ stimulus_controller('collection') }}>
<label class="block text-sm font-medium text-gray-700">Slugs alternatifs</label>
<div data-collection-target="container" class="grid grid-cols-4 gap-2 mt-1">
{% for slug in form.alternativeSlugs %}
{# Declared as an "item" target: the collection controller seeds its next index from the number of item targets, so without it the first added field reuses index 0 and collides with an existing entry. #}
<div data-collection-target="item" class="inline-flex items-center bg-gray-100 rounded-full px-3 py-1 text-sm collection-item">
    {{ form_widget(slug, {'attr': {'class': 'bg-transparent border-none focus:outline-none focus:border-b focus:border-green-500 p-0 w-full'}}) }}
    <button type="button" data-action="collection#remove" class="ml-2 text-gray-500 hover:text-green-500 flex-shrink-0">
        <i class="fas fa-times"></i>
    </button>
</div>
<div class="mb-4">
<label for="description"
class="block text-gray-700 text-sm font-bold mb-2">Description:</label>
<textarea id="description" name="description"
class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
rows="3">{{ manga.description }}</textarea>
{% endfor %}
</div>
{# Ajoutez d'autres champs selon vos besoins #}
</form>
<button type="button" data-action="collection#add" class="mt-2 text-sm text-green-500 hover:text-green-700">
+ Ajouter un slug alternatif
</button>
<template data-collection-target="template">
{# Items created from this prototype are "item" targets as well, so the controller's item count stays accurate if it reconnects after entries were added. #}
<div data-collection-target="item" class="inline-flex items-center bg-gray-100 rounded-full px-3 py-1 text-sm collection-item">
    {{ form_widget(form.alternativeSlugs.vars.prototype, {'attr': {'class': 'bg-transparent border-none focus:outline-none focus:border-b focus:border-green-500 p-0 w-full'}}) }}
    <button type="button" data-action="collection#remove" class="ml-2 text-gray-500 hover:text-green-500 flex-shrink-0">
        <i class="fas fa-times"></i>
    </button>
</div>
</template>
</div>
{# Genres collection. Each entry is an "item" target because the collection controller seeds its next index from the number of item targets; without them the first added field reuses index 0 and collides with an existing entry. #}
<div {{ stimulus_controller('collection') }}>
    {# form_label() already renders the label element, so the classes are passed to it instead of wrapping it in a second <label>. #}
    {{ form_label(form.genres, null, {'label_attr': {'class': 'block text-sm font-medium text-gray-700'}}) }}
    <div data-collection-target="container" class="grid grid-cols-4 gap-2 mt-1">
        {% for genre in form.genres %}
            <div data-collection-target="item" class="inline-flex items-center bg-gray-100 rounded-full px-3 py-1 text-sm collection-item">
                {{ form_widget(genre, {'attr': {'class': 'bg-transparent border-none focus:outline-none focus:border-b focus:border-green-500 p-0 w-full'}}) }}
                <button type="button" data-action="collection#remove" class="ml-2 text-gray-500 hover:text-green-500 flex-shrink-0">
                    <i class="fas fa-times"></i>
                </button>
            </div>
        {% endfor %}
    </div>
    <button type="button" data-action="collection#add" class="mt-2 text-sm text-green-500 hover:text-green-700">
        + Ajouter un genre
    </button>
    <template data-collection-target="template">
        <div data-collection-target="item" class="inline-flex items-center bg-gray-100 rounded-full px-3 py-1 text-sm collection-item">
            {{ form_widget(form.genres.vars.prototype, {'attr': {'class': 'bg-transparent border-none focus:outline-none focus:border-b focus:border-green-500 p-0 w-full'}}) }}
            <button type="button" data-action="collection#remove" class="ml-2 text-gray-500 hover:text-green-500 flex-shrink-0">
                <i class="fas fa-times"></i>
            </button>
        </div>
    </template>
</div>
</div>
{{ form_end(form) }}
{% endblock %}
{% block footer %}
<button type="submit" form="editForm"
class="w-full inline-flex justify-center rounded-md border border-transparent shadow-sm px-4 py-2 bg-blue-600 text-base font-medium text-white hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 sm:ml-3 sm:w-auto sm:text-sm">
Save
</button>
<button type="button" data-action="modal#close"
class="mt-3 w-full inline-flex justify-center rounded-md border border-gray-300 shadow-sm px-4 py-2 bg-white text-base font-medium text-gray-700 hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 sm:mt-0 sm:ml-3 sm:w-auto sm:text-sm">
Cancel
</button>
<button type="submit" form="editForm"
class="w-full inline-flex justify-center rounded-md border border-transparent shadow-sm px-4 py-2 bg-green-600 text-base font-medium text-white hover:bg-green-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-green-500 sm:ml-3 sm:w-auto sm:text-sm">
Save
</button>
{% endblock %}
</twig:Modal>