- Getting chapters from non-En/Fr sources
- mercure fonctionne!
This commit is contained in:
Jérémy Guillot
2024-06-16 13:14:32 +02:00
parent bc85649789
commit 671551c7f8
11 changed files with 313 additions and 65 deletions

View File

@@ -44,7 +44,8 @@
"symfony/webpack-encore-bundle": "^2.1",
"symfony/yaml": "7.0.*",
"twig/extra-bundle": "^2.12|^3.0",
"twig/twig": "^2.12|^3.0"
"twig/twig": "^2.12|^3.0",
"ext-zip": "*"
},
"config": {
"allow-plugins": {

View File

@@ -0,0 +1,32 @@
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Adds the nullable `external_id` column (VARCHAR(255)) to the `chapter` table.
 */
final class Version20240613163400 extends AbstractMigration
{
    /**
     * Short human-readable summary of what this migration changes.
     */
    public function getDescription(): string
    {
        return 'Add nullable external_id column to the chapter table';
    }

    /**
     * Applies the migration: adds `chapter.external_id`, defaulting to NULL.
     */
    public function up(Schema $schema): void
    {
        $this->addSql('ALTER TABLE chapter ADD external_id VARCHAR(255) DEFAULT NULL');
    }

    /**
     * Reverts the migration: drops `chapter.external_id`.
     */
    public function down(Schema $schema): void
    {
        // The auto-generated "CREATE SCHEMA public" statement has been removed on purpose:
        // up() never drops that schema, so there is nothing to recreate here, and the
        // statement makes the rollback fail when the schema already exists.
        $this->addSql('ALTER TABLE chapter DROP external_id');
    }
}

View File

@@ -63,12 +63,24 @@ class MangaController extends AbstractController
foreach ($manga->getChapters() as $chapter) {
$volume = $chapter->getVolume() ?? 'Not Found';
$chaptersByVolume[$volume][] = $chapter;
usort($chaptersByVolume[$volume], function ($a, $b) {
return $a->getNumber() <=> $b->getNumber();
});
}
$chaptersByVolume = array_map('array_reverse', array_reverse($chaptersByVolume, true));
foreach ($chaptersByVolume as $volume => &$chapters) {
usort($chapters, function ($a, $b) {
return $b->getNumber() <=> $a->getNumber();
});
}
unset($chapters);
uksort($chaptersByVolume, function ($a, $b) {
if ($a == 0) {
return -1;
}
if ($b == 0) {
return 1;
}
return $b <=> $a;
});
return $this->render('manga/show_chapters.html.twig', [
'chapters_by_volume' => $chaptersByVolume,
@@ -125,7 +137,7 @@ class MangaController extends AbstractController
$chapter = $this->chapterRepository->find($id);
if (!$chapter) {
return new JsonResponse(['error' => 'Chapter Not Found.'], 400);
}elseif ($chapter->getLocalPath() !== null){
} elseif ($chapter->getLocalPath() !== null) {
return new JsonResponse(['error' => 'Chapter already scraped.'], 400);
}
@@ -134,10 +146,11 @@ class MangaController extends AbstractController
return new JsonResponse(['success' => 'Scrapping started...'], 200);
}
#[Route('/manga/{mangaSlug}/chapter/{chapterNumber}/download', name: 'download_chapter')]
public function downloadChapter(string $mangaSlug, float $chapterNumber): BinaryFileResponse
#[Route('/download-cbz/{chapterId}', name: 'download_cbz')]
public function downloadChapter(int $chapterId): BinaryFileResponse
{
$response = $this->mangaExportService->downloadCbz($this->slugToTitle($mangaSlug), $chapterNumber);
$chapter = $this->chapterRepository->find($chapterId);
$response = $this->mangaExportService->downloadCbz($chapter->getManga()->getTitle(), $chapter->getNumber());
if ($response === false) {
throw $this->createNotFoundException("Le chapitre demandé n'existe pas.");
@@ -147,7 +160,7 @@ class MangaController extends AbstractController
$response->headers->set('Content-Type', 'application/x-cbz');
$response->setContentDisposition(
ResponseHeaderBag::DISPOSITION_ATTACHMENT,
"{$mangaSlug}_{$chapterNumber}.cbz"
"{$chapter->getManga()->getSlug()}_{$chapter->getNumber()}.cbz"
);
return $response;

View File

@@ -37,6 +37,9 @@ class Chapter
#[ORM\Column(length: 255, nullable: true)]
private ?string $localPath = null;
#[ORM\Column(length: 255, nullable: true)]
private ?string $externalId = null;
public function __construct()
{
$this->pagesLink = new ArrayCollection();
@@ -162,4 +165,16 @@ class Chapter
return $this;
}
/**
 * Returns the value of the nullable `externalId` property.
 *
 * @return string|null the stored external identifier, or null when none has been set
 */
public function getExternalId(): ?string
{
return $this->externalId;
}
/**
 * Stores the given value in the nullable `externalId` property.
 *
 * @param string|null $externalId external identifier to store; null clears it
 *
 * @return static the same instance, so calls can be chained
 */
public function setExternalId(?string $externalId): static
{
$this->externalId = $externalId;
return $this;
}
}

View File

@@ -10,6 +10,7 @@ use App\Service\LelScansProviderService;
use App\Service\MangaScraperService;
use App\Service\NotificationService;
use Exception;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;
use Symfony\Component\Messenger\Attribute\AsMessageHandler;
@@ -34,24 +35,52 @@ readonly class DownloadChapterHandler
if (!$chapter) {
$this->notificationService->sendUpdate('notification', ['status' => 'error', 'message' => 'Chapter not found.']);
throw new BadRequestHttpException('Chapter not found');
}elseif ($chapter->getLocalPath() !== null){
} elseif ($chapter->getLocalPath() !== null) {
$this->notificationService->sendUpdate('notification', ['status' => 'error', 'message' => 'Chapter already scraped.']);
throw new BadRequestHttpException('Chapter already downloaded');
}
$lelScanSource = new ContentSource();
$lelScanSource->setBaseUrl('https://lelscans.net')
$sources = [
(new ContentSource())
->setBaseUrl('https://lelscans.net')
->setImageSelector('#image img')
->setChapterUrlFormat('https://lelscans.net/scan-%s/%s')
->setNextPageSelector('a[title="Suivant"]')
->setScrapingType('html');
->setScrapingType('html'),
(new ContentSource())
->setBaseUrl('https://api.mangadex.org/')
->setImageSelector('img')
->setChapterUrlFormat('at-home/server/%s')
->setScrapingType('mangadex')
];
$scrapedSuccessfully = false;
foreach ($sources as $source) {
try {
$this->mangaScraperService->scrapeChapter($chapter, $lelScanSource);
$this->mangaScraperService->scrapeChapter($chapter, $source);
$scrapedSuccessfully = true;
break;
} catch (Exception $e) {
$this->notificationService->sendUpdate('notification', ['status' => 'error', 'message' => 'An error occurred while scraping the chapter.']);
throw new Exception('Error scraping chapter: ' . $e->getMessage());
$this->notificationService->sendUpdate('notification', [
'status' => 'warning',
'message' => 'An error occurred while scraping with source: ' . $source->getBaseUrl() . '. Trying next source...'
]);
} catch (GuzzleException $e) {
}
}
if (!$scrapedSuccessfully) {
$this->notificationService->sendUpdate('notification', [
'status' => 'error',
'message' => 'All sources failed to scrape the chapter ' . $chapter->getManga()->getTitle() . ' ' . $chapter->getNumber() . '.'
]);
throw new Exception('All sources failed to scrape the chapter ' . $chapter->getManga()->getTitle() . ' ' . $chapter->getNumber() . '.');
}
$this->notificationService->sendUpdate('notification', ['status' => 'success', 'message' => 'Chapter scraped successfully.']);
}
}

View File

@@ -6,6 +6,7 @@ use App\Entity\Chapter;
use App\Entity\Manga;
use App\Entity\ContentSource;
use App\EventSubscriber\MangaScrapedEvent;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
@@ -21,9 +22,10 @@ use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
class MangaScraperService
{
const IMG_BASE_DIR = '/public/manga-images';
const string IMG_BASE_DIR = '/public/manga-images';
private string $projectDir;
private EventDispatcherInterface $eventDispatcher;
private string $scrapingType = '';
public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
{
@@ -77,19 +79,18 @@ class MangaScraperService
return $allChaptersData;
}
/**
* @throws GuzzleException
* @throws Exception
*/
public function scrapeChapter(Chapter $chapter, ContentSource $mangaSource): array|bool
{
switch ($mangaSource->getScrapingType()) {
case 'html':
return $this->scrapeChapterHtml($chapter->getManga(), $chapter, $mangaSource);
case 'javascript':
return $this->scrapeChapterJavaScript($chapter->getManga(), $chapter, $mangaSource);
// case 'api':
// // Implémentez la méthode de scraping par API si nécessaire
// return $this->scrapeChapterApi($manga, $chapter, $mangaSource);
default:
throw new \Exception('Unsupported scraping type: ' . $mangaSource->getScrapingType());
}
return match ($mangaSource->getScrapingType()) {
'html' => $this->scrapeChapterHtml($chapter->getManga(), $chapter, $mangaSource),
'javascript' => $this->scrapeChapterJavaScript($chapter->getManga(), $chapter, $mangaSource),
'mangadex' => $this->scrapeChapterMangadex($chapter, $mangaSource),
default => throw new Exception('Unsupported scraping type: ' . $mangaSource->getScrapingType()),
};
}
// private function scrapeChapterHtml(Manga $manga, Chapter $chapter, MangaSource $mangaSource): array|bool
@@ -101,6 +102,61 @@ class MangaScraperService
// return $this->saveChapterImages($manga, $chapter, $imgUrls);
// }
/**
 * Downloads every page of a chapter through the MangaDex "at-home" endpoint.
 *
 * The request URL is the source's base URL followed by its chapter URL format filled with the
 * chapter's external id. Each entry of `chapter.dataSaver` in the response is downloaded to
 * <projectDir>/public/manga-images/<manga title>/<chapter number>/ as 001.<ext>, 002.<ext>, ...
 * and a MangaScrapedEvent is dispatched once all pages are saved.
 *
 * @return array<int, array{image_url: string, local_image_url: string, page_number: int}>
 *
 * @throws GuzzleException when the HTTP request to the API fails
 * @throws Exception when the chapter has no external id, the response is not valid JSON,
 *                   the API does not answer "ok" with at least one page, the target directory
 *                   cannot be created, or an image download fails
 */
private function scrapeChapterMangadex(Chapter $chapter, ContentSource $mangaSource): array|bool
{
    $this->scrapingType = 'mangadex';

    $mangaTitle = $chapter->getManga()->getTitle();
    $chapterNumber = $chapter->getNumber();
    $errorMessage = 'Error while fetching chapter data from Mangadex ' . $mangaTitle . ' ' . $chapterNumber;

    // Without an external id the URL cannot designate a chapter; fail before any network call.
    $externalId = $chapter->getExternalId();
    if ($externalId === null || $externalId === '') {
        throw new Exception($errorMessage);
    }

    $client = new Client();
    $chapterUrl = $mangaSource->getBaseUrl() . sprintf($mangaSource->getChapterUrlFormat(), $externalId);
    $response = $client->get($chapterUrl);

    // JSON_THROW_ON_ERROR turns a malformed body into an exception instead of a silent null.
    $results = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);

    // Validate the whole payload before touching the filesystem: an error response has no
    // "chapter" key, and counting a missing key would raise a TypeError rather than an Exception.
    $isOk = is_array($results) && ($results['result'] ?? null) === 'ok';
    $baseUrl = $isOk ? ($results['baseUrl'] ?? null) : null;
    $hash = $isOk ? ($results['chapter']['hash'] ?? null) : null;
    $pages = $isOk ? ($results['chapter']['dataSaver'] ?? null) : null;

    if (!is_string($baseUrl) || !is_string($hash) || !is_array($pages) || $pages === []) {
        throw new Exception($errorMessage);
    }

    // A single recursive mkdir creates both the manga and the chapter directory.
    $chapterDir = sprintf('%s/%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
    if (!is_dir($chapterDir) && !mkdir($chapterDir, 0755, true) && !is_dir($chapterDir)) {
        throw new Exception(sprintf('Unable to create directory "%s"', $chapterDir));
    }

    $pageData = [];
    foreach (array_values($pages) as $index => $page) {
        $pageNumber = $index + 1;
        $pageUrl = $baseUrl . '/data-saver/' . $hash . '/' . $page;

        // Keep the remote file's extension; the local name is the zero-padded page number.
        $imageExtension = pathinfo(parse_url($pageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
        $imageName = sprintf('%03d.%s', $pageNumber, $imageExtension);
        $imagePath = sprintf('%s/%s', $chapterDir, $imageName);

        $this->downloadAndSaveImage($pageUrl, $imagePath);

        $pageData[] = [
            'image_url' => $pageUrl,
            'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
            'page_number' => $pageNumber,
        ];
    }

    $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData, $chapterDir);
    $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

    return $pageData;
}
private function scrapeChapterJavaScript(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
$chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getNumber());
@@ -128,6 +184,7 @@ class MangaScraperService
*/
private function scrapeChapterHtml(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
$this->scrapingType = 'html';
$chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
$pageData = [];
@@ -175,6 +232,7 @@ class MangaScraperService
/**
* @throws GuzzleException
* @throws Exception
*/
private function fetchHtml(string $url): string
{
@@ -189,14 +247,14 @@ class MangaScraperService
$statusCode = $response->getStatusCode();
if ($statusCode >= 300 && $statusCode < 400) {
throw new NotFoundHttpException('Chapter Not Found at ' . $url);
throw new Exception('Chapter Not Found at ' . $url);
} elseif ($statusCode == 404) {
throw new NotFoundHttpException('Chapter Not Found at ' . $url);
throw new Exception('Chapter Not Found at ' . $url);
}
return (string)$response->getBody();
} catch (HttpException $e) {
throw new BadRequestHttpException('Bad Request: ' . $e->getMessage());
} catch (Exception $e) {
throw new Exception('Bad Request: ' . $e->getMessage());
}
}
@@ -206,9 +264,31 @@ class MangaScraperService
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
$client = new Client();
$response = $client->get($imageUrl);
$startTime = microtime(true);
try {
$response = $client->get($imageUrl);
$endTime = microtime(true);
$contentType = $response->getHeaderLine('Content-Type');
$xCacheHeader = $response->getHeaderLine('X-Cache');
$isCached = str_starts_with($xCacheHeader, 'HIT');
$contentLength = $response->getHeaderLine('Content-Length');
if (str_starts_with($contentType, 'image/')) {
file_put_contents($destinationPath, $response->getBody()->getContents());
if ($this->scrapingType === 'mangadex') {
$this->sendReport($imageUrl, true, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
}
} else {
if ($this->scrapingType === 'mangadex') {
$this->sendReport($imageUrl, false, $isCached, (int)$contentLength, ($endTime - $startTime) * 1000);
}
throw new \Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
}
} catch
(RequestException $e) {
throw new \Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage());
}
}
private function saveChapterImages(Manga $manga, Chapter $chapter, array $imgUrls): array
@@ -269,4 +349,27 @@ class MangaScraperService
return (float)$parameters['chapter'] === $chapterNumber;
}
/**
 * Posts the outcome of one image download to https://api.mangadex.network/report.
 *
 * @param string $imageUrl URL the image was requested from
 * @param bool   $success  whether the download is reported as successful
 * @param bool   $cached   whether the response is reported as served from cache
 * @param int    $bytes    size of the response in bytes
 * @param float  $duration time the download took, in milliseconds
 *
 * @throws \Exception when the report request cannot be completed
 */
private function sendReport(string $imageUrl, bool $success, bool $cached, int $bytes, float $duration): void
{
    $client = new Client();

    try {
        $client->post('https://api.mangadex.network/report', [
            // The "json" option encodes the payload and sets the application/json
            // Content-Type header itself, so no explicit header is needed.
            'json' => [
                'url' => $imageUrl,
                'success' => $success,
                'cached' => $cached,
                'bytes' => $bytes,
                // Sent as whole milliseconds rather than a fractional float.
                // NOTE(review): the API appears to expect an integer here — confirm against the MangaDex docs.
                'duration' => (int) round($duration),
            ],
        ]);
    } catch (GuzzleException $e) {
        // GuzzleException also covers connection failures, which RequestException does not;
        // the original exception is chained so its stack trace is not lost.
        throw new \Exception('Erreur lors de l\'envoi du rapport : ' . $e->getMessage(), 0, $e);
    }
}
}

View File

@@ -69,13 +69,14 @@ readonly class MangadexProvider implements MetadataProviderInterface
return new ArrayCollection($mangas);
}
public function getFeed(Manga $manga): Manga
public function getFeed(Manga $manga): array
{
if($manga->getExternalId() === null) {
return $manga;
return [];
}
$chapters = [];
$chapterEntities = [];
$page = 0;
do {
@@ -105,21 +106,51 @@ readonly class MangadexProvider implements MetadataProviderInterface
$chapter = new Chapter();
$chapter->setNumber($chapterNumber)
->setTitle($result['attributes']['title'])
->setVolume((int)$result['attributes']['volume'] ?? null);
->setVolume((int)$result['attributes']['volume'] ?? null)
->setExternalId($result['id'])
;
$manga->addChapter($chapter);
$chapterEntities[] = $chapter;
// $manga->addChapter($chapter);
}
return $manga;
return $chapterEntities;
}
private function getFeedWithPagination(string $externalId, int $page): array
{
return $this->client->get('/manga/' . $externalId . '/feed', [
'limit' => 500,
'translatedLanguage' =>['en'],
'translatedLanguage' =>['en', 'fr'],
'order' => ['chapter' => 'asc'],
'offset' => $page * 500
]);
}
/**
 * Builds unsaved Chapter entities from the `/manga/{externalId}/aggregate` response.
 *
 * Every chapter listed under every volume becomes one Chapter titled "Chapter <number>" with
 * an empty external id; the volume is 0 when the API labels it "none". The entities are
 * returned without being attached to the manga.
 *
 * @return Chapter[] empty when the manga has no external id or the API result is not "ok"
 */
public function getMangaAggregate(Manga $manga): array
{
    if ($manga->getExternalId() === null) {
        return [];
    }

    $aggregate = $this->client->get('/manga/' . $manga->getExternalId() . '/aggregate');
    if ($aggregate['result'] !== 'ok') {
        return [];
    }

    $entities = [];
    foreach ($aggregate['volumes'] as $volumeData) {
        // Chapters that belong to no volume are grouped under volume 0.
        $volumeNumber = 0;
        if ($volumeData['volume'] !== 'none') {
            $volumeNumber = (float) $volumeData['volume'];
        }

        foreach ($volumeData['chapters'] as $chapterData) {
            $entity = (new Chapter())
                ->setNumber((float) $chapterData['chapter'])
                ->setTitle('Chapter ' . $chapterData['chapter'])
                ->setVolume($volumeNumber)
                ->setExternalId('');

            $entities[] = $entity;
        }
    }

    return $entities;
}
}

View File

@@ -59,7 +59,31 @@ class NewMangaForm
->setRating($this->mangaData['rating'])
->setExternalId($this->mangaData['externalId']);
$mangadexProvider->getFeed($manga);
$mangaFeed = $mangadexProvider->getFeed($manga);
$mangaAggregate = $mangadexProvider->getMangaAggregate($manga);
$allChapters = array_merge($mangaFeed, $mangaAggregate);
$mergedChapters = [];
foreach ($allChapters as $chapter) {
$number = $chapter->getNumber();
if (isset($mergedChapters[$number])) {
$existingChapter = $mergedChapters[$number];
if (!empty($chapter->getExternalId()) ||
(empty($existingChapter->getExternalId()) && !strpos($chapter->getTitle(), 'Chapter ') == 0)) {
$mergedChapters[$number] = $chapter;
}
} else {
$mergedChapters[$number] = $chapter;
}
}
foreach($mergedChapters as $chapter) {
$manga->addChapter($chapter);
}
try {
foreach ($manga->getChapters() as $chapter) {
$entityManager->persist($chapter);

View File

@@ -19,7 +19,7 @@
style="width: 150px; height: 220px;"
>
<div class="ml-4">
<p>{{ manga.description }}</p>
<p>{{ manga.description|truncate(250) }}</p>
<p><strong>Année de publication:</strong> {{ manga.publicationYear }}</p>
<p><strong>Genres:</strong> {{ manga.genres|join(', ') }}</p>
<p><strong>Note:</strong> {{ manga.rating }}</p>

View File

@@ -3,7 +3,7 @@
{% block title %}{{ manga.title }} - Chapitre {{ chapter.number }}{% endblock %}
{% block body %}
<div class="container w-full ml-60 p-4">
<div class="w-full mx-auto p-4">
<h1 class="text-center text-3xl my-4">{{ manga.title }} - Chapitre {{ chapter.number }}</h1>
<div class="flex justify-center my-4">

View File

@@ -133,7 +133,7 @@
</span>
</button>
{% endif %}
<a href="#" class="text-gray-500 hover:text-green-500">
<a href="{{ path('download_cbz', {chapterId: chapter.id}) }}" class="text-gray-500 hover:text-green-500">
<i class="fas fa-download"></i>
</a>
{# <a href="#" class="text-gray-500 hover:text-green-500"> #}