- Getting chapters from non-En/Fr sources
- mercure fonctionne!
This commit is contained in:
Jérémy Guillot
2024-06-16 13:14:32 +02:00
parent bc85649789
commit 671551c7f8
11 changed files with 313 additions and 65 deletions

View File

@@ -6,6 +6,7 @@ use App\Entity\Chapter;
use App\Entity\Manga;
use App\Entity\ContentSource;
use App\EventSubscriber\MangaScrapedEvent;
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Exception\RequestException;
@@ -21,9 +22,10 @@ use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
class MangaScraperService
{
// Path, appended to the project directory, under which downloaded chapter images are stored.
// Declared exactly once: a second declaration of the same class constant is a fatal error.
public const string IMG_BASE_DIR = '/public/manga-images';
private string $projectDir;
private EventDispatcherInterface $eventDispatcher;
// Scraping strategy currently in use (set to 'html' or 'mangadex' by the scrape methods);
// downloadAndSaveImage() reads it to decide whether a MangaDex network report must be sent.
private string $scrapingType = '';
public function __construct($projectDir, EventDispatcherInterface $eventDispatcher)
{
@@ -77,19 +79,18 @@ class MangaScraperService
return $allChaptersData;
}
/**
 * Scrape one chapter with the strategy configured on its content source.
 *
 * Dispatches on ContentSource::getScrapingType():
 *  - 'html'       => scrapeChapterHtml()
 *  - 'javascript' => scrapeChapterJavaScript()
 *  - 'mangadex'   => scrapeChapterMangadex()
 *
 * @return array|bool whatever the selected strategy returns
 *
 * @throws GuzzleException
 * @throws Exception when the scraping type is not one of the values listed above
 */
public function scrapeChapter(Chapter $chapter, ContentSource $mangaSource): array|bool
{
    $scrapingType = $mangaSource->getScrapingType();

    // Single dispatch point: the former switch statement returned or threw in every
    // branch, which made this match (and therefore the 'mangadex' strategy) unreachable.
    return match ($scrapingType) {
        'html' => $this->scrapeChapterHtml($chapter->getManga(), $chapter, $mangaSource),
        'javascript' => $this->scrapeChapterJavaScript($chapter->getManga(), $chapter, $mangaSource),
        'mangadex' => $this->scrapeChapterMangadex($chapter, $mangaSource),
        default => throw new \Exception('Unsupported scraping type: ' . $scrapingType),
    };
}
// private function scrapeChapterHtml(Manga $manga, Chapter $chapter, MangaSource $mangaSource): array|bool
@@ -101,6 +102,61 @@ class MangaScraperService
// return $this->saveChapterImages($manga, $chapter, $imgUrls);
// }
/**
 * Download every "data saver" page of a chapter from a MangaDex-style JSON endpoint.
 *
 * The endpoint URL is the source base URL followed by the chapter URL format filled with
 * the chapter's external id. The decoded payload must contain `result === 'ok'`, a
 * `baseUrl`, a `chapter.hash` and a non-empty `chapter.dataSaver` list. Each page is
 * saved as `<projectDir>/public/manga-images/<title>/<number>/NNN.<ext>` and a
 * MangaScrapedEvent is dispatched once all pages are on disk.
 *
 * @return array|bool list of ['image_url', 'local_image_url', 'page_number'] entries, one per page
 *
 * @throws GuzzleException
 * @throws Exception when the payload is malformed, not "ok", has no pages,
 *                   or the chapter directory cannot be created
 */
private function scrapeChapterMangadex(Chapter $chapter, ContentSource $mangaSource): array|bool
{
    // downloadAndSaveImage() checks this flag to decide whether to send a network report.
    $this->scrapingType = 'mangadex';

    $mangaTitle = $chapter->getManga()->getTitle();
    $chapterNumber = $chapter->getNumber();
    $chapterUrl = $mangaSource->getBaseUrl()
        . sprintf($mangaSource->getChapterUrlFormat(), $chapter->getExternalId());

    $client = new Client();
    $response = $client->get($chapterUrl);
    $results = json_decode($response->getBody()->getContents(), true);

    // Validate the whole payload before touching the filesystem: an error response has no
    // 'chapter' key, and counting a missing key would raise a TypeError instead of a
    // meaningful exception.
    $pages = is_array($results) ? ($results['chapter']['dataSaver'] ?? null) : null;
    $hash = is_array($results) ? ($results['chapter']['hash'] ?? null) : null;
    $baseUrl = is_array($results) ? ($results['baseUrl'] ?? null) : null;

    if (
        !is_array($results)
        || ($results['result'] ?? null) !== 'ok'
        || !is_array($pages)
        || $pages === []
        || !is_string($hash)
        || !is_string($baseUrl)
    ) {
        throw new \Exception('Error while fetching chapter data from Mangadex ' . $mangaTitle . ' ' . $chapterNumber);
    }

    // Recursive mkdir creates the manga directory and the chapter directory in one call.
    // The trailing is_dir() tolerates another process creating the directory concurrently.
    $chapterDir = sprintf('%s/%s/%s', $this->projectDir . self::IMG_BASE_DIR, $mangaTitle, $chapterNumber);
    if (!is_dir($chapterDir) && !mkdir($chapterDir, 0755, true) && !is_dir($chapterDir)) {
        throw new \Exception('Unable to create chapter directory ' . $chapterDir);
    }

    $pageData = [];
    foreach (array_values($pages) as $index => $page) {
        $pageNumber = $index + 1;
        $pageUrl = $baseUrl . '/data-saver/' . $hash . '/' . $page;

        // Determine the image extension from the URL path.
        $imageExtension = pathinfo((string) parse_url($pageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);

        // Build the local file name: zero-padded page number plus the remote extension.
        $imageName = sprintf('%03d.%s', $pageNumber, $imageExtension);
        $imagePath = sprintf('%s/%s', $chapterDir, $imageName);

        $this->downloadAndSaveImage($pageUrl, $imagePath);

        $pageData[] = [
            'image_url' => $pageUrl,
            'local_image_url' => sprintf('/manga-images/%s/%s/%s', $mangaTitle, $chapterNumber, $imageName),
            'page_number' => $pageNumber,
        ];
    }

    $event = new MangaScrapedEvent($mangaTitle, $chapterNumber, $pageData, $chapterDir);
    $this->eventDispatcher->dispatch($event, MangaScrapedEvent::NAME);

    return $pageData;
}
private function scrapeChapterJavaScript(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
$chapterUrl = $mangaSource->getChapterUrl($manga->getTitle(), $chapter->getNumber());
@@ -128,6 +184,7 @@ class MangaScraperService
*/
private function scrapeChapterHtml(Manga $manga, Chapter $chapter, ContentSource $mangaSource): array|bool
{
$this->scrapingType = 'html';
$chapterUrl = $mangaSource->getChapterUrl($manga->getSlug(), $chapter->getNumber());
$pageData = [];
@@ -175,6 +232,7 @@ class MangaScraperService
/**
* @throws GuzzleException
* @throws Exception
*/
private function fetchHtml(string $url): string
{
@@ -189,14 +247,14 @@ class MangaScraperService
$statusCode = $response->getStatusCode();
if ($statusCode >= 300 && $statusCode < 400) {
throw new NotFoundHttpException('Chapter Not Found at ' . $url);
throw new Exception('Chapter Not Found at ' . $url);
} elseif ($statusCode == 404) {
throw new NotFoundHttpException('Chapter Not Found at ' . $url);
throw new Exception('Chapter Not Found at ' . $url);
}
return (string)$response->getBody();
} catch (HttpException $e) {
throw new BadRequestHttpException('Bad Request: ' . $e->getMessage());
} catch (Exception $e) {
throw new Exception('Bad Request: ' . $e->getMessage());
}
}
@@ -206,9 +264,31 @@ class MangaScraperService
private function downloadAndSaveImage(string $imageUrl, string $destinationPath): void
{
    // Fetches $imageUrl once and writes the body to $destinationPath, but only when the
    // response Content-Type starts with "image/". While the 'mangadex' strategy is active,
    // the outcome (success flag, X-Cache hit, byte count, duration in ms) is passed to
    // sendReport(). Throws \Exception when the request fails, the response is not an
    // image, or the file cannot be written.
    $client = new Client();
    $startTime = microtime(true);

    try {
        $response = $client->get($imageUrl);
    } catch (RequestException $e) {
        // Keep the original exception as "previous" so the HTTP details are not lost.
        throw new \Exception('Erreur lors de la récupération de l\'image : ' . $e->getMessage(), 0, $e);
    }

    $durationMs = (microtime(true) - $startTime) * 1000;
    $contentType = $response->getHeaderLine('Content-Type');
    $isCached = str_starts_with($response->getHeaderLine('X-Cache'), 'HIT');
    $contentLength = $response->getHeaderLine('Content-Length');
    $isImage = str_starts_with($contentType, 'image/');

    // Nothing is written unless the payload is an image; a non-image body (e.g. an HTML
    // error page) must never end up on disk under an image file name.
    $body = $isImage ? $response->getBody()->getContents() : '';
    $written = $isImage ? file_put_contents($destinationPath, $body) : 0;

    if ($this->scrapingType === 'mangadex') {
        // Content-Length may be absent (e.g. chunked responses); fall back to the body size.
        $bytes = $contentLength !== '' ? (int) $contentLength : strlen($body);
        $this->sendReport($imageUrl, $isImage, $isCached, $bytes, $durationMs);
    }

    if (!$isImage) {
        throw new \Exception('Le contenu récupéré n\'est pas une image. Type de contenu : ' . $contentType);
    }

    if ($written === false) {
        throw new \Exception('Unable to write image to ' . $destinationPath);
    }
}
private function saveChapterImages(Manga $manga, Chapter $chapter, array $imgUrls): array
@@ -269,4 +349,27 @@ class MangaScraperService
return (float)$parameters['chapter'] === $chapterNumber;
}
/**
 * Report the outcome of one image download to the MangaDex network report endpoint.
 *
 * @param string $imageUrl full URL of the image that was requested
 * @param bool   $success  whether the download yielded an image
 * @param bool   $cached   whether the response's X-Cache header started with "HIT"
 * @param int    $bytes    size of the response in bytes
 * @param float  $duration request duration in milliseconds
 *
 * @throws \Exception when the report request fails for any Guzzle-level reason
 */
private function sendReport(string $imageUrl, bool $success, bool $cached, int $bytes, float $duration): void
{
    // NOTE(review): the MangaDex documentation appears to state that downloads served from
    // a base URL containing "mangadex.org" must not be reported — confirm against the docs.
    $host = parse_url($imageUrl, PHP_URL_HOST);
    if (is_string($host) && str_contains($host, 'mangadex.org')) {
        return;
    }

    $client = new Client();

    try {
        $client->post('https://api.mangadex.network/report', [
            'headers' => [
                'Content-Type' => 'application/json',
            ],
            'json' => [
                'url' => $imageUrl,
                'success' => $success,
                'cached' => $cached,
                'bytes' => $bytes,
                'duration' => $duration,
            ],
        ]);
    } catch (GuzzleException $e) {
        // GuzzleException also covers connection failures, which are not RequestException
        // instances; the original exception is chained for diagnosis.
        throw new \Exception('Erreur lors de l\'envoi du rapport : ' . $e->getMessage(), 0, $e);
    }
}
}