getManga();
        $pantherClient = PantherClient::createChromeClient();
        $chapterUrl = $this->getValidChapterUrl($contentSource, $manga, $chapter->getNumber());

        if (!$chapterUrl) {
            throw new Exception("Aucune URL valide trouvée pour le chapitre {$chapter->getNumber()} du manga {$manga->getTitle()}");
        }

        $pantherClient->request('GET', $chapterUrl);

        try {
            $this->selectChapter($pantherClient, $chapter, $contentSource);

            $pageData = $contentSource->getNextPageSelector() === null
                ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
                : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);

            $tempDir = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
            mkdir($tempDir);

            // Download and save the images
            foreach ($pageData as $index => &$page) {
                $imageName = sprintf('%03d.%s', $index + 1, pathinfo(parse_url($page['image_url'], PHP_URL_PATH), PATHINFO_EXTENSION));
                $imagePath = $tempDir . '/' . $imageName;
                $this->downloadAndSaveImage($page['image_url'], $imagePath);
                $this->dispatchProgressEvent($chapter, $index + 1, count($pageData));
                $page['local_image_url'] = $imagePath;
            }

            $cbzFilePath = $this->generateCbzPath($manga, $chapter);
            $this->createCbzFile($pageData, $cbzFilePath);

            $chapter->setCbzPath($cbzFilePath);
            $this->entityManager->persist($chapter);
            $this->entityManager->flush();

            $this->cleanupTempFiles($tempDir);

            return $pageData;
        } finally {
            $pantherClient->close();
        }
    }

    /**
     * Scrapes the page list of one chapter without downloading images or persisting anything.
     *
     * A transient Chapter carrying only the requested number is built so the
     * regular chapter-selection and reader-scraping helpers can be reused.
     *
     * @param string        $mangaSlug     Slug handed to the content source to build the chapter URL.
     * @param string        $chapterNumber Chapter number; cast to float for the transient Chapter.
     * @param ContentSource $contentSource Source configuration (URL builder and CSS selectors).
     *
     * @return array Page entries produced by the reader-specific scraper.
     *
     * @throws \Exception When isChapterUrlValid() rejects the URL, or when scraping fails.
     */
    public function testScraping(string $mangaSlug, string $chapterNumber, ContentSource $contentSource): array
    {
        $chapterUrl = $contentSource->getChapterUrl($mangaSlug, $chapterNumber);

        if (!$this->isChapterUrlValid($chapterUrl)) {
            throw new \Exception("Invalid URL, check format and slug");
        }

        $pantherClient = PantherClient::createChromeClient();

        try {
            // Navigate inside the try block so the browser is closed even if the request itself fails.
            $pantherClient->request('GET', $chapterUrl);

            $chapter = new Chapter();
            $chapter->setNumber((float) $chapterNumber);

            $this->selectChapter($pantherClient, $chapter, $contentSource);

            return $contentSource->getNextPageSelector() === null
                ? $this->scrapeVerticalReaderJavascript($pantherClient, $contentSource, $chapter)
                : $this->scrapeHorizontalReaderJavascript($pantherClient, $contentSource, $chapter);
        } finally {
            $pantherClient->close();
        }
    }

    /**
     * Tells whether this scraper handles the given scraping type.
     *
     * @param string $scrapingType Scraping type identifier.
     *
     * @return bool True only for the 'javascript' type.
     */
    public function supports(string $scrapingType): bool
    {
        return $scrapingType === 'javascript';
    }

    /**
     * Picks the requested chapter in the reader's chapter drop-down, when the source defines one.
     *
     * Does nothing when the content source has no chapter selector or when the
     * selector matches no element after waiting for it.
     *
     * @param PantherClient $pantherClient Browser client already positioned on the reader page.
     * @param Chapter       $chapter       Chapter whose number is searched in the option labels.
     * @param ContentSource $contentSource Source configuration providing the selectors.
     *
     * @throws Exception When no option label contains the chapter number.
     */
    private function selectChapter(PantherClient $pantherClient, Chapter $chapter, ContentSource $contentSource): void
    {
        $chapterSelector = $contentSource->getChapterSelector();
        if (!$chapterSelector) {
            return;
        }

        $select = $pantherClient->waitFor($chapterSelector)->filter($chapterSelector);
        if ($select->count() === 0) {
            return;
        }

        $chapterNumber = $chapter->getNumber();

        // Quote the number so the "." of a decimal chapter (e.g. 10.5) is matched literally,
        // and use lookarounds so that "1" does not match "1.5" and "5" does not match "1.5".
        $pattern = '/(?<!\d\.)\b' . preg_quote((string) $chapterNumber, '/') . '\b(?!\.\d)/';

        $targetIndex = null;
        foreach ($select->filter('option') as $index => $option) {
            if (preg_match($pattern, $option->getText()) === 1) {
                $targetIndex = $index;
                break;
            }
        }

        if ($targetIndex === null) {
            throw new Exception("Chapitre $chapterNumber non trouvé dans le menu déroulant");
        }

        // The selector and index are passed as script arguments instead of being spliced into
        // the JavaScript source, so selectors containing quotes cannot break the script.
        // The event bubbles, like a native "change" event, so delegated listeners fire as well.
        $pantherClient->executeScript(
            '
            var select = document.querySelector(arguments[0]);
            select.selectedIndex = arguments[1];
            select.dispatchEvent(new Event("change", { bubbles: true }));
            ',
            [$chapterSelector, $targetIndex]
        );

        $this->waitForImagesLoaded($pantherClient, $contentSource);
    }

    /**
     * Waits until the set of loaded reader images stops changing.
     *
     * The injected script polls every 200 ms and resolves once the number of
     * fully loaded images matching the image selector has stayed identical for
     * 10 consecutive polls. A count that stays at zero also counts as stable.
     *
     * NOTE(review): this relies on the WebDriver waiting for the Promise returned
     * by the script before handing back a value - confirm with the driver in use.
     *
     * @param PantherClient $pantherClient Browser client showing the reader page.
     * @param ContentSource $contentSource Source configuration providing the image selector.
     */
    private function waitForImagesLoaded(PantherClient $pantherClient, ContentSource $contentSource): void
    {
        $imageSelector = $contentSource->getImageSelector();

        // The selector travels as arguments[0] rather than being interpolated into the script,
        // so quotes inside the selector cannot corrupt the JavaScript.
        $script = '
            const imageSelector = arguments[0];
            return new Promise((resolve) => {
                let lastImageCount = 0;
                let stableCount = 0;
                const stableThreshold = 10;

                function checkImages() {
                    const images = document.querySelectorAll(imageSelector);
                    const loadedImages = Array.from(images).filter(img => img.complete && img.naturalWidth > 0);

                    if (loadedImages.length === lastImageCount) {
                        stableCount++;
                    } else {
                        stableCount = 0;
                        lastImageCount = loadedImages.length;
                    }

                    if (stableCount >= stableThreshold) {
                        resolve(true);
                    } else {
                        setTimeout(checkImages, 200);
                    }
                }

                checkImages();
            });
        ';

        $pantherClient->wait(30)->until(
            static function ($driver) use ($imageSelector, $script) {
                return $driver->executeScript($script, [$imageSelector]);
            }
        );
    }

    /**
     * Collects the image URLs of a reader that shows every page on a single document.
     *
     * Each matched image contributes its "src" attribute, or "data-src" when
     * "src" is empty, after normalisation through cleanImageUrl().
     *
     * @param PantherClient $pantherClient Browser client showing the reader page.
     * @param ContentSource $contentSource Source configuration providing the image selector.
     * @param Chapter       $chapter       Unused here; kept for signature parity with the horizontal scraper.
     *
     * @return array List of ['image_url' => ..., 'page_number' => int] entries, numbered from 1.
     */
    private function scrapeVerticalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
    {
        $imageSelector = $contentSource->getImageSelector();
        $images = $pantherClient->waitFor($imageSelector)->filter($imageSelector);

        $pageData = [];
        foreach ($images as $index => $image) {
            $imageUrl = $image->getAttribute('src') ?: $image->getAttribute('data-src');

            $pageData[] = [
                'image_url' => $this->cleanImageUrl($imageUrl),
                'page_number' => $index + 1,
            ];
        }

        return $pageData;
    }

    /**
     * Placeholder for readers that show one page at a time behind a "next page" control.
     *
     * TODO: not implemented - currently always returns an empty list, so sources that
     * define a next-page selector yield no pages.
     *
     * @param PantherClient $pantherClient Unused.
     * @param ContentSource $contentSource Unused.
     * @param Chapter       $chapter       Unused.
     *
     * @return array Always an empty array.
     */
    private function scrapeHorizontalReaderJavascript(PantherClient $pantherClient, ContentSource $contentSource, Chapter $chapter): array
    {
        $pageData = [];

        return $pageData;
    }
}