feat: ajout d'une nouvelle infrastructure de scraping avec des scrapers pour HTML, HTML avancé et JavaScript, ainsi qu'une factory pour gérer leur création et leur sélection. Mise à jour des gestionnaires de commandes pour intégrer cette nouvelle architecture et améliorer la gestion des erreurs lors du scraping des chapitres.

2025-07-08 15:30:22 +02:00
parent cbb62989d4
commit b456f9304d
10 changed files with 1244 additions and 36 deletions
--- a/config/services.yaml
+++ b/config/services.yaml
@@ -93,6 +93,29 @@ services:
        arguments:
            $scraperFactory: '@App\Service\Scraper\ScraperFactory'

+    # New Scrapers Factory for Domain Layer
+    App\Domain\Scraping\Infrastructure\Service\ScraperFactory:
+        arguments:
+            $projectDir: '%kernel.project_dir%'
+
+    # Scraper Factory Interface alias
+    App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface:
+        alias: App\Domain\Scraping\Infrastructure\Service\ScraperFactory
+
+    # Test Scraper Configuration Handler
+    App\Domain\Scraping\Application\CommandHandler\TestScraperConfigurationHandler: ~
+
+    # JavaScript Scraper
+    App\Domain\Scraping\Infrastructure\Service\Scraper\JavaScriptScraper:
+        arguments:
+            $projectDir: '%kernel.project_dir%'
+
+    # Advanced HTML Scraper
+    App\Domain\Scraping\Infrastructure\Service\Scraper\AdvancedHtmlScraper: ~
+
+    # Scrape Chapter Handler
+    App\Domain\Scraping\Application\CommandHandler\ScrapeChapterHandler: ~
+
    App\Domain\Scraping\Infrastructure\CommandHandler\SymfonyScrapeChapterHandler:
        tags:
            - { name: messenger.message_handler, bus: command.bus }
--- a/public/puppeteer-scraper.js
+++ b/public/puppeteer-scraper.js
@@ -0,0 +1,520 @@
+const puppeteer = require('puppeteer');
+
+// Default configuration shared by every scraping mode.
+const CONFIG = {
+    // Timeouts, in milliseconds
+    PAGE_TIMEOUT: 30000,
+    NAVIGATION_TIMEOUT: 10000,
+    // Interval (ms) between two scroll steps in autoScroll()
+    SCROLL_DELAY: 100,
+    // Vertical distance passed to window.scrollBy() on each scroll step
+    SCROLL_DISTANCE: 100,
+    // Shorter timeout used for fast error detection
+    ERROR_DETECTION_TIMEOUT: 5000,
+
+    // User agents to rotate through in order to evade bot detection
+    USER_AGENTS: [
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+    ],
+
+    // Browser launch arguments used to evade bot detection
+    // NOTE(review): '--no-sandbox' and '--disable-web-security' weaken browser isolation
+    // while loading third-party pages — confirm they are really required in this deployment.
+    BROWSER_ARGS: [
+        '--no-sandbox',
+        '--disable-setuid-sandbox',
+        '--disable-dev-shm-usage',
+        '--disable-accelerated-2d-canvas',
+        '--no-first-run',
+        '--no-zygote',
+        '--single-process',
+        '--disable-gpu',
+        '--disable-web-security',
+        '--disable-features=VizDisplayCompositor',
+        '--disable-blink-features=AutomationControlled'
+    ]
+};
+
+class ChapterNotFoundError extends Error {
+    constructor(chapterNumber) {
+        super(`Chapter ${chapterNumber} not found`);
+        this.name = 'ChapterNotFoundError';
+        this.chapterNumber = chapterNumber;
+    }
+}
+
+class PuppeteerScraper {
+    constructor(options = {}) {
+        this.options = this.parseArguments(options);
+        this.browser = null;
+        this.page = null;
+        this.imageUrls = new Set();
+        this.lastResponseStatus = null;
+        this.navigationError = null;
+    }
+
+    parseArguments(options) {
+        const args = process.argv.slice(2);
+        const parsed = { ...options };
+
+        args.forEach(arg => {
+            if (arg.startsWith('--')) {
+                const [key, value] = arg.substring(2).split('=');
+                parsed[key.replace(/-/g, '_')] = value === 'true' ? true : value === 'false' ? false : value;
+            }
+        });
+
+        return parsed;
+    }
+
+    async launch() {
+        // Essayer de trouver un exécutable Chrome/Chromium disponible
+        const possiblePaths = [
+            process.env.CHROME_BIN,
+            '/usr/bin/chromium',
+            '/usr/bin/chromium-browser',
+            '/usr/bin/google-chrome',
+            '/usr/bin/google-chrome-stable',
+            '/snap/bin/chromium'
+        ].filter(path => path); // Supprimer les valeurs nulles/undefined
+
+        let executablePath = null;
+
+        // Vérifier si on peut utiliser un des chemins
+        for (const path of possiblePaths) {
+            try {
+                const fs = require('fs');
+                if (fs.existsSync(path)) {
+                    executablePath = path;
+                    console.log(`Using Chrome at: ${path}`);
+                    break;
+                }
+            } catch (e) {
+                // Continuer avec le chemin suivant
+            }
+        }
+
+        // Si aucun exécutable trouvé, laisser Puppeteer utiliser celui installé via npm
+        this.browser = await puppeteer.launch({
+            headless: 'new',
+            executablePath: executablePath,
+            args: CONFIG.BROWSER_ARGS
+        });
+
+        this.page = await this.browser.newPage();
+
+        // Configuration anti-détection
+        await this.setupAntiDetection();
+
+        console.log('Browser launched and configured');
+    }
+
+    async setupAntiDetection() {
+        // Rotation des User-Agent
+        const userAgent = CONFIG.USER_AGENTS[Math.floor(Math.random() * CONFIG.USER_AGENTS.length)];
+        await this.page.setUserAgent(userAgent);
+
+        // Écouter les réponses pour détecter rapidement les erreurs HTTP
+        this.page.on('response', (response) => {
+            // Ne surveiller que les réponses de navigation principales
+            if (response.request().isNavigationRequest()) {
+                this.lastResponseStatus = response.status();
+
+                if (response.status() >= 400) {
+                    this.navigationError = {
+                        status: response.status(),
+                        statusText: response.statusText(),
+                        url: response.url()
+                    };
+                    console.log(`❌ HTTP Error ${response.status()} detected for: ${response.url()}`);
+                }
+            }
+        });
+
+        // Désactiver seulement les fonts et certains styles pour optimiser
+        await this.page.setRequestInterception(true);
+        this.page.on('request', (request) => {
+            if (['font'].includes(request.resourceType())) {
+                request.abort();
+            } else {
+                request.continue();
+            }
+        });
+
+        // Masquer les propriétés de détection de Puppeteer
+        await this.page.evaluateOnNewDocument(() => {
+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
+            Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
+            window.chrome = { runtime: {} };
+        });
+
+        // Viewport aléatoire
+        await this.page.setViewport({
+            width: 1366 + Math.floor(Math.random() * 200),
+            height: 768 + Math.floor(Math.random() * 200)
+        });
+    }
+
+    async navigateToPage(url, useReducedTimeout = false) {
+        // Reset des variables de détection d'erreur
+        this.lastResponseStatus = null;
+        this.navigationError = null;
+
+        const timeout = useReducedTimeout ? CONFIG.ERROR_DETECTION_TIMEOUT : CONFIG.PAGE_TIMEOUT;
+
+        try {
+            console.log(`🌐 Navigating to: ${url}`);
+
+            const response = await this.page.goto(url, {
+                waitUntil: 'domcontentloaded', // Plus rapide que networkidle2 pour la détection d'erreur
+                timeout: timeout
+            });
+
+            // Vérifier immédiatement le code de statut de la réponse
+            if (response && response.status() >= 400) {
+                throw new Error(`HTTP ${response.status()}: ${response.statusText()} for URL: ${url}`);
+            }
+
+            // Si pas d'erreur immédiate, attendre que le contenu se charge complètement
+            if (!this.navigationError) {
+                // Attendre un court délai pour permettre aux ressources de se charger
+                await new Promise(resolve => setTimeout(resolve, 1000));
+            } else {
+                throw new Error(`Navigation error: ${this.navigationError.status} ${this.navigationError.statusText}`);
+            }
+
+            console.log(`✅ Successfully loaded: ${url}`);
+            return response;
+
+        } catch (error) {
+            // Distinguer les erreurs de timeout des erreurs HTTP
+            if (error.message.includes('HTTP 4') || error.message.includes('HTTP 5')) {
+                console.log(`🚫 HTTP Error detected quickly: ${error.message}`);
+                throw error;
+            } else if (error.message.includes('timeout')) {
+                console.log(`⏱️ Navigation timeout for: ${url}`);
+                throw new Error(`Navigation timeout after ${timeout}ms for URL: ${url}`);
+            } else {
+                console.log(`❌ Navigation error: ${error.message}`);
+                throw error;
+            }
+        }
+    }
+
+    async navigateToPageWithFallback(url) {
+        try {
+            // Première tentative avec timeout réduit pour détection rapide d'erreur
+            return await this.navigateToPage(url, true);
+        } catch (error) {
+            if (error.message.includes('HTTP 4') || error.message.includes('HTTP 5')) {
+                // Erreur HTTP confirmée, ne pas réessayer
+                throw error;
+            }
+
+            // Si c'est un timeout, réessayer avec timeout complet
+            console.log(`🔄 Quick check failed, retrying with full timeout...`);
+            return await this.navigateToPage(url, false);
+        }
+    }
+
+    async selectChapter(chapterSelector, chapterNumber) {
+        try {
+            console.log(`📚 Looking for chapter selector: ${chapterSelector}`);
+
+            // Attendre que le sélecteur soit présent
+            await this.page.waitForSelector(chapterSelector, { timeout: CONFIG.NAVIGATION_TIMEOUT });
+
+            // Lister toutes les options disponibles
+            const options = await this.page.$$eval(chapterSelector + ' option', opts =>
+                opts.map(opt => ({
+                    value: opt.value,
+                    text: opt.textContent.trim(),
+                    selected: opt.selected
+                }))
+            );
+
+            console.log(`📖 Found ${options.length} chapter options`);
+
+            // Chercher l'option correspondant au chapitre demandé
+            const targetOption = options.find(opt => {
+                const text = opt.text.toLowerCase();
+                const chapterStr = chapterNumber.toString();
+
+                return text.includes(chapterStr) ||
+                       text.includes(`chapitre ${chapterStr}`) ||
+                       text.includes(`chapter ${chapterStr}`) ||
+                       opt.value === chapterStr ||
+                       text.includes(`${chapterStr}.0`) ||
+                       text.includes(`${chapterStr} -`);
+            });
+
+            if (targetOption) {
+                console.log(`🎯 Found target chapter: ${targetOption.text} (value: ${targetOption.value})`);
+
+                // Sélectionner le chapitre
+                await this.page.select(chapterSelector, targetOption.value);
+                console.log('✅ Chapter selected, waiting for page reload...');
+
+                // Attendre que la page se recharge après la sélection
+                try {
+                    await this.page.waitForNavigation({
+                        waitUntil: 'domcontentloaded',
+                        timeout: CONFIG.ERROR_DETECTION_TIMEOUT
+                    });
+                    console.log('🔄 Page reloaded after chapter selection');
+                } catch (error) {
+                    console.log(`⚠️ Warning during chapter navigation: ${error.message}`);
+                    // Attendre un peu même si la navigation échoue
+                    await new Promise(resolve => setTimeout(resolve, 2000));
+                }
+
+            } else {
+                // Lancer une exception spécifique pour le chapitre non trouvé
+                throw new ChapterNotFoundError(chapterNumber);
+            }
+
+        } catch (error) {
+            if (error instanceof ChapterNotFoundError) {
+                // Re-lancer l'exception pour qu'elle soit gérée en amont
+                throw error;
+            }
+            console.log(`⚠️ Error selecting chapter: ${error.message}`);
+            // Continuer même si la sélection échoue pour les autres erreurs
+        }
+    }
+
+    async scrapeVertical() {
+        const url = this.options.url;
+        const imageSelector = this.options.image_selector;
+        const waitForImages = this.options.wait_for_images === 'true';
+        const shouldScroll = this.options.scroll === 'true';
+        const chapterSelector = this.options.chapter_selector;
+        const chapterNumber = this.options.chapter_number;
+
+        try {
+            await this.navigateToPageWithFallback(url);
+        } catch (error) {
+            if (error.message.includes('HTTP 4') || error.message.includes('HTTP 5')) {
+                console.log(`🚫 Cannot access page: ${error.message}`);
+                return []; // Retourner un tableau vide pour les erreurs HTTP
+            }
+            throw error; // Re-lancer les autres erreurs
+        }
+
+        // Gérer la sélection de chapitre si nécessaire
+        if (chapterSelector && chapterNumber) {
+            try {
+                await this.selectChapter(chapterSelector, chapterNumber);
+            } catch (error) {
+                if (error instanceof ChapterNotFoundError) {
+                    console.log(`📚 MANGA_EXISTS_BUT_CHAPTER_NOT_FOUND: ${error.message}`);
+                    return {
+                        error: 'CHAPTER_NOT_FOUND',
+                        message: `Le manga existe mais le chapitre ${error.chapterNumber} n'est pas disponible.`,
+                        images: []
+                    };
+                }
+                throw error; // Re-lancer les autres erreurs
+            }
+        }
+
+        // Attendre le sélecteur d'image
+        if (waitForImages) {
+            await this.page.waitForSelector(imageSelector, { timeout: CONFIG.NAVIGATION_TIMEOUT });
+        }
+
+        // Scroll pour charger toutes les images lazy-load
+        if (shouldScroll) {
+            await this.autoScroll();
+        }
+
+        // Attendre un peu pour que les images se chargent (plus de temps pour lazy loading)
+        await new Promise(resolve => setTimeout(resolve, 3000));
+
+        // Collecter les URLs d'images
+        const imageUrls = await this.page.$$eval(imageSelector, imgs => {
+            return imgs.map(img => {
+                // Priorité au src, puis aux attributs data-*
+                return img.src ||
+                       img.getAttribute('src') ||
+                       img.getAttribute('data-src') ||
+                       img.getAttribute('data-lazy-src') ||
+                       img.getAttribute('data-original');
+            }).filter(url => url && url !== 'about:blank');
+        });
+
+        console.log(`Found ${imageUrls.length} images`);
+        return imageUrls;
+    }
+
+    async scrapeHorizontal() {
+        const url = this.options.url;
+        const imageSelector = this.options.image_selector;
+        const nextSelector = this.options.next_selector;
+        const waitForImages = this.options.wait_for_images === 'true';
+        const chapterSelector = this.options.chapter_selector;
+        const chapterNumber = this.options.chapter_number;
+
+        let currentUrl = url;
+        let pageCount = 0;
+        const maxPages = 200; // Limite de sécurité
+
+        while (currentUrl && pageCount < maxPages) {
+            console.log(`Scraping page ${pageCount + 1}: ${currentUrl}`);
+
+            try {
+                await this.navigateToPageWithFallback(currentUrl);
+            } catch (error) {
+                if (error.message.includes('HTTP 4') || error.message.includes('HTTP 5')) {
+                    console.log(`🚫 Cannot access page ${pageCount + 1}: ${error.message}`);
+                    break; // Arrêter le scraping si on rencontre une 404
+                }
+                // Pour les autres erreurs, essayer de continuer
+                console.log(`⚠️ Warning on page ${pageCount + 1}: ${error.message}, continuing...`);
+            }
+
+            // Gérer la sélection de chapitre pour la première page seulement
+            if (pageCount === 0 && chapterSelector && chapterNumber) {
+                try {
+                    await this.selectChapter(chapterSelector, chapterNumber);
+                } catch (error) {
+                    if (error instanceof ChapterNotFoundError) {
+                        console.log(`📚 MANGA_EXISTS_BUT_CHAPTER_NOT_FOUND: ${error.message}`);
+                        return {
+                            error: 'CHAPTER_NOT_FOUND',
+                            message: `Le manga existe mais le chapitre ${error.chapterNumber} n'est pas disponible.`,
+                            images: []
+                        };
+                    }
+                    throw error; // Re-lancer les autres erreurs
+                }
+            }
+
+            // Attendre le sélecteur d'image
+            if (waitForImages) {
+                try {
+                    await this.page.waitForSelector(imageSelector, { timeout: CONFIG.NAVIGATION_TIMEOUT });
+                } catch (e) {
+                    console.log(`No image found on page ${pageCount + 1}, skipping`);
+                    break;
+                }
+            }
+
+            // Récupérer l'image de la page
+            const imageUrl = await this.page.$eval(imageSelector, img => {
+                return img.src ||
+                       img.getAttribute('src') ||
+                       img.getAttribute('data-src') ||
+                       img.getAttribute('data-lazy-src') ||
+                       img.getAttribute('data-original');
+            }).catch(() => null);
+
+            if (imageUrl) {
+                this.imageUrls.add(imageUrl);
+                console.log(`Image found: ${imageUrl}`);
+            }
+
+            // Chercher le bouton/lien suivant
+            const nextElement = await this.page.$(nextSelector);
+            if (!nextElement) {
+                console.log('No next button found, ending scraping');
+                break;
+            }
+
+            // Récupérer l'URL suivante
+            currentUrl = await nextElement.evaluate(el => {
+                return el.href || el.getAttribute('href');
+            });
+
+            if (!currentUrl) {
+                console.log('No next URL found, ending scraping');
+                break;
+            }
+
+            pageCount++;
+            await new Promise(resolve => setTimeout(resolve, 1000)); // Pause entre les pages
+        }
+
+        return Array.from(this.imageUrls);
+    }
+
+    /**
+     * Scroll the page down step by step inside the browser context until the
+     * distance scrolled since the last height change reaches the document height.
+     *
+     * CONFIG is passed into page.evaluate() as `config`; the callback reads
+     * `SCROLL_DISTANCE` (step passed to window.scrollBy) and `SCROLL_DELAY`
+     * (interval between steps, in ms) from it.
+     */
+    async autoScroll() {
+        await this.page.evaluate(async (config) => {
+            await new Promise((resolve) => {
+                let totalHeight = 0;
+                let lastHeight = 0;
+
+                const timer = setInterval(() => {
+                    const scrollHeight = document.body.scrollHeight;
+
+                    // The document height changed: keep scrolling
+                    if (scrollHeight !== lastHeight) {
+                        lastHeight = scrollHeight;
+                        totalHeight = 0; // Reset the counter because more content appeared
+                    }
+
+                    window.scrollBy(0, config.SCROLL_DISTANCE);
+                    totalHeight += config.SCROLL_DISTANCE;
+
+                    // Stop once the bottom is reached AND nothing new is loading
+                    if (totalHeight >= scrollHeight) {
+                        clearInterval(timer);
+                        // Final scroll to the actual end of the document
+                        window.scrollTo(0, document.body.scrollHeight);
+                        resolve();
+                    }
+                }, config.SCROLL_DELAY);
+            });
+        }, CONFIG);
+    }
+
+    async close() {
+        if (this.browser) {
+            await this.browser.close();
+        }
+    }
+}
+
+(async () => {
+    const scraper = new PuppeteerScraper();
+
+    try {
+        await scraper.launch();
+
+        let result = [];
+
+        if (scraper.options.mode === 'vertical') {
+            result = await scraper.scrapeVertical();
+        } else if (scraper.options.mode === 'horizontal') {
+            result = await scraper.scrapeHorizontal();
+        } else {
+            throw new Error('Invalid mode. Use --mode=vertical or --mode=horizontal');
+        }
+
+        // Vérifier si le résultat est un objet d'erreur ou un tableau d'URLs
+        if (result && typeof result === 'object' && result.error === 'CHAPTER_NOT_FOUND') {
+            // Cas où le chapitre n'est pas trouvé
+            console.log(`CHAPTER_NOT_FOUND:${JSON.stringify(result)}`);
+        } else {
+            // Cas normal - nettoyer les URLs
+            const imageUrls = Array.isArray(result) ? result : [];
+            const cleanUrls = imageUrls.filter(url => url && typeof url === 'string');
+            console.log(`RESULT:${JSON.stringify(cleanUrls)}`);
+        }
+
+    } catch (error) {
+        if (error instanceof ChapterNotFoundError) {
+            // Cette erreur est déjà gérée dans les fonctions de scraping
+            // Mais au cas où elle remonterait ici
+            console.log(`CHAPTER_NOT_FOUND:${JSON.stringify({
+                error: 'CHAPTER_NOT_FOUND',
+                message: `Le manga existe mais le chapitre ${error.chapterNumber} n'est pas disponible.`,
+                images: []
+            })}`);
+        } else {
+            console.error('Error:', error.message);
+            process.exit(1);
+        }
+    } finally {
+        await scraper.close();
+    }
+})();
--- a/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
+++ b/src/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandler.php
@@ -9,6 +9,7 @@ use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
 use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
 use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
 use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
+use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
 use App\Domain\Scraping\Domain\Event\ChapterScraped;
 use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
 use App\Domain\Scraping\Domain\Model\Chapter;
@@ -25,7 +26,7 @@ use Doctrine\ORM\EntityManagerInterface;
 readonly class ScrapeChapterHandler
 {
    public function __construct(
-        private ScraperInterface $scraper,
+        private ScraperFactoryInterface $scraperFactory,
        private ImageDownloaderInterface $imageDownloader,
        private CbzGeneratorInterface $cbzGenerator,
        private JobRepositoryInterface $jobRepository,
@@ -87,13 +88,19 @@ readonly class ScrapeChapterHandler
                        $this->entityManager->beginTransaction();

                        // 5. Scraping des URLs avec le slug courant
+                        $scrapingParameters = $source->getScrappingParameters();
+                        $scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
+                        $scrapingType = $scrapingParameters['scrapingType'] ?? 'html';
+
                        $scrapingRequest = new ScrapingRequest(
-                            'html',
+                            $scrapingType,
                            $source->buildChapterUrl($slug, $chapter->chapterNumber),
-                            $source->getScrappingParameters()
+                            $scrapingParameters
                        );

-                        $scrapingResult = $this->scraper->scrape($scrapingRequest);
+                        // Sélection du scraper approprié selon le type
+                        $scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
+                        $scrapingResult = $scraper->scrape($scrapingRequest);

                        // 6. Téléchargement des images
                        $tempDir = new TempDirectory();
@@ -134,6 +141,8 @@ readonly class ScrapeChapterHandler
                        break;

                    } catch (\Exception $e) {
+                        dump('EXCEPTION for source ' . $source->getName() . ' with slug ' . $slug . ': ' . $e->getMessage());
+
                        $this->entityManager->rollback();

                        if (isset($job)) {
@@ -184,6 +193,11 @@ readonly class ScrapeChapterHandler
                if ($source) {
                    $preferredSources[] = $source;
                }
+
+                // Limiter à 3 sources préférées maximum
+                if (count($preferredSources) >= 3) {
+                    break;
+                }
            }

            if (!empty($preferredSources)) {
--- a/src/Domain/Scraping/Application/CommandHandler/TestScraperConfigurationHandler.php
+++ b/src/Domain/Scraping/Application/CommandHandler/TestScraperConfigurationHandler.php
@@ -5,46 +5,43 @@ namespace App\Domain\Scraping\Application\CommandHandler;
 use App\Domain\Scraping\Application\Command\TestScraperConfiguration;
 use App\Domain\Scraping\Application\Response\TestScraperConfigurationResponse;
 use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
+use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
 use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;

 readonly class TestScraperConfigurationHandler
 {
    public function __construct(
-        private ScraperInterface $scraper
+        private ScraperFactoryInterface $scraperFactory
    ) {}

    public function handle(TestScraperConfiguration $command): TestScraperConfigurationResponse
    {
+        // Construction des paramètres de scraping depuis les données de la commande
+        $scrapingParameters = [
+            'imageSelector' => $command->imageSelector,
+            'nextPageSelector' => $command->nextPageSelector,
+            'chapterUrlFormat' => $command->chapterUrlFormat,
+            'scrapingType' => $command->scrapingType,
+            'chapterSelector' => $command->chapterSelector,
+            'chapterNumber' => $command->chapterNumber
+        ];
+        // Vérification que le scraper supporte le type de scraping
+        if (!$this->scraperFactory->isSupported($command->scrapingType)) {
+            return $this->tryWithFallbackScrapers($command, $scrapingParameters);
+        }
+
+        // Essayer avec le scraper demandé
        try {
-            // Construction des paramètres de scraping depuis les données de la commande
-            $scrapingParameters = [
-                'imageSelector' => $command->imageSelector,
-                'nextPageSelector' => $command->nextPageSelector,
-                'chapterUrlFormat' => $command->chapterUrlFormat,
-                'scrapingType' => $command->scrapingType,
-                'chapterSelector' => $command->chapterSelector
-            ];
-
-            // Vérification que le scraper supporte le type de scraping
-            if (!$this->scraper->supports($command->scrapingType)) {
-                return TestScraperConfigurationResponse::failure(
-                    $command->testUrl,
-                    $command->scrapingType,
-                    ["Type de scraping '{$command->scrapingType}' non supporté"]
-                );
-            }
-
-            // Création de la requête de scraping avec l'URL de test fournie directement
+            $scraper = $this->scraperFactory->createScraper($command->scrapingType);
            $scrapingRequest = new ScrapingRequest(
                $command->scrapingType,
                $command->testUrl,
                $scrapingParameters
            );

-            // Tentative de scraping
-            $scrapingResult = $this->scraper->scrape($scrapingRequest);
+            $scrapingResult = $scraper->scrape($scrapingRequest);
+

-            // Retour du succès avec les URLs trouvées
            return TestScraperConfigurationResponse::success(
                $scrapingResult->getImageUrls(),
                $command->testUrl,
@@ -52,17 +49,80 @@ readonly class TestScraperConfigurationHandler
            );

        } catch (\Exception $e) {
-            // Analyse de l'erreur pour fournir un message plus détaillé
-            $errors = $this->analyzeError($e, $command);
-
            return TestScraperConfigurationResponse::failure(
                $command->testUrl,
                $command->scrapingType,
-                $errors
+                $this->analyzeError($e, $command)
            );
        }
    }

+    /**
+     * Try every supported scraper type other than the requested one, in the
+     * order returned by the factory, and return the first successful result.
+     *
+     * @param TestScraperConfiguration $command            Test command (URL, requested type, selectors).
+     * @param array                    $scrapingParameters Parameters forwarded unchanged to every scraper.
+     * @param \Exception|null          $originalException  Failure of the requested scraper, when it was
+     *                                                     actually run before falling back.
+     *
+     * @return TestScraperConfigurationResponse Success for the first scraper that works, otherwise a
+     *                                          failure listing every attempt and its error message.
+     */
+    private function tryWithFallbackScrapers(
+        TestScraperConfiguration $command,
+        array $scrapingParameters,
+        ?\Exception $originalException = null
+    ): TestScraperConfigurationResponse {
+        $errors = [];
+        $triedScrapers = [];
+
+        // The requested scraper only counts as "tried" when it really ran and failed;
+        // when this method is reached because the type is unsupported, it never ran.
+        if ($originalException !== null) {
+            $triedScrapers[] = $command->scrapingType;
+            $errors[] = [
+                'type' => 'primary_scraper_failed',
+                'scraper' => $command->scrapingType,
+                'message' => $originalException->getMessage()
+            ];
+        }
+
+        // Try every other available scraper
+        foreach ($this->scraperFactory->getSupportedTypes() as $scraperType) {
+            if ($scraperType === $command->scrapingType) {
+                continue; // Already handled above
+            }
+
+            try {
+                $scraper = $this->scraperFactory->createScraper($scraperType);
+                $scrapingRequest = new ScrapingRequest(
+                    $scraperType,
+                    $command->testUrl,
+                    $scrapingParameters
+                );
+
+                $scrapingResult = $scraper->scrape($scrapingRequest);
+
+                // Success with an alternative scraper
+                return TestScraperConfigurationResponse::success(
+                    $scrapingResult->getImageUrls(),
+                    $command->testUrl,
+                    $scraperType, // Report the scraper type that actually worked
+                    "Scraper alternatif utilisé: {$scraperType} (au lieu de {$command->scrapingType})"
+                );
+
+            } catch (\Exception $e) {
+                $triedScrapers[] = $scraperType;
+                $errors[] = [
+                    'type' => 'fallback_scraper_failed',
+                    'scraper' => $scraperType,
+                    'message' => $e->getMessage()
+                ];
+            }
+        }
+
+        // Every scraper failed
+        $errors[] = [
+            'type' => 'all_scrapers_failed',
+            'message' => 'Aucun scraper disponible n\'a réussi à traiter cette URL',
+            'tried_scrapers' => $triedScrapers
+        ];
+
+        return TestScraperConfigurationResponse::failure(
+            $command->testUrl,
+            $command->scrapingType,
+            $errors
+        );
+    }
+
    private function analyzeError(\Exception $e, TestScraperConfiguration $command): array
    {
        $errors = [];
--- a/src/Domain/Scraping/Domain/Contract/Service/ScraperFactoryInterface.php
+++ b/src/Domain/Scraping/Domain/Contract/Service/ScraperFactoryInterface.php
@@ -0,0 +1,36 @@
+<?php
+
+namespace App\Domain\Scraping\Domain\Contract\Service;
+
+interface ScraperFactoryInterface
+{
+    /**
+     * Create a scraper for a specific type.
+     */
+    public function createScraper(string $type): ScraperInterface;
+
+    /**
+     * Get the most appropriate scraper according to priority.
+     */
+    public function getBestScraper(): ScraperInterface;
+
+    /**
+     * Get the fallback scraper (the simplest one).
+     */
+    public function getFallbackScraper(): ScraperInterface;
+
+    /**
+     * Get the scraper for the preferred type, or a fallback scraper when that type is unavailable.
+     */
+    public function getScraperWithFallback(string $preferredType): ScraperInterface;
+
+    /**
+     * Get the supported scraper types.
+     */
+    public function getSupportedTypes(): array;
+
+    /**
+     * Check whether a scraper type is supported.
+     */
+    public function isSupported(string $type): bool;
+}
--- a/src/Domain/Scraping/Domain/Exception/ChapterNotFoundException.php
+++ b/src/Domain/Scraping/Domain/Exception/ChapterNotFoundException.php
@@ -4,8 +4,8 @@ namespace App\Domain\Scraping\Domain\Exception;

 class ChapterNotFoundException extends \Exception
 {
-    public function __construct()
+    public function __construct(string $message = 'Chapter not found')
    {
-        parent::__construct('Chapter not found');
+        parent::__construct($message);
    }
-}
+}
--- a/src/Domain/Scraping/Domain/Model/ValueObject/ChapterUrl.php
+++ b/src/Domain/Scraping/Domain/Model/ValueObject/ChapterUrl.php
@@ -32,8 +32,8 @@ class ChapterUrl

    private function validateUrlFormat(string $format): string
    {
-        if (!str_contains($format, '{slug}') || !str_contains($format, '{chapterNumber}')) {
-            throw new InvalidArgumentException("The URL format must contain both {slug} and {chapterNumber} placeholders.");
+        if (!str_contains($format, '{slug}')) {
+            throw new InvalidArgumentException("The URL format must contain {slug} placeholder.");
        }

        return $format;
--- a/src/Domain/Scraping/Infrastructure/Service/Scraper/AdvancedHtmlScraper.php
+++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/AdvancedHtmlScraper.php
@@ -0,0 +1,252 @@
+<?php
+
+namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
+
+use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
+use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
+use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
+use Symfony\Component\DomCrawler\Crawler;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+use Symfony\Component\HttpClient\HttpClient;
+
+class AdvancedHtmlScraper implements ScraperInterface
+{
+    private const USER_AGENTS = [
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
+        'Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0'
+    ];
+
+    private const ACCEPT_HEADERS = [
+        'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
+    ];
+
+    private const ACCEPT_LANGUAGE_HEADERS = [
+        'en-US,en;q=0.9',
+        'en-US,en;q=0.8',
+        'en-GB,en;q=0.9',
+        'fr-FR,fr;q=0.9,en;q=0.8'
+    ];
+
+    private const RETRY_ATTEMPTS = 3; // maximum number of fetch attempts per URL
+    private const RETRY_DELAY = 2; // seconds; multiplied by the attempt number before each retry
+    private const REQUEST_TIMEOUT = 30; // value of the HTTP client 'timeout' option (presumably seconds; confirm)
+
+    /** HTTP client used for every page request. */
+    private readonly HttpClientInterface $httpClient;
+
+    /**
+     * @param HttpClientInterface|null $httpClient client to use; when null a default client with the
+     *                                             scraper timeout (and TLS verification on) is created
+     */
+    public function __construct(?HttpClientInterface $httpClient = null)
+    {
+        // Assigned exactly once: a promoted readonly property can never be re-assigned afterwards.
+        $this->httpClient = $httpClient ?? HttpClient::create(['timeout' => self::REQUEST_TIMEOUT]);
+    }
+
+    /**
+     * Scrape the chapter's image URLs (page by page when 'nextPageSelector' is set); failures are rethrown as \RuntimeException.
+     */
+    public function scrape(ScrapingRequest $request): ScrapingResult
+    {
+        $scrapingParameters = $request->getScrapingParameters();
+        try {
+            $pages = empty($scrapingParameters['nextPageSelector']) // empty(): no warning if the key is absent
+                ? $this->scrapeVerticalReader($request) : $this->scrapeHorizontalReader($request);
+            return new ScrapingResult(array_values($pages), count($pages)); // array_values: gap-free list
+        } catch (\Exception $e) {
+            throw new \RuntimeException('Advanced HTML scraping failed: ' . $e->getMessage(), 0, $e);
+        }
+    }
+
+    public function supports(string $sourceType): bool
+    {
+        return $sourceType === 'advanced_html'; // the only source type handled by this scraper
+    }
+
+    /**
+     * Collect every image URL matched by 'imageSelector' on the single chapter page.
+     *
+     * @return string[] non-empty image URLs, re-indexed from 0
+     */
+    private function scrapeVerticalReader(ScrapingRequest $request): array
+    {
+        $crawler = new Crawler($this->fetchHtmlWithRetry($request->getChapterUrl()));
+        $selector = $request->getScrapingParameters()['imageSelector'];
+        $images = $crawler->filter($selector)->each(function ($node) {
+            // Try the attributes in order and keep the first non-empty one (covers lazy-loading markup).
+            foreach (['src', 'data-src', 'data-lazy-src', 'data-original', 'data-zoom-image', 'data-full-src'] as $attribute) {
+                if ($value = $node->attr($attribute)) {
+                    return $this->cleanImageUrl($value);
+                }
+            }
+            // No usable attribute: return '' instead of passing null to cleanImageUrl(string), a TypeError.
+            return '';
+        });
+        return array_values(array_filter($images, fn($url) => !empty($url)));
+    }
+
+    /**
+     * Walk a page-by-page (horizontal) reader: collect the first matching image of each page,
+     * then follow the "next page" link.
+     *
+     * Stops when no next link is found, when a URL repeats, or after 200 pages.
+     *
+     * @return string[] non-empty image URLs in reading order, re-indexed from 0
+     */
+    private function scrapeHorizontalReader(ScrapingRequest $request): array
+    {
+        $pages = [];
+        $currentUrl = $request->getChapterUrl();
+        $params = $request->getScrapingParameters();
+        // URLs are strings, so track them as array keys: SplObjectStorage only accepts objects
+        // and threw a TypeError on the very first isset().
+        $visitedUrls = [];
+        $maxPages = 200; // safety limit
+        $pageCount = 0;
+
+        while ($currentUrl && $pageCount < $maxPages) {
+            // Avoid infinite loops when a "next" link leads back to a visited page.
+            if (isset($visitedUrls[$currentUrl])) {
+                break;
+            }
+            $visitedUrls[$currentUrl] = true;
+            $crawler = new Crawler($this->fetchHtmlWithRetry($currentUrl));
+
+            // Page image: the first non-empty attribute wins (covers lazy-loading markup).
+            $imageNode = $crawler->filter($params['imageSelector'])->first();
+            if ($imageNode->count() > 0) {
+                $imageUrl = $imageNode->attr('src') ?: $imageNode->attr('data-src')
+                    ?: $imageNode->attr('data-lazy-src') ?: $imageNode->attr('data-original');
+                if ($imageUrl) {
+                    $pages[] = $this->cleanImageUrl($this->resolveRelativeUrl($imageUrl, $currentUrl));
+                }
+            }
+
+            // Look for the link to the next page; stop when it is missing or has no href.
+            $nextLink = $crawler->filter($params['nextPageSelector'])->first();
+            $nextUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
+            if (!$nextUrl) {
+                break;
+            }
+
+            $currentUrl = $this->resolveRelativeUrl($nextUrl, $currentUrl);
+            $pageCount++;
+
+            // Pause between requests to avoid detection.
+            sleep(1);
+        }
+
+        return array_values(array_filter($pages, fn($url) => !empty($url)));
+    }
+
+    /**
+     * Fetch a page, sleeping RETRY_DELAY x attempt seconds between tries; rethrows the last failure.
+     */
+    private function fetchHtmlWithRetry(string $url): string
+    {
+        $failure = null;
+        $attempt = 0;
+        while (++$attempt <= self::RETRY_ATTEMPTS) {
+            try {
+                return $this->fetchHtml($url);
+            } catch (\Exception $caught) {
+                $failure = $caught;
+            }
+            if ($attempt < self::RETRY_ATTEMPTS) {
+                sleep(self::RETRY_DELAY * $attempt); // wait a little longer after each failed attempt
+            }
+        }
+        throw $failure;
+    }
+
+    /**
+     * Perform one GET request with freshly generated headers and return the response body.
+     *
+     * @throws \RuntimeException wrapping any failure: HTTP status >= 400, Cloudflare marker, client error
+     */
+    private function fetchHtml(string $url): string
+    {
+        $options = ['headers' => $this->generateHeaders(), 'timeout' => self::REQUEST_TIMEOUT];
+        try {
+            $response = $this->httpClient->request('GET', $url, $options);
+            $status = $response->getStatusCode();
+            if ($status >= 400) {
+                throw new \RuntimeException("HTTP {$status} error for URL: {$url}");
+            }
+
+            $body = $response->getContent();
+
+            // Check whether Cloudflare blocked the request: look for its markers in the body.
+            foreach (['cf-browser-verification', 'Checking your browser'] as $marker) {
+                if (str_contains($body, $marker)) {
+                    throw new \RuntimeException('Blocked by Cloudflare protection');
+                }
+            }
+
+            return $body;
+        } catch (\Exception $error) {
+            throw new \RuntimeException('Failed to fetch HTML: ' . $error->getMessage(), 0, $error);
+        }
+    }
+
+    /** Browser-like headers with random User-Agent/Accept/Accept-Language; no Accept-Encoding so the client can auto-decode. */
+    private function generateHeaders(): array
+    {
+        return [
+            'User-Agent' => self::USER_AGENTS[array_rand(self::USER_AGENTS)],
+            'Accept' => self::ACCEPT_HEADERS[array_rand(self::ACCEPT_HEADERS)],
+            'Accept-Language' => self::ACCEPT_LANGUAGE_HEADERS[array_rand(self::ACCEPT_LANGUAGE_HEADERS)],
+            'DNT' => '1',
+            'Connection' => 'keep-alive',
+            'Upgrade-Insecure-Requests' => '1',
+            'Sec-Fetch-Dest' => 'document',
+            'Sec-Fetch-Mode' => 'navigate',
+            'Sec-Fetch-Site' => 'none',
+            'Sec-Fetch-User' => '?1',
+            'Cache-Control' => 'max-age=0'
+        ];
+    }
+
+    /** Make $url absolute against $baseUrl (absolute, "//host", "/root", "?query" and path-relative forms). */
+    private function resolveRelativeUrl(string $url, string $baseUrl): string
+    {
+        if (preg_match('/^[a-z][a-z0-9+.-]*:/i', $url)) {
+            return $url; // already carries a scheme (http:, https:, data:, ...)
+        }
+        $base = parse_url($baseUrl) ?: [];
+        $scheme = $base['scheme'] ?? 'https';
+        if (str_starts_with($url, '//')) {
+            return $scheme . ':' . $url; // protocol-relative: only the scheme is inherited
+        }
+        $origin = $scheme . '://' . ($base['host'] ?? '') . (isset($base['port']) ? ':' . $base['port'] : '');
+        $path = $base['path'] ?? '/';
+        // The directory of the base path is the reference for plain relative URLs; rtrim avoids "//".
+        return match (true) {
+            str_starts_with($url, '/') => $origin . $url,
+            str_starts_with($url, '?') => $origin . $path . $url,
+            default => $origin . rtrim(str_replace('\\', '/', dirname($path)), '/') . '/' . $url,
+        };
+    }
+
+    /**
+     * Strip control characters and tracking query parameters (utm_*, ref*, source*) from an image URL.
+     */
+    private function cleanImageUrl(?string $url): string
+    {
+        $url = preg_replace('/[\x00-\x1F\x7F]/', '', trim((string) $url));
+        if (!str_contains($url, '?')) {
+            return $url;
+        }
+        [$base, $query] = explode('?', $url, 2);
+        // Drop whole parameters: the old regex also ate a leading "?", turning "a.jpg?utm_x=1&w=2" into "a.jpg&w=2".
+        $kept = preg_grep('/^(utm_|ref|source)/i', explode('&', $query), PREG_GREP_INVERT);
+        return $kept ? $base . '?' . implode('&', $kept) : $base;
+    }
+}
--- a/src/Domain/Scraping/Infrastructure/Service/Scraper/JavaScriptScraper.php
+++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/JavaScriptScraper.php
@@ -0,0 +1,157 @@
+<?php
+
+namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
+
+use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
+use App\Domain\Scraping\Domain\Exception\ChapterNotFoundException;
+use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingRequest;
+use App\Domain\Scraping\Domain\Model\ValueObject\ScrapingResult;
+use Symfony\Component\Process\Process;
+
+class JavaScriptScraper implements ScraperInterface
+{
+    private const PUPPETEER_TIMEOUT = 60; // seconds
+    private const PUPPETEER_SCRIPT_PATH = '/public/puppeteer-scraper.js'; // appended to $projectDir
+    private const NODE_EXECUTABLE = 'node'; // first element of the process command line
+
+    public function __construct(
+        private readonly string $projectDir // base directory that PUPPETEER_SCRIPT_PATH is appended to
+    ) {}
+
+    /**
+     * Run the Puppeteer script for the chapter and wrap the image URLs it reports in a ScrapingResult.
+     * @throws ChapterNotFoundException as-is; any other failure is wrapped in a \RuntimeException
+     */
+    public function scrape(ScrapingRequest $request): ScrapingResult
+    {
+        try {
+            $scriptPath = $this->projectDir . self::PUPPETEER_SCRIPT_PATH;
+            if (!file_exists($scriptPath)) {
+                throw new \RuntimeException('Puppeteer script not found at: ' . $scriptPath);
+            }
+            $imageUrls = !empty($request->getScrapingParameters()['nextPageSelector'])
+                ? $this->scrapeHorizontalReader($request, $scriptPath) : $this->scrapeVerticalReader($request, $scriptPath);
+            return new ScrapingResult($imageUrls, count($imageUrls));
+        } catch (ChapterNotFoundException $e) {
+            throw $e; // domain signal: must reach callers untouched, not buried in a RuntimeException
+        } catch (\Exception $e) {
+            throw new \RuntimeException('JavaScript scraping failed: ' . $e->getMessage(), 0, $e);
+        }
+    }
+
+    public function supports(string $sourceType): bool
+    {
+        return $sourceType === 'javascript'; // the only source type handled by this scraper
+    }
+
+    /**
+     * Run the Puppeteer script in vertical mode (scrolling enabled).
+     * @return array image URLs as returned by executeProcess()
+     */
+    private function scrapeVerticalReader(ScrapingRequest $request, string $scriptPath): array
+    {
+        $options = $request->getScrapingParameters();
+
+        $command = [self::NODE_EXECUTABLE, $scriptPath, '--mode=vertical'];
+        $command[] = '--url=' . $request->getChapterUrl();
+        $command[] = '--image-selector=' . $options['imageSelector'];
+        $command[] = '--wait-for-images=true';
+        $command[] = '--scroll=true';
+
+        // Optional chapter arguments, forwarded only when they are configured.
+        if (!empty($options['chapterSelector'])) {
+            $command[] = '--chapter-selector=' . $options['chapterSelector'];
+        }
+
+        if (isset($options['chapterNumber'])) {
+            $command[] = '--chapter-number=' . $options['chapterNumber'];
+        }
+
+        return $this->executeProcess(new Process($command));
+    }
+
+    /**
+     * Run the Puppeteer script in horizontal mode, passing it the next-page selector.
+     *
+     * @return array image URLs as returned by executeProcess()
+     */
+    private function scrapeHorizontalReader(ScrapingRequest $request, string $scriptPath): array
+    {
+        $options = $request->getScrapingParameters();
+
+        $command = [self::NODE_EXECUTABLE, $scriptPath, '--mode=horizontal'];
+        $command[] = '--url=' . $request->getChapterUrl();
+        $command[] = '--image-selector=' . $options['imageSelector'];
+        $command[] = '--next-selector=' . $options['nextPageSelector'];
+        $command[] = '--wait-for-images=true';
+
+        // Optional chapter arguments, forwarded only when they are configured.
+        if (!empty($options['chapterSelector'])) {
+            $command[] = '--chapter-selector=' . $options['chapterSelector'];
+        }
+
+        if (isset($options['chapterNumber'])) {
+            $command[] = '--chapter-number=' . $options['chapterNumber'];
+        }
+
+        return $this->executeProcess(new Process($command));
+    }
+
+    /**
+     * Run the Puppeteer process and decode the last line of its standard output.
+     *
+     * That line must start with "RESULT:" (JSON array of URLs) or "CHAPTER_NOT_FOUND:" (JSON details).
+     *
+     * @return array cleaned image URLs
+     * @throws ChapterNotFoundException when the script reports a missing chapter
+     * @throws \RuntimeException        when the process fails or its output cannot be parsed
+     */
+    private function executeProcess(Process $process): array
+    {
+        $process->setTimeout(self::PUPPETEER_TIMEOUT);
+        $process->run();
+        if (!$process->isSuccessful()) {
+            // Prefer the error output; fall back to the standard output when it is empty.
+            throw new \RuntimeException('Puppeteer process failed: ' . ($process->getErrorOutput() ?: $process->getOutput()));
+        }
+
+        $outputLines = explode("\n", trim($process->getOutput()));
+        $lastLine = end($outputLines);
+
+        $notFoundPrefix = 'CHAPTER_NOT_FOUND:';
+        if (str_starts_with($lastLine, $notFoundPrefix)) {
+            $details = json_decode(substr($lastLine, strlen($notFoundPrefix)), true);
+            $message = is_array($details) && isset($details['message'])
+                ? $details['message']
+                : 'Le chapitre demandé n\'est pas disponible.';
+            throw new ChapterNotFoundException($message);
+        }
+
+        $resultPrefix = 'RESULT:';
+        if (!str_starts_with($lastLine, $resultPrefix)) {
+            throw new \RuntimeException('Invalid Puppeteer output format: ' . $lastLine);
+        }
+
+        $decoded = json_decode(substr($lastLine, strlen($resultPrefix)), true);
+        if (!is_array($decoded)) {
+            throw new \RuntimeException('Failed to parse Puppeteer output');
+        }
+        return $this->cleanImageUrls($decoded);
+    }
+
+    /** Clean the URLs reported by the script and keep only valid ones, as a gap-free list. */
+    private function cleanImageUrls(array $urls): array
+    {
+        // is_string(): a non-string JSON entry (e.g. null) would make cleanImageUrl(string) throw a TypeError.
+        // array_values(): array_filter keeps the original keys, which would leave holes in the list.
+        return array_values(array_filter(
+            array_map(fn($url) => is_string($url) ? $this->cleanImageUrl($url) : '', $urls),
+            fn($url) => !empty($url) && filter_var($url, FILTER_VALIDATE_URL)
+        ));
+    }
+
+    private function cleanImageUrl(string $url): string
+    {
+        return preg_replace('/[\x00-\x1F\x7F]/', '', trim($url)); // trim first, then drop ASCII control characters
+    }
+}
--- a/src/Domain/Scraping/Infrastructure/Service/ScraperFactory.php
+++ b/src/Domain/Scraping/Infrastructure/Service/ScraperFactory.php
@@ -0,0 +1,146 @@
+<?php
+
+namespace App\Domain\Scraping\Infrastructure\Service;
+
+use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
+use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
+use App\Domain\Scraping\Infrastructure\Service\Scraper\HtmlScraper;
+use App\Domain\Scraping\Infrastructure\Service\Scraper\AdvancedHtmlScraper;
+use App\Domain\Scraping\Infrastructure\Service\Scraper\JavaScriptScraper;
+use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
+use Symfony\Component\Messenger\MessageBusInterface;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+
+class ScraperFactory implements ScraperFactoryInterface
+{
+    private const SCRAPER_TYPES = [
+        'html' => HtmlScraper::class,
+        'advanced_html' => AdvancedHtmlScraper::class,
+        'javascript' => JavaScriptScraper::class,
+    ];
+
+    private const SCRAPER_PRIORITIES = [ // lower value = preferred by getBestScraper()
+        'javascript' => 1,      // the most powerful at getting around protections
+        'advanced_html' => 2,   // good trade-off between performance and effectiveness
+        'html' => 3,           // the simplest and fastest
+    ];
+
+    private array $scrapers = [];
+
+    public function __construct(
+        private readonly ImageDownloaderInterface $imageDownloader,
+        private readonly MessageBusInterface $eventBus,
+        private readonly HttpClientInterface $httpClient,
+        private readonly string $projectDir
+    ) {
+        $this->initializeScrapers(); // eagerly builds one instance per entry of SCRAPER_TYPES
+    }
+
+    /**
+     * Return the scraper registered under the given type key.
+     *
+     * @throws \InvalidArgumentException when no scraper is registered for $type
+     */
+    public function createScraper(string $type): ScraperInterface
+    {
+        // Instances are built once in the constructor; this is a lookup, not a new construction.
+        return $this->scrapers[$type]
+            ?? throw new \InvalidArgumentException("Scraper type '{$type}' is not supported");
+    }
+
+    /**
+     * Return the scraper whose SCRAPER_PRIORITIES value is the lowest.
+     */
+    public function getBestScraper(): ScraperInterface
+    {
+        $byPriority = self::SCRAPER_PRIORITIES;
+        asort($byPriority); // ascending, keys preserved: the first key is the preferred type
+
+        return $this->scrapers[array_key_first($byPriority)];
+    }
+
+    /**
+     * Get all available scrapers, keyed by type.
+     */
+    public function getAvailableScrapers(): array
+    {
+        return $this->scrapers;
+    }
+
+    /**
+     * Get the supported scraper types (the keys of SCRAPER_TYPES).
+     */
+    public function getSupportedTypes(): array
+    {
+        return array_keys(self::SCRAPER_TYPES);
+    }
+
+    /**
+     * Tell whether a scraper type key is declared in SCRAPER_TYPES.
+     */
+    public function isSupported(string $type): bool
+    {
+        return array_key_exists($type, self::SCRAPER_TYPES);
+    }
+
+    /**
+     * Get the fallback scraper (the simplest one): the instance registered under 'html'.
+     */
+    public function getFallbackScraper(): ScraperInterface
+    {
+        return $this->scrapers['html'];
+    }
+
+    /**
+     * Return the preferred scraper when its type is supported, otherwise the fallback scraper.
+     * This only selects an instance; no scraping attempt is made here.
+     */
+    public function getScraperWithFallback(string $preferredType): ScraperInterface
+    {
+        if (!$this->isSupported($preferredType)) {
+            // Unknown type: fall back to the default scraper.
+            return $this->getFallbackScraper();
+        }
+
+        return $this->scrapers[$preferredType];
+    }
+
+    /**
+     * Summarise the registered scrapers: count, type keys, priorities, best and fallback class names.
+     */
+    public function getScraperStats(): array
+    {
+        return [
+            'total_scrapers' => count($this->scrapers),
+            'supported_types' => $this->getSupportedTypes(),
+            'priorities' => self::SCRAPER_PRIORITIES,
+            'best_scraper' => get_class($this->getBestScraper()),
+            'fallback_scraper' => get_class($this->getFallbackScraper())
+        ];
+    }
+
+    /** Build one scraper instance per entry of SCRAPER_TYPES, keyed by type. */
+    private function initializeScrapers(): void
+    {
+        $build = fn(string $class): ScraperInterface => $this->createScraperInstance($class);
+        $this->scrapers = array_map($build, self::SCRAPER_TYPES);
+    }
+
+    /**
+     * Instantiate the scraper for one of the known scraper classes, injecting its dependencies.
+     * @throws \InvalidArgumentException when $class is not one of the known scraper classes
+     */
+    private function createScraperInstance(string $class): ScraperInterface
+    {
+        switch ($class) {
+            case HtmlScraper::class:
+                return new HtmlScraper($this->imageDownloader, $this->eventBus, $this->httpClient);
+            case AdvancedHtmlScraper::class:
+                return new AdvancedHtmlScraper($this->httpClient);
+            case JavaScriptScraper::class:
+                return new JavaScriptScraper($this->projectDir);
+            default:
+                throw new \InvalidArgumentException("Unknown scraper class: {$class}");
+        }
+    }
+}