feat: ajout d'une nouvelle infrastructure de scraping avec des scrapers pour HTML, HTML avancé et JavaScript, ainsi qu'une factory pour gérer leur création et leur sélection. Mise à jour des gestionnaires de commandes pour intégrer cette nouvelle architecture et améliorer la gestion des erreurs lors du scraping des chapitres.
This commit is contained in:
parent
cbb62989d4
commit
b456f9304d
@@ -9,6 +9,7 @@ use App\Domain\Scraping\Domain\Contract\Repository\SourceRepositoryInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\CbzGeneratorInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ImageDownloaderInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
|
||||
use App\Domain\Scraping\Domain\Contract\Service\ScraperFactoryInterface;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScraped;
|
||||
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
|
||||
use App\Domain\Scraping\Domain\Model\Chapter;
|
||||
@@ -25,7 +26,7 @@ use Doctrine\ORM\EntityManagerInterface;
|
||||
readonly class ScrapeChapterHandler
|
||||
{
|
||||
public function __construct(
|
||||
private ScraperInterface $scraper,
|
||||
private ScraperFactoryInterface $scraperFactory,
|
||||
private ImageDownloaderInterface $imageDownloader,
|
||||
private CbzGeneratorInterface $cbzGenerator,
|
||||
private JobRepositoryInterface $jobRepository,
|
||||
@@ -87,13 +88,19 @@ readonly class ScrapeChapterHandler
|
||||
$this->entityManager->beginTransaction();
|
||||
|
||||
// 5. Scraping des URLs avec le slug courant
|
||||
$scrapingParameters = $source->getScrappingParameters();
|
||||
$scrapingParameters['chapterNumber'] = $chapter->chapterNumber;
|
||||
$scrapingType = $scrapingParameters['scrapingType'] ?? 'html';
|
||||
|
||||
$scrapingRequest = new ScrapingRequest(
|
||||
'html',
|
||||
$scrapingType,
|
||||
$source->buildChapterUrl($slug, $chapter->chapterNumber),
|
||||
$source->getScrappingParameters()
|
||||
$scrapingParameters
|
||||
);
|
||||
|
||||
$scrapingResult = $this->scraper->scrape($scrapingRequest);
|
||||
// Sélection du scraper approprié selon le type
|
||||
$scraper = $this->scraperFactory->getScraperWithFallback($scrapingType);
|
||||
$scrapingResult = $scraper->scrape($scrapingRequest);
|
||||
|
||||
// 6. Téléchargement des images
|
||||
$tempDir = new TempDirectory();
|
||||
@@ -134,6 +141,8 @@ readonly class ScrapeChapterHandler
|
||||
break;
|
||||
|
||||
} catch (\Exception $e) {
|
||||
dump('EXCEPTION for source ' . $source->getName() . ' with slug ' . $slug . ': ' . $e->getMessage());
|
||||
|
||||
$this->entityManager->rollback();
|
||||
|
||||
if (isset($job)) {
|
||||
@@ -184,6 +193,11 @@ readonly class ScrapeChapterHandler
|
||||
if ($source) {
|
||||
$preferredSources[] = $source;
|
||||
}
|
||||
|
||||
// Limiter à 3 sources préférées maximum
|
||||
if (count($preferredSources) >= 3) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($preferredSources)) {
|
||||
|
||||
Reference in New Issue
Block a user