From 89570ad951306d50a2a32c715ff24919c59fc6fd Mon Sep 17 00:00:00 2001 From: "ext.jeremy.guillot@maxicoffee.domains" Date: Mon, 3 Feb 2025 10:38:53 +0100 Subject: [PATCH] feat: firsts unit tests for ScrapeChapterHandler.php --- composer.json | 1 + composer.lock | 243 +++++++++++++++++- phpunit.xml.dist | 3 +- scrapers.json | 66 +++++ .../Application/Command/ScrapeChapter.php | 12 +- .../CommandHandler/ScrapeChapterHandler.php | 53 ++-- .../Repository/MangaRepositoryInterface.php | 10 + .../ScrapingJobRepositoryInterface.php | 4 +- .../Repository/SourceRepositoryInterface.php | 10 + .../Contract/Service/ScraperInterface.php | 4 +- .../Domain/Event/ChapterScrapingFailed.php | 21 ++ src/Domain/Scraping/Domain/Model/Manga.php | 39 +++ .../Scraping/Domain/Model/ScrapingJob.php | 2 +- src/Domain/Scraping/Domain/Model/Source.php | 59 +++++ .../Domain/Model/ValueObject/ChapterId.php | 18 ++ .../Domain/Model/ValueObject/SourceId.php | 18 ++ .../Model/ValueObject/TempDirectory.php | 18 ++ .../Persistence/DoctrineMangaRepository.php | 22 ++ .../Persistence/DoctrineSourceRepository.php | 26 ++ .../Persistence/Entity/MangaEntity.php | 75 ++++++ .../Persistence/Entity/ScrapingJobEntity.php | 20 -- .../Persistence/Entity/SourceEntity.php | 65 +++++ .../Service/ImageDownloader.php | 23 ++ .../Service/Scraper/AbstractScraper.php | 93 +++---- .../Service/Scraper/HtmlScraper.php | 142 +++++++--- .../Service/Scraper/JavascriptScraper.php | 38 --- .../Scraping/Adapter/InMemoryEventBus.php | 23 ++ .../Adapter/InMemoryScraperAdapter.php | 47 ++++ .../Adapter/InMemoryScrapingJobRepository.php | 44 ++++ .../ScrapeChapterHandlerTest.php | 85 ++++++ tests/Functional/UserResourceTest.php | 112 -------- 31 files changed, 1105 insertions(+), 291 deletions(-) create mode 100644 scrapers.json create mode 100644 src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php create mode 100644 src/Domain/Scraping/Domain/Contract/Repository/SourceRepositoryInterface.php create mode 100644 src/Domain/Scraping/Domain/Event/ChapterScrapingFailed.php create mode 100644 src/Domain/Scraping/Domain/Model/Manga.php create mode 100644 src/Domain/Scraping/Domain/Model/Source.php create mode 100644 src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php create mode 100644 src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php create mode 100644 src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php create mode 100644 src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php create mode 100644 src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php create mode 100644 src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php create mode 100644 src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php create mode 100644 src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php delete mode 100644 src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php create mode 100644 tests/Domain/Scraping/Adapter/InMemoryEventBus.php create mode 100644 tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php create mode 100644 tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php create mode 100644 tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php delete mode 100644 tests/Functional/UserResourceTest.php diff --git a/composer.json b/composer.json index 416f248..ad4371e 100644 --- a/composer.json +++ b/composer.json @@ -22,6 +22,7 @@ "nelmio/cors-bundle": "^2.4", "phpdocumentor/reflection-docblock": "^5.3", "phpstan/phpdoc-parser": "^1.25", + "ramsey/uuid": "^4.7", "runtime/frankenphp-symfony": "^0.2.0", "symfony/asset": "7.0.*", "symfony/console": "7.0.*", diff --git a/composer.lock b/composer.lock index 17101f5..10c137e 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "6258706876617c8b0c08f13c5a158fe7", + "content-hash": "49014ec06c069804432e6a13701e46a4", "packages": [ { "name": "api-platform/core", @@ -172,6 +172,66 @@ }, "time": "2024-02-01T14:41:52+00:00" }, + { + "name": "brick/math", + "version": "0.12.1", + "source": { + "type": "git", + "url": "https://github.com/brick/math.git", + "reference": "f510c0a40911935b77b86859eb5223d58d660df1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/brick/math/zipball/f510c0a40911935b77b86859eb5223d58d660df1", + "reference": "f510c0a40911935b77b86859eb5223d58d660df1", + "shasum": "" + }, + "require": { + "php": "^8.1" + }, + "require-dev": { + "php-coveralls/php-coveralls": "^2.2", + "phpunit/phpunit": "^10.1", + "vimeo/psalm": "5.16.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Brick\\Math\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Arbitrary-precision arithmetic library", + "keywords": [ + "Arbitrary-precision", + "BigInteger", + "BigRational", + "arithmetic", + "bigdecimal", + "bignum", + "bignumber", + "brick", + "decimal", + "integer", + "math", + "mathematics", + "rational" + ], + "support": { + "issues": "https://github.com/brick/math/issues", + "source": "https://github.com/brick/math/tree/0.12.1" + }, + "funding": [ + { + "url": "https://github.com/BenMorel", + "type": "github" + } + ], + "time": "2023-11-29T23:19:16+00:00" + }, { "name": "doctrine/cache", "version": "2.2.0", @@ -3050,6 +3110,187 @@ }, "time": "2019-03-08T08:55:37+00:00" }, + { + "name": "ramsey/collection", + "version": "2.0.0", + "source": { + "type": "git", + "url": "https://github.com/ramsey/collection.git", + "reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ramsey/collection/zipball/a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5", + "reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5", + "shasum": "" + }, + "require": { + "php": "^8.1" + }, + "require-dev": { + "captainhook/plugin-composer": "^5.3", + "ergebnis/composer-normalize": "^2.28.3", + "fakerphp/faker": "^1.21", + "hamcrest/hamcrest-php": "^2.0", + "jangregor/phpstan-prophecy": "^1.0", + "mockery/mockery": "^1.5", + "php-parallel-lint/php-console-highlighter": "^1.0", + "php-parallel-lint/php-parallel-lint": "^1.3", + "phpcsstandards/phpcsutils": "^1.0.0-rc1", + "phpspec/prophecy-phpunit": "^2.0", + "phpstan/extension-installer": "^1.2", + "phpstan/phpstan": "^1.9", + "phpstan/phpstan-mockery": "^1.1", + "phpstan/phpstan-phpunit": "^1.3", + "phpunit/phpunit": "^9.5", + "psalm/plugin-mockery": "^1.1", + "psalm/plugin-phpunit": "^0.18.4", + "ramsey/coding-standard": "^2.0.3", + "ramsey/conventional-commits": "^1.3", + "vimeo/psalm": "^5.4" + }, + "type": "library", + "extra": { + "captainhook": { + "force-install": true + }, + "ramsey/conventional-commits": { + "configFile": "conventional-commits.json" + } + }, + "autoload": { + "psr-4": { + "Ramsey\\Collection\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ben Ramsey", + "email": "ben@benramsey.com", + "homepage": "https://benramsey.com" + } + ], + "description": "A PHP library for representing and manipulating collections.", + "keywords": [ + "array", + "collection", + "hash", + "map", + "queue", + "set" + ], + "support": { + "issues": "https://github.com/ramsey/collection/issues", + "source": "https://github.com/ramsey/collection/tree/2.0.0" + }, + "funding": [ + { + "url": "https://github.com/ramsey", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/ramsey/collection", + "type": "tidelift" + } + ], + "time": "2022-12-31T21:50:55+00:00" + }, + { + "name": "ramsey/uuid", + "version": "4.7.6", + "source": { + "type": "git", + "url": "https://github.com/ramsey/uuid.git", + "reference": "91039bc1faa45ba123c4328958e620d382ec7088" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ramsey/uuid/zipball/91039bc1faa45ba123c4328958e620d382ec7088", + "reference": "91039bc1faa45ba123c4328958e620d382ec7088", + "shasum": "" + }, + "require": { + "brick/math": "^0.8.8 || ^0.9 || ^0.10 || ^0.11 || ^0.12", + "ext-json": "*", + "php": "^8.0", + "ramsey/collection": "^1.2 || ^2.0" + }, + "replace": { + "rhumsaa/uuid": "self.version" + }, + "require-dev": { + "captainhook/captainhook": "^5.10", + "captainhook/plugin-composer": "^5.3", + "dealerdirect/phpcodesniffer-composer-installer": "^0.7.0", + "doctrine/annotations": "^1.8", + "ergebnis/composer-normalize": "^2.15", + "mockery/mockery": "^1.3", + "paragonie/random-lib": "^2", + "php-mock/php-mock": "^2.2", + "php-mock/php-mock-mockery": "^1.3", + "php-parallel-lint/php-parallel-lint": "^1.1", + "phpbench/phpbench": "^1.0", + "phpstan/extension-installer": "^1.1", + "phpstan/phpstan": "^1.8", + "phpstan/phpstan-mockery": "^1.1", + "phpstan/phpstan-phpunit": "^1.1", + "phpunit/phpunit": "^8.5 || ^9", + "ramsey/composer-repl": "^1.4", + "slevomat/coding-standard": "^8.4", + "squizlabs/php_codesniffer": "^3.5", + "vimeo/psalm": "^4.9" + }, + "suggest": { + "ext-bcmath": "Enables faster math with arbitrary-precision integers using BCMath.", + "ext-gmp": "Enables faster math with arbitrary-precision integers using GMP.", + "ext-uuid": "Enables the use of PeclUuidTimeGenerator and PeclUuidRandomGenerator.", + "paragonie/random-lib": "Provides RandomLib for use with the RandomLibAdapter", + "ramsey/uuid-doctrine": "Allows the use of Ramsey\\Uuid\\Uuid as Doctrine field type." + }, + "type": "library", + "extra": { + "captainhook": { + "force-install": true + } + }, + "autoload": { + "files": [ + "src/functions.php" + ], + "psr-4": { + "Ramsey\\Uuid\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "A PHP library for generating and working with universally unique identifiers (UUIDs).", + "keywords": [ + "guid", + "identifier", + "uuid" + ], + "support": { + "issues": "https://github.com/ramsey/uuid/issues", + "source": "https://github.com/ramsey/uuid/tree/4.7.6" + }, + "funding": [ + { + "url": "https://github.com/ramsey", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/ramsey/uuid", + "type": "tidelift" + } + ], + "time": "2024-04-27T21:32:50+00:00" + }, { "name": "runtime/frankenphp-symfony", "version": "0.2.0", diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 73f7b8b..6976b90 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -16,9 +16,10 @@ - + + src diff --git a/scrapers.json b/scrapers.json new file mode 100644 index 0000000..16f1cba --- /dev/null +++ b/scrapers.json @@ -0,0 +1,66 @@ +[ + { + "baseUrl": "https://darkscans.net/", + "imageSelector": ".reading-content img", + "nextPageSelector": null, + "chapterUrlFormat": "https://darkscans.net/mangas/{slug}/chapter-{chapterNumber}/", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://lelscans.net", + "imageSelector": "#image img", + "nextPageSelector": "a[title=\"Suivant\"]", + "chapterUrlFormat": "https://lelscans.net/scan-{slug}/{chapterNumber}", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://www.thebeginningaftertheend.fr/", + "imageSelector": ".reading-content img", + "nextPageSelector": null, + "chapterUrlFormat": "https://www.thebeginningaftertheend.fr/manga/{slug}-manga/chapitre-{chapterNumber}_1/", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://lelscanfr.com", + "imageSelector": "#chapter-container img.chapter-image", + "nextPageSelector": null, + "chapterUrlFormat": "https://lelscanfr.com/manga/{slug}/{chapterNumber}", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://read-versus.online", + "imageSelector": ".entry-content img", + "nextPageSelector": null, + "chapterUrlFormat": "https://read-versus.online/manga/{slug}-chapter-{chapterNumber}/", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://anime-sama.fr", + "imageSelector": "#scansPlacement img.lazy", + "nextPageSelector": null, + "chapterUrlFormat": "https://anime-sama.fr/catalogue/{slug}/scan/vf/", + "scrapingType": "javascript", + "chapterSelector": null + }, + { + "baseUrl": "https://www.kaijuchapters.com/", + "imageSelector": ".entry-content img.article_ed__img", + "nextPageSelector": null, + "chapterUrlFormat": "https://www.kaijuchapters.com/manga/{slug}-chapter-{chapterNumber}/", + "scrapingType": "html", + "chapterSelector": null + }, + { + "baseUrl": "https://www.lelmanga.com", + "imageSelector": "#readerarea img", + "nextPageSelector": null, + "chapterUrlFormat": "https://www.lelmanga.com/{slug}-{chapterNumber}", + "scrapingType": "html", + "chapterSelector": null + } +] diff --git a/src/Domain/Scraping/Application/Command/ScrapeChapter.php b/src/Domain/Scraping/Application/Command/ScrapeChapter.php index 6c44853..cc91465 100644 --- a/src/Domain/Scraping/Application/Command/ScrapeChapter.php +++ b/src/Domain/Scraping/Application/Command/ScrapeChapter.php @@ -1,11 +1,13 @@ scraper->createScrapingJob( - $command->chapterId, - $command->sourceId - ); - - $this->scrapingJobRepository->save($job); - - $this->eventBus->dispatch(new ChapterScrapingStarted($job->getId())); - - $this->scraper->scrape($job); + private ScraperInterface $scraper, + private ScrapingJobRepositoryInterface $scrapingJobRepository, + private MessageBusInterface $eventBus + ) { } -} \ No newline at end of file + + public function handle(ScrapeChapter $command): void + { + try { + $job = $this->scraper->createScrapingJob( + $command->mangaId, + $command->chapterId, + $command->sourceId, + ); + + $this->scrapingJobRepository->save($job); + + $this->eventBus->dispatch(new ChapterScrapingStarted($job->getId())); + + $this->scraper->scrape($job); + } catch (\Exception $e) { + $this->eventBus->dispatch(new ChapterScrapingFailed($command->chapterId, $e->getMessage())); + throw $e; + } + } +} diff --git a/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php new file mode 100644 index 0000000..1ace6a8 --- /dev/null +++ b/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php @@ -0,0 +1,10 @@ +chapterId; + } + + public function getReason(): string + { + return $this->reason; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/Manga.php b/src/Domain/Scraping/Domain/Model/Manga.php new file mode 100644 index 0000000..13f1709 --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/Manga.php @@ -0,0 +1,39 @@ +id; + } + + public function getTitle(): string + { + return $this->title; + } + + public function getSlug(): string + { + return $this->slug; + } + + public function getDescription(): string + { + return $this->description; + } + + public function getAuthor(): string + { + return $this->author; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ScrapingJob.php b/src/Domain/Scraping/Domain/Model/ScrapingJob.php index 5361c7f..231c435 100644 --- a/src/Domain/Scraping/Domain/Model/ScrapingJob.php +++ b/src/Domain/Scraping/Domain/Model/ScrapingJob.php @@ -14,8 +14,8 @@ class ScrapingJob public function __construct( private readonly string $id, - private readonly string $chapterId, private readonly string $mangaId, + private readonly string $chapterId, private readonly string $sourceId ) { $this->status = ScrapingStatus::PENDING; diff --git a/src/Domain/Scraping/Domain/Model/Source.php b/src/Domain/Scraping/Domain/Model/Source.php new file mode 100644 index 0000000..e7e74f9 --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/Source.php @@ -0,0 +1,59 @@ +id; + } + + public function getName(): string + { + return $this->name; + } + + public function getDescription(): string + { + return $this->description; + } + + public function getBaseUrl(): string + { + return $this->baseUrl; + } + + public function getScrappingParameters(): array + { + return $this->scrappingParameters; + } + + public function isActive(): bool + { + return $this->isActive; + } + + public function getCreatedAt(): DateTimeImmutable + { + return $this->createdAt; + } + + public function getUpdatedAt(): DateTimeImmutable + { + return $this->updatedAt; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php b/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php new file mode 100644 index 0000000..eecb0a2 --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php @@ -0,0 +1,18 @@ +value; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php b/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php new file mode 100644 index 0000000..045eefd --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php @@ -0,0 +1,18 @@ +value; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php b/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php new file mode 100644 index 0000000..6739eef --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php @@ -0,0 +1,18 @@ +path; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php new file mode 100644 index 0000000..de0b24a --- /dev/null +++ b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php @@ -0,0 +1,22 @@ +entityManager->getRepository(MangaEntity::class)->find($id); + + return $manga ? $manga->toDomain() : null; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php new file mode 100644 index 0000000..27bbc82 --- /dev/null +++ b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php @@ -0,0 +1,26 @@ +entityManager->getRepository(SourceEntityEntity::class)->find($id); + + if (!$sourceEntity) { + return null; + } + + return $sourceEntity->toDomain(); + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php new file mode 100644 index 0000000..d9b0eb4 --- /dev/null +++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php @@ -0,0 +1,75 @@ +id = $manga->getId(); + $entity->title = $manga->getTitle(); + $entity->slug = $manga->getSlug(); + $entity->description = $manga->getDescription(); + $entity->author = $manga->getAuthor(); + + + return $entity; + } + + public function toDomain(): Manga + { + $manga = new Manga( + $this->id, + $this->title, + $this->slug, + $this->description, + $this->author + ); + + return $manga; + } +} diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php index 6ed1e07..eeb02e4 100644 --- a/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php +++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php @@ -3,7 +3,6 @@ namespace App\Domain\Scraping\Infrastructure\Persistence\Entity; use App\Domain\Scraping\Domain\Model\ScrapingJob; -use App\Domain\Scraping\Domain\Model\ScrapingStatus; use Doctrine\ORM\Mapping as ORM; #[ORM\Entity] @@ -59,25 +58,6 @@ class ScrapingJobEntity $this->sourceId ); - // Reconstruire l'état du job à partir des données persistées - $reflection = new \ReflectionClass(ScrapingJob::class); - - $pagesProperty = $reflection->getProperty('pages'); - $pagesProperty->setAccessible(true); - $pagesProperty->setValue($job, $this->pages); - - $statusProperty = $reflection->getProperty('status'); - $statusProperty->setAccessible(true); - $statusProperty->setValue($job, ScrapingStatus::from($this->status)); - - $createdAtProperty = $reflection->getProperty('createdAt'); - $createdAtProperty->setAccessible(true); - $createdAtProperty->setValue($job, $this->createdAt); - - $completedAtProperty = $reflection->getProperty('completedAt'); - $completedAtProperty->setAccessible(true); - $completedAtProperty->setValue($job, $this->completedAt); - return $job; } } \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php new file mode 100644 index 0000000..197e55b --- /dev/null +++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php @@ -0,0 +1,65 @@ +id = $source->getId(); + $entity->name = $source->getName(); + $entity->description = $source->getDescription(); + $entity->baseUrl = $source->getBaseUrl(); + $entity->scrappingParameters = $source->getScrappingParameters(); + $entity->isActive = $source->isActive(); + $entity->createdAt = $source->getCreatedAt(); + $entity->updatedAt = $source->getUpdatedAt(); + + return $entity; + } + + public function toDomain(): Source + { + return new Source( + $this->id, + $this->name ?? '', + $this->description ?? '', + $this->baseUrl, + $this->scrappingParameters, + $this->isActive, + $this->createdAt, + $this->updatedAt + ); + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php b/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php new file mode 100644 index 0000000..96804e9 --- /dev/null +++ b/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php @@ -0,0 +1,23 @@ +httpClient->request('GET', $url); + + if (!str_starts_with($response->getHeaders()['content-type'][0], 'image/')) { + throw new \RuntimeException('Invalid content type'); + } + + file_put_contents($destination, $response->getContent()); + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php index 75cef16..2c45d01 100644 --- a/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php +++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php @@ -3,67 +3,37 @@ namespace App\Domain\Scraping\Infrastructure\Service\Scraper; use App\Domain\Scraping\Domain\Contract\ScraperInterface; -use App\Domain\Scraping\Domain\Model\ScrapingJob; -use App\Domain\Scraping\Domain\Event\PageScrapingProgressed; use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted; use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted; +use App\Domain\Scraping\Domain\Event\PageScrapingProgressed; +use App\Domain\Scraping\Domain\Model\ScrapingJob; use App\Domain\Scraping\Domain\Model\ScrapingProgress; -use Symfony\Component\EventDispatcher\EventDispatcherInterface; -use Symfony\Contracts\HttpClient\HttpClientInterface; +use App\Domain\Scraping\Domain\Model\Source; +use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory; +use App\Domain\Scraping\Infrastructure\Service\ImageDownloader; +use Symfony\Component\Messenger\MessageBusInterface; +use Ramsey\Uuid\Uuid; abstract class AbstractScraper implements ScraperInterface { public function __construct( - protected readonly HttpClientInterface $httpClient, - protected readonly EventDispatcherInterface $eventDispatcher, - protected readonly string $tempDir + protected readonly ImageDownloader $imageDownloader, + protected readonly MessageBusInterface $eventBus ) {} - public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob + public function createScrapingJob(string $mangaId, string $chapterId, string $sourceId): ScrapingJob { return new ScrapingJob( - uniqid('scraping_'), + Uuid::uuid4()->toString(), + $mangaId, $chapterId, - $sourceId + $sourceId, ); } - public function scrape(ScrapingJob $job): void - { - try { - $this->eventDispatcher->dispatch(new ChapterScrapingStarted($job->getId())); - - $tempDir = $this->createTempDirectory($job); - $pageData = $this->scrapePages($job); - - foreach ($pageData as $page) { - $this->downloadPage($job, $page, $tempDir); - } - - $job->complete(); - - $this->eventDispatcher->dispatch( - new ChapterScrapingCompleted($job->getId(), $job->getPages()) - ); - - $this->cleanupTempDirectory($tempDir); - - } catch (\Exception $e) { - $job->fail(); - throw $e; - } - } - - abstract protected function scrapePages(ScrapingJob $job): array; - - protected function createTempDirectory(ScrapingJob $job): string - { - $tempDir = $this->tempDir . '/' . uniqid('scraping_' . $job->getId() . '_'); - if (!mkdir($tempDir) && !is_dir($tempDir)) { - throw new \RuntimeException("Failed to create temporary directory: $tempDir"); - } - return $tempDir; - } + abstract public function scrape(ScrapingJob $job): void; + + abstract protected function scrapePages(ScrapingJob $job, Source $source): array; protected function cleanupTempDirectory(string $tempDir): void { @@ -84,11 +54,32 @@ abstract class AbstractScraper implements ScraperInterface } } - protected function dispatchProgressEvent(ScrapingJob $job, int $current, int $total): void + protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void { - $progress = new ScrapingProgress($current, $total); - $this->eventDispatcher->dispatch( - new PageScrapingProgressed($job->getId(), $progress) - ); + $progress = new ScrapingProgress($currentPage, $totalPages); + $this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress)); } + + protected function downloadImage(string $imageUrl, string $destination): void + { + $this->imageDownloader->download($imageUrl, $destination); + } + + protected function createTempDirectory(): TempDirectory + { + return new TempDirectory(sys_get_temp_dir() . '/' . uniqid('manga_scraper_')); + } + + protected function cleanupTempFiles(TempDirectory $tempDirectory): void + { + $files = glob($tempDirectory->getPath() . '/*'); + foreach ($files as $file) { + if (is_file($file)) { + unlink($file); + } + } + rmdir($tempDirectory->getPath()); + } + + abstract public function supports(string $sourceType): bool; } \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php index 385563c..5e7c4a6 100644 --- a/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php +++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php @@ -3,61 +3,131 @@ namespace App\Domain\Scraping\Infrastructure\Service\Scraper; use App\Domain\Scraping\Domain\Model\ScrapingJob; +use App\Domain\Scraping\Domain\Model\Source; use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl; use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber; +use App\Domain\Scraping\Domain\Repository\SourceRepositoryInterface; use Symfony\Component\DomCrawler\Crawler; +use Symfony\Contracts\HttpClient\HttpClientInterface; +use Symfony\Component\Messenger\MessageBusInterface; +use App\Domain\Scraping\Infrastructure\Service\ImageDownloader; class HtmlScraper extends AbstractScraper { - protected function scrapePages(ScrapingJob $job): array + public function __construct( + ImageDownloader $imageDownloader, + MessageBusInterface $eventBus, + private readonly HttpClientInterface $httpClient, + private readonly SourceRepositoryInterface $sourceRepository + ) { + parent::__construct($imageDownloader, $eventBus); + } + + public function scrape(ScrapingJob $job): void { - $url = $this->buildUrl($job); - $response = $this->httpClient->request('GET', $url); + $sourceConfig = $this->sourceRepository->getById($job->getSourceId()); + $tempDir = $this->createTempDirectory(); + + try { + $pages = $this->scrapePages($job, $sourceConfig); + + foreach ($pages as $index => $imageUrl) { + $pageNumber = new PageNumber($index + 1); + $extension = pathinfo(parse_url($imageUrl, PHP_URL_PATH), PATHINFO_EXTENSION); + $destination = sprintf( + '%s/%s.%s', + $tempDir->getPath(), + $pageNumber->getFormattedNumber(), + $extension + ); + + $this->downloadImage($imageUrl, $destination); + $job->addPage($pageNumber, new ImageUrl($imageUrl)); + + $this->dispatchProgressEvent($job, $index + 1, count($pages)); + } + + $job->complete(); + } catch (\Exception $e) { + $job->fail(); + throw $e; + } finally { + $this->cleanupTempFiles($tempDir); + } + } + + protected function scrapePages(ScrapingJob $job, Source $sourceConfig): array + { + if (!$sourceConfig['next_page_selector']) { + return $this->scrapeVerticalReader($job, $sourceConfig); + } - $crawler = new Crawler($response->getContent()); - $images = $crawler->filter('img.manga-page'); // Adapter selon le site + return $this->scrapeHorizontalReader($job, $sourceConfig); + } + + private function scrapeVerticalReader(ScrapingJob $job, Source $sourceConfig): array + { + $html = $this->fetchHtml($this->buildChapterUrl($job, $sourceConfig)); + $crawler = new Crawler($html); + return $crawler->filter($sourceConfig['image_selector']) + ->each(function ($node) { + return $this->cleanImageUrl( + $node->attr('src') ?: $node->attr('data-src') + ); + }); + } + + private function scrapeHorizontalReader(ScrapingJob $job, Source $sourceConfig): array + { $pages = []; - $images->each(function (Crawler $image) use (&$pages) { - $pages[] = [ - 'url' => $image->attr('src'), - 'number' => count($pages) + 1 - ]; - }); - + $currentUrl = $this->buildChapterUrl($job, $sourceConfig); + + while ($currentUrl) { + $html = $this->fetchHtml($currentUrl); + $crawler = new Crawler($html); + + $imageUrl = $crawler->filter($sourceConfig['image_selector']) + ->attr('src') ?: $crawler->filter($sourceConfig['image_selector']) + ->attr('data-src'); + + $pages[] = $this->cleanImageUrl($imageUrl); + + $nextLink = $crawler->filter($sourceConfig['next_page_selector']); + $currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null; + } + return $pages; } - protected function downloadPage(ScrapingJob $job, array $page, string $tempDir): void + private function fetchHtml(string $url): string { - $imageUrl = new ImageUrl($page['url']); - $pageNumber = new PageNumber($page['number']); + $response = $this->httpClient->request('GET', $url); - $fileName = sprintf('%s/%03d.%s', - $tempDir, - $pageNumber->getValue(), - $imageUrl->getExtension() + if ($response->getStatusCode() >= 400) { + throw new \RuntimeException('Failed to fetch page: ' . $url); + } + + return $response->getContent(); + } + + private function cleanImageUrl(string $url): string + { + // Logique de nettoyage d'URL d'image + return $url; + } + + + private function buildChapterUrl(ScrapingJob $job, Source $sourceConfig): string + { + return sprintf( + $sourceConfig->getBaseUrl(), + $job->getChapterId() ); - - $response = $this->httpClient->request('GET', $imageUrl->getValue()); - file_put_contents($fileName, $response->getContent()); - - $job->addPage($pageNumber, $imageUrl); - $this->dispatchProgressEvent($job, $page['number'], count($pages)); } public function supports(string $sourceType): bool { - return $sourceType === 'html'; - } - - private function buildUrl(ScrapingJob $job): string - { - // À implémenter selon votre logique de construction d'URL - // Vous aurez probablement besoin d'injecter un service pour récupérer les informations du chapitre - return sprintf('https://example.com/manga/%s/chapter/%s', - $job->getMangaId(), - $job->getChapterId() - ); + return 'html' === $sourceType; } } \ No newline at end of file diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php deleted file mode 100644 index 69dedc7..0000000 --- a/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php +++ /dev/null @@ -1,38 +0,0 @@ -buildUrl($job); - $crawler = $client->request('GET', $url); - - // Attendre que les images soient chargées - $crawler->waitFor('img.manga-page'); - - $pages = []; - $crawler->filter('img.manga-page')->each(function ($image) use (&$pages) { - $pages[] = [ - 'url' => $image->attr('src'), - 'number' => count($pages) + 1 - ]; - }); - - return $pages; - } finally { - $client->quit(); - } - } - - public function supports(string $sourceType): bool - { - return $sourceType === 'javascript'; - } -} \ No newline at end of file diff --git a/tests/Domain/Scraping/Adapter/InMemoryEventBus.php b/tests/Domain/Scraping/Adapter/InMemoryEventBus.php new file mode 100644 index 0000000..844e91f --- /dev/null +++ b/tests/Domain/Scraping/Adapter/InMemoryEventBus.php @@ -0,0 +1,23 @@ +dispatchedMessages[] = $message; + + return new Envelope($message); + } + + public function getDispatchedMessages(): array + { + return $this->dispatchedMessages; + } +} diff --git a/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php b/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php new file mode 100644 index 0000000..13a5e4f --- /dev/null +++ b/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php @@ -0,0 +1,47 @@ +shouldThrowException) { + throw $this->shouldThrowException; + } + + $job = new ScrapingJob(Uuid::uuid4(), $mangaId, $chapterId, $sourceId); + $this->jobs[] = $job; + + return $job; + } + + public function scrape(ScrapingJob $job): void + { + if ($this->shouldThrowException) { + throw $this->shouldThrowException; + } + } + + public function simulateError(\Exception $exception): void + { + $this->shouldThrowException = $exception; + } + + public function getJobs(): array + { + return $this->jobs; + } + + public function supports(string $sourceType): bool + { + return true; + } +} diff --git a/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php b/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php new file mode 100644 index 0000000..bbb5219 --- /dev/null +++ b/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php @@ -0,0 +1,44 @@ +jobs[] = $job; + } + + public function getJobs(): array + { + return $this->jobs; + } + + public function findById(string $id): ?ScrapingJob + { + foreach ($this->jobs as $job) { + if ($job->getId() === $id) { + return $job; + } + } + + return null; + } + + public function findByChapterId(string $chapterId): ?ScrapingJob + { + foreach ($this->jobs as $job) { + if ($job->getChapterId() === $chapterId) { + return $job; + } + } + + return null; + } +} diff --git a/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php new file mode 100644 index 0000000..11088ad --- /dev/null +++ b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php @@ -0,0 +1,85 @@ +scraper = new InMemoryScraperAdapter(); + $this->repository = new InMemoryScrapingJobRepository(); + $this->eventBus = new InMemoryEventBus(); + $this->handler = new ScrapeChapterHandler( + $this->scraper, + $this->repository, + $this->eventBus + ); + } + + public function testHandleSuccessfully(): void + { + $command = new ScrapeChapter( + chapterId: 2, + sourceId: 3, + mangaId: 1 + ); + + $this->handler->handle($command); + + // Vérifier que le job a été créé + $scrapingJobs = $this->scraper->getJobs(); + $this->assertCount(1, $scrapingJobs); + $job = $scrapingJobs[0]; + + // Vérifier que le job a été sauvegardé + $savedJobs = $this->repository->getJobs(); + $this->assertCount(1, $savedJobs); + $this->assertSame($job, $savedJobs[0]); + + // Vérifier que l'événement a été dispatché + $dispatchedMessages = $this->eventBus->getDispatchedMessages(); + $this->assertCount(1, $dispatchedMessages); + $this->assertInstanceOf(ChapterScrapingStarted::class, $dispatchedMessages[0]); + $this->assertEquals($job->getId(), $dispatchedMessages[0]->getJobId()); + } + + public function testHandleThrowsException(): void + { + $command = new ScrapeChapter( + chapterId: 2, + sourceId: 3, + mangaId: 1 + ); + + $exception = new \Exception('Scraping failed'); + $this->scraper->simulateError($exception); + + $this->expectException(\Exception::class); + $this->expectExceptionMessage('Scraping failed'); + + try { + $this->handler->handle($command); + } finally { + // Vérifier que l'événement d'échec a été dispatché + $dispatchedMessages = $this->eventBus->getDispatchedMessages(); + $this->assertCount(1, $dispatchedMessages); + $this->assertInstanceOf(ChapterScrapingFailed::class, $dispatchedMessages[0]); + $this->assertEquals(2, $dispatchedMessages[0]->getChapterId()); + $this->assertEquals('Scraping failed', $dispatchedMessages[0]->getReason()); + } + } +} diff --git a/tests/Functional/UserResourceTest.php b/tests/Functional/UserResourceTest.php deleted file mode 100644 index 984766f..0000000 --- a/tests/Functional/UserResourceTest.php +++ /dev/null @@ -1,112 +0,0 @@ - $company]); - - $this->browser() - ->post('/login', [ - 'json' => [ - 'email' => $user->getEmail(), - 'password' => 'password' - ] - ]) - ->assertStatus(204) - ->assertHeaderContains('Location', '/api/users/' . $user->getId()); - } - - public function testUserLogoutHttp() - { - $user = UserFactory::createOne(); - $this->browser() - ->actingAs($user) - ->get('/logout') - ->assertStatus(204) - ; - } - - public function testUserLoginToken(): void - { - $token = ApiTokenFactory::createOne(); - - $this->browser() - ->get('api/users', [ - 'headers' => [ - 'Authorization' => 'Bearer ' . $token->getToken() - ] - ]) - ->assertStatus(200); - } - - public function testCanGetUser(): void - { - $user = UserFactory::createOne(); - - $this->browser() - ->actingAs($user) - ->get('/api/users/' . $user->getId()) - ->assertSuccessful() - ->assertJson() - ->assertJsonMatches('email', $user->getEmail()) - ->assertJsonMatches('firstName', $user->getFirstName()) - ->assertJsonMatches('lastName', $user->getLastName()) - ; - } - - public function testCanPostToCreateUser(): void - { - $loggedUser = UserFactory::createOne(); - - $this->browser() - ->actingAs($loggedUser) - ->post('/api/users', [ - 'json' => [ - 'email' => 'john.doe@mail.com', - 'firstName' => 'John', - 'lastName' => 'Doe', - 'password' => 'password', - ], - ]) - ->assertSuccessful() - ->post('/login', [ - 'json' => [ - 'email' => 'john.doe@mail.com', - 'password' => 'password', - ], - ]) - ->assertSuccessful(); - } - - public function testCanPatchToUpdateUser(): void - { - $loggedUser = UserFactory::createOne(); - - $this->browser() - ->actingAs($loggedUser) - ->patch('/api/users/' . $loggedUser->getId(), [ - 'json' => [ - 'firstName' => 'John', - 'lastName' => 'Doe', - ], - 'headers' => [ - 'Content-Type' => 'application/merge-patch+json' - ] - ]) - ->assertSuccessful() - ->get('/api/users/' . $loggedUser->getId()) - ->assertSuccessful() - ->assertJson() - ->assertJsonMatches('firstName', 'John') - ->assertJsonMatches('lastName', 'Doe'); - ; - } -}