diff --git a/composer.json b/composer.json
index 416f248..ad4371e 100644
--- a/composer.json
+++ b/composer.json
@@ -22,6 +22,7 @@
"nelmio/cors-bundle": "^2.4",
"phpdocumentor/reflection-docblock": "^5.3",
"phpstan/phpdoc-parser": "^1.25",
+ "ramsey/uuid": "^4.7",
"runtime/frankenphp-symfony": "^0.2.0",
"symfony/asset": "7.0.*",
"symfony/console": "7.0.*",
diff --git a/composer.lock b/composer.lock
index 17101f5..10c137e 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
- "content-hash": "6258706876617c8b0c08f13c5a158fe7",
+ "content-hash": "49014ec06c069804432e6a13701e46a4",
"packages": [
{
"name": "api-platform/core",
@@ -172,6 +172,66 @@
},
"time": "2024-02-01T14:41:52+00:00"
},
+ {
+ "name": "brick/math",
+ "version": "0.12.1",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/brick/math.git",
+ "reference": "f510c0a40911935b77b86859eb5223d58d660df1"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/brick/math/zipball/f510c0a40911935b77b86859eb5223d58d660df1",
+ "reference": "f510c0a40911935b77b86859eb5223d58d660df1",
+ "shasum": ""
+ },
+ "require": {
+ "php": "^8.1"
+ },
+ "require-dev": {
+ "php-coveralls/php-coveralls": "^2.2",
+ "phpunit/phpunit": "^10.1",
+ "vimeo/psalm": "5.16.0"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "Brick\\Math\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "description": "Arbitrary-precision arithmetic library",
+ "keywords": [
+ "Arbitrary-precision",
+ "BigInteger",
+ "BigRational",
+ "arithmetic",
+ "bigdecimal",
+ "bignum",
+ "bignumber",
+ "brick",
+ "decimal",
+ "integer",
+ "math",
+ "mathematics",
+ "rational"
+ ],
+ "support": {
+ "issues": "https://github.com/brick/math/issues",
+ "source": "https://github.com/brick/math/tree/0.12.1"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/BenMorel",
+ "type": "github"
+ }
+ ],
+ "time": "2023-11-29T23:19:16+00:00"
+ },
{
"name": "doctrine/cache",
"version": "2.2.0",
@@ -3050,6 +3110,187 @@
},
"time": "2019-03-08T08:55:37+00:00"
},
+ {
+ "name": "ramsey/collection",
+ "version": "2.0.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/ramsey/collection.git",
+ "reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/ramsey/collection/zipball/a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5",
+ "reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5",
+ "shasum": ""
+ },
+ "require": {
+ "php": "^8.1"
+ },
+ "require-dev": {
+ "captainhook/plugin-composer": "^5.3",
+ "ergebnis/composer-normalize": "^2.28.3",
+ "fakerphp/faker": "^1.21",
+ "hamcrest/hamcrest-php": "^2.0",
+ "jangregor/phpstan-prophecy": "^1.0",
+ "mockery/mockery": "^1.5",
+ "php-parallel-lint/php-console-highlighter": "^1.0",
+ "php-parallel-lint/php-parallel-lint": "^1.3",
+ "phpcsstandards/phpcsutils": "^1.0.0-rc1",
+ "phpspec/prophecy-phpunit": "^2.0",
+ "phpstan/extension-installer": "^1.2",
+ "phpstan/phpstan": "^1.9",
+ "phpstan/phpstan-mockery": "^1.1",
+ "phpstan/phpstan-phpunit": "^1.3",
+ "phpunit/phpunit": "^9.5",
+ "psalm/plugin-mockery": "^1.1",
+ "psalm/plugin-phpunit": "^0.18.4",
+ "ramsey/coding-standard": "^2.0.3",
+ "ramsey/conventional-commits": "^1.3",
+ "vimeo/psalm": "^5.4"
+ },
+ "type": "library",
+ "extra": {
+ "captainhook": {
+ "force-install": true
+ },
+ "ramsey/conventional-commits": {
+ "configFile": "conventional-commits.json"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "Ramsey\\Collection\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Ben Ramsey",
+ "email": "ben@benramsey.com",
+ "homepage": "https://benramsey.com"
+ }
+ ],
+ "description": "A PHP library for representing and manipulating collections.",
+ "keywords": [
+ "array",
+ "collection",
+ "hash",
+ "map",
+ "queue",
+ "set"
+ ],
+ "support": {
+ "issues": "https://github.com/ramsey/collection/issues",
+ "source": "https://github.com/ramsey/collection/tree/2.0.0"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/ramsey",
+ "type": "github"
+ },
+ {
+ "url": "https://tidelift.com/funding/github/packagist/ramsey/collection",
+ "type": "tidelift"
+ }
+ ],
+ "time": "2022-12-31T21:50:55+00:00"
+ },
+ {
+ "name": "ramsey/uuid",
+ "version": "4.7.6",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/ramsey/uuid.git",
+ "reference": "91039bc1faa45ba123c4328958e620d382ec7088"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/ramsey/uuid/zipball/91039bc1faa45ba123c4328958e620d382ec7088",
+ "reference": "91039bc1faa45ba123c4328958e620d382ec7088",
+ "shasum": ""
+ },
+ "require": {
+ "brick/math": "^0.8.8 || ^0.9 || ^0.10 || ^0.11 || ^0.12",
+ "ext-json": "*",
+ "php": "^8.0",
+ "ramsey/collection": "^1.2 || ^2.0"
+ },
+ "replace": {
+ "rhumsaa/uuid": "self.version"
+ },
+ "require-dev": {
+ "captainhook/captainhook": "^5.10",
+ "captainhook/plugin-composer": "^5.3",
+ "dealerdirect/phpcodesniffer-composer-installer": "^0.7.0",
+ "doctrine/annotations": "^1.8",
+ "ergebnis/composer-normalize": "^2.15",
+ "mockery/mockery": "^1.3",
+ "paragonie/random-lib": "^2",
+ "php-mock/php-mock": "^2.2",
+ "php-mock/php-mock-mockery": "^1.3",
+ "php-parallel-lint/php-parallel-lint": "^1.1",
+ "phpbench/phpbench": "^1.0",
+ "phpstan/extension-installer": "^1.1",
+ "phpstan/phpstan": "^1.8",
+ "phpstan/phpstan-mockery": "^1.1",
+ "phpstan/phpstan-phpunit": "^1.1",
+ "phpunit/phpunit": "^8.5 || ^9",
+ "ramsey/composer-repl": "^1.4",
+ "slevomat/coding-standard": "^8.4",
+ "squizlabs/php_codesniffer": "^3.5",
+ "vimeo/psalm": "^4.9"
+ },
+ "suggest": {
+ "ext-bcmath": "Enables faster math with arbitrary-precision integers using BCMath.",
+ "ext-gmp": "Enables faster math with arbitrary-precision integers using GMP.",
+ "ext-uuid": "Enables the use of PeclUuidTimeGenerator and PeclUuidRandomGenerator.",
+ "paragonie/random-lib": "Provides RandomLib for use with the RandomLibAdapter",
+ "ramsey/uuid-doctrine": "Allows the use of Ramsey\\Uuid\\Uuid as Doctrine field type."
+ },
+ "type": "library",
+ "extra": {
+ "captainhook": {
+ "force-install": true
+ }
+ },
+ "autoload": {
+ "files": [
+ "src/functions.php"
+ ],
+ "psr-4": {
+ "Ramsey\\Uuid\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "description": "A PHP library for generating and working with universally unique identifiers (UUIDs).",
+ "keywords": [
+ "guid",
+ "identifier",
+ "uuid"
+ ],
+ "support": {
+ "issues": "https://github.com/ramsey/uuid/issues",
+ "source": "https://github.com/ramsey/uuid/tree/4.7.6"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/ramsey",
+ "type": "github"
+ },
+ {
+ "url": "https://tidelift.com/funding/github/packagist/ramsey/uuid",
+ "type": "tidelift"
+ }
+ ],
+ "time": "2024-04-27T21:32:50+00:00"
+ },
{
"name": "runtime/frankenphp-symfony",
"version": "0.2.0",
diff --git a/phpunit.xml.dist b/phpunit.xml.dist
index 73f7b8b..6976b90 100644
--- a/phpunit.xml.dist
+++ b/phpunit.xml.dist
@@ -16,9 +16,10 @@
-
+
+
src
diff --git a/scrapers.json b/scrapers.json
new file mode 100644
index 0000000..16f1cba
--- /dev/null
+++ b/scrapers.json
@@ -0,0 +1,66 @@
+[
+ {
+ "baseUrl": "https://darkscans.net/",
+ "imageSelector": ".reading-content img",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://darkscans.net/mangas/{slug}/chapter-{chapterNumber}/",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://lelscans.net",
+ "imageSelector": "#image img",
+ "nextPageSelector": "a[title=\"Suivant\"]",
+ "chapterUrlFormat": "https://lelscans.net/scan-{slug}/{chapterNumber}",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://www.thebeginningaftertheend.fr/",
+ "imageSelector": ".reading-content img",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://www.thebeginningaftertheend.fr/manga/{slug}-manga/chapitre-{chapterNumber}_1/",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://lelscanfr.com",
+ "imageSelector": "#chapter-container img.chapter-image",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://lelscanfr.com/manga/{slug}/{chapterNumber}",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://read-versus.online",
+ "imageSelector": ".entry-content img",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://read-versus.online/manga/{slug}-chapter-{chapterNumber}/",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://anime-sama.fr",
+ "imageSelector": "#scansPlacement img.lazy",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://anime-sama.fr/catalogue/{slug}/scan/vf/",
+ "scrapingType": "javascript",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://www.kaijuchapters.com/",
+ "imageSelector": ".entry-content img.article_ed__img",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://www.kaijuchapters.com/manga/{slug}-chapter-{chapterNumber}/",
+ "scrapingType": "html",
+ "chapterSelector": null
+ },
+ {
+ "baseUrl": "https://www.lelmanga.com",
+ "imageSelector": "#readerarea img",
+ "nextPageSelector": null,
+ "chapterUrlFormat": "https://www.lelmanga.com/{slug}-{chapterNumber}",
+ "scrapingType": "html",
+ "chapterSelector": null
+ }
+]
diff --git a/src/Domain/Scraping/Application/Command/ScrapeChapter.php b/src/Domain/Scraping/Application/Command/ScrapeChapter.php
index 6c44853..cc91465 100644
--- a/src/Domain/Scraping/Application/Command/ScrapeChapter.php
+++ b/src/Domain/Scraping/Application/Command/ScrapeChapter.php
@@ -1,11 +1,13 @@
scraper->createScrapingJob(
- $command->chapterId,
- $command->sourceId
- );
-
- $this->scrapingJobRepository->save($job);
-
- $this->eventBus->dispatch(new ChapterScrapingStarted($job->getId()));
-
- $this->scraper->scrape($job);
+ private ScraperInterface $scraper,
+ private ScrapingJobRepositoryInterface $scrapingJobRepository,
+ private MessageBusInterface $eventBus
+ ) {
}
-}
\ No newline at end of file
+
+    /**
+     * Runs the scraping workflow for a single chapter.
+     *
+     * Creates a job through the scraper, saves it, dispatches ChapterScrapingStarted with the job id,
+     * then runs the scrape. Any \Exception raised along the way is reported through a
+     * ChapterScrapingFailed message (chapter id + exception message) and rethrown.
+     *
+     * @throws \Exception the exception that interrupted the workflow
+     */
+    public function handle(ScrapeChapter $command): void
+    {
+        try {
+            $scrapingJob = $this->scraper->createScrapingJob(
+                $command->mangaId,
+                $command->chapterId,
+                $command->sourceId,
+            );
+            $this->scrapingJobRepository->save($scrapingJob);
+
+            $started = new ChapterScrapingStarted($scrapingJob->getId());
+            $this->eventBus->dispatch($started);
+
+            $this->scraper->scrape($scrapingJob);
+        } catch (\Exception $failure) {
+            $failed = new ChapterScrapingFailed($command->chapterId, $failure->getMessage());
+            $this->eventBus->dispatch($failed);
+
+            throw $failure;
+        }
+    }
+}
diff --git a/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php
new file mode 100644
index 0000000..1ace6a8
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Contract/Repository/MangaRepositoryInterface.php
@@ -0,0 +1,10 @@
+chapterId;
+ }
+
+ /**
+ * Returns the reason string held by this object.
+ */
+ public function getReason(): string
+ {
+ return $this->reason;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/Manga.php b/src/Domain/Scraping/Domain/Model/Manga.php
new file mode 100644
index 0000000..13f1709
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/Manga.php
@@ -0,0 +1,39 @@
+id;
+ }
+
+ /**
+ * Returns the manga's title.
+ */
+ public function getTitle(): string
+ {
+ return $this->title;
+ }
+
+ /**
+ * Returns the manga's slug.
+ */
+ public function getSlug(): string
+ {
+ return $this->slug;
+ }
+
+ /**
+ * Returns the manga's description.
+ */
+ public function getDescription(): string
+ {
+ return $this->description;
+ }
+
+ /**
+ * Returns the manga's author.
+ */
+ public function getAuthor(): string
+ {
+ return $this->author;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/ScrapingJob.php b/src/Domain/Scraping/Domain/Model/ScrapingJob.php
index 5361c7f..231c435 100644
--- a/src/Domain/Scraping/Domain/Model/ScrapingJob.php
+++ b/src/Domain/Scraping/Domain/Model/ScrapingJob.php
@@ -14,8 +14,8 @@ class ScrapingJob
public function __construct(
private readonly string $id,
- private readonly string $chapterId,
private readonly string $mangaId,
+ private readonly string $chapterId,
private readonly string $sourceId
) {
$this->status = ScrapingStatus::PENDING;
diff --git a/src/Domain/Scraping/Domain/Model/Source.php b/src/Domain/Scraping/Domain/Model/Source.php
new file mode 100644
index 0000000..e7e74f9
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/Source.php
@@ -0,0 +1,59 @@
+id;
+ }
+
+ /**
+ * Returns the source's name.
+ */
+ public function getName(): string
+ {
+ return $this->name;
+ }
+
+ /**
+ * Returns the source's description.
+ */
+ public function getDescription(): string
+ {
+ return $this->description;
+ }
+
+ /**
+ * Returns the source's base URL.
+ */
+ public function getBaseUrl(): string
+ {
+ return $this->baseUrl;
+ }
+
+ /**
+ * Returns the source's scraping parameters as an array.
+ *
+ * NOTE(review): the key schema of this array is not visible here — confirm it against the
+ * persisted source configuration before relying on specific keys.
+ */
+ public function getScrappingParameters(): array
+ {
+ return $this->scrappingParameters;
+ }
+
+ /**
+ * Returns the source's active flag.
+ */
+ public function isActive(): bool
+ {
+ return $this->isActive;
+ }
+
+ /**
+ * Returns the source's creation timestamp.
+ */
+ public function getCreatedAt(): DateTimeImmutable
+ {
+ return $this->createdAt;
+ }
+
+ /**
+ * Returns the source's last-update timestamp.
+ */
+ public function getUpdatedAt(): DateTimeImmutable
+ {
+ return $this->updatedAt;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php b/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php
new file mode 100644
index 0000000..eecb0a2
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/ValueObject/ChapterId.php
@@ -0,0 +1,18 @@
+value;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php b/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php
new file mode 100644
index 0000000..045eefd
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/ValueObject/SourceId.php
@@ -0,0 +1,18 @@
+value;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php b/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php
new file mode 100644
index 0000000..6739eef
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/ValueObject/TempDirectory.php
@@ -0,0 +1,18 @@
+path;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php
new file mode 100644
index 0000000..de0b24a
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineMangaRepository.php
@@ -0,0 +1,22 @@
+entityManager->getRepository(MangaEntity::class)->find($id);
+
+ return $manga ? $manga->toDomain() : null;
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php
new file mode 100644
index 0000000..27bbc82
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineSourceRepository.php
@@ -0,0 +1,26 @@
+entityManager->getRepository(SourceEntityEntity::class)->find($id);
+
+ if (!$sourceEntity) {
+ return null;
+ }
+
+ return $sourceEntity->toDomain();
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php
new file mode 100644
index 0000000..d9b0eb4
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/MangaEntity.php
@@ -0,0 +1,75 @@
+id = $manga->getId();
+ $entity->title = $manga->getTitle();
+ $entity->slug = $manga->getSlug();
+ $entity->description = $manga->getDescription();
+ $entity->author = $manga->getAuthor();
+
+
+ return $entity;
+ }
+
+    /**
+     * Builds the domain Manga from this entity's id, title, slug, description and author.
+     */
+    public function toDomain(): Manga
+    {
+        return new Manga(
+            $this->id,
+            $this->title,
+            $this->slug,
+            $this->description,
+            $this->author,
+        );
+    }
+}
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php
index 6ed1e07..eeb02e4 100644
--- a/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php
+++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php
@@ -3,7 +3,6 @@
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
-use App\Domain\Scraping\Domain\Model\ScrapingStatus;
use Doctrine\ORM\Mapping as ORM;
#[ORM\Entity]
@@ -59,25 +58,6 @@ class ScrapingJobEntity
$this->sourceId
);
- // Reconstruire l'état du job à partir des données persistées
- $reflection = new \ReflectionClass(ScrapingJob::class);
-
- $pagesProperty = $reflection->getProperty('pages');
- $pagesProperty->setAccessible(true);
- $pagesProperty->setValue($job, $this->pages);
-
- $statusProperty = $reflection->getProperty('status');
- $statusProperty->setAccessible(true);
- $statusProperty->setValue($job, ScrapingStatus::from($this->status));
-
- $createdAtProperty = $reflection->getProperty('createdAt');
- $createdAtProperty->setAccessible(true);
- $createdAtProperty->setValue($job, $this->createdAt);
-
- $completedAtProperty = $reflection->getProperty('completedAt');
- $completedAtProperty->setAccessible(true);
- $completedAtProperty->setValue($job, $this->completedAt);
-
return $job;
}
}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php
new file mode 100644
index 0000000..197e55b
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/SourceEntity.php
@@ -0,0 +1,65 @@
+id = $source->getId();
+ $entity->name = $source->getName();
+ $entity->description = $source->getDescription();
+ $entity->baseUrl = $source->getBaseUrl();
+ $entity->scrappingParameters = $source->getScrappingParameters();
+ $entity->isActive = $source->isActive();
+ $entity->createdAt = $source->getCreatedAt();
+ $entity->updatedAt = $source->getUpdatedAt();
+
+ return $entity;
+ }
+
+    /**
+     * Builds the domain Source from this entity's fields.
+     *
+     * A null name or description is replaced by an empty string.
+     */
+    public function toDomain(): Source
+    {
+        $identifier = $this->id;
+        $name = $this->name ?? '';
+        $description = $this->description ?? '';
+
+        return new Source(
+            $identifier,
+            $name,
+            $description,
+            $this->baseUrl,
+            $this->scrappingParameters,
+            $this->isActive,
+            $this->createdAt,
+            $this->updatedAt,
+        );
+    }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php b/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php
new file mode 100644
index 0000000..96804e9
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Service/ImageDownloader.php
@@ -0,0 +1,23 @@
+httpClient->request('GET', $url);
+
+ if (!str_starts_with($response->getHeaders()['content-type'][0], 'image/')) {
+ throw new \RuntimeException('Invalid content type');
+ }
+
+ file_put_contents($destination, $response->getContent());
+ }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php
index 75cef16..2c45d01 100644
--- a/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php
+++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/AbstractScraper.php
@@ -3,67 +3,37 @@
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Contract\ScraperInterface;
-use App\Domain\Scraping\Domain\Model\ScrapingJob;
-use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted;
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
+use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
+use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
-use Symfony\Component\EventDispatcher\EventDispatcherInterface;
-use Symfony\Contracts\HttpClient\HttpClientInterface;
+use App\Domain\Scraping\Domain\Model\Source;
+use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
+use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
+use Symfony\Component\Messenger\MessageBusInterface;
+use Ramsey\Uuid\Uuid;
abstract class AbstractScraper implements ScraperInterface
{
public function __construct(
- protected readonly HttpClientInterface $httpClient,
- protected readonly EventDispatcherInterface $eventDispatcher,
- protected readonly string $tempDir
+ protected readonly ImageDownloader $imageDownloader,
+ protected readonly MessageBusInterface $eventBus
) {}
+ /**
+ * Creates a new ScrapingJob for the given manga, chapter and source, identified by a freshly
+ * generated version-4 UUID string.
+ */
- public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob
+ public function createScrapingJob(string $mangaId, string $chapterId, string $sourceId): ScrapingJob
{
return new ScrapingJob(
- uniqid('scraping_'),
+ Uuid::uuid4()->toString(),
+ $mangaId,
$chapterId,
- $sourceId
+ $sourceId,
);
}
- public function scrape(ScrapingJob $job): void
- {
- try {
- $this->eventDispatcher->dispatch(new ChapterScrapingStarted($job->getId()));
-
- $tempDir = $this->createTempDirectory($job);
- $pageData = $this->scrapePages($job);
-
- foreach ($pageData as $page) {
- $this->downloadPage($job, $page, $tempDir);
- }
-
- $job->complete();
-
- $this->eventDispatcher->dispatch(
- new ChapterScrapingCompleted($job->getId(), $job->getPages())
- );
-
- $this->cleanupTempDirectory($tempDir);
-
- } catch (\Exception $e) {
- $job->fail();
- throw $e;
- }
- }
-
- abstract protected function scrapePages(ScrapingJob $job): array;
-
- protected function createTempDirectory(ScrapingJob $job): string
- {
- $tempDir = $this->tempDir . '/' . uniqid('scraping_' . $job->getId() . '_');
- if (!mkdir($tempDir) && !is_dir($tempDir)) {
- throw new \RuntimeException("Failed to create temporary directory: $tempDir");
- }
- return $tempDir;
- }
+ abstract public function scrape(ScrapingJob $job): void;
+
+ abstract protected function scrapePages(ScrapingJob $job, Source $source): array;
protected function cleanupTempDirectory(string $tempDir): void
{
@@ -84,11 +54,32 @@ abstract class AbstractScraper implements ScraperInterface
}
}
+ /**
+ * Dispatches a PageScrapingProgressed message on the event bus, carrying the job id and a
+ * ScrapingProgress built from the current and total page counts.
+ */
- protected function dispatchProgressEvent(ScrapingJob $job, int $current, int $total): void
+ protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void
{
- $progress = new ScrapingProgress($current, $total);
- $this->eventDispatcher->dispatch(
- new PageScrapingProgressed($job->getId(), $progress)
- );
+ $progress = new ScrapingProgress($currentPage, $totalPages);
+ $this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress));
}
+
+ /**
+ * Delegates the download of $imageUrl to the injected ImageDownloader, writing to $destination.
+ */
+ protected function downloadImage(string $imageUrl, string $destination): void
+ {
+ $this->imageDownloader->download($imageUrl, $destination);
+ }
+
+    /**
+     * Allocates a working directory for downloaded pages under the system temp dir.
+     *
+     * The directory is created on disk when it does not exist yet, so that later writes into it
+     * do not fail on a missing parent directory.
+     *
+     * @throws \RuntimeException when the directory cannot be created
+     */
+    protected function createTempDirectory(): TempDirectory
+    {
+        $path = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');
+        $tempDirectory = new TempDirectory($path);
+
+        // NOTE(review): TempDirectory's constructor is not visible here; this guard is a no-op
+        // if the value object already creates the directory itself.
+        if (!is_dir($path) && !mkdir($path, 0700, true) && !is_dir($path)) {
+            throw new \RuntimeException("Failed to create temporary directory: $path");
+        }
+
+        return $tempDirectory;
+    }
+
+    /**
+     * Deletes the regular files directly inside the temp directory, then the directory itself.
+     *
+     * Does nothing when the directory does not exist. This method is called from a `finally`
+     * block, so it must not raise warnings of its own that could mask the original failure.
+     */
+    protected function cleanupTempFiles(TempDirectory $tempDirectory): void
+    {
+        $path = $tempDirectory->getPath();
+        if (!is_dir($path)) {
+            return;
+        }
+
+        // glob() returns false on error; treat that as "nothing to delete".
+        foreach (glob($path . '/*') ?: [] as $file) {
+            if (is_file($file)) {
+                unlink($file);
+            }
+        }
+
+        rmdir($path);
+    }
+
+ abstract public function supports(string $sourceType): bool;
}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
index 385563c..5e7c4a6 100644
--- a/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
+++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
@@ -3,61 +3,131 @@
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
+use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
+use App\Domain\Scraping\Domain\Repository\SourceRepositoryInterface;
use Symfony\Component\DomCrawler\Crawler;
+use Symfony\Contracts\HttpClient\HttpClientInterface;
+use Symfony\Component\Messenger\MessageBusInterface;
+use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
class HtmlScraper extends AbstractScraper
{
- protected function scrapePages(ScrapingJob $job): array
+ /**
+ * Wires the scraper's collaborators: the image downloader and event bus are forwarded to the
+ * parent constructor, the HTTP client and source repository are kept as private readonly properties.
+ */
+ public function __construct(
+ ImageDownloader $imageDownloader,
+ MessageBusInterface $eventBus,
+ private readonly HttpClientInterface $httpClient,
+ private readonly SourceRepositoryInterface $sourceRepository
+ ) {
+ parent::__construct($imageDownloader, $eventBus);
+ }
+
+    /**
+     * Downloads every page of the job's chapter into a temporary directory and records it on the job.
+     *
+     * The job is marked failed (and the exception rethrown) when the source cannot be found or when
+     * any step raises an \Exception; otherwise it is marked complete. Once the temporary directory
+     * has been allocated, it is cleaned up whether scraping succeeded or not.
+     *
+     * @throws \RuntimeException when no source exists for the job's source id
+     * @throws \Exception        any exception raised while scraping or downloading
+     */
+    public function scrape(ScrapingJob $job): void
     {
-        $url = $this->buildUrl($job);
-        $response = $this->httpClient->request('GET', $url);
+        $sourceConfig = $this->sourceRepository->getById($job->getSourceId());
+        if (!$sourceConfig instanceof Source) {
+            // Fail explicitly instead of letting scrapePages() raise a TypeError on a missing source.
+            $job->fail();
+            throw new \RuntimeException(sprintf('Source "%s" not found', $job->getSourceId()));
+        }
+
+        $tempDir = $this->createTempDirectory();
+
+        try {
+            $pages = $this->scrapePages($job, $sourceConfig);
+            $totalPages = count($pages);
+
+            foreach ($pages as $index => $imageUrl) {
+                $pageNumber = new PageNumber($index + 1);
+                // parse_url() yields null for a URL without a path; cast so pathinfo() always gets a string.
+                $extension = pathinfo((string) parse_url($imageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
+                $destination = sprintf(
+                    '%s/%s.%s',
+                    $tempDir->getPath(),
+                    $pageNumber->getFormattedNumber(),
+                    $extension
+                );
+
+                $this->downloadImage($imageUrl, $destination);
+                $job->addPage($pageNumber, new ImageUrl($imageUrl));
+
+                $this->dispatchProgressEvent($job, $index + 1, $totalPages);
+            }
+
+            $job->complete();
+        } catch (\Exception $e) {
+            $job->fail();
+            throw $e;
+        } finally {
+            $this->cleanupTempFiles($tempDir);
+        }
+    }
+
+    /**
+     * Collects the image URLs of the job's chapter.
+     *
+     * Uses the single-page ("vertical") reader when the source defines no next-page selector,
+     * and the page-by-page ("horizontal") reader otherwise.
+     *
+     * @return array list of image URLs in reading order
+     */
+    protected function scrapePages(ScrapingJob $job, Source $sourceConfig): array
+    {
+        // Source is a plain object (no array access); selectors are read from its parameters array.
+        // NOTE(review): key naming is not visible here, so both snake_case and camelCase are accepted.
+        $parameters = $sourceConfig->getScrappingParameters();
+        $nextPageSelector = $parameters['next_page_selector'] ?? $parameters['nextPageSelector'] ?? null;
+
+        if (!$nextPageSelector) {
+            return $this->scrapeVerticalReader($job, $sourceConfig);
+        }
-        $crawler = new Crawler($response->getContent());
-        $images = $crawler->filter('img.manga-page'); // Adapter selon le site
+        return $this->scrapeHorizontalReader($job, $sourceConfig);
+    }
+
+    /**
+     * Scrapes a chapter whose pages are all present in one HTML document.
+     *
+     * Each node matching the source's image selector contributes its `src` attribute, or `data-src`
+     * when `src` is empty; nodes carrying neither are skipped.
+     *
+     * @return array list of cleaned image URLs in document order
+     *
+     * @throws \RuntimeException when the source defines no image selector
+     */
+    private function scrapeVerticalReader(ScrapingJob $job, Source $sourceConfig): array
+    {
+        // Source is a plain object (no array access); selectors are read from its parameters array.
+        // NOTE(review): key naming is not visible here, so both snake_case and camelCase are accepted.
+        $parameters = $sourceConfig->getScrappingParameters();
+        $imageSelector = $parameters['image_selector'] ?? $parameters['imageSelector'] ?? null;
+        if (!is_string($imageSelector) || '' === $imageSelector) {
+            throw new \RuntimeException('No image selector configured for source ' . $sourceConfig->getBaseUrl());
+        }
+
+        $crawler = new Crawler($this->fetchHtml($this->buildChapterUrl($job, $sourceConfig)));
+
+        $candidates = $crawler->filter($imageSelector)->each(
+            static fn (Crawler $node): ?string => $node->attr('src') ?: $node->attr('data-src')
+        );
+
+        // cleanImageUrl() only accepts strings, so drop nodes that exposed no URL at all.
+        $urls = array_filter($candidates, static fn (?string $url): bool => null !== $url && '' !== $url);
+
+        return array_values(array_map(fn (string $url): string => $this->cleanImageUrl($url), $urls));
+    }
+
+    /**
+     * Scrapes a chapter that shows one image per HTML page, following the "next page" link.
+     *
+     * Stops when a page has no next link or when the next link points to a URL already visited
+     * (which would otherwise loop forever).
+     *
+     * @return array list of cleaned image URLs in reading order
+     *
+     * @throws \RuntimeException when a selector is missing or a page exposes no image URL
+     */
+    private function scrapeHorizontalReader(ScrapingJob $job, Source $sourceConfig): array
+    {
+        // Source is a plain object (no array access); selectors are read from its parameters array.
+        // NOTE(review): key naming is not visible here, so both snake_case and camelCase are accepted.
+        $parameters = $sourceConfig->getScrappingParameters();
+        $imageSelector = $parameters['image_selector'] ?? $parameters['imageSelector'] ?? null;
+        $nextPageSelector = $parameters['next_page_selector'] ?? $parameters['nextPageSelector'] ?? null;
+        if (!$imageSelector || !$nextPageSelector) {
+            throw new \RuntimeException('Missing selectors for source ' . $sourceConfig->getBaseUrl());
+        }
+
         $pages = [];
-        $images->each(function (Crawler $image) use (&$pages) {
-            $pages[] = [
-                'url' => $image->attr('src'),
-                'number' => count($pages) + 1
-            ];
-        });
-
+        $visited = [];
+        $currentUrl = $this->buildChapterUrl($job, $sourceConfig);
+
+        while ($currentUrl && !isset($visited[$currentUrl])) {
+            $visited[$currentUrl] = true;
+            $crawler = new Crawler($this->fetchHtml($currentUrl));
+
+            $image = $crawler->filter($imageSelector);
+            $imageUrl = $image->attr('src') ?: $image->attr('data-src');
+            if (!$imageUrl) {
+                throw new \RuntimeException('No image found on page: ' . $currentUrl);
+            }
+
+            $pages[] = $this->cleanImageUrl($imageUrl);
+
+            $nextLink = $crawler->filter($nextPageSelector);
+            $currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
+        }
+
         return $pages;
     }
- protected function downloadPage(ScrapingJob $job, array $page, string $tempDir): void
+ /**
+ * Fetches $url with a GET request and returns the response body.
+ *
+ * @throws \RuntimeException when the response status code is 400 or above
+ */
+ private function fetchHtml(string $url): string
{
- $imageUrl = new ImageUrl($page['url']);
- $pageNumber = new PageNumber($page['number']);
+ $response = $this->httpClient->request('GET', $url);
- $fileName = sprintf('%s/%03d.%s',
- $tempDir,
- $pageNumber->getValue(),
- $imageUrl->getExtension()
+ if ($response->getStatusCode() >= 400) {
+ throw new \RuntimeException('Failed to fetch page: ' . $url);
+ }
+
+ return $response->getContent();
+ }
+
+ /**
+ * Hook for normalising a scraped image URL; currently returns the URL unchanged.
+ */
+ private function cleanImageUrl(string $url): string
+ {
+ // Placeholder for image URL clean-up logic
+ return $url;
+ }
+
+
+ /**
+ * Builds the chapter URL by passing the source's base URL to sprintf() as the format string,
+ * with the job's chapter id as the only argument.
+ *
+ * NOTE(review): sprintf() only substitutes where the format contains conversion specifiers
+ * (e.g. %s). If base URLs are plain URLs, this returns the base URL unchanged and the chapter
+ * id is ignored — confirm which value is meant to be the URL template.
+ */
+ private function buildChapterUrl(ScrapingJob $job, Source $sourceConfig): string
+ {
+ return sprintf(
+ $sourceConfig->getBaseUrl(),
+ $job->getChapterId()
);
-
- $response = $this->httpClient->request('GET', $imageUrl->getValue());
- file_put_contents($fileName, $response->getContent());
-
- $job->addPage($pageNumber, $imageUrl);
- $this->dispatchProgressEvent($job, $page['number'], count($pages));
}
+ /**
+ * Returns true only when $sourceType is exactly the string 'html'.
+ */
public function supports(string $sourceType): bool
{
- return $sourceType === 'html';
- }
-
- private function buildUrl(ScrapingJob $job): string
- {
- // À implémenter selon votre logique de construction d'URL
- // Vous aurez probablement besoin d'injecter un service pour récupérer les informations du chapitre
- return sprintf('https://example.com/manga/%s/chapter/%s',
- $job->getMangaId(),
- $job->getChapterId()
- );
+ return 'html' === $sourceType;
}
}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php
deleted file mode 100644
index 69dedc7..0000000
--- a/src/Domain/Scraping/Infrastructure/Service/Scraper/JavascriptScraper.php
+++ /dev/null
@@ -1,38 +0,0 @@
-buildUrl($job);
- $crawler = $client->request('GET', $url);
-
- // Attendre que les images soient chargées
- $crawler->waitFor('img.manga-page');
-
- $pages = [];
- $crawler->filter('img.manga-page')->each(function ($image) use (&$pages) {
- $pages[] = [
- 'url' => $image->attr('src'),
- 'number' => count($pages) + 1
- ];
- });
-
- return $pages;
- } finally {
- $client->quit();
- }
- }
-
- public function supports(string $sourceType): bool
- {
- return $sourceType === 'javascript';
- }
-}
\ No newline at end of file
diff --git a/tests/Domain/Scraping/Adapter/InMemoryEventBus.php b/tests/Domain/Scraping/Adapter/InMemoryEventBus.php
new file mode 100644
index 0000000..844e91f
--- /dev/null
+++ b/tests/Domain/Scraping/Adapter/InMemoryEventBus.php
@@ -0,0 +1,23 @@
+dispatchedMessages[] = $message;
+
+ return new Envelope($message);
+ }
+
+ /**
+ * Returns every message recorded by dispatch(), in dispatch order.
+ */
+ public function getDispatchedMessages(): array
+ {
+ return $this->dispatchedMessages;
+ }
+}
diff --git a/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php b/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php
new file mode 100644
index 0000000..13a5e4f
--- /dev/null
+++ b/tests/Domain/Scraping/Adapter/InMemoryScraperAdapter.php
@@ -0,0 +1,47 @@
+shouldThrowException) {
+ throw $this->shouldThrowException;
+ }
+
+ $job = new ScrapingJob(Uuid::uuid4(), $mangaId, $chapterId, $sourceId);
+ $this->jobs[] = $job;
+
+ return $job;
+ }
+
+ /**
+ * Test double for scraping: does nothing unless an exception was registered through
+ * simulateError(), in which case that exception is thrown.
+ */
+ public function scrape(ScrapingJob $job): void
+ {
+ if ($this->shouldThrowException) {
+ throw $this->shouldThrowException;
+ }
+ }
+
+ /**
+ * Registers the exception that the adapter's methods throw when they check shouldThrowException.
+ */
+ public function simulateError(\Exception $exception): void
+ {
+ $this->shouldThrowException = $exception;
+ }
+
+ /**
+ * Returns the jobs recorded by this adapter.
+ */
+ public function getJobs(): array
+ {
+ return $this->jobs;
+ }
+
+ /**
+ * Always returns true: this test adapter accepts any source type.
+ */
+ public function supports(string $sourceType): bool
+ {
+ return true;
+ }
+}
diff --git a/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php b/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php
new file mode 100644
index 0000000..bbb5219
--- /dev/null
+++ b/tests/Domain/Scraping/Adapter/InMemoryScrapingJobRepository.php
@@ -0,0 +1,44 @@
+jobs[] = $job;
+ }
+
+ /**
+ * Returns the jobs held in memory by this repository.
+ */
+ public function getJobs(): array
+ {
+ return $this->jobs;
+ }
+
+    /**
+     * Returns the first stored job whose id is strictly equal to $id, or null when none matches.
+     */
+    public function findById(string $id): ?ScrapingJob
+    {
+        foreach ($this->jobs as $candidate) {
+            if ($id !== $candidate->getId()) {
+                continue;
+            }
+
+            return $candidate;
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns the first stored job whose chapter id is strictly equal to $chapterId, or null when none matches.
+     */
+    public function findByChapterId(string $chapterId): ?ScrapingJob
+    {
+        foreach ($this->jobs as $candidate) {
+            if ($chapterId !== $candidate->getChapterId()) {
+                continue;
+            }
+
+            return $candidate;
+        }
+
+        return null;
+    }
+}
diff --git a/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php
new file mode 100644
index 0000000..11088ad
--- /dev/null
+++ b/tests/Domain/Scraping/Application/CommandHandler/ScrapeChapterHandlerTest.php
@@ -0,0 +1,85 @@
+scraper = new InMemoryScraperAdapter();
+ $this->repository = new InMemoryScrapingJobRepository();
+ $this->eventBus = new InMemoryEventBus();
+ $this->handler = new ScrapeChapterHandler(
+ $this->scraper,
+ $this->repository,
+ $this->eventBus
+ );
+ }
+
+ /**
+ * Happy path: handling the command creates one job, saves that same job, and dispatches a
+ * single ChapterScrapingStarted message carrying the job id.
+ */
+ public function testHandleSuccessfully(): void
+ {
+ $command = new ScrapeChapter(
+ chapterId: 2,
+ sourceId: 3,
+ mangaId: 1
+ );
+
+ $this->handler->handle($command);
+
+ // Check that the job was created
+ $scrapingJobs = $this->scraper->getJobs();
+ $this->assertCount(1, $scrapingJobs);
+ $job = $scrapingJobs[0];
+
+ // Check that the job was saved
+ $savedJobs = $this->repository->getJobs();
+ $this->assertCount(1, $savedJobs);
+ $this->assertSame($job, $savedJobs[0]);
+
+ // Check that the event was dispatched
+ $dispatchedMessages = $this->eventBus->getDispatchedMessages();
+ $this->assertCount(1, $dispatchedMessages);
+ $this->assertInstanceOf(ChapterScrapingStarted::class, $dispatchedMessages[0]);
+ $this->assertEquals($job->getId(), $dispatchedMessages[0]->getJobId());
+ }
+
+ /**
+ * Failure path: when the scraper throws, the handler rethrows the exception and dispatches a
+ * single ChapterScrapingFailed message with the chapter id and the exception message.
+ */
+ public function testHandleThrowsException(): void
+ {
+ $command = new ScrapeChapter(
+ chapterId: 2,
+ sourceId: 3,
+ mangaId: 1
+ );
+
+ $exception = new \Exception('Scraping failed');
+ $this->scraper->simulateError($exception);
+
+ $this->expectException(\Exception::class);
+ $this->expectExceptionMessage('Scraping failed');
+
+ try {
+ $this->handler->handle($command);
+ } finally {
+ // Check that the failure event was dispatched (runs even though handle() throws)
+ $dispatchedMessages = $this->eventBus->getDispatchedMessages();
+ $this->assertCount(1, $dispatchedMessages);
+ $this->assertInstanceOf(ChapterScrapingFailed::class, $dispatchedMessages[0]);
+ $this->assertEquals(2, $dispatchedMessages[0]->getChapterId());
+ $this->assertEquals('Scraping failed', $dispatchedMessages[0]->getReason());
+ }
+ }
+}
diff --git a/tests/Functional/UserResourceTest.php b/tests/Functional/UserResourceTest.php
deleted file mode 100644
index 984766f..0000000
--- a/tests/Functional/UserResourceTest.php
+++ /dev/null
@@ -1,112 +0,0 @@
- $company]);
-
- $this->browser()
- ->post('/login', [
- 'json' => [
- 'email' => $user->getEmail(),
- 'password' => 'password'
- ]
- ])
- ->assertStatus(204)
- ->assertHeaderContains('Location', '/api/users/' . $user->getId());
- }
-
- public function testUserLogoutHttp()
- {
- $user = UserFactory::createOne();
- $this->browser()
- ->actingAs($user)
- ->get('/logout')
- ->assertStatus(204)
- ;
- }
-
- public function testUserLoginToken(): void
- {
- $token = ApiTokenFactory::createOne();
-
- $this->browser()
- ->get('api/users', [
- 'headers' => [
- 'Authorization' => 'Bearer ' . $token->getToken()
- ]
- ])
- ->assertStatus(200);
- }
-
- public function testCanGetUser(): void
- {
- $user = UserFactory::createOne();
-
- $this->browser()
- ->actingAs($user)
- ->get('/api/users/' . $user->getId())
- ->assertSuccessful()
- ->assertJson()
- ->assertJsonMatches('email', $user->getEmail())
- ->assertJsonMatches('firstName', $user->getFirstName())
- ->assertJsonMatches('lastName', $user->getLastName())
- ;
- }
-
- public function testCanPostToCreateUser(): void
- {
- $loggedUser = UserFactory::createOne();
-
- $this->browser()
- ->actingAs($loggedUser)
- ->post('/api/users', [
- 'json' => [
- 'email' => 'john.doe@mail.com',
- 'firstName' => 'John',
- 'lastName' => 'Doe',
- 'password' => 'password',
- ],
- ])
- ->assertSuccessful()
- ->post('/login', [
- 'json' => [
- 'email' => 'john.doe@mail.com',
- 'password' => 'password',
- ],
- ])
- ->assertSuccessful();
- }
-
- public function testCanPatchToUpdateUser(): void
- {
- $loggedUser = UserFactory::createOne();
-
- $this->browser()
- ->actingAs($loggedUser)
- ->patch('/api/users/' . $loggedUser->getId(), [
- 'json' => [
- 'firstName' => 'John',
- 'lastName' => 'Doe',
- ],
- 'headers' => [
- 'Content-Type' => 'application/merge-patch+json'
- ]
- ])
- ->assertSuccessful()
- ->get('/api/users/' . $loggedUser->getId())
- ->assertSuccessful()
- ->assertJson()
- ->assertJsonMatches('firstName', 'John')
- ->assertJsonMatches('lastName', 'Doe');
- ;
- }
-}