feat: first unit tests for ScrapeChapterHandler.php

This commit is contained in:
ext.jeremy.guillot@maxicoffee.domains
2025-02-03 10:38:53 +01:00
parent 21fcdd1084
commit 89570ad951
31 changed files with 1105 additions and 291 deletions

View File

@@ -22,6 +22,7 @@
"nelmio/cors-bundle": "^2.4",
"phpdocumentor/reflection-docblock": "^5.3",
"phpstan/phpdoc-parser": "^1.25",
"ramsey/uuid": "^4.7",
"runtime/frankenphp-symfony": "^0.2.0",
"symfony/asset": "7.0.*",
"symfony/console": "7.0.*",

243
composer.lock generated
View File

@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "6258706876617c8b0c08f13c5a158fe7",
"content-hash": "49014ec06c069804432e6a13701e46a4",
"packages": [
{
"name": "api-platform/core",
@@ -172,6 +172,66 @@
},
"time": "2024-02-01T14:41:52+00:00"
},
{
"name": "brick/math",
"version": "0.12.1",
"source": {
"type": "git",
"url": "https://github.com/brick/math.git",
"reference": "f510c0a40911935b77b86859eb5223d58d660df1"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/brick/math/zipball/f510c0a40911935b77b86859eb5223d58d660df1",
"reference": "f510c0a40911935b77b86859eb5223d58d660df1",
"shasum": ""
},
"require": {
"php": "^8.1"
},
"require-dev": {
"php-coveralls/php-coveralls": "^2.2",
"phpunit/phpunit": "^10.1",
"vimeo/psalm": "5.16.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Brick\\Math\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "Arbitrary-precision arithmetic library",
"keywords": [
"Arbitrary-precision",
"BigInteger",
"BigRational",
"arithmetic",
"bigdecimal",
"bignum",
"bignumber",
"brick",
"decimal",
"integer",
"math",
"mathematics",
"rational"
],
"support": {
"issues": "https://github.com/brick/math/issues",
"source": "https://github.com/brick/math/tree/0.12.1"
},
"funding": [
{
"url": "https://github.com/BenMorel",
"type": "github"
}
],
"time": "2023-11-29T23:19:16+00:00"
},
{
"name": "doctrine/cache",
"version": "2.2.0",
@@ -3050,6 +3110,187 @@
},
"time": "2019-03-08T08:55:37+00:00"
},
{
"name": "ramsey/collection",
"version": "2.0.0",
"source": {
"type": "git",
"url": "https://github.com/ramsey/collection.git",
"reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ramsey/collection/zipball/a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5",
"reference": "a4b48764bfbb8f3a6a4d1aeb1a35bb5e9ecac4a5",
"shasum": ""
},
"require": {
"php": "^8.1"
},
"require-dev": {
"captainhook/plugin-composer": "^5.3",
"ergebnis/composer-normalize": "^2.28.3",
"fakerphp/faker": "^1.21",
"hamcrest/hamcrest-php": "^2.0",
"jangregor/phpstan-prophecy": "^1.0",
"mockery/mockery": "^1.5",
"php-parallel-lint/php-console-highlighter": "^1.0",
"php-parallel-lint/php-parallel-lint": "^1.3",
"phpcsstandards/phpcsutils": "^1.0.0-rc1",
"phpspec/prophecy-phpunit": "^2.0",
"phpstan/extension-installer": "^1.2",
"phpstan/phpstan": "^1.9",
"phpstan/phpstan-mockery": "^1.1",
"phpstan/phpstan-phpunit": "^1.3",
"phpunit/phpunit": "^9.5",
"psalm/plugin-mockery": "^1.1",
"psalm/plugin-phpunit": "^0.18.4",
"ramsey/coding-standard": "^2.0.3",
"ramsey/conventional-commits": "^1.3",
"vimeo/psalm": "^5.4"
},
"type": "library",
"extra": {
"captainhook": {
"force-install": true
},
"ramsey/conventional-commits": {
"configFile": "conventional-commits.json"
}
},
"autoload": {
"psr-4": {
"Ramsey\\Collection\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ben Ramsey",
"email": "ben@benramsey.com",
"homepage": "https://benramsey.com"
}
],
"description": "A PHP library for representing and manipulating collections.",
"keywords": [
"array",
"collection",
"hash",
"map",
"queue",
"set"
],
"support": {
"issues": "https://github.com/ramsey/collection/issues",
"source": "https://github.com/ramsey/collection/tree/2.0.0"
},
"funding": [
{
"url": "https://github.com/ramsey",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/ramsey/collection",
"type": "tidelift"
}
],
"time": "2022-12-31T21:50:55+00:00"
},
{
"name": "ramsey/uuid",
"version": "4.7.6",
"source": {
"type": "git",
"url": "https://github.com/ramsey/uuid.git",
"reference": "91039bc1faa45ba123c4328958e620d382ec7088"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/ramsey/uuid/zipball/91039bc1faa45ba123c4328958e620d382ec7088",
"reference": "91039bc1faa45ba123c4328958e620d382ec7088",
"shasum": ""
},
"require": {
"brick/math": "^0.8.8 || ^0.9 || ^0.10 || ^0.11 || ^0.12",
"ext-json": "*",
"php": "^8.0",
"ramsey/collection": "^1.2 || ^2.0"
},
"replace": {
"rhumsaa/uuid": "self.version"
},
"require-dev": {
"captainhook/captainhook": "^5.10",
"captainhook/plugin-composer": "^5.3",
"dealerdirect/phpcodesniffer-composer-installer": "^0.7.0",
"doctrine/annotations": "^1.8",
"ergebnis/composer-normalize": "^2.15",
"mockery/mockery": "^1.3",
"paragonie/random-lib": "^2",
"php-mock/php-mock": "^2.2",
"php-mock/php-mock-mockery": "^1.3",
"php-parallel-lint/php-parallel-lint": "^1.1",
"phpbench/phpbench": "^1.0",
"phpstan/extension-installer": "^1.1",
"phpstan/phpstan": "^1.8",
"phpstan/phpstan-mockery": "^1.1",
"phpstan/phpstan-phpunit": "^1.1",
"phpunit/phpunit": "^8.5 || ^9",
"ramsey/composer-repl": "^1.4",
"slevomat/coding-standard": "^8.4",
"squizlabs/php_codesniffer": "^3.5",
"vimeo/psalm": "^4.9"
},
"suggest": {
"ext-bcmath": "Enables faster math with arbitrary-precision integers using BCMath.",
"ext-gmp": "Enables faster math with arbitrary-precision integers using GMP.",
"ext-uuid": "Enables the use of PeclUuidTimeGenerator and PeclUuidRandomGenerator.",
"paragonie/random-lib": "Provides RandomLib for use with the RandomLibAdapter",
"ramsey/uuid-doctrine": "Allows the use of Ramsey\\Uuid\\Uuid as Doctrine field type."
},
"type": "library",
"extra": {
"captainhook": {
"force-install": true
}
},
"autoload": {
"files": [
"src/functions.php"
],
"psr-4": {
"Ramsey\\Uuid\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"description": "A PHP library for generating and working with universally unique identifiers (UUIDs).",
"keywords": [
"guid",
"identifier",
"uuid"
],
"support": {
"issues": "https://github.com/ramsey/uuid/issues",
"source": "https://github.com/ramsey/uuid/tree/4.7.6"
},
"funding": [
{
"url": "https://github.com/ramsey",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/ramsey/uuid",
"type": "tidelift"
}
],
"time": "2024-04-27T21:32:50+00:00"
},
{
"name": "runtime/frankenphp-symfony",
"version": "0.2.0",

View File

@@ -16,9 +16,10 @@
</testsuite>
</testsuites>
<extensions>
<extension class="Symfony\Component\Panther\ServerExtension" />
<bootstrap class="Symfony\Component\Panther\ServerExtension" />
<bootstrap class="Zenstruck\Browser\Test\BrowserExtension"/>
</extensions>
<coverage/>
<source>
<include>
<directory suffix=".php">src</directory>

66
scrapers.json Normal file
View File

@@ -0,0 +1,66 @@
[
{
"baseUrl": "https://darkscans.net/",
"imageSelector": ".reading-content img",
"nextPageSelector": null,
"chapterUrlFormat": "https://darkscans.net/mangas/{slug}/chapter-{chapterNumber}/",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://lelscans.net",
"imageSelector": "#image img",
"nextPageSelector": "a[title=\"Suivant\"]",
"chapterUrlFormat": "https://lelscans.net/scan-{slug}/{chapterNumber}",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://www.thebeginningaftertheend.fr/",
"imageSelector": ".reading-content img",
"nextPageSelector": null,
"chapterUrlFormat": "https://www.thebeginningaftertheend.fr/manga/{slug}-manga/chapitre-{chapterNumber}_1/",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://lelscanfr.com",
"imageSelector": "#chapter-container img.chapter-image",
"nextPageSelector": null,
"chapterUrlFormat": "https://lelscanfr.com/manga/{slug}/{chapterNumber}",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://read-versus.online",
"imageSelector": ".entry-content img",
"nextPageSelector": null,
"chapterUrlFormat": "https://read-versus.online/manga/{slug}-chapter-{chapterNumber}/",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://anime-sama.fr",
"imageSelector": "#scansPlacement img.lazy",
"nextPageSelector": null,
"chapterUrlFormat": "https://anime-sama.fr/catalogue/{slug}/scan/vf/",
"scrapingType": "javascript",
"chapterSelector": null
},
{
"baseUrl": "https://www.kaijuchapters.com/",
"imageSelector": ".entry-content img.article_ed__img",
"nextPageSelector": null,
"chapterUrlFormat": "https://www.kaijuchapters.com/manga/{slug}-chapter-{chapterNumber}/",
"scrapingType": "html",
"chapterSelector": null
},
{
"baseUrl": "https://www.lelmanga.com",
"imageSelector": "#readerarea img",
"nextPageSelector": null,
"chapterUrlFormat": "https://www.lelmanga.com/{slug}-{chapterNumber}",
"scrapingType": "html",
"chapterSelector": null
}
]

View File

@@ -1,11 +1,13 @@
<?php
namespace App\Domain\Scraping\Application\Command\ScrapeChapter;
namespace App\Domain\Scraping\Application\Command;
class ScrapeChapterCommand
readonly class ScrapeChapter
{
public function __construct(
public readonly string $chapterId,
public readonly string $sourceId
) {}
public string $chapterId,
public string $sourceId,
public string $mangaId
) {
}
}

View File

@@ -1,31 +1,40 @@
<?php
namespace App\Domain\Scraping\Application\Command\ScrapeChapter;
namespace App\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Domain\Contract\ScraperInterface;
use App\Domain\Scraping\Domain\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Application\Command\ScrapeChapter;
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
use Symfony\Component\Messenger\MessageBusInterface;
class ScrapeChapterHandler
readonly class ScrapeChapterHandler
{
public function __construct(
private readonly ScraperInterface $scraper,
private readonly ScrapingJobRepositoryInterface $scrapingJobRepository,
private readonly MessageBusInterface $eventBus
) {}
public function handle(ScrapeChapterCommand $command): void
{
$job = $this->scraper->createScrapingJob(
$command->chapterId,
$command->sourceId
);
$this->scrapingJobRepository->save($job);
$this->eventBus->dispatch(new ChapterScrapingStarted($job->getId()));
$this->scraper->scrape($job);
private ScraperInterface $scraper,
private ScrapingJobRepositoryInterface $scrapingJobRepository,
private MessageBusInterface $eventBus
) {
}
}
public function handle(ScrapeChapter $command): void
{
try {
$job = $this->scraper->createScrapingJob(
$command->mangaId,
$command->chapterId,
$command->sourceId,
);
$this->scrapingJobRepository->save($job);
$this->eventBus->dispatch(new ChapterScrapingStarted($job->getId()));
$this->scraper->scrape($job);
} catch (\Exception $e) {
$this->eventBus->dispatch(new ChapterScrapingFailed($command->chapterId, $e->getMessage()));
throw $e;
}
}
}

View File

@@ -0,0 +1,10 @@
<?php
namespace App\Domain\Scraping\Domain\Contract\Repository;
use App\Domain\Scraping\Domain\Model\Manga;
/**
 * Contract for looking up Manga domain objects by identifier.
 */
interface MangaRepositoryInterface
{
/**
 * Fetches the manga with the given identifier.
 *
 * @param string $id identifier of the manga to look up
 *
 * @return Manga|null the manga, or null (the signature allows it; presumably "not found" — confirm against implementations)
 */
public function getById(string $id): ?Manga;
}

View File

@@ -1,6 +1,6 @@
<?php
namespace App\Domain\Scraping\Domain\Repository;
namespace App\Domain\Scraping\Domain\Contract\Repository;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
@@ -9,4 +9,4 @@ interface ScrapingJobRepositoryInterface
public function save(ScrapingJob $job): void;
public function findById(string $id): ?ScrapingJob;
public function findByChapterId(string $chapterId): ?ScrapingJob;
}
}

View File

@@ -0,0 +1,10 @@
<?php
namespace App\Domain\Scraping\Domain\Repository;
use App\Domain\Scraping\Domain\Model\Source;
/**
 * Contract for looking up Source domain objects by identifier.
 */
interface SourceRepositoryInterface
{
/**
 * Fetches the source with the given identifier.
 *
 * @param string $id identifier of the source to look up
 *
 * @return Source|null the source, or null (the signature allows it; presumably "not found" — confirm against implementations)
 */
public function getById(string $id): ?Source;
}

View File

@@ -1,12 +1,12 @@
<?php
namespace App\Domain\Scraping\Domain\Contract;
namespace App\Domain\Scraping\Domain\Contract\Service;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
interface ScraperInterface
{
public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob;
public function createScrapingJob(string $mangaId, string $chapterId, string $sourceId): ScrapingJob;
public function scrape(ScrapingJob $job): void;
public function supports(string $sourceType): bool;
}

View File

@@ -0,0 +1,21 @@
<?php
namespace App\Domain\Scraping\Domain\Event;
/**
 * Event describing a chapter scraping failure: which chapter failed and why.
 */
class ChapterScrapingFailed
{
    /**
     * @param string $chapterId identifier of the chapter whose scraping failed
     * @param string $reason    textual description of the failure
     */
    public function __construct(
        private readonly string $chapterId,
        private readonly string $reason,
    ) {
    }

    /**
     * @return string identifier of the chapter whose scraping failed
     */
    public function getChapterId(): string
    {
        return $this->chapterId;
    }

    /**
     * @return string textual description of the failure
     */
    public function getReason(): string
    {
        return $this->reason;
    }
}

View File

@@ -0,0 +1,39 @@
<?php
namespace App\Domain\Scraping\Domain\Model;
/**
 * Immutable domain representation of a manga as seen by the scraping context.
 */
class Manga
{
    /**
     * @param string $id          manga identifier
     * @param string $title       manga title
     * @param string $slug        manga slug
     * @param string $description manga description
     * @param string $author      manga author
     */
    public function __construct(
        private readonly string $id,
        private readonly string $title,
        private readonly string $slug,
        private readonly string $description,
        private readonly string $author,
    ) {
    }

    /** @return string manga identifier */
    public function getId(): string
    {
        return $this->id;
    }

    /** @return string manga title */
    public function getTitle(): string
    {
        return $this->title;
    }

    /** @return string manga slug */
    public function getSlug(): string
    {
        return $this->slug;
    }

    /** @return string manga description */
    public function getDescription(): string
    {
        return $this->description;
    }

    /** @return string manga author */
    public function getAuthor(): string
    {
        return $this->author;
    }
}

View File

@@ -14,8 +14,8 @@ class ScrapingJob
public function __construct(
private readonly string $id,
private readonly string $chapterId,
private readonly string $mangaId,
private readonly string $chapterId,
private readonly string $sourceId
) {
$this->status = ScrapingStatus::PENDING;

View File

@@ -0,0 +1,59 @@
<?php
namespace App\Domain\Scraping\Domain\Model;
use DateTimeImmutable;
/**
 * Immutable domain representation of a scraping source.
 */
class Source
{
    /**
     * @param string            $id                  source identifier
     * @param string            $name                source name
     * @param string            $description         source description
     * @param string            $baseUrl             base URL of the source
     * @param array             $scrappingParameters scraping parameters (structure not visible here)
     * @param bool              $isActive            whether the source is active
     * @param DateTimeImmutable $createdAt           creation timestamp
     * @param DateTimeImmutable $updatedAt           last update timestamp
     */
    public function __construct(
        private readonly string $id,
        private readonly string $name,
        private readonly string $description,
        private readonly string $baseUrl,
        private readonly array $scrappingParameters,
        private readonly bool $isActive,
        private readonly DateTimeImmutable $createdAt,
        private readonly DateTimeImmutable $updatedAt,
    ) {
    }

    /** @return string source identifier */
    public function getId(): string
    {
        return $this->id;
    }

    /** @return string source name */
    public function getName(): string
    {
        return $this->name;
    }

    /** @return string source description */
    public function getDescription(): string
    {
        return $this->description;
    }

    /** @return string base URL of the source */
    public function getBaseUrl(): string
    {
        return $this->baseUrl;
    }

    /** @return array scraping parameters exactly as passed to the constructor */
    public function getScrappingParameters(): array
    {
        return $this->scrappingParameters;
    }

    /** @return bool whether the source is active */
    public function isActive(): bool
    {
        return $this->isActive;
    }

    /** @return DateTimeImmutable creation timestamp */
    public function getCreatedAt(): DateTimeImmutable
    {
        return $this->createdAt;
    }

    /** @return DateTimeImmutable last update timestamp */
    public function getUpdatedAt(): DateTimeImmutable
    {
        return $this->updatedAt;
    }
}

View File

@@ -0,0 +1,18 @@
<?php
namespace App\Domain\Scraping\Domain\Model\ValueObject;
/**
 * Value object wrapping a non-empty chapter identifier.
 */
class ChapterId
{
    /**
     * @param string $value raw chapter identifier
     *
     * @throws \InvalidArgumentException when the identifier is the empty string
     */
    public function __construct(private readonly string $value)
    {
        // Compare strictly against '': empty() would also reject the valid identifier "0".
        if ('' === $value) {
            throw new \InvalidArgumentException('Chapter ID cannot be empty');
        }
    }

    /**
     * @return string the wrapped identifier
     */
    public function getValue(): string
    {
        return $this->value;
    }
}

View File

@@ -0,0 +1,18 @@
<?php
namespace App\Domain\Scraping\Domain\Model\ValueObject;
/**
 * Value object wrapping a non-empty source identifier.
 */
class SourceId
{
    /**
     * @param string $value raw source identifier
     *
     * @throws \InvalidArgumentException when the identifier is the empty string
     */
    public function __construct(private readonly string $value)
    {
        // Compare strictly against '': empty() would also reject the valid identifier "0".
        if ('' === $value) {
            throw new \InvalidArgumentException('Source ID cannot be empty');
        }
    }

    /**
     * @return string the wrapped identifier
     */
    public function getValue(): string
    {
        return $this->value;
    }
}

View File

@@ -0,0 +1,18 @@
<?php
namespace App\Domain\Scraping\Domain\Model\ValueObject;
/**
 * Value object for a directory path that is guaranteed to exist once constructed.
 */
class TempDirectory
{
    /**
     * Creates the directory (including missing parents) when it does not exist yet.
     *
     * @param string $path directory path to use
     *
     * @throws \RuntimeException when the directory does not exist and cannot be created
     */
    public function __construct(private readonly string $path)
    {
        // Recursive mkdir supports nested paths; the trailing is_dir() re-check
        // tolerates another process creating the directory between the first
        // check and the mkdir() call.
        if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
            throw new \RuntimeException("Failed to create directory: $path");
        }
    }

    /**
     * @return string the directory path given at construction
     */
    public function getPath(): string
    {
        return $this->path;
    }
}

View File

@@ -0,0 +1,22 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence;
use App\Domain\Scraping\Domain\Contract\Repository\MangaRepositoryInterface;
use App\Domain\Scraping\Domain\Model\Manga;
use App\Domain\Scraping\Infrastructure\Persistence\Entity\MangaEntity;
use Doctrine\ORM\EntityManagerInterface;
/**
 * Doctrine-backed implementation of MangaRepositoryInterface.
 */
class DoctrineMangaRepository implements MangaRepositoryInterface
{
    public function __construct(
        private readonly EntityManagerInterface $entityManager,
    ) {
    }

    /**
     * Looks up the MangaEntity with the given identifier and converts it to a domain Manga.
     *
     * @return Manga|null null when the lookup yields no entity
     */
    public function getById(string $id): ?Manga
    {
        $entity = $this->entityManager
            ->getRepository(MangaEntity::class)
            ->find($id);

        if (!$entity) {
            return null;
        }

        return $entity->toDomain();
    }
}

View File

@@ -0,0 +1,26 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Repository\SourceRepositoryInterface;
use Doctrine\ORM\EntityManagerInterface;
use App\Domain\Scraping\Infrastructure\Persistence\Entity\SourceEntity as SourceEntityEntity;
/**
 * Doctrine-backed implementation of SourceRepositoryInterface.
 */
class DoctrineSourceRepository implements SourceRepositoryInterface
{
    public function __construct(
        private readonly EntityManagerInterface $entityManager,
    ) {
    }

    /**
     * Looks up the source entity with the given identifier and converts it to a domain Source.
     *
     * @return Source|null null when the lookup yields no entity
     */
    public function getById(string $id): ?Source
    {
        $repository = $this->entityManager->getRepository(SourceEntityEntity::class);
        $found = $repository->find($id);

        return $found ? $found->toDomain() : null;
    }
}

View File

@@ -0,0 +1,75 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
use App\Domain\Scraping\Domain\Model\Manga;
use Doctrine\DBAL\Types\Types;
use Doctrine\ORM\Mapping as ORM;
/**
 * Doctrine entity mapped to the "mangas" table, with conversions to and from the domain Manga.
 */
#[ORM\Entity]
#[ORM\Table(name: 'mangas')]
class MangaEntity
{
    #[ORM\Id]
    #[ORM\Column(type: 'string', length: 36)]
    private string $id;

    #[ORM\Column(length: 255)]
    private string $title;

    #[ORM\Column(length: 255, unique: true)]
    private string $slug;

    #[ORM\Column(length: 255, nullable: true)]
    private ?string $imageUrl = null;

    #[ORM\Column(nullable: true)]
    private ?int $publicationYear = null;

    #[ORM\Column(type: Types::TEXT, nullable: true)]
    private ?string $description = null;

    #[ORM\Column(type: Types::ARRAY, nullable: true)]
    private ?array $genres = null;

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    #[ORM\Column(nullable: true)]
    private ?float $rating = null;

    #[ORM\Column(length: 255, nullable: true)]
    private ?string $author = null;

    #[ORM\Column(length: 255, nullable: true)]
    private ?string $status = null;

    #[ORM\Column]
    private bool $monitored;

    /**
     * Builds an entity from a domain Manga, copying id, title, slug, description and author.
     *
     * NOTE(review): $createdAt and $monitored are non-nullable and are left
     * uninitialized here; confirm the caller sets them before the entity is persisted.
     */
    public static function fromDomain(Manga $manga): self
    {
        $entity = new self();
        $entity->id = $manga->getId();
        $entity->title = $manga->getTitle();
        $entity->slug = $manga->getSlug();
        $entity->description = $manga->getDescription();
        $entity->author = $manga->getAuthor();

        return $entity;
    }

    /**
     * Converts this entity to a domain Manga.
     *
     * The description and author columns are nullable while the Manga constructor
     * requires strings, so null is mapped to an empty string instead of raising a TypeError.
     */
    public function toDomain(): Manga
    {
        return new Manga(
            $this->id,
            $this->title,
            $this->slug,
            $this->description ?? '',
            $this->author ?? '',
        );
    }
}

View File

@@ -3,7 +3,6 @@
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingStatus;
use Doctrine\ORM\Mapping as ORM;
#[ORM\Entity]
@@ -59,25 +58,6 @@ class ScrapingJobEntity
$this->sourceId
);
// Reconstruire l'état du job à partir des données persistées
$reflection = new \ReflectionClass(ScrapingJob::class);
$pagesProperty = $reflection->getProperty('pages');
$pagesProperty->setAccessible(true);
$pagesProperty->setValue($job, $this->pages);
$statusProperty = $reflection->getProperty('status');
$statusProperty->setAccessible(true);
$statusProperty->setValue($job, ScrapingStatus::from($this->status));
$createdAtProperty = $reflection->getProperty('createdAt');
$createdAtProperty->setAccessible(true);
$createdAtProperty->setValue($job, $this->createdAt);
$completedAtProperty = $reflection->getProperty('completedAt');
$completedAtProperty->setAccessible(true);
$completedAtProperty->setValue($job, $this->completedAt);
return $job;
}
}

View File

@@ -0,0 +1,65 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Persistence\Entity;
use App\Domain\Scraping\Domain\Model\Source;
use Doctrine\ORM\Mapping as ORM;
/**
 * Doctrine entity mapped to the "sources" table, with conversions to and from the domain Source.
 */
#[ORM\Entity]
#[ORM\Table(name: 'sources')]
class SourceEntity
{
    #[ORM\Id]
    #[ORM\Column(type: 'string', length: 36)]
    private string $id;

    #[ORM\Column(type: 'string', nullable: true)]
    private ?string $name = null;

    #[ORM\Column(type: 'text', nullable: true)]
    private ?string $description = null;

    #[ORM\Column(type: 'string')]
    private string $baseUrl;

    #[ORM\Column(type: 'json')]
    private array $scrappingParameters = [];

    #[ORM\Column(type: 'boolean')]
    private bool $isActive;

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $updatedAt;

    /**
     * Builds an entity carrying every field of the given domain Source.
     */
    public static function fromDomain(Source $source): self
    {
        $mapped = new self();

        $mapped->id = $source->getId();
        $mapped->name = $source->getName();
        $mapped->description = $source->getDescription();
        $mapped->baseUrl = $source->getBaseUrl();
        $mapped->scrappingParameters = $source->getScrappingParameters();
        $mapped->isActive = $source->isActive();
        $mapped->createdAt = $source->getCreatedAt();
        $mapped->updatedAt = $source->getUpdatedAt();

        return $mapped;
    }

    /**
     * Converts this entity to a domain Source; a null name or description becomes an empty string.
     */
    public function toDomain(): Source
    {
        $name = $this->name ?? '';
        $description = $this->description ?? '';

        return new Source(
            $this->id,
            $name,
            $description,
            $this->baseUrl,
            $this->scrappingParameters,
            $this->isActive,
            $this->createdAt,
            $this->updatedAt,
        );
    }
}

View File

@@ -0,0 +1,23 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Service;
use Symfony\Contracts\HttpClient\HttpClientInterface;
/**
 * Downloads a remote image over HTTP and writes it to a local file.
 */
class ImageDownloader
{
    public function __construct(
        private readonly HttpClientInterface $httpClient,
    ) {
    }

    /**
     * Fetches $url and stores the response body at $destination.
     *
     * @param string $url         URL of the image to fetch
     * @param string $destination local file path the body is written to
     *
     * @throws \RuntimeException when the response is not declared as an image
     *                           or the file cannot be written
     */
    public function download(string $url, string $destination): void
    {
        $response = $this->httpClient->request('GET', $url);

        // A response without a content-type header is treated as a non-image
        // instead of triggering an undefined-index warning and a TypeError.
        $contentType = $response->getHeaders()['content-type'][0] ?? '';
        if (!str_starts_with(strtolower($contentType), 'image/')) {
            throw new \RuntimeException('Invalid content type');
        }

        // file_put_contents() signals failure by returning false; surface it.
        if (false === file_put_contents($destination, $response->getContent())) {
            throw new \RuntimeException("Failed to write image to: $destination");
        }
    }
}

View File

@@ -3,67 +3,37 @@
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Contract\ScraperInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingCompleted;
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
use App\Domain\Scraping\Domain\Event\PageScrapingProgressed;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\ScrapingProgress;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\TempDirectory;
use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
use Symfony\Component\Messenger\MessageBusInterface;
use Ramsey\Uuid\Uuid;
abstract class AbstractScraper implements ScraperInterface
{
public function __construct(
protected readonly HttpClientInterface $httpClient,
protected readonly EventDispatcherInterface $eventDispatcher,
protected readonly string $tempDir
protected readonly ImageDownloader $imageDownloader,
protected readonly MessageBusInterface $eventBus
) {}
public function createScrapingJob(string $chapterId, string $sourceId): ScrapingJob
public function createScrapingJob(string $mangaId, string $chapterId, string $sourceId): ScrapingJob
{
return new ScrapingJob(
uniqid('scraping_'),
Uuid::uuid4()->toString(),
$mangaId,
$chapterId,
$sourceId
$sourceId,
);
}
public function scrape(ScrapingJob $job): void
{
try {
$this->eventDispatcher->dispatch(new ChapterScrapingStarted($job->getId()));
$tempDir = $this->createTempDirectory($job);
$pageData = $this->scrapePages($job);
foreach ($pageData as $page) {
$this->downloadPage($job, $page, $tempDir);
}
$job->complete();
$this->eventDispatcher->dispatch(
new ChapterScrapingCompleted($job->getId(), $job->getPages())
);
$this->cleanupTempDirectory($tempDir);
} catch (\Exception $e) {
$job->fail();
throw $e;
}
}
abstract protected function scrapePages(ScrapingJob $job): array;
protected function createTempDirectory(ScrapingJob $job): string
{
$tempDir = $this->tempDir . '/' . uniqid('scraping_' . $job->getId() . '_');
if (!mkdir($tempDir) && !is_dir($tempDir)) {
throw new \RuntimeException("Failed to create temporary directory: $tempDir");
}
return $tempDir;
}
abstract public function scrape(ScrapingJob $job): void;
abstract protected function scrapePages(ScrapingJob $job, Source $source): array;
protected function cleanupTempDirectory(string $tempDir): void
{
@@ -84,11 +54,32 @@ abstract class AbstractScraper implements ScraperInterface
}
}
protected function dispatchProgressEvent(ScrapingJob $job, int $current, int $total): void
protected function dispatchProgressEvent(ScrapingJob $job, int $currentPage, int $totalPages): void
{
$progress = new ScrapingProgress($current, $total);
$this->eventDispatcher->dispatch(
new PageScrapingProgressed($job->getId(), $progress)
);
$progress = new ScrapingProgress($currentPage, $totalPages);
$this->eventBus->dispatch(new PageScrapingProgressed($job->getId(), $progress));
}
/**
 * Downloads the image at $imageUrl to $destination by delegating to the injected ImageDownloader.
 */
protected function downloadImage(string $imageUrl, string $destination): void
{
$this->imageDownloader->download($imageUrl, $destination);
}
/**
 * Builds a TempDirectory under the system temp dir with a uniqid()-generated "manga_scraper_" name.
 */
protected function createTempDirectory(): TempDirectory
{
    $path = sys_get_temp_dir() . '/' . uniqid('manga_scraper_');

    return new TempDirectory($path);
}
/**
 * Deletes the regular files directly inside the temporary directory, then removes the directory itself.
 *
 * Only direct children matched by the "/*" glob are removed; subdirectories are left untouched.
 */
protected function cleanupTempFiles(TempDirectory $tempDirectory): void
{
    $path = $tempDirectory->getPath();

    // glob() returns false on error; fall back to an empty list so the loop stays valid.
    $entries = glob($path . '/*') ?: [];
    foreach ($entries as $entry) {
        if (is_file($entry)) {
            unlink($entry);
        }
    }

    // Skip rmdir() when the directory is already gone so cleanup does not emit a warning.
    if (is_dir($path)) {
        rmdir($path);
    }
}
abstract public function supports(string $sourceType): bool;
}

View File

@@ -3,61 +3,131 @@
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use App\Domain\Scraping\Domain\Model\Source;
use App\Domain\Scraping\Domain\Model\ValueObject\ImageUrl;
use App\Domain\Scraping\Domain\Model\ValueObject\PageNumber;
use App\Domain\Scraping\Domain\Repository\SourceRepositoryInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Component\Messenger\MessageBusInterface;
use App\Domain\Scraping\Infrastructure\Service\ImageDownloader;
class HtmlScraper extends AbstractScraper
{
protected function scrapePages(ScrapingJob $job): array
public function __construct(
ImageDownloader $imageDownloader,
MessageBusInterface $eventBus,
private readonly HttpClientInterface $httpClient,
private readonly SourceRepositoryInterface $sourceRepository
) {
parent::__construct($imageDownloader, $eventBus);
}
public function scrape(ScrapingJob $job): void
{
$url = $this->buildUrl($job);
$response = $this->httpClient->request('GET', $url);
$sourceConfig = $this->sourceRepository->getById($job->getSourceId());
$tempDir = $this->createTempDirectory();
try {
$pages = $this->scrapePages($job, $sourceConfig);
foreach ($pages as $index => $imageUrl) {
$pageNumber = new PageNumber($index + 1);
$extension = pathinfo(parse_url($imageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
$destination = sprintf(
'%s/%s.%s',
$tempDir->getPath(),
$pageNumber->getFormattedNumber(),
$extension
);
$this->downloadImage($imageUrl, $destination);
$job->addPage($pageNumber, new ImageUrl($imageUrl));
$this->dispatchProgressEvent($job, $index + 1, count($pages));
}
$job->complete();
} catch (\Exception $e) {
$job->fail();
throw $e;
} finally {
$this->cleanupTempFiles($tempDir);
}
}
protected function scrapePages(ScrapingJob $job, Source $sourceConfig): array
{
if (!$sourceConfig['next_page_selector']) {
return $this->scrapeVerticalReader($job, $sourceConfig);
}
$crawler = new Crawler($response->getContent());
$images = $crawler->filter('img.manga-page'); // Adapter selon le site
return $this->scrapeHorizontalReader($job, $sourceConfig);
}
/**
 * Collects the image URLs of a chapter whose pages are all present on one HTML page.
 *
 * @return array list of image URLs in document order
 *
 * @throws \RuntimeException when the source has no usable image selector
 */
private function scrapeVerticalReader(ScrapingJob $job, Source $sourceConfig): array
{
    // Source is an object without array access, so the selector is read from its
    // scraping parameters rather than via $sourceConfig[...].
    // NOTE(review): the 'image_selector' key is kept from the original code — confirm
    // it matches the keys actually stored in the scraping parameters.
    $imageSelector = $sourceConfig->getScrappingParameters()['image_selector'] ?? null;
    if (!is_string($imageSelector) || '' === $imageSelector) {
        throw new \RuntimeException('Missing image selector for source: ' . $sourceConfig->getId());
    }

    $html = $this->fetchHtml($this->buildChapterUrl($job, $sourceConfig));
    $crawler = new Crawler($html);

    $rawUrls = $crawler->filter($imageSelector)->each(
        static fn (Crawler $node): ?string => $node->attr('src') ?: $node->attr('data-src'),
    );

    // Drop nodes that expose neither src nor data-src: cleanImageUrl() requires a string.
    $usableUrls = array_values(array_filter($rawUrls));

    return array_map(
        fn (string $url): string => $this->cleanImageUrl($url),
        $usableUrls,
    );
}
private function scrapeHorizontalReader(ScrapingJob $job, Source $sourceConfig): array
{
$pages = [];
$images->each(function (Crawler $image) use (&$pages) {
$pages[] = [
'url' => $image->attr('src'),
'number' => count($pages) + 1
];
});
$currentUrl = $this->buildChapterUrl($job, $sourceConfig);
while ($currentUrl) {
$html = $this->fetchHtml($currentUrl);
$crawler = new Crawler($html);
$imageUrl = $crawler->filter($sourceConfig['image_selector'])
->attr('src') ?: $crawler->filter($sourceConfig['image_selector'])
->attr('data-src');
$pages[] = $this->cleanImageUrl($imageUrl);
$nextLink = $crawler->filter($sourceConfig['next_page_selector']);
$currentUrl = $nextLink->count() > 0 ? $nextLink->attr('href') : null;
}
return $pages;
}
protected function downloadPage(ScrapingJob $job, array $page, string $tempDir): void
private function fetchHtml(string $url): string
{
$imageUrl = new ImageUrl($page['url']);
$pageNumber = new PageNumber($page['number']);
$response = $this->httpClient->request('GET', $url);
$fileName = sprintf('%s/%03d.%s',
$tempDir,
$pageNumber->getValue(),
$imageUrl->getExtension()
if ($response->getStatusCode() >= 400) {
throw new \RuntimeException('Failed to fetch page: ' . $url);
}
return $response->getContent();
}
/**
 * Hook for normalising an image URL; currently returns the URL unchanged.
 */
private function cleanImageUrl(string $url): string
{
// Image URL clean-up logic
return $url;
}
private function buildChapterUrl(ScrapingJob $job, Source $sourceConfig): string
{
return sprintf(
$sourceConfig->getBaseUrl(),
$job->getChapterId()
);
$response = $this->httpClient->request('GET', $imageUrl->getValue());
file_put_contents($fileName, $response->getContent());
$job->addPage($pageNumber, $imageUrl);
$this->dispatchProgressEvent($job, $page['number'], count($pages));
}
public function supports(string $sourceType): bool
{
return $sourceType === 'html';
}
private function buildUrl(ScrapingJob $job): string
{
// À implémenter selon votre logique de construction d'URL
// Vous aurez probablement besoin d'injecter un service pour récupérer les informations du chapitre
return sprintf('https://example.com/manga/%s/chapter/%s',
$job->getMangaId(),
$job->getChapterId()
);
return 'html' === $sourceType;
}
}

View File

@@ -1,38 +0,0 @@
<?php
namespace App\Domain\Scraping\Infrastructure\Service\Scraper;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use Symfony\Component\Panther\Client as PantherClient;
class JavascriptScraper extends AbstractScraper
{
protected function scrapePages(ScrapingJob $job): array
{
$client = PantherClient::createChromeClient();
try {
$url = $this->buildUrl($job);
$crawler = $client->request('GET', $url);
// Attendre que les images soient chargées
$crawler->waitFor('img.manga-page');
$pages = [];
$crawler->filter('img.manga-page')->each(function ($image) use (&$pages) {
$pages[] = [
'url' => $image->attr('src'),
'number' => count($pages) + 1
];
});
return $pages;
} finally {
$client->quit();
}
}
public function supports(string $sourceType): bool
{
return $sourceType === 'javascript';
}
}

View File

@@ -0,0 +1,23 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use Symfony\Component\Messenger\Envelope;
use Symfony\Component\Messenger\MessageBusInterface;
/**
 * Test double for the message bus: records every dispatched message in memory.
 */
class InMemoryEventBus implements MessageBusInterface
{
    /** @var object[] messages in dispatch order */
    private array $dispatchedMessages = [];

    /**
     * Records the message and returns it wrapped in a new Envelope; $stamps are ignored.
     */
    public function dispatch(object $message, array $stamps = []): Envelope
    {
        $envelope = new Envelope($message);
        $this->dispatchedMessages[] = $message;

        return $envelope;
    }

    /**
     * @return object[] all messages dispatched so far, in dispatch order
     */
    public function getDispatchedMessages(): array
    {
        return $this->dispatchedMessages;
    }
}

View File

@@ -0,0 +1,47 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Service\ScraperInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
use Ramsey\Uuid\Uuid;
/**
 * Test double for ScraperInterface: creates jobs in memory and can be told to fail.
 */
class InMemoryScraperAdapter implements ScraperInterface
{
    /** @var ScrapingJob[] jobs created so far, in creation order */
    private array $jobs = [];

    /** Exception thrown by createScrapingJob() and scrape() once simulateError() has been called. */
    private ?\Exception $shouldThrowException = null;

    /**
     * Creates a job with a fresh UUID v4 identifier and remembers it.
     *
     * @throws \Exception the exception registered through simulateError(), if any
     */
    public function createScrapingJob(string $mangaId, string $chapterId, string $sourceId): ScrapingJob
    {
        $this->throwIfErrorSimulated();

        // Pass the identifier as a string instead of relying on implicit
        // object-to-string coercion of the UUID object.
        $job = new ScrapingJob(Uuid::uuid4()->toString(), $mangaId, $chapterId, $sourceId);
        $this->jobs[] = $job;

        return $job;
    }

    /**
     * Does nothing unless an error is being simulated.
     *
     * @throws \Exception the exception registered through simulateError(), if any
     */
    public function scrape(ScrapingJob $job): void
    {
        $this->throwIfErrorSimulated();
    }

    /**
     * Makes subsequent createScrapingJob() and scrape() calls throw the given exception.
     */
    public function simulateError(\Exception $exception): void
    {
        $this->shouldThrowException = $exception;
    }

    /**
     * @return ScrapingJob[] jobs created so far
     */
    public function getJobs(): array
    {
        return $this->jobs;
    }

    /**
     * Always reports support, whatever the source type.
     */
    public function supports(string $sourceType): bool
    {
        return true;
    }

    /**
     * Throws the registered exception when an error is being simulated.
     */
    private function throwIfErrorSimulated(): void
    {
        if (null !== $this->shouldThrowException) {
            throw $this->shouldThrowException;
        }
    }
}

View File

@@ -0,0 +1,44 @@
<?php
namespace App\Tests\Domain\Scraping\Adapter;
use App\Domain\Scraping\Domain\Contract\Repository\ScrapingJobRepositoryInterface;
use App\Domain\Scraping\Domain\Model\ScrapingJob;
/**
 * In-memory job repository for tests, backed by a plain array.
 */
class InMemoryScrapingJobRepository implements ScrapingJobRepositoryInterface
{
    /** @var ScrapingJob[] */
    private array $jobs = [];

    /**
     * Appends the job to the stored list (no de-duplication is performed).
     */
    public function save(ScrapingJob $job): void
    {
        $this->jobs[] = $job;
    }

    /**
     * Returns every saved job, in the order they were saved.
     */
    public function getJobs(): array
    {
        return $this->jobs;
    }

    /**
     * Returns the first saved job whose id is strictly equal to $id, or null.
     */
    public function findById(string $id): ?ScrapingJob
    {
        return $this->firstJobMatching(
            static fn (ScrapingJob $candidate): bool => $candidate->getId() === $id
        );
    }

    /**
     * Returns the first saved job whose chapter id is strictly equal to
     * $chapterId, or null.
     */
    public function findByChapterId(string $chapterId): ?ScrapingJob
    {
        return $this->firstJobMatching(
            static fn (ScrapingJob $candidate): bool => $candidate->getChapterId() === $chapterId
        );
    }

    /**
     * Scans the saved jobs in insertion order and returns the first one
     * accepted by the predicate, or null when none matches.
     */
    private function firstJobMatching(callable $predicate): ?ScrapingJob
    {
        foreach ($this->jobs as $candidate) {
            if ($predicate($candidate)) {
                return $candidate;
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,85 @@
<?php
namespace App\Tests\Domain\Scraping\Application\CommandHandler;
use App\Domain\Scraping\Application\Command\ScrapeChapter;
use App\Domain\Scraping\Application\CommandHandler\ScrapeChapterHandler;
use App\Domain\Scraping\Domain\Event\ChapterScrapingFailed;
use App\Domain\Scraping\Domain\Event\ChapterScrapingStarted;
use App\Tests\Domain\Scraping\Adapter\InMemoryEventBus;
use App\Tests\Domain\Scraping\Adapter\InMemoryScraperAdapter;
use App\Tests\Domain\Scraping\Adapter\InMemoryScrapingJobRepository;
use PHPUnit\Framework\TestCase;
/**
 * Unit tests for ScrapeChapterHandler, wired with in-memory adapters.
 */
class ScrapeChapterHandlerTest extends TestCase
{
    private InMemoryScraperAdapter $scraper;
    private InMemoryScrapingJobRepository $repository;
    private InMemoryEventBus $eventBus;
    private ScrapeChapterHandler $handler;

    protected function setUp(): void
    {
        $this->scraper = new InMemoryScraperAdapter();
        $this->repository = new InMemoryScrapingJobRepository();
        $this->eventBus = new InMemoryEventBus();

        $this->handler = new ScrapeChapterHandler($this->scraper, $this->repository, $this->eventBus);
    }

    /**
     * A successful run creates one job, saves that same job and dispatches
     * a ChapterScrapingStarted event carrying its id.
     */
    public function testHandleSuccessfully(): void
    {
        $this->handler->handle($this->createCommand());

        // The scraper must have created exactly one job.
        $createdJobs = $this->scraper->getJobs();
        self::assertCount(1, $createdJobs);
        $createdJob = $createdJobs[0];

        // The very same job instance must have been saved.
        $persistedJobs = $this->repository->getJobs();
        self::assertCount(1, $persistedJobs);
        self::assertSame($createdJob, $persistedJobs[0]);

        // A single "started" event must reference that job.
        $startedEvent = $this->assertSingleDispatchedEvent(ChapterScrapingStarted::class);
        self::assertEquals($createdJob->getId(), $startedEvent->getJobId());
    }

    /**
     * When the scraper fails, the exception propagates and a
     * ChapterScrapingFailed event describing the failure is dispatched.
     */
    public function testHandleThrowsException(): void
    {
        $command = $this->createCommand();
        $this->scraper->simulateError(new \Exception('Scraping failed'));

        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Scraping failed');

        try {
            $this->handler->handle($command);
        } finally {
            // Checked in "finally" so the event assertions run even though
            // the exception keeps propagating to PHPUnit.
            $failedEvent = $this->assertSingleDispatchedEvent(ChapterScrapingFailed::class);
            self::assertEquals(2, $failedEvent->getChapterId());
            self::assertEquals('Scraping failed', $failedEvent->getReason());
        }
    }

    /**
     * Builds the command shared by both scenarios.
     */
    private function createCommand(): ScrapeChapter
    {
        return new ScrapeChapter(chapterId: 2, sourceId: 3, mangaId: 1);
    }

    /**
     * Asserts that exactly one message was dispatched and that it is an
     * instance of the given class, then returns it.
     */
    private function assertSingleDispatchedEvent(string $expectedClass): object
    {
        $events = $this->eventBus->getDispatchedMessages();
        self::assertCount(1, $events);
        self::assertInstanceOf($expectedClass, $events[0]);

        return $events[0];
    }
}

View File

@@ -1,112 +0,0 @@
<?php
namespace App\Tests\Functional;
use App\Factory\ApiTokenFactory;
use App\Factory\CompanyFactory;
use App\Factory\UserFactory;
/**
 * Functional tests for authentication and the /api/users resource.
 */
class UserResourceTest extends ApiTestCase
{
    /**
     * Logging in with JSON credentials answers 204 and a Location header
     * containing the user's resource path.
     */
    public function testUserLoginHttp(): void
    {
        $company = CompanyFactory::createOne();
        $user = UserFactory::createOne(['company' => $company]);

        $this->browser()
            ->post('/login', [
                'json' => [
                    'email' => $user->getEmail(),
                    // NOTE(review): presumably the factory's default password — confirm.
                    'password' => 'password',
                ],
            ])
            ->assertStatus(204)
            ->assertHeaderContains('Location', '/api/users/' . $user->getId());
    }

    /**
     * An authenticated user hitting /logout gets a 204.
     */
    public function testUserLogoutHttp(): void
    {
        $user = UserFactory::createOne();

        $this->browser()
            ->actingAs($user)
            ->get('/logout')
            ->assertStatus(204);
    }

    /**
     * A request carrying a Bearer token can list users.
     */
    public function testUserLoginToken(): void
    {
        $token = ApiTokenFactory::createOne();

        $this->browser()
            // Leading slash added for consistency with every other request in this class.
            ->get('/api/users', [
                'headers' => [
                    'Authorization' => 'Bearer ' . $token->getToken(),
                ],
            ])
            ->assertStatus(200);
    }

    /**
     * An authenticated user can read their own resource; email, first name
     * and last name are checked against the created user.
     */
    public function testCanGetUser(): void
    {
        $user = UserFactory::createOne();

        $this->browser()
            ->actingAs($user)
            ->get('/api/users/' . $user->getId())
            ->assertSuccessful()
            ->assertJson()
            ->assertJsonMatches('email', $user->getEmail())
            ->assertJsonMatches('firstName', $user->getFirstName())
            ->assertJsonMatches('lastName', $user->getLastName());
    }

    /**
     * A user created through POST /api/users can then log in with the
     * credentials that were submitted.
     */
    public function testCanPostToCreateUser(): void
    {
        $loggedUser = UserFactory::createOne();

        $this->browser()
            ->actingAs($loggedUser)
            ->post('/api/users', [
                'json' => [
                    'email' => 'john.doe@mail.com',
                    'firstName' => 'John',
                    'lastName' => 'Doe',
                    'password' => 'password',
                ],
            ])
            ->assertSuccessful()
            ->post('/login', [
                'json' => [
                    'email' => 'john.doe@mail.com',
                    'password' => 'password',
                ],
            ])
            ->assertSuccessful();
    }

    /**
     * A merge-patch on the user's own resource updates the names, which is
     * verified by reading the resource back.
     */
    public function testCanPatchToUpdateUser(): void
    {
        $loggedUser = UserFactory::createOne();

        $this->browser()
            ->actingAs($loggedUser)
            ->patch('/api/users/' . $loggedUser->getId(), [
                'json' => [
                    'firstName' => 'John',
                    'lastName' => 'Doe',
                ],
                'headers' => [
                    'Content-Type' => 'application/merge-patch+json',
                ],
            ])
            ->assertSuccessful()
            ->get('/api/users/' . $loggedUser->getId())
            ->assertSuccessful()
            ->assertJson()
            ->assertJsonMatches('firstName', 'John')
            ->assertJsonMatches('lastName', 'Doe');
    }
}