diff --git a/compose.yaml b/compose.yaml index f5e815a..d109c66 100644 --- a/compose.yaml +++ b/compose.yaml @@ -25,7 +25,7 @@ services: ports: # HTTP - target: 80 - published: ${HTTP_PORT:-80} + published: ${HTTP_PORT:-8081} protocol: tcp # HTTPS - target: 443 diff --git a/src/Domain/Scraping/Application/Command/ScrapeChapter.php b/src/Domain/Scraping/Application/Command/ScrapeChapter.php new file mode 100644 index 0000000..6c44853 --- /dev/null +++ b/src/Domain/Scraping/Application/Command/ScrapeChapter.php @@ -0,0 +1,11 @@ +scraper->createScrapingJob( + $command->chapterId, + $command->sourceId + ); + + /* Persist the freshly created job before the "started" event is dispatched. */ $this->scrapingJobRepository->save($job); + + $this->eventBus->dispatch(new ChapterScrapingStarted($job->getId())); + + /* NOTE(review): scrape() runs after the only save() visible here; confirm the scraped pages and final status are persisted elsewhere. */ $this->scraper->scrape($job); + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Contract/Repository/ScrapingJobRepositoryInterface.php b/src/Domain/Scraping/Domain/Contract/Repository/ScrapingJobRepositoryInterface.php new file mode 100644 index 0000000..194e847 --- /dev/null +++ b/src/Domain/Scraping/Domain/Contract/Repository/ScrapingJobRepositoryInterface.php @@ -0,0 +1,12 @@ +jobId; + } + + /** Returns the array held in the scrapedPages property. */ public function getScrapedPages(): array + { + return $this->scrapedPages; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Event/ChapterScrapingStarted.php b/src/Domain/Scraping/Domain/Event/ChapterScrapingStarted.php new file mode 100644 index 0000000..2549442 --- /dev/null +++ b/src/Domain/Scraping/Domain/Event/ChapterScrapingStarted.php @@ -0,0 +1,15 @@ +jobId; + } +} diff --git a/src/Domain/Scraping/Domain/Event/PageScrapingProgressed.php b/src/Domain/Scraping/Domain/Event/PageScrapingProgressed.php new file mode 100644 index 0000000..0f4e6c0 --- /dev/null +++ b/src/Domain/Scraping/Domain/Event/PageScrapingProgressed.php @@ -0,0 +1,23 @@ +jobId; + } + + /** Returns the ScrapingProgress held in the progress property. */ public function getProgress(): ScrapingProgress + { + return $this->progress; + } +} \ No newline at end of file diff --git 
a/src/Domain/Scraping/Domain/Model/ScrapingJob.php b/src/Domain/Scraping/Domain/Model/ScrapingJob.php new file mode 100644 index 0000000..5361c7f --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/ScrapingJob.php @@ -0,0 +1,84 @@ +status = ScrapingStatus::PENDING; + $this->createdAt = new \DateTimeImmutable(); + } + + /** Stores the image URL value under the page number value (an existing entry for that number is overwritten) and moves a PENDING job to IN_PROGRESS. */ public function addPage(PageNumber $pageNumber, ImageUrl $imageUrl): void + { + $this->pages[$pageNumber->getValue()] = $imageUrl->getValue(); + if ($this->status === ScrapingStatus::PENDING) { + $this->status = ScrapingStatus::IN_PROGRESS; + } + } + + /** Sets the status to COMPLETED and stamps completedAt with the current time. */ public function complete(): void + { + $this->status = ScrapingStatus::COMPLETED; + $this->completedAt = new \DateTimeImmutable(); + } + + /** Sets the status to FAILED and stamps completedAt with the current time. */ public function fail(): void + { + $this->status = ScrapingStatus::FAILED; + $this->completedAt = new \DateTimeImmutable(); + } + + /** Returns the job identifier. */ public function getId(): string + { + return $this->id; + } + + /** Returns the chapter identifier this job was created with. */ public function getChapterId(): string + { + return $this->chapterId; + } + + /** Returns the manga identifier this job was created with. */ public function getMangaId(): string + { + return $this->mangaId; + } + + /** Returns the source identifier this job was created with. */ public function getSourceId(): string + { + return $this->sourceId; + } + + /** Returns the collected pages: keys are page number values, values are image URL values, as written by addPage(). */ public function getPages(): array + { + return $this->pages; + } + + /** Returns the current status; addPage(), complete() and fail() are the methods that change it. */ public function getStatus(): ScrapingStatus + { + return $this->status; + } + + /** Returns the creation timestamp assigned in the constructor. */ public function getCreatedAt(): \DateTimeImmutable + { + return $this->createdAt; + } + + /** Returns the timestamp written by complete() or fail(); the return type allows null. */ public function getCompletedAt(): ?\DateTimeImmutable + { + return $this->completedAt; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ScrapingProgress.php b/src/Domain/Scraping/Domain/Model/ScrapingProgress.php new file mode 100644 index 0000000..b5861b3 --- /dev/null +++ b/src/Domain/Scraping/Domain/Model/ScrapingProgress.php @@ -0,0 +1,19 @@ +totalPages === 0) { + /* Avoids dividing by zero when the total page count is zero. */ return 0; + } + /* Share of scraped pages expressed on a 0-100 scale. */ return ($this->pagesScraped / $this->totalPages) * 100; + } +} \ No newline at end of file diff --git a/src/Domain/Scraping/Domain/Model/ScrapingStatus.php 
b/src/Domain/Scraping/Domain/Model/ScrapingStatus.php
new file mode 100644
index 0000000..1d88c84
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/ScrapingStatus.php
@@ -0,0 +1,11 @@
+url;
+    }
+
+    /**
+     * Returns the file extension found in the path component of the URL.
+     *
+     * parse_url() yields null when the URL has no path component and false
+     * when the URL is seriously malformed; neither may be handed to
+     * pathinfo(), so both cases resolve to an empty string here.
+     *
+     * @return string Extension without the leading dot, or '' when there is none.
+     */
+    public function getExtension(): string
+    {
+        $path = parse_url($this->url, PHP_URL_PATH);
+
+        return is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
+    }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Domain/Model/ValueObject/PageNumber.php b/src/Domain/Scraping/Domain/Model/ValueObject/PageNumber.php
new file mode 100644
index 0000000..5c0fa9d
--- /dev/null
+++ b/src/Domain/Scraping/Domain/Model/ValueObject/PageNumber.php
@@ -0,0 +1,24 @@
+number;
+    }
+
+    /**
+     * Returns the page number left-padded with zeros to at least three digits (7 becomes "007").
+     */
+    public function getFormattedNumber(): string
+    {
+        return sprintf('%03d', $this->number);
+    }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/DoctrineScrapingJobRepository.php b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineScrapingJobRepository.php
new file mode 100644
index 0000000..20ee0b1
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/DoctrineScrapingJobRepository.php
@@ -0,0 +1,51 @@
+entityManager->persist($job);
+        $this->entityManager->flush();
+    }
+
+    // NOTE(review): every query below hands ScrapingJob::class to the entity
+    // manager, while this same diff adds ScrapingJobEntity with a toDomain()
+    // conversion. Confirm which of the two classes carries the Doctrine mapping.
+
+    /**
+     * Looks a job up by its identifier.
+     *
+     * @return ScrapingJob|null Null when no job has that identifier.
+     */
+    public function findById(string $id): ?ScrapingJob
+    {
+        return $this->entityManager->getRepository(ScrapingJob::class)->find($id);
+    }
+
+    /**
+     * Returns one job whose chapterId field equals the given value.
+     *
+     * @return ScrapingJob|null Null when no job matches.
+     */
+    public function findByChapterId(string $chapterId): ?ScrapingJob
+    {
+        return $this->entityManager->getRepository(ScrapingJob::class)
+            ->findOneBy(['chapterId' => $chapterId]);
+    }
+
+    /**
+     * Returns the jobs whose status column equals 'pending'.
+     *
+     * NOTE(review): the literal presumably mirrors ScrapingStatus::PENDING->value; verify against the enum.
+     */
+    public function findPendingJobs(): array
+    {
+        return $this->entityManager->getRepository(ScrapingJob::class)
+            ->createQueryBuilder('sj')
+            ->where('sj.status = :status')
+            ->setParameter('status', 'pending')
+            ->getQuery()
+            ->getResult();
+    }
+
+    /**
+     * Returns the jobs whose status column equals 'in_progress'.
+     *
+     * NOTE(review): the literal presumably mirrors ScrapingStatus::IN_PROGRESS->value; verify against the enum.
+     */
+    public function findInProgressJobs(): array
+    {
+        return $this->entityManager->getRepository(ScrapingJob::class)
+            ->createQueryBuilder('sj')
+            ->where('sj.status = :status')
+            ->setParameter('status', 'in_progress')
+            ->getQuery()
+            ->getResult();
+    }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php
new file mode 100644
index 0000000..6ed1e07
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Persistence/Entity/ScrapingJobEntity.php
@@ -0,0 +1,83 @@
+id = $job->getId();
+        $entity->chapterId = $job->getChapterId();
+        $entity->mangaId = $job->getMangaId();
+        $entity->sourceId = $job->getSourceId();
+        $entity->pages = $job->getPages();
+        $entity->status = $job->getStatus()->value;
+        $entity->createdAt = $job->getCreatedAt();
+        $entity->completedAt = $job->getCompletedAt();
+
+        return $entity;
+    }
+
+    /**
+     * Builds a ScrapingJob domain object from this persistence record.
+     *
+     * The constructor only receives the four identifiers, so pages, status
+     * and both timestamps are written afterwards through reflection.
+     * ScrapingStatus::from() throws when the stored status string matches
+     * no enum case.
+     */
+    public function toDomain(): ScrapingJob
+    {
+        $job = new ScrapingJob(
+            $this->id,
+            $this->chapterId,
+            $this->mangaId,
+            $this->sourceId
+        );
+
+        // Rebuild the job state from the persisted data
+        $reflection = new \ReflectionClass(ScrapingJob::class);
+
+        $pagesProperty = $reflection->getProperty('pages');
+        $pagesProperty->setAccessible(true);
+        $pagesProperty->setValue($job, $this->pages);
+
+        $statusProperty = $reflection->getProperty('status');
+        $statusProperty->setAccessible(true);
+        $statusProperty->setValue($job, ScrapingStatus::from($this->status));
+
+        // Replaces the "now" timestamp the constructor has just assigned.
+        $createdAtProperty = $reflection->getProperty('createdAt');
+        $createdAtProperty->setAccessible(true);
+        $createdAtProperty->setValue($job, $this->createdAt);
+
+        $completedAtProperty = $reflection->getProperty('completedAt');
+        $completedAtProperty->setAccessible(true);
+        $completedAtProperty->setValue($job, $this->completedAt);
+
+        return $job;
+    }
+}
\ No newline at end of file
diff --git a/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
new file mode 100644
index 0000000..6939472
--- /dev/null
+++ b/src/Domain/Scraping/Infrastructure/Service/Scraper/HtmlScraper.php
@@ -0,0 +1,61 @@
+buildUrl($job); // To be implemented according to your own logic
+        $response = $this->httpClient->request('GET', $url);
+
+        $crawler = new Crawler($response->getContent());
+        $images = $crawler->filter('img.manga-page'); // Adapt to the target site
+
+        // Crawler::each() passes the zero-based node position as the second
+        // callback argument, so the page number is derived from it. The
+        // previous counter was captured by value: its increment was lost
+        // after every call, each image was registered as page 1, and
+        // addPage() (keyed by page number) kept only the last image.
+        $images->each(function (Crawler $image, int $position) use ($job) {
+            // NOTE(review): attr('src') returns null for an img without a src
+            // attribute; confirm ImageUrl rejects that input cleanly.
+            $imageUrl = new ImageUrl($image->attr('src'));
+            $job->addPage(new PageNumber($position + 1), $imageUrl);
+
+            // NOTE(review): no getProgress() is visible on ScrapingJob in this
+            // diff; confirm the method exists before merging.
+            $this->eventDispatcher->dispatch(
+                new PageScrapingProgressed($job->getId(), $job->getProgress())
+            );
+        });
+
+        // Announce completion with every page collected on the job.
+        $this->eventDispatcher->dispatch(
+            new ChapterScrapingCompleted($job->getId(), $job->getPages())
+        );
+    }
+
+    /**
+     * Tells whether this scraper handles the given source type.
+     *
+     * @param string $sourceType Source type identifier.
+     *
+     * @return bool True only for the exact string 'html'.
+     */
+    public function supports(string $sourceType): bool
+    {
+        return $sourceType === 'html';
+    }
+}
\ No newline at end of file