Added:
- Refactor MangaScraperService (not used everywhere now) - Added JavascriptScraper.php - Added alternative slugs in Manga.php - Improvement in manga edit form
This commit is contained in:
110
src/Service/Scraper/AbstractScraper.php
Normal file
110
src/Service/Scraper/AbstractScraper.php
Normal file
@@ -0,0 +1,110 @@
|
||||
<?php
|
||||
|
||||
namespace App\Service\Scraper;
|
||||
|
||||
use App\Entity\Chapter;
|
||||
use App\Entity\ContentSource;
|
||||
use App\Entity\Manga;
|
||||
use App\Event\PageScrappingProgressEvent;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use GuzzleHttp\Client;
|
||||
use GuzzleHttp\Exception\GuzzleException;
|
||||
use GuzzleHttp\Exception\RequestException;
|
||||
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
|
||||
|
||||
/**
 * Shared helpers for chapter scrapers: resolving a reachable chapter URL,
 * building the CBZ destination path, packing downloaded pages into a CBZ
 * archive, cleaning up temporary files and dispatching progress events.
 */
abstract class AbstractScraper implements ScraperInterface
{
    /** Path, appended to the project directory, under which CBZ archives are stored. */
    public const string PUBLIC_CBZ = '/public/cbz';

    /** HTTP client used to probe chapter URLs; instantiated in the constructor. */
    protected Client $httpClient;

    /**
     * @param string                   $projectDir      Base directory that PUBLIC_CBZ is appended to.
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used for page-progress events.
     * @param EntityManagerInterface   $entityManager   Not used by this base class; exposed to subclasses.
     */
    public function __construct(
        protected string $projectDir,
        protected EventDispatcherInterface $eventDispatcher,
        protected EntityManagerInterface $entityManager,
    ) {
        $this->httpClient = new Client();
    }

    /**
     * Returns the first chapter URL that answers a HEAD request with HTTP 200.
     *
     * The manga's main slug is tried first, followed by its alternative slugs.
     *
     * @return string|null The first valid URL, or null when no slug yields one.
     */
    protected function getValidChapterUrl(ContentSource $contentSource, Manga $manga, float $chapterNumber): ?string
    {
        // array_unique() keeps the first occurrence, so the probing order is
        // unchanged while a slug listed twice is only requested once.
        $slugs = array_unique(array_merge([$manga->getSlug()], $manga->getAlternativeSlugs() ?? []));

        foreach ($slugs as $slug) {
            $url = $contentSource->getChapterUrl($slug, $chapterNumber);

            if ($this->isChapterUrlValid($url)) {
                return $url;
            }
        }

        return null;
    }

    /**
     * Tells whether a HEAD request to the given URL ends with HTTP status 200.
     *
     * Any Guzzle failure (HTTP error response, connection or DNS failure, ...)
     * is reported as "not valid" rather than propagated.
     */
    protected function isChapterUrlValid(string $url): bool
    {
        try {
            return $this->httpClient->head($url)->getStatusCode() === 200;
        } catch (GuzzleException) {
            // GuzzleException also covers ConnectException, which is not a
            // RequestException in Guzzle 7 and would otherwise escape.
            return false;
        }
    }

    /**
     * Builds the absolute CBZ path for a chapter, creating the volume directory if needed.
     *
     * @return string "<volume dir>/<slug>_vol<volume>_ch<number>.cbz"
     *
     * @throws \RuntimeException When the volume directory cannot be created.
     */
    protected function generateCbzPath(Manga $manga, Chapter $chapter): string
    {
        $volumeDir = $this->createDirectories($manga, $chapter->getVolume());

        $fileName = sprintf(
            '%s_vol%d_ch%s.cbz',
            $manga->getSlug(),
            $chapter->getVolume(),
            $chapter->getNumber(),
        );

        return $volumeDir . '/' . $fileName;
    }

    /**
     * Packs the downloaded page images into a CBZ (zip) archive.
     *
     * Each page is stored at the archive root under the base name of its local file.
     *
     * @param string $tempDir     Not used by this implementation; kept for signature compatibility.
     * @param array  $pageData    List of pages; each entry must provide a 'local_image_url' key
     *                            holding the path of the image file to add.
     * @param string $cbzFilePath Destination path of the archive.
     *
     * @throws \RuntimeException When the archive cannot be opened or written.
     */
    protected function createCbzFile(string $tempDir, array $pageData, string $cbzFilePath): void
    {
        $zip = new \ZipArchive();

        $status = $zip->open($cbzFilePath, \ZipArchive::CREATE);
        if ($status !== true) {
            // open() returns an error code (or false) on failure; surface it
            // instead of silently producing no archive.
            throw new \RuntimeException(sprintf(
                'Unable to open CBZ archive "%s" (ZipArchive status: %s).',
                $cbzFilePath,
                var_export($status, true),
            ));
        }

        foreach ($pageData as $page) {
            $imagePath = $page['local_image_url'];
            $zip->addFile($imagePath, basename($imagePath));
        }

        // The archive is only written to disk on close(), so a failure here
        // means the CBZ file was not produced.
        if (!$zip->close()) {
            throw new \RuntimeException(sprintf('Unable to write CBZ archive "%s".', $cbzFilePath));
        }
    }

    /**
     * Deletes the regular files matched by "<directory>/*" and then removes the directory.
     *
     * Does nothing when the directory does not exist. Sub-directories are not
     * removed, so rmdir() can still fail if any are present.
     */
    protected function cleanupTempFiles(string $directory): void
    {
        if (!is_dir($directory)) {
            return;
        }

        // glob() returns false on error; treat that as "nothing to delete".
        $entries = glob($directory . '/*') ?: [];

        foreach ($entries as $entry) {
            if (is_file($entry)) {
                unlink($entry);
            }
        }

        rmdir($directory);
    }

    /**
     * Ensures the storage directory for a manga volume exists and returns its path.
     *
     * Layout: "<projectDir><PUBLIC_CBZ>/<Slug> (<year|unknown>)/volume_<n>".
     *
     * @throws \RuntimeException When the directory cannot be created.
     */
    protected function createDirectories(Manga $manga, int $volume): string
    {
        $mangaYear = $manga->getPublicationYear() ?? 'unknown';
        $mangaDir = sprintf(
            '%s/%s (%s)',
            $this->projectDir . self::PUBLIC_CBZ,
            ucfirst($manga->getSlug()),
            $mangaYear,
        );

        // The volume number is written unpadded (volume_1, not volume_01);
        // keep it that way so paths match directories created earlier.
        $volumeDir = sprintf('%s/volume_%d', $mangaDir, $volume);

        // The second is_dir() covers the case where another process created
        // the directory between the first check and mkdir().
        if (!is_dir($volumeDir) && !mkdir($volumeDir, 0755, true) && !is_dir($volumeDir)) {
            throw new \RuntimeException(sprintf('Directory "%s" could not be created.', $volumeDir));
        }

        return $volumeDir;
    }

    /**
     * Trims the URL and strips ASCII control characters (0x00-0x1F and 0x7F) from it.
     */
    protected function cleanImageUrl(string $url): string
    {
        $trimmed = trim($url);

        // preg_replace() returns null on a PCRE error; fall back to the trimmed
        // input so the declared string return type always holds.
        return preg_replace('/[\x00-\x1F\x7F]/', '', $trimmed) ?? $trimmed;
    }

    /**
     * Dispatches a PageScrappingProgressEvent carrying the chapter id, the
     * current page and the total number of pages.
     */
    protected function dispatchProgressEvent(Chapter $chapter, int $currentPage, int $totalPages): void
    {
        $event = new PageScrappingProgressEvent($chapter->getId(), $currentPage, $totalPages);
        $this->eventDispatcher->dispatch($event, PageScrappingProgressEvent::NAME);
    }
}
|
||||
Reference in New Issue
Block a user