using System.Text.RegularExpressions;
using FluentResults;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Services;
using VersOne.Epub;

namespace NexusReader.Infrastructure.Services;

/// <summary>
/// Reads an EPUB file found under a <c>wwwroot</c> directory and returns the plain text
/// of every item in the book's reading order, with markup removed.
/// </summary>
public class EpubExtractor : IEpubExtractor
{
    // Matches a whole <style>/<script> element: opening tag, body, and the closing tag of the
    // same name (\1). The (?<!/) lookbehind skips XHTML self-closing forms such as
    // <script src="x.js"/>, which have no body and must not swallow text up to a later closing tag.
    private static readonly Regex _styleOrScriptElement = new(
        @"<(style|script)\b[^>]*(?<!/)>.*?</\1\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

    // Matches any remaining tag.
    private static readonly Regex _anyTag = new(@"<[^>]*>", RegexOptions.Compiled);

    // Matches a run of whitespace so it can be collapsed to a single space.
    private static readonly Regex _whitespaceRun = new(@"\s+", RegexOptions.Compiled);

    private readonly ILogger<EpubExtractor> _logger;

    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger used to report missing files and extraction errors.</param>
    public EpubExtractor(ILogger<EpubExtractor> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Opens the EPUB identified by <paramref name="relativePath"/> and extracts the text of
    /// each reading-order item.
    /// </summary>
    /// <param name="relativePath">Path of the EPUB relative to a <c>wwwroot</c> directory; <c>/</c> separators are accepted.</param>
    /// <param name="cancellationToken">Checked before each chapter is read.</param>
    /// <returns>
    /// A successful result holding one cleaned string per reading-order item, or a failed result
    /// when the file cannot be found, the book has no reading order, the operation is cancelled,
    /// or any exception is thrown while reading. Exceptions are logged and wrapped, not rethrown.
    /// </returns>
    public async Task<Result<List<string>>> ExtractChaptersTextAsync(string relativePath, CancellationToken cancellationToken = default)
    {
        try
        {
            var fullPath = ResolvePath(relativePath);
            if (string.IsNullOrEmpty(fullPath) || !File.Exists(fullPath))
            {
                _logger.LogError("[EpubExtractor] EPUB file not found at path: {FilePath}", relativePath);
                return Result.Fail<List<string>>($"Plik EPUB nie został znaleziony na dysku: {relativePath}");
            }

            using var bookRef = await EpubReader.OpenBookAsync(fullPath);
            var readingOrder = bookRef.GetReadingOrder();
            if (readingOrder == null || !readingOrder.Any())
            {
                return Result.Fail<List<string>>("EPUB nie zawiera czytelnych rozdziałów.");
            }

            var chapters = new List<string>();
            foreach (var chapterRef in readingOrder)
            {
                // A cancelled run must not be reported as success: a truncated chapter list is
                // indistinguishable from a complete book for the caller.
                if (cancellationToken.IsCancellationRequested)
                {
                    _logger.LogWarning("[EpubExtractor] Extraction cancelled for EPUB: {FilePath}", relativePath);
                    return Result.Fail<List<string>>("EPUB text extraction was cancelled.");
                }

                var rawContent = await chapterRef.ReadContentAsTextAsync();
                chapters.Add(StripHtml(rawContent));
            }

            return Result.Ok(chapters);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "[EpubExtractor] Error extracting chapters from EPUB: {FilePath}", relativePath);
            return Result.Fail<List<string>>(new Error("Failed to parse and extract text from EPUB").CausedBy(ex));
        }
    }

    /// <summary>
    /// Walks from the application base directory up to the filesystem root and returns the first
    /// existing file found at <c>wwwroot/&lt;relativePath&gt;</c> or
    /// <c>src/NexusReader.Web/wwwroot/&lt;relativePath&gt;</c>.
    /// </summary>
    /// <param name="relativePath">Path below <c>wwwroot</c>; <c>/</c> is converted to the platform separator.</param>
    /// <returns>The full path of the first match, or <c>null</c> when no candidate exists.</returns>
    private static string? ResolvePath(string relativePath)
    {
        // Treat a missing path as "not found" instead of failing on the Replace call below.
        if (string.IsNullOrWhiteSpace(relativePath))
        {
            return null;
        }

        // NOTE(review): Path.Combine discards the preceding segments when a later segment is
        // rooted, so a value starting with a separator bypasses the wwwroot prefix — confirm
        // callers always pass a relative path.
        var normalized = relativePath.Replace('/', Path.DirectorySeparatorChar);

        var currentDir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory);
        while (currentDir != null)
        {
            var candidate = Path.Combine(currentDir.FullName, "wwwroot", normalized);
            if (File.Exists(candidate))
            {
                return candidate;
            }

            // Repository layout fallback: the web project's wwwroot below a "src" folder.
            var devCandidate = Path.Combine(currentDir.FullName, "src", "NexusReader.Web", "wwwroot", normalized);
            if (File.Exists(devCandidate))
            {
                return devCandidate;
            }

            currentDir = currentDir.Parent;
        }

        return null;
    }

    /// <summary>
    /// Converts markup to plain text: drops style/script elements with their bodies, replaces the
    /// remaining tags with spaces, decodes HTML entities and collapses whitespace.
    /// </summary>
    /// <param name="html">Raw markup; <c>null</c> or empty yields an empty string.</param>
    /// <returns>The trimmed text with every whitespace run reduced to a single space.</returns>
    private static string StripHtml(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        var clean = _styleOrScriptElement.Replace(html, "");

        // Tags become spaces so that words separated only by markup do not run together.
        clean = _anyTag.Replace(clean, " ");
        clean = System.Net.WebUtility.HtmlDecode(clean);

        return _whitespaceRun.Replace(clean, " ").Trim();
    }
}