using System.Text.RegularExpressions;
using FluentResults;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Services;
using NexusReader.Application.Queries.Reader;
using NexusReader.Data.Persistence;
using VersOne.Epub;

namespace NexusReader.Infrastructure.Services;

/// <summary>
/// Reads and parses EPUB files from the storage path recorded in the database.
/// </summary>
public class EpubReaderService : IEpubReader
{
    /// <summary>Number of words after which an AI action trigger block is inserted.</summary>
    private const int WordThreshold = 1000;

    /// <summary>Tags that survive sanitization (with their attributes removed).</summary>
    private const string AllowedTags = "b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr";

    private static readonly char[] WordSeparators = { ' ', '\r', '\n', '\t' };

    // The patterns below are cached once instead of being re-parsed on every call.

    /// <summary>Captures the inner markup of the document body.</summary>
    private static readonly Regex BodyRegex = new(
        @"<body[^>]*>(.*?)</body>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>
    /// Matches one block-level element (open tag through its first matching close tag) or a rule.
    /// Nested elements of the same name end the match at the inner close tag.
    /// </summary>
    // NOTE(review): the void-element alternative is assumed to be <hr> (it is on the allow-list) - confirm.
    private static readonly Regex BlockElementRegex = new(
        @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr[^>]*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>Fallback paragraph separators: line-break tags or blank lines.</summary>
    // NOTE(review): the tag separators are assumed to be <br> variants - confirm.
    private static readonly Regex ParagraphSeparatorRegex = new(
        @"<br\s*/?>|\r?\n\r?\n",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Matches style/script elements together with their content.</summary>
    private static readonly Regex StyleOrScriptRegex = new(
        @"<(style|script)\b[^>]*>.*?</\1>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

    /// <summary>Matches any tag (opening or closing) that is not on the allow-list.</summary>
    private static readonly Regex DisallowedTagRegex = new(
        @"<(?!/?(" + AllowedTags + @")\b)[^>]+>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Matches an allowed opening tag so that its attributes can be dropped.</summary>
    private static readonly Regex AllowedOpeningTagRegex = new(
        @"<(" + AllowedTags + @")\b[^>]*>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // NOTE(review): the DbContext type argument is assumed to be AppDbContext - confirm against
    // NexusReader.Data.Persistence.
    private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
    private readonly ILogger<EpubReaderService> _logger;

    public EpubReaderService(
        IDbContextFactory<AppDbContext> dbContextFactory,
        ILogger<EpubReaderService> logger)
    {
        _dbContextFactory = dbContextFactory;
        _logger = logger;
    }

    /// <inheritdoc />
    public async Task<Result<ReaderPageViewModel>> GetEpubContentAsync(
        Guid ebookId,
        int chapterIndex,
        string? userId = null,
        CancellationToken cancellationToken = default)
    {
        try
        {
            // 1. Resolve the file path from the database.
            await using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);

            // A null userId disables the ownership filter.
            var ebook = await context.Ebooks
                .AsNoTracking()
                .FirstOrDefaultAsync(
                    e => e.Id == ebookId && (userId == null || e.UserId == userId),
                    cancellationToken);

            if (ebook is null)
            {
                return Result.Fail<ReaderPageViewModel>($"Ebook '{ebookId}' not found for user '{userId}'.");
            }

            // FilePath is stored as a web-relative path (e.g. "uploads/guid_title.epub").
            // ResolvePath only ever returns a path to a file that exists.
            var fullPath = ResolvePath(ebook.FilePath);
            if (fullPath is null)
            {
                _logger.LogError(
                    "EPUB file for ebook {EbookId} not found at path '{FilePath}'.",
                    ebookId,
                    ebook.FilePath);
                return Result.Fail<ReaderPageViewModel>("The EPUB file for this book could not be found on the server.");
            }

            // 2. Parse the EPUB.
            using var bookRef = await EpubReader.OpenBookAsync(fullPath);
            var readingOrder = bookRef.GetReadingOrder();
            if (readingOrder is null || readingOrder.Count == 0)
            {
                return Result.Fail<ReaderPageViewModel>("The EPUB has no readable content files in ReadingOrder.");
            }

            // An out-of-range index falls back to the first chapter instead of failing.
            if (chapterIndex < 0 || chapterIndex >= readingOrder.Count)
            {
                chapterIndex = 0;
            }

            var chapterRef = readingOrder[chapterIndex];

            // Title preference: navigation entry, then file name, then a generic label.
            var chapterTitle = FindTitleInNavigation(bookRef.GetNavigation(), chapterRef.FilePath);
            if (string.IsNullOrWhiteSpace(chapterTitle))
            {
                chapterTitle = Path.GetFileNameWithoutExtension(chapterRef.FilePath);
            }

            if (string.IsNullOrWhiteSpace(chapterTitle))
            {
                chapterTitle = $"Chapter {chapterIndex + 1}";
            }

            var chapterContent = await chapterRef.ReadContentAsTextAsync();

            // 3. Build content blocks.
            // NOTE(review): the list element type is assumed to be ContentBlock (common base of
            // TextSegmentBlock and AiActionTriggerBlock) - confirm against ReaderPageViewModel.
            var blocks = new List<ContentBlock>();
            var wordsSinceTrigger = 0;
            var blockCounter = 0;

            foreach (var paragraph in ExtractParagraphs(chapterContent))
            {
                var sanitizedContent = SanitizeParagraph(paragraph);
                if (string.IsNullOrWhiteSpace(sanitizedContent))
                {
                    continue;
                }

                blocks.Add(new TextSegmentBlock($"seg-{blockCounter++}", sanitizedContent));
                wordsSinceTrigger += CountWords(sanitizedContent);

                if (wordsSinceTrigger >= WordThreshold)
                {
                    blocks.Add(CreateAiTrigger($"trigger-{blockCounter++}"));
                    wordsSinceTrigger = 0;
                }
            }

            // Every non-empty chapter ends with a trigger block.
            if (blocks.Count > 0 && blocks[^1] is not AiActionTriggerBlock)
            {
                blocks.Add(CreateAiTrigger($"trigger-{blockCounter++}"));
            }

            return Result.Ok(new ReaderPageViewModel(blocks, chapterIndex, readingOrder.Count, chapterTitle, ebook.Id));
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation is an expected outcome: still reported as a failed result, but not logged as an error.
            _logger.LogDebug("EPUB request for ebook {EbookId} was cancelled.", ebookId);
            return Result.Fail<ReaderPageViewModel>("The request was cancelled.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to process EPUB for ebook {EbookId}.", ebookId);
            return Result.Fail<ReaderPageViewModel>(new Error($"Failed to process EPUB: {ex.Message}").CausedBy(ex));
        }
    }

    /// <summary>
    /// Attempts to resolve a web-relative storage path to an absolute filesystem path.
    /// Searches upward from the app base directory to handle both dev and production layouts.
    /// </summary>
    /// <param name="relativePath">The stored path, relative to a <c>wwwroot</c> directory.</param>
    /// <returns>The absolute path of the existing file, or <c>null</c> when none is found.</returns>
    private static string? ResolvePath(string? relativePath)
    {
        if (string.IsNullOrWhiteSpace(relativePath))
        {
            return null;
        }

        // Normalize forward slashes to the OS separator and drop leading separators:
        // Path.Combine discards every earlier segment when a later one is rooted.
        var normalized = relativePath
            .Replace('/', Path.DirectorySeparatorChar)
            .TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
        if (normalized.Length == 0)
        {
            return null;
        }

        for (var dir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory); dir is not null; dir = dir.Parent)
        {
            // Production layout first, then src/NexusReader.Web/wwwroot (development layout).
            var found = FindUnderRoot(Path.Combine(dir.FullName, "wwwroot"), normalized)
                ?? FindUnderRoot(Path.Combine(dir.FullName, "src", "NexusReader.Web", "wwwroot"), normalized);
            if (found is not null)
            {
                return found;
            }
        }

        return null;
    }

    /// <summary>
    /// Returns the absolute path of <paramref name="relativePath"/> under <paramref name="root"/> when
    /// that file exists and lies inside the root; otherwise <c>null</c>.
    /// </summary>
    private static string? FindUnderRoot(string root, string relativePath)
    {
        var rootFull = Path.GetFullPath(root);
        var candidate = Path.GetFullPath(Path.Combine(rootFull, relativePath));

        // Reject paths that leave the root (".." segments or a rooted relativePath).
        var rootPrefix = rootFull.EndsWith(Path.DirectorySeparatorChar)
            ? rootFull
            : rootFull + Path.DirectorySeparatorChar;
        if (!candidate.StartsWith(rootPrefix, StringComparison.Ordinal))
        {
            return null;
        }

        return File.Exists(candidate) ? candidate : null;
    }

    /// <summary>
    /// Splits chapter markup into paragraph-level fragments.
    /// </summary>
    /// <param name="html">The chapter markup.</param>
    /// <returns>
    /// The block-level elements found in the body (or in the whole input when there is no body element);
    /// when there are none, the content split on line-break tags and blank lines.
    /// </returns>
    private static List<string> ExtractParagraphs(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return new List<string>();
        }

        var bodyMatch = BodyRegex.Match(html);
        var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html;

        var paragraphs = BlockElementRegex.Matches(content).Select(match => match.Value).ToList();
        if (paragraphs.Count > 0)
        {
            return paragraphs;
        }

        return ParagraphSeparatorRegex.Split(content).Where(part => part.Length > 0).ToList();
    }

    /// <summary>
    /// Reduces a fragment to the allow-listed tags, with all attributes removed.
    /// </summary>
    /// <param name="html">The fragment to sanitize.</param>
    /// <returns>The trimmed, sanitized fragment.</returns>
    private static string SanitizeParagraph(string html)
    {
        var clean = StyleOrScriptRegex.Replace(html, string.Empty);
        clean = DisallowedTagRegex.Replace(clean, string.Empty);
        clean = AllowedOpeningTagRegex.Replace(clean, "<$1>");

        // Entities are intentionally left encoded: decoding after the tag filter would turn
        // "&lt;script&gt;" back into live markup and bypass the allow-list.
        return clean.Trim();
    }

    /// <summary>Counts the whitespace-separated tokens in <paramref name="text"/>.</summary>
    private static int CountWords(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return 0;
        }

        return text.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries).Length;
    }

    /// <summary>Creates the AI prompt block with its fixed message and three actions.</summary>
    private static AiActionTriggerBlock CreateAiTrigger(string id) =>
        new(
            id,
            "Wykryto ciekawy fragment! Czy chcesz, abym wygenerował podsumowanie lub quiz z tego rozdziału?",
            new List<string> { "Podsumuj", "Generuj Quiz", "Pomiń" });

    /// <summary>
    /// Searches the navigation tree depth-first for an entry that links to the given content file.
    /// </summary>
    /// <param name="navigation">The navigation items to search; may be <c>null</c>.</param>
    /// <param name="filePath">The content file path of the chapter.</param>
    /// <returns>The title of the first matching entry, or <c>null</c> when nothing matches.</returns>
    private static string? FindTitleInNavigation(IEnumerable<EpubNavigationItemRef>? navigation, string? filePath)
    {
        // A missing navigation list means there is no title to find; it must not fail the chapter.
        if (navigation is null || string.IsNullOrEmpty(filePath))
        {
            return null;
        }

        var fileName = Path.GetFileName(filePath);

        foreach (var item in navigation)
        {
            // Links may carry either the full content path or the bare file name.
            var linkPath = item.Link?.ContentFilePath;
            if (linkPath is not null
                && (string.Equals(linkPath, filePath, StringComparison.Ordinal)
                    || string.Equals(linkPath, fileName, StringComparison.Ordinal)))
            {
                return item.Title;
            }

            var childTitle = FindTitleInNavigation(item.NestedItems, filePath);
            if (childTitle is not null)
            {
                return childTitle;
            }
        }

        return null;
    }
}