Nexus.Reader/src/NexusReader.Infrastructure/Services/EpubReaderService.cs

using System.Text.RegularExpressions;
using FluentResults;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Services;
using NexusReader.Application.Queries.Reader;
using NexusReader.Data.Persistence;
using VersOne.Epub;

namespace NexusReader.Infrastructure.Services;

/// <summary>
/// Reads and parses EPUB files from the storage path recorded in the database.
/// </summary>
public class EpubReaderService : IEpubReader
{
    // Factory used to create a fresh AppDbContext for each public call.
    private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
    // Logger used to report missing files and unexpected failures.
    private readonly ILogger<EpubReaderService> _logger;
    // Accumulated word count at which an AI trigger block is inserted after a text segment.
    private const int WordThreshold = 1000;

    // Matches an <img> tag with a quoted src attribute. "before" is everything up to and including the
    // opening quote, "src" is the attribute value, "after" is the closing quote through the tag's '>'.
    private static readonly Regex ImageTagRegex = new(@"<img\b(?<before>[^>]*?\bsrc=[""'])(?<src>[^""']*?)(?<after>[""'][^>]*?>)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Captures (group 1) everything between the opening <body ...> tag and the first </body>.
    private static readonly Regex BodyMatchRegex = new(@"<body\b[^>]*>(.*?)</body>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    // Matches one block-level chunk: a p/h1-h6/ul/ol/blockquote/pre element up to its first matching
    // closing tag (lazy match), or a standalone <hr> or <img> tag.
    private static readonly Regex ParagraphMatchRegex = new(@"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>|<img\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    // Matches a whole <style> or <script> element including its content.
    private static readonly Regex StyleScriptRegex = new(@"<(style|script)\b[^>]*>.*?</\1>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
    // Matches any opening or closing tag whose name is NOT in the allowed list (negative lookahead).
    private static readonly Regex WhitelistTagsRegex = new(@"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Matches the opening tag of an allowed element (img excluded) together with any attributes;
    // group 1 is the tag name, which lets the caller re-emit the tag without attributes.
    private static readonly Regex StripAttributesRegex = new(@"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Matches a complete <img ...> tag so it can be rebuilt from scratch.
    private static readonly Regex ImgTagSanitizerRegex = new(@"<img\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Extracts the quoted value of a src attribute into the "src" group.
    private static readonly Regex SrcAttributeRegex = new(@"\bsrc=[""'](?<src>[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Extracts the quoted value of an alt attribute into the "alt" group.
    private static readonly Regex AltAttributeRegex = new(@"\balt=[""'](?<alt>[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Creates the service with the collaborators it needs to look up ebooks and report failures.
    /// </summary>
    /// <param name="dbContextFactory">Factory that produces the <see cref="AppDbContext"/> instances used for ebook lookups.</param>
    /// <param name="logger">Logger that receives error reports from this service.</param>
    public EpubReaderService(
        IDbContextFactory<AppDbContext> dbContextFactory,
        ILogger<EpubReaderService> logger)
        => (_dbContextFactory, _logger) = (dbContextFactory, logger);

    /// <inheritdoc />
    /// <remarks>
    /// Looks up the ebook record, locates the EPUB on disk, reads one reading-order item and turns it
    /// into text segments interleaved with AI trigger blocks. An out-of-range
    /// <paramref name="chapterIndex"/> falls back to the first item. Any exception is logged and
    /// returned as a failed result instead of being propagated.
    /// </remarks>
    public async Task<Result<ReaderPageViewModel>> GetEpubContentAsync(
        Guid ebookId,
        int chapterIndex,
        string? userId = null,
        CancellationToken cancellationToken = default)
    {
        try
        {
            // Step 1: find the ebook record. A null userId disables the ownership filter.
            using var db = await _dbContextFactory.CreateDbContextAsync(cancellationToken);

            var ebookRecord = await db.Ebooks
                .AsNoTracking()
                .FirstOrDefaultAsync(
                    e => e.Id == ebookId && (userId == null || e.UserId == userId),
                    cancellationToken);

            if (ebookRecord is null)
            {
                return Result.Fail($"Ebook '{ebookId}' not found for user '{userId}'.");
            }

            // The stored FilePath is web-relative (e.g. "uploads/guid_title.epub"); ResolvePath maps it
            // to an absolute location under a wwwroot directory.
            var epubPath = ResolvePath(ebookRecord.FilePath);
            if (epubPath is null || !File.Exists(epubPath))
            {
                _logger.LogError("EPUB file for ebook {EbookId} not found at path '{FilePath}'.", ebookId, ebookRecord.FilePath);
                return Result.Fail("The EPUB file for this book could not be found on the server.");
            }

            // Step 2: open the archive and pick the requested reading-order item.
            using var bookRef = await EpubReader.OpenBookAsync(epubPath);
            var readingOrder = bookRef.GetReadingOrder();

            if (readingOrder is null || readingOrder.Count == 0)
            {
                return Result.Fail("The EPUB has no readable content files in ReadingOrder.");
            }

            var indexInRange = chapterIndex >= 0 && chapterIndex < readingOrder.Count;
            if (!indexInRange)
            {
                chapterIndex = 0;
            }

            var chapterRef = readingOrder[chapterIndex];
            var navigation = bookRef.GetNavigation();

            // Title preference: navigation entry, then the file name, then a generic label.
            var chapterTitle = FindTitleInNavigation(navigation, chapterRef.FilePath);
            chapterTitle ??= Path.GetFileNameWithoutExtension(chapterRef.FilePath);
            chapterTitle ??= $"Chapter {chapterIndex + 1}";

            var rawChapterHtml = await chapterRef.ReadContentAsTextAsync();

            // Point relative image sources at the server-side resource endpoint.
            var chapterHtml = RewriteImageUrls(rawChapterHtml, ebookId, chapterRef.FilePath);

            // Step 3: turn the chapter into content blocks.
            var pageBlocks = new List<ContentBlock>();
            var wordsSinceTrigger = 0;
            var nextBlockId = 0;

            foreach (var rawParagraph in ExtractParagraphs(chapterHtml))
            {
                var cleaned = SanitizeParagraph(rawParagraph);
                if (string.IsNullOrWhiteSpace(cleaned))
                {
                    continue;
                }

                pageBlocks.Add(new TextSegmentBlock($"seg-{nextBlockId}", cleaned));
                nextBlockId++;

                wordsSinceTrigger += CountWords(cleaned);
                if (wordsSinceTrigger < WordThreshold)
                {
                    continue;
                }

                // Enough text has accumulated: offer an AI action and restart the running count.
                pageBlocks.Add(CreateAiTrigger($"trigger-{nextBlockId}"));
                nextBlockId++;
                wordsSinceTrigger = 0;
            }

            // A non-empty page always ends with a trigger block.
            if (pageBlocks.Count > 0 && pageBlocks[^1] is not AiActionTriggerBlock)
            {
                pageBlocks.Add(CreateAiTrigger($"trigger-{nextBlockId}"));
            }

            var page = new ReaderPageViewModel(pageBlocks, chapterIndex, readingOrder.Count, chapterTitle, ebookRecord.Id);
            return Result.Ok(page);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to process EPUB for ebook {EbookId}.", ebookId);
            return Result.Fail(new Error($"Failed to process EPUB: {ex.Message}").CausedBy(ex));
        }
    }

    /// <summary>
    /// Attempts to resolve a web-relative storage path to an absolute filesystem path.
    /// Searches upward from the app base directory to handle both dev and production layouts.
    /// </summary>
    /// <param name="relativePath">
    /// Web-relative path such as <c>uploads/book.epub</c>; a leading slash is tolerated.
    /// </param>
    /// <returns>
    /// The absolute path of the first existing candidate, or <c>null</c> when the input is empty
    /// or no candidate exists in any ancestor directory.
    /// </returns>
    private static string? ResolvePath(string relativePath)
    {
        if (string.IsNullOrEmpty(relativePath))
        {
            return null;
        }

        // Normalize forward slashes to the OS separator, then drop leading separators.
        // Path.Combine discards every earlier segment when it meets a rooted one, so without the trim
        // a stored value like "/uploads/x.epub" would bypass both the base directory and wwwroot.
        var normalized = relativePath
            .Replace('/', Path.DirectorySeparatorChar)
            .TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

        if (normalized.Length == 0)
        {
            return null;
        }

        var currentDir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory);
        while (currentDir != null)
        {
            var candidate = Path.Combine(currentDir.FullName, "wwwroot", normalized);
            if (File.Exists(candidate))
            {
                return candidate;
            }

            // Also try src/NexusReader.Web/wwwroot (development layout).
            var devCandidate = Path.Combine(currentDir.FullName, "src", "NexusReader.Web", "wwwroot", normalized);
            if (File.Exists(devCandidate))
            {
                return devCandidate;
            }

            currentDir = currentDir.Parent;
        }

        return null;
    }

    /// <inheritdoc />
    /// <remarks>
    /// The requested path is validated before the database or the archive is touched, so malformed
    /// requests fail fast. The path is matched against the archive's local files both as given and in
    /// URL-decoded form; comparison is case-insensitive and ignores leading slashes and backslash
    /// versus forward-slash differences. Any exception is logged and returned as a failed result.
    /// </remarks>
    public async Task<Result<byte[]>> GetEpubResourceAsync(
        Guid ebookId,
        string resourcePath,
        string? userId = null,
        CancellationToken cancellationToken = default)
    {
        try
        {
            // Reject unusable input up front. An empty path could otherwise match archive entries
            // whose key normalizes to an empty string.
            if (string.IsNullOrWhiteSpace(resourcePath))
            {
                return Result.Fail("Invalid resource path.");
            }

            var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath);
            if (IsUnsafeResourcePath(resourcePath) || IsUnsafeResourcePath(decodedPath))
            {
                return Result.Fail("Invalid resource path.");
            }

            // NOTE(review): if the caller already URL-decodes the value (typical for a bound query
            // parameter), decoding again turns '+' into a space. Matching both forms keeps such names
            // reachable without breaking callers that pass a still-encoded value - confirm against caller.
            var requestedKey = NormalizeResourcePath(resourcePath);
            var decodedKey = NormalizeResourcePath(decodedPath);

            using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
            var ebook = await context.Ebooks
                .AsNoTracking()
                .FirstOrDefaultAsync(
                    e => e.Id == ebookId && (userId == null || e.UserId == userId),
                    cancellationToken);

            if (ebook == null)
            {
                return Result.Fail($"Ebook '{ebookId}' not found.");
            }

            var fullPath = ResolvePath(ebook.FilePath);
            if (fullPath == null || !File.Exists(fullPath))
            {
                return Result.Fail("EPUB file not found.");
            }

            using var bookRef = await EpubReader.OpenBookAsync(fullPath);

            bool IsRequested(string candidate) =>
                candidate.Equals(decodedKey, StringComparison.OrdinalIgnoreCase) ||
                candidate.Equals(requestedKey, StringComparison.OrdinalIgnoreCase);

            EpubLocalContentFileRef? targetFile = null;
            var localFiles = bookRef.Content?.AllFiles?.Local;
            if (localFiles != null)
            {
                foreach (var file in localFiles)
                {
                    if (IsRequested(NormalizeResourcePath(file.FilePath)) ||
                        IsRequested(NormalizeResourcePath(file.Key)))
                    {
                        targetFile = file;
                        break;
                    }
                }
            }

            switch (targetFile)
            {
                case EpubLocalByteContentFileRef byteFile:
                {
                    byte[] bytes = await byteFile.ReadContentAsync();
                    return Result.Ok(bytes);
                }
                case EpubLocalTextContentFileRef textFile:
                {
                    // Text resources are returned as their UTF-8 encoding.
                    string text = await textFile.ReadContentAsync();
                    byte[] bytes = System.Text.Encoding.UTF8.GetBytes(text);
                    return Result.Ok(bytes);
                }
            }

            return Result.Fail($"Resource '{resourcePath}' not found in EPUB.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId);
            return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex));
        }
    }

    /// <summary>
    /// Returns true for paths that contain a parent-directory marker or a colon, or that start with a
    /// slash or backslash. Checks are ordinal so the result does not depend on the current culture.
    /// </summary>
    private static bool IsUnsafeResourcePath(string path) =>
        path.Contains("..", StringComparison.Ordinal)
        || path.Contains(':')
        || path.StartsWith('/')
        || path.StartsWith('\\');

    /// <summary>
    /// Converts backslashes to forward slashes and removes leading slashes; null becomes an empty string.
    /// </summary>
    private static string NormalizeResourcePath(string? path) =>
        path?.Replace('\\', '/').TrimStart('/') ?? "";

    /// <summary>
    /// Rewrites the <c>src</c> of every matched image tag so that archive-relative images are served
    /// through the resource endpoint for the given ebook.
    /// </summary>
    /// <param name="html">Chapter markup; returned unchanged when null or empty.</param>
    /// <param name="ebookId">Identifier placed in the rewritten URL.</param>
    /// <param name="chapterPath">Path of the chapter file, used as the base for relative sources.</param>
    /// <returns>The markup with image sources rewritten.</returns>
    private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        return ImageTagRegex.Replace(html, RewriteTag);

        string RewriteTag(Match tag)
        {
            var source = tag.Groups["src"].Value;

            // A javascript: source removes the whole tag.
            if (source.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
            {
                return string.Empty;
            }

            // Absolute web URLs and inline data URIs are left exactly as written.
            var isHttp = source.StartsWith("http://", StringComparison.OrdinalIgnoreCase);
            var isHttps = source.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            var isData = source.StartsWith("data:", StringComparison.OrdinalIgnoreCase);
            if (isHttp || isHttps || isData)
            {
                return tag.Value;
            }

            // Everything else is resolved against the chapter's directory and routed via the API.
            var archivePath = ResolveRelativePath(chapterPath, source);
            var apiUrl = $"/api/epub/{ebookId}/resource?path={System.Net.WebUtility.UrlEncode(archivePath)}";
            return string.Concat(tag.Groups["before"].Value, apiUrl, tag.Groups["after"].Value);
        }
    }

    /// <summary>
    /// Resolves a (possibly percent-encoded) relative reference against the directory of
    /// <paramref name="basePath"/> and collapses <c>.</c> and <c>..</c> segments.
    /// </summary>
    /// <param name="basePath">Path of the file that contains the reference.</param>
    /// <param name="relativePath">Reference as written in the markup.</param>
    /// <returns>
    /// A forward-slash separated path without a leading slash, or an empty string when
    /// <paramref name="relativePath"/> is null or empty. <c>..</c> segments that would climb above
    /// the root are ignored.
    /// </returns>
    private static string ResolveRelativePath(string basePath, string relativePath)
    {
        if (string.IsNullOrEmpty(relativePath))
        {
            return string.Empty;
        }

        // Percent-decode only. WebUtility.UrlDecode applies form-encoding rules and would turn a
        // literal '+' in a file name into a space, producing a path that does not exist.
        var decodedRelative = Uri.UnescapeDataString(relativePath);

        var baseDir = (Path.GetDirectoryName(basePath) ?? "").Replace('\\', '/');
        var combined = Path.Combine(baseDir, decodedRelative).Replace('\\', '/');

        var resolved = new List<string>();
        foreach (var segment in combined.Split('/'))
        {
            switch (segment)
            {
                case "":
                case ".":
                    // Empty and current-directory segments do not affect the result.
                    break;
                case "..":
                    if (resolved.Count > 0)
                    {
                        resolved.RemoveAt(resolved.Count - 1);
                    }
                    break;
                default:
                    resolved.Add(segment);
                    break;
            }
        }

        return string.Join("/", resolved);
    }

    /// <summary>
    /// Splits chapter markup into block-level chunks.
    /// </summary>
    /// <param name="html">Chapter markup; when it contains a body element only its content is used.</param>
    /// <returns>
    /// The matched block elements in document order, or, when none match, the content split on
    /// line-break tags and blank lines with empty pieces removed.
    /// </returns>
    private static List<string> ExtractParagraphs(string html)
    {
        var body = BodyMatchRegex.Match(html);
        var markup = body.Success ? body.Groups[1].Value : html;

        var chunks = ParagraphMatchRegex.Matches(markup)
            .Select(found => found.Value)
            .ToList();

        if (chunks.Count > 0)
        {
            return chunks;
        }

        // No recognised block elements: fall back to splitting on breaks and blank lines.
        string[] separators = { "<br />", "<br>", "\n\n", "\r\n\r\n" };
        return markup.Split(separators, StringSplitOptions.RemoveEmptyEntries).ToList();
    }

    // Matches one semicolon-terminated HTML character reference: named, decimal or hexadecimal.
    private static readonly Regex HtmlEntityRegex = new(@"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", RegexOptions.Compiled);

    /// <summary>
    /// Reduces a block of chapter markup to the allowed tags without attributes (images keep only
    /// <c>src</c> and <c>alt</c>) and decodes character references that are not markup-significant.
    /// </summary>
    /// <param name="html">One block of chapter markup.</param>
    /// <returns>The sanitized, trimmed markup.</returns>
    private static string SanitizeParagraph(string html)
    {
        var clean = StyleScriptRegex.Replace(html, "");
        clean = WhitelistTagsRegex.Replace(clean, "");
        clean = StripAttributesRegex.Replace(clean, "<$1>");

        // Securely sanitize img tags by keeping ONLY src and alt attributes to prevent XSS (onerror, onload, style, etc.)
        clean = ImgTagSanitizerRegex.Replace(clean, m =>
        {
            var srcMatch = SrcAttributeRegex.Match(m.Value);
            var altMatch = AltAttributeRegex.Match(m.Value);
            var srcAttr = srcMatch.Success ? $" src=\"{srcMatch.Groups["src"].Value}\"" : "";
            var altAttr = altMatch.Success ? $" alt=\"{altMatch.Groups["alt"].Value}\"" : "";
            return $"<img{srcAttr}{altAttr} />";
        });

        // Decoding every entity after the tag filters have run would turn "&lt;script&gt;" back into a
        // live tag and "&quot;" into an attribute delimiter, undoing the sanitization above.
        clean = DecodeNonMarkupEntities(clean);
        return clean.Trim();
    }

    /// <summary>
    /// Decodes character references one at a time, leaving any reference encoded when it decodes to
    /// <c>&lt;</c>, <c>&gt;</c>, <c>&amp;</c>, a double quote or an apostrophe.
    /// </summary>
    private static string DecodeNonMarkupEntities(string html) =>
        HtmlEntityRegex.Replace(html, entity =>
        {
            var decoded = System.Net.WebUtility.HtmlDecode(entity.Value);
            return decoded is "<" or ">" or "&" or "\"" or "'" ? entity.Value : decoded;
        });

    /// <summary>
    /// Counts the runs of characters separated by spaces, tabs, carriage returns or line feeds.
    /// </summary>
    /// <param name="text">Text to measure.</param>
    /// <returns>The number of runs; 0 when the text is null, empty or only white space.</returns>
    private static int CountWords(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return 0;
        }

        var words = 0;
        var insideWord = false;

        foreach (var ch in text)
        {
            var isSeparator = ch is ' ' or '\r' or '\n' or '\t';

            // A word starts at the first non-separator after a separator (or at the start).
            if (!isSeparator && !insideWord)
            {
                words++;
            }

            insideWord = !isSeparator;
        }

        return words;
    }

    /// <summary>
    /// Builds the AI trigger block with the fixed prompt text and its three action labels.
    /// </summary>
    /// <param name="id">Identifier assigned to the new block.</param>
    private static AiActionTriggerBlock CreateAiTrigger(string id)
    {
        const string prompt = "Wykryto ciekawy fragment! Czy chcesz, abym wygenerował podsumowanie lub quiz z tego rozdziału?";
        var actions = new List<string> { "Podsumuj", "Generuj Quiz", "Pomiń" };

        return new AiActionTriggerBlock(id, prompt, actions);
    }

    /// <summary>
    /// Searches the navigation tree depth-first for an entry that links to the given content file.
    /// </summary>
    /// <param name="navigation">
    /// Navigation items to search; may be <c>null</c> when the book has no navigation.
    /// </param>
    /// <param name="filePath">Path of the content file; compared both in full and by file name only.</param>
    /// <returns>
    /// The title of the first matching entry, or <c>null</c> when there is no navigation, no path,
    /// or no match.
    /// </returns>
    private static string? FindTitleInNavigation(IEnumerable<EpubNavigationItemRef>? navigation, string? filePath)
    {
        // Without this guard a book with no table of contents makes the foreach throw, which fails
        // the whole chapter load instead of falling back to a file-name based title.
        if (navigation is null || string.IsNullOrEmpty(filePath))
        {
            return null;
        }

        var fileName = Path.GetFileName(filePath);

        foreach (var item in navigation)
        {
            var linkedFile = item.Link?.ContentFilePath;
            if (linkedFile == filePath || linkedFile == fileName)
            {
                return item.Title;
            }

            // Recursion handles a null or empty child list by returning null.
            var childTitle = FindTitleInNavigation(item.NestedItems, filePath);
            if (childTitle != null)
            {
                return childTitle;
            }
        }

        return null;
    }
}