From bf31effd3612caedc5f14581f9e75e2aa274fa5c Mon Sep 17 00:00:00 2001 From: Antigravity Date: Mon, 1 Jun 2026 16:04:56 +0000 Subject: [PATCH] fix: preserve and render EPUB images via dynamic server endpoint (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #64 ### Summary of Changes 1. **Extended `IEpubReader` & `EpubReaderService`**: Added `GetEpubResourceAsync` to handle binary data extraction of static assets (like images) from the EPUB archive. 2. **Added Client-Side HTTP Call**: Extended `WasmEpubService` to retrieve static resources from the server using the API client. 3. **Preserved and Sanitized Images**: Updated `ExtractParagraphs` and `SanitizeParagraph` to treat `<img>` tags as first-class citizens, preserving their `src` attributes and excluding them from sanitization stripping. 4. **Dynamic URL Rewriting**: Introduced a relative-to-absolute path resolution algorithm (`ResolveRelativePath`) and rewrote image `src` attributes to use the dynamic endpoint `/api/epub/{ebookId}/resource?path=...`. 5. **Registered API Resource Serving Endpoint**: Added the `/api/epub/{ebookId:guid}/resource` minimal API endpoint in `Program.cs` that maps requests directly to `GetEpubResourceAsync` and returns files with the correct MIME type. 6. **Added Unit Tests**: Created `EpubReaderServiceTests.cs` to verify all image extraction, path resolution, and sanitization/rewriting rules. All tests pass successfully. 
--------- Co-authored-by: Marek Jasiński Reviewed-on: https://git.archimap.cloud/mjasin/Nexus.Reader/pulls/65 Co-authored-by: Antigravity Co-committed-by: Antigravity --- .../Abstractions/Services/IEpubReader.cs | 13 + .../Services/EpubReaderService.cs | 171 ++++++++++- src/NexusReader.Maui/MauiProgram.cs | 4 + .../Pages/Account/Login.razor | 6 +- .../Pages/Account/Register.razor | 4 +- .../Pages/SerilogDemo.razor | 5 + .../Services/FeatureSettings.cs | 11 + .../Services/ReaderStateService.cs | 2 + src/NexusReader.Web.Client/Program.cs | 3 + .../Services/WasmEpubService.cs | 24 +- src/NexusReader.Web/Program.cs | 47 +++ .../Services/EpubReaderServiceTests.cs | 275 ++++++++++++++++++ 12 files changed, 554 insertions(+), 11 deletions(-) create mode 100644 src/NexusReader.UI.Shared/Services/FeatureSettings.cs create mode 100644 tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs diff --git a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs index 3944179..d313d92 100644 --- a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs +++ b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs @@ -20,4 +20,17 @@ public interface IEpubReader int chapterIndex, string? userId = null, CancellationToken cancellationToken = default); + + /// + /// Retrieves a resource (like an image) from the EPUB as a byte array. + /// + /// The unique ID of the ebook to read. + /// The path of the resource within the EPUB archive. + /// The authenticated user's ID (used for tenant isolation). + /// Cancellation token. + Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? 
userId = null, + CancellationToken cancellationToken = default); } diff --git a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs index 03eef91..6079d42 100644 --- a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs +++ b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs @@ -18,6 +18,16 @@ public class EpubReaderService : IEpubReader private readonly ILogger _logger; private const int WordThreshold = 1000; + private static readonly Regex ImageTagRegex = new(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex BodyMatchRegex = new(@"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex ParagraphMatchRegex = new(@"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex StyleScriptRegex = new(@"<(style|script)\b[^>]*>.*?", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex WhitelistTagsRegex = new(@"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex StripAttributesRegex = new(@"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex ImgTagSanitizerRegex = new(@"]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex SrcAttributeRegex = new(@"\bsrc=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex AltAttributeRegex = new(@"\balt=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + public EpubReaderService( IDbContextFactory dbContextFactory, ILogger logger) @@ 
-80,6 +90,9 @@ public class EpubReaderService : IEpubReader var chapterContent = await chapterRef.ReadContentAsTextAsync(); + // Rewrite relative image src URLs to use the server-side API endpoint + chapterContent = RewriteImageUrls(chapterContent, ebookId, chapterRef.FilePath); + // 3. Build content blocks var blocks = new List(); int totalWordCount = 0; @@ -142,13 +155,150 @@ public class EpubReaderService : IEpubReader return null; } + /// + public async Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default) + { + try + { + using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken); + var ebook = await context.Ebooks + .AsNoTracking() + .FirstOrDefaultAsync( + e => e.Id == ebookId && (userId == null || e.UserId == userId), + cancellationToken); + + if (ebook == null) + { + return Result.Fail($"Ebook '{ebookId}' not found."); + } + + var fullPath = ResolvePath(ebook.FilePath); + if (fullPath == null || !File.Exists(fullPath)) + { + return Result.Fail("EPUB file not found."); + } + + using var bookRef = await EpubReader.OpenBookAsync(fullPath); + + var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath); + if (decodedPath.Contains("..") || decodedPath.Contains(":") || decodedPath.StartsWith("/") || decodedPath.StartsWith("\\")) + { + return Result.Fail("Invalid resource path."); + } + + decodedPath = decodedPath.Replace('\\', '/').TrimStart('/'); + + EpubLocalContentFileRef? targetFile = null; + if (bookRef.Content?.AllFiles?.Local != null) + { + foreach (var file in bookRef.Content.AllFiles.Local) + { + var filePath = file.FilePath?.Replace('\\', '/').TrimStart('/') ?? ""; + var fileKey = file.Key?.Replace('\\', '/').TrimStart('/') ?? 
""; + if (filePath.Equals(decodedPath, StringComparison.OrdinalIgnoreCase) || + fileKey.Equals(decodedPath, StringComparison.OrdinalIgnoreCase)) + { + targetFile = file; + break; + } + } + } + + if (targetFile != null) + { + if (targetFile is EpubLocalByteContentFileRef byteFile) + { + byte[] bytes = await byteFile.ReadContentAsync(); + return Result.Ok(bytes); + } + else if (targetFile is EpubLocalTextContentFileRef textFile) + { + string text = await textFile.ReadContentAsync(); + byte[] bytes = System.Text.Encoding.UTF8.GetBytes(text); + return Result.Ok(bytes); + } + } + + return Result.Fail($"Resource '{resourcePath}' not found in EPUB."); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId); + return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex)); + } + } + + private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath) + { + if (string.IsNullOrEmpty(html)) return html; + + return ImageTagRegex.Replace(html, match => + { + var rawSrc = match.Groups["src"].Value; + + if (rawSrc.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase)) + { + return ""; // Completely block script execution in image src + } + + if (rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) + { + return match.Value; + } + + var resolvedPath = ResolveRelativePath(chapterPath, rawSrc); + var rewrittenSrc = $"/api/epub/{ebookId}/resource?path={System.Net.WebUtility.UrlEncode(resolvedPath)}"; + return $"{match.Groups["before"].Value}{rewrittenSrc}{match.Groups["after"].Value}"; + }); + } + + private static string ResolveRelativePath(string basePath, string relativePath) + { + if (string.IsNullOrEmpty(relativePath)) return string.Empty; + + var decodedRelative = 
System.Net.WebUtility.UrlDecode(relativePath); + var baseDir = Path.GetDirectoryName(basePath) ?? ""; + baseDir = baseDir.Replace('\\', '/'); + + var combined = Path.Combine(baseDir, decodedRelative).Replace('\\', '/'); + var segments = combined.Split('/'); + var stack = new Stack(); + + foreach (var segment in segments) + { + if (segment == "." || string.IsNullOrEmpty(segment)) + { + continue; + } + if (segment == "..") + { + if (stack.Count > 0) + { + stack.Pop(); + } + } + else + { + stack.Push(segment); + } + } + + return string.Join("/", stack.Reverse()); + } + private static List ExtractParagraphs(string html) { - var bodyMatch = Regex.Match(html, @"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var bodyMatch = BodyMatchRegex.Match(html); var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html; var paragraphs = new List(); - var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var matches = ParagraphMatchRegex.Matches(content); foreach (Match match in matches) { @@ -165,9 +315,20 @@ public class EpubReaderService : IEpubReader private static string SanitizeParagraph(string html) { - var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?", "", RegexOptions.IgnoreCase | RegexOptions.Singleline); - clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase); - clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase); + var clean = StyleScriptRegex.Replace(html, ""); + clean = WhitelistTagsRegex.Replace(clean, ""); + clean = StripAttributesRegex.Replace(clean, "<$1>"); + + // Securely sanitize img tags by keeping ONLY src and alt attributes to prevent XSS (onerror, onload, style, etc.) 
+ clean = ImgTagSanitizerRegex.Replace(clean, m => + { + var srcMatch = SrcAttributeRegex.Match(m.Value); + var altMatch = AltAttributeRegex.Match(m.Value); + var srcAttr = srcMatch.Success ? $" src=\"{srcMatch.Groups["src"].Value}\"" : ""; + var altAttr = altMatch.Success ? $" alt=\"{altMatch.Groups["alt"].Value}\"" : ""; + return $""; + }); + clean = System.Net.WebUtility.HtmlDecode(clean); return clean.Trim(); } diff --git a/src/NexusReader.Maui/MauiProgram.cs b/src/NexusReader.Maui/MauiProgram.cs index a2a7733..a4b2c6c 100644 --- a/src/NexusReader.Maui/MauiProgram.cs +++ b/src/NexusReader.Maui/MauiProgram.cs @@ -63,6 +63,10 @@ public static class MauiProgram builder.Services.AddScoped(sp => sp.GetRequiredService().CreateClient("NexusAPI")); // UI State + // Feature settings (avoiding direct raw IConfiguration injection in client pages) + var featureSettings = builder.Configuration.GetSection("Features").Get() ?? new FeatureSettings(); + builder.Services.AddSingleton(featureSettings); + builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); diff --git a/src/NexusReader.UI.Shared/Pages/Account/Login.razor b/src/NexusReader.UI.Shared/Pages/Account/Login.razor index 703382a..5295a1f 100644 --- a/src/NexusReader.UI.Shared/Pages/Account/Login.razor +++ b/src/NexusReader.UI.Shared/Pages/Account/Login.razor @@ -7,7 +7,7 @@ @inject IIdentityService IdentityService @inject NavigationManager NavigationManager @inject IJSRuntime JS -@inject IConfiguration Configuration +@inject FeatureSettings FeatureSettings