From 9c32d28e939a0fda4d42283ed459861e2cd8c8b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Jasi=C5=84ski?= Date: Mon, 1 Jun 2026 15:09:26 +0200 Subject: [PATCH 1/2] fix: preserve and render EPUB images via dynamic server endpoint (fixes #64) --- .../Abstractions/Services/IEpubReader.cs | 13 ++ .../Services/EpubReaderService.cs | 134 ++++++++++++- .../Services/WasmEpubService.cs | 24 ++- src/NexusReader.Web/Program.cs | 30 +++ .../Services/EpubReaderServiceTests.cs | 185 ++++++++++++++++++ 5 files changed, 383 insertions(+), 3 deletions(-) create mode 100644 tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs diff --git a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs index 3944179..d313d92 100644 --- a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs +++ b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs @@ -20,4 +20,17 @@ public interface IEpubReader int chapterIndex, string? userId = null, CancellationToken cancellationToken = default); + + /// + /// Retrieves a resource (like an image) from the EPUB as a byte array. + /// + /// The unique ID of the ebook to read. + /// The path of the resource within the EPUB archive. + /// The authenticated user's ID (used for tenant isolation). + /// Cancellation token. + Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default); } diff --git a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs index 03eef91..44b5d41 100644 --- a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs +++ b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs @@ -80,6 +80,9 @@ public class EpubReaderService : IEpubReader var chapterContent = await chapterRef.ReadContentAsTextAsync(); + // Rewrite relative image src URLs to use the server-side API endpoint + chapterContent = RewriteImageUrls(chapterContent, ebookId, chapterRef.FilePath); + // 3. Build content blocks var blocks = new List(); int totalWordCount = 0; @@ -142,13 +145,140 @@ public class EpubReaderService : IEpubReader return null; } + /// + public async Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default) + { + try + { + using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken); + var ebook = await context.Ebooks + .AsNoTracking() + .FirstOrDefaultAsync( + e => e.Id == ebookId && (userId == null || e.UserId == userId), + cancellationToken); + + if (ebook == null) + { + return Result.Fail($"Ebook '{ebookId}' not found."); + } + + var fullPath = ResolvePath(ebook.FilePath); + if (fullPath == null || !File.Exists(fullPath)) + { + return Result.Fail("EPUB file not found."); + } + + using var bookRef = await EpubReader.OpenBookAsync(fullPath); + + var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath).Replace('\\', '/').TrimStart('/'); + + EpubLocalContentFileRef? targetFile = null; + if (bookRef.Content?.AllFiles?.Local != null) + { + foreach (var file in bookRef.Content.AllFiles.Local) + { + var filePath = file.FilePath?.Replace('\\', '/').TrimStart('/') ?? ""; + var fileKey = file.Key?.Replace('\\', '/').TrimStart('/') ?? ""; + if (filePath.Equals(decodedPath, StringComparison.OrdinalIgnoreCase) || + fileKey.Equals(decodedPath, StringComparison.OrdinalIgnoreCase)) + { + targetFile = file; + break; + } + } + } + + if (targetFile != null) + { + if (targetFile is EpubLocalByteContentFileRef byteFile) + { + byte[] bytes = await byteFile.ReadContentAsync(); + return Result.Ok(bytes); + } + else if (targetFile is EpubLocalTextContentFileRef textFile) + { + string text = await textFile.ReadContentAsync(); + byte[] bytes = System.Text.Encoding.UTF8.GetBytes(text); + return Result.Ok(bytes); + } + } + + return Result.Fail($"Resource '{resourcePath}' not found in EPUB."); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId); + return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex)); + } + } + + private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath) + { + if (string.IsNullOrEmpty(html)) return html; + + var imgRegex = new Regex(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase); + return imgRegex.Replace(html, match => + { + var rawSrc = match.Groups["src"].Value; + + if (rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) + { + return match.Value; + } + + var resolvedPath = ResolveRelativePath(chapterPath, rawSrc); + var rewrittenSrc = $"/api/epub/{ebookId}/resource?path={System.Net.WebUtility.UrlEncode(resolvedPath)}"; + return $"{match.Groups["before"].Value}{rewrittenSrc}{match.Groups["after"].Value}"; + }); + } + + private static string ResolveRelativePath(string basePath, string relativePath) + { + if (string.IsNullOrEmpty(relativePath)) return string.Empty; + + var decodedRelative = System.Net.WebUtility.UrlDecode(relativePath); + var baseDir = Path.GetDirectoryName(basePath) ?? ""; + baseDir = baseDir.Replace('\\', '/'); + + var combined = Path.Combine(baseDir, decodedRelative).Replace('\\', '/'); + var segments = combined.Split('/'); + var stack = new Stack(); + + foreach (var segment in segments) + { + if (segment == "." || string.IsNullOrEmpty(segment)) + { + continue; + } + if (segment == "..") + { + if (stack.Count > 0) + { + stack.Pop(); + } + } + else + { + stack.Push(segment); + } + } + + return string.Join("/", stack.Reverse()); + } + private static List ExtractParagraphs(string html) { var bodyMatch = Regex.Match(html, @"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline); var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html; var paragraphs = new List(); - var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); foreach (Match match in matches) { @@ -166,7 +296,7 @@ public class EpubReaderService : IEpubReader private static string SanitizeParagraph(string html) { var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?", "", RegexOptions.IgnoreCase | RegexOptions.Singleline); - clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase); + clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", "", RegexOptions.IgnoreCase); clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase); clean = System.Net.WebUtility.HtmlDecode(clean); return clean.Trim(); diff --git a/src/NexusReader.Web.Client/Services/WasmEpubService.cs b/src/NexusReader.Web.Client/Services/WasmEpubService.cs index 252463f..fcd0b76 100644 --- a/src/NexusReader.Web.Client/Services/WasmEpubService.cs +++ b/src/NexusReader.Web.Client/Services/WasmEpubService.cs @@ -37,7 +37,29 @@ public class WasmEpubReader : IEpubReader return Result.Fail(new Error($"Network or parsing error: {ex.Message}").CausedBy(ex)); } } - // Metadata extraction moved to WasmEpubMetadataExtractor + public async Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default) + { + try + { + var response = await _httpClient.GetAsync($"/api/epub/{ebookId}/resource?path={Uri.EscapeDataString(resourcePath)}", cancellationToken); + if (response.IsSuccessStatusCode) + { + var bytes = await response.Content.ReadAsByteArrayAsync(cancellationToken); + return Result.Ok(bytes); + } + + var errorBody = await response.Content.ReadAsStringAsync(cancellationToken); + return Result.Fail($"Server error fetching EPUB resource ({response.StatusCode}): {errorBody}"); + } + catch (Exception ex) + { + return Result.Fail(new Error($"Network error fetching EPUB resource: {ex.Message}").CausedBy(ex)); + } + } } public class WasmEpubMetadataExtractor : IEpubMetadataExtractor diff --git a/src/NexusReader.Web/Program.cs b/src/NexusReader.Web/Program.cs index 033b029..4ae273d 100644 --- a/src/NexusReader.Web/Program.cs +++ b/src/NexusReader.Web/Program.cs @@ -297,6 +297,36 @@ app.MapGet("/api/epub/{ebookId:guid}/{index:int}", async (Guid ebookId, int inde return Results.BadRequest(errorMsg); }).RequireAuthorization(); +// API endpoint for WASM client/browser to fetch EPUB static resources (images, etc.) +app.MapGet("/api/epub/{ebookId:guid}/resource", async (Guid ebookId, string path, IEpubReader epubService, ClaimsPrincipal user, CancellationToken cancellationToken) => +{ + var userId = user.FindFirstValue(ClaimTypes.NameIdentifier); + var result = await epubService.GetEpubResourceAsync(ebookId, path, userId, cancellationToken); + + if (result.IsSuccess) + { + var ext = Path.GetExtension(path).ToLowerInvariant(); + var contentType = ext switch + { + ".jpg" or ".jpeg" => "image/jpeg", + ".png" => "image/png", + ".gif" => "image/gif", + ".svg" => "image/svg+xml", + ".webp" => "image/webp", + ".css" => "text/css", + ".otf" => "font/otf", + ".ttf" => "font/ttf", + ".woff" => "font/woff", + ".woff2" => "font/woff2", + _ => "application/octet-stream" + }; + return Results.File(result.Value, contentType); + } + + var errorMsg = result.Errors.Count > 0 ? result.Errors[0].Message : "Resource not found"; + return Results.NotFound(errorMsg); +}).RequireAuthorization(); + var knowledgeApi = app.MapGroup("/api/knowledge") .RequireAuthorization("HasAvailableTokens") .DisableAntiforgery(); diff --git a/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs b/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs new file mode 100644 index 0000000..e01f044 --- /dev/null +++ b/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs @@ -0,0 +1,185 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using FluentAssertions; +using Microsoft.Data.Sqlite; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging; +using Moq; +using NexusReader.Data.Persistence; +using NexusReader.Domain.Entities; +using NexusReader.Application.Queries.Reader; +using NexusReader.Infrastructure.Services; +using Xunit; + +namespace NexusReader.Application.Tests.Services; + +public class EpubReaderServiceTests : IDisposable +{ + private readonly SqliteConnection _connection; + private readonly DbContextOptions _contextOptions; + private readonly Mock> _dbContextFactoryMock; + private readonly Mock> _loggerMock; + + public EpubReaderServiceTests() + { + _connection = new SqliteConnection("DataSource=:memory:"); + _connection.Open(); + + _contextOptions = new DbContextOptionsBuilder() + .UseSqlite(_connection) + .Options; + + // Seed initial database schema + using var context = new AppDbContext(_contextOptions); + context.Database.EnsureCreated(); + + _dbContextFactoryMock = new Mock>(); + _dbContextFactoryMock.Setup(f => f.CreateDbContextAsync(It.IsAny())) + .ReturnsAsync(() => new AppDbContext(_contextOptions)); + _dbContextFactoryMock.Setup(f => f.CreateDbContext()) + .Returns(() => new AppDbContext(_contextOptions)); + + _loggerMock = new Mock>(); + } + + [Fact] + public async Task GetEpubContentAsync_RewritesImageUrlsAndExtractsImages() + { + // Arrange + var ebookId = Guid.NewGuid(); + var userId = "test-user-id"; + + using (var context = new AppDbContext(_contextOptions)) + { + var user = new NexusUser + { + Id = userId, + UserName = "testuser", + Email = "test@nexus.com", + TenantId = "tenant-123", + SubscriptionPlanId = 1 + }; + context.Users.Add(user); + + var author = new Author { Id = 10, Name = "Giorgio Vasari" }; + context.Authors.Add(author); + + var ebook = new Ebook + { + Id = ebookId, + UserId = userId, + Title = "Test Book", + AuthorId = author.Id, + FilePath = "assets/book.epub", + AddedDate = DateTime.UtcNow, + LastReadDate = DateTime.UtcNow, + Progress = 0, + LastChapter = "Introduction" + }; + context.Ebooks.Add(ebook); + await context.SaveChangesAsync(); + } + + var service = new EpubReaderService(_dbContextFactoryMock.Object, _loggerMock.Object); + + // Act + var result = await service.GetEpubContentAsync(ebookId, 0, userId); + + // Assert + result.IsSuccess.Should().BeTrue(); + result.Value.Should().NotBeNull(); + result.Value.Blocks.Should().NotBeEmpty(); + + // Check that any img tags extracted are preserved and rewritten + var hasImages = false; + foreach (var block in result.Value.Blocks) + { + if (block is TextSegmentBlock textBlock && textBlock.Content.Contains(" Date: Mon, 1 Jun 2026 18:03:39 +0200 Subject: [PATCH 2/2] fix(epub): resolve PR #65 review comments for EPUB image rendering, path traversal validation, and security sanitization --- .../Services/EpubReaderService.cs | 47 ++++++++-- src/NexusReader.Maui/MauiProgram.cs | 4 + .../Pages/Account/Login.razor | 6 +- .../Pages/Account/Register.razor | 4 +- .../Pages/SerilogDemo.razor | 5 ++ .../Services/FeatureSettings.cs | 11 +++ .../Services/ReaderStateService.cs | 2 + src/NexusReader.Web.Client/Program.cs | 3 + src/NexusReader.Web/Program.cs | 23 ++++- .../Services/EpubReaderServiceTests.cs | 90 +++++++++++++++++++ 10 files changed, 179 insertions(+), 16 deletions(-) create mode 100644 src/NexusReader.UI.Shared/Services/FeatureSettings.cs diff --git a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs index 44b5d41..6079d42 100644 --- a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs +++ b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs @@ -18,6 +18,16 @@ public class EpubReaderService : IEpubReader private readonly ILogger _logger; private const int WordThreshold = 1000; + private static readonly Regex ImageTagRegex = new(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex BodyMatchRegex = new(@"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex ParagraphMatchRegex = new(@"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex StyleScriptRegex = new(@"<(style|script)\b[^>]*>.*?", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex WhitelistTagsRegex = new(@"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex StripAttributesRegex = new(@"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex ImgTagSanitizerRegex = new(@"]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex SrcAttributeRegex = new(@"\bsrc=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex AltAttributeRegex = new(@"\balt=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + public EpubReaderService( IDbContextFactory dbContextFactory, ILogger logger) @@ -174,7 +184,13 @@ public class EpubReaderService : IEpubReader using var bookRef = await EpubReader.OpenBookAsync(fullPath); - var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath).Replace('\\', '/').TrimStart('/'); + var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath); + if (decodedPath.Contains("..") || decodedPath.Contains(":") || decodedPath.StartsWith("/") || decodedPath.StartsWith("\\")) + { + return Result.Fail("Invalid resource path."); + } + + decodedPath = decodedPath.Replace('\\', '/').TrimStart('/'); EpubLocalContentFileRef? targetFile = null; if (bookRef.Content?.AllFiles?.Local != null) @@ -220,11 +236,15 @@ public class EpubReaderService : IEpubReader { if (string.IsNullOrEmpty(html)) return html; - var imgRegex = new Regex(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase); - return imgRegex.Replace(html, match => + return ImageTagRegex.Replace(html, match => { var rawSrc = match.Groups["src"].Value; + if (rawSrc.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase)) + { + return ""; // Completely block script execution in image src + } + if (rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) || rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) @@ -274,11 +294,11 @@ public class EpubReaderService : IEpubReader private static List ExtractParagraphs(string html) { - var bodyMatch = Regex.Match(html, @"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var bodyMatch = BodyMatchRegex.Match(html); var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html; var paragraphs = new List(); - var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var matches = ParagraphMatchRegex.Matches(content); foreach (Match match in matches) { @@ -295,9 +315,20 @@ public class EpubReaderService : IEpubReader private static string SanitizeParagraph(string html) { - var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?", "", RegexOptions.IgnoreCase | RegexOptions.Singleline); - clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", "", RegexOptions.IgnoreCase); - clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase); + var clean = StyleScriptRegex.Replace(html, ""); + clean = WhitelistTagsRegex.Replace(clean, ""); + clean = StripAttributesRegex.Replace(clean, "<$1>"); + + // Securely sanitize img tags by keeping ONLY src and alt attributes to prevent XSS (onerror, onload, style, etc.) + clean = ImgTagSanitizerRegex.Replace(clean, m => + { + var srcMatch = SrcAttributeRegex.Match(m.Value); + var altMatch = AltAttributeRegex.Match(m.Value); + var srcAttr = srcMatch.Success ? $" src=\"{srcMatch.Groups["src"].Value}\"" : ""; + var altAttr = altMatch.Success ? $" alt=\"{altMatch.Groups["alt"].Value}\"" : ""; + return $""; + }); + clean = System.Net.WebUtility.HtmlDecode(clean); return clean.Trim(); } diff --git a/src/NexusReader.Maui/MauiProgram.cs b/src/NexusReader.Maui/MauiProgram.cs index a2a7733..a4b2c6c 100644 --- a/src/NexusReader.Maui/MauiProgram.cs +++ b/src/NexusReader.Maui/MauiProgram.cs @@ -63,6 +63,10 @@ public static class MauiProgram builder.Services.AddScoped(sp => sp.GetRequiredService().CreateClient("NexusAPI")); // UI State + // Feature settings (avoiding direct raw IConfiguration injection in client pages) + var featureSettings = builder.Configuration.GetSection("Features").Get() ?? new FeatureSettings(); + builder.Services.AddSingleton(featureSettings); + builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); diff --git a/src/NexusReader.UI.Shared/Pages/Account/Login.razor b/src/NexusReader.UI.Shared/Pages/Account/Login.razor index 703382a..5295a1f 100644 --- a/src/NexusReader.UI.Shared/Pages/Account/Login.razor +++ b/src/NexusReader.UI.Shared/Pages/Account/Login.razor @@ -7,7 +7,7 @@ @inject IIdentityService IdentityService @inject NavigationManager NavigationManager @inject IJSRuntime JS -@inject IConfiguration Configuration +@inject FeatureSettings FeatureSettings