From 9c32d28e939a0fda4d42283ed459861e2cd8c8b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Jasi=C5=84ski?= Date: Mon, 1 Jun 2026 15:09:26 +0200 Subject: [PATCH] fix: preserve and render EPUB images via dynamic server endpoint (fixes #64) --- .../Abstractions/Services/IEpubReader.cs | 13 ++ .../Services/EpubReaderService.cs | 134 ++++++++++++- .../Services/WasmEpubService.cs | 24 ++- src/NexusReader.Web/Program.cs | 30 +++ .../Services/EpubReaderServiceTests.cs | 185 ++++++++++++++++++ 5 files changed, 383 insertions(+), 3 deletions(-) create mode 100644 tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs diff --git a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs index 3944179..d313d92 100644 --- a/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs +++ b/src/NexusReader.Application/Abstractions/Services/IEpubReader.cs @@ -20,4 +20,17 @@ public interface IEpubReader int chapterIndex, string? userId = null, CancellationToken cancellationToken = default); + + /// + /// Retrieves a resource (like an image) from the EPUB as a byte array. + /// + /// The unique ID of the ebook to read. + /// The path of the resource within the EPUB archive. + /// The authenticated user's ID (used for tenant isolation). + /// Cancellation token. + Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default); } diff --git a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs index 03eef91..44b5d41 100644 --- a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs +++ b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs @@ -80,6 +80,9 @@ public class EpubReaderService : IEpubReader var chapterContent = await chapterRef.ReadContentAsTextAsync(); + // Rewrite relative image src URLs to use the server-side API endpoint + chapterContent = RewriteImageUrls(chapterContent, ebookId, chapterRef.FilePath); + // 3. Build content blocks var blocks = new List(); int totalWordCount = 0; @@ -142,13 +145,140 @@ public class EpubReaderService : IEpubReader return null; } + /// + public async Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default) + { + try + { + using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken); + var ebook = await context.Ebooks + .AsNoTracking() + .FirstOrDefaultAsync( + e => e.Id == ebookId && (userId == null || e.UserId == userId), + cancellationToken); + + if (ebook == null) + { + return Result.Fail($"Ebook '{ebookId}' not found."); + } + + var fullPath = ResolvePath(ebook.FilePath); + if (fullPath == null || !File.Exists(fullPath)) + { + return Result.Fail("EPUB file not found."); + } + + using var bookRef = await EpubReader.OpenBookAsync(fullPath); + + var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath).Replace('\\', '/').TrimStart('/'); + + EpubLocalContentFileRef? targetFile = null; + if (bookRef.Content?.AllFiles?.Local != null) + { + foreach (var file in bookRef.Content.AllFiles.Local) + { + var filePath = file.FilePath?.Replace('\\', '/').TrimStart('/') ?? ""; + var fileKey = file.Key?.Replace('\\', '/').TrimStart('/') ?? ""; + if (filePath.Equals(decodedPath, StringComparison.OrdinalIgnoreCase) || + fileKey.Equals(decodedPath, StringComparison.OrdinalIgnoreCase)) + { + targetFile = file; + break; + } + } + } + + if (targetFile != null) + { + if (targetFile is EpubLocalByteContentFileRef byteFile) + { + byte[] bytes = await byteFile.ReadContentAsync(); + return Result.Ok(bytes); + } + else if (targetFile is EpubLocalTextContentFileRef textFile) + { + string text = await textFile.ReadContentAsync(); + byte[] bytes = System.Text.Encoding.UTF8.GetBytes(text); + return Result.Ok(bytes); + } + } + + return Result.Fail($"Resource '{resourcePath}' not found in EPUB."); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId); + return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex)); + } + } + + private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath) + { + if (string.IsNullOrEmpty(html)) return html; + + var imgRegex = new Regex(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase); + return imgRegex.Replace(html, match => + { + var rawSrc = match.Groups["src"].Value; + + if (rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) || + rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) + { + return match.Value; + } + + var resolvedPath = ResolveRelativePath(chapterPath, rawSrc); + var rewrittenSrc = $"/api/epub/{ebookId}/resource?path={System.Net.WebUtility.UrlEncode(resolvedPath)}"; + return $"{match.Groups["before"].Value}{rewrittenSrc}{match.Groups["after"].Value}"; + }); + } + + private static string ResolveRelativePath(string basePath, string relativePath) + { + if (string.IsNullOrEmpty(relativePath)) return string.Empty; + + var decodedRelative = System.Net.WebUtility.UrlDecode(relativePath); + var baseDir = Path.GetDirectoryName(basePath) ?? ""; + baseDir = baseDir.Replace('\\', '/'); + + var combined = Path.Combine(baseDir, decodedRelative).Replace('\\', '/'); + var segments = combined.Split('/'); + var stack = new Stack(); + + foreach (var segment in segments) + { + if (segment == "." || string.IsNullOrEmpty(segment)) + { + continue; + } + if (segment == "..") + { + if (stack.Count > 0) + { + stack.Pop(); + } + } + else + { + stack.Push(segment); + } + } + + return string.Join("/", stack.Reverse()); + } + private static List ExtractParagraphs(string html) { var bodyMatch = Regex.Match(html, @"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline); var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html; var paragraphs = new List(); - var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); foreach (Match match in matches) { @@ -166,7 +296,7 @@ public class EpubReaderService : IEpubReader private static string SanitizeParagraph(string html) { var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?", "", RegexOptions.IgnoreCase | RegexOptions.Singleline); - clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase); + clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", "", RegexOptions.IgnoreCase); clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase); clean = System.Net.WebUtility.HtmlDecode(clean); return clean.Trim(); diff --git a/src/NexusReader.Web.Client/Services/WasmEpubService.cs b/src/NexusReader.Web.Client/Services/WasmEpubService.cs index 252463f..fcd0b76 100644 --- a/src/NexusReader.Web.Client/Services/WasmEpubService.cs +++ b/src/NexusReader.Web.Client/Services/WasmEpubService.cs @@ -37,7 +37,29 @@ public class WasmEpubReader : IEpubReader return Result.Fail(new Error($"Network or parsing error: {ex.Message}").CausedBy(ex)); } } - // Metadata extraction moved to WasmEpubMetadataExtractor + public async Task> GetEpubResourceAsync( + Guid ebookId, + string resourcePath, + string? userId = null, + CancellationToken cancellationToken = default) + { + try + { + var response = await _httpClient.GetAsync($"/api/epub/{ebookId}/resource?path={Uri.EscapeDataString(resourcePath)}", cancellationToken); + if (response.IsSuccessStatusCode) + { + var bytes = await response.Content.ReadAsByteArrayAsync(cancellationToken); + return Result.Ok(bytes); + } + + var errorBody = await response.Content.ReadAsStringAsync(cancellationToken); + return Result.Fail($"Server error fetching EPUB resource ({response.StatusCode}): {errorBody}"); + } + catch (Exception ex) + { + return Result.Fail(new Error($"Network error fetching EPUB resource: {ex.Message}").CausedBy(ex)); + } + } } public class WasmEpubMetadataExtractor : IEpubMetadataExtractor diff --git a/src/NexusReader.Web/Program.cs b/src/NexusReader.Web/Program.cs index 033b029..4ae273d 100644 --- a/src/NexusReader.Web/Program.cs +++ b/src/NexusReader.Web/Program.cs @@ -297,6 +297,36 @@ app.MapGet("/api/epub/{ebookId:guid}/{index:int}", async (Guid ebookId, int inde return Results.BadRequest(errorMsg); }).RequireAuthorization(); +// API endpoint for WASM client/browser to fetch EPUB static resources (images, etc.) +app.MapGet("/api/epub/{ebookId:guid}/resource", async (Guid ebookId, string path, IEpubReader epubService, ClaimsPrincipal user, CancellationToken cancellationToken) => +{ + var userId = user.FindFirstValue(ClaimTypes.NameIdentifier); + var result = await epubService.GetEpubResourceAsync(ebookId, path, userId, cancellationToken); + + if (result.IsSuccess) + { + var ext = Path.GetExtension(path).ToLowerInvariant(); + var contentType = ext switch + { + ".jpg" or ".jpeg" => "image/jpeg", + ".png" => "image/png", + ".gif" => "image/gif", + ".svg" => "image/svg+xml", + ".webp" => "image/webp", + ".css" => "text/css", + ".otf" => "font/otf", + ".ttf" => "font/ttf", + ".woff" => "font/woff", + ".woff2" => "font/woff2", + _ => "application/octet-stream" + }; + return Results.File(result.Value, contentType); + } + + var errorMsg = result.Errors.Count > 0 ? result.Errors[0].Message : "Resource not found"; + return Results.NotFound(errorMsg); +}).RequireAuthorization(); + var knowledgeApi = app.MapGroup("/api/knowledge") .RequireAuthorization("HasAvailableTokens") .DisableAntiforgery(); diff --git a/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs b/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs new file mode 100644 index 0000000..e01f044 --- /dev/null +++ b/tests/NexusReader.Application.Tests/Services/EpubReaderServiceTests.cs @@ -0,0 +1,185 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using FluentAssertions; +using Microsoft.Data.Sqlite; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging; +using Moq; +using NexusReader.Data.Persistence; +using NexusReader.Domain.Entities; +using NexusReader.Application.Queries.Reader; +using NexusReader.Infrastructure.Services; +using Xunit; + +namespace NexusReader.Application.Tests.Services; + +public class EpubReaderServiceTests : IDisposable +{ + private readonly SqliteConnection _connection; + private readonly DbContextOptions _contextOptions; + private readonly Mock> _dbContextFactoryMock; + private readonly Mock> _loggerMock; + + public EpubReaderServiceTests() + { + _connection = new SqliteConnection("DataSource=:memory:"); + _connection.Open(); + + _contextOptions = new DbContextOptionsBuilder() + .UseSqlite(_connection) + .Options; + + // Seed initial database schema + using var context = new AppDbContext(_contextOptions); + context.Database.EnsureCreated(); + + _dbContextFactoryMock = new Mock>(); + _dbContextFactoryMock.Setup(f => f.CreateDbContextAsync(It.IsAny())) + .ReturnsAsync(() => new AppDbContext(_contextOptions)); + _dbContextFactoryMock.Setup(f => f.CreateDbContext()) + .Returns(() => new AppDbContext(_contextOptions)); + + _loggerMock = new Mock>(); + } + + [Fact] + public async Task GetEpubContentAsync_RewritesImageUrlsAndExtractsImages() + { + // Arrange + var ebookId = Guid.NewGuid(); + var userId = "test-user-id"; + + using (var context = new AppDbContext(_contextOptions)) + { + var user = new NexusUser + { + Id = userId, + UserName = "testuser", + Email = "test@nexus.com", + TenantId = "tenant-123", + SubscriptionPlanId = 1 + }; + context.Users.Add(user); + + var author = new Author { Id = 10, Name = "Giorgio Vasari" }; + context.Authors.Add(author); + + var ebook = new Ebook + { + Id = ebookId, + UserId = userId, + Title = "Test Book", + AuthorId = author.Id, + FilePath = "assets/book.epub", + AddedDate = DateTime.UtcNow, + LastReadDate = DateTime.UtcNow, + Progress = 0, + LastChapter = "Introduction" + }; + context.Ebooks.Add(ebook); + await context.SaveChangesAsync(); + } + + var service = new EpubReaderService(_dbContextFactoryMock.Object, _loggerMock.Object); + + // Act + var result = await service.GetEpubContentAsync(ebookId, 0, userId); + + // Assert + result.IsSuccess.Should().BeTrue(); + result.Value.Should().NotBeNull(); + result.Value.Blocks.Should().NotBeEmpty(); + + // Check that any img tags extracted are preserved and rewritten + var hasImages = false; + foreach (var block in result.Value.Blocks) + { + if (block is TextSegmentBlock textBlock && textBlock.Content.Contains("