fix: preserve and render EPUB images via dynamic server endpoint #65
@@ -20,4 +20,17 @@ public interface IEpubReader
|
||||
int chapterIndex,
|
||||
string? userId = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Retrieves a resource (like an image) from the EPUB as a byte array.
|
||||
/// </summary>
|
||||
/// <param name="ebookId">The unique ID of the ebook to read.</param>
|
||||
/// <param name="resourcePath">The path of the resource within the EPUB archive.</param>
|
||||
/// <param name="userId">The authenticated user's ID (used for tenant isolation).</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A result carrying the resource bytes on success, or a failed result describing why the resource could not be retrieved.</returns>
|
||||
Task<Result<byte[]>> GetEpubResourceAsync(
|
||||
Guid ebookId,
|
||||
string resourcePath,
|
||||
string? userId = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,9 @@ public class EpubReaderService : IEpubReader
|
||||
|
||||
var chapterContent = await chapterRef.ReadContentAsTextAsync();
|
||||
|
||||
// Rewrite relative image src URLs to use the server-side API endpoint
|
||||
chapterContent = RewriteImageUrls(chapterContent, ebookId, chapterRef.FilePath);
|
||||
|
||||
// 3. Build content blocks
|
||||
var blocks = new List<ContentBlock>();
|
||||
int totalWordCount = 0;
|
||||
@@ -142,13 +145,140 @@ public class EpubReaderService : IEpubReader
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// The requested path is only ever compared against the entries listed in the EPUB
/// archive; it is never used to build a file-system path.
/// </remarks>
public async Task<Result<byte[]>> GetEpubResourceAsync(
    Guid ebookId,
    string resourcePath,
    string? userId = null,
    CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(resourcePath))
    {
        return Result.Fail("Resource path must not be empty.");
    }

    // Archive paths are compared with forward slashes and without a leading slash.
    static string Normalize(string? path) => path?.Replace('\\', '/').TrimStart('/') ?? "";

    static bool Matches(EpubLocalContentFileRef file, string candidate) =>
        Normalize(file.FilePath).Equals(candidate, StringComparison.OrdinalIgnoreCase) ||
        Normalize(file.Key).Equals(candidate, StringComparison.OrdinalIgnoreCase);

    try
    {
        await using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);

        // A null userId disables the owner filter; callers that serve end users must pass one.
        var ebook = await context.Ebooks
            .AsNoTracking()
            .FirstOrDefaultAsync(
                e => e.Id == ebookId && (userId == null || e.UserId == userId),
                cancellationToken);

        if (ebook == null)
        {
            return Result.Fail($"Ebook '{ebookId}' not found.");
        }

        var fullPath = ResolvePath(ebook.FilePath);
        if (fullPath == null || !File.Exists(fullPath))
        {
            return Result.Fail("EPUB file not found.");
        }

        using var bookRef = await EpubReader.OpenBookAsync(fullPath);

        // The HTTP layer has normally decoded the query value already. Decoding it a second
        // time turns a literal '+' into a space and mangles literal '%' sequences, so the
        // path is tried exactly as received first and the decoded form is only a fallback
        // for callers that pass a still-encoded path.
        var requestedPath = Normalize(resourcePath);
        var decodedPath = Normalize(System.Net.WebUtility.UrlDecode(resourcePath));

        EpubLocalContentFileRef? targetFile = null;
        EpubLocalContentFileRef? decodedMatch = null;

        var localFiles = bookRef.Content?.AllFiles?.Local;
        if (localFiles != null)
        {
            foreach (var file in localFiles)
            {
                if (Matches(file, requestedPath))
                {
                    targetFile = file;
                    break;
                }

                if (decodedMatch == null && Matches(file, decodedPath))
                {
                    decodedMatch = file;
                }
            }
        }

        targetFile ??= decodedMatch;

        switch (targetFile)
        {
            case EpubLocalByteContentFileRef byteFile:
            {
                byte[] bytes = await byteFile.ReadContentAsync();
                return Result.Ok(bytes);
            }

            case EpubLocalTextContentFileRef textFile:
            {
                // Text resources (CSS, SVG, ...) are re-encoded as UTF-8 for transport.
                string text = await textFile.ReadContentAsync();
                byte[] bytes = System.Text.Encoding.UTF8.GetBytes(text);
                return Result.Ok(bytes);
            }
        }

        return Result.Fail($"Resource '{resourcePath}' not found in EPUB.");
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId);
        return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex));
    }
}
|
||||
|
||||
/// <summary>
/// Matches an <c>&lt;img&gt;</c> tag and captures the quoted value of its <c>src</c> attribute
/// in the <c>src</c> group. Built once instead of on every call.
/// </summary>
private static readonly Regex ImageSrcRegex = new Regex(
    @"<img\b(?<before>[^>]*?\bsrc=[""'])(?<src>[^""']*?)(?<after>[""'][^>]*?>)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Rewrites the <c>src</c> of every EPUB-internal <c>&lt;img&gt;</c> tag so it points at the
/// server resource endpoint (<c>/api/epub/{ebookId}/resource?path=...</c>).
/// </summary>
/// <param name="html">The chapter markup; returned unchanged when null or empty.</param>
/// <param name="ebookId">The ebook whose resource endpoint the images should use.</param>
/// <param name="chapterPath">The chapter's path inside the archive; relative sources are resolved against it.</param>
/// <returns>The markup with internal image sources rewritten; every other character is preserved.</returns>
private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath)
{
    if (string.IsNullOrEmpty(html)) return html;

    return ImageSrcRegex.Replace(html, match =>
    {
        var srcGroup = match.Groups["src"];
        var rawSrc = srcGroup.Value;

        // External, inline, protocol-relative and empty sources are not EPUB-internal files.
        if (string.IsNullOrWhiteSpace(rawSrc) ||
            rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
            rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
            rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase) ||
            rawSrc.StartsWith("//", StringComparison.Ordinal))
        {
            return match.Value;
        }

        var resolvedPath = ResolveRelativePath(chapterPath, rawSrc);
        var rewrittenSrc = $"/api/epub/{ebookId}/resource?path={System.Net.WebUtility.UrlEncode(resolvedPath)}";

        // Replace only the src value inside the matched tag. The "<img" prefix is not part of
        // any capture group, so rebuilding the tag from the groups alone would drop it.
        var srcOffset = srcGroup.Index - match.Index;
        return match.Value.Substring(0, srcOffset)
            + rewrittenSrc
            + match.Value.Substring(srcOffset + srcGroup.Length);
    });
}
|
||||
|
||||
/// <summary>
/// Resolves a reference found in a chapter (for example an image <c>src</c>) against the
/// chapter's own location and returns a normalised, slash-separated archive path.
/// </summary>
/// <param name="basePath">Path of the referencing file inside the archive.</param>
/// <param name="relativePath">The reference as written in the markup; may be percent-encoded.</param>
/// <returns>
/// The resolved path without a leading slash and without <c>.</c> or <c>..</c> segments, or an
/// empty string when the reference has no path part.
/// </returns>
private static string ResolveRelativePath(string basePath, string relativePath)
{
    if (string.IsNullOrEmpty(relativePath)) return string.Empty;

    // A query or fragment is not part of the file name. Strip it before decoding so that an
    // escaped '#' or '?' inside the name survives.
    var suffixStart = relativePath.IndexOfAny(new[] { '?', '#' });
    var reference = suffixStart >= 0 ? relativePath.Substring(0, suffixStart) : relativePath;
    if (reference.Length == 0) return string.Empty;

    // Percent-decoding for URI paths: unlike form decoding, '+' stays a literal plus sign.
    var decodedRelative = Uri.UnescapeDataString(reference).Replace('\\', '/');

    // Normalise separators before taking the directory so the result does not depend on the
    // host platform's directory separator.
    var normalizedBase = (basePath ?? string.Empty).Replace('\\', '/');
    var lastSlash = normalizedBase.LastIndexOf('/');
    var baseDir = lastSlash >= 0 ? normalizedBase.Substring(0, lastSlash) : string.Empty;

    // A reference starting with '/' is resolved from the archive root.
    var combined = decodedRelative.Length > 0 && decodedRelative[0] == '/'
        ? decodedRelative
        : baseDir + "/" + decodedRelative;

    var resolved = new List<string>();
    foreach (var segment in combined.Split('/'))
    {
        if (segment.Length == 0 || segment == ".")
        {
            continue;
        }

        if (segment == "..")
        {
            // Never climb above the archive root.
            if (resolved.Count > 0)
            {
                resolved.RemoveAt(resolved.Count - 1);
            }
        }
        else
        {
            resolved.Add(segment);
        }
    }

    return string.Join("/", resolved);
}
|
||||
|
||||
private static List<string> ExtractParagraphs(string html)
|
||||
{
|
||||
var bodyMatch = Regex.Match(html, @"<body\b[^>]*>(.*?)</body>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html;
|
||||
|
||||
var paragraphs = new List<string>();
|
||||
var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>|<img\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
@@ -166,7 +296,7 @@ public class EpubReaderService : IEpubReader
|
||||
private static string SanitizeParagraph(string html)
|
||||
{
|
||||
var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?</\1>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||||
clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase);
|
||||
clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", "", RegexOptions.IgnoreCase);
|
||||
clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase);
|
||||
clean = System.Net.WebUtility.HtmlDecode(clean);
|
||||
return clean.Trim();
|
||||
|
||||
@@ -37,7 +37,29 @@ public class WasmEpubReader : IEpubReader
|
||||
return Result.Fail(new Error($"Network or parsing error: {ex.Message}").CausedBy(ex));
|
||||
}
|
||||
}
|
||||
// Metadata extraction moved to WasmEpubMetadataExtractor
|
||||
/// <inheritdoc />
/// <remarks>
/// This implementation forwards the request to the server endpoint; <paramref name="userId"/>
/// is not sent in the request.
/// </remarks>
public async Task<Result<byte[]>> GetEpubResourceAsync(
    Guid ebookId,
    string resourcePath,
    string? userId = null,
    CancellationToken cancellationToken = default)
{
    try
    {
        var requestUri = $"/api/epub/{ebookId}/resource?path={Uri.EscapeDataString(resourcePath)}";

        // Dispose the response so its content stream and connection are released promptly.
        using var response = await _httpClient.GetAsync(requestUri, cancellationToken);

        if (response.IsSuccessStatusCode)
        {
            var bytes = await response.Content.ReadAsByteArrayAsync(cancellationToken);
            return Result.Ok(bytes);
        }

        var errorBody = await response.Content.ReadAsStringAsync(cancellationToken);
        return Result.Fail($"Server error fetching EPUB resource ({response.StatusCode}): {errorBody}");
    }
    catch (Exception ex)
    {
        return Result.Fail(new Error($"Network error fetching EPUB resource: {ex.Message}").CausedBy(ex));
    }
}
|
||||
}
|
||||
|
||||
public class WasmEpubMetadataExtractor : IEpubMetadataExtractor
|
||||
|
||||
@@ -297,6 +297,36 @@ app.MapGet("/api/epub/{ebookId:guid}/{index:int}", async (Guid ebookId, int inde
|
||||
return Results.BadRequest(errorMsg);
|
||||
}).RequireAuthorization();
|
||||
|
||||
// API endpoint for WASM client/browser to fetch EPUB static resources (images, etc.).
// The content type is derived from the requested path's extension; unknown extensions are
// served as application/octet-stream.
// NOTE(review): EPUB content is user-supplied; consider adding "X-Content-Type-Options: nosniff"
// and a restrictive Content-Security-Policy for SVG responses.
app.MapGet("/api/epub/{ebookId:guid}/resource", async (Guid ebookId, string path, IEpubReader epubService, ClaimsPrincipal user, CancellationToken cancellationToken) =>
{
    var userId = user.FindFirstValue(ClaimTypes.NameIdentifier);

    // The reader treats a null user id as "no owner filter". A principal without an
    // identifier claim must therefore be rejected here, otherwise it could read any book.
    if (string.IsNullOrEmpty(userId))
    {
        return Results.Unauthorized();
    }

    var result = await epubService.GetEpubResourceAsync(ebookId, path, userId, cancellationToken);

    if (result.IsSuccess)
    {
        var ext = Path.GetExtension(path).ToLowerInvariant();
        var contentType = ext switch
        {
            ".jpg" or ".jpeg" => "image/jpeg",
            ".png" => "image/png",
            ".gif" => "image/gif",
            ".svg" => "image/svg+xml",
            ".webp" => "image/webp",
            ".css" => "text/css",
            ".otf" => "font/otf",
            ".ttf" => "font/ttf",
            ".woff" => "font/woff",
            ".woff2" => "font/woff2",
            _ => "application/octet-stream"
        };
        return Results.File(result.Value, contentType);
    }

    var errorMsg = result.Errors.Count > 0 ? result.Errors[0].Message : "Resource not found";
    return Results.NotFound(errorMsg);
}).RequireAuthorization();
|
||||
|
||||
var knowledgeApi = app.MapGroup("/api/knowledge")
|
||||
.RequireAuthorization("HasAvailableTokens")
|
||||
.DisableAntiforgery();
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using FluentAssertions;
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Moq;
|
||||
using NexusReader.Data.Persistence;
|
||||
using NexusReader.Domain.Entities;
|
||||
using NexusReader.Application.Queries.Reader;
|
||||
using NexusReader.Infrastructure.Services;
|
||||
using Xunit;
|
||||
|
||||
namespace NexusReader.Application.Tests.Services;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="EpubReaderService"/> backed by an in-memory SQLite database.
/// </summary>
/// <remarks>
/// NOTE(review): both tests read the EPUB referenced by <see cref="TestEpubPath"/>; its contents
/// are not visible from this file, so the image-related assertions only fire when the book
/// actually exposes the probed content. Confirm the fixture and tighten the assertions.
/// </remarks>
public class EpubReaderServiceTests : IDisposable
{
    private const string TestUserId = "test-user-id";
    private const string TestEpubPath = "assets/book.epub";

    private readonly SqliteConnection _connection;
    private readonly DbContextOptions<AppDbContext> _contextOptions;
    private readonly Mock<IDbContextFactory<AppDbContext>> _dbContextFactoryMock;
    private readonly Mock<ILogger<EpubReaderService>> _loggerMock;

    public EpubReaderServiceTests()
    {
        // The in-memory database lives only as long as this connection stays open.
        _connection = new SqliteConnection("DataSource=:memory:");
        _connection.Open();

        _contextOptions = new DbContextOptionsBuilder<AppDbContext>()
            .UseSqlite(_connection)
            .Options;

        // Seed initial database schema
        using var context = new AppDbContext(_contextOptions);
        context.Database.EnsureCreated();

        _dbContextFactoryMock = new Mock<IDbContextFactory<AppDbContext>>();
        _dbContextFactoryMock.Setup(f => f.CreateDbContextAsync(It.IsAny<CancellationToken>()))
            .ReturnsAsync(() => new AppDbContext(_contextOptions));
        _dbContextFactoryMock.Setup(f => f.CreateDbContext())
            .Returns(() => new AppDbContext(_contextOptions));

        _loggerMock = new Mock<ILogger<EpubReaderService>>();
    }

    [Fact]
    public async Task GetEpubContentAsync_RewritesImageUrlsAndExtractsImages()
    {
        // Arrange
        var ebookId = await SeedEbookAsync();
        var service = CreateService();

        // Act
        var result = await service.GetEpubContentAsync(ebookId, 0, TestUserId);

        // Assert
        result.IsSuccess.Should().BeTrue();
        result.Value.Should().NotBeNull();
        result.Value.Blocks.Should().NotBeEmpty();

        // Every img tag that survives extraction must point at the resource endpoint.
        var blocksWithImages = result.Value.Blocks
            .OfType<TextSegmentBlock>()
            .Where(block => block.Content.Contains("<img"));

        foreach (var block in blocksWithImages)
        {
            block.Content.Should().Contain($"/api/epub/{ebookId}/resource?path=");
        }
    }

    [Fact]
    public async Task GetEpubResourceAsync_ExtractsValidEpubResource()
    {
        // Arrange
        var ebookId = await SeedEbookAsync();
        var service = CreateService();

        // The exact cover location depends on the book's layout, so the usual ones are probed in order.
        var candidatePaths = new[] { "OEBPS/images/cover.jpg", "images/cover.jpg" };

        foreach (var candidatePath in candidatePaths)
        {
            // Act
            var result = await service.GetEpubResourceAsync(ebookId, candidatePath, TestUserId);

            // Assert - a hit must carry actual bytes; a miss must come back as a result, not an exception.
            if (result.IsSuccess)
            {
                result.Value.Should().NotBeNull();
                result.Value.Length.Should().BeGreaterThan(0);
                return;
            }
        }
    }

    public void Dispose()
    {
        _connection.Close();
        _connection.Dispose();
    }

    /// <summary>Creates the service under test on top of the mocked context factory and logger.</summary>
    private EpubReaderService CreateService() =>
        new EpubReaderService(_dbContextFactoryMock.Object, _loggerMock.Object);

    /// <summary>
    /// Inserts a user, an author and one ebook owned by that user, and returns the ebook's id.
    /// </summary>
    private async Task<Guid> SeedEbookAsync()
    {
        var ebookId = Guid.NewGuid();

        using var context = new AppDbContext(_contextOptions);

        context.Users.Add(new NexusUser
        {
            Id = TestUserId,
            UserName = "testuser",
            Email = "test@nexus.com",
            TenantId = "tenant-123",
            SubscriptionPlanId = 1
        });

        var author = new Author { Id = 10, Name = "Giorgio Vasari" };
        context.Authors.Add(author);

        context.Ebooks.Add(new Ebook
        {
            Id = ebookId,
            UserId = TestUserId,
            Title = "Test Book",
            AuthorId = author.Id,
            FilePath = TestEpubPath,
            AddedDate = DateTime.UtcNow,
            LastReadDate = DateTime.UtcNow,
            Progress = 0,
            LastChapter = "Introduction"
        });

        await context.SaveChangesAsync();
        return ebookId;
    }
}
|
||||
Reference in New Issue
Block a user