From 11b08cf7bab368c892ba9eb9f31d811a053a4825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Jasi=C5=84ski?= Date: Mon, 1 Jun 2026 18:03:39 +0200 Subject: [PATCH] fix(epub): resolve PR #65 review comments for EPUB image rendering, path traversal validation, and security sanitization --- .../Services/EpubReaderService.cs | 47 ++++++++-- src/NexusReader.Maui/MauiProgram.cs | 4 + .../Pages/Account/Login.razor | 6 +- .../Pages/Account/Register.razor | 4 +- .../Pages/SerilogDemo.razor | 5 ++ .../Services/FeatureSettings.cs | 11 +++ .../Services/ReaderStateService.cs | 2 + src/NexusReader.Web.Client/Program.cs | 3 + src/NexusReader.Web/Program.cs | 23 ++++- .../Services/EpubReaderServiceTests.cs | 90 +++++++++++++++++++ 10 files changed, 179 insertions(+), 16 deletions(-) create mode 100644 src/NexusReader.UI.Shared/Services/FeatureSettings.cs diff --git a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs index 44b5d41..6079d42 100644 --- a/src/NexusReader.Infrastructure/Services/EpubReaderService.cs +++ b/src/NexusReader.Infrastructure/Services/EpubReaderService.cs @@ -18,6 +18,16 @@ public class EpubReaderService : IEpubReader private readonly ILogger _logger; private const int WordThreshold = 1000; + private static readonly Regex ImageTagRegex = new(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex BodyMatchRegex = new(@"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex ParagraphMatchRegex = new(@"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex StyleScriptRegex = new(@"<(style|script)\b[^>]*>.*?", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled); + private static readonly Regex WhitelistTagsRegex = new(@"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex StripAttributesRegex = new(@"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex ImgTagSanitizerRegex = new(@"]*>", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex SrcAttributeRegex = new(@"\bsrc=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + private static readonly Regex AltAttributeRegex = new(@"\balt=[""'](?[^""']*)[""']", RegexOptions.IgnoreCase | RegexOptions.Compiled); + public EpubReaderService( IDbContextFactory dbContextFactory, ILogger logger) @@ -174,7 +184,13 @@ public class EpubReaderService : IEpubReader using var bookRef = await EpubReader.OpenBookAsync(fullPath); - var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath).Replace('\\', '/').TrimStart('/'); + var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath); + if (decodedPath.Contains("..") || decodedPath.Contains(":") || decodedPath.StartsWith("/") || decodedPath.StartsWith("\\")) + { + return Result.Fail("Invalid resource path."); + } + + decodedPath = decodedPath.Replace('\\', '/').TrimStart('/'); EpubLocalContentFileRef? targetFile = null; if (bookRef.Content?.AllFiles?.Local != null) @@ -220,11 +236,15 @@ public class EpubReaderService : IEpubReader { if (string.IsNullOrEmpty(html)) return html; - var imgRegex = new Regex(@"[^>]*?\bsrc=[""'])(?[^""']*?)(?[""'][^>]*?>)", RegexOptions.IgnoreCase); - return imgRegex.Replace(html, match => + return ImageTagRegex.Replace(html, match => { var rawSrc = match.Groups["src"].Value; + if (rawSrc.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase)) + { + return ""; // Completely block script execution in image src + } + if (rawSrc.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || rawSrc.StartsWith("https://", StringComparison.OrdinalIgnoreCase) || rawSrc.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) @@ -274,11 +294,11 @@ public class EpubReaderService : IEpubReader private static List ExtractParagraphs(string html) { - var bodyMatch = Regex.Match(html, @"]*>(.*?)", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var bodyMatch = BodyMatchRegex.Match(html); var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html; var paragraphs = new List(); - var matches = Regex.Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?|]*>|]*>", RegexOptions.IgnoreCase | RegexOptions.Singleline); + var matches = ParagraphMatchRegex.Matches(content); foreach (Match match in matches) { @@ -295,9 +315,20 @@ public class EpubReaderService : IEpubReader private static string SanitizeParagraph(string html) { - var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?", "", RegexOptions.IgnoreCase | RegexOptions.Singleline); - clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>", "", RegexOptions.IgnoreCase); - clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase); + var clean = StyleScriptRegex.Replace(html, ""); + clean = WhitelistTagsRegex.Replace(clean, ""); + clean = StripAttributesRegex.Replace(clean, "<$1>"); + + // Securely sanitize img tags by keeping ONLY src and alt attributes to prevent XSS (onerror, onload, style, etc.) + clean = ImgTagSanitizerRegex.Replace(clean, m => + { + var srcMatch = SrcAttributeRegex.Match(m.Value); + var altMatch = AltAttributeRegex.Match(m.Value); + var srcAttr = srcMatch.Success ? $" src=\"{srcMatch.Groups["src"].Value}\"" : ""; + var altAttr = altMatch.Success ? $" alt=\"{altMatch.Groups["alt"].Value}\"" : ""; + return $""; + }); + clean = System.Net.WebUtility.HtmlDecode(clean); return clean.Trim(); } diff --git a/src/NexusReader.Maui/MauiProgram.cs b/src/NexusReader.Maui/MauiProgram.cs index a2a7733..a4b2c6c 100644 --- a/src/NexusReader.Maui/MauiProgram.cs +++ b/src/NexusReader.Maui/MauiProgram.cs @@ -63,6 +63,10 @@ public static class MauiProgram builder.Services.AddScoped(sp => sp.GetRequiredService().CreateClient("NexusAPI")); // UI State + // Feature settings (avoiding direct raw IConfiguration injection in client pages) + var featureSettings = builder.Configuration.GetSection("Features").Get() ?? new FeatureSettings(); + builder.Services.AddSingleton(featureSettings); + builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); diff --git a/src/NexusReader.UI.Shared/Pages/Account/Login.razor b/src/NexusReader.UI.Shared/Pages/Account/Login.razor index 703382a..5295a1f 100644 --- a/src/NexusReader.UI.Shared/Pages/Account/Login.razor +++ b/src/NexusReader.UI.Shared/Pages/Account/Login.razor @@ -7,7 +7,7 @@ @inject IIdentityService IdentityService @inject NavigationManager NavigationManager @inject IJSRuntime JS -@inject IConfiguration Configuration +@inject FeatureSettings FeatureSettings