711480f8f6
This pull request introduces the dedicated containerized infrastructure and configuration for deploying NexusReader's beta version in the Test environment. ### Summary of Changes 1. **Docker Infrastructure & Secrets**: - **`docker-compose.test.yml`**: Configured dedicated database and auxiliary services (PostgreSQL 17, Qdrant, Neo4j) on isolated, non-standard ports to avoid conflicts with the existing server configurations. - **`.env.test.template`**: Provided an environment variable template listing the required settings, including mandatory database passwords, API keys, and custom admin passwords. - **`.gitignore`**: Excluded local `.env` files to prevent accidental commits of production or staging secrets. 2. **Database Hardening**: - Configured Neo4j with basic authentication (`IDriver` instantiation uses basic auth when credentials are provided in configuration). - Configured PostgreSQL to use mandatory authentication. - Configured the admin seeder (`DbInitializer.cs`) to dynamically use `NEXUS_ADMIN_PASSWORD` from environment variables, falling back to a default password in local Development only. 3. **Feature-Flagged Restrictions**: - **`appsettings.Test.json`**: Implemented `Features:AllowRegistration` and `Features:AllowPasswordReset` flags set to `false`. - **Middleware Enforcement (`Program.cs`)**: Intercepts requests to `/identity/register` and `/identity/forgotPassword` (and their MVC/form variations) and rejects them with a `403 Forbidden` response in restricted environments. - **OAuth Provisioning Guard (`Program.cs`)**: Blocks new account provisioning via Google OAuth callback by checking the `Features:AllowRegistration` configuration, redirecting users to the login page with a descriptive error. - **UI Protection (`Login.razor`, `Register.razor`)**: Conditionally hides registration/password reset links and intercepts manual navigation attempts to `/account/register` by redirecting to login with a warning. 
--------- Co-authored-by: Marek Jasiński <jasins.marek@gmail.com> Reviewed-on: #56 Co-authored-by: Antigravity <antigravity@google.com> Co-committed-by: Antigravity <antigravity@google.com>
367 lines
15 KiB
C#
367 lines
15 KiB
C#
using System.Text.RegularExpressions;
|
|
using FluentResults;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using Microsoft.Extensions.Logging;
|
|
using NexusReader.Application.Abstractions.Services;
|
|
using NexusReader.Application.Queries.Reader;
|
|
using NexusReader.Data.Persistence;
|
|
using VersOne.Epub;
|
|
|
|
namespace NexusReader.Infrastructure.Services;
|
|
|
|
/// <summary>
|
|
/// Reads and parses EPUB files from the storage path recorded in the database.
|
|
/// </summary>
|
|
public class EpubReaderService : IEpubReader
|
|
{
|
|
// Creates the short-lived contexts used to look up ebook records.
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;

// Receives error logs when an EPUB or one of its resources cannot be processed.
private readonly ILogger<EpubReaderService> _logger;

// Number of accumulated words after which an AI trigger block is inserted.
private const int WordThreshold = 1000;

// Option sets shared by the patterns below.
private const RegexOptions TagOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;
private const RegexOptions SpanningOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled;

// <img> tag with a quoted src, split into the text before the value, the value, and the text after it.
private static readonly Regex ImageTagRegex = new Regex(
    @"<img\b(?<before>[^>]*?\bsrc=[""'])(?<src>[^""']*?)(?<after>[""'][^>]*?>)",
    TagOptions);

// Inner content of the <body> element.
private static readonly Regex BodyMatchRegex = new Regex(
    @"<body\b[^>]*>(.*?)</body>",
    SpanningOptions);

// Block-level elements treated as individual paragraphs, plus standalone <hr> and <img> tags.
private static readonly Regex ParagraphMatchRegex = new Regex(
    @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>|<img\b[^>]*>",
    SpanningOptions);

// <style> and <script> elements together with their content.
private static readonly Regex StyleScriptRegex = new Regex(
    @"<(style|script)\b[^>]*>.*?</\1>",
    SpanningOptions);

// Any tag whose name is not in the allowed list.
private static readonly Regex WhitelistTagsRegex = new Regex(
    @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr|img)\b)[^>]+>",
    TagOptions);

// Opening tags of allowed elements (except img), matched so their attributes can be dropped.
private static readonly Regex StripAttributesRegex = new Regex(
    @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>",
    TagOptions);

// Any <img> tag, matched so it can be rebuilt from its src and alt attributes only.
private static readonly Regex ImgTagSanitizerRegex = new Regex(
    @"<img\b[^>]*>",
    TagOptions);

// Quoted src attribute value.
private static readonly Regex SrcAttributeRegex = new Regex(
    @"\bsrc=[""'](?<src>[^""']*)[""']",
    TagOptions);

// Quoted alt attribute value.
private static readonly Regex AltAttributeRegex = new Regex(
    @"\balt=[""'](?<alt>[^""']*)[""']",
    TagOptions);
|
|
|
|
/// <summary>
/// Creates the reader with the context factory used for ebook lookups and the logger used for failure reporting.
/// </summary>
/// <param name="dbContextFactory">Factory that produces the database contexts.</param>
/// <param name="logger">Logger for this service.</param>
public EpubReaderService(
    IDbContextFactory<AppDbContext> dbContextFactory,
    ILogger<EpubReaderService> logger)
    => (_dbContextFactory, _logger) = (dbContextFactory, logger);
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Looks up the ebook record (restricted to <paramref name="userId"/> when it is not null), resolves and
/// opens the EPUB file, and converts the requested chapter into content blocks. An out-of-range
/// <paramref name="chapterIndex"/> falls back to the first chapter. Every exception is logged and
/// returned as a failed result instead of being thrown.
/// </remarks>
public async Task<Result<ReaderPageViewModel>> GetEpubContentAsync(
    Guid ebookId,
    int chapterIndex,
    string? userId = null,
    CancellationToken cancellationToken = default)
{
    try
    {
        // 1. Resolve the file path from the database.
        // The context is created asynchronously, so it is disposed asynchronously as well.
        await using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);

        var ebook = await context.Ebooks
            .AsNoTracking()
            .FirstOrDefaultAsync(
                e => e.Id == ebookId && (userId == null || e.UserId == userId),
                cancellationToken);

        if (ebook is null)
        {
            return Result.Fail($"Ebook '{ebookId}' not found for user '{userId}'.");
        }

        // FilePath is stored as a web-relative path (e.g. "uploads/guid_title.epub").
        var fullPath = ResolvePath(ebook.FilePath);
        if (fullPath is null || !File.Exists(fullPath))
        {
            _logger.LogError("EPUB file for ebook {EbookId} not found at path '{FilePath}'.", ebookId, ebook.FilePath);
            return Result.Fail("The EPUB file for this book could not be found on the server.");
        }

        // 2. Parse the EPUB.
        using var bookRef = await EpubReader.OpenBookAsync(fullPath);
        var readingOrder = bookRef.GetReadingOrder();

        if (readingOrder is null || readingOrder.Count == 0)
        {
            return Result.Fail("The EPUB has no readable content files in ReadingOrder.");
        }

        // An invalid index is not an error: fall back to the first chapter.
        if (chapterIndex < 0 || chapterIndex >= readingOrder.Count)
        {
            chapterIndex = 0;
        }

        var chapterRef = readingOrder[chapterIndex];

        // Title preference: navigation entry, then the chapter file name, then a generic label.
        var chapterTitle = FindTitleInNavigation(bookRef.GetNavigation(), chapterRef.FilePath)
            ?? Path.GetFileNameWithoutExtension(chapterRef.FilePath)
            ?? $"Chapter {chapterIndex + 1}";

        var chapterContent = await chapterRef.ReadContentAsTextAsync();

        // Rewrite relative image src URLs to use the server-side API endpoint.
        chapterContent = RewriteImageUrls(chapterContent, ebookId, chapterRef.FilePath);

        // 3. Build content blocks.
        var blocks = BuildContentBlocks(chapterContent);

        return Result.Ok(new ReaderPageViewModel(blocks, chapterIndex, readingOrder.Count, chapterTitle, ebook.Id));
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to process EPUB for ebook {EbookId}.", ebookId);
        return Result.Fail(new Error($"Failed to process EPUB: {ex.Message}").CausedBy(ex));
    }
}

/// <summary>
/// Converts chapter HTML into sanitized text segments, inserting an AI trigger block each time the
/// running word count reaches <see cref="WordThreshold"/> and once more after the last segment.
/// </summary>
private static List<ContentBlock> BuildContentBlocks(string chapterContent)
{
    var blocks = new List<ContentBlock>();
    var wordsSinceTrigger = 0;
    var blockCounter = 0; // shared by "seg-" and "trigger-" ids so every id is unique

    foreach (var paragraph in ExtractParagraphs(chapterContent))
    {
        var sanitized = SanitizeParagraph(paragraph);
        if (string.IsNullOrWhiteSpace(sanitized))
        {
            continue;
        }

        blocks.Add(new TextSegmentBlock($"seg-{blockCounter++}", sanitized));
        wordsSinceTrigger += CountWords(sanitized);

        if (wordsSinceTrigger >= WordThreshold)
        {
            blocks.Add(CreateAiTrigger($"trigger-{blockCounter++}"));
            wordsSinceTrigger = 0;
        }
    }

    // Close a non-empty chapter with a trigger unless one was just added.
    if (blocks.Count > 0 && blocks[^1] is not AiActionTriggerBlock)
    {
        blocks.Add(CreateAiTrigger($"trigger-{blockCounter}"));
    }

    return blocks;
}
|
|
|
|
/// <summary>
/// Attempts to resolve a web-relative storage path to an absolute filesystem path.
/// Walks upward from the app base directory and, at each level, probes
/// <c>wwwroot/&lt;path&gt;</c> and <c>src/NexusReader.Web/wwwroot/&lt;path&gt;</c>.
/// </summary>
/// <param name="relativePath">Web-relative path such as <c>uploads/file.epub</c>; a leading slash is tolerated.</param>
/// <returns>The first existing candidate, or <c>null</c> when the input is blank or no candidate exists.</returns>
private static string? ResolvePath(string relativePath)
{
    if (string.IsNullOrWhiteSpace(relativePath))
    {
        return null;
    }

    // Normalize forward slashes to the OS separator, then drop leading separators.
    // Path.Combine discards all earlier segments when a later one is rooted, so a stored value
    // like "/uploads/x.epub" would otherwise be probed as an absolute path outside wwwroot.
    var normalized = relativePath
        .Replace('/', Path.DirectorySeparatorChar)
        .TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

    if (normalized.Length == 0)
    {
        return null;
    }

    for (var dir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory); dir is not null; dir = dir.Parent)
    {
        var candidate = Path.Combine(dir.FullName, "wwwroot", normalized);
        if (File.Exists(candidate))
        {
            return candidate;
        }

        // Also try src/NexusReader.Web/wwwroot (development layout).
        var devCandidate = Path.Combine(dir.FullName, "src", "NexusReader.Web", "wwwroot", normalized);
        if (File.Exists(devCandidate))
        {
            return devCandidate;
        }
    }

    return null;
}
|
|
|
|
/// <inheritdoc />
/// <remarks>
/// Validates <paramref name="resourcePath"/> first, then looks up the ebook (restricted to
/// <paramref name="userId"/> when it is not null), opens the EPUB and returns the bytes of the
/// local content file whose path or key matches the request, compared case-insensitively.
/// Text files are returned UTF-8 encoded. Every exception is logged and returned as a failed result.
/// </remarks>
public async Task<Result<byte[]>> GetEpubResourceAsync(
    Guid ebookId,
    string resourcePath,
    string? userId = null,
    CancellationToken cancellationToken = default)
{
    // Uses forward slashes and removes leading slashes so both sides of the comparison agree.
    static string Normalize(string? path) => path?.Replace('\\', '/').TrimStart('/') ?? "";

    try
    {
        // Reject bad input before touching the database or opening the EPUB file.
        if (string.IsNullOrWhiteSpace(resourcePath))
        {
            return Result.Fail("Invalid resource path.");
        }

        var decodedPath = System.Net.WebUtility.UrlDecode(resourcePath);
        var looksUnsafe = decodedPath.Contains("..", StringComparison.Ordinal)
            || decodedPath.Contains(':')
            || decodedPath.StartsWith('/')
            || decodedPath.StartsWith('\\');
        if (looksUnsafe)
        {
            return Result.Fail("Invalid resource path.");
        }

        await using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
        var ebook = await context.Ebooks
            .AsNoTracking()
            .FirstOrDefaultAsync(
                e => e.Id == ebookId && (userId == null || e.UserId == userId),
                cancellationToken);

        if (ebook is null)
        {
            return Result.Fail($"Ebook '{ebookId}' not found.");
        }

        var fullPath = ResolvePath(ebook.FilePath);
        if (fullPath is null || !File.Exists(fullPath))
        {
            return Result.Fail("EPUB file not found.");
        }

        using var bookRef = await EpubReader.OpenBookAsync(fullPath);

        // NOTE(review): if the caller's framework already URL-decodes the query value, decoding
        // again here corrupts names containing '+' or '%'. The value as received is therefore
        // tried first and the decoded form is kept as a fallback - confirm against the controller.
        var rawKey = Normalize(resourcePath);
        var decodedKey = Normalize(decodedPath);
        var localFiles = bookRef.Content?.AllFiles?.Local;

        EpubLocalContentFileRef? FindByPath(string wanted) =>
            localFiles?.FirstOrDefault(file =>
                Normalize(file.FilePath).Equals(wanted, StringComparison.OrdinalIgnoreCase) ||
                Normalize(file.Key).Equals(wanted, StringComparison.OrdinalIgnoreCase));

        var targetFile = FindByPath(rawKey) ?? FindByPath(decodedKey);

        switch (targetFile)
        {
            case EpubLocalByteContentFileRef byteFile:
                return Result.Ok(await byteFile.ReadContentAsync());

            case EpubLocalTextContentFileRef textFile:
                var text = await textFile.ReadContentAsync();
                return Result.Ok(System.Text.Encoding.UTF8.GetBytes(text));

            default:
                // No match, or a file reference type that cannot be read here.
                return Result.Fail($"Resource '{resourcePath}' not found in EPUB.");
        }
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to retrieve EPUB resource '{ResourcePath}' for ebook {EbookId}.", resourcePath, ebookId);
        return Result.Fail(new Error($"Failed to retrieve EPUB resource: {ex.Message}").CausedBy(ex));
    }
}
|
|
|
|
/// <summary>
/// Points every relative <c>&lt;img src&gt;</c> in the chapter markup at the EPUB resource endpoint.
/// Tags with a <c>javascript:</c> source are removed; <c>http</c>, <c>https</c> and <c>data</c>
/// sources are left untouched.
/// </summary>
/// <param name="html">Chapter markup; returned as-is when null or empty.</param>
/// <param name="ebookId">Identifier placed in the rewritten URL.</param>
/// <param name="chapterPath">Path of the chapter file, used as the base for relative sources.</param>
private static string RewriteImageUrls(string html, Guid ebookId, string chapterPath)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    string RewriteTag(Match tag)
    {
        var source = tag.Groups["src"].Value;

        // Completely block script execution in image src by dropping the whole tag.
        if (source.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase))
        {
            return "";
        }

        var isAbsoluteOrInline =
            source.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || source.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
            || source.StartsWith("data:", StringComparison.OrdinalIgnoreCase);
        if (isAbsoluteOrInline)
        {
            return tag.Value;
        }

        var encodedTarget = System.Net.WebUtility.UrlEncode(ResolveRelativePath(chapterPath, source));
        var endpointUrl = $"/api/epub/{ebookId}/resource?path={encodedTarget}";
        return string.Concat(tag.Groups["before"].Value, endpointUrl, tag.Groups["after"].Value);
    }

    return ImageTagRegex.Replace(html, RewriteTag);
}
|
|
|
|
/// <summary>
/// Resolves a reference found inside a chapter (for example an image <c>src</c>) against the
/// chapter's own location and returns a slash-separated path with <c>.</c> and <c>..</c> collapsed.
/// </summary>
/// <param name="basePath">Path of the referencing file; only its directory part is used.</param>
/// <param name="relativePath">The percent-encoded reference. A leading slash means "from the root".</param>
/// <returns>
/// The normalized path without a leading slash, or an empty string for an empty reference.
/// <c>..</c> segments that would climb above the root are ignored.
/// </returns>
private static string ResolveRelativePath(string basePath, string relativePath)
{
    if (string.IsNullOrEmpty(relativePath))
    {
        return string.Empty;
    }

    // Percent-decode as a URI path: unlike form decoding, this keeps a literal '+' intact,
    // so "a+b.png" is not turned into "a b.png".
    var reference = Uri.UnescapeDataString(relativePath).Replace('\\', '/');

    // Take the directory part by hand instead of Path.GetDirectoryName/Path.Combine so the
    // result does not depend on the host OS's notion of separators and rooted paths.
    var normalizedBase = (basePath ?? string.Empty).Replace('\\', '/');
    var lastSlash = normalizedBase.LastIndexOf('/');
    var baseDir = lastSlash > 0 ? normalizedBase[..lastSlash] : string.Empty;

    var combined = reference.StartsWith('/') ? reference : $"{baseDir}/{reference}";

    var resolved = new List<string>();
    foreach (var segment in combined.Split('/', StringSplitOptions.RemoveEmptyEntries))
    {
        switch (segment)
        {
            case ".":
                break;

            case "..":
                // Never climb above the root.
                if (resolved.Count > 0)
                {
                    resolved.RemoveAt(resolved.Count - 1);
                }
                break;

            default:
                resolved.Add(segment);
                break;
        }
    }

    return string.Join("/", resolved);
}
|
|
|
|
/// <summary>
/// Splits chapter markup into paragraph-sized fragments. Only the content of <c>&lt;body&gt;</c> is
/// considered when a body element is present. When no block-level element is found, the markup is
/// split on line breaks and blank lines instead.
/// </summary>
/// <param name="html">Chapter markup.</param>
/// <returns>The fragments in document order.</returns>
private static List<string> ExtractParagraphs(string html)
{
    var body = BodyMatchRegex.Match(html);
    var markup = body.Success ? body.Groups[1].Value : html;

    var fragments = ParagraphMatchRegex.Matches(markup)
        .Select(found => found.Value)
        .ToList();
    if (fragments.Count > 0)
    {
        return fragments;
    }

    // Fallback for markup without recognizable block elements.
    var separators = new[] { "<br />", "<br>", "\n\n", "\r\n\r\n" };
    return markup.Split(separators, StringSplitOptions.RemoveEmptyEntries).ToList();
}
|
|
|
|
/// <summary>
/// Reduces an HTML fragment to the allowed tag set: removes <c>style</c>/<c>script</c> elements,
/// strips every tag that is not on the allow-list, drops all attributes from allowed tags, and
/// rebuilds <c>img</c> tags with only their <c>src</c> and <c>alt</c> attributes.
/// </summary>
/// <param name="html">Fragment to sanitize; null or empty yields an empty string.</param>
/// <returns>
/// The sanitized, trimmed markup. Character entities are left encoded on purpose: decoding them
/// after the tags have been filtered would turn escaped text such as <c>&amp;lt;script&amp;gt;</c>, or
/// <c>&amp;quot;</c> inside an <c>alt</c> value, back into live markup and undo the sanitization.
/// </returns>
private static string SanitizeParagraph(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // Keep ONLY src and alt on images to prevent XSS through onerror, onload, style, etc.
    static string RebuildImageTag(Match tag)
    {
        var src = SrcAttributeRegex.Match(tag.Value);
        var alt = AltAttributeRegex.Match(tag.Value);
        var srcAttr = src.Success ? $" src=\"{src.Groups["src"].Value}\"" : "";
        var altAttr = alt.Success ? $" alt=\"{alt.Groups["alt"].Value}\"" : "";
        return $"<img{srcAttr}{altAttr} />";
    }

    var clean = StyleScriptRegex.Replace(html, "");
    clean = WhitelistTagsRegex.Replace(clean, "");
    clean = StripAttributesRegex.Replace(clean, "<$1>");
    clean = ImgTagSanitizerRegex.Replace(clean, RebuildImageTag);

    // No HtmlDecode here: the result is markup, and whatever renders it decodes the entities.
    return clean.Trim();
}
|
|
|
|
/// <summary>
/// Counts the runs of characters separated by spaces, tabs, carriage returns or line feeds.
/// </summary>
/// <param name="text">Text to measure; null, empty or all-whitespace text counts as zero words.</param>
private static int CountWords(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return 0;
    }

    var words = 0;
    var insideWord = false;
    foreach (var ch in text)
    {
        var isSeparator = ch is ' ' or '\r' or '\n' or '\t';
        if (isSeparator)
        {
            insideWord = false;
        }
        else if (!insideWord)
        {
            // First character of a new run.
            insideWord = true;
            words++;
        }
    }

    return words;
}
|
|
|
|
/// <summary>
/// Builds the AI trigger block with the given id, the fixed Polish prompt and its three choices.
/// </summary>
/// <param name="id">Identifier assigned to the block.</param>
private static AiActionTriggerBlock CreateAiTrigger(string id)
{
    // Prompt text (Polish): asks whether to generate a summary or a quiz for this chapter.
    const string prompt = "Wykryto ciekawy fragment! Czy chcesz, abym wygenerował podsumowanie lub quiz z tego rozdziału?";

    // Choices (Polish): summarize, generate quiz, skip.
    var choices = new List<string> { "Podsumuj", "Generuj Quiz", "Pomiń" };

    return new AiActionTriggerBlock(id, prompt, choices);
}
|
|
|
|
/// <summary>
/// Searches the navigation tree depth-first for the first entry whose link targets the given
/// content file, matching either the full path or just the file name, and returns its title.
/// </summary>
/// <param name="navigation">
/// Navigation items to search. May be null, which is treated as "no match" so that a missing
/// navigation list does not abort the caller with a null reference.
/// </param>
/// <param name="filePath">Path of the content file to look for.</param>
/// <returns>The title of the matching entry, or <c>null</c> when nothing matches.</returns>
private static string? FindTitleInNavigation(IEnumerable<EpubNavigationItemRef>? navigation, string? filePath)
{
    if (navigation is null || string.IsNullOrEmpty(filePath))
    {
        return null;
    }

    var fileName = Path.GetFileName(filePath);

    foreach (var item in navigation)
    {
        var target = item.Link?.ContentFilePath;
        if (target is not null && (target == filePath || target == fileName))
        {
            return item.Title;
        }

        // Recurse into children; the null guard above covers items without nested entries.
        var childTitle = FindTitleInNavigation(item.NestedItems, filePath);
        if (childTitle is not null)
        {
            return childTitle;
        }
    }

    return null;
}
|
|
}
|