feat: Ingestion Pipeline Stabilization and WASM Service Proxies (#42)
This PR stabilizes the Nexus Ingestion Engine by implementing functional service proxies for the Blazor WASM client and refining the backend infrastructure for real-time progress tracking and database compatibility.

### Key Changes

- **Infrastructure Stabilization**:
  - Implemented production-grade `EbookRepository` with PostgreSQL `EF.Functions.ILike` support.
  - Enforced `IsReadyForReading = false` state for newly added ebooks (resolves #35).
  - Updated `SignalRSyncBroadcaster` to support targeted user messaging and ingestion-specific progress updates (resolves #37).
- **WASM Client Functional Proxies**:
  - Replaced "Throwing" dummy services with `WasmEbookRepository`, `WasmSyncBroadcaster`, `WasmBookStorageService`, and `WasmEmbeddingGenerator`.
  - These services proxy requests to the backend via a new set of Minimal API endpoints in `NexusReader.Web`.
- **Domain Refinement**:
  - Added `IsReadyForReading` flag to the `Ebook` entity to manage background AI processing states.

### Related Issues

- Fixes #35
- Fixes #36
- Fixes #37

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Reviewed-on: #42
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #42.
This commit is contained in:
@@ -0,0 +1,205 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using FluentResults;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
using NexusReader.Application.Queries.Reader;
|
||||
using NexusReader.Data.Persistence;
|
||||
using VersOne.Epub;
|
||||
|
||||
namespace NexusReader.Infrastructure.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Reads and parses EPUB files from the storage path recorded in the database.
|
||||
/// </summary>
|
||||
public class EpubReaderService : IEpubReader
|
||||
{
|
||||
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
|
||||
private readonly ILogger<EpubReaderService> _logger;
|
||||
private const int WordThreshold = 1000;
|
||||
|
||||
/// <summary>
/// Creates the reader service and stores the collaborators it is given.
/// </summary>
/// <param name="dbContextFactory">Factory kept in <c>_dbContextFactory</c> for creating database contexts.</param>
/// <param name="logger">Logger kept in <c>_logger</c> for diagnostics.</param>
public EpubReaderService(IDbContextFactory<AppDbContext> dbContextFactory, ILogger<EpubReaderService> logger) =>
    (_dbContextFactory, _logger) = (dbContextFactory, logger);
|
||||
|
||||
/// <inheritdoc />
/// <remarks>
/// Looks up the ebook row by id (and by owner when <paramref name="userId"/> is not null),
/// resolves its stored path to a file on disk, opens the EPUB and converts the chapter at
/// <paramref name="chapterIndex"/> in the reading order into content blocks.
/// An out-of-range <paramref name="chapterIndex"/> falls back to the first chapter.
/// Every exception raised along the way is logged and returned as a failed result.
/// </remarks>
public async Task<Result<ReaderPageViewModel>> GetEpubContentAsync(
    Guid ebookId,
    int chapterIndex,
    string? userId = null,
    CancellationToken cancellationToken = default)
{
    try
    {
        // 1. Resolve the file path from the database.
        // DbContext implements IAsyncDisposable, so release it without blocking a thread.
        await using var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken);

        var ebook = await context.Ebooks
            .AsNoTracking()
            .FirstOrDefaultAsync(
                e => e.Id == ebookId && (userId == null || e.UserId == userId),
                cancellationToken);

        if (ebook == null)
        {
            return Result.Fail($"Ebook '{ebookId}' not found for user '{userId}'.");
        }

        // FilePath is stored as a web-relative path (e.g. "uploads/guid_title.epub").
        var fullPath = ResolvePath(ebook.FilePath);
        if (fullPath == null || !File.Exists(fullPath))
        {
            _logger.LogError("EPUB file for ebook {EbookId} not found at path '{FilePath}'.", ebookId, ebook.FilePath);
            return Result.Fail("The EPUB file for this book could not be found on the server.");
        }

        // 2. Parse the EPUB.
        using var bookRef = await EpubReader.OpenBookAsync(fullPath);
        var readingOrder = bookRef.GetReadingOrder();

        if (readingOrder == null || !readingOrder.Any())
        {
            return Result.Fail("The EPUB has no readable content files in ReadingOrder.");
        }

        if (chapterIndex < 0 || chapterIndex >= readingOrder.Count)
        {
            chapterIndex = 0;
        }

        var chapterRef = readingOrder[chapterIndex];

        // A book may ship without navigation data; in that case skip the title lookup
        // instead of enumerating a null list and failing the whole request.
        var navigation = bookRef.GetNavigation();
        var navigationTitle = navigation is null
            ? null
            : FindTitleInNavigation(navigation, chapterRef.FilePath);

        var chapterTitle = navigationTitle
            ?? Path.GetFileNameWithoutExtension(chapterRef.FilePath)
            ?? $"Chapter {chapterIndex + 1}";

        var chapterContent = await chapterRef.ReadContentAsTextAsync();

        // 3. Build content blocks.
        var blocks = BuildContentBlocks(chapterContent);

        return Result.Ok(new ReaderPageViewModel(blocks, chapterIndex, readingOrder.Count, chapterTitle, ebook.Id));
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to process EPUB for ebook {EbookId}.", ebookId);
        return Result.Fail(new Error($"Failed to process EPUB: {ex.Message}").CausedBy(ex));
    }
}

/// <summary>
/// Turns chapter markup into text segment blocks, adding an AI trigger block whenever the
/// running word count reaches <see cref="WordThreshold"/> and one more after the last
/// segment unless the list already ends with a trigger.
/// </summary>
/// <param name="chapterHtml">Raw chapter markup as read from the EPUB.</param>
/// <returns>The ordered blocks; empty when no paragraph survives sanitization.</returns>
private static List<ContentBlock> BuildContentBlocks(string chapterHtml)
{
    var blocks = new List<ContentBlock>();
    var wordsSinceTrigger = 0;
    var blockCounter = 0;

    foreach (var paragraph in ExtractParagraphs(chapterHtml))
    {
        var sanitized = SanitizeParagraph(paragraph);
        if (string.IsNullOrWhiteSpace(sanitized))
        {
            continue;
        }

        blocks.Add(new TextSegmentBlock($"seg-{blockCounter++}", sanitized));
        wordsSinceTrigger += CountWords(sanitized);

        if (wordsSinceTrigger >= WordThreshold)
        {
            blocks.Add(CreateAiTrigger($"trigger-{blockCounter++}"));
            wordsSinceTrigger = 0;
        }
    }

    if (blocks.Count > 0 && blocks[^1] is not AiActionTriggerBlock)
    {
        blocks.Add(CreateAiTrigger($"trigger-{blockCounter}"));
    }

    return blocks;
}
|
||||
|
||||
/// <summary>
/// Attempts to resolve a web-relative storage path to an absolute filesystem path.
/// Searches upward from the app base directory, probing <c>wwwroot</c> and
/// <c>src/NexusReader.Web/wwwroot</c> at every level.
/// </summary>
/// <param name="relativePath">Stored path such as <c>uploads/guid_title.epub</c>; a leading slash is tolerated.</param>
/// <returns>
/// The absolute path of the first existing file found inside one of the probed web roots,
/// or <c>null</c> when the input is blank, escapes the web root, or no file exists.
/// </returns>
private static string? ResolvePath(string relativePath)
{
    if (string.IsNullOrWhiteSpace(relativePath))
    {
        return null;
    }

    // Normalize forward slashes to the OS separator, then drop leading separators:
    // Path.Combine discards every earlier segment when a later one is rooted, so
    // "/uploads/x.epub" would otherwise be probed as an absolute filesystem path.
    var normalized = relativePath
        .Replace('/', Path.DirectorySeparatorChar)
        .TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);

    if (normalized.Length == 0)
    {
        return null;
    }

    for (var dir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory); dir != null; dir = dir.Parent)
    {
        var webRoots = new[]
        {
            Path.Combine(dir.FullName, "wwwroot"),
            // Development layout.
            Path.Combine(dir.FullName, "src", "NexusReader.Web", "wwwroot"),
        };

        foreach (var webRoot in webRoots)
        {
            var candidate = Path.GetFullPath(Path.Combine(webRoot, normalized));

            // Reject paths that leave the web root (".." segments, drive-rooted input).
            if (IsInsideDirectory(webRoot, candidate) && File.Exists(candidate))
            {
                return candidate;
            }
        }
    }

    return null;

    static bool IsInsideDirectory(string directory, string fullPath)
    {
        var root = Path.GetFullPath(directory);
        if (!root.EndsWith(Path.DirectorySeparatorChar))
        {
            root += Path.DirectorySeparatorChar;
        }

        return fullPath.StartsWith(root, StringComparison.Ordinal);
    }
}
|
||||
|
||||
/// <summary>
/// Splits chapter markup into paragraph-level fragments.
/// </summary>
/// <remarks>
/// Only the contents of <c>&lt;body&gt;</c> are considered when that element is present.
/// Block elements (<c>p</c>, <c>h1</c>-<c>h6</c>, <c>ul</c>, <c>ol</c>, <c>blockquote</c>,
/// <c>pre</c>) and <c>hr</c> tags are returned as-is, tags included. When none are found the
/// text is split on line-break tags and blank lines instead.
/// </remarks>
/// <param name="html">Chapter markup; may be empty.</param>
/// <returns>The fragments in document order; empty for null or empty input.</returns>
private static List<string> ExtractParagraphs(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return new List<string>();
    }

    const RegexOptions blockOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    var bodyMatch = Regex.Match(html, @"<body\b[^>]*>(.*?)</body>", blockOptions);
    var content = bodyMatch.Success ? bodyMatch.Groups[1].Value : html;

    var paragraphs = Regex
        .Matches(content, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>", blockOptions)
        .Select(match => match.Value)
        .ToList();

    if (paragraphs.Count > 0)
    {
        return paragraphs;
    }

    // Fallback for markup without block elements. The tag pattern also accepts the
    // XHTML form "<br/>" and upper-case tags, which a literal "<br>" / "<br />" split misses.
    return Regex
        .Split(content, @"<br\s*/?>|\n\n|\r\n\r\n", RegexOptions.IgnoreCase)
        .Where(piece => piece.Length > 0)
        .ToList();
}
|
||||
|
||||
/// <summary>
/// Reduces a markup fragment to a small set of attribute-free formatting tags.
/// </summary>
/// <remarks>
/// <c>style</c> and <c>script</c> elements are removed with their contents, every tag outside
/// the allowed list is dropped (its inner text is kept), and allowed opening tags lose their
/// attributes. Character entities are then decoded, except those that stand for
/// <c>&lt;</c>, <c>&gt;</c> or <c>&amp;</c>: decoding those after the tags were filtered
/// would turn escaped text such as <c>&amp;lt;script&amp;gt;</c> back into live markup.
/// </remarks>
/// <param name="html">Markup fragment to clean; may be empty.</param>
/// <returns>The cleaned, trimmed fragment; empty for null or empty input.</returns>
private static string SanitizeParagraph(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?</\1>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase);
    clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase);

    // Decode entity by entity so markup-significant characters stay escaped.
    clean = Regex.Replace(clean, @"&(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);", DecodeTextEntity, RegexOptions.IgnoreCase);

    return clean.Trim();

    static string DecodeTextEntity(Match entity)
    {
        var decoded = System.Net.WebUtility.HtmlDecode(entity.Value);
        return decoded is "<" or ">" or "&" ? entity.Value : decoded;
    }
}
|
||||
|
||||
/// <summary>
/// Counts the words in <paramref name="text"/>, where a word is a maximal run of
/// characters that are not a space, carriage return, line feed or tab.
/// </summary>
/// <param name="text">Text to measure; null or whitespace-only text counts as zero words.</param>
/// <returns>The number of words found.</returns>
private static int CountWords(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return 0;
    }

    var words = 0;
    var insideWord = false;

    foreach (var ch in text)
    {
        var isSeparator = ch is ' ' or '\r' or '\n' or '\t';

        // A word starts on the first non-separator character after a separator.
        if (!isSeparator && !insideWord)
        {
            words++;
        }

        insideWord = !isSeparator;
    }

    return words;
}
|
||||
|
||||
/// <summary>
/// Builds an <see cref="AiActionTriggerBlock"/> with the given id, a fixed Polish prompt
/// ("An interesting passage was detected! Would you like me to generate a summary or a quiz
/// from this chapter?") and three fixed choices ("Summarize", "Generate Quiz", "Skip").
/// </summary>
/// <param name="id">Identifier passed as the block's first constructor argument.</param>
/// <returns>The new trigger block.</returns>
private static AiActionTriggerBlock CreateAiTrigger(string id)
{
    const string prompt = "Wykryto ciekawy fragment! Czy chcesz, abym wygenerował podsumowanie lub quiz z tego rozdziału?";
    var choices = new List<string> { "Podsumuj", "Generuj Quiz", "Pomiń" };

    return new AiActionTriggerBlock(id, prompt, choices);
}
|
||||
|
||||
/// <summary>
/// Searches a navigation tree depth-first for the first entry whose link points at
/// <paramref name="filePath"/> (or at its bare file name) and returns that entry's title.
/// </summary>
/// <param name="navigation">Navigation entries to search; null is treated as an empty tree.</param>
/// <param name="filePath">Path of the content file to look up.</param>
/// <returns>The matching entry's title, or <c>null</c> when nothing matches or an input is missing.</returns>
private static string? FindTitleInNavigation(IEnumerable<EpubNavigationItemRef>? navigation, string? filePath)
{
    // A missing navigation list must not fail the lookup: callers have fallback titles.
    if (navigation is null || string.IsNullOrEmpty(filePath))
    {
        return null;
    }

    var fileName = Path.GetFileName(filePath);

    foreach (var item in navigation)
    {
        var linkedPath = item.Link?.ContentFilePath;
        if (linkedPath is not null && (linkedPath == filePath || linkedPath == fileName))
        {
            return item.Title;
        }

        // The recursive call handles null or empty child lists itself.
        var nestedTitle = FindTitleInNavigation(item.NestedItems, filePath);
        if (nestedTitle != null)
        {
            return nestedTitle;
        }
    }

    return null;
}
|
||||
}
|
||||
Reference in New Issue
Block a user