feat: implement background ebook indexing with progress tracking and real-time UI updates

This commit is contained in:
2026-05-25 08:51:21 +02:00
parent 39717725ec
commit 1c6ee82d01
20 changed files with 718 additions and 57 deletions
@@ -0,0 +1,17 @@
using FluentResults;
namespace NexusReader.Application.Abstractions.Services;
/// <summary>
/// Service abstraction to extract raw text content from EPUB chapters.
/// </summary>
public interface IEpubExtractor
{
/// <summary>
/// Extracts the sanitized, plain-text content of each chapter in the EPUB file.
/// </summary>
/// <param name="relativePath">The relative storage path of the EPUB file.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>A list of plain-text chapters, or a failure result.</returns>
Task<Result<List<string>>> ExtractChaptersTextAsync(string relativePath, CancellationToken cancellationToken = default);
}
@@ -1,5 +1,6 @@
using FluentResults;
using MediatR;
using Microsoft.Extensions.DependencyInjection;
using NexusReader.Application.Abstractions.Messaging;
using NexusReader.Application.Abstractions.Persistence;
using NexusReader.Application.Abstractions.Services;
@@ -11,13 +12,16 @@ public class IngestEbookCommandHandler : IRequestHandler<IngestEbookCommand, Res
{
private readonly IEbookRepository _ebookRepository;
private readonly IBookStorageService _storageService;
private readonly IServiceScopeFactory _scopeFactory;
public IngestEbookCommandHandler(
IEbookRepository ebookRepository,
IBookStorageService storageService)
IBookStorageService storageService,
IServiceScopeFactory scopeFactory)
{
_ebookRepository = ebookRepository;
_storageService = storageService;
_scopeFactory = scopeFactory;
}
public async Task<Result<Guid>> Handle(IngestEbookCommand request, CancellationToken cancellationToken)
@@ -72,6 +76,21 @@ public class IngestEbookCommandHandler : IRequestHandler<IngestEbookCommand, Res
_ebookRepository.AddEbook(ebook);
await _ebookRepository.SaveChangesAsync(cancellationToken);
// 4. Trigger asynchronous background processing and vector indexing
_ = Task.Run(async () =>
{
try
{
using var scope = _scopeFactory.CreateScope();
var mediator = scope.ServiceProvider.GetRequiredService<IMediator>();
await mediator.Send(new ProcessEbookCommand(ebook.Id, request.UserId, request.TenantId));
}
catch (Exception)
{
// Swallowed to prevent ThreadPool crashes
}
});
return Result.Ok(ebook.Id);
}
catch (Exception ex)
@@ -0,0 +1,177 @@
using System.Text.RegularExpressions;
using FluentResults;
using MediatR;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Messaging;
using NexusReader.Application.Abstractions.Services;
using NexusReader.Data.Persistence;
namespace NexusReader.Application.Commands.Library;
public record ProcessEbookCommand(
Guid EbookId,
string UserId,
string TenantId
) : ICommand<bool>;
public class ProcessEbookCommandHandler : IRequestHandler<ProcessEbookCommand, Result<bool>>
{
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
private readonly IKnowledgeService _knowledgeService;
private readonly IEpubExtractor _epubExtractor;
private readonly ISyncBroadcaster _broadcaster;
private readonly ILogger<ProcessEbookCommandHandler> _logger;
public ProcessEbookCommandHandler(
IDbContextFactory<AppDbContext> dbContextFactory,
IKnowledgeService knowledgeService,
IEpubExtractor epubExtractor,
ISyncBroadcaster broadcaster,
ILogger<ProcessEbookCommandHandler> logger)
{
_dbContextFactory = dbContextFactory;
_knowledgeService = knowledgeService;
_epubExtractor = epubExtractor;
_broadcaster = broadcaster;
_logger = logger;
}
public async Task<Result<bool>> Handle(ProcessEbookCommand request, CancellationToken cancellationToken)
{
_logger.LogInformation("[ProcessEbook] Starting background processing for Ebook: {EbookId}", request.EbookId);
try
{
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Wyszukiwanie e-booka w bazie danych...", 0.05, cancellationToken);
using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
var ebook = await dbContext.Ebooks.FindAsync(new object[] { request.EbookId }, cancellationToken);
if (ebook == null)
{
_logger.LogError("[ProcessEbook] Ebook not found in database: {EbookId}", request.EbookId);
return Result.Fail<bool>($"Ebook nie znaleziony w bazie danych: {request.EbookId}");
}
_logger.LogInformation("[ProcessEbook] Extracting chapters text for Ebook: {Title} ({FilePath})", ebook.Title, ebook.FilePath);
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Otwieranie i parsowanie pliku EPUB...", 0.1, cancellationToken);
var extractionResult = await _epubExtractor.ExtractChaptersTextAsync(ebook.FilePath, cancellationToken);
if (extractionResult.IsFailed)
{
var errorMsg = extractionResult.Errors.FirstOrDefault()?.Message ?? "Failed to extract text chapters.";
_logger.LogError("[ProcessEbook] Extraction failed: {Error}", errorMsg);
return Result.Fail<bool>(extractionResult.Errors);
}
var chapters = extractionResult.Value;
if (chapters == null || !chapters.Any())
{
_logger.LogWarning("[ProcessEbook] EPUB has no readable content files: {EbookId}", request.EbookId);
return Result.Fail<bool>("EPUB nie zawiera czytelnych rozdziałów.");
}
int totalChapters = chapters.Count;
_logger.LogInformation("[ProcessEbook] Processing {Count} chapters for Ebook: {Title}", totalChapters, ebook.Title);
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, $"Analizowanie struktury ({totalChapters} rozdziałów)...", 0.15, cancellationToken);
int processedChapters = 0;
for (int i = 0; i < totalChapters; i++)
{
var cleanText = chapters[i];
if (cleanText.Length < 100)
{
_logger.LogInformation("[ProcessEbook] Skipping chapter {Index} (text too short: {Length} chars)", i, cleanText.Length);
processedChapters++;
continue;
}
// Chunk the text to maintain granular Knowledge Units
var chunks = ChunkText(cleanText, 3000);
_logger.LogInformation("[ProcessEbook] Chapter {Index} split into {ChunkCount} chunk(s)", i, chunks.Count);
foreach (var chunk in chunks)
{
try
{
// Invoke GetKnowledgeMapAsync to extract, embed, and upsert knowledge units
var result = await _knowledgeService.GetKnowledgeMapAsync(chunk, request.TenantId, request.EbookId, cancellationToken);
if (result.IsFailed)
{
_logger.LogWarning("[ProcessEbook] Failed to generate knowledge map for a chunk of chapter {Index}: {Error}", i, result.Errors.FirstOrDefault()?.Message);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "[ProcessEbook] Exception during AI vectorization of chapter {Index} chunk", i);
}
}
processedChapters++;
double progress = 0.15 + (0.75 * processedChapters / totalChapters);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
$"Przetwarzanie rozdziału {processedChapters} z {totalChapters} przez AI...",
progress,
cancellationToken);
}
// Mark the ebook as ready
ebook.IsReadyForReading = true;
await dbContext.SaveChangesAsync(cancellationToken);
_logger.LogInformation("[ProcessEbook] Ingestion and vector indexing completed for: {Title}", ebook.Title);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
"Indeksowanie wektorowe e-booka przez Nexus AI zakończone pomyślnie!",
1.0,
cancellationToken);
return Result.Ok(true);
}
catch (Exception ex)
{
_logger.LogError(ex, "[ProcessEbook] Critical error during background EPUB vectorization of ebook {EbookId}", request.EbookId);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
$"Błąd indeksowania: {ex.Message}",
1.0,
cancellationToken);
return Result.Fail<bool>(new Error("Wystąpił błąd podczas indeksowania e-booka przez AI").CausedBy(ex));
}
}
private static List<string> ChunkText(string text, int maxWords = 3000)
{
var words = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
var chunks = new List<string>();
if (words.Length <= maxWords)
{
chunks.Add(text);
return chunks;
}
var currentChunk = new List<string>();
int count = 0;
foreach (var word in words)
{
currentChunk.Add(word);
count++;
if (count >= maxWords)
{
chunks.Add(string.Join(" ", currentChunk));
currentChunk.Clear();
count = 0;
}
}
if (currentChunk.Any())
{
chunks.Add(string.Join(" ", currentChunk));
}
return chunks;
}
}
@@ -38,4 +38,5 @@ public record LastReadBookDto
public string? LastChapter { get; init; }
public int LastChapterIndex { get; init; }
public string? Description { get; init; }
public bool IsReadyForReading { get; init; }
}
@@ -1,9 +1,24 @@
using System.Text.Json.Serialization;
namespace NexusReader.Application.Queries.Graph;
public record GraphNodeDto(string Id, string Label, string Group, string? Type = null);
public record GraphLinkDto(string Source, string Target, string RelationType, int Value = 1);
public record GraphNodeDto(
[property: JsonPropertyName("id")] string Id,
[property: JsonPropertyName("label")] string Label,
[property: JsonPropertyName("group")] string Group,
[property: JsonPropertyName("description")] string? Description = null,
[property: JsonPropertyName("type")] string? Type = null
);
public record GraphLinkDto(
[property: JsonPropertyName("source")] string Source,
[property: JsonPropertyName("target")] string Target,
[property: JsonPropertyName("type")] string RelationType,
[property: JsonPropertyName("value")] int Value = 1
);
public record GraphDataDto
{
public List<GraphNodeDto> Nodes { get; init; } = new();
public List<GraphLinkDto> Links { get; init; } = new();
[JsonPropertyName("nodes")] public List<GraphNodeDto> Nodes { get; init; } = new();
[JsonPropertyName("links")] public List<GraphLinkDto> Links { get; init; } = new();
}
@@ -37,7 +37,8 @@ public class GetMyEbooksQueryHandler : IRequestHandler<GetMyEbooksQuery, Result<
Progress = e.Progress,
LastChapter = e.LastChapter ?? "Rozpoczynanie...",
LastChapterIndex = e.LastChapterIndex,
Description = e.Description
Description = e.Description,
IsReadyForReading = e.IsReadyForReading
})
.ToListAsync(cancellationToken);
@@ -48,7 +48,8 @@ public class GetUserProfileQueryHandler : IRequestHandler<GetUserProfileQuery, R
Progress = e.Progress,
LastChapter = e.LastChapter ?? "Rozpoczynanie...",
LastChapterIndex = e.LastChapterIndex,
Description = e.Description
Description = e.Description,
IsReadyForReading = e.IsReadyForReading
}).FirstOrDefault(),
Roles = dbContext.UserRoles
.Where(ur => ur.UserId == u.Id)