feat: implement background ebook indexing with progress tracking and real-time UI updates
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
using FluentResults;
|
||||
|
||||
namespace NexusReader.Application.Abstractions.Services;
|
||||
|
||||
/// <summary>
/// Abstraction over EPUB reading that exposes the raw text of a book, chapter by chapter.
/// </summary>
public interface IEpubExtractor
{
    /// <summary>
    /// Produces the sanitized plain text of every chapter contained in an EPUB file.
    /// </summary>
    /// <param name="relativePath">Relative storage path of the EPUB file to read.</param>
    /// <param name="cancellationToken">Token that can be used to cancel the extraction.</param>
    /// <returns>
    /// A successful result carrying the list of plain-text chapters, or a failed result
    /// when the text could not be extracted.
    /// </returns>
    Task<Result<List<string>>> ExtractChaptersTextAsync(
        string relativePath,
        CancellationToken cancellationToken = default);
}
|
||||
@@ -1,5 +1,6 @@
|
||||
using FluentResults;
|
||||
using MediatR;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using NexusReader.Application.Abstractions.Messaging;
|
||||
using NexusReader.Application.Abstractions.Persistence;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
@@ -11,13 +12,16 @@ public class IngestEbookCommandHandler : IRequestHandler<IngestEbookCommand, Res
|
||||
{
|
||||
private readonly IEbookRepository _ebookRepository;
|
||||
private readonly IBookStorageService _storageService;
|
||||
private readonly IServiceScopeFactory _scopeFactory;
|
||||
|
||||
/// <summary>
/// Creates the handler with the services it needs to persist an ebook and to start its background processing.
/// </summary>
/// <param name="ebookRepository">Repository used to add the new ebook and save the change.</param>
/// <param name="storageService">Book storage service (its usage lies outside the visible part of this file).</param>
/// <param name="scopeFactory">
/// Factory for the dependency-injection scope in which the background processing command is sent.
/// </param>
public IngestEbookCommandHandler(
    IEbookRepository ebookRepository,
    IBookStorageService storageService,
    IServiceScopeFactory scopeFactory)
{
    _ebookRepository = ebookRepository;
    _storageService = storageService;
    _scopeFactory = scopeFactory;
}
|
||||
|
||||
public async Task<Result<Guid>> Handle(IngestEbookCommand request, CancellationToken cancellationToken)
|
||||
@@ -72,6 +76,21 @@ public class IngestEbookCommandHandler : IRequestHandler<IngestEbookCommand, Res
|
||||
_ebookRepository.AddEbook(ebook);
|
||||
await _ebookRepository.SaveChangesAsync(cancellationToken);
|
||||
|
||||
// 4. Trigger asynchronous background processing and vector indexing
|
||||
// Fire-and-forget: the work runs in its own DI scope and is sent with CancellationToken.None,
// so it is not tied to the token of the request that started it.
_ = Task.Run(async () =>
{
    // Resolved inside the try block, but declared here so the catch block can still report with it.
    Microsoft.Extensions.Logging.ILogger? logger = null;

    try
    {
        using var scope = _scopeFactory.CreateScope();
        logger = scope.ServiceProvider
            .GetService<Microsoft.Extensions.Logging.ILogger<IngestEbookCommandHandler>>();

        var mediator = scope.ServiceProvider.GetRequiredService<IMediator>();
        await mediator.Send(
            new ProcessEbookCommand(ebook.Id, request.UserId, request.TenantId),
            CancellationToken.None);
    }
    catch (Exception ex)
    {
        // Nobody awaits this task, so a failure that is not reported here is lost for good.
        // (An exception inside Task.Run is captured by the task; it would not crash the thread pool.)
        if (logger is not null)
        {
            Microsoft.Extensions.Logging.LoggerExtensions.LogError(
                logger,
                ex,
                "[IngestEbook] Background processing failed for Ebook: {EbookId}",
                ebook.Id);
        }
        else
        {
            System.Diagnostics.Trace.TraceError(
                "[IngestEbook] Background processing failed for Ebook {0}: {1}",
                ebook.Id,
                ex);
        }
    }
});
|
||||
|
||||
return Result.Ok(ebook.Id);
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using FluentResults;
|
||||
using MediatR;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NexusReader.Application.Abstractions.Messaging;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
using NexusReader.Data.Persistence;
|
||||
|
||||
namespace NexusReader.Application.Commands.Library;
|
||||
|
||||
/// <summary>
/// Command requesting text extraction and knowledge indexing of an already stored ebook.
/// </summary>
/// <param name="EbookId">Identifier of the ebook to process.</param>
/// <param name="UserId">Identifier of the user who receives the ingestion progress broadcasts.</param>
/// <param name="TenantId">Tenant identifier passed on to the knowledge service.</param>
public record ProcessEbookCommand(Guid EbookId, string UserId, string TenantId) : ICommand<bool>;
|
||||
|
||||
/// <summary>
/// Handles <see cref="ProcessEbookCommand"/>: loads the ebook, extracts its chapter text, passes the text in
/// chunks to the knowledge service, broadcasts progress to the user and marks the ebook as ready for reading.
/// </summary>
public class ProcessEbookCommandHandler : IRequestHandler<ProcessEbookCommand, Result<bool>>
{
    /// <summary>Chapters with fewer characters than this are skipped.</summary>
    private const int MinChapterLength = 100;

    /// <summary>Upper bound on the number of words in one chunk passed to the knowledge service.</summary>
    private const int MaxWordsPerChunk = 3000;

    private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
    private readonly IKnowledgeService _knowledgeService;
    private readonly IEpubExtractor _epubExtractor;
    private readonly ISyncBroadcaster _broadcaster;
    private readonly ILogger<ProcessEbookCommandHandler> _logger;

    /// <summary>
    /// Creates the handler with its collaborators.
    /// </summary>
    /// <param name="dbContextFactory">Factory for the database context used to load and update the ebook.</param>
    /// <param name="knowledgeService">Service that receives each text chunk for knowledge-map generation.</param>
    /// <param name="epubExtractor">Extractor that returns the plain text of the EPUB chapters.</param>
    /// <param name="broadcaster">Broadcaster used to push ingestion progress to the user.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    public ProcessEbookCommandHandler(
        IDbContextFactory<AppDbContext> dbContextFactory,
        IKnowledgeService knowledgeService,
        IEpubExtractor epubExtractor,
        ISyncBroadcaster broadcaster,
        ILogger<ProcessEbookCommandHandler> logger)
    {
        _dbContextFactory = dbContextFactory;
        _knowledgeService = knowledgeService;
        _epubExtractor = epubExtractor;
        _broadcaster = broadcaster;
        _logger = logger;
    }

    /// <summary>
    /// Processes a single ebook end to end.
    /// </summary>
    /// <param name="request">Command identifying the ebook, the user to notify and the tenant.</param>
    /// <param name="cancellationToken">Token that cancels the processing.</param>
    /// <returns>
    /// <c>Ok(true)</c> once the ebook has been marked as ready; a failed result when the ebook is missing,
    /// extraction fails, the EPUB has no chapters, or an unexpected exception (including cancellation) occurs.
    /// A failure of an individual chunk is logged and does not fail the whole run.
    /// </returns>
    public async Task<Result<bool>> Handle(ProcessEbookCommand request, CancellationToken cancellationToken)
    {
        _logger.LogInformation("[ProcessEbook] Starting background processing for Ebook: {EbookId}", request.EbookId);

        try
        {
            await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Wyszukiwanie e-booka w bazie danych...", 0.05, cancellationToken);

            // Dispose asynchronously so that closing the context does not block a thread.
            await using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
            var ebook = await dbContext.Ebooks.FindAsync(new object[] { request.EbookId }, cancellationToken);
            if (ebook == null)
            {
                _logger.LogError("[ProcessEbook] Ebook not found in database: {EbookId}", request.EbookId);
                var notFoundMessage = $"Ebook nie znaleziony w bazie danych: {request.EbookId}";
                await BroadcastFailureAsync(request.UserId, notFoundMessage);
                return Result.Fail<bool>(notFoundMessage);
            }

            _logger.LogInformation("[ProcessEbook] Extracting chapters text for Ebook: {Title} ({FilePath})", ebook.Title, ebook.FilePath);
            await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Otwieranie i parsowanie pliku EPUB...", 0.1, cancellationToken);

            var extractionResult = await _epubExtractor.ExtractChaptersTextAsync(ebook.FilePath, cancellationToken);
            if (extractionResult.IsFailed)
            {
                var errorMsg = extractionResult.Errors.FirstOrDefault()?.Message ?? "Failed to extract text chapters.";
                _logger.LogError("[ProcessEbook] Extraction failed: {Error}", errorMsg);
                await BroadcastFailureAsync(request.UserId, errorMsg);
                return Result.Fail<bool>(extractionResult.Errors);
            }

            var chapters = extractionResult.Value;
            if (chapters is null || chapters.Count == 0)
            {
                _logger.LogWarning("[ProcessEbook] EPUB has no readable content files: {EbookId}", request.EbookId);
                const string noChaptersMessage = "EPUB nie zawiera czytelnych rozdziałów.";
                await BroadcastFailureAsync(request.UserId, noChaptersMessage);
                return Result.Fail<bool>(noChaptersMessage);
            }

            int totalChapters = chapters.Count;
            _logger.LogInformation("[ProcessEbook] Processing {Count} chapters for Ebook: {Title}", totalChapters, ebook.Title);

            await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, $"Analizowanie struktury ({totalChapters} rozdziałów)...", 0.15, cancellationToken);

            int processedChapters = 0;

            for (int i = 0; i < totalChapters; i++)
            {
                var cleanText = chapters[i];

                if (cleanText.Length < MinChapterLength)
                {
                    _logger.LogInformation("[ProcessEbook] Skipping chapter {Index} (text too short: {Length} chars)", i, cleanText.Length);
                    processedChapters++;
                    continue;
                }

                // Chunk the text to maintain granular Knowledge Units
                var chunks = ChunkText(cleanText, MaxWordsPerChunk);
                _logger.LogInformation("[ProcessEbook] Chapter {Index} split into {ChunkCount} chunk(s)", i, chunks.Count);

                foreach (var chunk in chunks)
                {
                    try
                    {
                        // Invoke GetKnowledgeMapAsync to extract, embed, and upsert knowledge units
                        var result = await _knowledgeService.GetKnowledgeMapAsync(chunk, request.TenantId, request.EbookId, cancellationToken);
                        if (result.IsFailed)
                        {
                            _logger.LogWarning("[ProcessEbook] Failed to generate knowledge map for a chunk of chapter {Index}: {Error}", i, result.Errors.FirstOrDefault()?.Message);
                        }
                    }
                    catch (Exception ex) when (ex is not OperationCanceledException)
                    {
                        // One bad chunk must not abort the book, but cancellation has to stop the loop,
                        // so it is deliberately excluded from this best-effort handler.
                        _logger.LogError(ex, "[ProcessEbook] Exception during AI vectorization of chapter {Index} chunk", i);
                    }
                }

                processedChapters++;

                // Chapter processing covers the 15%..90% band of the overall progress.
                double progress = 0.15 + (0.75 * processedChapters / totalChapters);
                await _broadcaster.BroadcastIngestionProgressAsync(
                    request.UserId,
                    $"Przetwarzanie rozdziału {processedChapters} z {totalChapters} przez AI...",
                    progress,
                    cancellationToken);
            }

            // Mark the ebook as ready
            ebook.IsReadyForReading = true;
            await dbContext.SaveChangesAsync(cancellationToken);

            _logger.LogInformation("[ProcessEbook] Ingestion and vector indexing completed for: {Title}", ebook.Title);

            await _broadcaster.BroadcastIngestionProgressAsync(
                request.UserId,
                "Indeksowanie wektorowe e-booka przez Nexus AI zakończone pomyślnie!",
                1.0,
                cancellationToken);

            return Result.Ok(true);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "[ProcessEbook] Critical error during background EPUB vectorization of ebook {EbookId}", request.EbookId);
            await BroadcastFailureAsync(request.UserId, ex.Message);
            return Result.Fail<bool>(new Error("Wystąpił błąd podczas indeksowania e-booka przez AI").CausedBy(ex));
        }
    }

    /// <summary>
    /// Tells the user that indexing failed, using the same message prefix and final progress value (1.0)
    /// for every failure path.
    /// </summary>
    /// <param name="userId">User to notify.</param>
    /// <param name="reason">Human-readable failure reason appended to the message.</param>
    /// <remarks>
    /// Uses <see cref="CancellationToken.None"/> because the failure being reported may itself be a
    /// cancellation, and never throws so that a broadcast problem cannot mask the original error.
    /// </remarks>
    private async Task BroadcastFailureAsync(string userId, string reason)
    {
        try
        {
            await _broadcaster.BroadcastIngestionProgressAsync(
                userId,
                $"Błąd indeksowania: {reason}",
                1.0,
                CancellationToken.None);
        }
        catch (Exception broadcastEx)
        {
            _logger.LogWarning(broadcastEx, "[ProcessEbook] Failed to broadcast ingestion failure to user {UserId}", userId);
        }
    }

    /// <summary>
    /// Splits text into consecutive chunks of at most <paramref name="maxWords"/> words.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <param name="maxWords">Maximum number of words per chunk; must be positive.</param>
    /// <returns>
    /// A single-element list holding <paramref name="text"/> unchanged when it fits in one chunk; otherwise
    /// the chunks in order, each with its words joined by single spaces.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxWords"/> is zero or negative.</exception>
    private static List<string> ChunkText(string text, int maxWords = MaxWordsPerChunk)
    {
        if (maxWords <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(maxWords), maxWords, "The chunk size must be positive.");
        }

        // A null separator splits on every white-space character, so words separated by newlines or
        // tabs are counted too; splitting on ' ' alone under-counts them and lets chunks exceed the limit.
        var words = text.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        if (words.Length <= maxWords)
        {
            return new List<string> { text };
        }

        var chunks = new List<string>((words.Length + maxWords - 1) / maxWords);
        for (int start = 0; start < words.Length; start += maxWords)
        {
            int length = Math.Min(maxWords, words.Length - start);
            chunks.Add(string.Join(" ", words, start, length));
        }

        return chunks;
    }
}
|
||||
@@ -38,4 +38,5 @@ public record LastReadBookDto
|
||||
public string? LastChapter { get; init; }
|
||||
public int LastChapterIndex { get; init; }
|
||||
public string? Description { get; init; }
|
||||
public bool IsReadyForReading { get; init; }
|
||||
}
|
||||
|
||||
@@ -1,9 +1,24 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace NexusReader.Application.Queries.Graph;
|
||||
|
||||
/// <summary>
/// A node of the graph payload. Every property is serialized under the lower-case JSON name given in its attribute.
/// </summary>
/// <param name="Id">Node identifier, serialized as <c>id</c>.</param>
/// <param name="Label">Node label, serialized as <c>label</c>.</param>
/// <param name="Group">Node group, serialized as <c>group</c>.</param>
/// <param name="Description">Optional description, serialized as <c>description</c>.</param>
/// <param name="Type">Optional node type, serialized as <c>type</c>.</param>
// NOTE(review): Description sits before Type, so a positional call written against the earlier
// (Id, Label, Group, Type) shape would now pass its type as the description — verify call sites.
public record GraphNodeDto(
    [property: JsonPropertyName("id")] string Id,
    [property: JsonPropertyName("label")] string Label,
    [property: JsonPropertyName("group")] string Group,
    [property: JsonPropertyName("description")] string? Description = null,
    [property: JsonPropertyName("type")] string? Type = null
);

/// <summary>
/// A link between two graph nodes.
/// </summary>
/// <param name="Source">Source end of the link, serialized as <c>source</c>.</param>
/// <param name="Target">Target end of the link, serialized as <c>target</c>.</param>
/// <param name="RelationType">Relation type; note that it is serialized as <c>type</c>.</param>
/// <param name="Value">Numeric value of the link (defaults to 1), serialized as <c>value</c>.</param>
public record GraphLinkDto(
    [property: JsonPropertyName("source")] string Source,
    [property: JsonPropertyName("target")] string Target,
    [property: JsonPropertyName("type")] string RelationType,
    [property: JsonPropertyName("value")] int Value = 1
);

/// <summary>
/// Complete graph payload: the nodes and the links between them. Both lists start empty.
/// </summary>
public record GraphDataDto
{
    /// <summary>Graph nodes, serialized as <c>nodes</c>.</summary>
    [JsonPropertyName("nodes")] public List<GraphNodeDto> Nodes { get; init; } = new();

    /// <summary>Graph links, serialized as <c>links</c>.</summary>
    [JsonPropertyName("links")] public List<GraphLinkDto> Links { get; init; } = new();
}
|
||||
|
||||
@@ -37,7 +37,8 @@ public class GetMyEbooksQueryHandler : IRequestHandler<GetMyEbooksQuery, Result<
|
||||
Progress = e.Progress,
|
||||
LastChapter = e.LastChapter ?? "Rozpoczynanie...",
|
||||
LastChapterIndex = e.LastChapterIndex,
|
||||
Description = e.Description
|
||||
Description = e.Description,
|
||||
IsReadyForReading = e.IsReadyForReading
|
||||
})
|
||||
.ToListAsync(cancellationToken);
|
||||
|
||||
|
||||
@@ -48,7 +48,8 @@ public class GetUserProfileQueryHandler : IRequestHandler<GetUserProfileQuery, R
|
||||
Progress = e.Progress,
|
||||
LastChapter = e.LastChapter ?? "Rozpoczynanie...",
|
||||
LastChapterIndex = e.LastChapterIndex,
|
||||
Description = e.Description
|
||||
Description = e.Description,
|
||||
IsReadyForReading = e.IsReadyForReading
|
||||
}).FirstOrDefault(),
|
||||
Roles = dbContext.UserRoles
|
||||
.Where(ur => ur.UserId == u.Id)
|
||||
|
||||
Reference in New Issue
Block a user