Files
Nexus.Reader/src/NexusReader.Application/Commands/Library/ProcessEbookCommand.cs
T

177 lines
7.4 KiB
C#

using System.Text.RegularExpressions;
using FluentResults;
using MediatR;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Messaging;
using NexusReader.Application.Abstractions.Persistence;
using NexusReader.Application.Abstractions.Services;
namespace NexusReader.Application.Commands.Library;
public record ProcessEbookCommand(
Guid EbookId,
string UserId,
string TenantId
) : ICommand<bool>;
public class ProcessEbookCommandHandler : IRequestHandler<ProcessEbookCommand, Result<bool>>
{
private readonly IEbookRepository _ebookRepository;
private readonly IKnowledgeService _knowledgeService;
private readonly IEpubExtractor _epubExtractor;
private readonly ISyncBroadcaster _broadcaster;
private readonly ILogger<ProcessEbookCommandHandler> _logger;
public ProcessEbookCommandHandler(
IEbookRepository ebookRepository,
IKnowledgeService knowledgeService,
IEpubExtractor epubExtractor,
ISyncBroadcaster broadcaster,
ILogger<ProcessEbookCommandHandler> logger)
{
_ebookRepository = ebookRepository;
_knowledgeService = knowledgeService;
_epubExtractor = epubExtractor;
_broadcaster = broadcaster;
_logger = logger;
}
public async Task<Result<bool>> Handle(ProcessEbookCommand request, CancellationToken cancellationToken)
{
_logger.LogInformation("[ProcessEbook] Starting background processing for Ebook: {EbookId}", request.EbookId);
try
{
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Wyszukiwanie e-booka w bazie danych...", 0.05, cancellationToken);
var ebook = await _ebookRepository.FindByIdAsync(request.EbookId, cancellationToken);
if (ebook == null)
{
_logger.LogError("[ProcessEbook] Ebook not found in database: {EbookId}", request.EbookId);
return Result.Fail<bool>($"Ebook nie znaleziony w bazie danych: {request.EbookId}");
}
_logger.LogInformation("[ProcessEbook] Extracting chapters text for Ebook: {Title} ({FilePath})", ebook.Title, ebook.FilePath);
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, "Otwieranie i parsowanie pliku EPUB...", 0.1, cancellationToken);
var extractionResult = await _epubExtractor.ExtractChaptersTextAsync(ebook.FilePath, cancellationToken);
if (extractionResult.IsFailed)
{
var errorMsg = extractionResult.Errors.FirstOrDefault()?.Message ?? "Failed to extract text chapters.";
_logger.LogError("[ProcessEbook] Extraction failed: {Error}", errorMsg);
return Result.Fail<bool>(extractionResult.Errors);
}
var chapters = extractionResult.Value;
if (chapters == null || !chapters.Any())
{
_logger.LogWarning("[ProcessEbook] EPUB has no readable content files: {EbookId}", request.EbookId);
return Result.Fail<bool>("EPUB nie zawiera czytelnych rozdziałów.");
}
int totalChapters = chapters.Count;
_logger.LogInformation("[ProcessEbook] Processing {Count} chapters for Ebook: {Title}", totalChapters, ebook.Title);
await _broadcaster.BroadcastIngestionProgressAsync(request.UserId, $"Analizowanie struktury ({totalChapters} rozdziałów)...", 0.15, cancellationToken);
int processedChapters = 0;
for (int i = 0; i < totalChapters; i++)
{
var cleanText = chapters[i];
if (cleanText.Length < 100)
{
_logger.LogInformation("[ProcessEbook] Skipping chapter {Index} (text too short: {Length} chars)", i, cleanText.Length);
processedChapters++;
continue;
}
// Chunk the text to maintain granular Knowledge Units
var chunks = ChunkText(cleanText, 3000);
_logger.LogInformation("[ProcessEbook] Chapter {Index} split into {ChunkCount} chunk(s)", i, chunks.Count);
foreach (var chunk in chunks)
{
try
{
// Invoke GetKnowledgeMapAsync to extract, embed, and upsert knowledge units
var result = await _knowledgeService.GetKnowledgeMapAsync(chunk, request.TenantId, request.EbookId, cancellationToken);
if (result.IsFailed)
{
_logger.LogWarning("[ProcessEbook] Failed to generate knowledge map for a chunk of chapter {Index}: {Error}", i, result.Errors.FirstOrDefault()?.Message);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "[ProcessEbook] Exception during AI vectorization of chapter {Index} chunk", i);
}
}
processedChapters++;
double progress = 0.15 + (0.75 * processedChapters / totalChapters);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
$"Przetwarzanie rozdziału {processedChapters} z {totalChapters} przez AI...",
progress,
cancellationToken);
}
// Mark the ebook as ready
ebook.IsReadyForReading = true;
await _ebookRepository.SaveChangesAsync(cancellationToken);
_logger.LogInformation("[ProcessEbook] Ingestion and vector indexing completed for: {Title}", ebook.Title);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
"Indeksowanie wektorowe e-booka przez Nexus AI zakończone pomyślnie!",
1.0,
cancellationToken);
return Result.Ok(true);
}
catch (Exception ex)
{
_logger.LogError(ex, "[ProcessEbook] Critical error during background EPUB vectorization of ebook {EbookId}", request.EbookId);
await _broadcaster.BroadcastIngestionProgressAsync(
request.UserId,
$"Błąd indeksowania: {ex.Message}",
1.0,
cancellationToken);
return Result.Fail<bool>(new Error("Wystąpił błąd podczas indeksowania e-booka przez AI").CausedBy(ex));
}
}
private static List<string> ChunkText(string text, int maxWords = 3000)
{
var words = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
var chunks = new List<string>();
if (words.Length <= maxWords)
{
chunks.Add(text);
return chunks;
}
var currentChunk = new List<string>();
int count = 0;
foreach (var word in words)
{
currentChunk.Add(word);
count++;
if (count >= maxWords)
{
chunks.Add(string.Join(" ", currentChunk));
currentChunk.Clear();
count = 0;
}
}
if (currentChunk.Any())
{
chunks.Add(string.Join(" ", currentChunk));
}
return chunks;
}
}