using System.Text.RegularExpressions;
using FluentResults;
using MediatR;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using NexusReader.Application.Abstractions.Messaging;
using NexusReader.Application.Abstractions.Persistence;
using NexusReader.Application.Abstractions.Services;

namespace NexusReader.Application.Commands.Library;

// NOTE(review): the generic type arguments in this file (ICommand<bool>, IRequestHandler<,>,
// ILogger<>, Task<Result<bool>>, List<string>) were missing from the reviewed text and have been
// reconstructed from usage (Result.Ok(true), string.Join, MediatR handler constraint).
// Confirm them against the declarations in NexusReader.Application.Abstractions.Messaging.

/// <summary>
/// Command asking for an uploaded ebook to be processed: its chapter text is extracted,
/// chunked, and passed to the knowledge service, after which the ebook is marked as ready.
/// </summary>
/// <param name="EbookId">Identifier used to look the ebook up in the repository.</param>
/// <param name="UserId">User who receives the ingestion progress broadcasts.</param>
/// <param name="TenantId">Tenant identifier forwarded to the knowledge service.</param>
public record ProcessEbookCommand(
    Guid EbookId,
    string UserId,
    string TenantId
) : ICommand<bool>;

/// <summary>
/// Handles <see cref="ProcessEbookCommand"/>: loads the ebook, extracts its chapters,
/// sends each chapter chunk to the knowledge service and broadcasts progress to the user.
/// </summary>
public class ProcessEbookCommandHandler : IRequestHandler<ProcessEbookCommand, Result<bool>>
{
    /// <summary>Chapters shorter than this many characters are skipped.</summary>
    private const int MinChapterLength = 100;

    /// <summary>Maximum number of space-separated words sent to the knowledge service at once.</summary>
    private const int MaxWordsPerChunk = 3000;

    /// <summary>Progress value reported right before chapter processing starts.</summary>
    private const double ChapterProgressStart = 0.15;

    /// <summary>Share of the progress bar that is spread across the chapters.</summary>
    private const double ChapterProgressSpan = 0.75;

    private readonly IEbookRepository _ebookRepository;
    private readonly IKnowledgeService _knowledgeService;
    private readonly IEpubExtractor _epubExtractor;
    private readonly ISyncBroadcaster _broadcaster;
    private readonly ILogger<ProcessEbookCommandHandler> _logger;

    public ProcessEbookCommandHandler(
        IEbookRepository ebookRepository,
        IKnowledgeService knowledgeService,
        IEpubExtractor epubExtractor,
        ISyncBroadcaster broadcaster,
        ILogger<ProcessEbookCommandHandler> logger)
    {
        _ebookRepository = ebookRepository;
        _knowledgeService = knowledgeService;
        _epubExtractor = epubExtractor;
        _broadcaster = broadcaster;
        _logger = logger;
    }

    /// <summary>
    /// Runs the ingestion pipeline for one ebook.
    /// </summary>
    /// <param name="request">The command identifying the ebook, user and tenant.</param>
    /// <param name="cancellationToken">Token that aborts the processing when cancelled.</param>
    /// <returns>
    /// <c>Result.Ok(true)</c> once the ebook has been flagged as ready for reading; a failed result
    /// when the ebook is missing, extraction fails, no chapters are found, or an exception
    /// (including cancellation) interrupts the pipeline.
    /// </returns>
    /// <remarks>
    /// Failures of individual chunks are logged and do not stop the ingestion.
    /// </remarks>
    public async Task<Result<bool>> Handle(ProcessEbookCommand request, CancellationToken cancellationToken)
    {
        _logger.LogInformation("[ProcessEbook] Starting background processing for Ebook: {EbookId}", request.EbookId);

        try
        {
            await _broadcaster.BroadcastIngestionProgressAsync(
                request.UserId,
                "Wyszukiwanie e-booka w bazie danych...",
                0.05,
                cancellationToken);

            var ebook = await _ebookRepository.FindByIdAsync(request.EbookId, cancellationToken);
            if (ebook is null)
            {
                _logger.LogError("[ProcessEbook] Ebook not found in database: {EbookId}", request.EbookId);
                return Result.Fail($"Ebook nie znaleziony w bazie danych: {request.EbookId}");
            }

            _logger.LogInformation(
                "[ProcessEbook] Extracting chapters text for Ebook: {Title} ({FilePath})",
                ebook.Title,
                ebook.FilePath);
            await _broadcaster.BroadcastIngestionProgressAsync(
                request.UserId,
                "Otwieranie i parsowanie pliku EPUB...",
                0.1,
                cancellationToken);

            var extractionResult = await _epubExtractor.ExtractChaptersTextAsync(ebook.FilePath, cancellationToken);
            if (extractionResult.IsFailed)
            {
                var errorMsg = extractionResult.Errors.FirstOrDefault()?.Message ?? "Failed to extract text chapters.";
                _logger.LogError("[ProcessEbook] Extraction failed: {Error}", errorMsg);
                return Result.Fail(extractionResult.Errors);
            }

            var chapters = extractionResult.Value;
            if (chapters is null || chapters.Count == 0)
            {
                _logger.LogWarning("[ProcessEbook] EPUB has no readable content files: {EbookId}", request.EbookId);
                return Result.Fail("EPUB nie zawiera czytelnych rozdziałów.");
            }

            int totalChapters = chapters.Count;
            _logger.LogInformation(
                "[ProcessEbook] Processing {Count} chapters for Ebook: {Title}",
                totalChapters,
                ebook.Title);
            await _broadcaster.BroadcastIngestionProgressAsync(
                request.UserId,
                $"Analizowanie struktury ({totalChapters} rozdziałów)...",
                ChapterProgressStart,
                cancellationToken);

            for (int i = 0; i < totalChapters; i++)
            {
                var chapterText = chapters[i];

                // Null, blank or very short chapters carry nothing worth indexing; they are skipped
                // without a progress broadcast but still count towards the chapter number below.
                if (string.IsNullOrWhiteSpace(chapterText) || chapterText.Length < MinChapterLength)
                {
                    _logger.LogInformation(
                        "[ProcessEbook] Skipping chapter {Index} (text too short: {Length} chars)",
                        i,
                        chapterText?.Length ?? 0);
                    continue;
                }

                await ProcessChapterAsync(chapterText, i, request, cancellationToken);

                int processedChapters = i + 1;
                double progress = ChapterProgressStart + (ChapterProgressSpan * processedChapters / totalChapters);
                await _broadcaster.BroadcastIngestionProgressAsync(
                    request.UserId,
                    $"Przetwarzanie rozdziału {processedChapters} z {totalChapters} przez AI...",
                    progress,
                    cancellationToken);
            }

            // Mark the ebook as ready
            ebook.IsReadyForReading = true;
            await _ebookRepository.SaveChangesAsync(cancellationToken);

            _logger.LogInformation("[ProcessEbook] Ingestion and vector indexing completed for: {Title}", ebook.Title);
            await _broadcaster.BroadcastIngestionProgressAsync(
                request.UserId,
                "Indeksowanie wektorowe e-booka przez Nexus AI zakończone pomyślnie!",
                1.0,
                cancellationToken);

            return Result.Ok(true);
        }
        catch (Exception ex)
        {
            _logger.LogError(
                ex,
                "[ProcessEbook] Critical error during background EPUB vectorization of ebook {EbookId}",
                request.EbookId);

            await TryBroadcastFailureAsync(request.UserId, $"Błąd indeksowania: {ex.Message}");

            return Result.Fail(new Error("Wystąpił błąd podczas indeksowania e-booka przez AI").CausedBy(ex));
        }
    }

    /// <summary>
    /// Splits one chapter into chunks and sends each chunk to the knowledge service.
    /// </summary>
    /// <param name="chapterText">Text of the chapter.</param>
    /// <param name="chapterIndex">Zero-based chapter index, used for logging only.</param>
    /// <param name="request">The command being handled (supplies tenant and ebook ids).</param>
    /// <param name="cancellationToken">Token that aborts the processing when cancelled.</param>
    private async Task ProcessChapterAsync(
        string chapterText,
        int chapterIndex,
        ProcessEbookCommand request,
        CancellationToken cancellationToken)
    {
        // Chunk the text to maintain granular Knowledge Units
        var chunks = ChunkText(chapterText, MaxWordsPerChunk);
        _logger.LogInformation(
            "[ProcessEbook] Chapter {Index} split into {ChunkCount} chunk(s)",
            chapterIndex,
            chunks.Count);

        foreach (var chunk in chunks)
        {
            try
            {
                var result = await _knowledgeService.GetKnowledgeMapAsync(
                    chunk,
                    request.TenantId,
                    request.EbookId,
                    cancellationToken);

                if (result.IsFailed)
                {
                    _logger.LogWarning(
                        "[ProcessEbook] Failed to generate knowledge map for a chunk of chapter {Index}: {Error}",
                        chapterIndex,
                        result.Errors.FirstOrDefault()?.Message);
                }
            }
            catch (Exception ex) when (!cancellationToken.IsCancellationRequested)
            {
                // Best effort: one failing chunk must not abort the whole ebook. The filter lets
                // exceptions through once the caller has cancelled, so the loop stops instead of
                // issuing a doomed call for every remaining chunk.
                _logger.LogError(
                    ex,
                    "[ProcessEbook] Exception during AI vectorization of chapter {Index} chunk",
                    chapterIndex);
            }
        }
    }

    /// <summary>
    /// Broadcasts a failure message to the user without ever throwing.
    /// </summary>
    /// <param name="userId">User who receives the broadcast.</param>
    /// <param name="message">Message to show.</param>
    private async Task TryBroadcastFailureAsync(string userId, string message)
    {
        try
        {
            // The request token may already be cancelled when we get here, so it is deliberately
            // not used: the user should still be told that the ingestion stopped.
            await _broadcaster.BroadcastIngestionProgressAsync(userId, message, 1.0, CancellationToken.None);
        }
        catch (Exception broadcastEx)
        {
            // A broken broadcast channel must not replace the failed Result returned by Handle.
            _logger.LogWarning(
                broadcastEx,
                "[ProcessEbook] Failed to broadcast ingestion failure to user {UserId}",
                userId);
        }
    }

    /// <summary>
    /// Splits <paramref name="text"/> into chunks of at most <paramref name="maxWords"/> words.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <param name="maxWords">Maximum number of words per chunk; must be at least 1.</param>
    /// <returns>
    /// A single-element list holding <paramref name="text"/> unchanged when it has no more than
    /// <paramref name="maxWords"/> words; otherwise consecutive chunks whose words are joined by
    /// single spaces.
    /// </returns>
    /// <remarks>
    /// Only the space character separates words; tabs and line breaks stay inside the words
    /// they touch.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxWords"/> is less than 1.</exception>
    private static List<string> ChunkText(string text, int maxWords = 3000)
    {
        if (maxWords < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(maxWords), maxWords, "Chunk size must be at least 1.");
        }

        var words = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
        if (words.Length <= maxWords)
        {
            return new List<string> { text };
        }

        // Ceiling division: number of chunks needed to hold every word.
        var chunks = new List<string>((words.Length + maxWords - 1) / maxWords);
        for (int start = 0; start < words.Length; start += maxWords)
        {
            int count = Math.Min(maxWords, words.Length - start);
            chunks.Add(string.Join(" ", words, start, count));
        }

        return chunks;
    }
}