feat: Ingestion Pipeline Stabilization and WASM Service Proxies (#42)
This PR stabilizes the Nexus Ingestion Engine by implementing functional service proxies for the Blazor WASM client and refining the backend infrastructure for real-time progress tracking and database compatibility.

### Key Changes

- **Infrastructure Stabilization**:
  - Implemented a production-grade `EbookRepository` with PostgreSQL `EF.Functions.ILike` support.
  - Enforced the `IsReadyForReading = false` state for newly added ebooks (resolves #35).
  - Updated `SignalRSyncBroadcaster` to support targeted user messaging and ingestion-specific progress updates (resolves #37).
- **WASM Client Functional Proxies**:
  - Replaced "Throwing" dummy services with `WasmEbookRepository`, `WasmSyncBroadcaster`, `WasmBookStorageService`, and `WasmEmbeddingGenerator`.
  - These services proxy requests to the backend via a new set of Minimal API endpoints in `NexusReader.Web`.
- **Domain Refinement**:
  - Added `IsReadyForReading` flag to the `Ebook` entity to manage background AI processing states.

### Related Issues

- Fixes #35
- Fixes #36
- Fixes #37

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Reviewed-on: #42
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #42.
This commit is contained in:
@@ -2,6 +2,7 @@ using System.Text.Json;
|
||||
using FluentResults;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.ML.Tokenizers;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
using NexusReader.Application.DTOs.AI;
|
||||
@@ -19,12 +20,15 @@ namespace NexusReader.Infrastructure.Services;
|
||||
|
||||
public class KnowledgeService : IKnowledgeService
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };
|
||||
|
||||
private readonly IChatClient _chatClient;
|
||||
private readonly IEmbeddingGenerator<string, Embedding<float>> _embeddingGenerator;
|
||||
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
|
||||
private readonly ResiliencePipeline _retryPipeline;
|
||||
private readonly AiSettings _settings;
|
||||
private readonly Tokenizer _tokenizer;
|
||||
private readonly ILogger<KnowledgeService> _logger;
|
||||
private const string PromptVersion = "1.0";
|
||||
|
||||
public KnowledgeService(
|
||||
@@ -32,14 +36,16 @@ public class KnowledgeService : IKnowledgeService
|
||||
IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator,
|
||||
IDbContextFactory<AppDbContext> dbContextFactory,
|
||||
ResiliencePipelineProvider<string> pipelineProvider,
|
||||
IOptions<AiSettings> settings)
|
||||
IOptions<AiSettings> settings,
|
||||
ILogger<KnowledgeService> logger)
|
||||
{
|
||||
_chatClient = chatClient;
|
||||
_embeddingGenerator = embeddingGenerator;
|
||||
_dbContextFactory = dbContextFactory;
|
||||
_retryPipeline = pipelineProvider.GetPipeline("ai-retry");
|
||||
_settings = settings.Value;
|
||||
// Use Tiktoken (cl100k_base) which is a standard for modern LLMs and provides
|
||||
_logger = logger;
|
||||
// Use Tiktoken (cl100k_base) which is a standard for modern LLMs and provides
|
||||
// a very reliable estimation for token usage in Gemini-based workloads.
|
||||
_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4");
|
||||
}
|
||||
@@ -78,16 +84,19 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
if (cached != null && cached.PromptVersion == PromptVersion)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Cache Hit for {traceType} ({hash})");
|
||||
_logger.LogDebug("[KnowledgeService] Cache Hit for {TraceType} ({Hash})", traceType, hash);
|
||||
try
|
||||
{
|
||||
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
|
||||
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, JsonOptions);
|
||||
if (packet != null) return Result.Ok(packet);
|
||||
}
|
||||
catch { /* fallback to regen */ }
|
||||
catch (JsonException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Cached JSON for {Hash} was invalid; regenerating.", hash);
|
||||
}
|
||||
}
|
||||
|
||||
Console.WriteLine($"[KnowledgeService] Cache Miss for {traceType} ({hash}). Requesting AI...");
|
||||
_logger.LogInformation("[KnowledgeService] Cache Miss for {TraceType} ({Hash}). Requesting AI...", traceType, hash);
|
||||
try
|
||||
{
|
||||
var options = new ChatOptions
|
||||
@@ -112,7 +121,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
try
|
||||
{
|
||||
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
|
||||
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, JsonOptions);
|
||||
if (knowledgePacket == null) return Result.Fail("Failed to deserialize AI response.");
|
||||
|
||||
// 3. Generate Embedding if not present
|
||||
@@ -125,7 +134,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Embedding Error: {ex.Message}");
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Embedding generation failed; proceeding without vector.");
|
||||
// We continue even if embedding fails, as the primary goal was knowledge extraction
|
||||
}
|
||||
|
||||
@@ -159,7 +168,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] JSON Error: {ex.Message}. Raw length: {rawResponse.Length}");
|
||||
_logger.LogError(ex, "[KnowledgeService] JSON deserialization error. Raw response length: {Length}", rawResponse.Length);
|
||||
return Result.Fail($"Failed to deserialize AI response: {ex.Message}");
|
||||
}
|
||||
}
|
||||
@@ -231,7 +240,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] WARNING: Skipping invalid link {linkDto.Source} -> {linkDto.Target} (Missing units).");
|
||||
_logger.LogWarning("[KnowledgeService] Skipping invalid link {Source} -> {Target}: one or both units are missing.", linkDto.Source, linkDto.Target);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -264,7 +273,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
var rawJson = response.Text?.Trim() ?? "{}";
|
||||
rawJson = rawJson.Replace("```json", "").Replace("```", "").Trim();
|
||||
|
||||
var result = JsonSerializer.Deserialize<GroundednessResult>(rawJson, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
|
||||
var result = JsonSerializer.Deserialize<GroundednessResult>(rawJson, JsonOptions);
|
||||
|
||||
return result != null ? Result.Ok(result) : Result.Fail("Failed to parse groundedness result");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user