feat: Ingestion Pipeline Stabilization and WASM Service Proxies (#42)

This PR stabilizes the Nexus Ingestion Engine by implementing functional service proxies for the Blazor WASM client and refining the backend infrastructure for real-time progress tracking and database compatibility.

### Key Changes
- **Infrastructure Stabilization**:
  - Implemented production-grade `EbookRepository` with PostgreSQL `EF.Functions.ILike` support.
  - Enforced `IsReadyForReading = false` state for newly added ebooks (resolves #35).
  - Updated `SignalRSyncBroadcaster` to support targeted user messaging and ingestion-specific progress updates (resolves #37).
- **WASM Client Functional Proxies**:
  - Replaced the placeholder ("throwing") dummy services with functional `WasmEbookRepository`, `WasmSyncBroadcaster`, `WasmBookStorageService`, and `WasmEmbeddingGenerator` implementations.
  - These services proxy requests to the backend via a new set of Minimal API endpoints in `NexusReader.Web`.
- **Domain Refinement**:
  - Added an `IsReadyForReading` flag to the `Ebook` entity to manage background AI processing states.

### Related Issues
- Fixes #35
- Fixes #36
- Fixes #37

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Reviewed-on: #42
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #42.
This commit is contained in:
2026-05-13 18:24:24 +00:00
committed by Marek Jasiński
parent d5c2952bec
commit 5a2223a4c8
39 changed files with 6134 additions and 301 deletions
@@ -2,6 +2,7 @@ using System.Text.Json;
using FluentResults;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;
using Microsoft.ML.Tokenizers;
using NexusReader.Application.Abstractions.Services;
using NexusReader.Application.DTOs.AI;
@@ -19,12 +20,15 @@ namespace NexusReader.Infrastructure.Services;
public class KnowledgeService : IKnowledgeService
{
private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };
private readonly IChatClient _chatClient;
private readonly IEmbeddingGenerator<string, Embedding<float>> _embeddingGenerator;
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
private readonly ResiliencePipeline _retryPipeline;
private readonly AiSettings _settings;
private readonly Tokenizer _tokenizer;
private readonly ILogger<KnowledgeService> _logger;
private const string PromptVersion = "1.0";
public KnowledgeService(
@@ -32,14 +36,16 @@ public class KnowledgeService : IKnowledgeService
IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator,
IDbContextFactory<AppDbContext> dbContextFactory,
ResiliencePipelineProvider<string> pipelineProvider,
IOptions<AiSettings> settings)
IOptions<AiSettings> settings,
ILogger<KnowledgeService> logger)
{
_chatClient = chatClient;
_embeddingGenerator = embeddingGenerator;
_dbContextFactory = dbContextFactory;
_retryPipeline = pipelineProvider.GetPipeline("ai-retry");
_settings = settings.Value;
// Use Tiktoken (cl100k_base) which is a standard for modern LLMs and provides
_logger = logger;
// Use Tiktoken (cl100k_base) which is a standard for modern LLMs and provides
// a very reliable estimation for token usage in Gemini-based workloads.
_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4");
}
@@ -78,16 +84,19 @@ public class KnowledgeService : IKnowledgeService
if (cached != null && cached.PromptVersion == PromptVersion)
{
Console.WriteLine($"[KnowledgeService] Cache Hit for {traceType} ({hash})");
_logger.LogDebug("[KnowledgeService] Cache Hit for {TraceType} ({Hash})", traceType, hash);
try
{
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, JsonOptions);
if (packet != null) return Result.Ok(packet);
}
catch { /* fallback to regen */ }
catch (JsonException ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Cached JSON for {Hash} was invalid; regenerating.", hash);
}
}
Console.WriteLine($"[KnowledgeService] Cache Miss for {traceType} ({hash}). Requesting AI...");
_logger.LogInformation("[KnowledgeService] Cache Miss for {TraceType} ({Hash}). Requesting AI...", traceType, hash);
try
{
var options = new ChatOptions
@@ -112,7 +121,7 @@ public class KnowledgeService : IKnowledgeService
try
{
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, JsonOptions);
if (knowledgePacket == null) return Result.Fail("Failed to deserialize AI response.");
// 3. Generate Embedding if not present
@@ -125,7 +134,7 @@ public class KnowledgeService : IKnowledgeService
}
catch (Exception ex)
{
Console.WriteLine($"[KnowledgeService] Embedding Error: {ex.Message}");
_logger.LogWarning(ex, "[KnowledgeService] Embedding generation failed; proceeding without vector.");
// We continue even if embedding fails, as the primary goal was knowledge extraction
}
@@ -159,7 +168,7 @@ public class KnowledgeService : IKnowledgeService
}
catch (JsonException ex)
{
Console.WriteLine($"[KnowledgeService] JSON Error: {ex.Message}. Raw length: {rawResponse.Length}");
_logger.LogError(ex, "[KnowledgeService] JSON deserialization error. Raw response length: {Length}", rawResponse.Length);
return Result.Fail($"Failed to deserialize AI response: {ex.Message}");
}
}
@@ -231,7 +240,7 @@ public class KnowledgeService : IKnowledgeService
}
else
{
Console.WriteLine($"[KnowledgeService] WARNING: Skipping invalid link {linkDto.Source} -> {linkDto.Target} (Missing units).");
_logger.LogWarning("[KnowledgeService] Skipping invalid link {Source} -> {Target}: one or both units are missing.", linkDto.Source, linkDto.Target);
}
}
}
@@ -264,7 +273,7 @@ public class KnowledgeService : IKnowledgeService
var rawJson = response.Text?.Trim() ?? "{}";
rawJson = rawJson.Replace("```json", "").Replace("```", "").Trim();
var result = JsonSerializer.Deserialize<GroundednessResult>(rawJson, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
var result = JsonSerializer.Deserialize<GroundednessResult>(rawJson, JsonOptions);
return result != null ? Result.Ok(result) : Result.Fail("Failed to parse groundedness result");
}