feat: KM-RAG Polyglot Ingestion Pipeline Migration (#46)

Resolves the KM-RAG Polyglot Persistence and Background Ingestion Pipeline Migration task.

### Key Changes
1. **Infrastructure Migration**: Integrated Qdrant (for vector embeddings) and Neo4j (for concept graphs), reducing reliance on PostgreSQL pgvector storage.
2. **Concurrent Background Job**: Implemented a robust Hangfire `EbookIngestionJob` utilizing Polly exponential retries for transient 429 rate limits, executing three core ingestion tasks concurrently via `Task.WhenAll`.
3. **Data Layer**: Standardized database schemas and entities; retained `Pgvector.EntityFrameworkCore` for migration compilation compatibility.
4. **Wasm Client & Tests**: Implemented client support for semantic search and refactored related tests in `QueryTests.cs` to mock `IKnowledgeService`.

### Verification Status
- **Build**: Successfully compiles with `dotnet build NexusReader.slnx --no-restore` (0 errors).
- **Tests**: All 5 unit tests pass cleanly with `dotnet test NexusReader.slnx --no-restore`.

**Resolve** #47

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Reviewed-on: #46
Reviewed-by: Marek Jaisński <jasins.marek@gmail.com>
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #46.
This commit is contained in:
2026-05-20 18:15:28 +00:00
committed by Marek Jaisński
parent 711822f5de
commit 23acaeb705
15 changed files with 348 additions and 287 deletions
@@ -1,8 +1,6 @@
using Microsoft.AspNetCore.Identity.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore;
using NexusReader.Domain.Entities;
using Pgvector;
namespace NexusReader.Data.Persistence;
@@ -52,59 +50,24 @@ public class AppDbContext : IdentityDbContext<NexusUser>
entity.HasIndex(p => p.PlanName).IsUnique();
});
if (Database.IsSqlite())
modelBuilder.Entity<SemanticKnowledgeCache>(entity =>
{
var vectorConverter = new Microsoft.EntityFrameworkCore.Storage.ValueConversion.ValueConverter<Vector, string>(
v => v != null ? string.Join(",", v.ToArray()) : string.Empty,
s => !string.IsNullOrEmpty(s) ? new Vector(s.Split(',').Select(float.Parse).ToArray()) : null!
);
modelBuilder.Entity<SemanticKnowledgeCache>(entity =>
{
entity.HasKey(e => e.ContentHash);
entity.HasIndex(e => e.ContentHash).IsUnique();
entity.HasIndex(e => e.TenantId);
entity.Property(e => e.Vector).HasConversion(vectorConverter);
});
entity.HasKey(e => e.ContentHash);
entity.HasIndex(e => e.ContentHash).IsUnique();
entity.HasIndex(e => e.TenantId);
});
modelBuilder.Entity<KnowledgeUnit>(entity =>
{
entity.HasKey(e => e.Id);
entity.HasIndex(e => e.TenantId);
entity.HasIndex(e => e.EbookId);
entity.Property(e => e.Vector).HasConversion(vectorConverter);
entity.HasOne(e => e.Ebook)
.WithMany()
.HasForeignKey(e => e.EbookId)
.OnDelete(DeleteBehavior.Cascade);
});
}
else
modelBuilder.Entity<KnowledgeUnit>(entity =>
{
modelBuilder.HasPostgresExtension("vector");
entity.HasKey(e => e.Id);
entity.HasIndex(e => e.TenantId);
entity.HasIndex(e => e.EbookId);
modelBuilder.Entity<SemanticKnowledgeCache>(entity =>
{
entity.HasKey(e => e.ContentHash);
entity.HasIndex(e => e.ContentHash).IsUnique();
entity.HasIndex(e => e.TenantId);
entity.Property(e => e.Vector).HasColumnType("vector(1536)");
});
modelBuilder.Entity<KnowledgeUnit>(entity =>
{
entity.HasKey(e => e.Id);
entity.HasIndex(e => e.TenantId);
entity.HasIndex(e => e.EbookId);
entity.Property(e => e.Vector).HasColumnType("vector(768)");
entity.HasOne(e => e.Ebook)
.WithMany()
.HasForeignKey(e => e.EbookId)
.OnDelete(DeleteBehavior.Cascade);
});
}
entity.HasOne(e => e.Ebook)
.WithMany()
.HasForeignKey(e => e.EbookId)
.OnDelete(DeleteBehavior.Cascade);
});
modelBuilder.Entity<KnowledgeUnitLink>(entity =>
{