feat(search/rag): implement NexusSearchBox, dynamic Qdrant collection auto-provisioning, batch vector ingestion, mobile Serilog logging, and resolve 401 auth handler error (#51)
Resolves #52. This pull request introduces the **NexusSearchBox** search feature with premium unified styling, implements a robust **dynamic Qdrant collection auto-provisioning and batch-vector ingestion pipeline**, integrates a unified **Serilog logging infrastructure** for the Blazor Hybrid environment (MAUI), and resolves the **401 Unauthorized API header propagation error** inside mobile builds. ### 🚀 Key Implementations #### 1. Premium `NexusSearchBox` & Semantic Search UI * **NexusSearchBox Component:** Created an elegant search-as-you-type box with smooth key navigation, quick-clearing, and seamless dynamic styling. * **Unified Aesthetics:** Refactored the search box isolated styling to align perfectly with the dashboard's design system using glassmorphism, `--nexus-neon` token gradients, and smooth pulse/fade animations. * **Semantic Search Integration:** Integrated semantic search query dispatching (`SearchLibrarySemanticallyQuery`) and wired up navigation seamlessly through the updated `ReaderNavigationService`. * **Tests Hardening:** Added/adapted query assertions in `QueryTests.cs` to guarantee safe parameterization and error boundary mapping. #### 2. Qdrant Collection Provisioning & Vector Ingestion * **Dynamic Auto-Provisioning:** Implemented dynamic checking and lazy-creation of the `knowledge_units` collection using 768 dimensions and Cosine distance. * **High-Performance Ingestion:** Optimized `ProcessKnowledgeUnitsAsync` with high-performance batch embedding generation using `_embeddingGenerator` and deterministic MD5 GUIDs for stable, duplicate-free upsertion. * **Database Cache Clear Sync:** Integrated Qdrant collection deletion in `ClearCacheAsync` to ensure absolute consistency between the PostgreSQL database cache and vector database indices. #### 3. 
Cross-Platform MAUI Logging (Serilog Infrastructure) * **Serilog Integration:** Configured cross-platform Serilog routing in `SerilogConfiguration.cs`, streaming diagnostic logs safely across native platforms and the Blazor Webview container. * **Interop Bridge:** Built `BlazorLoggingBridge.cs` to capture web console messages and pipe them directly to the native host logger. * **Demo Interface:** Added an interactive `SerilogDemo.razor` sandbox under Pages. #### 4. Resolving 401 Load Errors (Authentication Handler Flow) * **Authentication Header Handler:** Implemented the `MobileAuthenticationHeaderHandler` to correctly extract, validate, and inject bearer JWT tokens into outbound API requests. * **Configuration-based API Host:** Structured standard API URI routing to use clean configuration bindings in `appsettings.json`. --- ### 🧪 Verification & Build Status * Run `dotnet build` from the solution root: Successfully compiled the full multi-targeted solution (`Liczba błędów: 0`). * All unit and integration tests successfully executed and verified (`dotnet test`). --------- Co-authored-by: Marek Jasiński <jasins.marek@gmail.com> Co-authored-by: Marek Jaisński <jasins.marek@gmail.com> Reviewed-on: #51 Co-authored-by: Antigravity <antigravity@google.com> Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #51.
This commit is contained in:
@@ -15,6 +15,7 @@ using Polly.Registry;
|
||||
using Microsoft.Extensions.Options;
|
||||
using NexusReader.Infrastructure.Configuration;
|
||||
using Qdrant.Client;
|
||||
using Qdrant.Client.Grpc;
|
||||
using Neo4j.Driver;
|
||||
|
||||
namespace NexusReader.Infrastructure.Services;
|
||||
@@ -32,8 +33,9 @@ public class KnowledgeService : IKnowledgeService
|
||||
private readonly ILogger<KnowledgeService> _logger;
|
||||
private readonly QdrantClient _qdrantClient;
|
||||
private readonly IDriver _neo4jDriver;
|
||||
private const string PromptVersion = "1.3";
|
||||
private const string PromptVersion = "1.7";
|
||||
private static readonly ConcurrentDictionary<string, Lazy<Task<Result<KnowledgePacket>>>> _activeRequests = new();
|
||||
private static readonly SemaphoreSlim _collectionSemaphore = new(1, 1);
|
||||
|
||||
public KnowledgeService(
|
||||
IChatClient chatClient,
|
||||
@@ -84,11 +86,12 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
|
||||
var normalizedText = text.Trim();
|
||||
var hash = ContentHasher.ComputeHash(normalizedText);
|
||||
var hashInput = $"{normalizedText}:{traceType}:{PromptVersion}";
|
||||
var hash = ContentHasher.ComputeHash(hashInput);
|
||||
|
||||
// 1. Check Cache
|
||||
var cached = await dbContext.SemanticKnowledgeCache
|
||||
.FirstOrDefaultAsync(c => c.ContentHash == hash && c.TenantId == tenantId, cancellationToken);
|
||||
.FirstOrDefaultAsync(c => c.ContentHash == hash, cancellationToken);
|
||||
|
||||
if (cached != null && cached.PromptVersion == PromptVersion)
|
||||
{
|
||||
@@ -96,7 +99,12 @@ public class KnowledgeService : IKnowledgeService
|
||||
try
|
||||
{
|
||||
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, JsonOptions);
|
||||
if (packet != null) return Result.Ok(packet);
|
||||
if (packet != null)
|
||||
{
|
||||
await ProcessKnowledgeUnitsAsync(packet, tenantId, ebookId, dbContext, cancellationToken);
|
||||
await dbContext.SaveChangesAsync(cancellationToken);
|
||||
return Result.Ok(packet);
|
||||
}
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
@@ -105,7 +113,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
|
||||
// Deduplicate concurrent active requests for the exact same hash
|
||||
var requestKey = $"{tenantId}:{hash}:{traceType}";
|
||||
var requestKey = $"{hash}:{traceType}";
|
||||
|
||||
var lazyTask = _activeRequests.GetOrAdd(requestKey, k =>
|
||||
new Lazy<Task<Result<KnowledgePacket>>>(
|
||||
@@ -177,7 +185,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
// 4. Save to Cache
|
||||
var cached = await dbContext.SemanticKnowledgeCache
|
||||
.FirstOrDefaultAsync(c => c.ContentHash == hash && c.TenantId == tenantId);
|
||||
.FirstOrDefaultAsync(c => c.ContentHash == hash);
|
||||
|
||||
var cacheEntry = new SemanticKnowledgeCache
|
||||
{
|
||||
@@ -201,7 +209,14 @@ public class KnowledgeService : IKnowledgeService
|
||||
// 5. Process structured KnowledgeUnits (Graph Expansion)
|
||||
await ProcessKnowledgeUnitsAsync(knowledgePacket, tenantId, ebookId, dbContext, default);
|
||||
|
||||
await dbContext.SaveChangesAsync();
|
||||
try
|
||||
{
|
||||
await dbContext.SaveChangesAsync();
|
||||
}
|
||||
catch (DbUpdateException ex) when (ex.InnerException is Npgsql.PostgresException pgEx && pgEx.SqlState == "23505")
|
||||
{
|
||||
_logger.LogWarning("[KnowledgeService] Concurrency collision on SemanticKnowledgeCache for {Hash}; another process saved it first. Swallowing.", hash);
|
||||
}
|
||||
return Result.Ok(knowledgePacket);
|
||||
}
|
||||
catch (JsonException ex)
|
||||
@@ -224,6 +239,30 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
private async Task ProcessKnowledgeUnitsAsync(KnowledgePacket packet, string tenantId, Guid? ebookId, AppDbContext dbContext, CancellationToken cancellationToken)
|
||||
{
|
||||
if (packet.Graph != null && (packet.Units == null || !packet.Units.Any()))
|
||||
{
|
||||
var graphUnits = packet.Graph.Nodes.Select(node => new KnowledgeUnitDto(
|
||||
node.Id,
|
||||
node.Type ?? "concept",
|
||||
node.Description ?? node.Label,
|
||||
new Dictionary<string, object>
|
||||
{
|
||||
["label"] = node.Label,
|
||||
["group"] = node.Group,
|
||||
["summary"] = node.Summary ?? "",
|
||||
["key_terms"] = node.KeyTerms ?? new List<string>()
|
||||
}
|
||||
)).ToList();
|
||||
|
||||
var graphLinks = packet.Graph.Links.Select(link => new KnowledgeLinkDto(
|
||||
link.Source,
|
||||
link.Target,
|
||||
link.RelationType
|
||||
)).ToList();
|
||||
|
||||
packet = packet with { Units = graphUnits, Links = graphLinks };
|
||||
}
|
||||
|
||||
var unitIds = packet.Units.Select(u => u.Id).ToList();
|
||||
var linkSourceIds = packet.Links.Select(l => l.Source).ToList();
|
||||
var linkTargetIds = packet.Links.Select(l => l.Target).ToList();
|
||||
@@ -285,6 +324,192 @@ public class KnowledgeService : IKnowledgeService
|
||||
_logger.LogWarning("[KnowledgeService] Skipping invalid link {Source} -> {Target}: one or both units are missing.", linkDto.Source, linkDto.Target);
|
||||
}
|
||||
}
|
||||
|
||||
// Generate and upsert vectors to Qdrant in batch
|
||||
var unitsToEmbed = packet.Units
|
||||
.Where(u => !string.IsNullOrEmpty(u.Content))
|
||||
.ToList();
|
||||
|
||||
if (unitsToEmbed.Any())
|
||||
{
|
||||
try
|
||||
{
|
||||
var contents = unitsToEmbed.Select(u => u.Content).ToList();
|
||||
|
||||
var embeddingResponse = await _retryPipeline.ExecuteAsync(async ct =>
|
||||
await _embeddingGenerator.GenerateAsync(
|
||||
contents,
|
||||
new EmbeddingGenerationOptions { Dimensions = 768 },
|
||||
cancellationToken: ct), cancellationToken);
|
||||
|
||||
var embeddings = embeddingResponse.ToList();
|
||||
var points = new List<PointStruct>();
|
||||
|
||||
for (int i = 0; i < unitsToEmbed.Count; i++)
|
||||
{
|
||||
var unitDto = unitsToEmbed[i];
|
||||
var vector = embeddings[i].Vector.ToArray();
|
||||
|
||||
var point = new PointStruct
|
||||
{
|
||||
Id = GetDeterministicGuid(unitDto.Id),
|
||||
Vectors = vector,
|
||||
Payload =
|
||||
{
|
||||
["content"] = unitDto.Content,
|
||||
["type"] = unitDto.Type ?? string.Empty,
|
||||
["tenantId"] = tenantId,
|
||||
["ebookId"] = ebookId?.ToString() ?? string.Empty,
|
||||
["metadataJson"] = JsonSerializer.Serialize(unitDto.Metadata)
|
||||
}
|
||||
};
|
||||
points.Add(point);
|
||||
}
|
||||
|
||||
if (points.Any())
|
||||
{
|
||||
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
|
||||
await _qdrantClient.UpsertAsync("knowledge_units", points, cancellationToken: cancellationToken);
|
||||
_logger.LogInformation("[KnowledgeService] Successfully upserted {Count} points to Qdrant collection 'knowledge_units'.", points.Count);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "[KnowledgeService] Failed to generate and upsert embeddings for knowledge units to Qdrant.");
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Synchronize to Neo4j graph database
|
||||
await SyncToNeo4jAsync(packet, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Mirrors the packet's knowledge units and links into Neo4j as <c>KnowledgeUnit</c> nodes
/// and typed relationships between them.
/// </summary>
/// <remarks>
/// Nodes are merged in one write transaction and links in a second one, so links can only
/// match nodes that already exist. Any failure is logged and swallowed: the graph sync is
/// best-effort and never fails the caller.
/// </remarks>
/// <param name="packet">Packet whose <c>Units</c> and <c>Links</c> are synchronized. Nothing happens when it has no units.</param>
/// <param name="cancellationToken">Part of the signature, but not observed by this method's body.</param>
private async Task SyncToNeo4jAsync(KnowledgePacket packet, CancellationToken cancellationToken)
{
    if (packet.Units == null || !packet.Units.Any()) return;

    try
    {
        await using var session = _neo4jDriver.AsyncSession();

        // 1. Merge nodes in a transaction
        await session.ExecuteWriteAsync(async tx =>
        {
            foreach (var unit in packet.Units)
            {
                var cypher = @"
                    MERGE (u:KnowledgeUnit {id: $id})
                    ON CREATE SET u.content = $content, u.type = $type
                    ON MATCH SET u.content = $content, u.type = $type";

                // Node ids use the same deterministic GUID mapping as the Qdrant point ids.
                var guidStr = GetDeterministicGuid(unit.Id).ToString();
                await tx.RunAsync(cypher, new
                {
                    id = guidStr,
                    content = unit.Content ?? string.Empty,
                    type = unit.Type ?? "concept"
                });
            }
        });

        // 2. Merge links in a transaction
        if (packet.Links != null && packet.Links.Any())
        {
            await session.ExecuteWriteAsync(async tx =>
            {
                foreach (var link in packet.Links)
                {
                    if (string.IsNullOrWhiteSpace(link.Source) || string.IsNullOrWhiteSpace(link.Target))
                    {
                        continue;
                    }

                    var relationType = NormalizeRelationType(link.Relation);

                    // Relationship types cannot be passed as query parameters, so the type is
                    // interpolated. It is restricted to [A-Z0-9_] above and backtick-quoted here
                    // so that a type starting with a digit (e.g. "3D_MODEL") is still valid
                    // Cypher instead of a syntax error that aborts the whole transaction.
                    var cypher = $@"
                        MATCH (source:KnowledgeUnit {{id: $sourceId}})
                        MATCH (target:KnowledgeUnit {{id: $targetId}})
                        MERGE (source)-[r:`{relationType}`]->(target)";

                    var sourceGuidStr = GetDeterministicGuid(link.Source).ToString();
                    var targetGuidStr = GetDeterministicGuid(link.Target).ToString();

                    await tx.RunAsync(cypher, new
                    {
                        sourceId = sourceGuidStr,
                        targetId = targetGuidStr
                    });
                }
            });
        }

        _logger.LogInformation("[KnowledgeService] Successfully synchronized {NodeCount} nodes and {LinkCount} links to Neo4j.", packet.Units.Count, packet.Links?.Count ?? 0);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "[KnowledgeService] Failed to synchronize knowledge graph to Neo4j.");
    }

    // Upper-cases the relation, replaces every character outside [A-Z0-9_] with '_', and
    // falls back to RELATED_TO when the input is blank or nothing but underscores remains
    // (e.g. a relation of "->" would otherwise become the meaningless type "__").
    static string NormalizeRelationType(string? relation)
    {
        if (string.IsNullOrWhiteSpace(relation))
        {
            return "RELATED_TO";
        }

        var sanitized = System.Text.RegularExpressions.Regex.Replace(
            relation.Trim().ToUpperInvariant(), @"[^A-Z0-9_]", "_");

        return sanitized.Trim('_').Length == 0 ? "RELATED_TO" : sanitized;
    }
}
|
||||
|
||||
/// <summary>
/// Creates the named Qdrant collection (768-dimensional vectors, cosine distance) when it
/// does not exist yet.
/// </summary>
/// <remarks>
/// The check-then-create sequence runs under the static <c>_collectionSemaphore</c>, so only
/// one caller in this process performs it at a time. An "already exists" failure is logged
/// as information; any other failure is logged as an error and swallowed, leaving the
/// subsequent Qdrant call to surface the problem. Cancellation is rethrown rather than
/// logged as an error, which matches what happens when the token fires while waiting for
/// the semaphore.
/// </remarks>
/// <param name="collectionName">Name of the Qdrant collection to check and, if missing, create.</param>
/// <param name="cancellationToken">Token passed to the semaphore wait and to both Qdrant calls.</param>
/// <exception cref="OperationCanceledException">The token was cancelled while waiting or during a Qdrant call.</exception>
private async Task EnsureCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
{
    // Acquired outside the try block so the finally only releases a semaphore we actually hold.
    await _collectionSemaphore.WaitAsync(cancellationToken);
    try
    {
        var exists = await _qdrantClient.CollectionExistsAsync(collectionName, cancellationToken);
        if (exists)
        {
            return;
        }

        _logger.LogInformation("[KnowledgeService] Creating Qdrant collection '{CollectionName}'...", collectionName);
        await _qdrantClient.CreateCollectionAsync(
            collectionName: collectionName,
            vectorsConfig: new VectorParams
            {
                Size = 768,
                Distance = Distance.Cosine
            },
            cancellationToken: cancellationToken
        );
        _logger.LogInformation("[KnowledgeService] Qdrant collection '{CollectionName}' created successfully.", collectionName);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // A cancelled request is not a provisioning failure; let the caller observe it.
        throw;
    }
    catch (Exception ex)
    {
        // The semaphore is process-local, so a create issued elsewhere can still win the race.
        var alreadyExists =
            ex.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase) ||
            (ex.InnerException != null && ex.InnerException.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase));

        if (alreadyExists)
        {
            _logger.LogInformation("[KnowledgeService] Qdrant collection '{CollectionName}' was already created by another thread.", collectionName);
        }
        else
        {
            _logger.LogError(ex, "[KnowledgeService] Error ensuring Qdrant collection '{CollectionName}' exists.", collectionName);
        }
    }
    finally
    {
        _collectionSemaphore.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Maps an arbitrary string identifier to a stable <see cref="Guid"/>.
/// </summary>
/// <param name="input">Identifier to convert.</param>
/// <returns>
/// The parsed value when <paramref name="input"/> is already a GUID string; otherwise a GUID
/// built from the 16-byte MD5 digest of its UTF-8 bytes, so equal inputs always yield equal GUIDs.
/// </returns>
private static Guid GetDeterministicGuid(string input)
{
    if (!Guid.TryParse(input, out var parsed))
    {
        // MD5 is used only because its digest is exactly 16 bytes, not for security.
        var utf8Bytes = System.Text.Encoding.UTF8.GetBytes(input);
        using var hasher = System.Security.Cryptography.MD5.Create();
        return new Guid(hasher.ComputeHash(utf8Bytes));
    }

    return parsed;
}
|
||||
|
||||
/// <summary>
/// Renders a Qdrant point id as a plain string.
/// </summary>
/// <param name="pointId">Point id to render; may be null.</param>
/// <returns>
/// An empty string for a null id, the UUID text when the id holds a UUID, and otherwise
/// the numeric id converted to a string.
/// </returns>
private static string GetPointIdString(PointId pointId)
{
    if (pointId == null)
    {
        return string.Empty;
    }

    return pointId.PointIdOptionsCase switch
    {
        PointId.PointIdOptionsOneofCase.Uuid => pointId.Uuid,
        _ => pointId.Num.ToString(),
    };
}
|
||||
|
||||
public async Task<Result<GroundednessResult>> VerifyGroundednessAsync(string answer, string context, string tenantId, CancellationToken cancellationToken = default)
|
||||
@@ -354,6 +579,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
|
||||
try
|
||||
{
|
||||
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
|
||||
var response = await _qdrantClient.SearchAsync(
|
||||
collectionName: "knowledge_units",
|
||||
vector: queryVector,
|
||||
@@ -363,15 +589,37 @@ public class KnowledgeService : IKnowledgeService
|
||||
);
|
||||
searchResult = response.ToList();
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Qdrant search failed during GetRelevantContextAsync. Returning empty search results.");
|
||||
searchResult = new List<Qdrant.Client.Grpc.ScoredPoint>();
|
||||
}
|
||||
|
||||
var contexts = searchResult.Select(point => new RelevantContext
|
||||
var contexts = searchResult.Select(point =>
|
||||
{
|
||||
Text = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty,
|
||||
Confidence = point.Score
|
||||
var content = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
|
||||
var summary = string.Empty;
|
||||
if (point.Payload.TryGetValue("metadataJson", out var metaVal) && !string.IsNullOrEmpty(metaVal.StringValue))
|
||||
{
|
||||
try
|
||||
{
|
||||
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(metaVal.StringValue);
|
||||
if (meta != null && meta.TryGetValue("summary", out var sumObj))
|
||||
{
|
||||
summary = sumObj?.ToString();
|
||||
}
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON in RelevantContext mapping.");
|
||||
}
|
||||
}
|
||||
var text = string.IsNullOrEmpty(summary) ? content : $"{content}: {summary}";
|
||||
return new RelevantContext
|
||||
{
|
||||
Text = text,
|
||||
Confidence = point.Score
|
||||
};
|
||||
}).ToList();
|
||||
|
||||
return Result.Ok(contexts);
|
||||
@@ -417,6 +665,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
|
||||
try
|
||||
{
|
||||
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
|
||||
var response = await _qdrantClient.SearchAsync(
|
||||
collectionName: "knowledge_units",
|
||||
vector: queryVector,
|
||||
@@ -438,7 +687,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
|
||||
// 3. Graph Expansion via Neo4j
|
||||
var candidateIds = searchResult.Select(r => r.Id.ToString()).ToList();
|
||||
var candidateIds = searchResult.Select(r => GetPointIdString(r.Id)).ToList();
|
||||
var definitions = new Dictionary<string, List<string>>();
|
||||
|
||||
if (candidateIds.Any())
|
||||
@@ -447,7 +696,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
{
|
||||
await using var session = _neo4jDriver.AsyncSession();
|
||||
var cypher = @"
|
||||
MATCH (source:KnowledgeUnit)-[r:DEFINES]->(target:KnowledgeUnit)
|
||||
MATCH (source:KnowledgeUnit)-[r]->(target:KnowledgeUnit)
|
||||
WHERE source.id IN $candidateIds
|
||||
RETURN source.id AS sourceId, target.content AS targetContent";
|
||||
|
||||
@@ -516,12 +765,15 @@ public class KnowledgeService : IKnowledgeService
|
||||
{
|
||||
metadata = JsonSerializer.Deserialize<Dictionary<string, object>>(metaVal.StringValue);
|
||||
}
|
||||
catch {}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON in search library mapping.");
|
||||
}
|
||||
}
|
||||
|
||||
var dto = new SemanticSearchResultDto
|
||||
{
|
||||
ContentHash = point.Id.ToString(),
|
||||
ContentHash = GetPointIdString(point.Id),
|
||||
Snippet = content,
|
||||
UnitType = type,
|
||||
RelevanceScore = point.Score,
|
||||
@@ -529,7 +781,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
Metadata = metadata
|
||||
};
|
||||
|
||||
var pointIdStr = point.Id.ToString();
|
||||
var pointIdStr = GetPointIdString(point.Id);
|
||||
if (definitions.TryGetValue(pointIdStr, out var pointDefs) && pointDefs.Any())
|
||||
{
|
||||
dto.Snippet = $"[Context: {string.Join("; ", pointDefs)}]\n{dto.Snippet}";
|
||||
@@ -602,6 +854,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
|
||||
try
|
||||
{
|
||||
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
|
||||
var response = await _qdrantClient.SearchAsync(
|
||||
collectionName: "knowledge_units",
|
||||
vector: queryVector,
|
||||
@@ -627,11 +880,28 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
|
||||
// 3. Graph Expansion via Neo4j
|
||||
var candidateIds = searchResult.Select(r => r.Id.ToString()).ToList();
|
||||
var candidateIds = searchResult.Select(r => GetPointIdString(r.Id)).ToList();
|
||||
var relatedContexts = new List<string>();
|
||||
|
||||
// Keep map of point ID -> payload data for fast mapping later
|
||||
var pointMap = searchResult.ToDictionary(r => r.Id.ToString(), r => r);
|
||||
var pointMap = searchResult.ToDictionary(r => GetPointIdString(r.Id), r => r);
|
||||
|
||||
// Fetch knowledge units from PostgreSQL to map Guids back to rich metadata summaries
|
||||
var guidMap = new Dictionary<string, KnowledgeUnit>();
|
||||
try
|
||||
{
|
||||
using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
|
||||
var units = await dbContext.KnowledgeUnits
|
||||
.Include(u => u.Ebook)
|
||||
.ThenInclude(e => e.Author)
|
||||
.Where(u => u.TenantId == tenantId && (ebookId == null || u.EbookId == ebookId))
|
||||
.ToListAsync(cancellationToken);
|
||||
guidMap = units.ToDictionary(u => GetDeterministicGuid(u.Id).ToString(), u => u);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to load KnowledgeUnits from PostgreSQL for Guid mapping.");
|
||||
}
|
||||
|
||||
if (candidateIds.Any())
|
||||
{
|
||||
@@ -641,7 +911,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
var cypher = @"
|
||||
MATCH (source:KnowledgeUnit)
|
||||
WHERE source.id IN $candidateIds
|
||||
OPTIONAL MATCH (source)-[r:DEFINES|RELATED_TO]->(target:KnowledgeUnit)
|
||||
OPTIONAL MATCH (source)-[r]->(target:KnowledgeUnit)
|
||||
RETURN source.id AS sourceId, source.content AS sourceContent,
|
||||
collect({ targetId: target.id, targetContent: target.content, relation: type(r) }) AS relations";
|
||||
|
||||
@@ -654,23 +924,70 @@ public class KnowledgeService : IKnowledgeService
|
||||
foreach (var record in neoResult)
|
||||
{
|
||||
var sourceId = record["sourceId"].As<string>();
|
||||
var sourceContent = record["sourceContent"].As<string>();
|
||||
|
||||
relatedContexts.Add($"[Source ID: {sourceId}] {sourceContent}");
|
||||
var sourceText = string.Empty;
|
||||
if (guidMap.TryGetValue(sourceId, out var sourceUnit))
|
||||
{
|
||||
var summary = string.Empty;
|
||||
if (!string.IsNullOrEmpty(sourceUnit.MetadataJson))
|
||||
{
|
||||
try
|
||||
{
|
||||
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(sourceUnit.MetadataJson);
|
||||
if (meta != null && meta.TryGetValue("summary", out var sumObj))
|
||||
{
|
||||
summary = sumObj?.ToString();
|
||||
}
|
||||
}
|
||||
catch (JsonException jsonEx)
|
||||
{
|
||||
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync source hydration.", sourceUnit.Id);
|
||||
}
|
||||
}
|
||||
sourceText = string.IsNullOrEmpty(summary) ? sourceUnit.Content : $"{sourceUnit.Content}: {summary}";
|
||||
}
|
||||
else
|
||||
{
|
||||
sourceText = record["sourceContent"].As<string>();
|
||||
}
|
||||
|
||||
relatedContexts.Add($"[Source ID: {sourceId}] {sourceText}");
|
||||
|
||||
var relations = record["relations"].As<List<object>>();
|
||||
if (relations != null)
|
||||
{
|
||||
foreach (var relObj in relations)
|
||||
{
|
||||
if (relObj is Dictionary<string, object> relDict &&
|
||||
relDict.TryGetValue("targetId", out var targetIdVal) && targetIdVal is string targetId &&
|
||||
relDict.TryGetValue("targetContent", out var targetContentVal) && targetContentVal is string targetContent &&
|
||||
relDict.TryGetValue("relation", out var relationVal) && relationVal is string relation)
|
||||
if (relObj is System.Collections.IDictionary relDict)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(targetContent))
|
||||
var targetId = relDict["targetId"]?.ToString();
|
||||
var targetContent = relDict["targetContent"]?.ToString();
|
||||
var relation = relDict["relation"]?.ToString();
|
||||
|
||||
if (!string.IsNullOrEmpty(targetContent) && !string.IsNullOrEmpty(relation))
|
||||
{
|
||||
relatedContexts.Add($"[Related Context ({relation}) to {sourceId}] {targetContent}");
|
||||
var targetText = targetContent;
|
||||
if (!string.IsNullOrEmpty(targetId) && guidMap.TryGetValue(targetId, out var targetUnit))
|
||||
{
|
||||
var summary = string.Empty;
|
||||
if (!string.IsNullOrEmpty(targetUnit.MetadataJson))
|
||||
{
|
||||
try
|
||||
{
|
||||
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(targetUnit.MetadataJson);
|
||||
if (meta != null && meta.TryGetValue("summary", out var sumObj))
|
||||
{
|
||||
summary = sumObj?.ToString();
|
||||
}
|
||||
}
|
||||
catch (JsonException jsonEx)
|
||||
{
|
||||
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync target hydration.", targetUnit.Id);
|
||||
}
|
||||
}
|
||||
targetText = string.IsNullOrEmpty(summary) ? targetUnit.Content : $"{targetUnit.Content}: {summary}";
|
||||
}
|
||||
relatedContexts.Add($"[Related Context ({relation}) to {sourceId}] {targetText}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -682,9 +999,35 @@ public class KnowledgeService : IKnowledgeService
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Neo4j graph expansion failed. Falling back to direct Qdrant points.");
|
||||
foreach (var point in searchResult)
|
||||
{
|
||||
var sourceId = point.Id.ToString();
|
||||
var content = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
|
||||
relatedContexts.Add($"[Source ID: {sourceId}] {content}");
|
||||
var sourceId = GetPointIdString(point.Id);
|
||||
|
||||
var sourceText = string.Empty;
|
||||
if (guidMap.TryGetValue(sourceId, out var sourceUnit))
|
||||
{
|
||||
var summary = string.Empty;
|
||||
if (!string.IsNullOrEmpty(sourceUnit.MetadataJson))
|
||||
{
|
||||
try
|
||||
{
|
||||
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(sourceUnit.MetadataJson);
|
||||
if (meta != null && meta.TryGetValue("summary", out var sumObj))
|
||||
{
|
||||
summary = sumObj?.ToString();
|
||||
}
|
||||
}
|
||||
catch (JsonException jsonEx)
|
||||
{
|
||||
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in fallback AskQuestionAsync.", sourceUnit.Id);
|
||||
}
|
||||
}
|
||||
sourceText = string.IsNullOrEmpty(summary) ? sourceUnit.Content : $"{sourceUnit.Content}: {summary}";
|
||||
}
|
||||
else
|
||||
{
|
||||
sourceText = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
|
||||
}
|
||||
|
||||
relatedContexts.Add($"[Source ID: {sourceId}] {sourceText}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -708,33 +1051,14 @@ public class KnowledgeService : IKnowledgeService
|
||||
// 5. Build prompt and invoke Gemini with structured JSON formatting
|
||||
var contextBlocksText = string.Join("\n\n", relatedContexts);
|
||||
|
||||
var systemPrompt = @"
|
||||
You are an advanced, extremely precise Fact-Checking AI assistant. Your task is to answer the user's question using ONLY the provided context blocks.
|
||||
|
||||
Strict Grounding Rules:
|
||||
1. Rely EXCLUSIVELY on the provided context. Do NOT use any pre-existing external knowledge, facts, or assumptions.
|
||||
2. If the context does not contain the answer, you must state exactly: 'I cannot answer this based on the provided book context.'
|
||||
3. For every statement or claim you make in your answer, you must cite the specific source IDs (e.g., source chunk ID or hash) from the context.
|
||||
4. You must format your response ONLY as a JSON object matching the following structure:
|
||||
{
|
||||
""answer"": ""The answer text goes here, referencing [Source ID] as citations."",
|
||||
""citations"": [
|
||||
{
|
||||
""citationId"": ""The exact source ID cited (e.g., chunk hash/ID)"",
|
||||
""snippet"": ""The precise sentence or phrase from the context that supports this statement."",
|
||||
""sourceBook"": ""The book title or 'Unknown'""
|
||||
}
|
||||
]
|
||||
}
|
||||
";
|
||||
var systemPrompt = PromptRegistry.GroundedRAGSystemPrompt;
|
||||
|
||||
var userPrompt = $"Context:\n{contextBlocksText}\n\nQuestion: {question}";
|
||||
|
||||
var options = new ChatOptions
|
||||
{
|
||||
Temperature = 0.0f,
|
||||
MaxOutputTokens = 1500,
|
||||
ResponseFormat = ChatResponseFormat.Json
|
||||
MaxOutputTokens = 1500
|
||||
};
|
||||
|
||||
var chatResponse = await _retryPipeline.ExecuteAsync(async ct =>
|
||||
@@ -746,6 +1070,20 @@ Strict Grounding Rules:
|
||||
|
||||
var rawJson = chatResponse.Text?.Trim() ?? string.Empty;
|
||||
rawJson = rawJson.Replace("```json", "").Replace("```", "").Trim();
|
||||
|
||||
// Handle direct text fallback when model bypasses JSON format
|
||||
if (!rawJson.StartsWith("{") &&
|
||||
(rawJson.Contains("cannot answer", StringComparison.OrdinalIgnoreCase) ||
|
||||
rawJson.Contains("context does not contain", StringComparison.OrdinalIgnoreCase) ||
|
||||
rawJson.Contains("provided book context", StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
return Result.Ok(new GroundedResponseDto
|
||||
{
|
||||
Answer = "I cannot answer this based on the provided book context.",
|
||||
Citations = new List<CitationDto>()
|
||||
});
|
||||
}
|
||||
|
||||
rawJson = JsonRepairHelper.Repair(rawJson);
|
||||
|
||||
try
|
||||
@@ -756,15 +1094,42 @@ Strict Grounding Rules:
|
||||
return Result.Fail("Failed to deserialize grounded RAG response.");
|
||||
}
|
||||
|
||||
// Hydrate book titles for citations if unknown
|
||||
// Hydrate book titles, author, and page number for citations if unknown
|
||||
foreach (var citation in groundedResult.Citations)
|
||||
{
|
||||
if (pointMap.TryGetValue(citation.CitationId, out var point) &&
|
||||
point.Payload.TryGetValue("ebookId", out var ev) &&
|
||||
Guid.TryParse(ev.StringValue, out var ebId) &&
|
||||
ebookTitles.TryGetValue(ebId, out var title))
|
||||
Guid.TryParse(ev.StringValue, out var ebId))
|
||||
{
|
||||
citation.SourceBook = title;
|
||||
if (ebookTitles.TryGetValue(ebId, out var title))
|
||||
{
|
||||
citation.SourceBook = title;
|
||||
}
|
||||
}
|
||||
|
||||
// Look up from guidMap to get exact page number and author
|
||||
if (guidMap.TryGetValue(citation.CitationId, out var unit))
|
||||
{
|
||||
if (unit.Ebook?.Author != null)
|
||||
{
|
||||
citation.Author = unit.Ebook.Author.Name;
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(unit.MetadataJson))
|
||||
{
|
||||
try
|
||||
{
|
||||
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(unit.MetadataJson);
|
||||
if (meta != null && meta.TryGetValue("page", out var pageObj) && int.TryParse(pageObj?.ToString(), out var pageVal))
|
||||
{
|
||||
citation.PageNumber = pageVal;
|
||||
}
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync citation mapping.", unit.Id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -790,6 +1155,30 @@ Strict Grounding Rules:
|
||||
await dbContext.SemanticKnowledgeCache.ExecuteDeleteAsync(cancellationToken);
|
||||
await dbContext.KnowledgeUnits.ExecuteDeleteAsync(cancellationToken);
|
||||
await dbContext.KnowledgeUnitLinks.ExecuteDeleteAsync(cancellationToken);
|
||||
|
||||
try
|
||||
{
|
||||
await _qdrantClient.DeleteCollectionAsync("knowledge_units", cancellationToken: cancellationToken);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to drop Qdrant collection 'knowledge_units' during cache clear.");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await using var session = _neo4jDriver.AsyncSession();
|
||||
await session.ExecuteWriteAsync(async tx =>
|
||||
{
|
||||
await tx.RunAsync("MATCH (n:KnowledgeUnit) DETACH DELETE n");
|
||||
});
|
||||
_logger.LogInformation("[KnowledgeService] Successfully wiped Neo4j 'KnowledgeUnit' nodes.");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "[KnowledgeService] Failed to wipe Neo4j graph during cache clear.");
|
||||
}
|
||||
|
||||
return Result.Ok();
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
Reference in New Issue
Block a user