feat(search/rag): implement NexusSearchBox, dynamic Qdrant collection auto-provisioning, batch vector ingestion, mobile Serilog logging, and resolve 401 auth handler error (#51)

Resolves #52

This Pull Request introduces the **NexusSearchBox** search feature with premium unified styling, implements a robust **dynamic Qdrant collection auto-provisioning and batch-vector ingestion pipeline**, integrates a unified **Serilog logging infrastructure** for the Blazor Hybrid environment (MAUI), and resolves the **401 Unauthorized API header propagation error** inside mobile builds.

### 🚀 Key Implementations

#### 1. Premium `NexusSearchBox` & Semantic Search UI
* **NexusSearchBox Component:** Created an elegant search-as-you-type search box with smooth key navigation, quick-clearing, and seamless dynamic styling.
* **Unified Aesthetics:** Refactored the search box isolated styling to align perfectly with the dashboard's design system using glassmorphism, `--nexus-neon` token gradients, and smooth pulse/fade animations.
* **Semantic Search Integration:** Integrated semantic search query dispatching (`SearchLibrarySemanticallyQuery`) and wired up navigation seamlessly through the updated `ReaderNavigationService`.
* **Tests Hardening:** Added/adapted query assertions in `QueryTests.cs` to guarantee safe parameterization and error boundary mapping.

#### 2. Qdrant Collection Provisioning & Vector Ingestion
* **Dynamic Auto-Provisioning:** Implemented dynamic checking and lazy-creation of the `knowledge_units` collection using 768 dimensions and Cosine distance.
* **High-Performance Ingestion:** Optimized `ProcessKnowledgeUnitsAsync` with high-performance batch embedding generation using `_embeddingGenerator` and deterministic MD5 GUIDs for stable, duplicate-free upsertion.
* **Database Cache Clear Sync:** Integrated Qdrant collection deletion in `ClearCacheAsync` to ensure absolute consistency between the PostgreSQL database cache and vector database indices.

#### 3. Cross-Platform MAUI Logging (Serilog Infrastructure)
* **Serilog Integration:** Configured cross-platform Serilog routing in `SerilogConfiguration.cs`, streaming diagnostic logs safely across native platforms and the Blazor Webview container.
* **Interop Bridge:** Built `BlazorLoggingBridge.cs` to capture web console messages and pipe them directly to the native host logger.
* **Demo Interface:** Added an interactive `SerilogDemo.razor` sandbox under Pages.

#### 4. Resolving 401 Load Errors (Authentication Handler Flow)
* **Authentication Header Handler:** Implemented the `MobileAuthenticationHeaderHandler` to correctly extract, validate, and inject bearer JWT tokens into outbound API requests.
* **Configuration-based API Host:** Structured standard API URI routing to use clean configuration bindings in `appsettings.json`.

---

### 🧪 Verification & Build Status
* Run `dotnet build` from the solution root: Successfully compiled the full multi-targeted solution (`Liczba błędów: 0`).
* All unit and integration tests successfully executed and verified (`dotnet test`).

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Co-authored-by: Marek Jaisński <jasins.marek@gmail.com>
Reviewed-on: #51
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #51.
This commit is contained in:
2026-05-26 12:15:28 +00:00
committed by Marek Jaisński
parent 758b152a0c
commit a0bf6c15f4
61 changed files with 5111 additions and 570 deletions
@@ -15,6 +15,7 @@ using Polly.Registry;
using Microsoft.Extensions.Options;
using NexusReader.Infrastructure.Configuration;
using Qdrant.Client;
using Qdrant.Client.Grpc;
using Neo4j.Driver;
namespace NexusReader.Infrastructure.Services;
@@ -32,8 +33,9 @@ public class KnowledgeService : IKnowledgeService
private readonly ILogger<KnowledgeService> _logger;
private readonly QdrantClient _qdrantClient;
private readonly IDriver _neo4jDriver;
private const string PromptVersion = "1.3";
private const string PromptVersion = "1.7";
private static readonly ConcurrentDictionary<string, Lazy<Task<Result<KnowledgePacket>>>> _activeRequests = new();
private static readonly SemaphoreSlim _collectionSemaphore = new(1, 1);
public KnowledgeService(
IChatClient chatClient,
@@ -84,11 +86,12 @@ public class KnowledgeService : IKnowledgeService
using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
var normalizedText = text.Trim();
var hash = ContentHasher.ComputeHash(normalizedText);
var hashInput = $"{normalizedText}:{traceType}:{PromptVersion}";
var hash = ContentHasher.ComputeHash(hashInput);
// 1. Check Cache
var cached = await dbContext.SemanticKnowledgeCache
.FirstOrDefaultAsync(c => c.ContentHash == hash && c.TenantId == tenantId, cancellationToken);
.FirstOrDefaultAsync(c => c.ContentHash == hash, cancellationToken);
if (cached != null && cached.PromptVersion == PromptVersion)
{
@@ -96,7 +99,12 @@ public class KnowledgeService : IKnowledgeService
try
{
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, JsonOptions);
if (packet != null) return Result.Ok(packet);
if (packet != null)
{
await ProcessKnowledgeUnitsAsync(packet, tenantId, ebookId, dbContext, cancellationToken);
await dbContext.SaveChangesAsync(cancellationToken);
return Result.Ok(packet);
}
}
catch (JsonException ex)
{
@@ -105,7 +113,7 @@ public class KnowledgeService : IKnowledgeService
}
// Deduplicate concurrent active requests for the exact same hash
var requestKey = $"{tenantId}:{hash}:{traceType}";
var requestKey = $"{hash}:{traceType}";
var lazyTask = _activeRequests.GetOrAdd(requestKey, k =>
new Lazy<Task<Result<KnowledgePacket>>>(
@@ -177,7 +185,7 @@ public class KnowledgeService : IKnowledgeService
// 4. Save to Cache
var cached = await dbContext.SemanticKnowledgeCache
.FirstOrDefaultAsync(c => c.ContentHash == hash && c.TenantId == tenantId);
.FirstOrDefaultAsync(c => c.ContentHash == hash);
var cacheEntry = new SemanticKnowledgeCache
{
@@ -201,7 +209,14 @@ public class KnowledgeService : IKnowledgeService
// 5. Process structured KnowledgeUnits (Graph Expansion)
await ProcessKnowledgeUnitsAsync(knowledgePacket, tenantId, ebookId, dbContext, default);
await dbContext.SaveChangesAsync();
try
{
await dbContext.SaveChangesAsync();
}
catch (DbUpdateException ex) when (ex.InnerException is Npgsql.PostgresException pgEx && pgEx.SqlState == "23505")
{
_logger.LogWarning("[KnowledgeService] Concurrency collision on SemanticKnowledgeCache for {Hash}; another process saved it first. Swallowing.", hash);
}
return Result.Ok(knowledgePacket);
}
catch (JsonException ex)
@@ -224,6 +239,30 @@ public class KnowledgeService : IKnowledgeService
private async Task ProcessKnowledgeUnitsAsync(KnowledgePacket packet, string tenantId, Guid? ebookId, AppDbContext dbContext, CancellationToken cancellationToken)
{
if (packet.Graph != null && (packet.Units == null || !packet.Units.Any()))
{
var graphUnits = packet.Graph.Nodes.Select(node => new KnowledgeUnitDto(
node.Id,
node.Type ?? "concept",
node.Description ?? node.Label,
new Dictionary<string, object>
{
["label"] = node.Label,
["group"] = node.Group,
["summary"] = node.Summary ?? "",
["key_terms"] = node.KeyTerms ?? new List<string>()
}
)).ToList();
var graphLinks = packet.Graph.Links.Select(link => new KnowledgeLinkDto(
link.Source,
link.Target,
link.RelationType
)).ToList();
packet = packet with { Units = graphUnits, Links = graphLinks };
}
var unitIds = packet.Units.Select(u => u.Id).ToList();
var linkSourceIds = packet.Links.Select(l => l.Source).ToList();
var linkTargetIds = packet.Links.Select(l => l.Target).ToList();
@@ -285,6 +324,192 @@ public class KnowledgeService : IKnowledgeService
_logger.LogWarning("[KnowledgeService] Skipping invalid link {Source} -> {Target}: one or both units are missing.", linkDto.Source, linkDto.Target);
}
}
// Generate and upsert vectors to Qdrant in batch
var unitsToEmbed = packet.Units
.Where(u => !string.IsNullOrEmpty(u.Content))
.ToList();
if (unitsToEmbed.Any())
{
try
{
var contents = unitsToEmbed.Select(u => u.Content).ToList();
var embeddingResponse = await _retryPipeline.ExecuteAsync(async ct =>
await _embeddingGenerator.GenerateAsync(
contents,
new EmbeddingGenerationOptions { Dimensions = 768 },
cancellationToken: ct), cancellationToken);
var embeddings = embeddingResponse.ToList();
var points = new List<PointStruct>();
for (int i = 0; i < unitsToEmbed.Count; i++)
{
var unitDto = unitsToEmbed[i];
var vector = embeddings[i].Vector.ToArray();
var point = new PointStruct
{
Id = GetDeterministicGuid(unitDto.Id),
Vectors = vector,
Payload =
{
["content"] = unitDto.Content,
["type"] = unitDto.Type ?? string.Empty,
["tenantId"] = tenantId,
["ebookId"] = ebookId?.ToString() ?? string.Empty,
["metadataJson"] = JsonSerializer.Serialize(unitDto.Metadata)
}
};
points.Add(point);
}
if (points.Any())
{
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
await _qdrantClient.UpsertAsync("knowledge_units", points, cancellationToken: cancellationToken);
_logger.LogInformation("[KnowledgeService] Successfully upserted {Count} points to Qdrant collection 'knowledge_units'.", points.Count);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "[KnowledgeService] Failed to generate and upsert embeddings for knowledge units to Qdrant.");
}
}
// 6. Synchronize to Neo4j graph database
await SyncToNeo4jAsync(packet, cancellationToken);
}
private async Task SyncToNeo4jAsync(KnowledgePacket packet, CancellationToken cancellationToken)
{
if (packet.Units == null || !packet.Units.Any()) return;
try
{
await using var session = _neo4jDriver.AsyncSession();
// 1. Merge nodes in a transaction
await session.ExecuteWriteAsync(async tx =>
{
foreach (var unit in packet.Units)
{
var cypher = @"
MERGE (u:KnowledgeUnit {id: $id})
ON CREATE SET u.content = $content, u.type = $type
ON MATCH SET u.content = $content, u.type = $type";
var guidStr = GetDeterministicGuid(unit.Id).ToString();
await tx.RunAsync(cypher, new
{
id = guidStr,
content = unit.Content ?? string.Empty,
type = unit.Type ?? "concept"
});
}
});
// 2. Merge links in a transaction
if (packet.Links != null && packet.Links.Any())
{
await session.ExecuteWriteAsync(async tx =>
{
foreach (var link in packet.Links)
{
if (string.IsNullOrWhiteSpace(link.Source) || string.IsNullOrWhiteSpace(link.Target))
continue;
var relationType = string.IsNullOrWhiteSpace(link.Relation) ? "RELATED_TO" : link.Relation.Trim().ToUpperInvariant();
relationType = System.Text.RegularExpressions.Regex.Replace(relationType, @"[^A-Z0-9_]", "_");
if (string.IsNullOrEmpty(relationType) || relationType == "_")
{
relationType = "RELATED_TO";
}
var cypher = $@"
MATCH (source:KnowledgeUnit {{id: $sourceId}})
MATCH (target:KnowledgeUnit {{id: $targetId}})
MERGE (source)-[r:{relationType}]->(target)";
var sourceGuidStr = GetDeterministicGuid(link.Source).ToString();
var targetGuidStr = GetDeterministicGuid(link.Target).ToString();
await tx.RunAsync(cypher, new
{
sourceId = sourceGuidStr,
targetId = targetGuidStr
});
}
});
}
_logger.LogInformation("[KnowledgeService] Successfully synchronized {NodeCount} nodes and {LinkCount} links to Neo4j.", packet.Units.Count, packet.Links?.Count ?? 0);
}
catch (Exception ex)
{
_logger.LogError(ex, "[KnowledgeService] Failed to synchronize knowledge graph to Neo4j.");
}
}
private async Task EnsureCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
{
await _collectionSemaphore.WaitAsync(cancellationToken);
try
{
var exists = await _qdrantClient.CollectionExistsAsync(collectionName, cancellationToken);
if (!exists)
{
_logger.LogInformation("[KnowledgeService] Creating Qdrant collection '{CollectionName}'...", collectionName);
await _qdrantClient.CreateCollectionAsync(
collectionName: collectionName,
vectorsConfig: new VectorParams
{
Size = 768,
Distance = Distance.Cosine
},
cancellationToken: cancellationToken
);
_logger.LogInformation("[KnowledgeService] Qdrant collection '{CollectionName}' created successfully.", collectionName);
}
}
catch (Exception ex)
{
if (ex.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase) ||
(ex.InnerException != null && ex.InnerException.Message.Contains("already exists", StringComparison.OrdinalIgnoreCase)))
{
_logger.LogInformation("[KnowledgeService] Qdrant collection '{CollectionName}' was already created by another thread.", collectionName);
}
else
{
_logger.LogError(ex, "[KnowledgeService] Error ensuring Qdrant collection '{CollectionName}' exists.", collectionName);
}
}
finally
{
_collectionSemaphore.Release();
}
}
private static Guid GetDeterministicGuid(string input)
{
if (Guid.TryParse(input, out var guid))
{
return guid;
}
using var md5 = System.Security.Cryptography.MD5.Create();
byte[] hash = md5.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input));
return new Guid(hash);
}
private static string GetPointIdString(PointId pointId)
{
if (pointId == null) return string.Empty;
return pointId.PointIdOptionsCase == PointId.PointIdOptionsOneofCase.Uuid
? pointId.Uuid
: pointId.Num.ToString();
}
public async Task<Result<GroundednessResult>> VerifyGroundednessAsync(string answer, string context, string tenantId, CancellationToken cancellationToken = default)
@@ -354,6 +579,7 @@ public class KnowledgeService : IKnowledgeService
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
try
{
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
var response = await _qdrantClient.SearchAsync(
collectionName: "knowledge_units",
vector: queryVector,
@@ -363,15 +589,37 @@ public class KnowledgeService : IKnowledgeService
);
searchResult = response.ToList();
}
catch (Exception)
catch (Exception ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Qdrant search failed during GetRelevantContextAsync. Returning empty search results.");
searchResult = new List<Qdrant.Client.Grpc.ScoredPoint>();
}
var contexts = searchResult.Select(point => new RelevantContext
var contexts = searchResult.Select(point =>
{
Text = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty,
Confidence = point.Score
var content = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
var summary = string.Empty;
if (point.Payload.TryGetValue("metadataJson", out var metaVal) && !string.IsNullOrEmpty(metaVal.StringValue))
{
try
{
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(metaVal.StringValue);
if (meta != null && meta.TryGetValue("summary", out var sumObj))
{
summary = sumObj?.ToString();
}
}
catch (JsonException ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON in RelevantContext mapping.");
}
}
var text = string.IsNullOrEmpty(summary) ? content : $"{content}: {summary}";
return new RelevantContext
{
Text = text,
Confidence = point.Score
};
}).ToList();
return Result.Ok(contexts);
@@ -417,6 +665,7 @@ public class KnowledgeService : IKnowledgeService
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
try
{
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
var response = await _qdrantClient.SearchAsync(
collectionName: "knowledge_units",
vector: queryVector,
@@ -438,7 +687,7 @@ public class KnowledgeService : IKnowledgeService
}
// 3. Graph Expansion via Neo4j
var candidateIds = searchResult.Select(r => r.Id.ToString()).ToList();
var candidateIds = searchResult.Select(r => GetPointIdString(r.Id)).ToList();
var definitions = new Dictionary<string, List<string>>();
if (candidateIds.Any())
@@ -447,7 +696,7 @@ public class KnowledgeService : IKnowledgeService
{
await using var session = _neo4jDriver.AsyncSession();
var cypher = @"
MATCH (source:KnowledgeUnit)-[r:DEFINES]->(target:KnowledgeUnit)
MATCH (source:KnowledgeUnit)-[r]->(target:KnowledgeUnit)
WHERE source.id IN $candidateIds
RETURN source.id AS sourceId, target.content AS targetContent";
@@ -516,12 +765,15 @@ public class KnowledgeService : IKnowledgeService
{
metadata = JsonSerializer.Deserialize<Dictionary<string, object>>(metaVal.StringValue);
}
catch {}
catch (JsonException ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON in search library mapping.");
}
}
var dto = new SemanticSearchResultDto
{
ContentHash = point.Id.ToString(),
ContentHash = GetPointIdString(point.Id),
Snippet = content,
UnitType = type,
RelevanceScore = point.Score,
@@ -529,7 +781,7 @@ public class KnowledgeService : IKnowledgeService
Metadata = metadata
};
var pointIdStr = point.Id.ToString();
var pointIdStr = GetPointIdString(point.Id);
if (definitions.TryGetValue(pointIdStr, out var pointDefs) && pointDefs.Any())
{
dto.Snippet = $"[Context: {string.Join("; ", pointDefs)}]\n{dto.Snippet}";
@@ -602,6 +854,7 @@ public class KnowledgeService : IKnowledgeService
List<Qdrant.Client.Grpc.ScoredPoint> searchResult;
try
{
await EnsureCollectionExistsAsync("knowledge_units", cancellationToken);
var response = await _qdrantClient.SearchAsync(
collectionName: "knowledge_units",
vector: queryVector,
@@ -627,11 +880,28 @@ public class KnowledgeService : IKnowledgeService
}
// 3. Graph Expansion via Neo4j
var candidateIds = searchResult.Select(r => r.Id.ToString()).ToList();
var candidateIds = searchResult.Select(r => GetPointIdString(r.Id)).ToList();
var relatedContexts = new List<string>();
// Keep map of point ID -> payload data for fast mapping later
var pointMap = searchResult.ToDictionary(r => r.Id.ToString(), r => r);
var pointMap = searchResult.ToDictionary(r => GetPointIdString(r.Id), r => r);
// Fetch knowledge units from PostgreSQL to map Guids back to rich metadata summaries
var guidMap = new Dictionary<string, KnowledgeUnit>();
try
{
using var dbContext = await _dbContextFactory.CreateDbContextAsync(cancellationToken);
var units = await dbContext.KnowledgeUnits
.Include(u => u.Ebook)
.ThenInclude(e => e.Author)
.Where(u => u.TenantId == tenantId && (ebookId == null || u.EbookId == ebookId))
.ToListAsync(cancellationToken);
guidMap = units.ToDictionary(u => GetDeterministicGuid(u.Id).ToString(), u => u);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to load KnowledgeUnits from PostgreSQL for Guid mapping.");
}
if (candidateIds.Any())
{
@@ -641,7 +911,7 @@ public class KnowledgeService : IKnowledgeService
var cypher = @"
MATCH (source:KnowledgeUnit)
WHERE source.id IN $candidateIds
OPTIONAL MATCH (source)-[r:DEFINES|RELATED_TO]->(target:KnowledgeUnit)
OPTIONAL MATCH (source)-[r]->(target:KnowledgeUnit)
RETURN source.id AS sourceId, source.content AS sourceContent,
collect({ targetId: target.id, targetContent: target.content, relation: type(r) }) AS relations";
@@ -654,23 +924,70 @@ public class KnowledgeService : IKnowledgeService
foreach (var record in neoResult)
{
var sourceId = record["sourceId"].As<string>();
var sourceContent = record["sourceContent"].As<string>();
relatedContexts.Add($"[Source ID: {sourceId}] {sourceContent}");
var sourceText = string.Empty;
if (guidMap.TryGetValue(sourceId, out var sourceUnit))
{
var summary = string.Empty;
if (!string.IsNullOrEmpty(sourceUnit.MetadataJson))
{
try
{
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(sourceUnit.MetadataJson);
if (meta != null && meta.TryGetValue("summary", out var sumObj))
{
summary = sumObj?.ToString();
}
}
catch (JsonException jsonEx)
{
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync source hydration.", sourceUnit.Id);
}
}
sourceText = string.IsNullOrEmpty(summary) ? sourceUnit.Content : $"{sourceUnit.Content}: {summary}";
}
else
{
sourceText = record["sourceContent"].As<string>();
}
relatedContexts.Add($"[Source ID: {sourceId}] {sourceText}");
var relations = record["relations"].As<List<object>>();
if (relations != null)
{
foreach (var relObj in relations)
{
if (relObj is Dictionary<string, object> relDict &&
relDict.TryGetValue("targetId", out var targetIdVal) && targetIdVal is string targetId &&
relDict.TryGetValue("targetContent", out var targetContentVal) && targetContentVal is string targetContent &&
relDict.TryGetValue("relation", out var relationVal) && relationVal is string relation)
if (relObj is System.Collections.IDictionary relDict)
{
if (!string.IsNullOrEmpty(targetContent))
var targetId = relDict["targetId"]?.ToString();
var targetContent = relDict["targetContent"]?.ToString();
var relation = relDict["relation"]?.ToString();
if (!string.IsNullOrEmpty(targetContent) && !string.IsNullOrEmpty(relation))
{
relatedContexts.Add($"[Related Context ({relation}) to {sourceId}] {targetContent}");
var targetText = targetContent;
if (!string.IsNullOrEmpty(targetId) && guidMap.TryGetValue(targetId, out var targetUnit))
{
var summary = string.Empty;
if (!string.IsNullOrEmpty(targetUnit.MetadataJson))
{
try
{
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(targetUnit.MetadataJson);
if (meta != null && meta.TryGetValue("summary", out var sumObj))
{
summary = sumObj?.ToString();
}
}
catch (JsonException jsonEx)
{
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync target hydration.", targetUnit.Id);
}
}
targetText = string.IsNullOrEmpty(summary) ? targetUnit.Content : $"{targetUnit.Content}: {summary}";
}
relatedContexts.Add($"[Related Context ({relation}) to {sourceId}] {targetText}");
}
}
}
@@ -682,9 +999,35 @@ public class KnowledgeService : IKnowledgeService
_logger.LogWarning(ex, "[KnowledgeService] Neo4j graph expansion failed. Falling back to direct Qdrant points.");
foreach (var point in searchResult)
{
var sourceId = point.Id.ToString();
var content = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
relatedContexts.Add($"[Source ID: {sourceId}] {content}");
var sourceId = GetPointIdString(point.Id);
var sourceText = string.Empty;
if (guidMap.TryGetValue(sourceId, out var sourceUnit))
{
var summary = string.Empty;
if (!string.IsNullOrEmpty(sourceUnit.MetadataJson))
{
try
{
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(sourceUnit.MetadataJson);
if (meta != null && meta.TryGetValue("summary", out var sumObj))
{
summary = sumObj?.ToString();
}
}
catch (JsonException jsonEx)
{
_logger.LogWarning(jsonEx, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in fallback AskQuestionAsync.", sourceUnit.Id);
}
}
sourceText = string.IsNullOrEmpty(summary) ? sourceUnit.Content : $"{sourceUnit.Content}: {summary}";
}
else
{
sourceText = point.Payload.TryGetValue("content", out var cv) ? cv.StringValue : string.Empty;
}
relatedContexts.Add($"[Source ID: {sourceId}] {sourceText}");
}
}
}
@@ -708,33 +1051,14 @@ public class KnowledgeService : IKnowledgeService
// 5. Build prompt and invoke Gemini with structured JSON formatting
var contextBlocksText = string.Join("\n\n", relatedContexts);
var systemPrompt = @"
You are an advanced, extremely precise Fact-Checking AI assistant. Your task is to answer the user's question using ONLY the provided context blocks.
Strict Grounding Rules:
1. Rely EXCLUSIVELY on the provided context. Do NOT use any pre-existing external knowledge, facts, or assumptions.
2. If the context does not contain the answer, you must state exactly: 'I cannot answer this based on the provided book context.'
3. For every statement or claim you make in your answer, you must cite the specific source IDs (e.g., source chunk ID or hash) from the context.
4. You must format your response ONLY as a JSON object matching the following structure:
{
""answer"": ""The answer text goes here, referencing [Source ID] as citations."",
""citations"": [
{
""citationId"": ""The exact source ID cited (e.g., chunk hash/ID)"",
""snippet"": ""The precise sentence or phrase from the context that supports this statement."",
""sourceBook"": ""The book title or 'Unknown'""
}
]
}
";
var systemPrompt = PromptRegistry.GroundedRAGSystemPrompt;
var userPrompt = $"Context:\n{contextBlocksText}\n\nQuestion: {question}";
var options = new ChatOptions
{
Temperature = 0.0f,
MaxOutputTokens = 1500,
ResponseFormat = ChatResponseFormat.Json
MaxOutputTokens = 1500
};
var chatResponse = await _retryPipeline.ExecuteAsync(async ct =>
@@ -746,6 +1070,20 @@ Strict Grounding Rules:
var rawJson = chatResponse.Text?.Trim() ?? string.Empty;
rawJson = rawJson.Replace("```json", "").Replace("```", "").Trim();
// Handle direct text fallback when model bypasses JSON format
if (!rawJson.StartsWith("{") &&
(rawJson.Contains("cannot answer", StringComparison.OrdinalIgnoreCase) ||
rawJson.Contains("context does not contain", StringComparison.OrdinalIgnoreCase) ||
rawJson.Contains("provided book context", StringComparison.OrdinalIgnoreCase)))
{
return Result.Ok(new GroundedResponseDto
{
Answer = "I cannot answer this based on the provided book context.",
Citations = new List<CitationDto>()
});
}
rawJson = JsonRepairHelper.Repair(rawJson);
try
@@ -756,15 +1094,42 @@ Strict Grounding Rules:
return Result.Fail("Failed to deserialize grounded RAG response.");
}
// Hydrate book titles for citations if unknown
// Hydrate book titles, author, and page number for citations if unknown
foreach (var citation in groundedResult.Citations)
{
if (pointMap.TryGetValue(citation.CitationId, out var point) &&
point.Payload.TryGetValue("ebookId", out var ev) &&
Guid.TryParse(ev.StringValue, out var ebId) &&
ebookTitles.TryGetValue(ebId, out var title))
Guid.TryParse(ev.StringValue, out var ebId))
{
citation.SourceBook = title;
if (ebookTitles.TryGetValue(ebId, out var title))
{
citation.SourceBook = title;
}
}
// Look up from guidMap to get exact page number and author
if (guidMap.TryGetValue(citation.CitationId, out var unit))
{
if (unit.Ebook?.Author != null)
{
citation.Author = unit.Ebook.Author.Name;
}
if (!string.IsNullOrEmpty(unit.MetadataJson))
{
try
{
var meta = JsonSerializer.Deserialize<Dictionary<string, object>>(unit.MetadataJson);
if (meta != null && meta.TryGetValue("page", out var pageObj) && int.TryParse(pageObj?.ToString(), out var pageVal))
{
citation.PageNumber = pageVal;
}
}
catch (JsonException ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to deserialize metadata JSON for unit {UnitId} in AskQuestionAsync citation mapping.", unit.Id);
}
}
}
}
@@ -790,6 +1155,30 @@ Strict Grounding Rules:
await dbContext.SemanticKnowledgeCache.ExecuteDeleteAsync(cancellationToken);
await dbContext.KnowledgeUnits.ExecuteDeleteAsync(cancellationToken);
await dbContext.KnowledgeUnitLinks.ExecuteDeleteAsync(cancellationToken);
try
{
await _qdrantClient.DeleteCollectionAsync("knowledge_units", cancellationToken: cancellationToken);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to drop Qdrant collection 'knowledge_units' during cache clear.");
}
try
{
await using var session = _neo4jDriver.AsyncSession();
await session.ExecuteWriteAsync(async tx =>
{
await tx.RunAsync("MATCH (n:KnowledgeUnit) DETACH DELETE n");
});
_logger.LogInformation("[KnowledgeService] Successfully wiped Neo4j 'KnowledgeUnit' nodes.");
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Failed to wipe Neo4j graph during cache clear.");
}
return Result.Ok();
}
catch (Exception ex)