feat: implement semantic search, knowledge unit extraction, and visualization components
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusReader.Domain.Entities;
|
||||
|
||||
namespace NexusReader.Application.Abstractions.Persistence;
|
||||
|
||||
/// <summary>
/// Persistence abstraction exposing the entity sets the application layer reads and writes,
/// plus the operation that commits pending changes.
/// </summary>
public interface IApplicationDbContext
{
    /// <summary>Set of <see cref="SemanticKnowledgeCache"/> rows.</summary>
    DbSet<SemanticKnowledgeCache> SemanticKnowledgeCache { get; }

    /// <summary>Set of <see cref="KnowledgeUnit"/> rows.</summary>
    DbSet<KnowledgeUnit> KnowledgeUnits { get; }

    /// <summary>Set of <see cref="KnowledgeUnitLink"/> rows.</summary>
    DbSet<KnowledgeUnitLink> KnowledgeUnitLinks { get; }

    /// <summary>Set of <see cref="Ebook"/> rows.</summary>
    DbSet<Ebook> Ebooks { get; }

    /// <summary>Set of <see cref="QuizResult"/> rows.</summary>
    DbSet<QuizResult> QuizResults { get; }

    /// <summary>Persists pending changes.</summary>
    /// <param name="cancellationToken">Token used to cancel the save.</param>
    /// <returns>
    /// An integer produced by the implementation.
    /// NOTE(review): presumably the number of affected entries, as with EF Core's DbContext — confirm.
    /// </returns>
    Task<int> SaveChangesAsync(CancellationToken cancellationToken = default);
}
|
||||
@@ -7,6 +7,8 @@ public interface IKnowledgeService
|
||||
{
|
||||
/// <summary>Produces a <see cref="KnowledgePacket"/> for the supplied text.</summary>
/// <remarks>NOTE(review): which packet sections get populated is decided by the implementation, not visible here.</remarks>
Task<Result<KnowledgePacket>> GetKnowledgeAsync(
string text,
CancellationToken cancellationToken = default);

/// <summary>Produces a <see cref="KnowledgePacket"/> for the supplied text; presumably its graph section — confirm against the implementation.</summary>
Task<Result<KnowledgePacket>> GetGraphDataAsync(
string text,
CancellationToken cancellationToken = default);

/// <summary>Produces a <see cref="KnowledgePacket"/> for the supplied text; presumably its units and links — confirm against the implementation.</summary>
Task<Result<KnowledgePacket>> GetKnowledgeMapAsync(
string text,
CancellationToken cancellationToken = default);

/// <summary>Produces a <see cref="KnowledgePacket"/> for the supplied text; presumably its summary and quizzes — confirm against the implementation.</summary>
Task<Result<KnowledgePacket>> GetSummaryAndQuizAsync(
string text,
CancellationToken cancellationToken = default);

/// <summary>Returns a list of <see cref="RelevantContext"/> items for a query, given a tenant identifier.</summary>
/// <param name="query">The query text.</param>
/// <param name="tenantId">Tenant identifier; presumably scopes the lookup — confirm against the implementation.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
Task<Result<List<RelevantContext>>> GetRelevantContextAsync(
string query,
string tenantId,
CancellationToken cancellationToken = default);

/// <summary>Clears the service's cache and reports the outcome as a <see cref="Result"/>.</summary>
Task<Result> ClearCacheAsync(CancellationToken cancellationToken = default);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
using FluentResults;
|
||||
using MediatR;
|
||||
using Microsoft.Extensions.AI;
|
||||
using NexusReader.Infrastructure.Services; // For PromptRegistry
|
||||
|
||||
namespace NexusReader.Application.Commands.AI;
|
||||
|
||||
/// <summary>
/// Request to check whether an answer is supported by a given context.
/// </summary>
/// <param name="Answer">The answer text to be checked.</param>
/// <param name="Context">The context text the answer is checked against.</param>
public record VerifyGroundednessCommand(
    string Answer,
    string Context)
    : IRequest<Result<GroundednessResult>>;
|
||||
|
||||
/// <summary>
/// Outcome of a groundedness check.
/// </summary>
/// <param name="Score">Groundedness rating; the handler's prompt asks the model for a value from 0.0 to 1.0, but the range is not validated.</param>
/// <param name="Rationale">Free-text explanation accompanying the score.</param>
/// <param name="IsGrounded">Whether the answer was judged to be grounded.</param>
public record GroundednessResult(
    float Score,
    string Rationale,
    bool IsGrounded);
|
||||
|
||||
/// <summary>
/// Handles <see cref="VerifyGroundednessCommand"/>: sends the answer and context to the injected chat client
/// with a fact-checking system prompt and parses the JSON reply into a <see cref="GroundednessResult"/>.
/// </summary>
public class VerifyGroundednessCommandHandler : IRequestHandler<VerifyGroundednessCommand, Result<GroundednessResult>>
{
    // Reused for every request: building JsonSerializerOptions per call discards its metadata cache (CA1869).
    private static readonly System.Text.Json.JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true
    };

    // System prompt sent with every request; the reply is expected to be a single JSON object.
    private const string SystemPrompt = @"
You are a Fact-Checking AI. Evaluate if the 'Answer' is supported by the 'Context'.
Rate the groundedness from 0.0 to 1.0.
Return ONLY a JSON object: { ""score"": 0.9, ""rationale"": ""string"", ""isGrounded"": true }
";

    private readonly IChatClient _chatClient;

    /// <summary>Creates the handler.</summary>
    /// <param name="chatClient">Chat client used to obtain the groundedness verdict.</param>
    public VerifyGroundednessCommandHandler(IChatClient chatClient)
    {
        _chatClient = chatClient;
    }

    /// <summary>
    /// Asks the chat client to rate how well <c>request.Answer</c> is supported by <c>request.Context</c>.
    /// </summary>
    /// <param name="request">The answer/context pair to evaluate.</param>
    /// <param name="cancellationToken">Token forwarded to the chat client call.</param>
    /// <returns>
    /// A successful result carrying the parsed <see cref="GroundednessResult"/>, or a failed result when
    /// the reply cannot be parsed or any exception is thrown.
    /// </returns>
    public async Task<Result<GroundednessResult>> Handle(VerifyGroundednessCommand request, CancellationToken cancellationToken)
    {
        var userPrompt = $"Context: {request.Context}\n\nAnswer: {request.Answer}";

        try
        {
            var messages = new List<ChatMessage>
            {
                new ChatMessage(ChatRole.System, SystemPrompt),
                new ChatMessage(ChatRole.User, userPrompt)
            };

            var response = await _chatClient.GetResponseAsync(messages, cancellationToken: cancellationToken);

            var rawJson = ExtractJsonObject(response.Text);
            var result = System.Text.Json.JsonSerializer.Deserialize<GroundednessResult>(rawJson, JsonOptions);

            if (result is null)
            {
                return Result.Fail("Failed to parse groundedness result");
            }

            return Result.Ok(result);
        }
        catch (Exception ex)
        {
            // NOTE(review): this also turns OperationCanceledException into a failed Result — confirm callers expect that.
            return Result.Fail(new Error("Failed to verify groundedness").CausedBy(ex));
        }
    }

    /// <summary>
    /// Isolates the JSON object from a model reply that may be wrapped in Markdown fences or surrounding prose.
    /// </summary>
    /// <param name="responseText">Raw reply text; <c>null</c> is treated as an empty JSON object.</param>
    /// <returns>
    /// The span from the first '{' to the last '}' when both exist; otherwise the trimmed text with
    /// Markdown code-fence markers removed.
    /// </returns>
    private static string ExtractJsonObject(string? responseText)
    {
        var text = responseText?.Trim() ?? "{}";

        // Taking the outermost braces tolerates fences and chatter around the payload without
        // touching backticks that legitimately appear inside the rationale string.
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start >= 0 && end > start)
        {
            return text.Substring(start, end - start + 1);
        }

        // No object found: fall back to plain fence stripping and let the deserializer report the error.
        return text.Replace("```json", "").Replace("```", "").Trim();
    }
}
|
||||
@@ -13,10 +13,15 @@ public record QuizQuestion(
|
||||
[property: JsonPropertyName("correct_index")] int CorrectIndex
|
||||
);
|
||||
|
||||
/// <summary>
/// Transfer shape for a single knowledge unit.
/// </summary>
/// <param name="Id">Identifier of the unit.</param>
/// <param name="Type">Unit type, carried as a string.</param>
/// <param name="Content">Text content of the unit.</param>
/// <param name="Metadata">Optional additional key/value data; defaults to <c>null</c>.</param>
public record KnowledgeUnitDto(
    string Id,
    string Type,
    string Content,
    Dictionary<string, object>? Metadata = null);
|
||||
/// <summary>
/// Transfer shape for a directed relation between two knowledge units.
/// </summary>
/// <param name="Source">Identifier of the link's source; presumably a <c>KnowledgeUnitDto.Id</c> — confirm.</param>
/// <param name="Target">Identifier of the link's target; presumably a <c>KnowledgeUnitDto.Id</c> — confirm.</param>
/// <param name="Relation">Name of the relation between source and target.</param>
public record KnowledgeLinkDto(
    string Source,
    string Target,
    string Relation);
|
||||
|
||||
/// <summary>
/// Aggregate payload with JSON property names fixed by attribute. List members default to empty
/// lists; <see cref="Graph"/> and <see cref="Summary"/> default to <c>null</c>.
/// </summary>
public record KnowledgePacket
{
    /// <summary>Key concepts; serialized as "concepts".</summary>
    [JsonPropertyName("concepts")]
    public List<KeyConcept> Concepts { get; init; } = new();

    /// <summary>Quiz questions; serialized as "quizzes".</summary>
    [JsonPropertyName("quizzes")]
    public List<QuizQuestion> Quizzes { get; init; } = new();

    /// <summary>Knowledge units; serialized as "units".</summary>
    [JsonPropertyName("units")]
    public List<KnowledgeUnitDto> Units { get; init; } = new();

    /// <summary>Links between knowledge units; serialized as "links".</summary>
    [JsonPropertyName("links")]
    public List<KnowledgeLinkDto> Links { get; init; } = new();

    /// <summary>Optional graph data; serialized as "graph".</summary>
    [JsonPropertyName("graph")]
    public NexusReader.Application.Queries.Graph.GraphDataDto? Graph { get; init; }

    /// <summary>Optional summary text; serialized as "summary".</summary>
    [JsonPropertyName("summary")]
    public string? Summary { get; init; }
}
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
namespace NexusReader.Application.DTOs.AI;
|
||||
|
||||
/// <summary>
/// A piece of context text together with its origin and a confidence value.
/// </summary>
public class RelevantContext
{
    /// <summary>The context text; defaults to the empty string.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Identifier of the source: ContentHash or EbookTitle. Defaults to the empty string.</summary>
    public string SourceId { get; set; } = string.Empty;

    /// <summary>Confidence value attached to this context; scale is set by the producer.</summary>
    public double Confidence { get; set; }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
namespace NexusReader.Application.DTOs.AI;
|
||||
|
||||
/// <summary>
/// One hit returned by a semantic library search.
/// </summary>
public class SemanticSearchResultDto
{
    /// <summary>Identifier of the matched item; defaults to the empty string.</summary>
    public string ContentHash { get; set; } = string.Empty;

    /// <summary>Text shown for the hit; defaults to the empty string.</summary>
    public string Snippet { get; set; } = string.Empty;

    /// <summary>Type of the matched unit, when known.</summary>
    public string? UnitType { get; set; }

    /// <summary>Relevance of the hit to the query.</summary>
    public float RelevanceScore { get; set; }

    /// <summary>Title of the book the hit comes from, when known.</summary>
    public string? SourceBookTitle { get; set; }

    /// <summary>Bonus context: optional extra key/value data.</summary>
    public Dictionary<string, object>? Metadata { get; set; }
}
|
||||
@@ -10,7 +10,10 @@
|
||||
<PackageReference Include="Mapster.DependencyInjection" Version="10.0.7" />
|
||||
<PackageReference Include="MediatR" Version="12.1.1" />
|
||||
<PackageReference Include="Microsoft.AspNetCore.Authorization" Version="10.0.7" />
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="10.0.7" />
|
||||
<PackageReference Include="Microsoft.Extensions.AI" Version="10.5.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Identity.Core" Version="10.0.7" />
|
||||
<PackageReference Include="Pgvector.EntityFrameworkCore" Version="0.2.1" />
|
||||
</ItemGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
namespace NexusReader.Application.Queries.Graph;
|
||||
|
||||
public record GraphNodeDto(string Id, string Label, string Group);
|
||||
public record GraphLinkDto(string Source, string Target, int Value);
|
||||
/// <summary>
/// A node in the graph payload.
/// </summary>
/// <param name="Id">Identifier of the node.</param>
/// <param name="Label">Display label of the node.</param>
/// <param name="Group">Group name assigned to the node.</param>
/// <param name="Type">Optional node type; defaults to <c>null</c>.</param>
public record GraphNodeDto(
    string Id,
    string Label,
    string Group,
    string? Type = null);
|
||||
/// <summary>
/// An edge in the graph payload.
/// </summary>
/// <param name="Source">Identifier of the edge's source; presumably a <c>GraphNodeDto.Id</c> — confirm.</param>
/// <param name="Target">Identifier of the edge's target; presumably a <c>GraphNodeDto.Id</c> — confirm.</param>
/// <param name="RelationType">Name of the relation the edge represents.</param>
/// <param name="Value">Numeric value attached to the edge; defaults to 1.</param>
public record GraphLinkDto(
    string Source,
    string Target,
    string RelationType,
    int Value = 1);
|
||||
public record GraphDataDto
|
||||
{
|
||||
public List<GraphNodeDto> Nodes { get; init; } = new();
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
using FluentResults;
|
||||
using Mapster;
|
||||
using MediatR;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.AI;
|
||||
using NexusReader.Application.DTOs.AI;
|
||||
using NexusReader.Application.Abstractions.Persistence;
|
||||
using Pgvector.EntityFrameworkCore;
|
||||
|
||||
namespace NexusReader.Application.Queries.Library;
|
||||
|
||||
/// <summary>
/// Request for a semantic (vector-similarity) search over the library.
/// </summary>
/// <param name="QueryText">Free-text query to embed and search with.</param>
/// <param name="TenantId">Tenant whose content is searched.</param>
/// <param name="Limit">Maximum number of hits to return; defaults to 5.</param>
public record SearchLibrarySemanticallyQuery(
    string QueryText,
    string TenantId,
    int Limit = 5) : IRequest<Result<List<SemanticSearchResultDto>>>;
|
||||
|
||||
/// <summary>
/// Handles <see cref="SearchLibrarySemanticallyQuery"/>: embeds the query text, ranks knowledge units by
/// cosine distance in the database, enriches each hit with linked "Defines" units, and falls back to the
/// legacy semantic cache when no knowledge unit matches.
/// </summary>
public class SearchLibrarySemanticallyQueryHandler : IRequestHandler<SearchLibrarySemanticallyQuery, Result<List<SemanticSearchResultDto>>>
{
    private const string GlobalTenantId = "global";
    private const string DefinesRelation = "Defines";
    private const string NextRelation = "Next";

    private readonly IApplicationDbContext _dbContext;
    private readonly IEmbeddingGenerator<string, Embedding<float>> _embeddingGenerator;

    /// <summary>Creates the handler.</summary>
    /// <param name="dbContext">Persistence abstraction providing the searchable sets.</param>
    /// <param name="embeddingGenerator">Generator used to embed the query text.</param>
    public SearchLibrarySemanticallyQueryHandler(
        IApplicationDbContext dbContext,
        IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator)
    {
        _dbContext = dbContext;
        _embeddingGenerator = embeddingGenerator;
    }

    /// <summary>
    /// Runs the semantic search described by <paramref name="request"/>.
    /// </summary>
    /// <param name="request">Query text, tenant and maximum number of hits.</param>
    /// <param name="cancellationToken">Token forwarded to the embedding and database calls.</param>
    /// <returns>
    /// A successful result with the hits ordered by ascending cosine distance, or a failed result when the
    /// query text is blank or any exception is thrown.
    /// </returns>
    public async Task<Result<List<SemanticSearchResultDto>>> Handle(SearchLibrarySemanticallyQuery request, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(request.QueryText))
        {
            return Result.Fail("Query text cannot be empty.");
        }

        try
        {
            // 1. Generate embedding for user query.
            // NOTE(review): the Pgvector docs pass a Pgvector.Vector as the query operand; confirm that a
            // float[] parameter is mapped to the vector type by the configured provider.
            var embeddingResponse = await _embeddingGenerator.GenerateAsync(new[] { request.QueryText }, cancellationToken: cancellationToken);
            var queryVector = embeddingResponse.First().Vector.ToArray();

            // 2. Cosine-distance search on knowledge units. The distance is projected inside the query:
            //    CosineDistance is a translation-only marker and throws if invoked on materialized entities.
            var candidates = await _dbContext.KnowledgeUnits
                .AsNoTracking()
                .Where(x => (x.TenantId == request.TenantId || x.TenantId == GlobalTenantId) && x.Vector != null)
                .Select(x => new { Unit = x, Distance = x.Vector!.CosineDistance(queryVector) })
                .OrderBy(x => x.Distance)
                .Take(request.Limit)
                .ToListAsync(cancellationToken);

            if (candidates.Count == 0)
            {
                // Fallback to legacy cache if no granular units found.
                // NOTE(review): unlike the primary query this does not include the "global" tenant — confirm intent.
                var legacyResults = await _dbContext.SemanticKnowledgeCache
                    .AsNoTracking()
                    .Where(x => x.TenantId == request.TenantId && x.Vector != null)
                    .Select(x => new
                    {
                        x.ContentHash,
                        x.OriginalText,
                        Distance = x.Vector!.CosineDistance(queryVector)
                    })
                    .OrderBy(x => x.Distance)
                    .Take(request.Limit)
                    .ToListAsync(cancellationToken);

                return Result.Ok(legacyResults.Select(r => new SemanticSearchResultDto
                {
                    ContentHash = r.ContentHash,
                    Snippet = r.OriginalText,
                    RelevanceScore = ToRelevanceScore(r.Distance)
                }).ToList());
            }

            // 3. Graph expansion: pull units linked from the candidates.
            // NOTE(review): "Next" links are fetched but only "Defines" links are consumed below.
            var candidateIds = candidates.Select(c => c.Unit.Id).ToList();
            var links = await _dbContext.KnowledgeUnitLinks
                .AsNoTracking()
                .Where(l => candidateIds.Contains(l.SourceUnitId)
                    && (l.RelationType == DefinesRelation || l.RelationType == NextRelation))
                .ToListAsync(cancellationToken);

            // Apply the same tenant rule as the candidate query so a link can never surface another
            // tenant's content inside a snippet.
            var relatedIds = links.Select(l => l.TargetUnitId).Distinct().ToList();
            var relatedUnits = await _dbContext.KnowledgeUnits
                .AsNoTracking()
                .Where(u => relatedIds.Contains(u.Id)
                    && (u.TenantId == request.TenantId || u.TenantId == GlobalTenantId))
                .ToDictionaryAsync(u => u.Id, cancellationToken);

            var definesBySource = links
                .Where(l => l.RelationType == DefinesRelation)
                .ToLookup(l => l.SourceUnitId);

            // 4. Mapping with context enrichment.
            var dtos = candidates.Select(c =>
            {
                var unit = c.Unit;
                var dto = new SemanticSearchResultDto
                {
                    ContentHash = unit.Id,
                    Snippet = unit.Content,
                    UnitType = unit.Type.ToString(),
                    RelevanceScore = ToRelevanceScore(c.Distance),
                    Metadata = ParseMetadata(unit.MetadataJson)
                };

                // Prefix the snippet with resolved definitions; skip the prefix when none resolve so an
                // empty "[Context: ]" marker is never emitted.
                var definitions = definesBySource[unit.Id]
                    .Select(l => relatedUnits.TryGetValue(l.TargetUnitId, out var target) ? target.Content : null)
                    .Where(content => content is not null)
                    .ToList();

                if (definitions.Count > 0)
                {
                    dto.Snippet = $"[Context: {string.Join("; ", definitions)}]\n{dto.Snippet}";
                }

                return dto;
            }).ToList();

            return Result.Ok(dtos);
        }
        catch (Exception ex)
        {
            // NOTE(review): this also turns OperationCanceledException into a failed Result — confirm callers expect that.
            return Result.Fail(new Error("Failed to perform semantic search").CausedBy(ex));
        }
    }

    /// <summary>Converts a cosine distance into the relevance score exposed on the DTO (1 - distance).</summary>
    private static float ToRelevanceScore(double cosineDistance) => (float)(1 - cosineDistance);

    /// <summary>
    /// Parses a unit's metadata JSON. Returns <c>null</c> for missing or malformed JSON so that one bad
    /// row cannot fail the whole search.
    /// </summary>
    private static Dictionary<string, object>? ParseMetadata(string? metadataJson)
    {
        if (string.IsNullOrEmpty(metadataJson))
        {
            return null;
        }

        try
        {
            return System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, object>>(metadataJson);
        }
        catch (System.Text.Json.JsonException)
        {
            return null;
        }
    }
}
|
||||
Reference in New Issue
Block a user