feat: implement AI-driven knowledge extraction service with semantic caching and persistent storage
This commit is contained in:
@@ -0,0 +1,145 @@
|
||||
using System.Text.Json;
|
||||
using FluentResults;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.AI;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
using NexusReader.Application.DTOs.AI;
|
||||
using NexusReader.Domain.Entities;
|
||||
using NexusReader.Infrastructure.Helpers;
|
||||
using NexusReader.Infrastructure.Persistence;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace NexusReader.Infrastructure.Services;
|
||||
|
||||
public class KnowledgeService : IKnowledgeService
{
    private readonly IChatClient _chatClient;
    private readonly AppDbContext _dbContext;

    // Bump whenever the extraction prompt changes so stale cache entries are bypassed.
    private const string PromptVersion = "1.0";
    private const string ModelId = "gemini-1.5-flash";

    // Retries rate-limit/quota failures with exponential backoff and jitter.
    // NOTE(review): matching on exception message text ("429"/"quota") is fragile —
    // prefer a typed exception or status-code property if the chat client exposes one.
    private static readonly ResiliencePipeline _retryPipeline = new ResiliencePipelineBuilder()
        .AddRetry(new RetryStrategyOptions
        {
            ShouldHandle = new PredicateBuilder().Handle<Exception>(ex =>
                ex.Message.Contains("429") || ex.Message.Contains("Too Many Requests") || ex.Message.Contains("quota")),
            BackoffType = DelayBackoffType.Exponential,
            UseJitter = true,
            MaxRetryAttempts = 3,
            Delay = TimeSpan.FromSeconds(2)
        })
        .Build();

    public KnowledgeService(IChatClient chatClient, AppDbContext dbContext)
    {
        _chatClient = chatClient;
        _dbContext = dbContext;
    }

    /// <summary>
    /// Extracts structured knowledge from <paramref name="text"/>, serving from the
    /// persistent semantic cache when a valid entry exists for the same content hash
    /// and prompt version, otherwise calling the AI model and caching the result.
    /// </summary>
    /// <param name="text">Raw input text to analyze.</param>
    /// <param name="cancellationToken">Cancels the cache lookup, AI call, and save.</param>
    /// <returns>
    /// A successful <see cref="Result{KnowledgePacket}"/> on success; a failed result
    /// for empty/oversized input, empty or undeserializable AI output, or AI errors.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="cancellationToken"/> is cancelled (not wrapped in a Result).
    /// </exception>
    public async Task<Result<KnowledgePacket>> GetKnowledgeAsync(string text, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return Result.Fail("Input text is empty.");
        }

        // Normalize text to ensure consistent hashing and reduce token noise.
        var normalizedText = ContentHasher.Normalize(text);

        // Request pre-processing (token saving): reject oversized inputs up front.
        const int MaxInputLength = 15000; // Roughly 3k-4k tokens
        if (normalizedText.Length > MaxInputLength)
        {
            return Result.Fail($"Input text is too long ({normalizedText.Length} characters after normalization). Max allowed is {MaxInputLength}.");
        }

        // Simple token estimation (4 chars per token).
        // NOTE(review): consider injecting ILogger<KnowledgeService> instead of Console output.
        var estimatedTokens = normalizedText.Length / 4;
        Console.WriteLine($"[KnowledgeService] Processing request with ~{estimatedTokens} tokens.");

        var hash = ContentHasher.ComputeHash(normalizedText);

        // 1. Check cache.
        // NOTE(review): the lookup does not filter on ModelId even though it is stored,
        // so entries produced by a different model would be reused — confirm intended.
        var cached = await _dbContext.SemanticKnowledgeCache
            .FirstOrDefaultAsync(c => c.ContentHash == hash && c.PromptVersion == PromptVersion, cancellationToken);

        if (cached != null)
        {
            try
            {
                var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData);
                if (packet != null)
                {
                    return Result.Ok(packet);
                }
            }
            catch (JsonException)
            {
                // Corrupt cache entry: fall through and regenerate via the AI call;
                // the entry is refreshed in step 3 below.
            }
        }

        // 2. Call AI client.
        try
        {
            var options = new ChatOptions
            {
                ResponseFormat = ChatResponseFormat.Json,
                Temperature = 0.1f,
                MaxOutputTokens = 1000
            };

            var response = await _retryPipeline.ExecuteAsync(async ct =>
                await _chatClient.GetResponseAsync(new List<ChatMessage>
                {
                    new ChatMessage(ChatRole.System, PromptRegistry.KnowledgeExtractionSystemPrompt),
                    new ChatMessage(ChatRole.User, normalizedText)
                }, options, cancellationToken: ct), cancellationToken);

            var jsonResponse = response.Text;
            if (string.IsNullOrWhiteSpace(jsonResponse))
            {
                return Result.Fail("AI returned an empty response.");
            }

            // Cleanup potential markdown if Gemini still adds it despite options.
            jsonResponse = jsonResponse.Replace("```json", "").Replace("```", "").Trim();

            var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse);
            if (knowledgePacket == null)
            {
                return Result.Fail("Failed to deserialize AI response.");
            }

            // 3. Save to cache: insert a new row, or refresh the corrupt entry found above.
            // (The original built the new entity unconditionally and discarded it on the
            // update path; it is now constructed only where it is actually used.)
            if (cached == null)
            {
                _dbContext.SemanticKnowledgeCache.Add(new SemanticKnowledgeCache
                {
                    ContentHash = hash,
                    JsonData = jsonResponse,
                    ModelId = ModelId,
                    PromptVersion = PromptVersion,
                    CreatedAt = DateTime.UtcNow
                });
            }
            else
            {
                cached.JsonData = jsonResponse;
                cached.CreatedAt = DateTime.UtcNow;
            }

            try
            {
                await _dbContext.SaveChangesAsync(cancellationToken);
            }
            catch (DbUpdateException)
            {
                // Race: a concurrent request for the same text cached its result first
                // (unique-constraint violation on insert). The extraction itself
                // succeeded, so treat the cache write as best-effort and still return.
            }

            return Result.Ok(knowledgePacket);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Propagate caller-initiated cancellation instead of wrapping it as an
            // "AI failed" Result — the previous blanket catch swallowed it.
            throw;
        }
        catch (Exception ex)
        {
            return Result.Fail(new Error("Failed to extract knowledge from AI").CausedBy(ex));
        }
    }
}
|
||||
Reference in New Issue
Block a user