feat: integrate AI-driven selection panel with context-aware text summarization and quiz generation features.

This commit is contained in:
2026-04-26 20:36:08 +02:00
parent 82d726097f
commit 39a9ca5706
25 changed files with 819 additions and 219 deletions
@@ -0,0 +1,69 @@
using System.Text;
using System.Text.RegularExpressions;
namespace NexusReader.Infrastructure.Helpers;
/// <summary>
/// Best-effort repair of JSON text that was cut off mid-stream (for example an
/// AI response that hit its output-token limit).
/// </summary>
/// <remarks>
/// The repair is heuristic: it drops a dangling tail, terminates an open string
/// literal and appends the closers for every object/array that is still open.
/// The result is not guaranteed to be valid JSON (e.g. a key left without a value),
/// so callers must still be prepared for deserialization to fail.
/// </remarks>
public static class JsonRepairHelper
{
    /// <summary>
    /// Attempts to turn truncated JSON into balanced JSON.
    /// </summary>
    /// <param name="json">The raw JSON text; may be null, empty or whitespace.</param>
    /// <returns>
    /// The input unchanged when it is null/whitespace; otherwise the trimmed text with
    /// any dangling tail removed and all open strings, objects and arrays closed.
    /// Text that is already balanced is returned as-is (trimmed).
    /// </returns>
    public static string Repair(string json)
    {
        if (string.IsNullOrWhiteSpace(json)) return json;

        json = json.Trim();

        int lastSeparator = FindLastStructuralSeparator(json, out bool endsInString);

        // Only a closer that sits OUTSIDE a string literal means the text ended on a
        // complete value; a '}' inside an unterminated string is still a truncation.
        char lastChar = json[json.Length - 1];
        bool endsOnCloser = !endsInString && (lastChar == '}' || lastChar == ']');

        if (!endsOnCloser && lastSeparator > 0)
        {
            // Drop the incomplete tail. The separator itself is removed as well:
            // a comma must not be left trailing, and a closer is re-added by the
            // balancing pass below.
            json = json.Substring(0, lastSeparator);
        }

        // Always balance, even when the text ends on a closer: a truncated payload
        // such as {"a": [{"b": 1} ends with '}' yet still has open scopes.
        return CloseOpenScopes(json);
    }

    /// <summary>
    /// Finds the index of the last ',', '}' or ']' that is not part of a string literal.
    /// </summary>
    /// <param name="json">The text to scan.</param>
    /// <param name="endsInString">Set to true when the text ends inside an unterminated string literal.</param>
    /// <returns>The zero-based index of the last structural separator, or -1 when there is none.</returns>
    private static int FindLastStructuralSeparator(string json, out bool endsInString)
    {
        int last = -1;
        bool inString = false;
        bool escaped = false;

        for (int i = 0; i < json.Length; i++)
        {
            char c = json[i];

            if (inString)
            {
                // Inside a literal only escapes and the closing quote matter.
                if (escaped) escaped = false;
                else if (c == '\\') escaped = true;
                else if (c == '"') inString = false;
                continue;
            }

            if (c == '"') inString = true;
            else if (c == ',' || c == '}' || c == ']') last = i;
        }

        endsInString = inString;
        return last;
    }

    /// <summary>
    /// Terminates an open string literal and appends the closers for every object or
    /// array that is still open, innermost first. Closers that do not match the
    /// innermost open scope are ignored.
    /// </summary>
    /// <param name="json">The (possibly pruned) JSON text.</param>
    /// <returns>The text with the missing closing tokens appended.</returns>
    private static string CloseOpenScopes(string json)
    {
        var openScopes = new Stack<char>();
        bool inString = false;
        bool escaped = false;

        foreach (char c in json)
        {
            if (inString)
            {
                if (escaped) escaped = false;
                else if (c == '\\') escaped = true;
                else if (c == '"') inString = false;
                continue;
            }

            if (c == '"')
            {
                inString = true;
            }
            else if (c == '{' || c == '[')
            {
                openScopes.Push(c);
            }
            else if (c == '}' || c == ']')
            {
                char expectedOpener = c == '}' ? '{' : '[';
                if (openScopes.Count > 0 && openScopes.Peek() == expectedOpener)
                {
                    openScopes.Pop();
                }
            }
        }

        var builder = new StringBuilder(json);

        if (inString)
        {
            // A lone trailing backslash would escape the quote we are about to add
            // and leave the string unterminated, so remove it first.
            if (escaped) builder.Length--;
            builder.Append('"');
        }

        while (openScopes.Count > 0)
        {
            builder.Append(openScopes.Pop() == '{' ? '}' : ']');
        }

        return builder.ToString();
    }
}
@@ -35,29 +35,37 @@ public class KnowledgeService : IKnowledgeService
}
/// <summary>
/// Runs the extraction pipeline with <c>PromptRegistry.KnowledgeExtractionSystemPrompt</c>,
/// passing <c>"full"</c> as the cache suffix.
/// </summary>
/// <param name="text">The source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the internal pipeline.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetKnowledgeAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.KnowledgeExtractionSystemPrompt,
        cacheSuffix: "full",
        cancellationToken: cancellationToken);
/// <summary>
/// Runs the extraction pipeline with <c>PromptRegistry.GraphExtractionPrompt</c>,
/// passing <c>"graph"</c> as the cache suffix.
/// </summary>
/// <param name="text">The source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the internal pipeline.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetGraphDataAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.GraphExtractionPrompt,
        cacheSuffix: "graph",
        cancellationToken: cancellationToken);
/// <summary>
/// Runs the extraction pipeline with <c>PromptRegistry.SummaryAndQuizPrompt</c>,
/// passing <c>"summary_quiz"</c> as the cache suffix.
/// </summary>
/// <param name="text">The source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the internal pipeline.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetSummaryAndQuizAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.SummaryAndQuizPrompt,
        cacheSuffix: "summary_quiz",
        cancellationToken: cancellationToken);
private async Task<Result<KnowledgePacket>> GetKnowledgeInternalAsync(string text, string systemPrompt, string cacheSuffix, CancellationToken cancellationToken)
{
if (string.IsNullOrWhiteSpace(text))
{
return Result.Fail("Input text is empty.");
}
Console.WriteLine($"[KnowledgeService] Starting extraction for text: {text.Substring(0, Math.Min(text.Length, 50))}...");
Console.WriteLine($"[KnowledgeService] Starting extraction ({cacheSuffix}) for text sample: {text.Substring(0, Math.Min(text.Length, 50))}...");
// Normalize text to ensure consistent hashing and reduce token noise
var normalizedText = ContentHasher.Normalize(text);
// Phase 4: Request Pre-processing (Token Saving)
if (normalizedText.Length > _settings.MaxInputLength)
{
Console.WriteLine($"[KnowledgeService] Error: Input too long ({normalizedText.Length} > {_settings.MaxInputLength})");
return Result.Fail($"Input text is too long ({normalizedText.Length} characters after normalization). Max allowed is {_settings.MaxInputLength}.");
normalizedText = normalizedText.Substring(0, _settings.MaxInputLength);
Console.WriteLine($"[KnowledgeService] WARNING: Input text truncated to {_settings.MaxInputLength} chars.");
}
// Simple token estimation (4 chars per token)
var estimatedTokens = normalizedText.Length / 4;
Console.WriteLine($"[KnowledgeService] Processing request with ~{estimatedTokens} tokens.");
var hash = ContentHasher.ComputeHash(normalizedText);
var hash = ContentHasher.ComputeHash(normalizedText) + "_" + cacheSuffix;
// 1. Check Cache
var cached = await _dbContext.SemanticKnowledgeCache
@@ -65,28 +73,19 @@ public class KnowledgeService : IKnowledgeService
if (cached != null)
{
Console.WriteLine($"[KnowledgeService] Cache hit for hash: {hash}");
try
{
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData);
if (packet != null)
{
return Result.Ok(packet);
}
}
catch (JsonException ex)
{
Console.WriteLine($"[KnowledgeService] Cache deserialization error: {ex.Message}");
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
if (packet != null) return Result.Ok(packet);
}
catch { }
}
// 2. Call AI Client
try
{
Console.WriteLine($"[KnowledgeService] Calling Gemini AI with Model: {_settings.Model}...");
var options = new ChatOptions
{
// ResponseFormat = ChatResponseFormat.Json, // Disabled due to GeminiMappingException in current library version
Temperature = (float)_settings.Temperature,
MaxOutputTokens = _settings.MaxOutputTokens
};
@@ -94,61 +93,46 @@ public class KnowledgeService : IKnowledgeService
var response = await _retryPipeline.ExecuteAsync(async ct =>
await _chatClient.GetResponseAsync(new List<ChatMessage>
{
new ChatMessage(ChatRole.System, PromptRegistry.KnowledgeExtractionSystemPrompt),
new ChatMessage(ChatRole.System, systemPrompt),
new ChatMessage(ChatRole.User, normalizedText)
}, options, cancellationToken: ct), cancellationToken);
var jsonResponse = response.Text;
if (string.IsNullOrWhiteSpace(jsonResponse))
var rawResponse = response.Text?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(rawResponse)) return Result.Fail("AI returned an empty response.");
// Cleanup markdown code blocks and repair truncation
var jsonResponse = rawResponse.Replace("```json", "").Replace("```", "").Trim();
jsonResponse = JsonRepairHelper.Repair(jsonResponse);
try
{
Console.WriteLine("[KnowledgeService] AI returned empty response.");
return Result.Fail("AI returned an empty response.");
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
if (knowledgePacket == null) return Result.Fail("Failed to deserialize AI response.");
// 3. Save to Cache
var cacheEntry = new SemanticKnowledgeCache
{
ContentHash = hash,
JsonData = jsonResponse,
ModelId = _settings.Model,
PromptVersion = PromptVersion,
CreatedAt = DateTime.UtcNow
};
if (cached == null) _dbContext.SemanticKnowledgeCache.Add(cacheEntry);
else { cached.JsonData = jsonResponse; cached.CreatedAt = DateTime.UtcNow; }
await _dbContext.SaveChangesAsync(cancellationToken);
return Result.Ok(knowledgePacket);
}
Console.WriteLine($"[KnowledgeService] AI Response received ({jsonResponse.Length} chars).");
// Cleanup potential markdown if Gemini still adds it despite options
jsonResponse = jsonResponse.Replace("```json", "").Replace("```", "").Trim();
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse);
if (knowledgePacket == null)
catch (JsonException ex)
{
Console.WriteLine("[KnowledgeService] Failed to deserialize JSON response.");
return Result.Fail("Failed to deserialize AI response.");
Console.WriteLine($"[KnowledgeService] JSON Error: {ex.Message}. Raw length: {rawResponse.Length}");
return Result.Fail($"Failed to deserialize AI response: {ex.Message}");
}
// 3. Save to Cache
Console.WriteLine("[KnowledgeService] Saving result to cache...");
var cacheEntry = new SemanticKnowledgeCache
{
ContentHash = hash,
JsonData = jsonResponse,
ModelId = _settings.Model,
PromptVersion = PromptVersion,
CreatedAt = DateTime.UtcNow
};
if (cached == null)
{
_dbContext.SemanticKnowledgeCache.Add(cacheEntry);
}
else
{
cached.JsonData = jsonResponse;
cached.CreatedAt = DateTime.UtcNow;
}
await _dbContext.SaveChangesAsync(cancellationToken);
Console.WriteLine("[KnowledgeService] Extraction successful.");
return Result.Ok(knowledgePacket);
}
catch (Exception ex)
{
Console.WriteLine($"[KnowledgeService] CRITICAL ERROR: {ex.GetType().Name}: {ex.Message}");
if (ex.InnerException != null)
Console.WriteLine($"[KnowledgeService] Inner Error: {ex.InnerException.Message}");
return Result.Fail(new Error("Failed to extract knowledge from AI").CausedBy(ex));
}
}
@@ -160,12 +144,10 @@ public class KnowledgeService : IKnowledgeService
Console.WriteLine("[KnowledgeService] Clearing SemanticKnowledgeCache...");
_dbContext.SemanticKnowledgeCache.RemoveRange(_dbContext.SemanticKnowledgeCache);
await _dbContext.SaveChangesAsync(cancellationToken);
Console.WriteLine("[KnowledgeService] Cache cleared successfully.");
return Result.Ok();
}
catch (Exception ex)
{
Console.WriteLine($"[KnowledgeService] Error clearing cache: {ex.Message}");
return Result.Fail($"Failed to clear cache: {ex.Message}");
}
}
@@ -10,4 +10,15 @@ public static class PromptRegistry
"\"quizzes\": [ { \"question\": \"string\", \"options\": [ \"string\" ], \"correct_index\": 0 } ], " +
"\"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } " +
"}.";
/// <summary>
/// Prompt text asking the model to extract key concepts and their relationships
/// as a knowledge graph.
/// </summary>
/// <remarks>
/// The prompt tells the model that each paragraph of the user text starts with an
/// <c>[ID: some-id]</c> marker and that those exact IDs must be used as node ids,
/// allows a <c>current</c> node for the block content itself, caps the result at
/// 15 connections, and requests minified JSON of the form
/// <c>{ "graph": { "nodes": [...], "links": [...] } }</c>.
/// </remarks>
public const string GraphExtractionPrompt =
"You are an expert at information architecture. Extract key concepts and their relationships from the text to build a knowledge graph. " +
"CRITICAL: Each paragraph in the user text starts with [ID: some-id]. You MUST use these exact IDs as the 'id' for the nodes representing those blocks. " +
"Include a 'current' node representing the block content itself if applicable. " +
"CRITICAL: Limit the result to a MAXIMUM of 15 most relevant connections. " +
"Return ONLY minified JSON. Schema: { \"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept|current\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } }";
/// <summary>
/// Prompt text asking the model for a concise summary of the text plus a quiz of
/// 3-5 questions.
/// </summary>
/// <remarks>
/// The prompt requests minified JSON of the form
/// <c>{ "summary": "...", "quizzes": [ { "question", "options", "correct_index" } ] }</c>.
/// </remarks>
public const string SummaryAndQuizPrompt =
"You are an expert educator. Provide a concise summary of the text and generate a challenging quiz (3-5 questions). " +
"Return ONLY minified JSON. Schema: { \"summary\": \"string\", \"quizzes\": [ { \"question\": \"string\", \"options\": [ \"string\" ], \"correct_index\": 0 } ] }";
}