feat: integrate AI-driven selection panel with context-aware text summarization and quiz generation features.
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace NexusReader.Infrastructure.Helpers;
|
||||
|
||||
/// <summary>
/// Best-effort repair of JSON text that was cut off before completion
/// (for example, a model response that hit its output-token limit).
/// </summary>
/// <remarks>
/// The repair is purely syntactic: it may discard a trailing partial element,
/// then terminates an unterminated string literal and appends whatever closing
/// braces/brackets are still missing. It does not validate the result, so callers
/// must still be prepared for deserialization to fail.
/// </remarks>
public static class JsonRepairHelper
{
    // Characters used as "last known good" positions when pruning a ragged tail.
    private static readonly char[] PruneAnchors = { ',', '}', ']' };

    /// <summary>
    /// Attempts to turn truncated JSON into well-formed JSON.
    /// </summary>
    /// <param name="json">Raw JSON text, possibly truncated. May be null, empty or whitespace.</param>
    /// <returns>
    /// The input unchanged when it is null/empty/whitespace; otherwise the trimmed text,
    /// with a ragged tail pruned and any missing string terminator and closing
    /// braces/brackets appended. Text that is already balanced is returned as-is (trimmed).
    /// </returns>
    public static string Repair(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return json;
        }

        json = json.Trim();

        // 1. Text that does not end with a closer is definitely truncated: drop everything
        //    from the last comma/closer onwards so no half-written element is left behind.
        //    (A pruned closer is re-added by the balancing step below.)
        char tail = json[json.Length - 1];
        if (tail != '}' && tail != ']')
        {
            int cutoff = json.LastIndexOfAny(PruneAnchors);
            if (cutoff > 0)
            {
                json = json.Substring(0, cutoff);
            }
        }

        // 2. Always scan for unclosed containers. Text ending in '}' or ']' can still be
        //    truncated (e.g. cut right after an inner object), so the tail alone is not
        //    proof that the document is complete.
        var open = new Stack<char>();
        bool inString = false;
        bool escaped = false;

        foreach (char c in json)
        {
            if (inString)
            {
                // Escape sequences only exist inside string literals.
                if (escaped)
                {
                    escaped = false;
                }
                else if (c == '\\')
                {
                    escaped = true;
                }
                else if (c == '"')
                {
                    inString = false;
                }

                continue;
            }

            switch (c)
            {
                case '"':
                    inString = true;
                    break;
                case '{':
                case '[':
                    open.Push(c);
                    break;
                case '}':
                case ']':
                    // Mismatched or surplus closers are ignored rather than treated as fatal.
                    if (open.Count > 0 && open.Peek() == (c == '}' ? '{' : '['))
                    {
                        open.Pop();
                    }

                    break;
            }
        }

        if (!inString && open.Count == 0)
        {
            return json;
        }

        var builder = new StringBuilder(json);

        if (inString)
        {
            // A dangling backslash would escape the quote we are about to append.
            if (escaped)
            {
                builder.Length--;
            }

            builder.Append('"');
        }

        // Close containers innermost-first.
        while (open.Count > 0)
        {
            builder.Append(open.Pop() == '{' ? '}' : ']');
        }

        return builder.ToString();
    }
}
|
||||
@@ -35,29 +35,37 @@ public class KnowledgeService : IKnowledgeService
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the shared extraction routine with
/// <c>PromptRegistry.KnowledgeExtractionSystemPrompt</c> and the cache suffix <c>"full"</c>.
/// </summary>
/// <param name="text">Source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the underlying call.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetKnowledgeAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.KnowledgeExtractionSystemPrompt,
        cacheSuffix: "full",
        cancellationToken: cancellationToken);
|
||||
|
||||
/// <summary>
/// Runs the shared extraction routine with
/// <c>PromptRegistry.GraphExtractionPrompt</c> and the cache suffix <c>"graph"</c>.
/// </summary>
/// <param name="text">Source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the underlying call.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetGraphDataAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.GraphExtractionPrompt,
        cacheSuffix: "graph",
        cancellationToken: cancellationToken);
|
||||
|
||||
/// <summary>
/// Runs the shared extraction routine with
/// <c>PromptRegistry.SummaryAndQuizPrompt</c> and the cache suffix <c>"summary_quiz"</c>.
/// </summary>
/// <param name="text">Source text to analyse.</param>
/// <param name="cancellationToken">Token forwarded to the underlying call.</param>
/// <returns>The result produced by <c>GetKnowledgeInternalAsync</c>.</returns>
public async Task<Result<KnowledgePacket>> GetSummaryAndQuizAsync(string text, CancellationToken cancellationToken = default)
    => await GetKnowledgeInternalAsync(
        text,
        systemPrompt: PromptRegistry.SummaryAndQuizPrompt,
        cacheSuffix: "summary_quiz",
        cancellationToken: cancellationToken);
|
||||
|
||||
private async Task<Result<KnowledgePacket>> GetKnowledgeInternalAsync(string text, string systemPrompt, string cacheSuffix, CancellationToken cancellationToken)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
return Result.Fail("Input text is empty.");
|
||||
}
|
||||
|
||||
Console.WriteLine($"[KnowledgeService] Starting extraction for text: {text.Substring(0, Math.Min(text.Length, 50))}...");
|
||||
Console.WriteLine($"[KnowledgeService] Starting extraction ({cacheSuffix}) for text sample: {text.Substring(0, Math.Min(text.Length, 50))}...");
|
||||
|
||||
// Normalize text to ensure consistent hashing and reduce token noise
|
||||
var normalizedText = ContentHasher.Normalize(text);
|
||||
|
||||
// Phase 4: Request Pre-processing (Token Saving)
|
||||
if (normalizedText.Length > _settings.MaxInputLength)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Error: Input too long ({normalizedText.Length} > {_settings.MaxInputLength})");
|
||||
return Result.Fail($"Input text is too long ({normalizedText.Length} characters after normalization). Max allowed is {_settings.MaxInputLength}.");
|
||||
normalizedText = normalizedText.Substring(0, _settings.MaxInputLength);
|
||||
Console.WriteLine($"[KnowledgeService] WARNING: Input text truncated to {_settings.MaxInputLength} chars.");
|
||||
}
|
||||
|
||||
// Simple token estimation (4 chars per token)
|
||||
var estimatedTokens = normalizedText.Length / 4;
|
||||
Console.WriteLine($"[KnowledgeService] Processing request with ~{estimatedTokens} tokens.");
|
||||
|
||||
var hash = ContentHasher.ComputeHash(normalizedText);
|
||||
var hash = ContentHasher.ComputeHash(normalizedText) + "_" + cacheSuffix;
|
||||
|
||||
// 1. Check Cache
|
||||
var cached = await _dbContext.SemanticKnowledgeCache
|
||||
@@ -65,28 +73,19 @@ public class KnowledgeService : IKnowledgeService
|
||||
|
||||
if (cached != null)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Cache hit for hash: {hash}");
|
||||
try
|
||||
{
|
||||
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData);
|
||||
if (packet != null)
|
||||
{
|
||||
return Result.Ok(packet);
|
||||
}
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Cache deserialization error: {ex.Message}");
|
||||
var packet = JsonSerializer.Deserialize<KnowledgePacket>(cached.JsonData, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
|
||||
if (packet != null) return Result.Ok(packet);
|
||||
}
|
||||
catch { }
|
||||
}
|
||||
|
||||
// 2. Call AI Client
|
||||
try
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Calling Gemini AI with Model: {_settings.Model}...");
|
||||
var options = new ChatOptions
|
||||
{
|
||||
// ResponseFormat = ChatResponseFormat.Json, // Disabled due to GeminiMappingException in current library version
|
||||
Temperature = (float)_settings.Temperature,
|
||||
MaxOutputTokens = _settings.MaxOutputTokens
|
||||
};
|
||||
@@ -94,61 +93,46 @@ public class KnowledgeService : IKnowledgeService
|
||||
var response = await _retryPipeline.ExecuteAsync(async ct =>
|
||||
await _chatClient.GetResponseAsync(new List<ChatMessage>
|
||||
{
|
||||
new ChatMessage(ChatRole.System, PromptRegistry.KnowledgeExtractionSystemPrompt),
|
||||
new ChatMessage(ChatRole.System, systemPrompt),
|
||||
new ChatMessage(ChatRole.User, normalizedText)
|
||||
}, options, cancellationToken: ct), cancellationToken);
|
||||
|
||||
var jsonResponse = response.Text;
|
||||
if (string.IsNullOrWhiteSpace(jsonResponse))
|
||||
var rawResponse = response.Text?.Trim() ?? string.Empty;
|
||||
if (string.IsNullOrWhiteSpace(rawResponse)) return Result.Fail("AI returned an empty response.");
|
||||
|
||||
// Cleanup markdown code blocks and repair truncation
|
||||
var jsonResponse = rawResponse.Replace("```json", "").Replace("```", "").Trim();
|
||||
jsonResponse = JsonRepairHelper.Repair(jsonResponse);
|
||||
|
||||
try
|
||||
{
|
||||
Console.WriteLine("[KnowledgeService] AI returned empty response.");
|
||||
return Result.Fail("AI returned an empty response.");
|
||||
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
|
||||
if (knowledgePacket == null) return Result.Fail("Failed to deserialize AI response.");
|
||||
|
||||
// 3. Save to Cache
|
||||
var cacheEntry = new SemanticKnowledgeCache
|
||||
{
|
||||
ContentHash = hash,
|
||||
JsonData = jsonResponse,
|
||||
ModelId = _settings.Model,
|
||||
PromptVersion = PromptVersion,
|
||||
CreatedAt = DateTime.UtcNow
|
||||
};
|
||||
|
||||
if (cached == null) _dbContext.SemanticKnowledgeCache.Add(cacheEntry);
|
||||
else { cached.JsonData = jsonResponse; cached.CreatedAt = DateTime.UtcNow; }
|
||||
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
return Result.Ok(knowledgePacket);
|
||||
}
|
||||
|
||||
Console.WriteLine($"[KnowledgeService] AI Response received ({jsonResponse.Length} chars).");
|
||||
|
||||
// Cleanup potential markdown if Gemini still adds it despite options
|
||||
jsonResponse = jsonResponse.Replace("```json", "").Replace("```", "").Trim();
|
||||
|
||||
var knowledgePacket = JsonSerializer.Deserialize<KnowledgePacket>(jsonResponse);
|
||||
if (knowledgePacket == null)
|
||||
catch (JsonException ex)
|
||||
{
|
||||
Console.WriteLine("[KnowledgeService] Failed to deserialize JSON response.");
|
||||
return Result.Fail("Failed to deserialize AI response.");
|
||||
Console.WriteLine($"[KnowledgeService] JSON Error: {ex.Message}. Raw length: {rawResponse.Length}");
|
||||
return Result.Fail($"Failed to deserialize AI response: {ex.Message}");
|
||||
}
|
||||
|
||||
// 3. Save to Cache
|
||||
Console.WriteLine("[KnowledgeService] Saving result to cache...");
|
||||
var cacheEntry = new SemanticKnowledgeCache
|
||||
{
|
||||
ContentHash = hash,
|
||||
JsonData = jsonResponse,
|
||||
ModelId = _settings.Model,
|
||||
PromptVersion = PromptVersion,
|
||||
CreatedAt = DateTime.UtcNow
|
||||
};
|
||||
|
||||
if (cached == null)
|
||||
{
|
||||
_dbContext.SemanticKnowledgeCache.Add(cacheEntry);
|
||||
}
|
||||
else
|
||||
{
|
||||
cached.JsonData = jsonResponse;
|
||||
cached.CreatedAt = DateTime.UtcNow;
|
||||
}
|
||||
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
Console.WriteLine("[KnowledgeService] Extraction successful.");
|
||||
|
||||
return Result.Ok(knowledgePacket);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] CRITICAL ERROR: {ex.GetType().Name}: {ex.Message}");
|
||||
if (ex.InnerException != null)
|
||||
Console.WriteLine($"[KnowledgeService] Inner Error: {ex.InnerException.Message}");
|
||||
|
||||
return Result.Fail(new Error("Failed to extract knowledge from AI").CausedBy(ex));
|
||||
}
|
||||
}
|
||||
@@ -160,12 +144,10 @@ public class KnowledgeService : IKnowledgeService
|
||||
Console.WriteLine("[KnowledgeService] Clearing SemanticKnowledgeCache...");
|
||||
_dbContext.SemanticKnowledgeCache.RemoveRange(_dbContext.SemanticKnowledgeCache);
|
||||
await _dbContext.SaveChangesAsync(cancellationToken);
|
||||
Console.WriteLine("[KnowledgeService] Cache cleared successfully.");
|
||||
return Result.Ok();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine($"[KnowledgeService] Error clearing cache: {ex.Message}");
|
||||
return Result.Fail($"Failed to clear cache: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,4 +10,15 @@ public static class PromptRegistry
|
||||
"\"quizzes\": [ { \"question\": \"string\", \"options\": [ \"string\" ], \"correct_index\": 0 } ], " +
|
||||
"\"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } " +
|
||||
"}.";
|
||||
|
||||
/// <summary>
/// System prompt asking the model to extract key concepts and their relationships
/// as a knowledge graph, reusing the <c>[ID: ...]</c> paragraph markers as node ids,
/// capped at 15 connections, and returned as minified JSON with a single
/// <c>graph</c> object (<c>nodes</c> and <c>links</c>).
/// </summary>
public const string GraphExtractionPrompt =
|
||||
"You are an expert at information architecture. Extract key concepts and their relationships from the text to build a knowledge graph. " +
|
||||
"CRITICAL: Each paragraph in the user text starts with [ID: some-id]. You MUST use these exact IDs as the 'id' for the nodes representing those blocks. " +
|
||||
"Include a 'current' node representing the block content itself if applicable. " +
|
||||
"CRITICAL: Limit the result to a MAXIMUM of 15 most relevant connections. " +
|
||||
"Return ONLY minified JSON. Schema: { \"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept|current\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } }";
|
||||
|
||||
/// <summary>
/// System prompt asking the model for a concise summary plus a 3-5 question quiz,
/// returned as minified JSON with <c>summary</c> and <c>quizzes</c> fields.
/// </summary>
public const string SummaryAndQuizPrompt =
|
||||
"You are an expert educator. Provide a concise summary of the text and generate a challenging quiz (3-5 questions). " +
|
||||
"Return ONLY minified JSON. Schema: { \"summary\": \"string\", \"quizzes\": [ { \"question\": \"string\", \"options\": [ \"string\" ], \"correct_index\": 0 } ] }";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user