feat(ai-ux): deduplicate AI queries, handle ServiceUnavailable retries, and optimize reader canvas graph prerendering (#44)

This Pull Request encapsulates all outstanding AI, Blazor InteractiveAuto lifecycle, pgvector, and Firefox authorization/session compatibility fixes.

### Key Accomplishments:
1. **Concurrent Request Deduplication (Option B):** Implemented a registry of in-flight tasks (a static `ConcurrentDictionary`) in `KnowledgeService` so that concurrent graph extraction requests with the same tenant, content hash, and trace type await one shared AI call instead of issuing duplicate calls.
2. **Resilience Strategy for Downstream Demands:** Extended the `ai-retry` resilience pipeline to automatically intercept and retry on temporary Google API `503 ServiceUnavailable` / `high demand` spikes.
3. **Interactive Graph Generation Guard (Option A):** Prevented server-side prerender-phase graph requests in the reader canvas component.
4. **Firefox Compatibility & Cookie Handler:** Implemented an authentication endpoint and a hybrid hidden-form submission flow so that login, registration, and logout redirects and their cookies are handled securely.
5. **Autoscrolling & Graph Exclusions:** Added concept-to-block smooth scrolling, active block badging, and filtered out markdown code blocks from being extracted as nodes.

All unit tests compiled and passed.

---------

Co-authored-by: Marek Jasiński <jasins.marek@gmail.com>
Reviewed-on: #44
Co-authored-by: Antigravity <antigravity@google.com>
Co-committed-by: Antigravity <antigravity@google.com>
This commit was merged in pull request #44.
This commit is contained in:
2026-05-18 17:53:36 +00:00
committed by Marek Jasiński
parent f808734768
commit 541e9e1fb5
42 changed files with 2351 additions and 155 deletions
@@ -6,7 +6,7 @@ public class AiSettings
public string ApiKey { get; set; } = string.Empty;
public string Model { get; set; } = "gemini-1.5-flash";
public string EmbeddingModel { get; set; } = "text-embedding-004";
public string EmbeddingModel { get; set; } = "gemini-embedding-001";
/// <summary>
/// Maximum number of tokens allowed for input.
@@ -63,7 +63,12 @@ public static class DependencyInjection
builder.AddRetry(new RetryStrategyOptions
{
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex =>
ex.Message.Contains("429") || ex.Message.Contains("Too Many Requests") || ex.Message.Contains("quota")),
ex.Message.Contains("429") ||
ex.Message.Contains("Too Many Requests") ||
ex.Message.Contains("quota") ||
ex.Message.Contains("503") ||
ex.Message.Contains("ServiceUnavailable") ||
ex.Message.Contains("demand")),
BackoffType = DelayBackoffType.Exponential,
UseJitter = true,
MaxRetryAttempts = aiSettings.RetryAttempts,
@@ -80,7 +85,7 @@ public static class DependencyInjection
services.AddEmbeddingGenerator(new GeminiEmbeddingGenerator(new GeminiClientOptions
{
ApiKey = aiSettings.ApiKey,
ModelId = aiSettings.EmbeddingModel ?? "text-embedding-004"
ModelId = aiSettings.EmbeddingModel ?? "gemini-embedding-001"
}));
// Application-layer service implementations
@@ -19,8 +19,9 @@ public class EpubMetadataExtractor : IEpubMetadataExtractor
using var bookRef = await EpubReader.OpenBookAsync(epubStream);
var title = bookRef.Title ?? "Unknown Title";
var author = bookRef.Author ?? "Unknown Author";
var description = bookRef.Description;
byte[]? cover = await bookRef.ReadCoverAsync();
return Result.Ok(new LocalEpubMetadata { Title = title, Author = author, CoverImage = cover });
return Result.Ok(new LocalEpubMetadata { Title = title, Author = author, CoverImage = cover, Description = description });
}
catch (Exception ex)
{
@@ -1,4 +1,5 @@
using System.Text.Json;
using System.Collections.Concurrent;
using FluentResults;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.AI;
@@ -29,7 +30,8 @@ public class KnowledgeService : IKnowledgeService
private readonly AiSettings _settings;
private readonly Tokenizer _tokenizer;
private readonly ILogger<KnowledgeService> _logger;
private const string PromptVersion = "1.0";
private const string PromptVersion = "1.3";
private static readonly ConcurrentDictionary<string, Task<Result<KnowledgePacket>>> _activeRequests = new();
public KnowledgeService(
IChatClient chatClient,
@@ -96,9 +98,27 @@ public class KnowledgeService : IKnowledgeService
}
}
// Deduplicate concurrent in-flight requests that share the same tenant, content hash, and trace type
var requestKey = $"{tenantId}:{hash}:{traceType}";
var task = _activeRequests.GetOrAdd(requestKey, _ =>
ExecuteAiRequestAndCacheAsync(normalizedText, tenantId, systemPrompt, traceType, ebookId, hash));
return await task;
}
private async Task<Result<KnowledgePacket>> ExecuteAiRequestAndCacheAsync(
string normalizedText,
string tenantId,
string systemPrompt,
string traceType,
Guid? ebookId,
string hash)
{
_logger.LogInformation("[KnowledgeService] Cache Miss for {TraceType} ({Hash}). Requesting AI...", traceType, hash);
try
{
using var dbContext = await _dbContextFactory.CreateDbContextAsync();
var options = new ChatOptions
{
Temperature = (float)_settings.Temperature,
@@ -110,7 +130,7 @@ public class KnowledgeService : IKnowledgeService
{
new ChatMessage(ChatRole.System, systemPrompt),
new ChatMessage(ChatRole.User, normalizedText)
}, options, cancellationToken: ct), cancellationToken);
}, options, cancellationToken: ct));
var rawResponse = response.Text?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(rawResponse)) return Result.Fail("AI returned an empty response.");
@@ -129,16 +149,18 @@ public class KnowledgeService : IKnowledgeService
try
{
var embeddingResponse = await _retryPipeline.ExecuteAsync(async ct =>
await _embeddingGenerator.GenerateAsync(new[] { normalizedText }, cancellationToken: ct), cancellationToken);
await _embeddingGenerator.GenerateAsync(new[] { normalizedText }, new EmbeddingGenerationOptions { Dimensions = 1536 }, cancellationToken: ct));
vector = embeddingResponse.First().Vector.ToArray();
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[KnowledgeService] Embedding generation failed; proceeding without vector.");
// We continue even if embedding fails, as the primary goal was knowledge extraction
}
// 4. Save to Cache
var cached = await dbContext.SemanticKnowledgeCache
.FirstOrDefaultAsync(c => c.ContentHash == hash && c.TenantId == tenantId);
var cacheEntry = new SemanticKnowledgeCache
{
ContentHash = hash,
@@ -161,9 +183,9 @@ public class KnowledgeService : IKnowledgeService
}
// 5. Process structured KnowledgeUnits (Graph Expansion)
await ProcessKnowledgeUnitsAsync(knowledgePacket, tenantId, ebookId, dbContext, cancellationToken);
await ProcessKnowledgeUnitsAsync(knowledgePacket, tenantId, ebookId, dbContext, default);
await dbContext.SaveChangesAsync(cancellationToken);
await dbContext.SaveChangesAsync();
return Result.Ok(knowledgePacket);
}
catch (JsonException ex)
@@ -176,8 +198,14 @@ public class KnowledgeService : IKnowledgeService
{
return Result.Fail(new Error("Failed to extract knowledge from AI").CausedBy(ex));
}
finally
{
var requestKey = $"{tenantId}:{hash}:{traceType}";
_activeRequests.TryRemove(requestKey, out _);
}
}
private async Task ProcessKnowledgeUnitsAsync(KnowledgePacket packet, string tenantId, Guid? ebookId, AppDbContext dbContext, CancellationToken cancellationToken)
{
var unitIds = packet.Units.Select(u => u.Id).ToList();
@@ -217,7 +245,7 @@ public class KnowledgeService : IKnowledgeService
try
{
var emb = await _retryPipeline.ExecuteAsync(async ct =>
await _embeddingGenerator.GenerateAsync(new[] { unit.Content }, cancellationToken: ct), cancellationToken);
await _embeddingGenerator.GenerateAsync(new[] { unit.Content }, new EmbeddingGenerationOptions { Dimensions = 768 }, cancellationToken: ct), cancellationToken);
unit.Vector = new Vector(emb.First().Vector.ToArray());
}
catch { /* Ignore embedding errors for now */ }
@@ -6,6 +6,7 @@ public static class PromptRegistry
"You are an expert educator. Analyze the provided text to extract key concepts, generate relevant quizzes, and construct a knowledge graph. " +
"CRITICAL: Restrict 'concept.label' to a maximum of 3 words (e.g., 'Dependency Injection' instead of full sentences). " +
"CRITICAL: Extract a MAXIMUM of 15 key concepts/plot points from the text. " +
"CRITICAL: Code blocks (e.g., markdown code snippets) must be excluded from the relationship graph, or summarized as a single node (e.g., 'Code Example'). Do NOT create nodes for variables, functions, namespaces, or individual lines of code. " +
"CRITICAL: Return ONLY a minified JSON object. Do NOT include markdown formatting like ```json or ```. Do NOT include explanations. " +
"Schema: { " +
"\"concepts\": [ { \"title\": \"string\", \"description\": \"string\" } ], " +
@@ -14,13 +15,14 @@ public static class PromptRegistry
"}.";
public const string GraphExtractionPrompt =
"You are an expert at information architecture. Extract key concepts and their relationships from the text to build a knowledge graph. " +
"CRITICAL: Restrict 'label' to a maximum of 3 words. " +
"CRITICAL: Extract a MAXIMUM of 15 key concepts/plot points and their relationships. " +
"CRITICAL: Each paragraph in the user text starts with [ID: some-id]. Use these IDs ONLY for nodes representing the blocks. " +
"CRITICAL: All other extracted 'concept' nodes MUST have unique, slug-style IDs based on their labels (e.g., 'dependency-injection'). " +
"Include a 'current' node representing the block content itself if applicable. " +
"CRITICAL: Limit the result to a MAXIMUM of 15 most relevant connections. " +
"You are an expert at information architecture. Extract key concepts and paragraph mappings from the text to build a unified knowledge graph. " +
"The input text consists of several paragraphs, each starting with its unique block ID in the format '[ID: seg-X]'. " +
"Extract two types of nodes: " +
"1. Concept Nodes (group: 'concept'): Extract the main technical concepts discussed (e.g., ID: 'dependency-injection', label: 'Dependency Injection'). Max 10 concepts. Labels must be at most 3 words. " +
"2. Block Nodes (group: 'current'): For each paragraph in the input, create a node representing that paragraph where 'id' is the exact block ID (e.g., 'seg-1'), and 'label' is a brief summary of that paragraph's content (max 3 words). " +
"CRITICAL: If a paragraph is a code block, represent it as a single block node with label 'Code Example' (group: 'current'). Do NOT extract low-level code elements (like variables, classes, methods, or namespaces) as separate concept nodes. " +
"CRITICAL: Connect related concept nodes together, and connect each concept node to the block nodes ('seg-X') where it is discussed. " +
"Limit connections to a MAXIMUM of 15 most relevant links. " +
"Return ONLY minified JSON. Schema: { \"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept|current\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } }";
@@ -32,6 +34,7 @@ public static class PromptRegistry
"You are an expert at Knowledge Engineering. Segment the provided text into discrete Knowledge Units. " +
"Identify 'units' (sections, tables, definitions, rules) and 'links' (how they relate). " +
"CRITICAL: Units must be granular. " +
"CRITICAL: Code blocks must be summarized under the parent unit or represented as a single 'Code Example' unit. Do NOT segment code blocks into granular low-level code details (e.g., classes, variables, parameters). " +
"Schema: { " +
"\"units\": [ { \"id\": \"string\", \"type\": \"Section|Table|Definition|Rule\", \"content\": \"string\", \"metadata\": { \"page\": 0 } } ], " +
"\"links\": [ { \"source\": \"string\", \"target\": \"string\", \"relation\": \"Next|Defines|Contains|References\" } ] " +