feat: implement Stripe product configuration and add token-based input validation using Microsoft.ML.Tokenizers

This commit is contained in:
2026-05-02 10:31:28 +02:00
parent 0ed89ef5a4
commit e5611758f1
7 changed files with 90 additions and 15 deletions
@@ -2,6 +2,7 @@ using System.Text.Json;
using FluentResults;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.AI;
using Microsoft.ML.Tokenizers;
using NexusReader.Application.Abstractions.Services;
using NexusReader.Application.DTOs.AI;
using NexusReader.Domain.Entities;
@@ -20,6 +21,7 @@ public class KnowledgeService : IKnowledgeService
private readonly AppDbContext _dbContext;
private readonly ResiliencePipeline _retryPipeline;
private readonly AiSettings _settings;
private readonly Tokenizer _tokenizer;
private const string PromptVersion = "1.0";
public KnowledgeService(
@@ -32,6 +34,9 @@ public class KnowledgeService : IKnowledgeService
_dbContext = dbContext;
_retryPipeline = pipelineProvider.GetPipeline("ai-retry");
_settings = settings.Value;
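// Use the Tiktoken cl100k_base encoding (selected here via the "gpt-4" model name), a widely used
// encoding for modern LLMs. Gemini uses a different tokenizer, so the resulting count is an
// estimate of token usage in Gemini-based workloads rather than an exact figure.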
_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4");
}
public async Task<Result<KnowledgePacket>> GetKnowledgeAsync(string text, CancellationToken cancellationToken = default)
@@ -59,10 +64,11 @@ public class KnowledgeService : IKnowledgeService
Console.WriteLine($"[KnowledgeService] Starting extraction ({cacheSuffix}) for text sample: {text.Substring(0, Math.Min(text.Length, 50))}...");
var normalizedText = ContentHasher.Normalize(text);
if (normalizedText.Length > _settings.MaxInputLength)
var tokenCount = EstimateTokenCount(normalizedText);
if (tokenCount > _settings.MaxInputTokens)
{
normalizedText = normalizedText.Substring(0, _settings.MaxInputLength);
Console.WriteLine($"[KnowledgeService] WARNING: Input text truncated to {_settings.MaxInputLength} chars.");
return Result.Fail($"Input exceeds maximum token limit. Estimated tokens: {tokenCount}, limit: {_settings.MaxInputTokens}.");
}
var hash = ContentHasher.ComputeHash(normalizedText) + "_" + cacheSuffix;
@@ -151,4 +157,10 @@ public class KnowledgeService : IKnowledgeService
return Result.Fail($"Failed to clear cache: {ex.Message}");
}
}
/// <summary>
/// Estimates how many tokens <paramref name="text"/> occupies according to the service's tokenizer.
/// </summary>
/// <param name="text">The text to measure.</param>
/// <returns>
/// <c>0</c> when <paramref name="text"/> is null or empty; otherwise the value returned by
/// <c>_tokenizer.CountTokens</c> for that text.
/// </returns>
private int EstimateTokenCount(string text) =>
    // Short-circuit on null/empty so the tokenizer is only invoked with real content.
    string.IsNullOrEmpty(text)
        ? 0
        : _tokenizer.CountTokens(text);
}