feat: implement Stripe product configuration and add token-based input validation using Microsoft.ML.Tokenizers
This commit is contained in:
@@ -2,6 +2,7 @@ using System.Text.Json;
|
||||
using FluentResults;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.AI;
|
||||
using Microsoft.ML.Tokenizers;
|
||||
using NexusReader.Application.Abstractions.Services;
|
||||
using NexusReader.Application.DTOs.AI;
|
||||
using NexusReader.Domain.Entities;
|
||||
@@ -20,6 +21,7 @@ public class KnowledgeService : IKnowledgeService
|
||||
private readonly AppDbContext _dbContext;
|
||||
private readonly ResiliencePipeline _retryPipeline;
|
||||
private readonly AiSettings _settings;
|
||||
private readonly Tokenizer _tokenizer;
|
||||
private const string PromptVersion = "1.0";
|
||||
|
||||
public KnowledgeService(
|
||||
@@ -32,6 +34,9 @@ public class KnowledgeService : IKnowledgeService
|
||||
_dbContext = dbContext;
|
||||
_retryPipeline = pipelineProvider.GetPipeline("ai-retry");
|
||||
_settings = settings.Value;
|
||||
// Use Tiktoken (cl100k_base, the encoding of the "gpt-4" model requested below) as an approximate token counter.
|
||||
// Gemini uses its own tokenizer, so these counts are estimates for Gemini-based workloads, not exact usage.
|
||||
_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4");
|
||||
}
|
||||
|
||||
public async Task<Result<KnowledgePacket>> GetKnowledgeAsync(string text, CancellationToken cancellationToken = default)
|
||||
@@ -59,10 +64,11 @@ public class KnowledgeService : IKnowledgeService
|
||||
Console.WriteLine($"[KnowledgeService] Starting extraction ({cacheSuffix}) for text sample: {text.Substring(0, Math.Min(text.Length, 50))}...");
|
||||
|
||||
var normalizedText = ContentHasher.Normalize(text);
|
||||
if (normalizedText.Length > _settings.MaxInputLength)
|
||||
|
||||
var tokenCount = EstimateTokenCount(normalizedText);
|
||||
if (tokenCount > _settings.MaxInputTokens)
|
||||
{
|
||||
normalizedText = normalizedText.Substring(0, _settings.MaxInputLength);
|
||||
Console.WriteLine($"[KnowledgeService] WARNING: Input text truncated to {_settings.MaxInputLength} chars.");
|
||||
return Result.Fail($"Input exceeds maximum token limit. Estimated tokens: {tokenCount}, limit: {_settings.MaxInputTokens}.");
|
||||
}
|
||||
|
||||
var hash = ContentHasher.ComputeHash(normalizedText) + "_" + cacheSuffix;
|
||||
@@ -151,4 +157,10 @@ public class KnowledgeService : IKnowledgeService
|
||||
return Result.Fail($"Failed to clear cache: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Estimates how many tokens <paramref name="text"/> occupies according to the service's tokenizer.
/// </summary>
/// <param name="text">Text to measure; may be null or empty.</param>
/// <returns>
/// 0 when <paramref name="text"/> is null or empty; otherwise the count reported by
/// <c>_tokenizer.CountTokens</c>.
/// </returns>
private int EstimateTokenCount(string text)
    => string.IsNullOrEmpty(text) ? 0 : _tokenizer.CountTokens(text);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user