// Listing metadata: 239 lines, 9.5 KiB, C#
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using FluentResults;
|
|
using NexusReader.Application.Abstractions.Services;
|
|
using NexusReader.Application.Queries.Reader;
|
|
using VersOne.Epub;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using NexusReader.Data.Persistence;
|
|
using NexusReader.Domain.Entities;
|
|
|
|
namespace NexusReader.Infrastructure.Services;
|
|
|
|
public class EpubReaderService : IEpubReader
|
|
{
|
|
private readonly IDbContextFactory<AppDbContext> _dbContextFactory;
|
|
private const string EpubPath = "wwwroot/assets/book.epub";
|
|
private const int WordThreshold = 1000;
|
|
|
|
/// <summary>
/// Creates the reader service.
/// </summary>
/// <param name="dbContextFactory">Factory stored for later creation of <c>AppDbContext</c> instances.</param>
public EpubReaderService(IDbContextFactory<AppDbContext> dbContextFactory)
    => _dbContextFactory = dbContextFactory;
|
|
|
|
/// <summary>
/// Loads one chapter of the bundled EPUB and converts it into reader content blocks.
/// </summary>
/// <param name="chapterIndex">
/// Zero-based index into the book's reading order. Values outside the valid range
/// fall back to the first chapter.
/// </param>
/// <param name="userId">
/// Optional user id. It filters the <c>Ebook</c> lookup and, when no row exists for a
/// non-empty id, a row is created for that user.
/// </param>
/// <returns>
/// A successful result carrying the page view model, or a failed result when the EPUB
/// cannot be found, has no reading order, or any exception is thrown while processing.
/// </returns>
public async Task<Result<ReaderPageViewModel>> GetEpubContentAsync(int chapterIndex, string? userId = null)
{
    try
    {
        var (fullPath, searchedPaths) = LocateEpubFile();
        if (fullPath is null)
        {
            return Result.Fail($"EPUB file not found. Checked {searchedPaths.Count} locations, including: {string.Join(", ", searchedPaths.Take(3))}");
        }

        using var bookRef = await EpubReader.OpenBookAsync(fullPath);
        var readingOrder = bookRef.GetReadingOrder();

        if (readingOrder is null || readingOrder.Count == 0)
        {
            return Result.Fail("The EPUB has no readable content files in ReadingOrder.");
        }

        // Out-of-range requests are not an error: default to the first chapter.
        if (chapterIndex < 0 || chapterIndex >= readingOrder.Count)
        {
            chapterIndex = 0;
        }

        var chapterRef = readingOrder[chapterIndex];

        // A book may have no navigation at all; only consult the TOC when it exists.
        var navigation = bookRef.GetNavigation();
        var chapterTitle = navigation is null
            ? null
            : FindTitleInNavigation(navigation, chapterRef.FilePath);

        // Fall back on blank values too: the file-name helper yields "" (not null)
        // when there is no usable name, which a plain ?? chain would let through.
        if (string.IsNullOrWhiteSpace(chapterTitle))
        {
            chapterTitle = Path.GetFileNameWithoutExtension(chapterRef.FilePath);
        }

        if (string.IsNullOrWhiteSpace(chapterTitle))
        {
            chapterTitle = $"Chapter {chapterIndex + 1}";
        }

        var chapterContent = await chapterRef.ReadContentAsTextAsync();
        var blocks = BuildContentBlocks(chapterContent);
        var ebookId = await ResolveEbookIdAsync(userId);

        return Result.Ok(new ReaderPageViewModel(blocks, chapterIndex, readingOrder.Count, chapterTitle, ebookId));
    }
    catch (Exception ex)
    {
        return Result.Fail(new Error($"Failed to process EPUB: {ex.Message}").CausedBy(ex));
    }
}

// Walks up from the application base directory looking for the bundled EPUB, either
// directly under each directory or under its src/NexusReader.Web sub-folder.
// Returns the first existing path (or null) plus every candidate that was probed.
private static (string? FullPath, List<string> SearchedPaths) LocateEpubFile()
{
    var relativePath = Path.Combine("wwwroot", "assets", "book.epub");
    var searched = new List<string>();

    for (var dir = new DirectoryInfo(AppDomain.CurrentDomain.BaseDirectory); dir != null; dir = dir.Parent)
    {
        var candidates = new[]
        {
            Path.Combine(dir.FullName, relativePath),
            Path.Combine(dir.FullName, "src", "NexusReader.Web", relativePath),
        };

        foreach (var candidate in candidates)
        {
            searched.Add(candidate);
            if (File.Exists(candidate))
            {
                return (candidate, searched);
            }
        }
    }

    return (null, searched);
}

// Turns chapter HTML into one text block per non-empty paragraph, inserting an AI
// trigger whenever WordThreshold words have accumulated and once more at the end.
private List<ContentBlock> BuildContentBlocks(string chapterHtml)
{
    var blocks = new List<ContentBlock>();
    var wordsSinceTrigger = 0;
    var blockCounter = 0;

    foreach (var paragraph in ExtractParagraphs(chapterHtml))
    {
        var sanitized = SanitizeParagraph(paragraph);
        if (string.IsNullOrWhiteSpace(sanitized))
        {
            continue;
        }

        blocks.Add(new TextSegmentBlock($"seg-{blockCounter++}", sanitized));

        wordsSinceTrigger += CountWords(sanitized);
        if (wordsSinceTrigger >= WordThreshold)
        {
            blocks.Add(CreateAiTrigger($"trigger-{blockCounter++}"));
            wordsSinceTrigger = 0;
        }
    }

    // Close the chapter with a trigger unless the last paragraph already produced one.
    if (blocks.Count > 0 && blocks[^1] is not AiActionTriggerBlock)
    {
        blocks.Add(CreateAiTrigger($"trigger-{blockCounter}"));
    }

    return blocks;
}

// Looks up the Ebook row for the bundled file (restricted to userId when one is given),
// creating it for a non-empty userId when missing. Returns Guid.Empty when there is no row.
private async Task<Guid> ResolveEbookIdAsync(string? userId)
{
    await using var context = await _dbContextFactory.CreateDbContextAsync();

    var ebook = await context.Ebooks
        .Where(e => e.FilePath.Contains("book.epub") && (userId == null || e.UserId == userId))
        .FirstOrDefaultAsync();

    // Auto-provision if not found for this user (convenience for dev).
    if (ebook == null && !string.IsNullOrEmpty(userId))
    {
        var author = await context.Authors.FirstOrDefaultAsync() ?? new Author { Name = "Unknown Author" };
        ebook = new Ebook
        {
            Title = "Lives of the Most Excellent Painters, Sculptors, and Architects",
            FilePath = EpubPath,
            UserId = userId,
            Author = author,
            TenantId = "global"
        };
        context.Ebooks.Add(ebook);
        await context.SaveChangesAsync();
    }

    return ebook?.Id ?? Guid.Empty;
}
|
|
|
|
/// <summary>
/// Splits chapter HTML into block-level fragments.
/// </summary>
/// <param name="html">Raw chapter markup; when it contains a <c>body</c> element only its inner markup is scanned.</param>
/// <returns>
/// The matched block elements (p, h1-h6, ul, ol, blockquote, pre, hr) with their tags intact,
/// in document order. When none are found, the content split on line breaks / blank lines instead.
/// </returns>
private List<string> ExtractParagraphs(string html)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    // Restrict scanning to the <body> contents when a body element is present.
    var body = Regex.Match(html, @"<body\b[^>]*>(.*?)</body>", options);
    var markup = html;
    if (body.Success)
    {
        markup = body.Groups[1].Value;
    }

    // Whole elements are captured (tags included) so they can be sanitized later.
    var fragments = Regex
        .Matches(markup, @"<(p|h[1-6]|ul|ol|blockquote|pre)\b[^>]*>.*?</\1>|<hr\b[^>]*>", options)
        .Cast<Match>()
        .Select(found => found.Value)
        .ToList();

    if (fragments.Count > 0)
    {
        return fragments;
    }

    // No block tags at all: treat <br> variants and blank lines as paragraph separators.
    var separators = new[] { "<br />", "<br>", "\n\n", "\r\n\r\n" };
    return markup.Split(separators, StringSplitOptions.RemoveEmptyEntries).ToList();
}
|
|
|
|
/// <summary>
/// Reduces an HTML fragment to a small whitelist of attribute-free tags.
/// </summary>
/// <param name="html">One fragment of chapter markup.</param>
/// <returns>
/// The trimmed fragment containing only b, i, strong, em, h1-h6, p, ul, ol, li, blockquote,
/// pre, code, br and hr tags without attributes. Character entities are decoded, except those
/// that resolve to <c>&lt;</c>, <c>&gt;</c> or <c>&amp;</c>, which stay encoded.
/// </returns>
private string SanitizeParagraph(string html)
{
    // 1. Remove <style> and <script> blocks, including their contents.
    var clean = Regex.Replace(html, @"<(style|script)\b[^>]*>.*?</\1>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // 2. Remove every tag that is not on the whitelist (text content is kept).
    clean = Regex.Replace(clean, @"<(?!/?(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b)[^>]+>", "", RegexOptions.IgnoreCase);

    // 3. Strip all attributes (class, style, id, ...) from the whitelisted opening tags.
    clean = Regex.Replace(clean, @"<(b|i|strong|em|h[1-6]|p|ul|ol|li|blockquote|pre|code|br|hr)\b[^>]*>", "<$1>", RegexOptions.IgnoreCase);

    // 4. Decode character entities one at a time. The result is still HTML, so entities
    //    that decode to markup-significant characters must stay encoded: decoding them
    //    after the tag filter would turn text like "&lt;script&gt;" back into a live tag.
    clean = Regex.Replace(
        clean,
        @"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);",
        entity =>
        {
            var decoded = System.Net.WebUtility.HtmlDecode(entity.Value);
            return decoded is "<" or ">" or "&" ? entity.Value : decoded;
        });

    return clean.Trim();
}
|
|
|
|
/// <summary>
/// Counts the runs of characters separated by spaces, tabs, carriage returns or line feeds.
/// </summary>
/// <param name="text">Text to measure; null, empty or all-whitespace input counts as zero.</param>
/// <returns>The number of non-empty tokens between the separator characters.</returns>
private int CountWords(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return 0;
    }

    var wordCount = 0;
    var insideWord = false;

    foreach (var ch in text)
    {
        // Only these four characters act as separators.
        if (ch is ' ' or '\r' or '\n' or '\t')
        {
            insideWord = false;
        }
        else if (!insideWord)
        {
            // First character of a new token.
            insideWord = true;
            wordCount++;
        }
    }

    return wordCount;
}
|
|
|
|
/// <summary>
/// Builds the AI action prompt block that is inserted between text segments.
/// </summary>
/// <param name="id">Identifier assigned to the new block.</param>
/// <returns>A trigger block with the fixed Polish prompt and its three action labels.</returns>
private AiActionTriggerBlock CreateAiTrigger(string id)
{
    // Polish UI text: "An interesting passage was detected! Would you like me to
    // generate a summary or a quiz from this chapter?"
    const string prompt = "Wykryto ciekawy fragment! Czy chcesz, abym wygenerował podsumowanie lub quiz z tego rozdziału?";

    // Polish action labels: "Summarize", "Generate Quiz", "Skip".
    var actions = new List<string> { "Podsumuj", "Generuj Quiz", "Pomiń" };

    return new AiActionTriggerBlock(id, prompt, actions);
}
|
|
|
|
/// <summary>
/// Searches the navigation tree depth-first for the entry that links to a content file.
/// </summary>
/// <param name="navigation">Navigation items to search; may be null (e.g. a book without a TOC).</param>
/// <param name="filePath">Path of the content file whose title is wanted.</param>
/// <returns>
/// The title of the first item whose link path equals <paramref name="filePath"/> or its bare
/// file name, or null when <paramref name="navigation"/> is null, <paramref name="filePath"/>
/// is null/empty, or nothing matches.
/// </returns>
private string? FindTitleInNavigation(IEnumerable<EpubNavigationItemRef>? navigation, string? filePath)
{
    // A missing navigation list is treated like an empty one instead of throwing.
    if (navigation is null || string.IsNullOrEmpty(filePath))
    {
        return null;
    }

    var fileName = Path.GetFileName(filePath);

    foreach (var item in navigation)
    {
        // Match by full path, or by just the file name as a fallback.
        var linkPath = item.Link?.ContentFilePath;
        if (linkPath is not null && (linkPath == filePath || linkPath == fileName))
        {
            return item.Title;
        }

        // The recursive call handles null or empty nested lists itself.
        var nestedTitle = FindTitleInNavigation(item.NestedItems, filePath);
        if (nestedTitle is not null)
        {
            return nestedTitle;
        }
    }

    return null;
}
|
|
// Metadata extraction moved to EpubMetadataExtractor
|
|
}
|
|
|
|
/// <summary>
/// Reads title, author and cover image from an EPUB supplied as a stream.
/// </summary>
public class EpubMetadataExtractor : IEpubMetadataExtractor
{
    /// <summary>
    /// Opens the EPUB in <paramref name="epubStream"/> and extracts its basic metadata.
    /// </summary>
    /// <param name="epubStream">Stream positioned at the EPUB archive.</param>
    /// <returns>
    /// A successful result with the metadata (placeholder title/author when the book reports
    /// none), or a failed result wrapping any exception thrown while reading.
    /// </returns>
    public async Task<Result<LocalEpubMetadata>> ExtractMetadataAsync(Stream epubStream)
    {
        try
        {
            using var bookRef = await EpubReader.OpenBookAsync(epubStream);

            // Blank values get the placeholder too: a null-only check would let an
            // empty title or author through unchanged.
            var title = string.IsNullOrWhiteSpace(bookRef.Title) ? "Unknown Title" : bookRef.Title;
            var author = string.IsNullOrWhiteSpace(bookRef.Author) ? "Unknown Author" : bookRef.Author;
            byte[]? cover = await bookRef.ReadCoverAsync();

            return Result.Ok(new LocalEpubMetadata { Title = title, Author = author, CoverImage = cover });
        }
        catch (Exception ex)
        {
            return Result.Fail(new Error($"Failed to extract EPUB metadata locally: {ex.Message}").CausedBy(ex));
        }
    }
}
|