feat: implement background ebook indexing with progress tracking and real-time UI updates
This commit is contained in:
@@ -4,9 +4,10 @@ public static class PromptRegistry
|
||||
{
|
||||
public const string KnowledgeExtractionSystemPrompt =
|
||||
"You are an expert educator. Analyze the provided text to extract key concepts, generate relevant quizzes, and construct a knowledge graph. " +
|
||||
"CRITICAL: Restrict 'concept.label' to a maximum of 3 words (e.g., 'Dependency Injection' instead of full sentences). " +
|
||||
"**LANGUAGE CRITICAL**: Detect the language of the provided text. You MUST generate all human-readable fields ('title', 'description', 'question', 'options', 'label') in the EXACT SAME LANGUAGE as the source text. Do NOT translate them to English unless the source text is in English. " +
|
||||
"CRITICAL: Restrict 'concept.label' to a maximum of 3 words (e.g., 'Dependency Injection' or its exact foreign equivalent, never full sentences). " +
|
||||
"CRITICAL: Extract a MAXIMUM of 15 key concepts/plot points from the text. " +
|
||||
"CRITICAL: Code blocks (e.g., markdown code snippets) must be excluded from the relationship graph, or summarized as a single node (e.g., 'Code Example'). Do NOT create nodes for variables, functions, namespaces, or individual lines of code. " +
|
||||
"CRITICAL: Code blocks (e.g., markdown code snippets) must be excluded from the relationship graph, or summarized as a single node with the label 'Code Example' translated to the detected language. Do NOT create nodes for variables, functions, namespaces, or individual lines of code. " +
|
||||
"CRITICAL: Return ONLY a minified JSON object. Do NOT include markdown formatting like ```json or ```. Do NOT include explanations. " +
|
||||
"Schema: { " +
|
||||
"\"concepts\": [ { \"title\": \"string\", \"description\": \"string\" } ], " +
|
||||
@@ -15,26 +16,31 @@ public static class PromptRegistry
|
||||
"}.";
|
||||
|
||||
public const string GraphExtractionPrompt =
|
||||
"You are an expert at information architecture. Extract key concepts and paragraph mappings from the text to build a unified knowledge graph. " +
|
||||
"You are an expert at information architecture. Extract a highly strategic, clean, and educational knowledge graph from the provided technical text to act as a clear structural roadmap, avoiding clutter or hyper-connected noise hubs. " +
|
||||
"**LANGUAGE CRITICAL**: Detect the language of the provided text. The 'label' and 'description' fields MUST be generated in the EXACT SAME LANGUAGE as the source text. Do NOT translate them to English. " +
|
||||
"The input text consists of several paragraphs, each starting with its unique block ID in the format '[ID: seg-X]'. " +
|
||||
"Extract two types of nodes: " +
|
||||
"1. Concept Nodes (group: 'concept'): Extract the main technical concepts discussed (e.g., ID: 'dependency-injection', label: 'Dependency Injection'). Max 10 concepts. Labels must be at most 3 words. " +
|
||||
"2. Block Nodes (group: 'current'): For each paragraph in the input, create a node representing that paragraph where 'id' is the exact block ID (e.g., 'seg-1'), and 'label' is a brief summary of that paragraph's content (max 3 words). " +
|
||||
"CRITICAL: If a paragraph is a code block, represent it as a single block node with label 'Code Example' (group: 'current'). Do NOT extract low-level code elements (like variables, classes, methods, or namespaces) as separate concept nodes. " +
|
||||
"CRITICAL: Connect related concept nodes together, and connect each concept node to the block nodes ('seg-X') where it is discussed. " +
|
||||
"Limit connections to a MAXIMUM of 15 most relevant links. " +
|
||||
"Return ONLY minified JSON. Schema: { \"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept|current\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"value\": 1 } ] } }";
|
||||
|
||||
"Extract three distinct types of nodes based on strict hierarchical validation: " +
|
||||
"1. Concept Nodes (group: 'concept'): Extract major global architectural pillars discussed. Max 6 per segment. Labels must be 1-3 words max. " +
|
||||
"2. Bridge Nodes (group: 'bridge'): If the text directly compares a legacy paradigm (e.g., Desktop/WPF) to a modern framework alternative (.NET 10/Blazor), extract them as paired concepts to visually bridge the structural evolution. " +
|
||||
"3. Block Nodes (group: 'current'): Create a node ONLY for significant structural landmarks in the text (e.g., major headings). Do NOT connect every concept to every individual paragraph wrapper. Connect concepts only to the main section block where they are anchored. " +
|
||||
"CRITICAL NOISE SUPPRESSION: Absolutely forbid creating separate nodes for individual configuration files, files names, simple classes, servers, or methods (e.g., 'appsettings.json', 'Kestrel', 'Thread.Sleep', 'OnInitializedAsync'). These low-level details MUST be collapsed and described only within the 'description' field of their parent concept node. " +
|
||||
"CRITICAL: Code blocks must be completely ignored as separate nodes; represent them only as contextual attributes within descriptions. " +
|
||||
"Limit topology connections to a MAXIMUM of 10 highly relevant links total per segment. " +
|
||||
"System keys configuration: 'group' must be strictly 'concept', 'bridge', 'current', 'rule', 'definition', 'table', or 'section'. " +
|
||||
"Return ONLY minified JSON. Schema: { \"graph\": { \"nodes\": [ { \"id\": \"string\", \"label\": \"string\", \"group\": \"concept|bridge|current\", \"description\": \"string\" } ], \"links\": [ { \"source\": \"string\", \"target\": \"string\", \"type\": \"maps_to|contains|relates_to\" } ] } }";
|
||||
|
||||
/// <summary>
/// Prompt text that asks for a concise summary of the supplied text plus a
/// challenging quiz of 3-5 questions.
/// </summary>
/// <remarks>
/// The prompt requires the generated 'summary', 'question', and 'options' values
/// to be written in the same language as the source text, and requires the reply
/// to be minified JSON only, shaped as a "summary" string and a "quizzes" array
/// whose items carry "question", "options", and "correct_index".
/// </remarks>
public const string SummaryAndQuizPrompt =
|
||||
"You are an expert educator. Provide a concise summary of the text and generate a challenging quiz (3-5 questions). " +
|
||||
"**LANGUAGE CRITICAL**: Detect the language of the provided text. The generated 'summary', 'question', and 'options' MUST be in the EXACT SAME LANGUAGE as the source text. " +
|
||||
"Return ONLY minified JSON. Schema: { \"summary\": \"string\", \"quizzes\": [ { \"question\": \"string\", \"options\": [ \"string\" ], \"correct_index\": 0 } ] }";
|
||||
|
||||
public const string KM_ExtractionPrompt =
|
||||
"You are an expert at Knowledge Engineering. Segment the provided text into discrete Knowledge Units. " +
|
||||
"**LANGUAGE CRITICAL**: Detect the language of the provided text. The 'content' field MUST be in the EXACT SAME LANGUAGE as the source text. " +
|
||||
"Identify 'units' (sections, tables, definitions, rules) and 'links' (how they relate). " +
|
||||
"CRITICAL: Units must be granular. " +
|
||||
"CRITICAL: Code blocks must be summarized under the parent unit or represented as a single 'Code Example' unit. Do NOT segment code blocks into granular low-level code details (e.g., classes, variables, parameters). " +
|
||||
"CRITICAL: Code blocks must be summarized under the parent unit or represented as a single 'Code Example' unit (translate the name to the detected language). Do NOT segment code blocks into granular low-level code details (e.g., classes, variables, parameters). " +
|
||||
"CRITICAL SYSTEM VALUES: The fields 'type' (strictly: 'Section', 'Table', 'Definition', or 'Rule') and 'relation' (strictly: 'Next', 'Defines', 'Contains', or 'References') are system keys and MUST remain in English as specified. " +
|
||||
"Schema: { " +
|
||||
"\"units\": [ { \"id\": \"string\", \"type\": \"Section|Table|Definition|Rule\", \"content\": \"string\", \"metadata\": { \"page\": 0 } } ], " +
|
||||
"\"links\": [ { \"source\": \"string\", \"target\": \"string\", \"relation\": \"Next|Defines|Contains|References\" } ] " +
|
||||
|
||||
Reference in New Issue
Block a user