Files
gen_game-a0/src/core.py
vuongps38770 7c41ddaa82 init
2026-01-13 09:33:10 +07:00

928 lines
31 KiB
Python

"""
core.py - Simple Game Generator Core
3 USE CASES:
1. run_multi() - Analyze + generate nhiều games phù hợp
2. run_single() - Analyze + generate 1 game tốt nhất (1 API call)
3. generate() - Generate 1 game cụ thể (không analyze)
"""
import os
import json
import time
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from langchain_core.prompts import ChatPromptTemplate
from .llm_config import ModelConfig, get_llm, get_default_config
from .game_registry import get_registry, get_game, get_analyzer_context
from .validator import QuoteValidator
from .logger import logger
@dataclass
class TokenUsage:
    """Accumulates LLM token and character usage across one request flow."""

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    input_chars: int = 0  # Character count sent to LLM
    output_chars: int = 0  # Character count received from LLM

    def add(self, usage: Optional[Dict[str, int]]):
        """Fold one usage report into the running totals.

        Accepts both OpenAI-style keys (prompt_tokens/completion_tokens)
        and Anthropic/Gemini-style keys (input_tokens/output_tokens).

        Robustness fix: providers do not always report usage — `usage`
        may be None, and individual values inside the dict may be None.
        Both used to raise (AttributeError / TypeError on `+=`); now they
        are treated as zero.
        """
        usage = usage or {}
        self.prompt_tokens += (
            usage.get("prompt_tokens") or usage.get("input_tokens") or 0
        )
        self.completion_tokens += (
            usage.get("completion_tokens") or usage.get("output_tokens") or 0
        )
        # total is always derived, never read from the provider
        self.total_tokens = self.prompt_tokens + self.completion_tokens

    def add_chars(self, input_text: str, output_text: str):
        """Track character counts for LLM input/output (None-safe)."""
        self.input_chars += len(input_text) if input_text else 0
        self.output_chars += len(output_text) if output_text else 0

    def to_dict(self) -> Dict[str, int]:
        """Serialize all counters for API responses / logging."""
        return {
            "prompt_tokens": self.prompt_tokens,
            "completion_tokens": self.completion_tokens,
            "total_tokens": self.total_tokens,
            "input_chars": self.input_chars,
            "output_chars": self.output_chars,
        }
class GameCore:
    """
    Simple Game Generator.

    Usage:
        core = GameCore()
        # 1. Generate several suitable games (analyze first)
        result = core.run_multi(text)
        # 2. Generate the single best game (1 API call)
        result = core.run_single(text)
        # 3. Generate one specific game type
        result = core.generate("quiz", text)
    """

    def __init__(self, llm_config: Optional[Union[ModelConfig, Dict, str]] = None):
        # llm_config may be a ModelConfig, a dict of ModelConfig kwargs,
        # a provider-name string, or None (auto-detect from env vars).
        self.llm_config = self._parse_config(llm_config)
        self.llm = get_llm(self.llm_config)  # chat model used via LangChain prompt piping
        self.validator = QuoteValidator()  # checks original_quote against the source text
        self.registry = get_registry()  # catalog of registered game types
        print(f"🤖 LLM: {self.llm_config.provider}/{self.llm_config.model_name}")
def _parse_config(self, config) -> ModelConfig:
    """Normalize any supported config form into a ModelConfig.

    Accepts None (auto-detect provider from env vars), a ready
    ModelConfig, a provider-name string, or a kwargs dict.
    Raises ValueError for anything else.
    """
    if config is None:
        # Pick a provider based on which API key is available;
        # fall back to a local ollama setup.
        for env_var, provider in (
            ("GOOGLE_API_KEY", "gemini"),
            ("OPENAI_API_KEY", "openai"),
        ):
            if os.getenv(env_var):
                return get_default_config(provider)
        return get_default_config("ollama")
    if isinstance(config, ModelConfig):
        return config
    if isinstance(config, str):
        return get_default_config(config)
    if isinstance(config, dict):
        return ModelConfig(**config)
    raise ValueError(f"Invalid config: {type(config)}")
# ============== 1. RUN MULTI (Analyze + Generate nhiều games) ==============
def run_multi(
    self,
    text: str,
    enabled_games: Optional[List[str]] = None,
    max_items: int = 3,
    min_score: int = 20,
    validate: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    Analyze the text, then generate every game type that fits it.

    Two LLM round-trips: one analyze call, one multi-game generate call.
    Returns: {success, games, results, errors, token_usage, llm}
    """
    tracker = TokenUsage()
    all_errors = []
    candidates = enabled_games or self.registry.get_game_types()
    logger.info(f"Analyzing text for multi-gen. Available games: {candidates}")

    # Step 1: analyze (also yields document-level metadata)
    games, scores, metadata, analyze_errors = self._analyze(
        text, candidates, min_score, tracker, debug
    )
    all_errors.extend(analyze_errors)
    if not games:
        logger.warning("Analyzer found no suitable games matches.")
        return self._result(False, [], {}, all_errors, tracker, metadata=metadata)
    logger.info(f"Analyzer selected: {games}")

    # Step 2: generate items for every selected game in one call
    results, gen_errors = self._generate_multi(games, text, max_items, tracker, debug)
    all_errors.extend(gen_errors)

    # Step 3: optionally drop items whose quotes aren't in the source
    if validate:
        results = self._validate(results, text)

    # Success only when at least one game ended up with items
    has_items = any(
        isinstance(payload, dict) and payload.get("items", [])
        for payload in results.values()
    )
    return self._result(has_items, games, results, all_errors, tracker, scores, metadata)
# ============== 1.5. RUN FAST (1 API call: Analyze + Generate ALL suitable games) ==============
def run_fast(
    self,
    text: str,
    enabled_games: Optional[List[str]] = None,
    max_items: int = 3,
    min_score: int = 50,
    validate: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    OPTIMIZED: analyze + generate ALL suitable games in ONE API call.

    Output format is IDENTICAL to run_multi():
    - a single LLM call instead of two (analyze + generate)
    - returns multiple games with their items

    Returns: {success, games, game_scores, metadata, results, token_usage, llm}
    """
    tracker = TokenUsage()
    available = enabled_games or self.registry.get_game_types()
    logger.info(f"[run_fast] Starting with games: {available}")
    # Build the FULL per-game schema blocks (same shape as _generate_multi)
    games_schema = []
    for gt in available:
        game = get_game(gt)
        if game:
            games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
    # Response skeleton: one keyed entry per candidate game type
    fmt = ", ".join(
        [
            f'"{gt}": {{"score": 0-100, "reason": "...", "items": [...]}}'
            for gt in available
        ]
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are an educational game analyzer AND generator. In ONE response:
1. SCORE each game type (0-100) based on how well the text matches game requirements
2. GENERATE items for games with score >= {min_score}
SCORING GUIDE:
- 70-100: Text matches game requirements well → GENERATE items
- 40-69: Partial match → GENERATE items if >= min_score
- 0-39: Does not match → DO NOT generate items
GENERATION RULES:
- KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- Generate max {max_items} items per game
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
            ),
            (
                "human",
                """GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
RESPOND with this EXACT JSON structure:
{{
"metadata": {{
"title": "short title from source",
"description": "one sentence summary",
"grade": 1-5,
"difficulty": 1-5
}},
{format}
}}""",
            ),
        ]
    )
    if debug:
        print(f"\n{'=' * 50}\n✨ RUN FAST (1 call)\n{'=' * 50}")
        print(f"Text length: {len(text)}")
    try:
        # Build input for tracking
        invoke_params = {
            "schemas": "\n\n".join(games_schema),
            "text": text,
            "format": fmt,
            "min_score": min_score,
            "max_items": max_items,
        }
        resp = (prompt | self.llm).invoke(invoke_params)
        tracker.add(self._get_usage(resp))
        # Track character counts: input = all params joined, output = response content
        input_text = " ".join(str(v) for v in invoke_params.values())
        tracker.add_chars(input_text, resp.content)
        if debug:
            print(f"📝 Response: {resp.content[:500]}...")
        data = self._parse_json(resp.content)
        metadata = data.get("metadata", {})
        # Process results - output shape mirrors _generate_multi
        results = {}
        scores = []
        selected_games = []
        errors = []
        for gt in available:
            game_data = data.get(gt, {})
            if not isinstance(game_data, dict):
                # malformed entry for this game type; skip it entirely
                continue
            score = game_data.get("score", 0)
            reason = game_data.get("reason", "")
            items = game_data.get("items", [])
            scores.append({"type": gt, "score": score, "reason": reason})
            if score >= min_score and items:
                # Post-process items (ids + game_type tag, same as _generate_multi)
                processed_items = self._post_process(items, gt)
                # Drop items whose original_quote is not found in the source
                if validate:
                    processed_items = [
                        i
                        for i in processed_items
                        if self.validator.validate_quote(
                            i.get("original_quote", ""), text
                        ).is_valid
                    ]
                # Unified structure: {items: [...], metadata: {...}} - same as run_multi
                results[gt] = {
                    "items": processed_items,
                    "metadata": game_data.get("metadata"),
                }
                if processed_items:
                    selected_games.append(gt)
                else:
                    errors.append(f"No valid items for {gt}")
            elif score >= min_score:
                errors.append(f"No items generated for {gt}")
        # Highest-scoring games first
        scores.sort(key=lambda x: x.get("score", 0), reverse=True)
        return self._result(
            success=len(selected_games) > 0,
            games=selected_games,
            results=results,
            errors=errors,
            tracker=tracker,
            scores=scores,
            metadata=metadata,
        )
    except Exception as e:
        logger.error(f"[run_fast] Error: {e}")
        return self._result(False, [], {}, [str(e)], tracker)
# ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ==============
def run_single(
    self,
    text: str,
    enabled_games: Optional[List[str]] = None,
    max_items: int = 3,
    validate: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    ONE API call: analyze the text AND generate items for the best game.

    Args:
        text: source text; only the first 2000 chars are sent to the LLM.
        enabled_games: candidate game types; defaults to the full registry.
        max_items: upper bound on generated items.
        validate: drop items whose original_quote is not in the source text.
        debug: print progress banners to stdout.

    Returns: {success, game_type, reason, items, errors, token_usage, llm}
    """
    tracker = TokenUsage()
    available = enabled_games or self.registry.get_game_types()
    logger.info(f"Starting run_single for available games: {available}")
    # Build a short description + one example output per candidate game
    games_info = []
    for gt in available:
        game = get_game(gt)
        if game:
            example = (
                json.dumps(
                    game.examples[0].get("output", {}), ensure_ascii=False, indent=2
                )
                if game.examples
                else "{}"
            )
            games_info.append(
                f"### {gt}\n{game.description}\nExample output:\n{example}"
            )
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are an educational game generator.
1. ANALYZE text and CHOOSE the BEST game type
2. GENERATE items for that game
RULES:
- KEEP original language
- original_quote = EXACT copy from source
- ALL content from source only""",
            ),
            (
                "human",
                """GAMES:
{games_info}
TEXT:
{text}
Choose BEST game from: {types}
Generate max {max_items} items.
Return JSON:
{{"game_type": "chosen", "reason": "why", "items": [...]}}""",
            ),
        ]
    )
    content = {
        "games_info": "\n\n".join(games_info),
        "text": text[:2000],  # keep the single-call prompt small
        "types": ", ".join(available),
        "max_items": max_items,
    }
    if debug:
        print(f"\n{'=' * 50}\n🎯 RUN SINGLE\n{'=' * 50}")
    try:
        resp = (prompt | self.llm).invoke(content)
        tracker.add(self._get_usage(resp))
        # Track character counts
        input_text = " ".join(str(v) for v in content.values())
        tracker.add_chars(input_text, resp.content)
        data = self._parse_json(resp.content)
        game_type = data.get("game_type")
        items = self._post_process(data.get("items", []), game_type)
        if validate and items:
            items = [
                i
                for i in items
                if self.validator.validate_quote(
                    i.get("original_quote", ""), text
                ).is_valid
            ]
        return {
            "success": len(items) > 0,
            "game_type": game_type,
            "reason": data.get("reason", ""),
            "items": items,
            "errors": [],
            "token_usage": tracker.to_dict(),
            "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
        }
    except Exception as e:
        # FIX: include "reason" so failure payloads carry the same keys as
        # the success payload (the docstring promises a "reason" field).
        return {
            "success": False,
            "game_type": None,
            "reason": "",
            "items": [],
            "errors": [str(e)],
            "token_usage": tracker.to_dict(),
            "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
        }
# ============== 3. GENERATE (1 game cụ thể, không analyze) ==============
def generate(
    self,
    game_type: str,
    text: str,
    max_items: int = 3,
    validate: bool = True,
    debug: bool = False,
) -> Dict[str, Any]:
    """Generate content for one specific game type (no analyze step).

    Args:
        game_type: registered game type name (e.g. "quiz").
        text: source text to extract content from.
        max_items: accepted for interface symmetry; not referenced by the
            prompt here — NOTE(review): item count is governed by the
            game's own prompt/schema, confirm intended.
        validate: drop items whose original_quote is not in the text.
        debug: print progress/diagnostics to stdout.

    Returns: {success, game_type, data, errors, token_usage, llm};
    adds "format_error" when the model rejects the input text or no
    items could be extracted.
    """
    tracker = TokenUsage()
    logger.info(f"Generating single game content: {game_type}")
    game = get_game(game_type)
    if not game:
        return {
            "success": False,
            "game_type": game_type,
            "items": [],
            "errors": [f"Game not found: {game_type}"],
            "token_usage": {},
            "llm": "",
        }
    # Build Format Rules Section (lets the model bail out early on
    # incompatible input via a {"format_error": ...} reply)
    format_rules_section = ""
    if game.input_format_rules:
        rules_str = "\n".join(f"- {r}" for r in game.input_format_rules)
        format_rules_section = f"""
CRITICAL: FIRST, VALIDATE THE INPUT TEXT.
Format Rules:
{rules_str}
If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else:
{{{{ "format_error": "Input text incompatible with game requirements." }}}}
"""
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                f"""{game.generated_system_prompt}
{format_rules_section}""",
            ),
            (
                "human",
                """TEXT TO PROCESS:
{text}
Generate content in JSON format:
{format_instructions}""",
            ),
        ]
    )
    if debug:
        print(f"\n{'=' * 50}\n🎮 GENERATE: {game_type}\n{'=' * 50}")
    try:
        invoke_params = {
            "text": text,
            "format_instructions": game.format_instructions,
        }
        resp = (prompt | self.llm).invoke(invoke_params)
        tracker.add(self._get_usage(resp))
        # Track character counts
        input_text = " ".join(str(v) for v in invoke_params.values())
        tracker.add_chars(input_text, resp.content)
        # 1. Parse as raw JSON first to check for format_error.
        raw_data = None
        try:
            raw_data = self._parse_json(resp.content)
        except Exception:
            # FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; parse failure is handled
            # below by the output_parser / empty-items paths.
            pass
        # 2. Check if it's a format_error immediately
        # (isinstance guard: the model may legally return a JSON array)
        if isinstance(raw_data, dict) and raw_data.get("format_error"):
            return {
                "success": False,
                "game_type": game_type,
                "data": None,
                "format_error": raw_data["format_error"],
                "errors": [raw_data["format_error"]],
                "token_usage": tracker.to_dict(),
                "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
            }
        parsed_data = raw_data
        # 3. Try output_parser for structured validation if present
        if game.output_parser:
            try:
                parsed = game.output_parser.parse(resp.content)
                parsed_data = parsed.model_dump()
            except Exception as pe:
                if debug:
                    print(f"⚠️ output_parser failed: {pe}")
                # Keep raw_data if parser fails but we have JSON
        # Re-check format_error (the parser may have produced a new dict)
        if isinstance(parsed_data, dict) and parsed_data.get("format_error"):
            return {
                "success": False,
                "game_type": game_type,
                "data": None,
                "format_error": parsed_data["format_error"],
                "errors": [parsed_data["format_error"]],
                "token_usage": tracker.to_dict(),
                "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
            }
        # Post-process: attach ids + game_type, then optionally validate quotes
        items = parsed_data.get("items", []) if isinstance(parsed_data, dict) else []
        items = self._post_process(items, game_type)
        if validate and items:
            items = [
                i
                for i in items
                if self.validator.validate_quote(
                    i.get("original_quote", ""), text
                ).is_valid
            ]
        if not items:
            # FIX: surface the failure message in "errors" too — it was
            # previously only in "format_error" while errors stayed [].
            return {
                "success": False,
                "game_type": game_type,
                "data": None,
                "format_error": "No items extracted",
                "errors": ["No items extracted"],
                "token_usage": tracker.to_dict(),
                "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
            }
        if isinstance(parsed_data, dict):
            parsed_data["items"] = items
        return {
            "success": True,
            "game_type": game_type,
            "data": parsed_data,
            "errors": [],
            "token_usage": tracker.to_dict(),
            "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
        }
    except Exception as e:
        return {
            "success": False,
            "game_type": game_type,
            "data": None,
            "errors": [str(e)],
            "token_usage": tracker.to_dict(),
            "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
        }
# ============== PRIVATE METHODS ==============
def _analyze(
    self,
    text: str,
    available: List[str],
    min_score: int,
    tracker: TokenUsage,
    debug: bool,
) -> tuple:
    """Score the text against every available game type (with retry).

    Returns a 4-tuple: (selected_game_types, scores, metadata, errors).
    On total failure, falls back to returning ALL available games so the
    caller can still attempt generation.
    """
    # Per-game requirement descriptions pulled from the game configs
    context = get_analyzer_context()
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.
GAME REQUIREMENTS:
{context}
SCORING:
- 70-100: Text matches game requirements well
- 40-69: Partial match
- 0-39: Does not match requirements
IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field.
Return valid JSON with scores AND metadata about the content:
{{
"scores": [
{{
"type": "NAME_OF_GAME_TYPE",
"score": 80,
"reason": "..."
}}
],
"metadata": {{
"title": "Title from source or create short title",
"description": "One sentence summary",
"grade": 1-5,
"difficulty": 1-5
}}
}}""",
            ),
            (
                "human",
                """TEXT TO ANALYZE:
{text}
Analyze for games: {types}
Return JSON:""",
            ),
        ]
    )
    max_retries = 2
    for attempt in range(max_retries):
        try:
            invoke_params = {
                "context": context,
                "text": text,
                "types": ", ".join(available),
            }
            resp = (prompt | self.llm).invoke(invoke_params)
            tracker.add(self._get_usage(resp))
            # Track character counts
            input_text = " ".join(str(v) for v in invoke_params.values())
            tracker.add_chars(input_text, resp.content)
            if debug:
                print(f"📝 Analyzer raw: {resp.content}")
            # Parse JSON, retrying the whole call on an empty reply
            content = resp.content.strip()
            if not content:
                if debug:
                    print(f"⚠️ Empty response, retry {attempt + 1}")
                continue
            data = self._parse_json(content)
            # Keep only known game types that clear the score threshold
            scores = [
                s
                for s in data.get("scores", [])
                if s.get("type") in available and s.get("score", 0) >= min_score
            ]
            scores.sort(key=lambda x: x.get("score", 0), reverse=True)
            # Extract metadata from response
            metadata = data.get("metadata", {})
            if debug:
                print(f"🔍 Scores: {scores}")
                print(f"📋 Metadata: {metadata}")
            return [s["type"] for s in scores], scores, metadata, []
        except Exception as e:
            if debug:
                print(f"⚠️ Analyze attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                # Final fallback: return all games, with the error recorded
                return available, [], {}, [f"Analyze error: {e}"]
    # Only reachable when every attempt hit the empty-response `continue`
    return available, [], {}, ["Analyze failed after retries"]
def _generate_multi(
    self,
    games: List[str],
    text: str,
    max_items: int,
    tracker: TokenUsage,
    debug: bool,
) -> tuple:
    """Generate items for several game types in a single LLM call.

    Returns (results, errors) where results maps each game type to
    {"items": [...], "metadata": {...}}.
    """
    # Single game: delegate to generate() and adapt its return shape
    if len(games) == 1:
        result = self.generate(
            games[0], text, max_items, validate=False, debug=debug
        )
        tracker.add(result.get("token_usage", {}))
        # Fix: generate returns {data: {items: [...]}} not {items: [...]}
        data = result.get("data") or {}
        items = data.get("items", []) if isinstance(data, dict) else []
        return {
            games[0]: {"items": items, "metadata": data.get("metadata")}
        }, result.get("errors", [])
    # Multi-game: Build schema info for each game
    games_schema = []
    for gt in games:
        game = get_game(gt)
        if game:
            games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a multi-game content generator. In ONE response:
1. Generate items for EACH game type following their EXACT schema
GENERATION RULES:
- KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
            ),
            (
                "human",
                """GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
Generate items for: {types}
Return valid JSON: {{{format}}}""",
            ),
        ]
    )
    # Response skeleton: one keyed entry per requested game type
    fmt = ", ".join(
        [f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]
    )
    try:
        invoke_params = {
            "schemas": "\n\n".join(games_schema),
            "text": text,
            "types": ", ".join(games),
            "format": fmt,
        }
        resp = (prompt | self.llm).invoke(invoke_params)
        tracker.add(self._get_usage(resp))
        # Track character counts
        input_text = " ".join(str(v) for v in invoke_params.values())
        tracker.add_chars(input_text, resp.content)
        data = self._parse_json(resp.content)
        results = {}
        errors = []
        for gt in games:
            game_data = data.get(gt, {}) if isinstance(data.get(gt), dict) else {}
            items = game_data.get("items", [])
            items = self._post_process(items, gt)
            # Unified structure: {items: [...], metadata: {...}}
            results[gt] = {"items": items, "metadata": game_data.get("metadata")}
            if not items:
                errors.append(f"No items for {gt}")
        return results, errors
    except Exception as e:
        # On any failure, every requested game gets an empty entry
        return {gt: {"items": [], "metadata": None} for gt in games}, [
            f"Generate error: {e}"
        ]
def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]:
"""Validate items trong results"""
validated = {}
for gt, data in results.items():
items = data.get("items", []) if isinstance(data, dict) else []
valid_items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
validated[gt] = {
"items": valid_items,
"metadata": data.get("metadata") if isinstance(data, dict) else None,
}
return validated
def _post_process(self, items: List, game_type: str) -> List[Dict]:
ms = int(time.time() * 1000)
result = []
for i, item in enumerate(items):
d = (
item
if isinstance(item, dict)
else (item.model_dump() if hasattr(item, "model_dump") else {})
)
d["id"] = f"{game_type[:2].upper()}-{ms}-{i}"
d["game_type"] = game_type
result.append(d)
return result
def _parse_json(self, content: str) -> Dict:
if "```" in content:
content = content.split("```")[1].replace("json", "").strip()
return json.loads(content)
def _get_usage(self, resp) -> Dict:
if hasattr(resp, "response_metadata"):
meta = resp.response_metadata
return meta.get(
"usage", meta.get("usage_metadata", meta.get("token_usage", {}))
)
return getattr(resp, "usage_metadata", {})
def _result(
    self,
    success: bool,
    games: List,
    results: Dict,
    errors: List,
    tracker: TokenUsage,
    scores: List = None,
    metadata: Dict = None,
) -> Dict:
    """Assemble the common response envelope shared by run_multi/run_fast."""
    llm_label = f"{self.llm_config.provider}/{self.llm_config.model_name}"
    payload = {
        "success": success,
        "games": games,
        "game_scores": scores or [],
        "metadata": metadata or {},
        "results": results,
        "errors": errors,
        "token_usage": tracker.to_dict(),
        "llm": llm_label,
    }
    return payload
# ============== ASYNC WRAPPERS (for concurrent FastAPI handling) ==============
# These methods run the blocking LLM calls in a thread pool
async def run_fast_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 50,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_fast - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_fast, text, enabled_games, max_items, min_score, validate, debug
)
async def run_single_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_single - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_single, text, enabled_games, max_items, validate, debug
)
async def run_multi_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_multi - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_multi, text, enabled_games, max_items, validate, debug
)
async def generate_async(
self,
text: str,
game_types: Union[List[str], str],
max_items: int = 10,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for generate - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.generate, text, game_types, max_items, validate, debug
)