check point

This commit is contained in:
vuongps38770
2025-12-25 18:06:29 +07:00
commit 31de8b0d84
34 changed files with 3209 additions and 0 deletions

513
src/core.py Normal file
View File

@@ -0,0 +1,513 @@
"""
core.py - Simple Game Generator Core
3 USE CASES:
1. run_multi() - Analyze + generate nhiều games phù hợp
2. run_single() - Analyze + generate 1 game tốt nhất (1 API call)
3. generate() - Generate 1 game cụ thể (không analyze)
"""
import os
import json
import time
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass
from langchain_core.prompts import ChatPromptTemplate
from .llm_config import ModelConfig, get_llm, get_default_config
from .game_registry import get_registry, get_game, get_analyzer_context
from .validator import QuoteValidator
from .logger import logger
@dataclass
class TokenUsage:
    """Accumulates LLM token counts across multiple API calls."""

    prompt_tokens: int = 0      # tokens sent to the model
    completion_tokens: int = 0  # tokens produced by the model
    total_tokens: int = 0       # prompt + completion, kept in sync by add()

    def add(self, usage: Optional[Dict[str, int]]) -> None:
        """Merge one response's usage dict into the running totals.

        Accepts both OpenAI-style keys (prompt_tokens/completion_tokens)
        and Gemini-style keys (input_tokens/output_tokens).

        Robustness fix: a None or empty usage payload is ignored, and
        None values inside the dict no longer raise TypeError (providers
        sometimes report explicit nulls).
        """
        if not usage:
            return
        # Fall back to the alternate key when the primary is absent or zero;
        # trailing `or 0` guards against explicit None values.
        self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0) or 0
        self.completion_tokens += usage.get("completion_tokens", 0) or usage.get("output_tokens", 0) or 0
        self.total_tokens = self.prompt_tokens + self.completion_tokens

    def to_dict(self) -> Dict[str, int]:
        """Return the totals as a plain, JSON-serializable dict."""
        return {"prompt_tokens": self.prompt_tokens, "completion_tokens": self.completion_tokens, "total_tokens": self.total_tokens}
class GameCore:
"""
Simple Game Generator.
Usage:
core = GameCore()
# 1. Generate nhiều games (analyze first)
result = core.run_multi(text)
# 2. Generate 1 game tốt nhất (1 API call)
result = core.run_single(text)
# 3. Generate 1 game cụ thể
result = core.generate("quiz", text)
"""
def __init__(self, llm_config: Optional[Union[ModelConfig, Dict, str]] = None):
self.llm_config = self._parse_config(llm_config)
self.llm = get_llm(self.llm_config)
self.validator = QuoteValidator()
self.registry = get_registry()
print(f"🤖 LLM: {self.llm_config.provider}/{self.llm_config.model_name}")
def _parse_config(self, config) -> ModelConfig:
if config is None:
if os.getenv("GOOGLE_API_KEY"):
return get_default_config("gemini")
elif os.getenv("OPENAI_API_KEY"):
return get_default_config("openai")
return get_default_config("ollama")
if isinstance(config, ModelConfig):
return config
if isinstance(config, str):
return get_default_config(config)
if isinstance(config, dict):
return ModelConfig(**config)
raise ValueError(f"Invalid config: {type(config)}")
# ============== 1. RUN MULTI (Analyze + Generate nhiều games) ==============
def run_multi(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 20,
validate: bool = True,
debug: bool = False
) -> Dict[str, Any]:
"""
Analyze text + Generate nhiều games phù hợp.
Returns: {success, games, results, errors, token_usage, llm}
"""
tracker = TokenUsage()
errors = []
# 1. Analyze (also returns metadata)
available = enabled_games or self.registry.get_game_types()
logger.info(f"Analyzing text for multi-gen. Available games: {available}")
games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug)
errors.extend(err)
if not games:
logger.warning("Analyzer found no suitable games matches.")
return self._result(False, [], {}, errors, tracker, metadata=metadata)
logger.info(f"Analyzer selected: {games}")
# 2. Generate
results, err = self._generate_multi(games, text, max_items, tracker, debug)
errors.extend(err)
# 3. Validate
if validate:
results = self._validate(results, text)
# Check if any game has items
has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict))
return self._result(has_items, games, results, errors, tracker, scores, metadata)
# ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ==============
def run_single(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False
) -> Dict[str, Any]:
"""
1 API call: Analyze + Generate game tốt nhất.
Returns: {success, game_type, reason, items, errors, token_usage, llm}
"""
tracker = TokenUsage()
available = enabled_games or self.registry.get_game_types()
logger.info(f"Starting run_single for available games: {available}")
# Build games info
games_info = []
for gt in available:
game = get_game(gt)
if game:
example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}"
games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}")
prompt = ChatPromptTemplate.from_messages([
("system", """You are an educational game generator.
1. ANALYZE text and CHOOSE the BEST game type
2. GENERATE items for that game
RULES:
- KEEP original language
- original_quote = EXACT copy from source
- ALL content from source only"""),
("human", """GAMES:
{games_info}
TEXT:
{text}
Choose BEST game from: {types}
Generate max {max_items} items.
Return JSON:
{{"game_type": "chosen", "reason": "why", "items": [...]}}""")
])
content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items}
if debug:
print(f"\n{'='*50}\n🎯 RUN SINGLE\n{'='*50}")
try:
resp = (prompt | self.llm).invoke(content)
tracker.add(self._get_usage(resp))
data = self._parse_json(resp.content)
game_type = data.get("game_type")
items = self._post_process(data.get("items", []), game_type)
if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
return {
"success": len(items) > 0,
"game_type": game_type,
"reason": data.get("reason", ""),
"items": items,
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}
except Exception as e:
return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
# ============== 3. GENERATE (1 game cụ thể, không analyze) ==============
def generate(
self,
game_type: str,
text: str,
max_items: int = 3,
validate: bool = True,
debug: bool = False
) -> Dict[str, Any]:
"""Generate 1 game cụ thể"""
tracker = TokenUsage()
logger.info(f"Generating single game content: {game_type}")
game = get_game(game_type)
if not game:
return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""}
# Build Format Rules Section
format_rules_section = ""
if game.input_format_rules:
rules_str = "\n".join(f"- {r}" for r in game.input_format_rules)
format_rules_section = f"""
CRITICAL: FIRST, VALIDATE THE INPUT TEXT.
Format Rules:
{rules_str}
If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else:
{{{{ "format_error": "Input text incompatible with game requirements." }}}}
"""
prompt = ChatPromptTemplate.from_messages([
("system", f"""{game.generated_system_prompt}
{format_rules_section}"""),
("human", """TEXT TO PROCESS:
{text}
Generate content in JSON format:
{format_instructions}""")
])
if debug:
print(f"\n{'='*50}\n🎮 GENERATE: {game_type}\n{'='*50}")
try:
resp = (prompt | self.llm).invoke({
"text": text,
"format_instructions": game.format_instructions
})
tracker.add(self._get_usage(resp))
# 1. Parse as raw JSON first to check for format_error
raw_data = None
try:
raw_data = self._parse_json(resp.content)
except:
pass
# 2. Check if it's a format_error immediately
if raw_data and raw_data.get("format_error"):
return {
"success": False,
"game_type": game_type,
"data": None,
"format_error": raw_data["format_error"],
"errors": [raw_data["format_error"]],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}
parsed_data = raw_data
# 3. Try output_parser for structured validation if present
if game.output_parser:
try:
parsed = game.output_parser.parse(resp.content)
parsed_data = parsed.model_dump()
except Exception as pe:
if debug: print(f"⚠️ output_parser failed: {pe}")
# Keep raw_data if parser fails but we have JSON
# Check format error
if parsed_data and parsed_data.get("format_error"):
return {
"success": False,
"game_type": game_type,
"data": None,
"format_error": parsed_data["format_error"],
"errors": [parsed_data["format_error"]],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}
# Post-process
items = parsed_data.get("items", []) if parsed_data else []
items = self._post_process(items, game_type)
if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
if not items:
return {
"success": False,
"game_type": game_type,
"data": None,
"format_error": "No items extracted",
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}
if parsed_data:
parsed_data["items"] = items
return {
"success": True,
"game_type": game_type,
"data": parsed_data,
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}
except Exception as e:
return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
# ============== PRIVATE METHODS ==============
    def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple:
        """Analyze the text to suggest suitable games — with retry.

        Returns a 4-tuple: (selected_game_types, score_dicts, metadata, errors).
        Scores for unknown types or below min_score are discarded. If every
        attempt fails, ALL available games are returned as a fallback so the
        caller can still attempt generation.
        """
        # Build the scoring criteria from the registered game configs.
        context = get_analyzer_context()
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.
GAME REQUIREMENTS:
{context}
SCORING:
- 70-100: Text matches game requirements well
- 40-69: Partial match
- 0-39: Does not match requirements
IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field.
Return valid JSON with scores AND metadata about the content:
{{
"scores": [
{{
"type": "NAME_OF_GAME_TYPE",
"score": 80,
"reason": "..."
}}
],
"metadata": {{
"title": "Title from source or create short title",
"description": "One sentence summary",
"grade": 1-5,
"difficulty": 1-5
}}
}}"""),
            ("human", """TEXT TO ANALYZE:
{text}
Analyze for games: {types}
Return JSON:""")
        ])
        max_retries = 2
        for attempt in range(max_retries):
            try:
                resp = (prompt | self.llm).invoke({
                    "context": context,
                    "text": text[:800],  # a sample is enough for scoring
                    "types": ", ".join(available)
                })
                tracker.add(self._get_usage(resp))
                if debug:
                    print(f"📝 Analyzer raw: {resp.content[:300]}")
                # Parse JSON, retrying on an empty response body.
                content = resp.content.strip()
                if not content:
                    if debug:
                        print(f"⚠️ Empty response, retry {attempt + 1}")
                    continue
                data = self._parse_json(content)
                # Keep only known game types that clear the score threshold,
                # best score first.
                scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score]
                scores.sort(key=lambda x: x.get("score", 0), reverse=True)
                # Extract content metadata (title/description/grade/difficulty).
                metadata = data.get("metadata", {})
                if debug:
                    print(f"🔍 Scores: {scores}")
                    print(f"📋 Metadata: {metadata}")
                return [s["type"] for s in scores], scores, metadata, []
            except Exception as e:
                if debug:
                    print(f"⚠️ Analyze attempt {attempt + 1} failed: {e}")
                if attempt == max_retries - 1:
                    # Final fallback: return all games so generation can proceed.
                    return available, [], {}, [f"Analyze error: {e}"]
        # Reached only when every attempt produced an empty response.
        return available, [], {}, ["Analyze failed after retries"]
    def _generate_multi(self, games: List[str], text: str, max_items: int, tracker: TokenUsage, debug: bool) -> tuple:
        """Generate content for several games.

        Single game: delegates to generate(). Multiple games: one combined
        LLM call producing all games at once. Returns (results, errors) where
        results maps game_type -> {"items": [...], "metadata": ...}.

        NOTE(review): max_items is not injected into the multi-game prompt —
        presumably bounded by each game's format_instructions; verify.
        """
        if len(games) == 1:
            result = self.generate(games[0], text, max_items, validate=False, debug=debug)
            tracker.add(result.get("token_usage", {}))
            # Fix: generate returns {data: {items: [...]}} not {items: [...]}
            data = result.get("data") or {}
            items = data.get("items", []) if isinstance(data, dict) else []
            return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", [])
        # Multi-game: build a schema/prompt section for each game.
        games_schema = []
        for gt in games:
            game = get_game(gt)
            if game:
                games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a multi-game content generator.
Generate items for EACH game type following their EXACT schema.
IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.)
RULES: Keep original language, use exact quotes from text."""),
            ("human", """GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
Generate items for: {types}
Return valid JSON: {{{format}}}""")
        ])
        # Expected top-level JSON shape: one key per requested game type.
        fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games])
        try:
            resp = (prompt | self.llm).invoke({
                "schemas": "\n\n".join(games_schema),
                "text": text,
                "types": ", ".join(games),
                "format": fmt
            })
            tracker.add(self._get_usage(resp))
            data = self._parse_json(resp.content)
            results = {}
            errors = []
            for gt in games:
                game_data = data.get(gt, {}) if isinstance(data.get(gt), dict) else {}
                items = game_data.get("items", [])
                items = self._post_process(items, gt)
                # Normalize the structure: {items: [...], metadata: {...}}
                results[gt] = {"items": items, "metadata": game_data.get("metadata")}
                if not items:
                    errors.append(f"No items for {gt}")
            return results, errors
        except Exception as e:
            # On any failure, return an empty-but-well-shaped result per game.
            return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"]
def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]:
"""Validate items trong results"""
validated = {}
for gt, data in results.items():
items = data.get("items", []) if isinstance(data, dict) else []
valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None}
return validated
def _post_process(self, items: List, game_type: str) -> List[Dict]:
ms = int(time.time() * 1000)
result = []
for i, item in enumerate(items):
d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {})
d["id"] = f"{game_type[:2].upper()}-{ms}-{i}"
d["game_type"] = game_type
result.append(d)
return result
def _parse_json(self, content: str) -> Dict:
if "```" in content:
content = content.split("```")[1].replace("json", "").strip()
return json.loads(content)
def _get_usage(self, resp) -> Dict:
if hasattr(resp, 'response_metadata'):
meta = resp.response_metadata
return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {})))
return getattr(resp, 'usage_metadata', {})
def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict:
return {
"success": success,
"games": games,
"game_scores": scores or [],
"metadata": metadata or {},
"results": results,
"errors": errors,
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
}