check point
This commit is contained in:
513
src/core.py
Normal file
513
src/core.py
Normal file
@@ -0,0 +1,513 @@
|
||||
"""
|
||||
core.py - Simple Game Generator Core
|
||||
|
||||
3 USE CASES:
|
||||
1. run_multi() - Analyze + generate nhiều games phù hợp
|
||||
2. run_single() - Analyze + generate 1 game tốt nhất (1 API call)
|
||||
3. generate() - Generate 1 game cụ thể (không analyze)
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
from .llm_config import ModelConfig, get_llm, get_default_config
|
||||
from .game_registry import get_registry, get_game, get_analyzer_context
|
||||
from .validator import QuoteValidator
|
||||
from .logger import logger
|
||||
|
||||
|
||||
@dataclass
class TokenUsage:
    """Accumulates LLM token counts across multiple API calls."""

    prompt_tokens: int = 0      # input-side tokens
    completion_tokens: int = 0  # output-side tokens
    total_tokens: int = 0       # kept in sync by add()

    def add(self, usage: Dict[str, int]):
        """Merge one provider usage dict into the running totals.

        Accepts either OpenAI-style keys (prompt_tokens/completion_tokens)
        or Gemini-style keys (input_tokens/output_tokens).

        Fix: the old code did ``usage.get("prompt_tokens", 0) or
        usage.get("input_tokens", 0)`` which raised TypeError when a
        provider reported both keys as None, and AttributeError when the
        payload itself was None. Missing/None values now count as 0.
        """
        if not usage:
            # Guard against None / empty payloads from providers.
            return
        self.prompt_tokens += usage.get("prompt_tokens") or usage.get("input_tokens") or 0
        self.completion_tokens += usage.get("completion_tokens") or usage.get("output_tokens") or 0
        self.total_tokens = self.prompt_tokens + self.completion_tokens

    def to_dict(self) -> Dict[str, int]:
        """Return the totals as a plain, JSON-serializable dict."""
        return {
            "prompt_tokens": self.prompt_tokens,
            "completion_tokens": self.completion_tokens,
            "total_tokens": self.total_tokens,
        }
|
||||
|
||||
|
||||
class GameCore:
    """
    Simple game generator.

    Usage:
        core = GameCore()

        # 1. Generate several suitable games (analyze first)
        result = core.run_multi(text)

        # 2. Generate the single best game (1 API call)
        result = core.run_single(text)

        # 3. Generate one specific game type (no analysis step)
        result = core.generate("quiz", text)
    """
|
||||
|
||||
def __init__(self, llm_config: Optional[Union[ModelConfig, Dict, str]] = None):
|
||||
self.llm_config = self._parse_config(llm_config)
|
||||
self.llm = get_llm(self.llm_config)
|
||||
self.validator = QuoteValidator()
|
||||
self.registry = get_registry()
|
||||
print(f"🤖 LLM: {self.llm_config.provider}/{self.llm_config.model_name}")
|
||||
|
||||
def _parse_config(self, config) -> ModelConfig:
|
||||
if config is None:
|
||||
if os.getenv("GOOGLE_API_KEY"):
|
||||
return get_default_config("gemini")
|
||||
elif os.getenv("OPENAI_API_KEY"):
|
||||
return get_default_config("openai")
|
||||
return get_default_config("ollama")
|
||||
|
||||
if isinstance(config, ModelConfig):
|
||||
return config
|
||||
if isinstance(config, str):
|
||||
return get_default_config(config)
|
||||
if isinstance(config, dict):
|
||||
return ModelConfig(**config)
|
||||
raise ValueError(f"Invalid config: {type(config)}")
|
||||
|
||||
# ============== 1. RUN MULTI (Analyze + Generate nhiều games) ==============
|
||||
|
||||
def run_multi(
|
||||
self,
|
||||
text: str,
|
||||
enabled_games: Optional[List[str]] = None,
|
||||
max_items: int = 3,
|
||||
min_score: int = 20,
|
||||
validate: bool = True,
|
||||
debug: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze text + Generate nhiều games phù hợp.
|
||||
|
||||
Returns: {success, games, results, errors, token_usage, llm}
|
||||
"""
|
||||
tracker = TokenUsage()
|
||||
errors = []
|
||||
|
||||
# 1. Analyze (also returns metadata)
|
||||
available = enabled_games or self.registry.get_game_types()
|
||||
logger.info(f"Analyzing text for multi-gen. Available games: {available}")
|
||||
games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug)
|
||||
errors.extend(err)
|
||||
|
||||
if not games:
|
||||
logger.warning("Analyzer found no suitable games matches.")
|
||||
return self._result(False, [], {}, errors, tracker, metadata=metadata)
|
||||
|
||||
logger.info(f"Analyzer selected: {games}")
|
||||
|
||||
# 2. Generate
|
||||
results, err = self._generate_multi(games, text, max_items, tracker, debug)
|
||||
errors.extend(err)
|
||||
|
||||
# 3. Validate
|
||||
if validate:
|
||||
results = self._validate(results, text)
|
||||
|
||||
# Check if any game has items
|
||||
has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict))
|
||||
return self._result(has_items, games, results, errors, tracker, scores, metadata)
|
||||
|
||||
# ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ==============
|
||||
|
||||
def run_single(
|
||||
self,
|
||||
text: str,
|
||||
enabled_games: Optional[List[str]] = None,
|
||||
max_items: int = 3,
|
||||
validate: bool = True,
|
||||
debug: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
1 API call: Analyze + Generate game tốt nhất.
|
||||
|
||||
Returns: {success, game_type, reason, items, errors, token_usage, llm}
|
||||
"""
|
||||
tracker = TokenUsage()
|
||||
available = enabled_games or self.registry.get_game_types()
|
||||
logger.info(f"Starting run_single for available games: {available}")
|
||||
|
||||
# Build games info
|
||||
games_info = []
|
||||
for gt in available:
|
||||
game = get_game(gt)
|
||||
if game:
|
||||
example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}"
|
||||
games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}")
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages([
|
||||
("system", """You are an educational game generator.
|
||||
1. ANALYZE text and CHOOSE the BEST game type
|
||||
2. GENERATE items for that game
|
||||
|
||||
RULES:
|
||||
- KEEP original language
|
||||
- original_quote = EXACT copy from source
|
||||
- ALL content from source only"""),
|
||||
("human", """GAMES:
|
||||
{games_info}
|
||||
|
||||
TEXT:
|
||||
{text}
|
||||
|
||||
Choose BEST game from: {types}
|
||||
Generate max {max_items} items.
|
||||
|
||||
Return JSON:
|
||||
{{"game_type": "chosen", "reason": "why", "items": [...]}}""")
|
||||
])
|
||||
|
||||
content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items}
|
||||
|
||||
if debug:
|
||||
print(f"\n{'='*50}\n🎯 RUN SINGLE\n{'='*50}")
|
||||
|
||||
try:
|
||||
resp = (prompt | self.llm).invoke(content)
|
||||
tracker.add(self._get_usage(resp))
|
||||
|
||||
data = self._parse_json(resp.content)
|
||||
game_type = data.get("game_type")
|
||||
items = self._post_process(data.get("items", []), game_type)
|
||||
|
||||
if validate and items:
|
||||
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
|
||||
|
||||
return {
|
||||
"success": len(items) > 0,
|
||||
"game_type": game_type,
|
||||
"reason": data.get("reason", ""),
|
||||
"items": items,
|
||||
"errors": [],
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
except Exception as e:
|
||||
return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
|
||||
|
||||
# ============== 3. GENERATE (1 game cụ thể, không analyze) ==============
|
||||
|
||||
def generate(
|
||||
self,
|
||||
game_type: str,
|
||||
text: str,
|
||||
max_items: int = 3,
|
||||
validate: bool = True,
|
||||
debug: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate 1 game cụ thể"""
|
||||
tracker = TokenUsage()
|
||||
logger.info(f"Generating single game content: {game_type}")
|
||||
|
||||
game = get_game(game_type)
|
||||
|
||||
if not game:
|
||||
return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""}
|
||||
|
||||
# Build Format Rules Section
|
||||
format_rules_section = ""
|
||||
if game.input_format_rules:
|
||||
rules_str = "\n".join(f"- {r}" for r in game.input_format_rules)
|
||||
format_rules_section = f"""
|
||||
CRITICAL: FIRST, VALIDATE THE INPUT TEXT.
|
||||
Format Rules:
|
||||
{rules_str}
|
||||
|
||||
If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else:
|
||||
{{{{ "format_error": "Input text incompatible with game requirements." }}}}
|
||||
"""
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages([
|
||||
("system", f"""{game.generated_system_prompt}
|
||||
{format_rules_section}"""),
|
||||
("human", """TEXT TO PROCESS:
|
||||
{text}
|
||||
|
||||
Generate content in JSON format:
|
||||
{format_instructions}""")
|
||||
])
|
||||
|
||||
if debug:
|
||||
print(f"\n{'='*50}\n🎮 GENERATE: {game_type}\n{'='*50}")
|
||||
|
||||
try:
|
||||
resp = (prompt | self.llm).invoke({
|
||||
"text": text,
|
||||
"format_instructions": game.format_instructions
|
||||
})
|
||||
tracker.add(self._get_usage(resp))
|
||||
|
||||
# 1. Parse as raw JSON first to check for format_error
|
||||
raw_data = None
|
||||
try:
|
||||
raw_data = self._parse_json(resp.content)
|
||||
except:
|
||||
pass
|
||||
|
||||
# 2. Check if it's a format_error immediately
|
||||
if raw_data and raw_data.get("format_error"):
|
||||
return {
|
||||
"success": False,
|
||||
"game_type": game_type,
|
||||
"data": None,
|
||||
"format_error": raw_data["format_error"],
|
||||
"errors": [raw_data["format_error"]],
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
|
||||
parsed_data = raw_data
|
||||
|
||||
# 3. Try output_parser for structured validation if present
|
||||
if game.output_parser:
|
||||
try:
|
||||
parsed = game.output_parser.parse(resp.content)
|
||||
parsed_data = parsed.model_dump()
|
||||
except Exception as pe:
|
||||
if debug: print(f"⚠️ output_parser failed: {pe}")
|
||||
# Keep raw_data if parser fails but we have JSON
|
||||
|
||||
|
||||
# Check format error
|
||||
if parsed_data and parsed_data.get("format_error"):
|
||||
return {
|
||||
"success": False,
|
||||
"game_type": game_type,
|
||||
"data": None,
|
||||
"format_error": parsed_data["format_error"],
|
||||
"errors": [parsed_data["format_error"]],
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
|
||||
# Post-process
|
||||
items = parsed_data.get("items", []) if parsed_data else []
|
||||
items = self._post_process(items, game_type)
|
||||
|
||||
if validate and items:
|
||||
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
|
||||
|
||||
if not items:
|
||||
return {
|
||||
"success": False,
|
||||
"game_type": game_type,
|
||||
"data": None,
|
||||
"format_error": "No items extracted",
|
||||
"errors": [],
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
|
||||
if parsed_data:
|
||||
parsed_data["items"] = items
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"game_type": game_type,
|
||||
"data": parsed_data,
|
||||
"errors": [],
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
except Exception as e:
|
||||
return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
|
||||
|
||||
# ============== PRIVATE METHODS ==============
|
||||
|
||||
    def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple:
        """Analyze the text to suggest suitable games - with retry.

        Asks the LLM to score each candidate game 0-100 against its
        requirements and to extract content metadata (title, grade, ...).

        Args:
            text: Source text (only the first 800 chars are sent).
            available: Candidate game type names.
            min_score: Scores below this are dropped.
            tracker: TokenUsage accumulator, updated in place.
            debug: Print raw LLM output and parsed scores.

        Returns:
            (selected_game_types, score_dicts_sorted_desc, metadata, errors).
            On total failure, returns ALL candidate games with empty scores
            so generation can still proceed.
        """
        # Pull per-game requirement context from the game configs.
        context = get_analyzer_context()

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.

GAME REQUIREMENTS:
{context}

SCORING:
- 70-100: Text matches game requirements well
- 40-69: Partial match
- 0-39: Does not match requirements

IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field.

Return valid JSON with scores AND metadata about the content:
{{
  "scores": [
    {{
      "type": "NAME_OF_GAME_TYPE",
      "score": 80,
      "reason": "..."
    }}
  ],
  "metadata": {{
    "title": "Title from source or create short title",
    "description": "One sentence summary",
    "grade": 1-5,
    "difficulty": 1-5
  }}
}}"""),
            ("human", """TEXT TO ANALYZE:
{text}

Analyze for games: {types}
Return JSON:""")
        ])

        max_retries = 2
        for attempt in range(max_retries):
            try:
                resp = (prompt | self.llm).invoke({
                    "context": context,
                    "text": text[:800],
                    "types": ", ".join(available)
                })
                tracker.add(self._get_usage(resp))

                if debug:
                    print(f"📝 Analyzer raw: {resp.content[:300]}")

                # Parse JSON with a fallback: retry on an empty response.
                content = resp.content.strip()
                if not content:
                    if debug:
                        print(f"⚠️ Empty response, retry {attempt + 1}")
                    continue

                data = self._parse_json(content)
                # Keep only known game types that clear the score threshold.
                scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score]
                scores.sort(key=lambda x: x.get("score", 0), reverse=True)

                # Extract metadata from response
                metadata = data.get("metadata", {})

                if debug:
                    print(f"🔍 Scores: {scores}")
                    print(f"📋 Metadata: {metadata}")

                return [s["type"] for s in scores], scores, metadata, []

            except Exception as e:
                if debug:
                    print(f"⚠️ Analyze attempt {attempt + 1} failed: {e}")
                if attempt == max_retries - 1:
                    # Final fallback: return all candidate games with no scores.
                    return available, [], {}, [f"Analyze error: {e}"]

        return available, [], {}, ["Analyze failed after retries"]
|
||||
|
||||
    def _generate_multi(self, games: List[str], text: str, max_items: int, tracker: TokenUsage, debug: bool) -> tuple:
        """Generate content for several games.

        A single selected game delegates to generate(); multiple games are
        produced in ONE combined LLM call with per-game schemas.

        Args:
            games: Selected game type names (non-empty).
            text: Source text.
            max_items: Item cap (passed through on the single-game path).
            tracker: TokenUsage accumulator, updated in place.
            debug: Passed through to generate() on the single-game path.

        Returns:
            (results, errors) where results maps
            game_type -> {"items": [...], "metadata": {...} | None}.
        """
        if len(games) == 1:
            result = self.generate(games[0], text, max_items, validate=False, debug=debug)
            tracker.add(result.get("token_usage", {}))
            # Fix: generate returns {data: {items: [...]}} not {items: [...]}
            data = result.get("data") or {}
            items = data.get("items", []) if isinstance(data, dict) else []
            return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", [])

        # Multi-game: Build schema info for each game
        games_schema = []
        for gt in games:
            game = get_game(gt)
            if game:
                games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}

REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a multi-game content generator.
Generate items for EACH game type following their EXACT schema.
IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.)
RULES: Keep original language, use exact quotes from text."""),
            ("human", """GAMES AND THEIR SCHEMAS:
{schemas}

SOURCE TEXT:
{text}

Generate items for: {types}
Return valid JSON: {{{format}}}""")
        ])

        # Expected top-level JSON shape, one key per requested game.
        fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games])

        try:
            resp = (prompt | self.llm).invoke({
                "schemas": "\n\n".join(games_schema),
                "text": text,
                "types": ", ".join(games),
                "format": fmt
            })
            tracker.add(self._get_usage(resp))

            data = self._parse_json(resp.content)
            results = {}
            errors = []
            for gt in games:
                game_data = data.get(gt, {}) if isinstance(data.get(gt), dict) else {}
                items = game_data.get("items", [])
                items = self._post_process(items, gt)
                # Unified structure: {items: [...], metadata: {...}}
                results[gt] = {"items": items, "metadata": game_data.get("metadata")}
                if not items:
                    errors.append(f"No items for {gt}")

            return results, errors
        except Exception as e:
            return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"]
|
||||
|
||||
def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]:
|
||||
"""Validate items trong results"""
|
||||
validated = {}
|
||||
for gt, data in results.items():
|
||||
items = data.get("items", []) if isinstance(data, dict) else []
|
||||
valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
|
||||
validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None}
|
||||
return validated
|
||||
|
||||
def _post_process(self, items: List, game_type: str) -> List[Dict]:
|
||||
ms = int(time.time() * 1000)
|
||||
result = []
|
||||
for i, item in enumerate(items):
|
||||
d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {})
|
||||
d["id"] = f"{game_type[:2].upper()}-{ms}-{i}"
|
||||
d["game_type"] = game_type
|
||||
result.append(d)
|
||||
return result
|
||||
|
||||
def _parse_json(self, content: str) -> Dict:
|
||||
if "```" in content:
|
||||
content = content.split("```")[1].replace("json", "").strip()
|
||||
return json.loads(content)
|
||||
|
||||
def _get_usage(self, resp) -> Dict:
|
||||
if hasattr(resp, 'response_metadata'):
|
||||
meta = resp.response_metadata
|
||||
return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {})))
|
||||
return getattr(resp, 'usage_metadata', {})
|
||||
|
||||
def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict:
|
||||
return {
|
||||
"success": success,
|
||||
"games": games,
|
||||
"game_scores": scores or [],
|
||||
"metadata": metadata or {},
|
||||
"results": results,
|
||||
"errors": errors,
|
||||
"token_usage": tracker.to_dict(),
|
||||
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
|
||||
}
|
||||
Reference in New Issue
Block a user