This commit is contained in:
vuongps38770
2026-01-13 09:33:10 +07:00
parent 29544da4c6
commit 7c41ddaa82
9 changed files with 1362 additions and 599 deletions

1
.env
View File

@@ -0,0 +1 @@
PORT=2088

258
api.py
View File

@@ -5,12 +5,19 @@ from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from pathlib import Path
import re
from dotenv import load_dotenv
load_dotenv()
from src import (
GameCore, get_registry, reload_games,
get_active_game_types, get_active_type_ids,
get_game_by_id, id_to_type, type_to_id,
ModelConfig
GameCore,
get_registry,
reload_games,
get_active_game_types,
get_active_type_ids,
get_game_by_id,
id_to_type,
type_to_id,
ModelConfig,
)
@@ -18,7 +25,7 @@ from src import (
app = FastAPI(
title="Game Generator API",
description="API tạo game giáo dục từ văn bản",
version="2.0.0"
version="2.0.0",
)
app.add_middleware(
@@ -31,31 +38,43 @@ app.add_middleware(
# ============== REQUEST/RESPONSE MODELS ==============
class LLMConfigRequest(BaseModel):
provider: str = Field(default="gemini", description="ollama, gemini, openai")
model_name: str = Field(default="gemini-2.0-flash-lite")
api_key: Optional[str] = Field(default=None, description="API key (None = lấy từ env)")
api_key: Optional[str] = Field(
default=None, description="API key (None = lấy từ env)"
)
temperature: float = Field(default=0.1)
base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama")
class GenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)")
enabled_game_ids: Optional[List[int]] = Field(
default=None,
description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)",
)
run_analyzer: bool = Field(default=True)
run_validator: bool = Field(default=True)
max_items: Optional[int] = Field(default=3)
min_score: int = Field(default=50, description="Minimum score (0-100) for analyzer to include a game")
max_items: Optional[int] = Field(default=100)
min_score: int = Field(
default=50, description="Minimum score (0-100) for analyzer to include a game"
)
debug: bool = Field(default=False, description="Print prompts to server log")
# LLM config (optional - override global)
llm_config: Optional[LLMConfigRequest] = Field(default=None, description="Override LLM config")
llm_config: Optional[LLMConfigRequest] = Field(
default=None, description="Override LLM config"
)
class TokenUsageResponse(BaseModel):
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
input_chars: int = 0 # Character count sent to LLM
output_chars: int = 0 # Character count received from LLM
class GameScoreInfo(BaseModel):
@@ -66,12 +85,14 @@ class GameScoreInfo(BaseModel):
class GameResultData(BaseModel):
"""Structure thống nhất cho mỗi game result"""
items: List[Dict[str, Any]] = []
metadata: Optional[Dict[str, Any]] = None
class CommonMetadataResponse(BaseModel):
"""Metadata chung cho toàn bộ kết quả generate"""
title: str = ""
description: str = ""
grade: int = 0
@@ -92,7 +113,7 @@ class GenerateResponse(BaseModel):
class GameInfo(BaseModel):
type_id: int
game_type: str # Keep for reference
game_type: str
display_name: str
description: str
active: bool
@@ -135,7 +156,7 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
model_name=config_override.model_name,
api_key=config_override.api_key,
temperature=config_override.temperature,
base_url=config_override.base_url
base_url=config_override.base_url,
)
return GameCore(llm_config=config)
@@ -149,6 +170,7 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
# ============== ENDPOINTS ==============
@app.post("/generate", response_model=GenerateResponse)
async def generate_games(request: GenerateRequest):
"""Generate games from text with scoring"""
@@ -157,17 +179,18 @@ async def generate_games(request: GenerateRequest):
# Convert type_ids to game_types
if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)]
games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else:
games = get_active_game_types()
result = core.run_multi(
result = await core.run_multi_async(
text=request.text,
enabled_games=games,
max_items=request.max_items or 3,
min_score=request.min_score,
max_items=request.max_items or 100,
validate=request.run_validator,
debug=request.debug
debug=request.debug,
)
# Convert game_types to type_ids in response
@@ -176,27 +199,33 @@ async def generate_games(request: GenerateRequest):
# Convert game_scores
game_scores = []
for s in result.get("game_scores", []):
game_scores.append(GameScoreInfo(
game_scores.append(
GameScoreInfo(
type_id=type_to_id(s.get("type", "")),
score=s.get("score", 0),
reason=s.get("reason", "")
))
reason=s.get("reason", ""),
)
)
# Convert results keys to type_ids
results_by_id = {}
for game_type, items in result.get("results", {}).items():
tid = type_to_id(game_type)
if tid > 0:
if tid >= 0: # 0=quiz, 1=sequence are valid
results_by_id[tid] = items
# Get common metadata from analyzer
core_meta = result.get("metadata", {})
common_metadata = CommonMetadataResponse(
common_metadata = (
CommonMetadataResponse(
title=core_meta.get("title", ""),
description=core_meta.get("description", ""),
grade=core_meta.get("grade", 0),
difficulty=core_meta.get("difficulty", 0)
) if core_meta else None
difficulty=core_meta.get("difficulty", 0),
)
if core_meta
else None
)
return GenerateResponse(
success=result.get("success", False),
@@ -206,25 +235,120 @@ async def generate_games(request: GenerateRequest):
results=results_by_id,
llm=result.get("llm"),
token_usage=result.get("token_usage"),
errors=result.get("errors", [])
errors=result.get("errors", []),
)
except Exception as e:
return GenerateResponse(
success=False,
games=[],
game_scores=[],
results={},
errors=[str(e)]
success=False, games=[], game_scores=[], results={}, errors=[str(e)]
)
# ============== FAST GENERATE (1 API call - OPTIMIZED) ==============
class FastGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(
default=None, description="Limit type_ids"
)
max_items: int = Field(default=100, description="Max items per game")
min_score: int = Field(default=50, description="Min score 0-100 to include game")
run_validator: bool = Field(default=True)
debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None)
@app.post("/generate/fast", response_model=GenerateResponse)
async def generate_fast(request: FastGenerateRequest):
"""
🚀 OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp.
So với /generate (2+ calls):
- Chỉ 1 API call
- Tiết kiệm quota/tokens
- Nhanh hơn
So với /generate/single:
- Trả về NHIỀU games (không chỉ 1)
"""
try:
core = get_core(request.llm_config)
# Convert type_ids to game_types
if request.enabled_game_ids:
games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else:
games = get_active_game_types()
result = await core.run_fast_async(
text=request.text,
enabled_games=games,
max_items=request.max_items,
min_score=request.min_score,
validate=request.run_validator,
debug=request.debug,
)
# Convert to response format (same as /generate)
game_ids = [type_to_id(g) for g in result.get("games", [])]
game_scores = [
GameScoreInfo(
type_id=type_to_id(s.get("type", "")),
score=s.get("score", 0),
reason=s.get("reason", ""),
)
for s in result.get("game_scores", [])
]
results_by_id = {}
for game_type, data in result.get("results", {}).items():
tid = type_to_id(game_type)
if tid >= 0: # 0=quiz, 1=sequence are valid
results_by_id[tid] = data
core_meta = result.get("metadata", {})
common_metadata = (
CommonMetadataResponse(
title=core_meta.get("title", ""),
description=core_meta.get("description", ""),
grade=core_meta.get("grade", 0),
difficulty=core_meta.get("difficulty", 0),
)
if core_meta
else None
)
return GenerateResponse(
success=result.get("success", False),
games=game_ids,
game_scores=game_scores,
metadata=common_metadata,
results=results_by_id,
api_calls=1, # Always 1 for fast
llm=result.get("llm"),
token_usage=result.get("token_usage"),
errors=result.get("errors", []),
)
except Exception as e:
return GenerateResponse(
success=False, games=[], game_scores=[], results={}, errors=[str(e)]
)
# ============== SINGLE BEST (1 PROMPT) ==============
class SingleGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="Limit type_ids to choose from")
max_items: int = Field(default=3, description="Max items to generate")
enabled_game_ids: Optional[List[int]] = Field(
default=None, description="Limit type_ids to choose from"
)
max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True)
debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -254,7 +378,9 @@ async def generate_single_game(request: SingleGenerateRequest):
# Convert type_ids to game_types
if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)]
games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else:
games = None
@@ -263,7 +389,7 @@ async def generate_single_game(request: SingleGenerateRequest):
enabled_games=games,
max_items=request.max_items,
debug=request.debug,
validate=request.run_validator
validate=request.run_validator,
)
# Convert game_type to type_id
@@ -277,21 +403,19 @@ async def generate_single_game(request: SingleGenerateRequest):
items=result.get("items", []),
token_usage=result.get("token_usage"),
llm=result.get("llm"),
errors=result.get("errors", [])
errors=result.get("errors", []),
)
except Exception as e:
return SingleGenerateResponse(
success=False,
errors=[str(e)]
)
return SingleGenerateResponse(success=False, errors=[str(e)])
# ============== DIRECT GENERATE (1 game cụ thể, không analyze) ==============
class DirectGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10)
max_items: int = Field(default=3, description="Max items to generate")
max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True)
debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -299,6 +423,7 @@ class DirectGenerateRequest(BaseModel):
class DirectGenerateResponse(BaseModel):
"""Response thống nhất, giống GenerateResponse nhưng cho 1 game"""
success: bool
games: List[int] = [] # Single type_id in list
results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse
@@ -322,7 +447,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
return DirectGenerateResponse(
success=False,
games=[type_id],
errors=[f"Game with type_id={type_id} not found"]
errors=[f"Game with type_id={type_id} not found"],
)
core = get_core(request.llm_config)
@@ -332,7 +457,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
text=request.text,
max_items=request.max_items,
validate=request.run_validator,
debug=request.debug
debug=request.debug,
)
format_error = result.get("format_error")
@@ -341,7 +466,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
# Build results với structure thống nhất
game_result = GameResultData(
items=data.get("items", []) if isinstance(data, dict) else [],
metadata=data.get("metadata") if isinstance(data, dict) else None
metadata=data.get("metadata") if isinstance(data, dict) else None,
)
return DirectGenerateResponse(
@@ -352,15 +477,11 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
format_error=format_error,
token_usage=result.get("token_usage"),
llm=result.get("llm"),
errors=result.get("errors", [])
errors=result.get("errors", []),
)
except Exception as e:
return DirectGenerateResponse(
success=False,
games=[type_id],
errors=[str(e)]
)
return DirectGenerateResponse(success=False, games=[type_id], errors=[str(e)])
@app.get("/games", response_model=GamesListResponse)
@@ -373,14 +494,16 @@ async def list_games():
active_count = 0
for game_type, game in all_games.items():
games_list.append(GameInfo(
games_list.append(
GameInfo(
type_id=game.type_id,
game_type=game.game_type,
display_name=game.display_name,
description=game.description,
active=game.active,
max_items=game.max_items,
))
)
)
if game.active:
active_count += 1
@@ -388,9 +511,7 @@ async def list_games():
games_list.sort(key=lambda g: g.type_id)
return GamesListResponse(
total=len(games_list),
active_count=active_count,
games=games_list
total=len(games_list), active_count=active_count, games=games_list
)
@@ -420,7 +541,7 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse:
if not re.search(pattern, content):
raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py")
new_content = re.sub(pattern, f'\\1{new_value}', content)
new_content = re.sub(pattern, f"\\1{new_value}", content)
game_file.write_text(new_content, encoding="utf-8")
reload_games()
@@ -430,7 +551,7 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse:
success=True,
message=f"Game '{game_type}' has been {action}",
game_type=game_type,
active=active
active=active,
)
@@ -447,7 +568,7 @@ async def get_llm_config():
provider=_current_config.provider,
model_name=_current_config.model_name,
temperature=_current_config.temperature,
base_url=_current_config.base_url
base_url=_current_config.base_url,
)
@@ -461,7 +582,7 @@ async def set_llm_config(config: LLMConfigRequest):
model_name=config.model_name,
api_key=config.api_key,
temperature=config.temperature,
base_url=config.base_url
base_url=config.base_url,
)
try:
@@ -470,13 +591,10 @@ async def set_llm_config(config: LLMConfigRequest):
return ActionResponse(
success=True,
message=f"LLM changed to {config.provider}/{config.model_name}"
message=f"LLM changed to {config.provider}/{config.model_name}",
)
except Exception as e:
return ActionResponse(
success=False,
message=f"Failed to change LLM: {str(e)}"
)
return ActionResponse(success=False, message=f"Failed to change LLM: {str(e)}")
@app.post("/reload", response_model=ActionResponse)
@@ -488,17 +606,13 @@ async def reload_all_games():
_core = None
return ActionResponse(
success=True,
message=f"Reloaded. Active games: {get_active_game_types()}"
success=True, message=f"Reloaded. Active games: {get_active_game_types()}"
)
@app.get("/health")
async def health_check():
return {
"status": "healthy",
"active_games": get_active_game_types()
}
return {"status": "healthy", "active_games": get_active_game_types()}
# ============== STARTUP ==============
@@ -510,4 +624,8 @@ async def startup():
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=2088)
port = os.getenv("PORT")
if not port:
raise ValueError("Missing required environment variable: PORT")
uvicorn.run(app, host="0.0.0.0", port=int(port))

View File

@@ -1,23 +1,57 @@
"""
games/match.py - Match Game - Match sentences with images
games/match.py - Match Game - Match words/phrases with images
type_id = 3
Input: Danh sách từ hoặc cụm từ
Output: Mỗi item gồm từ/cụm từ và mô tả hình ảnh tương ứng
"""
from typing import List
from typing import List, Literal
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# ============== SCHEMA ==============
class MatchItem(BaseModel):
word: str = Field(description="The sentence to be matched (EXACT copy from source)")
match_with: str = Field(description="Short keyword for reference")
original_quote: str = Field(description="EXACT quote from source text")
image_description: str = Field(default="", description="Detailed visual description for image generation/search")
image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects")
"""Schema cho 1 item của Match game"""
word: str = Field(
description="The word or phrase to be matched (EXACT copy from source, cleaned of numbering)"
)
original_quote: str = Field(
description="EXACT quote from source text before any cleaning"
)
image_description: str = Field(
description="Detailed visual description for image generation in ENGLISH. Must be specific and visual."
)
image_keywords: List[str] = Field(
default=[], description="2-3 English keywords for image search"
)
image_is_complex: bool = Field(
default=False,
description="True if image needs precise quantities, humans, or multiple detailed objects",
)
class MatchMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary of the content")
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced)"
)
type: Literal["match"] = Field(default="match", description="Game type")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
class MatchOutput(BaseModel):
"""Output wrapper for match items"""
items: List[MatchItem] = Field(description="List of match items generated from source text")
items: List[MatchItem] = Field(
description="List of match items generated from source text"
)
metadata: MatchMetadata = Field(description="Metadata about the content")
# Output parser
@@ -26,56 +60,110 @@ output_parser = PydanticOutputParser(pydantic_object=MatchOutput)
# ============== CONFIG ==============
GAME_CONFIG = {
# === REQUIRED ===
"game_type": "match",
"type_id": 3,
"display_name": "Match with Image",
"description": "Match sentences with images",
"active": True,
"min_items": 2,
"max_items": 10,
"description": "Match words or phrases with their corresponding images",
"schema": MatchItem,
"output_schema": MatchOutput,
"output_parser": output_parser,
"system_prompt": """Extract sentences and create image descriptions for matching game.
The game will show images and players must match them with the correct sentences.
YOUR TASK:
1. Extract meaningful sentences from the source text
2. Create a DETAILED image_description that clearly represents the sentence
3. The image should be distinct enough to match with its sentence
CRITICAL RULES:
1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text
2. original_quote MUST be an EXACT copy from source text
3. image_description must be DETAILED and SPECIFIC to the sentence content
4. Each image should be visually distinguishable from others""",
# === OPTIONAL ===
"active": True,
"max_items": 10,
# Input validation rules
"input_format_rules": [
"Text MUST be a list of words or phrases separated by commas, semicolons, or newlines",
"NOT suitable for long sentences or paragraphs",
"Each item should be a concrete noun/concept that can be visualized",
],
# Analyzer rules - khi nào nên chọn game này
"analyzer_rules": [
"Text is a list of words or short phrases",
"Words represent concrete objects/concepts that can be visualized",
"Examples: 'apple, banana, orange' or 'cat; dog; bird'",
"NOT suitable for abstract concepts or long sentences",
],
# Generation rules - cách tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE for 'word' field - Do NOT translate",
"original_quote = EXACT copy from source before cleaning",
"Clean numbering like '1.', 'a)', '' from word field",
"Each word/phrase should represent a visualizable concept",
# Image rules
"image_description: MUST be DETAILED visual description in ENGLISH",
"image_description: Describe colors, shapes, actions, context",
"image_keywords: 2-3 English keywords for search",
"image_is_complex: TRUE for humans, precise counts, complex scenes",
"NEVER leave image_description empty!",
# Quality rules
"Each image should be visually DISTINCT from others",
"Avoid generic descriptions - be specific",
],
"examples": [], # Defined below
}
# ============== EXAMPLES ==============
EXAMPLES = [
{
"input": "The Sun is a star. The Moon orbits Earth.",
"input": "apple; banana;",
"output": {
"items": [
{
"word": "The Sun is a star.",
"match_with": "sun",
"original_quote": "The Sun is a star.",
"image_description": "A bright glowing yellow sun with solar flares",
"image_is_complex": False
"word": "apple",
"original_quote": "apple",
"image_description": "A shiny red apple with a green leaf on top",
"image_keywords": ["apple", "fruit", "red"],
"image_is_complex": False,
},
{
"word": "The Moon orbits Earth.",
"match_with": "moon",
"original_quote": "The Moon orbits Earth.",
"image_description": "A grey moon circling around the blue Earth planet",
"image_is_complex": False
}
]
"word": "banana",
"original_quote": "banana",
"image_description": "A curved yellow banana",
"image_keywords": ["banana", "fruit", "yellow"],
"image_is_complex": False,
},
],
"metadata": {
"title": "Fruits",
"description": "Common fruits vocabulary",
"grade": 1,
"type": "match",
"difficulty": 1,
},
},
"why_suitable": "Simple words representing concrete objects that can be visualized",
},
{
"input": "1. elephant\n2. giraffe\n",
"output": {
"items": [
{
"word": "elephant",
"original_quote": "1. elephant",
"image_description": "A large grey elephant with big ears and long trunk",
"image_keywords": ["elephant", "animal", "africa"],
"image_is_complex": False,
},
{
"word": "giraffe",
"original_quote": "2. giraffe",
"image_description": "A tall giraffe with brown spots and long neck",
"image_keywords": ["giraffe", "tall", "spots"],
"image_is_complex": False,
},
],
"metadata": {
"title": "African Animals",
"description": "Safari animals vocabulary",
"grade": 2,
"type": "match",
"difficulty": 1,
},
},
"why_suitable": "Numbered list of animals - numbering will be cleaned",
},
"why_suitable": "Has distinct concepts that can be visualized and matched"
}
]
GAME_CONFIG["examples"] = EXAMPLES

View File

@@ -6,6 +6,7 @@ core.py - Simple Game Generator Core
2. run_single() - Analyze + generate 1 game tốt nhất (1 API call)
3. generate() - Generate 1 game cụ thể (không analyze)
"""
import os
import json
import time
@@ -25,14 +26,31 @@ class TokenUsage:
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
input_chars: int = 0 # Character count sent to LLM
output_chars: int = 0 # Character count received from LLM
def add(self, usage: Dict[str, int]):
self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0)
self.completion_tokens += usage.get("completion_tokens", 0) or usage.get("output_tokens", 0)
self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get(
"input_tokens", 0
)
self.completion_tokens += usage.get("completion_tokens", 0) or usage.get(
"output_tokens", 0
)
self.total_tokens = self.prompt_tokens + self.completion_tokens
def add_chars(self, input_text: str, output_text: str):
"""Track character counts for LLM input/output"""
self.input_chars += len(input_text) if input_text else 0
self.output_chars += len(output_text) if output_text else 0
def to_dict(self) -> Dict[str, int]:
return {"prompt_tokens": self.prompt_tokens, "completion_tokens": self.completion_tokens, "total_tokens": self.total_tokens}
return {
"prompt_tokens": self.prompt_tokens,
"completion_tokens": self.completion_tokens,
"total_tokens": self.total_tokens,
"input_chars": self.input_chars,
"output_chars": self.output_chars,
}
class GameCore:
@@ -84,7 +102,7 @@ class GameCore:
max_items: int = 3,
min_score: int = 20,
validate: bool = True,
debug: bool = False
debug: bool = False,
) -> Dict[str, Any]:
"""
Analyze text + Generate nhiều games phù hợp.
@@ -97,7 +115,9 @@ class GameCore:
# 1. Analyze (also returns metadata)
available = enabled_games or self.registry.get_game_types()
logger.info(f"Analyzing text for multi-gen. Available games: {available}")
games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug)
games, scores, metadata, err = self._analyze(
text, available, min_score, tracker, debug
)
errors.extend(err)
if not games:
@@ -115,8 +135,186 @@ class GameCore:
results = self._validate(results, text)
# Check if any game has items
has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict))
return self._result(has_items, games, results, errors, tracker, scores, metadata)
has_items = any(
data.get("items", []) for data in results.values() if isinstance(data, dict)
)
return self._result(
has_items, games, results, errors, tracker, scores, metadata
)
# ============== 1.5. RUN FAST (1 API call: Analyze + Generate ALL suitable games) ==============
def run_fast(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 50,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""
OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp.
Output format GIỐNG HỆT run_multi():
- 1 call duy nhất thay vì 2 (analyze + generate)
- Trả về nhiều games với items
Returns: {success, games, game_scores, metadata, results, token_usage, llm}
"""
tracker = TokenUsage()
available = enabled_games or self.registry.get_game_types()
logger.info(f"[run_fast] Starting with games: {available}")
# Build FULL game schemas (giống _generate_multi)
games_schema = []
for gt in available:
game = get_game(gt)
if game:
games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
# Format cho response
fmt = ", ".join(
[
f'"{gt}": {{"score": 0-100, "reason": "...", "items": [...]}}'
for gt in available
]
)
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""You are an educational game analyzer AND generator. In ONE response:
1. SCORE each game type (0-100) based on how well the text matches game requirements
2. GENERATE items for games with score >= {min_score}
SCORING GUIDE:
- 70-100: Text matches game requirements well → GENERATE items
- 40-69: Partial match → GENERATE items if >= min_score
- 0-39: Does not match → DO NOT generate items
GENERATION RULES:
- KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- Generate max {max_items} items per game
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
),
(
"human",
"""GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
RESPOND with this EXACT JSON structure:
{{
"metadata": {{
"title": "short title from source",
"description": "one sentence summary",
"grade": 1-5,
"difficulty": 1-5
}},
{format}
}}""",
),
]
)
if debug:
print(f"\n{'=' * 50}\n✨ RUN FAST (1 call)\n{'=' * 50}")
print(f"Text length: {len(text)}")
try:
# Build input for tracking
invoke_params = {
"schemas": "\n\n".join(games_schema),
"text": text,
"format": fmt,
"min_score": min_score,
"max_items": max_items,
}
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp))
# Track character counts: input = all params joined, output = response content
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
if debug:
print(f"📝 Response: {resp.content[:500]}...")
data = self._parse_json(resp.content)
metadata = data.get("metadata", {})
# Process results - format giống _generate_multi
results = {}
scores = []
selected_games = []
errors = []
for gt in available:
game_data = data.get(gt, {})
if not isinstance(game_data, dict):
continue
score = game_data.get("score", 0)
reason = game_data.get("reason", "")
items = game_data.get("items", [])
scores.append({"type": gt, "score": score, "reason": reason})
if score >= min_score and items:
# Post-process items (giống _generate_multi)
processed_items = self._post_process(items, gt)
# Validate if needed
if validate:
processed_items = [
i
for i in processed_items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
# Thống nhất structure: {items: [...], metadata: {...}} - giống run_multi
results[gt] = {
"items": processed_items,
"metadata": game_data.get("metadata"),
}
if processed_items:
selected_games.append(gt)
else:
errors.append(f"No valid items for {gt}")
elif score >= min_score:
errors.append(f"No items generated for {gt}")
# Sort scores
scores.sort(key=lambda x: x.get("score", 0), reverse=True)
return self._result(
success=len(selected_games) > 0,
games=selected_games,
results=results,
errors=errors,
tracker=tracker,
scores=scores,
metadata=metadata,
)
except Exception as e:
logger.error(f"[run_fast] Error: {e}")
return self._result(False, [], {}, [str(e)], tracker)
# ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ==============
@@ -126,7 +324,7 @@ class GameCore:
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False
debug: bool = False,
) -> Dict[str, Any]:
"""
1 API call: Analyze + Generate game tốt nhất.
@@ -142,19 +340,33 @@ class GameCore:
for gt in available:
game = get_game(gt)
if game:
example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}"
games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}")
example = (
json.dumps(
game.examples[0].get("output", {}), ensure_ascii=False, indent=2
)
if game.examples
else "{}"
)
games_info.append(
f"### {gt}\n{game.description}\nExample output:\n{example}"
)
prompt = ChatPromptTemplate.from_messages([
("system", """You are an educational game generator.
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""You are an educational game generator.
1. ANALYZE text and CHOOSE the BEST game type
2. GENERATE items for that game
RULES:
- KEEP original language
- original_quote = EXACT copy from source
- ALL content from source only"""),
("human", """GAMES:
- ALL content from source only""",
),
(
"human",
"""GAMES:
{games_info}
TEXT:
@@ -164,10 +376,17 @@ Choose BEST game from: {types}
Generate max {max_items} items.
Return JSON:
{{"game_type": "chosen", "reason": "why", "items": [...]}}""")
])
{{"game_type": "chosen", "reason": "why", "items": [...]}}""",
),
]
)
content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items}
content = {
"games_info": "\n\n".join(games_info),
"text": text[:2000],
"types": ", ".join(available),
"max_items": max_items,
}
if debug:
print(f"\n{'=' * 50}\n🎯 RUN SINGLE\n{'=' * 50}")
@@ -176,12 +395,22 @@ Return JSON:
resp = (prompt | self.llm).invoke(content)
tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in content.values())
tracker.add_chars(input_text, resp.content)
data = self._parse_json(resp.content)
game_type = data.get("game_type")
items = self._post_process(data.get("items", []), game_type)
if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
return {
"success": len(items) > 0,
@@ -190,10 +419,17 @@ Return JSON:
"items": items,
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
except Exception as e:
return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
return {
"success": False,
"game_type": None,
"items": [],
"errors": [str(e)],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# ============== 3. GENERATE (1 game cụ thể, không analyze) ==============
@@ -203,7 +439,7 @@ Return JSON:
text: str,
max_items: int = 3,
validate: bool = True,
debug: bool = False
debug: bool = False,
) -> Dict[str, Any]:
"""Generate 1 game cụ thể"""
tracker = TokenUsage()
@@ -212,7 +448,14 @@ Return JSON:
game = get_game(game_type)
if not game:
return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""}
return {
"success": False,
"game_type": game_type,
"items": [],
"errors": [f"Game not found: {game_type}"],
"token_usage": {},
"llm": "",
}
# Build Format Rules Section
format_rules_section = ""
@@ -227,26 +470,39 @@ If the text is completely UNSUITABLE for this game type, you MUST output strictl
{{{{ "format_error": "Input text incompatible with game requirements." }}}}
"""
prompt = ChatPromptTemplate.from_messages([
("system", f"""{game.generated_system_prompt}
{format_rules_section}"""),
("human", """TEXT TO PROCESS:
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
f"""{game.generated_system_prompt}
{format_rules_section}""",
),
(
"human",
"""TEXT TO PROCESS:
{text}
Generate content in JSON format:
{format_instructions}""")
])
{format_instructions}""",
),
]
)
if debug:
print(f"\n{'=' * 50}\n🎮 GENERATE: {game_type}\n{'=' * 50}")
try:
resp = (prompt | self.llm).invoke({
invoke_params = {
"text": text,
"format_instructions": game.format_instructions
})
"format_instructions": game.format_instructions,
}
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
# 1. Parse as raw JSON first to check for format_error
raw_data = None
try:
@@ -263,7 +519,7 @@ Generate content in JSON format:
"format_error": raw_data["format_error"],
"errors": [raw_data["format_error"]],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
parsed_data = raw_data
@@ -274,10 +530,10 @@ Generate content in JSON format:
parsed = game.output_parser.parse(resp.content)
parsed_data = parsed.model_dump()
except Exception as pe:
if debug: print(f"⚠️ output_parser failed: {pe}")
if debug:
print(f"⚠️ output_parser failed: {pe}")
# Keep raw_data if parser fails but we have JSON
# Check format error
if parsed_data and parsed_data.get("format_error"):
return {
@@ -287,7 +543,7 @@ Generate content in JSON format:
"format_error": parsed_data["format_error"],
"errors": [parsed_data["format_error"]],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# Post-process
@@ -295,7 +551,13 @@ Generate content in JSON format:
items = self._post_process(items, game_type)
if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
if not items:
return {
@@ -305,7 +567,7 @@ Generate content in JSON format:
"format_error": "No items extracted",
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
if parsed_data:
@@ -317,20 +579,37 @@ Generate content in JSON format:
"data": parsed_data,
"errors": [],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
except Exception as e:
return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"}
return {
"success": False,
"game_type": game_type,
"data": None,
"errors": [str(e)],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# ============== PRIVATE METHODS ==============
def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple:
def _analyze(
self,
text: str,
available: List[str],
min_score: int,
tracker: TokenUsage,
debug: bool,
) -> tuple:
"""Analyze text để suggest games - với retry"""
# Lấy context từ game configs
context = get_analyzer_context()
prompt = ChatPromptTemplate.from_messages([
("system", """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.
GAME REQUIREMENTS:
{context}
@@ -357,26 +636,36 @@ Return valid JSON with scores AND metadata about the content:
"grade": 1-5,
"difficulty": 1-5
}}
}}"""),
("human", """TEXT TO ANALYZE:
}}""",
),
(
"human",
"""TEXT TO ANALYZE:
{text}
Analyze for games: {types}
Return JSON:""")
])
Return JSON:""",
),
]
)
max_retries = 2
for attempt in range(max_retries):
try:
resp = (prompt | self.llm).invoke({
invoke_params = {
"context": context,
"text": text[:800],
"types": ", ".join(available)
})
"text": text,
"types": ", ".join(available),
}
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
if debug:
print(f"📝 Analyzer raw: {resp.content[:300]}")
print(f"📝 Analyzer raw: {resp.content}")
# Parse JSON với fallback
content = resp.content.strip()
@@ -386,7 +675,11 @@ Return JSON:""")
continue
data = self._parse_json(content)
scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score]
scores = [
s
for s in data.get("scores", [])
if s.get("type") in available and s.get("score", 0) >= min_score
]
scores.sort(key=lambda x: x.get("score", 0), reverse=True)
# Extract metadata from response
@@ -407,15 +700,26 @@ Return JSON:""")
return available, [], {}, ["Analyze failed after retries"]
def _generate_multi(self, games: List[str], text: str, max_items: int, tracker: TokenUsage, debug: bool) -> tuple:
def _generate_multi(
self,
games: List[str],
text: str,
max_items: int,
tracker: TokenUsage,
debug: bool,
) -> tuple:
"""Generate nhiều games"""
if len(games) == 1:
result = self.generate(games[0], text, max_items, validate=False, debug=debug)
result = self.generate(
games[0], text, max_items, validate=False, debug=debug
)
tracker.add(result.get("token_usage", {}))
# Fix: generate returns {data: {items: [...]}} not {items: [...]}
data = result.get("data") or {}
items = data.get("items", []) if isinstance(data, dict) else []
return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", [])
return {
games[0]: {"items": items, "metadata": data.get("metadata")}
}, result.get("errors", [])
# Multi-game: Build schema info for each game
games_schema = []
@@ -428,32 +732,52 @@ Return JSON:""")
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
prompt = ChatPromptTemplate.from_messages([
("system", """You are a multi-game content generator.
Generate items for EACH game type following their EXACT schema.
IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.)
RULES: Keep original language, use exact quotes from text."""),
("human", """GAMES AND THEIR SCHEMAS:
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""You are a multi-game content generator. In ONE response:
1. Generate items for EACH game type following their EXACT schema
GENERATION RULES:
- KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
),
(
"human",
"""GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
Generate items for: {types}
Return valid JSON: {{{format}}}""")
])
Return valid JSON: {{{format}}}""",
),
]
)
fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games])
fmt = ", ".join(
[f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]
)
try:
resp = (prompt | self.llm).invoke({
invoke_params = {
"schemas": "\n\n".join(games_schema),
"text": text,
"types": ", ".join(games),
"format": fmt
})
"format": fmt,
}
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
data = self._parse_json(resp.content)
results = {}
errors = []
@@ -468,22 +792,37 @@ Return valid JSON: {{{format}}}""")
return results, errors
except Exception as e:
return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"]
return {gt: {"items": [], "metadata": None} for gt in games}, [
f"Generate error: {e}"
]
def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]:
"""Validate items trong results"""
validated = {}
for gt, data in results.items():
items = data.get("items", []) if isinstance(data, dict) else []
valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid]
validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None}
valid_items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
validated[gt] = {
"items": valid_items,
"metadata": data.get("metadata") if isinstance(data, dict) else None,
}
return validated
def _post_process(self, items: List, game_type: str) -> List[Dict]:
ms = int(time.time() * 1000)
result = []
for i, item in enumerate(items):
d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {})
d = (
item
if isinstance(item, dict)
else (item.model_dump() if hasattr(item, "model_dump") else {})
)
d["id"] = f"{game_type[:2].upper()}-{ms}-{i}"
d["game_type"] = game_type
result.append(d)
@@ -495,12 +834,23 @@ Return valid JSON: {{{format}}}""")
return json.loads(content)
def _get_usage(self, resp) -> Dict:
if hasattr(resp, 'response_metadata'):
if hasattr(resp, "response_metadata"):
meta = resp.response_metadata
return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {})))
return getattr(resp, 'usage_metadata', {})
return meta.get(
"usage", meta.get("usage_metadata", meta.get("token_usage", {}))
)
return getattr(resp, "usage_metadata", {})
def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict:
def _result(
self,
success: bool,
games: List,
results: Dict,
errors: List,
tracker: TokenUsage,
scores: List = None,
metadata: Dict = None,
) -> Dict:
return {
"success": success,
"games": games,
@@ -509,5 +859,69 @@ Return valid JSON: {{{format}}}""")
"results": results,
"errors": errors,
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# ============== ASYNC WRAPPERS (for concurrent FastAPI handling) ==============
# These methods run the blocking LLM calls in a thread pool
async def run_fast_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 50,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_fast - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_fast, text, enabled_games, max_items, min_score, validate, debug
)
async def run_single_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_single - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_single, text, enabled_games, max_items, validate, debug
)
async def run_multi_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_multi - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_multi, text, enabled_games, max_items, validate, debug
)
async def generate_async(
self,
text: str,
game_types: Union[List[str], str],
max_items: int = 10,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for generate - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.generate, text, game_types, max_items, validate, debug
)

View File

@@ -10,6 +10,7 @@ Hệ thống sẽ:
THÊM GAME MỚI = TẠO FILE TRONG games/
BẬT/TẮT GAME = SỬA active: True/False trong file game
"""
import importlib.util
from pathlib import Path
from typing import Dict, List, Any, Optional
@@ -25,6 +26,7 @@ class GameRegistry:
- game_type (string): "quiz", "sequence"
- type_id (int): 1, 2
"""
_instance: Optional["GameRegistry"] = None
_all_games: Dict[str, GameType] = {} # Keyed by game_type
_id_map: Dict[int, str] = {} # type_id -> game_type
@@ -59,10 +61,12 @@ class GameRegistry:
game_def = self._load_game_from_file(file_path)
if game_def:
self._all_games[game_def.game_type] = game_def
if game_def.type_id > 0:
if game_def.type_id >= 0: # 0=quiz, 1=sequence are valid
self._id_map[game_def.type_id] = game_def.game_type
status = "" if game_def.active else "⏸️"
print(f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})")
print(
f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})"
)
except Exception as e:
print(f"❌ Error loading {file_path.name}: {e}")
@@ -116,9 +120,9 @@ class GameRegistry:
return self._id_map.get(type_id)
def get_id_by_game_type(self, game_type: str) -> int:
"""Convert game_type -> type_id"""
"""Convert game_type -> type_id. Returns -1 if not found."""
game = self._all_games.get(game_type)
return game.type_id if game else 0
return game.type_id if game else -1 # -1 = not found
def get_all_games(self) -> Dict[str, GameType]:
"""Lấy tất cả games ACTIVE"""
@@ -134,7 +138,9 @@ class GameRegistry:
def get_type_ids(self) -> List[int]:
"""Lấy danh sách type_ids ACTIVE"""
return [v.type_id for v in self._all_games.values() if v.active and v.type_id > 0]
return [
v.type_id for v in self._all_games.values() if v.active and v.type_id > 0
]
def get_analyzer_context(self) -> str:
"""Tạo context cho Analyzer (chỉ từ active games)"""

View File

@@ -4,88 +4,180 @@ games/_template.py - TEMPLATE CHO GAME MỚI
THÊM GAME MỚI CHỈ CẦN:
1. Copy file này
2. Rename thành <game_type>.py (ví dụ: matching.py)
3. Sửa nội dung bên trong
3. Sửa nội dung bên trong theo hướng dẫn
4. DONE! Hệ thống tự động nhận diện.
Không cần sửa bất kỳ file nào khác!
"""
from typing import List, Optional
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# ============== 1. SCHEMA ==============
# ============== 1. ITEM SCHEMA ==============
# Định nghĩa structure của 1 item trong game
# BẮT BUỘC phải có: original_quote và explanation
# BẮT BUỘC phải có: original_quote
class YourGameItem(BaseModel):
"""Schema cho 1 item của game"""
# Các trường BẮT BUỘC (để chống hallucination)
# === TRƯỜNG BẮT BUỘC ===
original_quote: str = Field(
description="Trích dẫn NGUYÊN VĂN từ văn bản gốc"
description="EXACT quote from source text - dùng để verify không hallucinate"
)
explanation: str = Field(description="Giải thích")
# Thêm các trường riêng của game ở đây
# === TRƯỜNG RIÊNG CỦA GAME ===
# Thêm các trường cần thiết cho game của bạn
# Ví dụ:
# question: str = Field(description="Câu hỏi")
# answer: str = Field(description="Đáp án")
question: str = Field(description="The question")
answer: str = Field(description="The correct answer")
# === TRƯỜNG HÌNH ẢNH (Khuyến nghị) ===
image_description: str = Field(default="", description="Visual description in English")
image_keywords: List[str] = Field(default=[], description="2-3 English keywords for image search")
image_is_complex: bool = Field(default=False, description="True if needs precise quantities/humans/complex scene")
# ============== 2. CONFIG ==============
# Cấu hình cho game
# ============== 2. METADATA SCHEMA ==============
# Metadata mô tả nội dung được generate
class YourGameMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary")
grade: int = Field(description="Grade level 1-5 (1=easy, 5=advanced)")
type: Literal["your_game"] = Field(default="your_game", description="Game type - MUST match game_type below")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
# ============== 3. OUTPUT SCHEMA ==============
# Wrapper chứa danh sách items và metadata
class YourGameOutput(BaseModel):
"""Output wrapper - BẮT BUỘC phải có"""
items: List[YourGameItem] = Field(description="List of game items")
metadata: YourGameMetadata = Field(description="Metadata about the content")
# Output parser - tự động từ output schema
output_parser = PydanticOutputParser(pydantic_object=YourGameOutput)
# ============== 4. CONFIG ==============
# Cấu hình cho game - ĐÂY LÀ PHẦN QUAN TRỌNG NHẤT
GAME_CONFIG = {
# Key duy nhất cho game (dùng trong API)
# === REQUIRED FIELDS ===
# Key duy nhất cho game (dùng trong API) - PHẢI unique
"game_type": "your_game",
# ID số nguyên unique - PHẢI khác các game khác
# Quiz=1, Sequence=2, ... tiếp tục từ 3
"type_id": 99, # TODO: Đổi thành số unique
# Tên hiển thị
"display_name": "Tên Game",
"display_name": "Your Game Name",
# Mô tả ngắn
"description": "Mô tả game của bạn",
"description": "Description of your game",
# Số lượng items
"max_items": 5,
# Trỏ đến schema class
# Schema classes - BẮT BUỘC
"schema": YourGameItem,
"output_schema": YourGameOutput,
"output_parser": output_parser,
# Prompt cho LLM
"system_prompt": """Bạn là chuyên gia tạo [tên game].
# === OPTIONAL FIELDS (có default) ===
NHIỆM VỤ: [Mô tả nhiệm vụ]
# Game có active không
"active": True,
QUY TẮC:
1. original_quote PHẢI là trích dẫn NGUYÊN VĂN
2. [Quy tắc khác]
3. [Quy tắc khác]""",
# Số lượng items tối đa
"max_items": 10,
# Rules validate input trước khi generate (Direct Mode)
"input_format_rules": [
"Text should contain ... suitable for this game.",
"Text MUST have ...",
],
# Rules cho Analyzer nhận diện game phù hợp
"analyzer_rules": [
"Text MUST contain ...",
"NOT suitable if text is ...",
],
# Rules cho Generator tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"original_quote = EXACT quote from source text",
"ALL content must come from source only - do NOT invent",
# Thêm rules riêng cho game của bạn
"Your specific rule 1",
"Your specific rule 2",
# Visual fields
"image_description: MUST be visual description in ENGLISH",
"image_keywords: MUST provide 2-3 English keywords",
"NEVER leave image fields empty!",
],
# Examples - giúp LLM học format
"examples": [] # Sẽ định nghĩa bên dưới
}
# ============== 3. EXAMPLES ==============
# Ví dụ input/output để:
# - Analyzer học khi nào nên suggest game này
# - Generator dùng làm few-shot
# ============== 5. EXAMPLES ==============
# Ví dụ input/output để LLM học pattern
EXAMPLES = [
{
# Input text mẫu
"input": "Văn bản mẫu ở đây...",
"input": "Sample text for your game...",
# Output mong đợi
# Output mong đợi - PHẢI match schema
"output": {
"items": [
{
"original_quote": "Trích dẫn từ văn bản",
"explanation": "Giải thích",
# Các trường khác của schema...
"original_quote": "Exact quote from input",
"question": "Sample question?",
"answer": "Sample answer",
"image_description": "Visual description",
"image_keywords": ["keyword1", "keyword2"],
"image_is_complex": False
}
],
"metadata": {
"title": "Sample Title",
"description": "Sample description",
"grade": 2,
"type": "your_game",
"difficulty": 2
}
]
},
# Analyzer học từ trường này
"why_suitable": "Giải thích tại sao văn bản này phù hợp với game này"
# Giải thích tại sao phù hợp - Analyzer học từ đây
"why_suitable": "Explain why this input is suitable for this game"
},
# Thêm 1-2 examples nữa...
# Thêm 1-2 examples nữa để LLM học tốt hơn...
]
# Gán examples vào config
GAME_CONFIG["examples"] = EXAMPLES
# ============== 6. POST PROCESS (Optional) ==============
# Function xử lý output sau khi LLM generate
def post_process_your_game(items: List[dict]) -> List[dict]:
"""Clean up hoặc transform items sau khi generate"""
for item in items:
# Ví dụ: clean up text
if item.get("answer"):
item["answer"] = item["answer"].strip()
return items
# Đăng ký handler (optional)
# GAME_CONFIG["post_process_handler"] = post_process_your_game

View File

@@ -1,139 +1,172 @@
"""
games/quiz.py - Quiz Game - Multiple choice questions
games/quiz.py - Optimized for LLM Performance while keeping System Integrity
"""
from typing import List, Literal
import re
from pydantic import BaseModel, Field
from typing import List, Literal, Optional
from pydantic import BaseModel, Field, field_validator
from langchain_core.output_parsers import PydanticOutputParser
import re
# ============== SCHEMA ==============
# ==========================================
# 1. OPTIMIZED SCHEMA (Thông minh hơn)
# ==========================================
class QuizItem(BaseModel):
question: str = Field(description="The question based on source content")
answers: str = Field(description="The correct answer")
options: List[str] = Field(description="List of options including correct answer")
original_quote: str = Field(description="EXACT quote from source text")
image_description: str = Field(default="", description="Visual description for the question")
image_keywords: List[str] = Field(default=[], description="Keywords for image search")
image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects")
# LLM chỉ cần tập trung sinh ra raw data, việc clean để code lo
question: str = Field(description="Question text. Use ____ for blanks.")
# Request field có thể để default, logic xử lý sau
request: str = Field(
default="Choose the correct answer", description="Instruction type"
)
answer: str = Field(description="Correct answer text")
options: List[str] = Field(description="List of options")
original_quote: str = Field(description="Exact source sentence")
# Gom nhóm image fields để prompt gọn hơn
image_description: str = Field(
default="", description="Visual description (if needed)"
)
image_keywords: List[str] = Field(default=[])
image_is_complex: bool = Field(default=False)
@field_validator("answer", "options", mode="before")
@classmethod
def clean_prefixes(cls, v):
"""Tự động xóa A., B., (1)... ngay khi nhận dữ liệu từ LLM"""
def clean_str(text):
# Regex xóa (A), 1., Q: ở đầu và (1) ở cuối
text = re.sub(
r"^(\([A-Za-z0-9]\)|[A-Za-z0-9]\.|Q\d*:)\s*",
"",
str(text),
flags=re.IGNORECASE,
)
text = re.sub(r"\s*\([A-Za-z0-9]\)$", "", text)
return text.strip()
if isinstance(v, list):
return [clean_str(item) for item in v]
return clean_str(v)
class QuizMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(
description="Title for this content. Prefer title from source document if available and suitable, otherwise create a short descriptive title."
)
description: str = Field(
description="Short description summarizing the content/topic of the quiz."
)
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older). Judge by vocabulary, concepts, required knowledge."
)
type: Literal["quiz"] = Field(default="quiz", description="Game type (always 'quiz')")
difficulty: int = Field(
description="Difficulty 1-5 for that grade (1=very easy, 5=very hard). Judge by question complexity, number of options, abstract concepts."
)
title: str = Field(description="Short content title")
description: str = Field(description="Summary")
grade: int = Field(description="Level 1-5")
type: Literal["quiz"] = "quiz"
difficulty: int = Field(description="Level 1-5")
class QuizOutput(BaseModel):
"""Output wrapper for quiz items"""
items: List[QuizItem] = Field(description="List of quiz items generated from source text")
metadata: QuizMetadata = Field(description="Metadata about the quiz content")
items: List[QuizItem]
metadata: QuizMetadata
# Output parser
output_parser = PydanticOutputParser(pydantic_object=QuizOutput)
# ==========================================
# 2. COMPACT CONFIG (Giữ đủ key, giảm nội dung)
# ==========================================
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
# --- SYSTEM FIELDS (Giữ nguyên không đổi) ---
"game_type": "quiz",
"display_name": "Quiz",
"description": "Multiple choice questions",
"type_id": 1,
"type_id": 0,
"active": True,
"max_items": 10,
"schema": QuizItem,
"output_schema": QuizOutput,
"output_parser": output_parser,
# --- USER UI HINTS (Rút gọn văn bản hiển thị) ---
"input_format_rules": [
"Text should contain facts or questions suitable for a quiz.",
"Prefer extracting existing multiple choice questions if available.",
"Text MUST contain questions with multiple choice options",
"Text must contain specific facts or Q&A content.",
"Suitable for multiple choice extraction.",
],
# 1. Recognition Rules (for Analyzer)
# --- PRE-CHECK LOGIC (Rút gọn) ---
"analyzer_rules": [
"Text MUST contain questions with multiple choice options",
"NOT suitable if text is just a list of words with no questions",
"Contains questions with options OR factual statements.",
"Not just a list of unconnected words.",
],
# 2. Rules tạo nội dung (cho Generator)
# --- LLM INSTRUCTIONS ---
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"original_quote = EXACT quote from source text (full question block)",
"ALL content must come from source only - do NOT invent",
"REMOVE unnecessary numbering: 'Question 1:', '(1)', '(2)', 'A.', 'B.' from question/options/answers",
"STRICTLY CLEAN OUTPUT for 'answers': MUST contain ONLY the text content of the correct option.",
"FORBIDDEN in 'answers': Prefixes like '(1)', '(2)', 'A.', 'B.', '1.' - REMOVE THEM.",
"IMPORTANT: The 'answers' field MUST EXACTLY MATCH one of the 'options' values text-wise.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description relevant to the question in ENGLISH.",
"image_keywords: MUST provide 2-3 English keywords for search.",
"image_is_complex: FALSE for simple/static objects, TRUE for quantities/humans/complex scenes",
"NEVER leave image fields empty!",
"MODE: STRICT EXTRACTION & LOCALITY PRIORITIZED.",
"1. MANDATORY OPTIONS & LOCALITY: Only create a quiz item if 2-4 options are EXPLICITLY present and located immediately after/below the question. SKIP if options are shared in a 'Word Box' or 'Word Bank' tại đầu/cuối trang.",
"2. ANSWER PRIORITY: Use the provided key if available. If the marker is empty, solve it yourself using grammar rules. Do not redefine existing keys.",
"3. ZERO FABRICATION: Do NOT invent distractors. Only extract what is explicitly present.",
"4. LOGICAL AMBIGUITY: If a question is grammatically correct with multiple options but lacks context, SKIP IT.",
"5. SEMANTIC OPTION EXTRACTION: Extract ONLY the meaningful word/phrase. Strip away ALL labels like (1), (A), or OCR noise.",
"6. SMART FILL-IN-THE-BLANK: If the question is a 'Fill in the blank' type, you MUST analyze the sentence structure and place the '____' at the grammatically correct position (e.g., 'Blood ____ oozing'). DO NOT blindly put it at the end. If the sentence is already a complete question (not a blank type), do not add '____'.",
"7. METADATA: Fill metadata accurately based on content. Do not leave empty."
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
def clean_prefix(text: str) -> str:
"""Remove prefixes like (1), (A), 1., A. from text"""
if not text: return text
# Regex: Start with ( (number/letter) ) OR number/letter dot. Followed by spaces.
return re.sub(r'^(\(\d+\)|\([A-Za-z]\)|\d+\.|[A-Za-z]\.)\s*', '', text).strip()
def post_process_quiz(items: List[dict]) -> List[dict]:
"""Clean up answers and options prefixes"""
for item in items:
# Clean answers
if item.get("answers"):
item["answers"] = clean_prefix(item["answers"])
# Clean options
if item.get("options") and isinstance(item["options"], list):
item["options"] = [clean_prefix(opt) for opt in item["options"]]
return items
# Register handler
GAME_CONFIG["post_process_handler"] = post_process_quiz
# ============== EXAMPLES ==============
EXAMPLES = [
# --- EXAMPLES (Chỉ giữ 1 cái tốt nhất để làm mẫu format) ---
"examples": [
{
"input": "The Sun is a star at the center of the Solar System.",
"input": "The giraffe has a long neck. Options: neck, leg, tail.",
"output": {
"items": [{
"question": "Where is the Sun located?",
"answers": "At the center of the Solar System",
"options": ["At the center of the Solar System", "At the edge of the Solar System", "Near the Moon", "Outside the universe"],
"original_quote": "The Sun is a star at the center of the Solar System.",
"image_description": "The sun in the middle of planets",
"image_keywords": ["sun", "planets"],
"image_is_complex": False
}]
},
"why_suitable": "Has clear facts"
"items": [
{
"question": "The giraffe has a long ____.",
"request": "Fill in the blank",
"answer": "neck",
"options": ["neck", "leg", "tail"],
"original_quote": "The giraffe has a long neck.",
"image_description": "A giraffe",
"image_keywords": ["giraffe"],
"image_is_complex": False,
}
]
],
"metadata": {
"title": "Animals",
"description": "Giraffe anatomy",
"grade": 2,
"type": "quiz",
"difficulty": 1,
},
},
"why_suitable": "Valid extraction: Text has Fact + Options.",
}
],
}
# # ==========================================
# # 3. HANDLER (Logic hậu xử lý gọn nhẹ)
# # ==========================================
# def post_process_quiz(items: List[dict]) -> List[dict]:
# valid_items = []
# for item in items:
# options = item.get("options", [])
# answer = item.get("answer", "")
# if len(options) < 2:
# continue
# # Nếu có answer từ input, thì so khớp để làm sạch
# if answer:
# matched_option = next(
# (opt for opt in options if opt.lower() == answer.lower()), None
# )
# if matched_option:
# item["answer"] = matched_option
# # Nếu có answer mà không khớp option nào thì mới cân nhắc loại (hoặc để AI tự đoán lại)
# # Nếu answer rỗng (do ngoặc trống), ta vẫn giữ câu này lại
# # (với điều kiện LLM đã được dặn là phải tự điền vào trường answer)
# if not item.get("answer"):
# # Bạn có thể chọn loại bỏ hoặc tin tưởng vào đáp án LLM tự suy luận
# pass
# item["request"] = (
# "Fill in the blank"
# if "____" in item.get("question", "")
# else "Choose the correct answer"
# )
# valid_items.append(item)
# return valid_items
# # Đăng ký handler
# GAME_CONFIG["post_process_handler"] = post_process_quiz

View File

@@ -1,6 +1,6 @@
"""
games/sequence.py - Arrange Sequence Game (Sentences OR Words)
type_id = 2
type_id = 1
LLM tự quyết định dựa vào ngữ nghĩa:
- "good morning", "apple", "happy" → WORD
- "Hi, I'm Lisa", "The sun rises" → SENTENCE
@@ -38,7 +38,7 @@ class SequenceMetadata(BaseModel):
description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences"
)
difficulty: int = Field(
description="Difficulty 1-5 for that grade."
description="Difficulty 1-3 for that grade."
)
@@ -52,59 +52,7 @@ class SequenceOutput(BaseModel):
output_parser = PydanticOutputParser(pydantic_object=SequenceOutput)
# ============== CONFIG ==============
# Registry entry for the "Arrange Sequence" game plugin, consumed by the
# game registry / analyzer / generator pipeline (see src.get_registry()).
GAME_CONFIG = {
    "game_type": "sequence",              # internal string identifier
    "display_name": "Arrange Sequence",   # human-readable name shown to clients
    "description": "Arrange sentences or words in order",
    "type_id": 2,                         # numeric id exposed through the API
    "active": True,                       # inactive games are skipped by the registry
    "max_items": 10,                      # upper bound on generated items per request
    "schema": SequenceItem,               # pydantic model for a single item
    "output_schema": SequenceOutput,      # pydantic model for the full LLM response
    "output_parser": output_parser,       # LangChain parser bound to SequenceOutput
    # Hard constraints on acceptable input text.
    "input_format_rules": [
        "Text MUST be a list of items (words, phrases, sentences) to be ordered.",
        "Do NOT generate sequence from multiple choice questions (A/B/C/D).",
        "Do NOT generate sequence if the text is a quiz or test format.",
    ],
    # 1. Recognition rules (used by the Analyzer to score suitability).
    "analyzer_rules": [
        "Text is a list of words, phrases, or sentences suitable for ordering",
        "Items are separated by commas, semicolons, or newlines",
        "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
        "NO questions required - just a list of items",
        "Text is NOT a long essay or complex dialogue",
    ],
    # 2. Content-generation rules (fed to the Generator prompt).
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "Analyze text semantically to extract meaningful items",
        "For each item, decide type: WORD/PHRASE or SENTENCE",
        "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
        "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
        "NEVER fill both fields for the same item",
        "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
        "Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
        # CONSISTENCY RULES
        "CRITICAL: All extracted items MUST be of the SAME type.",
        "Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
        "If input has mixed types, pick the MAJORITY type and ignore the others.",
        # VISUAL FIELD COMPULSORY
        "image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
        "image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
    ],
    # NOTE(review): EXAMPLES appears to be defined *after* this dict in the
    # module, so the globals() guard evaluates False at import time and
    # "examples" silently becomes [] — confirm whether this config should be
    # defined below the EXAMPLES list instead.
    "examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
# ============== EXAMPLES ==============
@@ -171,3 +119,59 @@ EXAMPLES = [
"why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field"
}
]
# ============== CONFIG ==============
# Registry entry for the "Arrange Sequence" game plugin, consumed by the
# game registry / analyzer / generator pipeline (see src.get_registry()).
# Defined AFTER the EXAMPLES list so the reference below resolves at import time.
GAME_CONFIG = {
    "game_type": "sequence",              # internal string identifier
    "display_name": "Arrange Sequence",   # human-readable name shown to clients
    "description": "Arrange sentences or words in order",
    # NOTE(review): api.py documents 1=quiz, 2=sequence_sentence — confirm that
    # moving this game to type_id 1 does not collide with the quiz game's id.
    "type_id": 1,
    "active": True,                       # inactive games are skipped by the registry
    "max_items": 10,                      # upper bound on generated items per request
    "schema": SequenceItem,               # pydantic model for a single item
    "output_schema": SequenceOutput,      # pydantic model for the full LLM response
    "output_parser": output_parser,       # LangChain parser bound to SequenceOutput
    # Hard constraints on acceptable input text.
    "input_format_rules": [
        "Text MUST be a list of items (words, phrases, sentences) to be ordered.",
        "Do NOT generate sequence from multiple choice questions (A/B/C/D).",
        "Do NOT generate sequence if the text is a quiz or test format.",
    ],
    # 1. Recognition rules (used by the Analyzer to score suitability).
    "analyzer_rules": [
        "Text is a list of words, phrases, or sentences suitable for ordering",
        "Items are separated by commas, semicolons, or newlines",
        "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
        "NO questions required - just a list of items",
        "Text is NOT a long essay or complex dialogue",
    ],
    # 2. Content-generation rules (fed to the Generator prompt).
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "Analyze text semantically to extract meaningful items",
        "For each item, decide type: WORD/PHRASE or SENTENCE",
        "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
        "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
        "NEVER fill both fields for the same item",
        "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
        "Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
        # CONSISTENCY RULES
        "CRITICAL: All extracted items MUST be of the SAME type.",
        "Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
        "If input has mixed types, pick the MAJORITY type and ignore the others.",
        # VISUAL FIELD COMPULSORY
        "image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
        "image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
    ],
    # EXAMPLES is defined above in this module, so the defensive
    # `'EXAMPLES' in globals()` guard is dead code here; referencing the name
    # directly lets a missing/renamed EXAMPLES fail loudly at import instead
    # of silently degrading to an empty example list.
    "examples": EXAMPLES,
}

View File

@@ -74,7 +74,7 @@ DEFAULT_CONFIGS = {
"openai": ModelConfig(
provider="openai",
model_name="gpt-4o-mini",
temperature=0.1
temperature=0.1,
),
"openai_light": ModelConfig(
provider="openai",
@@ -117,13 +117,19 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
from langchain_google_genai import ChatGoogleGenerativeAI
api_key = config.api_key or os.getenv("GOOGLE_API_KEY")
print("Using GOOGLE_API_KEY:", api_key)
if not api_key:
raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key")
return ChatGoogleGenerativeAI(
model=config.model_name,
temperature=config.temperature,
google_api_key=api_key
google_api_key=api_key,
version="v1",
additional_headers={
"User-Agent": "PostmanRuntime/7.43.0",
"Accept": "*/*"
}
)
elif provider == "openai":
@@ -136,7 +142,8 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
return ChatOpenAI(
model=config.model_name,
temperature=config.temperature,
api_key=api_key
api_key=api_key,
base_url=config.base_url or None
)
else: