This commit is contained in:
vuongps38770
2026-01-13 09:33:10 +07:00
parent 29544da4c6
commit 7c41ddaa82
9 changed files with 1362 additions and 599 deletions

1
.env
View File

@@ -0,0 +1 @@
PORT=2088

280
api.py
View File

@@ -5,12 +5,19 @@ from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from pathlib import Path from pathlib import Path
import re import re
from dotenv import load_dotenv
load_dotenv()
from src import ( from src import (
GameCore, get_registry, reload_games, GameCore,
get_active_game_types, get_active_type_ids, get_registry,
get_game_by_id, id_to_type, type_to_id, reload_games,
ModelConfig get_active_game_types,
get_active_type_ids,
get_game_by_id,
id_to_type,
type_to_id,
ModelConfig,
) )
@@ -18,7 +25,7 @@ from src import (
app = FastAPI( app = FastAPI(
title="Game Generator API", title="Game Generator API",
description="API tạo game giáo dục từ văn bản", description="API tạo game giáo dục từ văn bản",
version="2.0.0" version="2.0.0",
) )
app.add_middleware( app.add_middleware(
@@ -31,31 +38,43 @@ app.add_middleware(
# ============== REQUEST/RESPONSE MODELS ============== # ============== REQUEST/RESPONSE MODELS ==============
class LLMConfigRequest(BaseModel): class LLMConfigRequest(BaseModel):
provider: str = Field(default="gemini", description="ollama, gemini, openai") provider: str = Field(default="gemini", description="ollama, gemini, openai")
model_name: str = Field(default="gemini-2.0-flash-lite") model_name: str = Field(default="gemini-2.0-flash-lite")
api_key: Optional[str] = Field(default=None, description="API key (None = lấy từ env)") api_key: Optional[str] = Field(
default=None, description="API key (None = lấy từ env)"
)
temperature: float = Field(default=0.1) temperature: float = Field(default=0.1)
base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama") base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama")
class GenerateRequest(BaseModel): class GenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)") enabled_game_ids: Optional[List[int]] = Field(
default=None,
description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)",
)
run_analyzer: bool = Field(default=True) run_analyzer: bool = Field(default=True)
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
max_items: Optional[int] = Field(default=3) max_items: Optional[int] = Field(default=100)
min_score: int = Field(default=50, description="Minimum score (0-100) for analyzer to include a game") min_score: int = Field(
default=50, description="Minimum score (0-100) for analyzer to include a game"
)
debug: bool = Field(default=False, description="Print prompts to server log") debug: bool = Field(default=False, description="Print prompts to server log")
# LLM config (optional - override global) # LLM config (optional - override global)
llm_config: Optional[LLMConfigRequest] = Field(default=None, description="Override LLM config") llm_config: Optional[LLMConfigRequest] = Field(
default=None, description="Override LLM config"
)
class TokenUsageResponse(BaseModel): class TokenUsageResponse(BaseModel):
prompt_tokens: int = 0 prompt_tokens: int = 0
completion_tokens: int = 0 completion_tokens: int = 0
total_tokens: int = 0 total_tokens: int = 0
input_chars: int = 0 # Character count sent to LLM
output_chars: int = 0 # Character count received from LLM
class GameScoreInfo(BaseModel): class GameScoreInfo(BaseModel):
@@ -66,12 +85,14 @@ class GameScoreInfo(BaseModel):
class GameResultData(BaseModel): class GameResultData(BaseModel):
"""Structure thống nhất cho mỗi game result""" """Structure thống nhất cho mỗi game result"""
items: List[Dict[str, Any]] = [] items: List[Dict[str, Any]] = []
metadata: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None
class CommonMetadataResponse(BaseModel): class CommonMetadataResponse(BaseModel):
"""Metadata chung cho toàn bộ kết quả generate""" """Metadata chung cho toàn bộ kết quả generate"""
title: str = "" title: str = ""
description: str = "" description: str = ""
grade: int = 0 grade: int = 0
@@ -92,7 +113,7 @@ class GenerateResponse(BaseModel):
class GameInfo(BaseModel): class GameInfo(BaseModel):
type_id: int type_id: int
game_type: str # Keep for reference game_type: str
display_name: str display_name: str
description: str description: str
active: bool active: bool
@@ -135,7 +156,7 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
model_name=config_override.model_name, model_name=config_override.model_name,
api_key=config_override.api_key, api_key=config_override.api_key,
temperature=config_override.temperature, temperature=config_override.temperature,
base_url=config_override.base_url base_url=config_override.base_url,
) )
return GameCore(llm_config=config) return GameCore(llm_config=config)
@@ -149,6 +170,7 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
# ============== ENDPOINTS ============== # ============== ENDPOINTS ==============
@app.post("/generate", response_model=GenerateResponse) @app.post("/generate", response_model=GenerateResponse)
async def generate_games(request: GenerateRequest): async def generate_games(request: GenerateRequest):
"""Generate games from text with scoring""" """Generate games from text with scoring"""
@@ -157,17 +179,18 @@ async def generate_games(request: GenerateRequest):
# Convert type_ids to game_types # Convert type_ids to game_types
if request.enabled_game_ids: if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else: else:
games = get_active_game_types() games = get_active_game_types()
result = core.run_multi( result = await core.run_multi_async(
text=request.text, text=request.text,
enabled_games=games, enabled_games=games,
max_items=request.max_items or 3, max_items=request.max_items or 100,
min_score=request.min_score,
validate=request.run_validator, validate=request.run_validator,
debug=request.debug debug=request.debug,
) )
# Convert game_types to type_ids in response # Convert game_types to type_ids in response
@@ -176,27 +199,33 @@ async def generate_games(request: GenerateRequest):
# Convert game_scores # Convert game_scores
game_scores = [] game_scores = []
for s in result.get("game_scores", []): for s in result.get("game_scores", []):
game_scores.append(GameScoreInfo( game_scores.append(
type_id=type_to_id(s.get("type", "")), GameScoreInfo(
score=s.get("score", 0), type_id=type_to_id(s.get("type", "")),
reason=s.get("reason", "") score=s.get("score", 0),
)) reason=s.get("reason", ""),
)
)
# Convert results keys to type_ids # Convert results keys to type_ids
results_by_id = {} results_by_id = {}
for game_type, items in result.get("results", {}).items(): for game_type, items in result.get("results", {}).items():
tid = type_to_id(game_type) tid = type_to_id(game_type)
if tid > 0: if tid >= 0: # 0=quiz, 1=sequence are valid
results_by_id[tid] = items results_by_id[tid] = items
# Get common metadata from analyzer # Get common metadata from analyzer
core_meta = result.get("metadata", {}) core_meta = result.get("metadata", {})
common_metadata = CommonMetadataResponse( common_metadata = (
title=core_meta.get("title", ""), CommonMetadataResponse(
description=core_meta.get("description", ""), title=core_meta.get("title", ""),
grade=core_meta.get("grade", 0), description=core_meta.get("description", ""),
difficulty=core_meta.get("difficulty", 0) grade=core_meta.get("grade", 0),
) if core_meta else None difficulty=core_meta.get("difficulty", 0),
)
if core_meta
else None
)
return GenerateResponse( return GenerateResponse(
success=result.get("success", False), success=result.get("success", False),
@@ -206,25 +235,120 @@ async def generate_games(request: GenerateRequest):
results=results_by_id, results=results_by_id,
llm=result.get("llm"), llm=result.get("llm"),
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return GenerateResponse( return GenerateResponse(
success=False, success=False, games=[], game_scores=[], results={}, errors=[str(e)]
games=[], )
game_scores=[],
results={},
errors=[str(e)] # ============== FAST GENERATE (1 API call - OPTIMIZED) ==============
class FastGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(
default=None, description="Limit type_ids"
)
max_items: int = Field(default=100, description="Max items per game")
min_score: int = Field(default=50, description="Min score 0-100 to include game")
run_validator: bool = Field(default=True)
debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None)
@app.post("/generate/fast", response_model=GenerateResponse)
async def generate_fast(request: FastGenerateRequest):
"""
🚀 OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp.
So với /generate (2+ calls):
- Chỉ 1 API call
- Tiết kiệm quota/tokens
- Nhanh hơn
So với /generate/single:
- Trả về NHIỀU games (không chỉ 1)
"""
try:
core = get_core(request.llm_config)
# Convert type_ids to game_types
if request.enabled_game_ids:
games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else:
games = get_active_game_types()
result = await core.run_fast_async(
text=request.text,
enabled_games=games,
max_items=request.max_items,
min_score=request.min_score,
validate=request.run_validator,
debug=request.debug,
)
# Convert to response format (same as /generate)
game_ids = [type_to_id(g) for g in result.get("games", [])]
game_scores = [
GameScoreInfo(
type_id=type_to_id(s.get("type", "")),
score=s.get("score", 0),
reason=s.get("reason", ""),
)
for s in result.get("game_scores", [])
]
results_by_id = {}
for game_type, data in result.get("results", {}).items():
tid = type_to_id(game_type)
if tid >= 0: # 0=quiz, 1=sequence are valid
results_by_id[tid] = data
core_meta = result.get("metadata", {})
common_metadata = (
CommonMetadataResponse(
title=core_meta.get("title", ""),
description=core_meta.get("description", ""),
grade=core_meta.get("grade", 0),
difficulty=core_meta.get("difficulty", 0),
)
if core_meta
else None
)
return GenerateResponse(
success=result.get("success", False),
games=game_ids,
game_scores=game_scores,
metadata=common_metadata,
results=results_by_id,
api_calls=1, # Always 1 for fast
llm=result.get("llm"),
token_usage=result.get("token_usage"),
errors=result.get("errors", []),
)
except Exception as e:
return GenerateResponse(
success=False, games=[], game_scores=[], results={}, errors=[str(e)]
) )
# ============== SINGLE BEST (1 PROMPT) ============== # ============== SINGLE BEST (1 PROMPT) ==============
class SingleGenerateRequest(BaseModel): class SingleGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="Limit type_ids to choose from") enabled_game_ids: Optional[List[int]] = Field(
max_items: int = Field(default=3, description="Max items to generate") default=None, description="Limit type_ids to choose from"
)
max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
debug: bool = Field(default=False) debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None) llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -254,7 +378,9 @@ async def generate_single_game(request: SingleGenerateRequest):
# Convert type_ids to game_types # Convert type_ids to game_types
if request.enabled_game_ids: if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else: else:
games = None games = None
@@ -263,7 +389,7 @@ async def generate_single_game(request: SingleGenerateRequest):
enabled_games=games, enabled_games=games,
max_items=request.max_items, max_items=request.max_items,
debug=request.debug, debug=request.debug,
validate=request.run_validator validate=request.run_validator,
) )
# Convert game_type to type_id # Convert game_type to type_id
@@ -277,21 +403,19 @@ async def generate_single_game(request: SingleGenerateRequest):
items=result.get("items", []), items=result.get("items", []),
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
llm=result.get("llm"), llm=result.get("llm"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return SingleGenerateResponse( return SingleGenerateResponse(success=False, errors=[str(e)])
success=False,
errors=[str(e)]
)
# ============== DIRECT GENERATE (1 game cụ thể, không analyze) ============== # ============== DIRECT GENERATE (1 game cụ thể, không analyze) ==============
class DirectGenerateRequest(BaseModel): class DirectGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
max_items: int = Field(default=3, description="Max items to generate") max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
debug: bool = Field(default=False) debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None) llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -299,6 +423,7 @@ class DirectGenerateRequest(BaseModel):
class DirectGenerateResponse(BaseModel): class DirectGenerateResponse(BaseModel):
"""Response thống nhất, giống GenerateResponse nhưng cho 1 game""" """Response thống nhất, giống GenerateResponse nhưng cho 1 game"""
success: bool success: bool
games: List[int] = [] # Single type_id in list games: List[int] = [] # Single type_id in list
results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse
@@ -322,7 +447,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
return DirectGenerateResponse( return DirectGenerateResponse(
success=False, success=False,
games=[type_id], games=[type_id],
errors=[f"Game with type_id={type_id} not found"] errors=[f"Game with type_id={type_id} not found"],
) )
core = get_core(request.llm_config) core = get_core(request.llm_config)
@@ -332,7 +457,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
text=request.text, text=request.text,
max_items=request.max_items, max_items=request.max_items,
validate=request.run_validator, validate=request.run_validator,
debug=request.debug debug=request.debug,
) )
format_error = result.get("format_error") format_error = result.get("format_error")
@@ -341,7 +466,7 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
# Build results với structure thống nhất # Build results với structure thống nhất
game_result = GameResultData( game_result = GameResultData(
items=data.get("items", []) if isinstance(data, dict) else [], items=data.get("items", []) if isinstance(data, dict) else [],
metadata=data.get("metadata") if isinstance(data, dict) else None metadata=data.get("metadata") if isinstance(data, dict) else None,
) )
return DirectGenerateResponse( return DirectGenerateResponse(
@@ -352,15 +477,11 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
format_error=format_error, format_error=format_error,
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
llm=result.get("llm"), llm=result.get("llm"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return DirectGenerateResponse( return DirectGenerateResponse(success=False, games=[type_id], errors=[str(e)])
success=False,
games=[type_id],
errors=[str(e)]
)
@app.get("/games", response_model=GamesListResponse) @app.get("/games", response_model=GamesListResponse)
@@ -373,14 +494,16 @@ async def list_games():
active_count = 0 active_count = 0
for game_type, game in all_games.items(): for game_type, game in all_games.items():
games_list.append(GameInfo( games_list.append(
type_id=game.type_id, GameInfo(
game_type=game.game_type, type_id=game.type_id,
display_name=game.display_name, game_type=game.game_type,
description=game.description, display_name=game.display_name,
active=game.active, description=game.description,
max_items=game.max_items, active=game.active,
)) max_items=game.max_items,
)
)
if game.active: if game.active:
active_count += 1 active_count += 1
@@ -388,9 +511,7 @@ async def list_games():
games_list.sort(key=lambda g: g.type_id) games_list.sort(key=lambda g: g.type_id)
return GamesListResponse( return GamesListResponse(
total=len(games_list), total=len(games_list), active_count=active_count, games=games_list
active_count=active_count,
games=games_list
) )
@@ -420,7 +541,7 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse:
if not re.search(pattern, content): if not re.search(pattern, content):
raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py") raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py")
new_content = re.sub(pattern, f'\\1{new_value}', content) new_content = re.sub(pattern, f"\\1{new_value}", content)
game_file.write_text(new_content, encoding="utf-8") game_file.write_text(new_content, encoding="utf-8")
reload_games() reload_games()
@@ -430,7 +551,7 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse:
success=True, success=True,
message=f"Game '{game_type}' has been {action}", message=f"Game '{game_type}' has been {action}",
game_type=game_type, game_type=game_type,
active=active active=active,
) )
@@ -447,7 +568,7 @@ async def get_llm_config():
provider=_current_config.provider, provider=_current_config.provider,
model_name=_current_config.model_name, model_name=_current_config.model_name,
temperature=_current_config.temperature, temperature=_current_config.temperature,
base_url=_current_config.base_url base_url=_current_config.base_url,
) )
@@ -461,7 +582,7 @@ async def set_llm_config(config: LLMConfigRequest):
model_name=config.model_name, model_name=config.model_name,
api_key=config.api_key, api_key=config.api_key,
temperature=config.temperature, temperature=config.temperature,
base_url=config.base_url base_url=config.base_url,
) )
try: try:
@@ -470,13 +591,10 @@ async def set_llm_config(config: LLMConfigRequest):
return ActionResponse( return ActionResponse(
success=True, success=True,
message=f"LLM changed to {config.provider}/{config.model_name}" message=f"LLM changed to {config.provider}/{config.model_name}",
) )
except Exception as e: except Exception as e:
return ActionResponse( return ActionResponse(success=False, message=f"Failed to change LLM: {str(e)}")
success=False,
message=f"Failed to change LLM: {str(e)}"
)
@app.post("/reload", response_model=ActionResponse) @app.post("/reload", response_model=ActionResponse)
@@ -488,17 +606,13 @@ async def reload_all_games():
_core = None _core = None
return ActionResponse( return ActionResponse(
success=True, success=True, message=f"Reloaded. Active games: {get_active_game_types()}"
message=f"Reloaded. Active games: {get_active_game_types()}"
) )
@app.get("/health") @app.get("/health")
async def health_check(): async def health_check():
return { return {"status": "healthy", "active_games": get_active_game_types()}
"status": "healthy",
"active_games": get_active_game_types()
}
# ============== STARTUP ============== # ============== STARTUP ==============
@@ -510,4 +624,8 @@ async def startup():
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn
uvicorn.run(app, host="0.0.0.0", port=2088)
port = os.getenv("PORT")
if not port:
raise ValueError("Missing required environment variable: PORT")
uvicorn.run(app, host="0.0.0.0", port=int(port))

View File

@@ -1,23 +1,57 @@
""" """
games/match.py - Match Game - Match sentences with images games/match.py - Match Game - Match words/phrases with images
type_id = 3
Input: Danh sách từ hoặc cụm từ
Output: Mỗi item gồm từ/cụm từ và mô tả hình ảnh tương ứng
""" """
from typing import List
from typing import List, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser from langchain_core.output_parsers import PydanticOutputParser
# ============== SCHEMA ============== # ============== SCHEMA ==============
class MatchItem(BaseModel): class MatchItem(BaseModel):
word: str = Field(description="The sentence to be matched (EXACT copy from source)") """Schema cho 1 item của Match game"""
match_with: str = Field(description="Short keyword for reference")
original_quote: str = Field(description="EXACT quote from source text") word: str = Field(
image_description: str = Field(default="", description="Detailed visual description for image generation/search") description="The word or phrase to be matched (EXACT copy from source, cleaned of numbering)"
image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") )
original_quote: str = Field(
description="EXACT quote from source text before any cleaning"
)
image_description: str = Field(
description="Detailed visual description for image generation in ENGLISH. Must be specific and visual."
)
image_keywords: List[str] = Field(
default=[], description="2-3 English keywords for image search"
)
image_is_complex: bool = Field(
default=False,
description="True if image needs precise quantities, humans, or multiple detailed objects",
)
class MatchMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary of the content")
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced)"
)
type: Literal["match"] = Field(default="match", description="Game type")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
class MatchOutput(BaseModel): class MatchOutput(BaseModel):
"""Output wrapper for match items""" """Output wrapper for match items"""
items: List[MatchItem] = Field(description="List of match items generated from source text")
items: List[MatchItem] = Field(
description="List of match items generated from source text"
)
metadata: MatchMetadata = Field(description="Metadata about the content")
# Output parser # Output parser
@@ -26,56 +60,110 @@ output_parser = PydanticOutputParser(pydantic_object=MatchOutput)
# ============== CONFIG ============== # ============== CONFIG ==============
GAME_CONFIG = { GAME_CONFIG = {
# === REQUIRED ===
"game_type": "match", "game_type": "match",
"type_id": 3,
"display_name": "Match with Image", "display_name": "Match with Image",
"description": "Match sentences with images", "description": "Match words or phrases with their corresponding images",
"active": True,
"min_items": 2,
"max_items": 10,
"schema": MatchItem, "schema": MatchItem,
"output_schema": MatchOutput, "output_schema": MatchOutput,
"output_parser": output_parser, "output_parser": output_parser,
# === OPTIONAL ===
"system_prompt": """Extract sentences and create image descriptions for matching game. "active": True,
The game will show images and players must match them with the correct sentences. "max_items": 10,
# Input validation rules
YOUR TASK: "input_format_rules": [
1. Extract meaningful sentences from the source text "Text MUST be a list of words or phrases separated by commas, semicolons, or newlines",
2. Create a DETAILED image_description that clearly represents the sentence "NOT suitable for long sentences or paragraphs",
3. The image should be distinct enough to match with its sentence "Each item should be a concrete noun/concept that can be visualized",
],
CRITICAL RULES: # Analyzer rules - khi nào nên chọn game này
1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text "analyzer_rules": [
2. original_quote MUST be an EXACT copy from source text "Text is a list of words or short phrases",
3. image_description must be DETAILED and SPECIFIC to the sentence content "Words represent concrete objects/concepts that can be visualized",
4. Each image should be visually distinguishable from others""", "Examples: 'apple, banana, orange' or 'cat; dog; bird'",
"NOT suitable for abstract concepts or long sentences",
],
# Generation rules - cách tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE for 'word' field - Do NOT translate",
"original_quote = EXACT copy from source before cleaning",
"Clean numbering like '1.', 'a)', '' from word field",
"Each word/phrase should represent a visualizable concept",
# Image rules
"image_description: MUST be DETAILED visual description in ENGLISH",
"image_description: Describe colors, shapes, actions, context",
"image_keywords: 2-3 English keywords for search",
"image_is_complex: TRUE for humans, precise counts, complex scenes",
"NEVER leave image_description empty!",
# Quality rules
"Each image should be visually DISTINCT from others",
"Avoid generic descriptions - be specific",
],
"examples": [], # Defined below
} }
# ============== EXAMPLES ============== # ============== EXAMPLES ==============
EXAMPLES = [ EXAMPLES = [
{ {
"input": "The Sun is a star. The Moon orbits Earth.", "input": "apple; banana;",
"output": { "output": {
"items": [ "items": [
{ {
"word": "The Sun is a star.", "word": "apple",
"match_with": "sun", "original_quote": "apple",
"original_quote": "The Sun is a star.", "image_description": "A shiny red apple with a green leaf on top",
"image_description": "A bright glowing yellow sun with solar flares", "image_keywords": ["apple", "fruit", "red"],
"image_is_complex": False "image_is_complex": False,
}, },
{ {
"word": "The Moon orbits Earth.", "word": "banana",
"match_with": "moon", "original_quote": "banana",
"original_quote": "The Moon orbits Earth.", "image_description": "A curved yellow banana",
"image_description": "A grey moon circling around the blue Earth planet", "image_keywords": ["banana", "fruit", "yellow"],
"image_is_complex": False "image_is_complex": False,
} },
] ],
"metadata": {
"title": "Fruits",
"description": "Common fruits vocabulary",
"grade": 1,
"type": "match",
"difficulty": 1,
},
}, },
"why_suitable": "Has distinct concepts that can be visualized and matched" "why_suitable": "Simple words representing concrete objects that can be visualized",
} },
{
"input": "1. elephant\n2. giraffe\n",
"output": {
"items": [
{
"word": "elephant",
"original_quote": "1. elephant",
"image_description": "A large grey elephant with big ears and long trunk",
"image_keywords": ["elephant", "animal", "africa"],
"image_is_complex": False,
},
{
"word": "giraffe",
"original_quote": "2. giraffe",
"image_description": "A tall giraffe with brown spots and long neck",
"image_keywords": ["giraffe", "tall", "spots"],
"image_is_complex": False,
},
],
"metadata": {
"title": "African Animals",
"description": "Safari animals vocabulary",
"grade": 2,
"type": "match",
"difficulty": 1,
},
},
"why_suitable": "Numbered list of animals - numbering will be cleaned",
},
] ]
GAME_CONFIG["examples"] = EXAMPLES

View File

@@ -6,6 +6,7 @@ core.py - Simple Game Generator Core
2. run_single() - Analyze + generate 1 game tốt nhất (1 API call) 2. run_single() - Analyze + generate 1 game tốt nhất (1 API call)
3. generate() - Generate 1 game cụ thể (không analyze) 3. generate() - Generate 1 game cụ thể (không analyze)
""" """
import os import os
import json import json
import time import time
@@ -25,14 +26,31 @@ class TokenUsage:
prompt_tokens: int = 0 prompt_tokens: int = 0
completion_tokens: int = 0 completion_tokens: int = 0
total_tokens: int = 0 total_tokens: int = 0
input_chars: int = 0 # Character count sent to LLM
output_chars: int = 0 # Character count received from LLM
def add(self, usage: Dict[str, int]): def add(self, usage: Dict[str, int]):
self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0) self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get(
self.completion_tokens += usage.get("completion_tokens", 0) or usage.get("output_tokens", 0) "input_tokens", 0
)
self.completion_tokens += usage.get("completion_tokens", 0) or usage.get(
"output_tokens", 0
)
self.total_tokens = self.prompt_tokens + self.completion_tokens self.total_tokens = self.prompt_tokens + self.completion_tokens
def add_chars(self, input_text: str, output_text: str):
"""Track character counts for LLM input/output"""
self.input_chars += len(input_text) if input_text else 0
self.output_chars += len(output_text) if output_text else 0
def to_dict(self) -> Dict[str, int]: def to_dict(self) -> Dict[str, int]:
return {"prompt_tokens": self.prompt_tokens, "completion_tokens": self.completion_tokens, "total_tokens": self.total_tokens} return {
"prompt_tokens": self.prompt_tokens,
"completion_tokens": self.completion_tokens,
"total_tokens": self.total_tokens,
"input_chars": self.input_chars,
"output_chars": self.output_chars,
}
class GameCore: class GameCore:
@@ -84,7 +102,7 @@ class GameCore:
max_items: int = 3, max_items: int = 3,
min_score: int = 20, min_score: int = 20,
validate: bool = True, validate: bool = True,
debug: bool = False debug: bool = False,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Analyze text + Generate nhiều games phù hợp. Analyze text + Generate nhiều games phù hợp.
@@ -97,7 +115,9 @@ class GameCore:
# 1. Analyze (also returns metadata) # 1. Analyze (also returns metadata)
available = enabled_games or self.registry.get_game_types() available = enabled_games or self.registry.get_game_types()
logger.info(f"Analyzing text for multi-gen. Available games: {available}") logger.info(f"Analyzing text for multi-gen. Available games: {available}")
games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug) games, scores, metadata, err = self._analyze(
text, available, min_score, tracker, debug
)
errors.extend(err) errors.extend(err)
if not games: if not games:
@@ -115,8 +135,186 @@ class GameCore:
results = self._validate(results, text) results = self._validate(results, text)
# Check if any game has items # Check if any game has items
has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict)) has_items = any(
return self._result(has_items, games, results, errors, tracker, scores, metadata) data.get("items", []) for data in results.values() if isinstance(data, dict)
)
return self._result(
has_items, games, results, errors, tracker, scores, metadata
)
# ============== 1.5. RUN FAST (1 API call: Analyze + Generate ALL suitable games) ==============
def run_fast(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 50,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""
OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp.
Output format GIỐNG HỆT run_multi():
- 1 call duy nhất thay vì 2 (analyze + generate)
- Trả về nhiều games với items
Returns: {success, games, game_scores, metadata, results, token_usage, llm}
"""
tracker = TokenUsage()
available = enabled_games or self.registry.get_game_types()
logger.info(f"[run_fast] Starting with games: {available}")
# Build FULL game schemas (giống _generate_multi)
games_schema = []
for gt in available:
game = get_game(gt)
if game:
games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt}
REQUIRED OUTPUT FORMAT:
{game.format_instructions}""")
# Format cho response
fmt = ", ".join(
[
f'"{gt}": {{"score": 0-100, "reason": "...", "items": [...]}}'
for gt in available
]
)
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""You are an educational game analyzer AND generator. In ONE response:
1. SCORE each game type (0-100) based on how well the text matches game requirements
2. GENERATE items for games with score >= {min_score}
SCORING GUIDE:
- 70-100: Text matches game requirements well → GENERATE items
- 40-69: Partial match → GENERATE items if >= min_score
- 0-39: Does not match → DO NOT generate items
GENERATION RULES:
- KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- Generate max {max_items} items per game
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
),
(
"human",
"""GAMES AND THEIR SCHEMAS:
{schemas}
SOURCE TEXT:
{text}
RESPOND with this EXACT JSON structure:
{{
"metadata": {{
"title": "short title from source",
"description": "one sentence summary",
"grade": 1-5,
"difficulty": 1-5
}},
{format}
}}""",
),
]
)
if debug:
print(f"\n{'=' * 50}\n✨ RUN FAST (1 call)\n{'=' * 50}")
print(f"Text length: {len(text)}")
try:
# Build input for tracking
invoke_params = {
"schemas": "\n\n".join(games_schema),
"text": text,
"format": fmt,
"min_score": min_score,
"max_items": max_items,
}
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp))
# Track character counts: input = all params joined, output = response content
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
if debug:
print(f"📝 Response: {resp.content[:500]}...")
data = self._parse_json(resp.content)
metadata = data.get("metadata", {})
# Process results - format giống _generate_multi
results = {}
scores = []
selected_games = []
errors = []
for gt in available:
game_data = data.get(gt, {})
if not isinstance(game_data, dict):
continue
score = game_data.get("score", 0)
reason = game_data.get("reason", "")
items = game_data.get("items", [])
scores.append({"type": gt, "score": score, "reason": reason})
if score >= min_score and items:
# Post-process items (giống _generate_multi)
processed_items = self._post_process(items, gt)
# Validate if needed
if validate:
processed_items = [
i
for i in processed_items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
# Thống nhất structure: {items: [...], metadata: {...}} - giống run_multi
results[gt] = {
"items": processed_items,
"metadata": game_data.get("metadata"),
}
if processed_items:
selected_games.append(gt)
else:
errors.append(f"No valid items for {gt}")
elif score >= min_score:
errors.append(f"No items generated for {gt}")
# Sort scores
scores.sort(key=lambda x: x.get("score", 0), reverse=True)
return self._result(
success=len(selected_games) > 0,
games=selected_games,
results=results,
errors=errors,
tracker=tracker,
scores=scores,
metadata=metadata,
)
except Exception as e:
logger.error(f"[run_fast] Error: {e}")
return self._result(False, [], {}, [str(e)], tracker)
# ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ============== # ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ==============
@@ -126,7 +324,7 @@ class GameCore:
enabled_games: Optional[List[str]] = None, enabled_games: Optional[List[str]] = None,
max_items: int = 3, max_items: int = 3,
validate: bool = True, validate: bool = True,
debug: bool = False debug: bool = False,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
1 API call: Analyze + Generate game tốt nhất. 1 API call: Analyze + Generate game tốt nhất.
@@ -142,46 +340,77 @@ class GameCore:
for gt in available: for gt in available:
game = get_game(gt) game = get_game(gt)
if game: if game:
example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}" example = (
games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}") json.dumps(
game.examples[0].get("output", {}), ensure_ascii=False, indent=2
)
if game.examples
else "{}"
)
games_info.append(
f"### {gt}\n{game.description}\nExample output:\n{example}"
)
prompt = ChatPromptTemplate.from_messages([ prompt = ChatPromptTemplate.from_messages(
("system", """You are an educational game generator. [
1. ANALYZE text and CHOOSE the BEST game type (
2. GENERATE items for that game "system",
"""You are an educational game generator.
1. ANALYZE text and CHOOSE the BEST game type
2. GENERATE items for that game
RULES: RULES:
- KEEP original language - KEEP original language
- original_quote = EXACT copy from source - original_quote = EXACT copy from source
- ALL content from source only"""), - ALL content from source only""",
("human", """GAMES: ),
{games_info} (
"human",
"""GAMES:
{games_info}
TEXT: TEXT:
{text} {text}
Choose BEST game from: {types} Choose BEST game from: {types}
Generate max {max_items} items. Generate max {max_items} items.
Return JSON: Return JSON:
{{"game_type": "chosen", "reason": "why", "items": [...]}}""") {{"game_type": "chosen", "reason": "why", "items": [...]}}""",
]) ),
]
)
content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items} content = {
"games_info": "\n\n".join(games_info),
"text": text[:2000],
"types": ", ".join(available),
"max_items": max_items,
}
if debug: if debug:
print(f"\n{'='*50}\n🎯 RUN SINGLE\n{'='*50}") print(f"\n{'=' * 50}\n🎯 RUN SINGLE\n{'=' * 50}")
try: try:
resp = (prompt | self.llm).invoke(content) resp = (prompt | self.llm).invoke(content)
tracker.add(self._get_usage(resp)) tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in content.values())
tracker.add_chars(input_text, resp.content)
data = self._parse_json(resp.content) data = self._parse_json(resp.content)
game_type = data.get("game_type") game_type = data.get("game_type")
items = self._post_process(data.get("items", []), game_type) items = self._post_process(data.get("items", []), game_type)
if validate and items: if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
return { return {
"success": len(items) > 0, "success": len(items) > 0,
@@ -190,10 +419,17 @@ Return JSON:
"items": items, "items": items,
"errors": [], "errors": [],
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
except Exception as e: except Exception as e:
return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} return {
"success": False,
"game_type": None,
"items": [],
"errors": [str(e)],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# ============== 3. GENERATE (1 game cụ thể, không analyze) ============== # ============== 3. GENERATE (1 game cụ thể, không analyze) ==============
@@ -203,7 +439,7 @@ Return JSON:
text: str, text: str,
max_items: int = 3, max_items: int = 3,
validate: bool = True, validate: bool = True,
debug: bool = False debug: bool = False,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Generate 1 game cụ thể""" """Generate 1 game cụ thể"""
tracker = TokenUsage() tracker = TokenUsage()
@@ -212,41 +448,61 @@ Return JSON:
game = get_game(game_type) game = get_game(game_type)
if not game: if not game:
return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""} return {
"success": False,
"game_type": game_type,
"items": [],
"errors": [f"Game not found: {game_type}"],
"token_usage": {},
"llm": "",
}
# Build Format Rules Section # Build Format Rules Section
format_rules_section = "" format_rules_section = ""
if game.input_format_rules: if game.input_format_rules:
rules_str = "\n".join(f"- {r}" for r in game.input_format_rules) rules_str = "\n".join(f"- {r}" for r in game.input_format_rules)
format_rules_section = f""" format_rules_section = f"""
CRITICAL: FIRST, VALIDATE THE INPUT TEXT. CRITICAL: FIRST, VALIDATE THE INPUT TEXT.
Format Rules: Format Rules:
{rules_str} {rules_str}
If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else: If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else:
{{{{ "format_error": "Input text incompatible with game requirements." }}}} {{{{ "format_error": "Input text incompatible with game requirements." }}}}
""" """
prompt = ChatPromptTemplate.from_messages([ prompt = ChatPromptTemplate.from_messages(
("system", f"""{game.generated_system_prompt} [
{format_rules_section}"""), (
("human", """TEXT TO PROCESS: "system",
{text} f"""{game.generated_system_prompt}
{format_rules_section}""",
),
(
"human",
"""TEXT TO PROCESS:
{text}
Generate content in JSON format: Generate content in JSON format:
{format_instructions}""") {format_instructions}""",
]) ),
]
)
if debug: if debug:
print(f"\n{'='*50}\n🎮 GENERATE: {game_type}\n{'='*50}") print(f"\n{'=' * 50}\n🎮 GENERATE: {game_type}\n{'=' * 50}")
try: try:
resp = (prompt | self.llm).invoke({ invoke_params = {
"text": text, "text": text,
"format_instructions": game.format_instructions "format_instructions": game.format_instructions,
}) }
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp)) tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
# 1. Parse as raw JSON first to check for format_error # 1. Parse as raw JSON first to check for format_error
raw_data = None raw_data = None
try: try:
@@ -263,7 +519,7 @@ Generate content in JSON format:
"format_error": raw_data["format_error"], "format_error": raw_data["format_error"],
"errors": [raw_data["format_error"]], "errors": [raw_data["format_error"]],
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
parsed_data = raw_data parsed_data = raw_data
@@ -274,10 +530,10 @@ Generate content in JSON format:
parsed = game.output_parser.parse(resp.content) parsed = game.output_parser.parse(resp.content)
parsed_data = parsed.model_dump() parsed_data = parsed.model_dump()
except Exception as pe: except Exception as pe:
if debug: print(f"⚠️ output_parser failed: {pe}") if debug:
print(f"⚠️ output_parser failed: {pe}")
# Keep raw_data if parser fails but we have JSON # Keep raw_data if parser fails but we have JSON
# Check format error # Check format error
if parsed_data and parsed_data.get("format_error"): if parsed_data and parsed_data.get("format_error"):
return { return {
@@ -287,7 +543,7 @@ Generate content in JSON format:
"format_error": parsed_data["format_error"], "format_error": parsed_data["format_error"],
"errors": [parsed_data["format_error"]], "errors": [parsed_data["format_error"]],
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
# Post-process # Post-process
@@ -295,7 +551,13 @@ Generate content in JSON format:
items = self._post_process(items, game_type) items = self._post_process(items, game_type)
if validate and items: if validate and items:
items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] items = [
i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
if not items: if not items:
return { return {
@@ -303,9 +565,9 @@ Generate content in JSON format:
"game_type": game_type, "game_type": game_type,
"data": None, "data": None,
"format_error": "No items extracted", "format_error": "No items extracted",
"errors": [], "errors": [],
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
if parsed_data: if parsed_data:
@@ -317,66 +579,93 @@ Generate content in JSON format:
"data": parsed_data, "data": parsed_data,
"errors": [], "errors": [],
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
except Exception as e: except Exception as e:
return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} return {
"success": False,
"game_type": game_type,
"data": None,
"errors": [str(e)],
"token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
}
# ============== PRIVATE METHODS ============== # ============== PRIVATE METHODS ==============
def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple: def _analyze(
self,
text: str,
available: List[str],
min_score: int,
tracker: TokenUsage,
debug: bool,
) -> tuple:
"""Analyze text để suggest games - với retry""" """Analyze text để suggest games - với retry"""
# Lấy context từ game configs # Lấy context từ game configs
context = get_analyzer_context() context = get_analyzer_context()
prompt = ChatPromptTemplate.from_messages([ prompt = ChatPromptTemplate.from_messages(
("system", """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements. [
(
"system",
"""You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements.
GAME REQUIREMENTS: GAME REQUIREMENTS:
{context} {context}
SCORING: SCORING:
- 70-100: Text matches game requirements well - 70-100: Text matches game requirements well
- 40-69: Partial match - 40-69: Partial match
- 0-39: Does not match requirements - 0-39: Does not match requirements
IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field. IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field.
Return valid JSON with scores AND metadata about the content: Return valid JSON with scores AND metadata about the content:
{{ {{
"scores": [ "scores": [
{{ {{
"type": "NAME_OF_GAME_TYPE", "type": "NAME_OF_GAME_TYPE",
"score": 80, "score": 80,
"reason": "..." "reason": "..."
}} }}
], ],
"metadata": {{ "metadata": {{
"title": "Title from source or create short title", "title": "Title from source or create short title",
"description": "One sentence summary", "description": "One sentence summary",
"grade": 1-5, "grade": 1-5,
"difficulty": 1-5 "difficulty": 1-5
}} }}
}}"""), }}""",
("human", """TEXT TO ANALYZE: ),
{text} (
"human",
"""TEXT TO ANALYZE:
{text}
Analyze for games: {types} Analyze for games: {types}
Return JSON:""") Return JSON:""",
]) ),
]
)
max_retries = 2 max_retries = 2
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
resp = (prompt | self.llm).invoke({ invoke_params = {
"context": context, "context": context,
"text": text[:800], "text": text,
"types": ", ".join(available) "types": ", ".join(available),
}) }
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp)) tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
if debug: if debug:
print(f"📝 Analyzer raw: {resp.content[:300]}") print(f"📝 Analyzer raw: {resp.content}")
# Parse JSON với fallback # Parse JSON với fallback
content = resp.content.strip() content = resp.content.strip()
@@ -386,7 +675,11 @@ Return JSON:""")
continue continue
data = self._parse_json(content) data = self._parse_json(content)
scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score] scores = [
s
for s in data.get("scores", [])
if s.get("type") in available and s.get("score", 0) >= min_score
]
scores.sort(key=lambda x: x.get("score", 0), reverse=True) scores.sort(key=lambda x: x.get("score", 0), reverse=True)
# Extract metadata from response # Extract metadata from response
@@ -407,15 +700,26 @@ Return JSON:""")
return available, [], {}, ["Analyze failed after retries"] return available, [], {}, ["Analyze failed after retries"]
def _generate_multi(self, games: List[str], text: str, max_items: int, tracker: TokenUsage, debug: bool) -> tuple: def _generate_multi(
self,
games: List[str],
text: str,
max_items: int,
tracker: TokenUsage,
debug: bool,
) -> tuple:
"""Generate nhiều games""" """Generate nhiều games"""
if len(games) == 1: if len(games) == 1:
result = self.generate(games[0], text, max_items, validate=False, debug=debug) result = self.generate(
games[0], text, max_items, validate=False, debug=debug
)
tracker.add(result.get("token_usage", {})) tracker.add(result.get("token_usage", {}))
# Fix: generate returns {data: {items: [...]}} not {items: [...]} # Fix: generate returns {data: {items: [...]}} not {items: [...]}
data = result.get("data") or {} data = result.get("data") or {}
items = data.get("items", []) if isinstance(data, dict) else [] items = data.get("items", []) if isinstance(data, dict) else []
return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", []) return {
games[0]: {"items": items, "metadata": data.get("metadata")}
}, result.get("errors", [])
# Multi-game: Build schema info for each game # Multi-game: Build schema info for each game
games_schema = [] games_schema = []
@@ -423,37 +727,57 @@ Return JSON:""")
game = get_game(gt) game = get_game(gt)
if game: if game:
games_schema.append(f"""### {gt.upper()} games_schema.append(f"""### {gt.upper()}
{game.generated_system_prompt} {game.generated_system_prompt}
REQUIRED OUTPUT FORMAT: REQUIRED OUTPUT FORMAT:
{game.format_instructions}""") {game.format_instructions}""")
prompt = ChatPromptTemplate.from_messages([ prompt = ChatPromptTemplate.from_messages(
("system", """You are a multi-game content generator. [
Generate items for EACH game type following their EXACT schema. (
IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.) "system",
RULES: Keep original language, use exact quotes from text."""), """You are a multi-game content generator. In ONE response:
("human", """GAMES AND THEIR SCHEMAS: 1. Generate items for EACH game type following their EXACT schema
{schemas}
SOURCE TEXT: GENERATION RULES:
{text} - KEEP original language from text
- original_quote = EXACT copy from source text
- ALL content must come from source text only
- Include ALL required fields (image_description, image_keywords, etc.)
- STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""",
),
(
"human",
"""GAMES AND THEIR SCHEMAS:
{schemas}
Generate items for: {types} SOURCE TEXT:
Return valid JSON: {{{format}}}""") {text}
])
fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]) Generate items for: {types}
Return valid JSON: {{{format}}}""",
),
]
)
fmt = ", ".join(
[f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]
)
try: try:
resp = (prompt | self.llm).invoke({ invoke_params = {
"schemas": "\n\n".join(games_schema), "schemas": "\n\n".join(games_schema),
"text": text, "text": text,
"types": ", ".join(games), "types": ", ".join(games),
"format": fmt "format": fmt,
}) }
resp = (prompt | self.llm).invoke(invoke_params)
tracker.add(self._get_usage(resp)) tracker.add(self._get_usage(resp))
# Track character counts
input_text = " ".join(str(v) for v in invoke_params.values())
tracker.add_chars(input_text, resp.content)
data = self._parse_json(resp.content) data = self._parse_json(resp.content)
results = {} results = {}
errors = [] errors = []
@@ -468,22 +792,37 @@ Return valid JSON: {{{format}}}""")
return results, errors return results, errors
except Exception as e: except Exception as e:
return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"] return {gt: {"items": [], "metadata": None} for gt in games}, [
f"Generate error: {e}"
]
def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]: def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]:
"""Validate items trong results""" """Validate items trong results"""
validated = {} validated = {}
for gt, data in results.items(): for gt, data in results.items():
items = data.get("items", []) if isinstance(data, dict) else [] items = data.get("items", []) if isinstance(data, dict) else []
valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] valid_items = [
validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None} i
for i in items
if self.validator.validate_quote(
i.get("original_quote", ""), text
).is_valid
]
validated[gt] = {
"items": valid_items,
"metadata": data.get("metadata") if isinstance(data, dict) else None,
}
return validated return validated
def _post_process(self, items: List, game_type: str) -> List[Dict]: def _post_process(self, items: List, game_type: str) -> List[Dict]:
ms = int(time.time() * 1000) ms = int(time.time() * 1000)
result = [] result = []
for i, item in enumerate(items): for i, item in enumerate(items):
d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {}) d = (
item
if isinstance(item, dict)
else (item.model_dump() if hasattr(item, "model_dump") else {})
)
d["id"] = f"{game_type[:2].upper()}-{ms}-{i}" d["id"] = f"{game_type[:2].upper()}-{ms}-{i}"
d["game_type"] = game_type d["game_type"] = game_type
result.append(d) result.append(d)
@@ -495,12 +834,23 @@ Return valid JSON: {{{format}}}""")
return json.loads(content) return json.loads(content)
def _get_usage(self, resp) -> Dict: def _get_usage(self, resp) -> Dict:
if hasattr(resp, 'response_metadata'): if hasattr(resp, "response_metadata"):
meta = resp.response_metadata meta = resp.response_metadata
return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {}))) return meta.get(
return getattr(resp, 'usage_metadata', {}) "usage", meta.get("usage_metadata", meta.get("token_usage", {}))
)
return getattr(resp, "usage_metadata", {})
def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict: def _result(
self,
success: bool,
games: List,
results: Dict,
errors: List,
tracker: TokenUsage,
scores: List = None,
metadata: Dict = None,
) -> Dict:
return { return {
"success": success, "success": success,
"games": games, "games": games,
@@ -509,5 +859,69 @@ Return valid JSON: {{{format}}}""")
"results": results, "results": results,
"errors": errors, "errors": errors,
"token_usage": tracker.to_dict(), "token_usage": tracker.to_dict(),
"llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}",
} }
# ============== ASYNC WRAPPERS (for concurrent FastAPI handling) ==============
# These methods run the blocking LLM calls in a thread pool
async def run_fast_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
min_score: int = 50,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_fast - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_fast, text, enabled_games, max_items, min_score, validate, debug
)
async def run_single_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_single - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_single, text, enabled_games, max_items, validate, debug
)
async def run_multi_async(
self,
text: str,
enabled_games: Optional[List[str]] = None,
max_items: int = 3,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for run_multi - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.run_multi, text, enabled_games, max_items, validate, debug
)
async def generate_async(
self,
text: str,
game_types: Union[List[str], str],
max_items: int = 10,
validate: bool = True,
debug: bool = False,
) -> Dict[str, Any]:
"""Async wrapper for generate - runs in thread pool to not block event loop"""
import asyncio
return await asyncio.to_thread(
self.generate, text, game_types, max_items, validate, debug
)

View File

@@ -10,6 +10,7 @@ Hệ thống sẽ:
THÊM GAME MỚI = TẠO FILE TRONG games/ THÊM GAME MỚI = TẠO FILE TRONG games/
BẬT/TẮT GAME = SỬA active: True/False trong file game BẬT/TẮT GAME = SỬA active: True/False trong file game
""" """
import importlib.util import importlib.util
from pathlib import Path from pathlib import Path
from typing import Dict, List, Any, Optional from typing import Dict, List, Any, Optional
@@ -25,6 +26,7 @@ class GameRegistry:
- game_type (string): "quiz", "sequence" - game_type (string): "quiz", "sequence"
- type_id (int): 1, 2 - type_id (int): 1, 2
""" """
_instance: Optional["GameRegistry"] = None _instance: Optional["GameRegistry"] = None
_all_games: Dict[str, GameType] = {} # Keyed by game_type _all_games: Dict[str, GameType] = {} # Keyed by game_type
_id_map: Dict[int, str] = {} # type_id -> game_type _id_map: Dict[int, str] = {} # type_id -> game_type
@@ -59,10 +61,12 @@ class GameRegistry:
game_def = self._load_game_from_file(file_path) game_def = self._load_game_from_file(file_path)
if game_def: if game_def:
self._all_games[game_def.game_type] = game_def self._all_games[game_def.game_type] = game_def
if game_def.type_id > 0: if game_def.type_id >= 0: # 0=quiz, 1=sequence are valid
self._id_map[game_def.type_id] = game_def.game_type self._id_map[game_def.type_id] = game_def.game_type
status = "" if game_def.active else "⏸️" status = "" if game_def.active else "⏸️"
print(f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})") print(
f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})"
)
except Exception as e: except Exception as e:
print(f"❌ Error loading {file_path.name}: {e}") print(f"❌ Error loading {file_path.name}: {e}")
@@ -116,9 +120,9 @@ class GameRegistry:
return self._id_map.get(type_id) return self._id_map.get(type_id)
def get_id_by_game_type(self, game_type: str) -> int: def get_id_by_game_type(self, game_type: str) -> int:
"""Convert game_type -> type_id""" """Convert game_type -> type_id. Returns -1 if not found."""
game = self._all_games.get(game_type) game = self._all_games.get(game_type)
return game.type_id if game else 0 return game.type_id if game else -1 # -1 = not found
def get_all_games(self) -> Dict[str, GameType]: def get_all_games(self) -> Dict[str, GameType]:
"""Lấy tất cả games ACTIVE""" """Lấy tất cả games ACTIVE"""
@@ -134,7 +138,9 @@ class GameRegistry:
def get_type_ids(self) -> List[int]: def get_type_ids(self) -> List[int]:
"""Lấy danh sách type_ids ACTIVE""" """Lấy danh sách type_ids ACTIVE"""
return [v.type_id for v in self._all_games.values() if v.active and v.type_id > 0] return [
v.type_id for v in self._all_games.values() if v.active and v.type_id > 0
]
def get_analyzer_context(self) -> str: def get_analyzer_context(self) -> str:
"""Tạo context cho Analyzer (chỉ từ active games)""" """Tạo context cho Analyzer (chỉ từ active games)"""
@@ -144,7 +150,7 @@ class GameRegistry:
if not game.active: if not game.active:
continue continue
hints = game.analyzer_rules # New field name hints = game.analyzer_rules # New field name
if hints: if hints:
hints_text = "\n - ".join(hints) hints_text = "\n - ".join(hints)
context_parts.append( context_parts.append(

View File

@@ -4,88 +4,180 @@ games/_template.py - TEMPLATE CHO GAME MỚI
THÊM GAME MỚI CHỈ CẦN: THÊM GAME MỚI CHỈ CẦN:
1. Copy file này 1. Copy file này
2. Rename thành <game_type>.py (ví dụ: matching.py) 2. Rename thành <game_type>.py (ví dụ: matching.py)
3. Sửa nội dung bên trong 3. Sửa nội dung bên trong theo hướng dẫn
4. DONE! Hệ thống tự động nhận diện. 4. DONE! Hệ thống tự động nhận diện.
Không cần sửa bất kỳ file nào khác! Không cần sửa bất kỳ file nào khác!
""" """
from typing import List, Optional from typing import List, Literal, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# ============== 1. SCHEMA ============== # ============== 1. ITEM SCHEMA ==============
# Định nghĩa structure của 1 item trong game # Định nghĩa structure của 1 item trong game
# BẮT BUỘC phải có: original_quote và explanation # BẮT BUỘC phải có: original_quote
class YourGameItem(BaseModel): class YourGameItem(BaseModel):
"""Schema cho 1 item của game""" """Schema cho 1 item của game"""
# Các trường BẮT BUỘC (để chống hallucination) # === TRƯỜNG BẮT BUỘC ===
original_quote: str = Field( original_quote: str = Field(
description="Trích dẫn NGUYÊN VĂN từ văn bản gốc" description="EXACT quote from source text - dùng để verify không hallucinate"
) )
explanation: str = Field(description="Giải thích")
# Thêm các trường riêng của game ở đây # === TRƯỜNG RIÊNG CỦA GAME ===
# Thêm các trường cần thiết cho game của bạn
# Ví dụ: # Ví dụ:
# question: str = Field(description="Câu hỏi") question: str = Field(description="The question")
# answer: str = Field(description="Đáp án") answer: str = Field(description="The correct answer")
# === TRƯỜNG HÌNH ẢNH (Khuyến nghị) ===
image_description: str = Field(default="", description="Visual description in English")
image_keywords: List[str] = Field(default=[], description="2-3 English keywords for image search")
image_is_complex: bool = Field(default=False, description="True if needs precise quantities/humans/complex scene")
# ============== 2. CONFIG ============== # ============== 2. METADATA SCHEMA ==============
# Cấu hình cho game # Metadata mô tả nội dung được generate
class YourGameMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary")
grade: int = Field(description="Grade level 1-5 (1=easy, 5=advanced)")
type: Literal["your_game"] = Field(default="your_game", description="Game type - MUST match game_type below")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
# ============== 3. OUTPUT SCHEMA ==============
# Top-level wrapper pairing the item list with its metadata.
class YourGameOutput(BaseModel):
    """Output wrapper - REQUIRED by the framework."""

    items: List[YourGameItem] = Field(description="List of game items")
    metadata: YourGameMetadata = Field(description="Metadata about the content")


# Output parser - derived automatically from the output schema.
output_parser = PydanticOutputParser(pydantic_object=YourGameOutput)
# ============== 4. CONFIG ==============
# Game configuration - this is the most important part of the template.
GAME_CONFIG = {
    # === REQUIRED FIELDS ===

    # Unique key for this game (used in the API) - MUST be unique.
    "game_type": "your_game",

    # Unique integer id - MUST differ from every other game.
    # NOTE(review): the shipped games use Quiz=0 and Sequence=1,
    # so new games should continue from 2 - confirm against the registry.
    "type_id": 99,  # TODO: Đổi thành số unique

    # Display name shown to users.
    "display_name": "Your Game Name",

    # Short description.
    "description": "Description of your game",

    # Schema classes - REQUIRED.
    "schema": YourGameItem,
    "output_schema": YourGameOutput,
    "output_parser": output_parser,

    # === OPTIONAL FIELDS (have defaults) ===

    # Whether the game is active.
    "active": True,

    # Maximum number of items to generate.
    "max_items": 10,

    # Rules validating input before generation (Direct Mode).
    "input_format_rules": [
        "Text should contain ... suitable for this game.",
        "Text MUST have ...",
    ],

    # Rules for the Analyzer to recognise a suitable text.
    "analyzer_rules": [
        "Text MUST contain ...",
        "NOT suitable if text is ...",
    ],

    # Rules for the Generator when creating content.
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "original_quote = EXACT quote from source text",
        "ALL content must come from source only - do NOT invent",
        # Game-specific rules go here.
        "Your specific rule 1",
        "Your specific rule 2",
        # Visual fields.
        "image_description: MUST be visual description in ENGLISH",
        "image_keywords: MUST provide 2-3 English keywords",
        "NEVER leave image fields empty!",
    ],

    # Examples - teach the LLM the expected format (filled in below).
    "examples": []
}
# ============== 5. EXAMPLES ==============
# Input/output samples the LLM learns the pattern from.
EXAMPLES = [
    {
        # Sample input text.
        "input": "Sample text for your game...",
        # Expected output - MUST match the schema.
        "output": {
            "items": [
                {
                    "original_quote": "Exact quote from input",
                    "question": "Sample question?",
                    "answer": "Sample answer",
                    "image_description": "Visual description",
                    "image_keywords": ["keyword1", "keyword2"],
                    "image_is_complex": False
                }
            ],
            "metadata": {
                "title": "Sample Title",
                "description": "Sample description",
                "grade": 2,
                "type": "your_game",
                "difficulty": 2
            }
        },
        # Why this input fits - the Analyzer learns from this field.
        "why_suitable": "Explain why this input is suitable for this game"
    },
    # Add one or two more examples for better few-shot behaviour...
]

# Attach the examples to the config.
GAME_CONFIG["examples"] = EXAMPLES
# ============== 6. POST PROCESS (Optional) ==============
# Hook that runs over the items after the LLM has generated them.
def post_process_your_game(items: List[dict]) -> List[dict]:
    """Normalize generated items in place and return the same list.

    Currently trims surrounding whitespace from non-empty ``answer`` values;
    extend this for any game-specific clean-up.
    """
    for entry in items:
        answer = entry.get("answer")
        if answer:
            entry["answer"] = answer.strip()
    return items


# Register the handler (optional).
# GAME_CONFIG["post_process_handler"] = post_process_your_game

View File

@@ -1,139 +1,172 @@
""" """
games/quiz.py - Quiz Game - Multiple choice questions games/quiz.py - Optimized for LLM Performance while keeping System Integrity
""" """
from typing import List, Literal
import re from typing import List, Literal, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
from langchain_core.output_parsers import PydanticOutputParser from langchain_core.output_parsers import PydanticOutputParser
import re
# ==========================================
# 1. OPTIMIZED SCHEMA
# ==========================================
class QuizItem(BaseModel):
    """One multiple-choice quiz item; the LLM emits raw data, code cleans it."""

    question: str = Field(description="Question text. Use ____ for blanks.")
    # 'request' may stay at its default; later logic overrides it as needed.
    request: str = Field(
        default="Choose the correct answer", description="Instruction type"
    )
    answer: str = Field(description="Correct answer text")
    options: List[str] = Field(description="List of options")
    original_quote: str = Field(description="Exact source sentence")

    # Image fields grouped so the prompt stays compact.
    image_description: str = Field(
        default="", description="Visual description (if needed)"
    )
    # default_factory avoids a shared mutable default list across instances.
    image_keywords: List[str] = Field(default_factory=list)
    image_is_complex: bool = Field(default=False)

    @field_validator("answer", "options", mode="before")
    @classmethod
    def clean_prefixes(cls, v):
        """Strip labels like 'A.', '(1)', '12.', 'Q3:' as data arrives from the LLM."""

        def clean_str(text):
            # Leading label: (A), (12), 12., A., or Q3: at the start.
            # \d+\. covers multi-digit numbering ("12.") that a single-char
            # class would miss; letters are kept single-char so real words
            # ending with a period are not eaten.
            text = re.sub(
                r"^(\([A-Za-z0-9]+\)|\d+\.|[A-Za-z]\.|Q\d*:)\s*",
                "",
                str(text),
                flags=re.IGNORECASE,
            )
            # Trailing label such as "(1)" or "(a)".
            text = re.sub(r"\s*\([A-Za-z0-9]+\)$", "", text)
            return text.strip()

        if isinstance(v, list):
            return [clean_str(item) for item in v]
        return clean_str(v)
class QuizMetadata(BaseModel):
    """Content-level metadata describing the generated quiz."""

    title: str = Field(description="Short content title")
    description: str = Field(description="Summary")
    grade: int = Field(description="Level 1-5")
    type: Literal["quiz"] = "quiz"
    difficulty: int = Field(description="Level 1-5")
class QuizOutput(BaseModel):
    """Wrapper pairing the quiz items with their metadata."""

    items: List[QuizItem]
    metadata: QuizMetadata


output_parser = PydanticOutputParser(pydantic_object=QuizOutput)
# ==========================================
# 2. COMPACT CONFIG (all keys kept, content trimmed)
# ==========================================
GAME_CONFIG = {
    # --- SYSTEM FIELDS (do not change) ---
    "game_type": "quiz",
    "display_name": "Quiz",
    "description": "Multiple choice questions",
    # NOTE(review): renumbered from 1 to 0 in this revision (sequence moved
    # 2 -> 1 at the same time) - confirm all consumers of type_id agree.
    "type_id": 0,
    "active": True,
    "max_items": 10,
    "schema": QuizItem,
    "output_schema": QuizOutput,
    "output_parser": output_parser,

    # --- USER UI HINTS (shortened display text) ---
    "input_format_rules": [
        "Text must contain specific facts or Q&A content.",
        "Suitable for multiple choice extraction.",
    ],

    # --- PRE-CHECK LOGIC (shortened) ---
    "analyzer_rules": [
        "Contains questions with options OR factual statements.",
        "Not just a list of unconnected words.",
    ],

    # --- LLM INSTRUCTIONS ---
    "generation_rules": [
        "MODE: STRICT EXTRACTION & LOCALITY PRIORITIZED.",
        "1. MANDATORY OPTIONS & LOCALITY: Only create a quiz item if 2-4 options are EXPLICITLY present and located immediately after/below the question. SKIP if options are shared in a 'Word Box' or 'Word Bank' tại đầu/cuối trang.",
        "2. ANSWER PRIORITY: Use the provided key if available. If the marker is empty, solve it yourself using grammar rules. Do not redefine existing keys.",
        "3. ZERO FABRICATION: Do NOT invent distractors. Only extract what is explicitly present.",
        "4. LOGICAL AMBIGUITY: If a question is grammatically correct with multiple options but lacks context, SKIP IT.",
        "5. SEMANTIC OPTION EXTRACTION: Extract ONLY the meaningful word/phrase. Strip away ALL labels like (1), (A), or OCR noise.",
        "6. SMART FILL-IN-THE-BLANK: If the question is a 'Fill in the blank' type, you MUST analyze the sentence structure and place the '____' at the grammatically correct position (e.g., 'Blood ____ oozing'). DO NOT blindly put it at the end. If the sentence is already a complete question (not a blank type), do not add '____'.",
        "7. METADATA: Fill metadata accurately based on content. Do not leave empty.",
    ],

    # --- EXAMPLES (keep only the single best sample as a format template) ---
    "examples": [
        {
            "input": "The giraffe has a long neck. Options: neck, leg, tail.",
            "output": {
                "items": [
                    {
                        "question": "The giraffe has a long ____.",
                        "request": "Fill in the blank",
                        "answer": "neck",
                        "options": ["neck", "leg", "tail"],
                        "original_quote": "The giraffe has a long neck.",
                        "image_description": "A giraffe",
                        "image_keywords": ["giraffe"],
                        "image_is_complex": False,
                    }
                ],
                "metadata": {
                    "title": "Animals",
                    "description": "Giraffe anatomy",
                    "grade": 2,
                    "type": "quiz",
                    "difficulty": 1,
                },
            },
            "why_suitable": "Valid extraction: Text has Fact + Options.",
        }
    ],
}
def clean_prefix(text: str) -> str: # # ==========================================
"""Remove prefixes like (1), (A), 1., A. from text""" # # 3. HANDLER (Logic hậu xử lý gọn nhẹ)
if not text: return text # # ==========================================
# Regex: Start with ( (number/letter) ) OR number/letter dot. Followed by spaces. # def post_process_quiz(items: List[dict]) -> List[dict]:
return re.sub(r'^(\(\d+\)|\([A-Za-z]\)|\d+\.|[A-Za-z]\.)\s*', '', text).strip() # valid_items = []
# for item in items:
# options = item.get("options", [])
# answer = item.get("answer", "")
# if len(options) < 2:
# continue
# # Nếu có answer từ input, thì so khớp để làm sạch
# if answer:
# matched_option = next(
# (opt for opt in options if opt.lower() == answer.lower()), None
# )
# if matched_option:
# item["answer"] = matched_option
# # Nếu có answer mà không khớp option nào thì mới cân nhắc loại (hoặc để AI tự đoán lại)
# # Nếu answer rỗng (do ngoặc trống), ta vẫn giữ câu này lại
# # (với điều kiện LLM đã được dặn là phải tự điền vào trường answer)
# if not item.get("answer"):
# # Bạn có thể chọn loại bỏ hoặc tin tưởng vào đáp án LLM tự suy luận
# pass
# item["request"] = (
# "Fill in the blank"
# if "____" in item.get("question", "")
# else "Choose the correct answer"
# )
# valid_items.append(item)
# return valid_items
def post_process_quiz(items: List[dict]) -> List[dict]: # # Đăng ký handler
"""Clean up answers and options prefixes""" # GAME_CONFIG["post_process_handler"] = post_process_quiz
for item in items:
# Clean answers
if item.get("answers"):
item["answers"] = clean_prefix(item["answers"])
# Clean options
if item.get("options") and isinstance(item["options"], list):
item["options"] = [clean_prefix(opt) for opt in item["options"]]
return items
# Register handler
GAME_CONFIG["post_process_handler"] = post_process_quiz
# ============== EXAMPLES ==============
EXAMPLES = [
{
"input": "The Sun is a star at the center of the Solar System.",
"output": {
"items": [{
"question": "Where is the Sun located?",
"answers": "At the center of the Solar System",
"options": ["At the center of the Solar System", "At the edge of the Solar System", "Near the Moon", "Outside the universe"],
"original_quote": "The Sun is a star at the center of the Solar System.",
"image_description": "The sun in the middle of planets",
"image_keywords": ["sun", "planets"],
"image_is_complex": False
}]
},
"why_suitable": "Has clear facts"
}
]

View File

@@ -1,6 +1,6 @@
""" """
games/sequence.py - Arrange Sequence Game (Sentences OR Words) games/sequence.py - Arrange Sequence Game (Sentences OR Words)
type_id = 2 type_id = 1
LLM tự quyết định dựa vào ngữ nghĩa: LLM tự quyết định dựa vào ngữ nghĩa:
- "good morning", "apple", "happy" → WORD - "good morning", "apple", "happy" → WORD
- "Hi, I'm Lisa", "The sun rises" → SENTENCE - "Hi, I'm Lisa", "The sun rises" → SENTENCE
@@ -38,7 +38,7 @@ class SequenceMetadata(BaseModel):
description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences" description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences"
) )
difficulty: int = Field( difficulty: int = Field(
description="Difficulty 1-5 for that grade." description="Difficulty 1-3 for that grade."
) )
@@ -52,59 +52,7 @@ class SequenceOutput(BaseModel):
output_parser = PydanticOutputParser(pydantic_object=SequenceOutput) output_parser = PydanticOutputParser(pydantic_object=SequenceOutput)
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
"game_type": "sequence",
"display_name": "Arrange Sequence",
"description": "Arrange sentences or words in order",
"type_id": 2,
"active": True,
"max_items": 10,
"schema": SequenceItem,
"output_schema": SequenceOutput,
"output_parser": output_parser,
"input_format_rules": [
"Text MUST be a list of items (words, phrases, sentences) to be ordered.",
"Do NOT generate sequence from multiple choice questions (A/B/C/D).",
"Do NOT generate sequence if the text is a quiz or test format.",
],
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [
"Text is a list of words, phrases, or sentences suitable for ordering",
"Items are separated by commas, semicolons, or newlines",
"Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
"NO questions required - just a list of items",
"Text is NOT a long essay or complex dialogue",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"Analyze text semantically to extract meaningful items",
"For each item, decide type: WORD/PHRASE or SENTENCE",
"- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
"- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
"NEVER fill both fields for the same item",
"Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
"Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
# CONSISTENCY RULES
"CRITICAL: All extracted items MUST be of the SAME type.",
"Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
"If input has mixed types, pick the MAJORITY type and ignore the others.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
"image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
# ============== EXAMPLES ============== # ============== EXAMPLES ==============
@@ -171,3 +119,59 @@ EXAMPLES = [
"why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field" "why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field"
} }
] ]
# ============== CONFIG ==============
GAME_CONFIG = {
    "game_type": "sequence",
    "display_name": "Arrange Sequence",
    "description": "Arrange sentences or words in order",
    "type_id": 1,
    "active": True,
    "max_items": 10,
    "schema": SequenceItem,
    "output_schema": SequenceOutput,
    "output_parser": output_parser,

    "input_format_rules": [
        "Text MUST be a list of items (words, phrases, sentences) to be ordered.",
        "Do NOT generate sequence from multiple choice questions (A/B/C/D).",
        "Do NOT generate sequence if the text is a quiz or test format.",
    ],

    # 1. Recognition rules (for the Analyzer).
    "analyzer_rules": [
        "Text is a list of words, phrases, or sentences suitable for ordering",
        "Items are separated by commas, semicolons, or newlines",
        "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
        "NO questions required - just a list of items",
        "Text is NOT a long essay or complex dialogue",
    ],

    # 2. Content-generation rules (for the Generator).
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "Analyze text semantically to extract meaningful items",
        "For each item, decide type: WORD/PHRASE or SENTENCE",
        "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
        "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
        "NEVER fill both fields for the same item",
        "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
        "Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
        # CONSISTENCY RULES
        "CRITICAL: All extracted items MUST be of the SAME type.",
        "Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
        "If input has mixed types, pick the MAJORITY type and ignore the others.",
        # VISUAL FIELDS COMPULSORY
        "image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
        "image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
    ],

    # EXAMPLES is defined directly above this config in the module, so the
    # former "if 'EXAMPLES' in globals()" guard is no longer needed.
    "examples": EXAMPLES
}

View File

@@ -74,7 +74,7 @@ DEFAULT_CONFIGS = {
"openai": ModelConfig( "openai": ModelConfig(
provider="openai", provider="openai",
model_name="gpt-4o-mini", model_name="gpt-4o-mini",
temperature=0.1 temperature=0.1,
), ),
"openai_light": ModelConfig( "openai_light": ModelConfig(
provider="openai", provider="openai",
@@ -117,13 +117,19 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
from langchain_google_genai import ChatGoogleGenerativeAI from langchain_google_genai import ChatGoogleGenerativeAI
api_key = config.api_key or os.getenv("GOOGLE_API_KEY") api_key = config.api_key or os.getenv("GOOGLE_API_KEY")
print("Using GOOGLE_API_KEY:", api_key)
if not api_key: if not api_key:
raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key") raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key")
return ChatGoogleGenerativeAI( return ChatGoogleGenerativeAI(
model=config.model_name, model=config.model_name,
temperature=config.temperature, temperature=config.temperature,
google_api_key=api_key google_api_key=api_key,
version="v1",
additional_headers={
"User-Agent": "PostmanRuntime/7.43.0",
"Accept": "*/*"
}
) )
elif provider == "openai": elif provider == "openai":
@@ -136,7 +142,8 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
return ChatOpenAI( return ChatOpenAI(
model=config.model_name, model=config.model_name,
temperature=config.temperature, temperature=config.temperature,
api_key=api_key api_key=api_key,
base_url=config.base_url or None
) )
else: else: