ai-cut/app/utils/clients/gemini-models.ts

272 lines
6.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Catalogue of models available through the Gemini API.
 *
 * Compiled from the official Google Gemini API documentation:
 * https://ai.google.dev/gemini-api/docs?hl=zh-tw
 */

/**
 * Shape of one catalogue entry.
 */
export interface GeminiModel {
  /** Identifier of the model; `getModelInfo` matches entries on this field. */
  name: string
  /** Human-readable label for the model. */
  displayName: string
  /** Free-text description of the model. */
  description: string
  /**
   * Per-feature boolean flags; `getModelsByCapability` filters on these.
   * NOTE(review): presumably each flag means "the model supports this
   * modality/feature" — whether that refers to input, output, or both is
   * not stated in this file; confirm with the callers.
   */
  capabilities: {
    text: boolean
    vision: boolean
    audio: boolean
    video: boolean
    imageGeneration: boolean
    tts: boolean
  }
  /** Single grouping label for the entry; `getModelsByCategory` filters on it. */
  category:
    | 'text'
    | 'vision'
    | 'multimodal'
    | 'image-generation'
    | 'video-generation'
    | 'audio'
    | 'tts'
    | 'robotics'
}
/**
 * Builds a complete capability record in which only the listed flags are
 * `true` and every other flag is `false`.
 *
 * A new object is created on every call, so no two catalogue entries share
 * the same `capabilities` reference.
 */
function capabilitiesOf(
  ...enabled: Array<keyof GeminiModel['capabilities']>
): GeminiModel['capabilities'] {
  const switchedOn = new Set(enabled)
  return {
    text: switchedOn.has('text'),
    vision: switchedOn.has('vision'),
    audio: switchedOn.has('audio'),
    video: switchedOn.has('video'),
    imageGeneration: switchedOn.has('imageGeneration'),
    tts: switchedOn.has('tts')
  }
}

/**
 * List of available Gemini models.
 *
 * Per the original author's note, compiled from the official documentation
 * as of 2025.
 *
 * NOTE(review): nothing in this file validates these identifiers or flags
 * against the live API — confirm them against the official model list
 * before relying on an entry.
 */
export const GEMINI_MODELS: GeminiModel[] = [
  // ---- Gemini 3 family ----
  {
    name: 'gemini-3-pro',
    displayName: 'Gemini 3 Pro',
    description: 'Google 最聰明的模型,全球最出色的多模態理解模型,建立在最先進的推論技術基礎',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },

  // ---- Gemini 2.5 family ----
  {
    name: 'gemini-2.5-pro',
    displayName: 'Gemini 2.5 Pro',
    description: 'Google 強大的推理模型,擅長程式設計和複雜的推理工作',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-pro-tts',
    displayName: 'Gemini 2.5 Pro TTS',
    description: 'Gemini 2.5 模型變體,具備原生文字轉語音 (TTS) 功能',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video', 'tts'),
    category: 'tts'
  },
  {
    name: 'gemini-2.5-flash',
    displayName: 'Gemini 2.5 Flash',
    description: '表現最均衡的模型,脈絡窗口達 100 萬個詞元,執行更多工作',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-flash-lite',
    displayName: 'Gemini 2.5 Flash-Lite',
    description: '多模態模型,兼具速度和成本效益,效能優異,適合處理高頻率工作',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-flash-image',
    displayName: 'Gemini 2.5 Flash Image',
    description: '使用原生圖像生成功能,可生成及編輯高度情境化的圖片',
    capabilities: capabilitiesOf('text', 'vision', 'imageGeneration'),
    category: 'image-generation'
  },

  // ---- Gemini 1.5 family (kept for backward compatibility) ----
  {
    name: 'gemini-1.5-pro-latest',
    displayName: 'Gemini 1.5 Pro (Latest)',
    description: '最新版本的 Gemini 1.5 Pro支援文字、圖像、音頻、視頻多模態處理',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-pro',
    displayName: 'Gemini 1.5 Pro',
    description: 'Gemini 1.5 Pro支援文字、圖像、音頻、視頻多模態處理',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-flash-latest',
    displayName: 'Gemini 1.5 Flash (Latest)',
    description: '最新版本的 Gemini 1.5 Flash輕量級快速響應支援多模態',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-flash',
    displayName: 'Gemini 1.5 Flash',
    description: 'Gemini 1.5 Flash輕量級快速響應支援多模態',
    capabilities: capabilitiesOf('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },

  // ---- Other models ----
  {
    name: 'gemini-pro',
    displayName: 'Gemini Pro',
    description: 'Gemini Pro支援文字和圖像處理',
    capabilities: capabilitiesOf('text', 'vision'),
    category: 'multimodal'
  },

  // ---- Robotics models ----
  {
    name: 'gemini-robotics-er-1.5',
    displayName: 'Gemini Robotics-ER 1.5',
    description: '視覺語言模型 (VLM),可將 Gemini 的代理功能帶入機器人領域,在實體世界中進行進階推理',
    capabilities: capabilitiesOf('text', 'vision'),
    category: 'robotics'
  }
]
// Single identifier currently shared by the text, vision and audio defaults.
const GENERAL_PURPOSE_MODEL = 'gemini-2.5-flash'

/** Default model for text generation. */
export const DEFAULT_TEXT_MODEL = GENERAL_PURPOSE_MODEL

/** Default vision model, used for image analysis. */
export const DEFAULT_VISION_MODEL = GENERAL_PURPOSE_MODEL

/** Default audio model, used for audio processing. */
export const DEFAULT_AUDIO_MODEL = GENERAL_PURPOSE_MODEL

/** Default model for image generation. */
export const DEFAULT_IMAGE_GENERATION_MODEL = 'gemini-2.5-flash-image'

/**
 * Default text-to-speech (TTS) model.
 *
 * NOTE(review): confirm this identifier is accepted by the API endpoint in
 * use; nothing in this file checks it.
 */
export const DEFAULT_TTS_MODEL = 'gemini-2.5-pro-tts'
/**
 * Picks the default model identifier for a kind of task.
 *
 * @param task - The kind of work the model is needed for.
 * @returns The matching `DEFAULT_*_MODEL` constant. `'video'` maps to the
 *   vision default, and `'text'` — as well as any value outside the declared
 *   union that reaches this function at runtime — maps to the text default.
 */
export function getModelForTask(
  task: 'text' | 'vision' | 'audio' | 'video' | 'image-generation' | 'tts'
): string {
  if (task === 'vision' || task === 'video') {
    // Video tasks reuse the vision model.
    return DEFAULT_VISION_MODEL
  }
  if (task === 'audio') {
    return DEFAULT_AUDIO_MODEL
  }
  if (task === 'image-generation') {
    return DEFAULT_IMAGE_GENERATION_MODEL
  }
  if (task === 'tts') {
    return DEFAULT_TTS_MODEL
  }
  // 'text', plus the fallback for anything unrecognised.
  return DEFAULT_TEXT_MODEL
}
/**
 * Collects the catalogue entries that belong to one category.
 *
 * @param category - Category label to match exactly.
 * @returns A new array holding the matching entries of `GEMINI_MODELS`, in
 *   catalogue order; empty when no entry has that category.
 */
export function getModelsByCategory(category: GeminiModel['category']): GeminiModel[] {
  const matches: GeminiModel[] = []
  for (const candidate of GEMINI_MODELS) {
    if (candidate.category === category) {
      matches.push(candidate)
    }
  }
  return matches
}
/**
 * Collects the catalogue entries whose flag for the given capability is set.
 *
 * @param capability - Name of the capability flag to test.
 * @returns A new array holding the entries of `GEMINI_MODELS` whose flag is
 *   strictly `true`, in catalogue order; empty when none qualify.
 */
export function getModelsByCapability(capability: keyof GeminiModel['capabilities']): GeminiModel[] {
  const supported: GeminiModel[] = []
  for (const entry of GEMINI_MODELS) {
    const { capabilities } = entry
    if (capabilities[capability] !== true) {
      continue
    }
    supported.push(entry)
  }
  return supported
}
/**
 * Looks up a catalogue entry by its model identifier.
 *
 * @param modelName - Identifier to compare against each entry's `name`
 *   (exact, case-sensitive match).
 * @returns The first entry of `GEMINI_MODELS` with that name, or `undefined`
 *   when the name is not in the catalogue.
 */
export function getModelInfo(modelName: string): GeminiModel | undefined {
  for (const entry of GEMINI_MODELS) {
    if (entry.name === modelName) {
      return entry
    }
  }
  return undefined
}