ai-cut/app/utils/clients/gemini-models.ts

272 lines
6.6 KiB
TypeScript
Raw Normal View History

2025-12-16 10:08:51 +00:00
/**
 * Gemini API model definitions.
 * Reference: Google Gemini API documentation
 * https://ai.google.dev/gemini-api/docs?hl=zh-tw
 */
/**
 * Static description of one entry in the `GEMINI_MODELS` catalogue.
 */
export interface GeminiModel {
/** Model identifier; `getModelInfo` looks entries up by this exact value. */
name: string
/** Human-readable model name. */
displayName: string
/** Free-text summary of the model (the bundled catalogue writes these in Traditional Chinese). */
description: string
/**
 * Boolean capability flags. `getModelsByCapability` selects the entries whose
 * requested flag is strictly `true`.
 * NOTE(review): presumably `text`/`vision`/`audio`/`video` describe accepted input
 * modalities while `imageGeneration`/`tts` describe output abilities — confirm.
 */
capabilities: {
text: boolean
vision: boolean
audio: boolean
video: boolean
imageGeneration: boolean
tts: boolean
}
/**
 * Single grouping label, matched by `getModelsByCategory`.
 * The bundled catalogue currently only uses 'multimodal', 'image-generation',
 * 'tts' and 'robotics'; the remaining labels are declared but unused there.
 */
category: 'text' | 'vision' | 'multimodal' | 'image-generation' | 'video-generation' | 'audio' | 'tts' | 'robotics'
}
/**
 * Builds a capability-flag record in which only the listed capabilities are `true`.
 *
 * A new object is created on every call, so no two catalogue entries share a
 * flags object. Keys are always emitted in the order
 * text, vision, audio, video, imageGeneration, tts.
 */
function capabilityFlags(
  ...enabled: Array<'text' | 'vision' | 'audio' | 'video' | 'imageGeneration' | 'tts'>
) {
  return {
    text: enabled.includes('text'),
    vision: enabled.includes('vision'),
    audio: enabled.includes('audio'),
    video: enabled.includes('video'),
    imageGeneration: enabled.includes('imageGeneration'),
    tts: enabled.includes('tts')
  }
}

/**
 * Catalogue of the Gemini models this module knows about, grouped by family.
 *
 * Each entry carries the model id (`name`), a display name, a description and
 * its capability flags plus a single category label.
 *
 * NOTE(review): some ids here ('gemini-3-pro', 'gemini-2.5-pro-tts',
 * 'gemini-robotics-er-1.5') may need a '-preview' suffix to match the model
 * codes published by the API, and the 1.5 / 'gemini-pro' entries may refer to
 * retired models — confirm against the Gemini API models page.
 */
export const GEMINI_MODELS: GeminiModel[] = [
  // --- Gemini 3 family ---
  {
    name: 'gemini-3-pro',
    displayName: 'Gemini 3 Pro',
    description: 'Google 最聰明的模型,全球最出色的多模態理解模型,建立在最先進的推論技術基礎',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },

  // --- Gemini 2.5 family ---
  {
    name: 'gemini-2.5-pro',
    displayName: 'Gemini 2.5 Pro',
    description: 'Google 強大的推理模型,擅長程式設計和複雜的推理工作',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-pro-tts',
    displayName: 'Gemini 2.5 Pro TTS',
    description: 'Gemini 2.5 模型變體,具備原生文字轉語音 (TTS) 功能',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video', 'tts'),
    category: 'tts'
  },
  {
    name: 'gemini-2.5-flash',
    displayName: 'Gemini 2.5 Flash',
    description: '表現最均衡的模型,脈絡窗口達 100 萬個詞元,執行更多工作',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-flash-lite',
    displayName: 'Gemini 2.5 Flash-Lite',
    description: '多模態模型,兼具速度和成本效益,效能優異,適合處理高頻率工作',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-2.5-flash-image',
    displayName: 'Gemini 2.5 Flash Image',
    description: '使用原生圖像生成功能,可生成及編輯高度情境化的圖片',
    capabilities: capabilityFlags('text', 'vision', 'imageGeneration'),
    category: 'image-generation'
  },

  // --- Gemini 1.5 family (kept for backward compatibility) ---
  {
    name: 'gemini-1.5-pro-latest',
    displayName: 'Gemini 1.5 Pro (Latest)',
    description: '最新版本的 Gemini 1.5 Pro支援文字、圖像、音頻、視頻多模態處理',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-pro',
    displayName: 'Gemini 1.5 Pro',
    description: 'Gemini 1.5 Pro支援文字、圖像、音頻、視頻多模態處理',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-flash-latest',
    displayName: 'Gemini 1.5 Flash (Latest)',
    description: '最新版本的 Gemini 1.5 Flash輕量級快速響應支援多模態',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },
  {
    name: 'gemini-1.5-flash',
    displayName: 'Gemini 1.5 Flash',
    description: 'Gemini 1.5 Flash輕量級快速響應支援多模態',
    capabilities: capabilityFlags('text', 'vision', 'audio', 'video'),
    category: 'multimodal'
  },

  // --- Other models ---
  {
    name: 'gemini-pro',
    displayName: 'Gemini Pro',
    description: 'Gemini Pro支援文字和圖像處理',
    capabilities: capabilityFlags('text', 'vision'),
    category: 'multimodal'
  },

  // --- Robotics models ---
  {
    name: 'gemini-robotics-er-1.5',
    displayName: 'Gemini Robotics-ER 1.5',
    description: '視覺語言模型 (VLM),可將 Gemini 的代理功能帶入機器人領域,在實體世界中進行進階推理',
    capabilities: capabilityFlags('text', 'vision'),
    category: 'robotics'
  }
]
/**
 * Default model id for the 'text' task. `getModelForTask` also returns it
 * as the fallback when the task matches none of its cases.
 */
export const DEFAULT_TEXT_MODEL = 'gemini-2.5-flash'
/**
 * Default model id for the 'vision' task; `getModelForTask` reuses it for 'video'.
 */
export const DEFAULT_VISION_MODEL = 'gemini-2.5-flash'
/**
 * Default model id for the 'audio' task.
 */
export const DEFAULT_AUDIO_MODEL = 'gemini-2.5-flash'
/**
 * Default model id for the 'image-generation' task.
 */
export const DEFAULT_IMAGE_GENERATION_MODEL = 'gemini-2.5-flash-image'
/**
 * Default model id for the 'tts' (text-to-speech) task.
 * NOTE(review): confirm this id matches a model code currently published by the Gemini API.
 */
export const DEFAULT_TTS_MODEL = 'gemini-2.5-pro-tts'
/**
 * Resolves the default model id for a kind of task.
 *
 * 'video' deliberately shares the vision default. A value that matches no
 * known task (only possible from untyped callers) yields `DEFAULT_TEXT_MODEL`.
 *
 * @param task - The kind of work the model is needed for.
 * @returns The id of the default model for that task.
 */
export function getModelForTask(
  task: 'text' | 'vision' | 'audio' | 'video' | 'image-generation' | 'tts'
): string {
  // Keyed by the task union, so adding a task without adding a row fails to compile.
  const defaultsByTask: Record<typeof task, string> = {
    text: DEFAULT_TEXT_MODEL,
    vision: DEFAULT_VISION_MODEL,
    audio: DEFAULT_AUDIO_MODEL,
    video: DEFAULT_VISION_MODEL, // video is handled by the vision model
    'image-generation': DEFAULT_IMAGE_GENERATION_MODEL,
    tts: DEFAULT_TTS_MODEL
  }

  // Strict-equality scan over own entries: inherited keys such as 'toString'
  // can never match, so unknown input always reaches the fallback.
  const hit = Object.entries(defaultsByTask).find(([knownTask]) => knownTask === task)
  return hit === undefined ? DEFAULT_TEXT_MODEL : hit[1]
}
/**
 * Lists the catalogue entries that belong to a category.
 *
 * @param category - Category label to match exactly.
 * @returns A new array of the matching `GEMINI_MODELS` entries, in catalogue
 *          order; empty when no entry has that category.
 */
export function getModelsByCategory(category: GeminiModel['category']): GeminiModel[] {
  const matches: GeminiModel[] = []
  for (const candidate of GEMINI_MODELS) {
    if (candidate.category === category) {
      matches.push(candidate)
    }
  }
  return matches
}
/**
 * Lists the catalogue entries that have a given capability flag switched on.
 *
 * @param capability - Name of the flag inside `GeminiModel['capabilities']`.
 * @returns A new array of the `GEMINI_MODELS` entries whose flag is strictly
 *          `true`, in catalogue order; empty when none qualify.
 */
export function getModelsByCapability(capability: keyof GeminiModel['capabilities']): GeminiModel[] {
  const supporting: GeminiModel[] = []
  for (const candidate of GEMINI_MODELS) {
    const flag = candidate.capabilities[capability]
    if (flag === true) {
      supporting.push(candidate)
    }
  }
  return supporting
}
/**
 * Looks up a catalogue entry by its model id.
 *
 * @param modelName - Id to compare against each entry's `name` (exact match).
 * @returns The first matching `GEMINI_MODELS` entry, or `undefined` when the
 *          id is not in the catalogue.
 */
export function getModelInfo(modelName: string): GeminiModel | undefined {
  for (const candidate of GEMINI_MODELS) {
    if (candidate.name === modelName) {
      return candidate
    }
  }
  return undefined
}