{"data":[{"created":1775520000,"id":"zai-org-glm-5-1","model_spec":{"betaModel":true,"pricing":{"input":{"usd":1.75,"diem":1.75},"cache_input":{"usd":0.325,"diem":0.325},"output":{"usd":5.5,"diem":5.5}},"availableContextTokens":200000,"maxCompletionTokens":24000,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-5.1 is the next-generation large language model developed by Zhiyuan AI, featuring significantly enhanced reasoning capabilities, improved instruction following, and support for multiple languages. Supports large context windows for processing extensive text and detailed analysis with fast inference speed.","name":"GLM 5.1","modelSource":"https://huggingface.co/zai-org/GLM-5.1","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1770768000,"id":"zai-org-glm-5","model_spec":{"pricing":{"input":{"usd":1,"diem":1},"cache_input":{"usd":0.2,"diem":0.2},"output":{"usd":3.2,"diem":3.2}},"model_sets":["venice_recommendations"],"availableContextTokens":198000,"maxCompletionTokens":32000,"capabilities":{"optimizedForCode":true,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-5 is the next-generation large language model developed by Zhiyuan AI, featuring significantly enhanced reasoning capabilities, improved instruction following, and support for multiple languages. Supports large context windows for processing extensive text and detailed analysis.","name":"GLM 5","modelSource":"https://huggingface.co/zai-org/GLM-5","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773532800,"id":"z-ai-glm-5-turbo","model_spec":{"pricing":{"input":{"usd":1.2,"diem":1.2},"cache_input":{"usd":0.24,"diem":0.24},"output":{"usd":4,"diem":4}},"availableContextTokens":200000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-5 Turbo is a fast inference model from Z.ai tuned for strong performance in agent-driven environments and production coding workflows.","name":"GLM 5 Turbo","modelSource":"https://huggingface.co/zai-org/GLM-5","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775001600,"id":"z-ai-glm-5v-turbo","model_spec":{"betaModel":true,"pricing":{"input":{"usd":1.5,"diem":1.5},"cache_input":{"usd":0.3,"diem":0.3},"output":{"usd":5,"diem":5}},"availableContextTokens":200000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-5V-Turbo is Z.ai's first native multimodal agent foundation model, built for vision-based coding and agent-driven tasks with image, video, and text inputs.","name":"GLM 5V Turbo","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1770163200,"id":"olafangensan-glm-4.7-flash-heretic","model_spec":{"pricing":{"input":{"usd":0.14,"diem":0.14},"output":{"usd":0.8,"diem":0.8}},"model_sets":["venice_recommendations"],"availableContextTokens":200000,"maxCompletionTokens":24000,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-4.7-Flash-Heretic is an uncensored experimental variant of GLM-4.7-Flash, optimized for creative freedom and unfiltered dialogue with fast inference speed.","name":"GLM 4.7 Flash Heretic","modelSource":"https://huggingface.co/Olafangensan/GLM-4.7-Flash-heretic","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1769644800,"id":"zai-org-glm-4.7-flash","model_spec":{"pricing":{"input":{"usd":0.125,"diem":0.125},"output":{"usd":0.5,"diem":0.5}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-4.7-Flash is a fast inference variant of GLM-4.7, optimized for speed while maintaining strong reasoning capabilities. Ideal for applications requiring quick responses with good quality.","name":"GLM 4.7 Flash","modelSource":"https://huggingface.co/zai-org/GLM-4.7-Flash","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1711929600,"id":"zai-org-glm-4.6","model_spec":{"pricing":{"input":{"usd":0.85,"diem":0.85},"cache_input":{"usd":0.3,"diem":0.3},"output":{"usd":2.75,"diem":2.75}},"availableContextTokens":198000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp4","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-4.6 is a large language model developed by Zhiyuan AI, featuring strong reasoning capabilities and support for multiple languages. Supports the largest context window for processing extensive text and detailed analysis.","name":"GLM 4.6","modelSource":"https://huggingface.co/zai-org/GLM-4.6","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1766534400,"id":"zai-org-glm-4.7","model_spec":{"pricing":{"input":{"usd":0.55,"diem":0.55},"cache_input":{"usd":0.11,"diem":0.11},"output":{"usd":2.65,"diem":2.65}},"availableContextTokens":198000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp4","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM-4.7 is a large language model developed by Zhiyuan AI, featuring strong reasoning capabilities and support for multiple languages. Supports the largest context window for processing extensive text and detailed analysis.","name":"GLM 4.7","modelSource":"https://huggingface.co/zai-org/GLM-4.7","offline":false,"privacy":"private","traits":["default","most_intelligent","function_calling_default"]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1742262554,"id":"venice-uncensored","model_spec":{"pricing":{"input":{"usd":0.2,"diem":0.2},"output":{"usd":0.9,"diem":0.9}},"availableContextTokens":32000,"maxCompletionTokens":8192,"capabilities":{"optimizedForCode":false,"quantization":"fp16","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"deprecation":{"date":"2026-04-15T00:00:00.000Z"},"description":"Designed for maximum creative freedom and authentic interaction. Ideal for open-ended exploration, roleplay, and unfiltered dialogue. Features minimal content restrictions.","name":"Venice Uncensored 1.1","modelSource":"https://huggingface.co/cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition","offline":false,"privacy":"private","traits":["most_uncensored"]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771545600,"id":"venice-uncensored-role-play","model_spec":{"pricing":{"input":{"usd":0.5,"diem":0.5},"output":{"usd":2,"diem":2}},"availableContextTokens":128000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Optimized for creative roleplay scenarios with maximum freedom. Designed for immersive storytelling, character interactions, and open-ended creative writing.","name":"Venice Role Play Uncensored","modelSource":"https://huggingface.co/dphnAI/24B-3.2-RP-K2-final","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775433600,"id":"qwen-3-6-plus","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.625,"diem":0.625},"cache_input":{"usd":0.0625,"diem":0.0625},"cache_write":{"usd":0.78,"diem":0.78},"output":{"usd":3.75,"diem":3.75},"extended":{"context_token_threshold":256000,"input":{"usd":2.5,"diem":2.5},"output":{"usd":7.5,"diem":7.5},"cache_input":{"usd":0.0625,"diem":0.0625},"cache_write":{"usd":0.78,"diem":0.78}}},"availableContextTokens":1000000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"constraints":{"temperature":{"default":0.7},"top_p":{"default":0.8}},"description":"Qwen 3.6 Plus Uncensored is Alibaba's latest flagship reasoning model with exceptional performance across coding, reasoning, and general knowledge tasks. Features mixed reasoning, function calling, and multimodal input support.","name":"Qwen 3.6 Plus Uncensored","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1772668800,"id":"qwen3-5-9b","model_spec":{"pricing":{"input":{"usd":0.05,"diem":0.05},"output":{"usd":0.15,"diem":0.15}},"availableContextTokens":256000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"A 9B dense model with 262K native context window (extendable to 1M). Features Gated DeltaNet hybrid attention architecture for efficient long-context processing. Supports 201 languages, thinking/reasoning mode, and function calling.","name":"Qwen 3.5 9B","modelSource":"https://huggingface.co/Qwen/Qwen3.5-9B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771200000,"id":"qwen3-5-397b-a17b","model_spec":{"pricing":{"input":{"usd":0.75,"diem":0.75},"output":{"usd":4.5,"diem":4.5}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":5,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen 3.5 is Alibaba flagship reasoning model featuring a 397B parameter Mixture-of-Experts architecture with 17B active parameters. It excels at complex reasoning, coding, and general knowledge tasks.","name":"Qwen 3.5 397B","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771977600,"id":"qwen3-5-35b-a3b","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.3125,"diem":0.3125},"cache_input":{"usd":0.15625,"diem":0.15625},"output":{"usd":1.25,"diem":1.25}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":5,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"constraints":{"temperature":{"default":1},"top_p":{"default":0.95},"repetition_penalty":{"default":1}},"description":"Qwen 3.5 35B A3B is a highly efficient MoE model with 35B total parameters and only 3B active parameters. It surpasses the larger Qwen3-235B-A22B while being 6.7x smaller, excelling at reasoning, coding, and general knowledge tasks.","name":"Qwen 3.5 35B A3B","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1745903059,"id":"qwen3-235b-a22b-thinking-2507","model_spec":{"pricing":{"input":{"usd":0.45,"diem":0.45},"output":{"usd":3.5,"diem":3.5}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Built for in-depth research and handling long, complex documents. Ideal for technical work, multimodal input, and high-precision tasks.","name":"Qwen 3 235B A22B Thinking 2507","modelSource":"https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8","offline":false,"privacy":"private","traits":["default_reasoning"]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1745903059,"id":"qwen3-235b-a22b-instruct-2507","model_spec":{"pricing":{"input":{"usd":0.15,"diem":0.15},"output":{"usd":0.75,"diem":0.75}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Built for in-depth research and handling long, complex documents. Ideal for technical work, multimodal input, and high-precision tasks.","name":"Qwen 3 235B A22B Instruct 2507","modelSource":"https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1745903059,"id":"qwen3-next-80b","model_spec":{"pricing":{"input":{"usd":0.35,"diem":0.35},"output":{"usd":1.9,"diem":1.9}},"availableContextTokens":256000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp16","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Optimized for speed and efficiency.","name":"Qwen 3 Next 80b","modelSource":"https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1745903059,"id":"qwen3-coder-480b-a35b-instruct","model_spec":{"pricing":{"input":{"usd":0.75,"diem":0.75},"output":{"usd":3,"diem":3}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Optimized for code.","name":"Qwen 3 Coder 480b","modelSource":"https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct","offline":false,"privacy":"private","traits":["default_code"]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1768521600,"id":"qwen3-vl-235b-a22b","model_spec":{"pricing":{"input":{"usd":0.25,"diem":0.25},"output":{"usd":1.5,"diem":1.5}},"availableContextTokens":256000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen3-VL 235B vision-language model with MoE architecture. The most powerful VL model in the Qwen series with superior visual perception, OCR, and multimodal reasoning.","name":"Qwen3 VL 235B","modelSource":"https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct","offline":false,"privacy":"private","traits":["default_vision"]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1769472000,"id":"qwen3-coder-480b-a35b-instruct-turbo","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.35,"diem":0.35},"cache_input":{"usd":0.04,"diem":0.04},"output":{"usd":1.5,"diem":1.5}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Turbo variant of Qwen3 Coder 480B, optimized for faster inference on code tasks.","name":"Qwen 3 Coder 480B Turbo","modelSource":"https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775088000,"id":"google-gemma-4-26b-a4b-it","model_spec":{"pricing":{"input":{"usd":0.1625,"diem":0.1625},"output":{"usd":0.5,"diem":0.5}},"availableContextTokens":256000,"maxCompletionTokens":8192,"capabilities":{"optimizedForCode":false,"quantization":"bf16","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemma 4 26B A4B is a Mixture-of-Experts model from Google DeepMind with 26B total parameters and only 4B active per token, offering fast inference at high quality. It handles text, image, and video input, supports 256K context, function calling, and reasoning with configurable thinking modes.","name":"Google Gemma 4 26B A4B Instruct","modelSource":"https://huggingface.co/google/gemma-4-26B-A4B-it","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775174400,"id":"google-gemma-4-31b-it","model_spec":{"pricing":{"input":{"usd":0.175,"diem":0.175},"output":{"usd":0.5,"diem":0.5}},"availableContextTokens":256000,"maxCompletionTokens":8192,"capabilities":{"optimizedForCode":false,"quantization":"bf16","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemma 4 31B is a dense model from Google DeepMind with 31B parameters, delivering frontier-level reasoning performance. It handles text, image, and video input, supports 256K context, function calling, and configurable thinking modes.","name":"Google Gemma 4 31B Instruct","modelSource":"https://huggingface.co/google/gemma-4-31B-it","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1762214400,"id":"google-gemma-3-27b-it","model_spec":{"pricing":{"input":{"usd":0.12,"diem":0.12},"output":{"usd":0.2,"diem":0.2}},"availableContextTokens":198000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to Gemma 2.","name":"Google Gemma 3 27B Instruct","modelSource":"https://huggingface.co/google/gemma-3-27b-it","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775088000,"id":"arcee-trinity-large-thinking","model_spec":{"pricing":{"input":{"usd":0.3125,"diem":0.3125},"cache_input":{"usd":0.075,"diem":0.075},"output":{"usd":1.125,"diem":1.125}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Trinity Large Thinking is a reasoning-optimized variant of Arcee AI's Trinity-Large family, a 398B-parameter sparse Mixture-of-Experts model with approximately 13B active parameters per token, post-trained with extended chain-of-thought reasoning and agentic RL. It supports tool calling, multilingual input, and 256K context windows.","name":"Trinity Large Thinking","modelSource":"https://huggingface.co/arcee-ai/Trinity-Large-Thinking-FP8-Block","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1764547200,"id":"grok-41-fast","model_spec":{"pricing":{"input":{"usd":0.23,"diem":0.23},"cache_input":{"usd":0.06,"diem":0.06},"output":{"usd":0.57,"diem":0.57}},"model_sets":["venice_recommendations"],"availableContextTokens":1000000,"maxCompletionTokens":30000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Grok 4.1 Fast is xAI's best agentic tool-calling model that shines in real-world use cases like customer support and image analysis.","name":"Grok 4.1 Fast","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773273600,"id":"grok-4-20","model_spec":{"betaModel":true,"pricing":{"input":{"usd":2.27,"diem":2.27},"cache_input":{"usd":0.23,"diem":0.23},"output":{"usd":6.8,"diem":6.8},"extended":{"context_token_threshold":200000,"input":{"usd":4.53,"diem":4.53},"output":{"usd":13.6,"diem":13.6},"cache_input":{"usd":0.23,"diem":0.23}}},"availableContextTokens":2000000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":true},"description":"Grok 4.20 is xAI's latest multimodal reasoning model with strong tool use, structured output support, and a 2M-token context window.","name":"Grok 4.20","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773273600,"id":"grok-4-20-multi-agent","model_spec":{"betaModel":true,"pricing":{"input":{"usd":2.27,"diem":2.27},"cache_input":{"usd":0.23,"diem":0.23},"output":{"usd":6.8,"diem":6.8},"extended":{"context_token_threshold":200000,"input":{"usd":4.53,"diem":4.53},"output":{"usd":13.6,"diem":13.6},"cache_input":{"usd":0.23,"diem":0.23}}},"availableContextTokens":2000000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":true},"description":"Grok 4.20 Multi-Agent is a variant of xAI Grok 4.20 designed for collaborative, agent-based workflows. Multiple agents operate in parallel to conduct deep research, coordinate tool use, and synthesize information across complex tasks.","name":"Grok 4.20 Multi-Agent","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1768435200,"id":"mistral-small-3-2-24b-instruct","model_spec":{"pricing":{"input":{"usd":0.09375,"diem":0.09375},"output":{"usd":0.25,"diem":0.25}},"availableContextTokens":256000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Mistral Small 3.2 is a 24B parameter model optimized for efficiency and performance. Ideal for general-purpose tasks with balanced speed and capability.","name":"Mistral Small 3.2 24B Instruct","modelSource":"https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773619200,"id":"mistral-small-2603","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.1875,"diem":0.1875},"output":{"usd":0.75,"diem":0.75}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Mistral Small 4 unifies instruction following, reasoning, coding, and vision in a single 119B MoE model with 256K context and configurable reasoning effort.","name":"Mistral Small 4","modelSource":"https://huggingface.co/mistralai/Mistral-Small-4-119B-2603","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1758758400,"id":"hermes-3-llama-3.1-405b","model_spec":{"pricing":{"input":{"usd":1.1,"diem":1.1},"output":{"usd":3,"diem":3}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Hermes 3 405B is a frontier level, full parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.","name":"Hermes 3 Llama 3.1 405b","modelSource":"https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771459200,"id":"gemini-3-1-pro-preview","model_spec":{"pricing":{"input":{"usd":2.5,"diem":2.5},"cache_input":{"usd":0.5,"diem":0.5},"cache_write":{"usd":0.5,"diem":0.5},"output":{"usd":15,"diem":15},"extended":{"context_token_threshold":200000,"input":{"usd":5,"diem":5},"output":{"usd":22.5,"diem":22.5},"cache_input":{"usd":0.5,"diem":0.5},"cache_write":{"usd":0.5,"diem":0.5}}},"availableContextTokens":1000000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":true,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":20,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemini 3.1 Pro is the latest evolution of Google flagship frontier model with 1M context, advancing high-precision multimodal reasoning across text, image, and code.","name":"Gemini 3.1 Pro Preview","modelSource":"https://deepmind.google/models/gemini/pro/","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1766102400,"id":"gemini-3-flash-preview","model_spec":{"pricing":{"input":{"usd":0.7,"diem":0.7},"cache_input":{"usd":0.07,"diem":0.07},"output":{"usd":3.75,"diem":3.75}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":true,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi-turn chat, and coding assistance. It delivers near Pro level reasoning with substantially lower latency.","name":"Gemini 3 Flash Preview","modelSource":"https://deepmind.google/models/gemini/flash/","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1770249600,"id":"claude-opus-4-6","model_spec":{"betaModel":true,"pricing":{"input":{"usd":6,"diem":6},"cache_input":{"usd":0.6,"diem":0.6},"cache_write":{"usd":7.5,"diem":7.5},"output":{"usd":30,"diem":30}},"model_sets":["venice_recommendations"],"availableContextTokens":1000000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Claude Opus 4.6 is Anthropic's most capable reasoning model, building on Opus 4.5 with enhanced performance across complex software engineering, agentic workflows, and long-horizon tasks. It features a 1M token context window, improved multimodal capabilities, and stronger robustness to prompt injection.","name":"Claude Opus 4.6","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1775606400,"id":"claude-opus-4-6-fast","model_spec":{"betaModel":true,"pricing":{"input":{"usd":36,"diem":36},"cache_input":{"usd":3.6,"diem":3.6},"cache_write":{"usd":45,"diem":45},"output":{"usd":180,"diem":180}},"availableContextTokens":1000000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Claude Opus 4.6 (Fast) is a speed-optimized variant of Anthropic's most capable reasoning model, offering the same 1M token context window and strong performance across complex software engineering, agentic workflows, and long-horizon tasks — with lower latency via OpenRouter's optimized routing.","name":"Claude Opus 4.6 Fast","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1764979200,"id":"claude-opus-4-5","model_spec":{"pricing":{"input":{"usd":6,"diem":6},"cache_input":{"usd":0.6,"diem":0.6},"cache_write":{"usd":7.5,"diem":7.5},"output":{"usd":30,"diem":30}},"availableContextTokens":198000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Claude Opus 4.5 is Anthropic's frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection.","name":"Claude Opus 4.5","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771286400,"id":"claude-sonnet-4-6","model_spec":{"betaModel":true,"pricing":{"input":{"usd":3.6,"diem":3.6},"cache_input":{"usd":0.36,"diem":0.36},"cache_write":{"usd":4.5,"diem":4.5},"output":{"usd":18,"diem":18}},"availableContextTokens":1000000,"maxCompletionTokens":64000,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Claude Sonnet 4.6 is Anthropic's best combination of speed and intelligence, offering strong performance on coding, reasoning, and general tasks with excellent speed and cost efficiency. It features a 1M token context window and 64K max output tokens.","name":"Claude Sonnet 4.6","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1736899200,"id":"claude-sonnet-4-5","model_spec":{"pricing":{"input":{"usd":3.75,"diem":3.75},"cache_input":{"usd":0.375,"diem":0.375},"cache_write":{"usd":4.69,"diem":4.69},"output":{"usd":18.75,"diem":18.75}},"availableContextTokens":198000,"maxCompletionTokens":64000,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Claude Sonnet 4.5 is Anthropic's balanced model offering strong performance on coding, reasoning, and general tasks with good speed and cost efficiency.","name":"Claude Sonnet 4.5","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1762387200,"id":"openai-gpt-oss-120b","model_spec":{"pricing":{"input":{"usd":0.07,"diem":0.07},"output":{"usd":0.3,"diem":0.3}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation","name":"OpenAI GPT OSS 120B","modelSource":"https://huggingface.co/openai/gpt-oss-120b","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1765324800,"id":"kimi-k2-thinking","model_spec":{"pricing":{"input":{"usd":0.75,"diem":0.75},"cache_input":{"usd":0.375,"diem":0.375},"output":{"usd":3.2,"diem":3.2}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"int4","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"deprecation":{"date":"2026-05-06T00:00:00.000Z"},"description":"Kimi K2 Thinking is Moonshot AIs most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows.","name":"Kimi K2 Thinking","modelSource":"https://huggingface.co/moonshotai/Kimi-K2-Thinking","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1769548800,"id":"kimi-k2-5","model_spec":{"pricing":{"input":{"usd":0.56,"diem":0.56},"cache_input":{"usd":0.11,"diem":0.11},"output":{"usd":3.5,"diem":3.5}},"model_sets":["venice_recommendations"],"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Kimi K2.5 is Moonshot AIs most advanced open reasoning model, featuring trillion-parameter Mixture-of-Experts architecture with 32B active parameters and 256K context windows.","name":"Kimi K2.5","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1764806400,"id":"deepseek-v3.2","model_spec":{"pricing":{"input":{"usd":0.33,"diem":0.33},"cache_input":{"usd":0.16,"diem":0.16},"output":{"usd":0.48,"diem":0.48}},"model_sets":["venice_recommendations"],"availableContextTokens":160000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"DeepSeek-V3.2 is an efficient large language model with DeepSeek Sparse Attention (DSA) for long contexts. It features strong reasoning and tool-use skills, achieving top results on the 2025 IMO and IOI.","name":"DeepSeek V3.2","modelSource":"https://huggingface.co/deepseek-ai/DeepSeek-V3.2","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1774310400,"id":"aion-labs-aion-2-0","model_spec":{"pricing":{"input":{"usd":1,"diem":1},"cache_input":{"usd":0.25,"diem":0.25},"output":{"usd":2,"diem":2}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Aion 2.0 is a DeepSeek V3.2-based model fine-tuned for immersive roleplaying and long-form storytelling. It excels at introducing tension, crises, and meaningful conflict into narratives, keeping stories unpredictable and deeply engaging. The model handles mature and darker themes with exceptional nuance, avoiding sanitized or surface-level treatment. Available as an early alpha—your feedback helps shape its development.","name":"Aion 2.0","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1727966436,"id":"llama-3.2-3b","model_spec":{"pricing":{"input":{"usd":0.15,"diem":0.15},"output":{"usd":0.6,"diem":0.6}},"availableContextTokens":128000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"fp16","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"name":"Llama 3.2 3B","modelSource":"https://huggingface.co/meta-llama/Llama-3.2-3B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1743897600,"id":"llama-3.3-70b","model_spec":{"pricing":{"input":{"usd":0.7,"diem":0.7},"output":{"usd":2.8,"diem":2.8}},"availableContextTokens":128000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"name":"Llama 3.3 70B","modelSource":"https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1765584000,"id":"openai-gpt-52","model_spec":{"pricing":{"input":{"usd":2.19,"diem":2.19},"cache_input":{"usd":0.219,"diem":0.219},"output":{"usd":17.5,"diem":17.5}},"model_sets":["venice_recommendations"],"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context performance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.","name":"GPT-5.2","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1736899200,"id":"openai-gpt-52-codex","model_spec":{"pricing":{"input":{"usd":2.19,"diem":2.19},"cache_input":{"usd":0.219,"diem":0.219},"output":{"usd":17.5,"diem":17.5}},"availableContextTokens":256000,"maxCompletionTokens":65536,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.2 Codex is OpenAI specialized coding model built on GPT-5.2, optimized for advanced software development, code generation, and technical problem-solving.","name":"GPT-5.2 Codex","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771891200,"id":"openai-gpt-53-codex","model_spec":{"betaModel":true,"pricing":{"input":{"usd":2.19,"diem":2.19},"cache_input":{"usd":0.219,"diem":0.219},"output":{"usd":17.5,"diem":17.5}},"availableContextTokens":400000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.3 Codex is OpenAI specialized coding model built on GPT-5.3, optimized for advanced software development, code generation, and technical problem-solving.","name":"GPT-5.3 Codex","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1772668800,"id":"openai-gpt-54","model_spec":{"betaModel":true,"pricing":{"input":{"usd":3.13,"diem":3.13},"cache_input":{"usd":0.313,"diem":0.313},"output":{"usd":18.8,"diem":18.8}},"availableContextTokens":1000000,"maxCompletionTokens":131072,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.4 is the latest frontier model in the GPT-5 series with a 1M+ context window, offering improved agentic and long context performance. It uses adaptive reasoning to dynamically allocate computation across tasks.","name":"GPT-5.4","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1772668800,"id":"openai-gpt-54-pro","model_spec":{"betaModel":true,"pricing":{"input":{"usd":37.5,"diem":37.5},"output":{"usd":225,"diem":225},"extended":{"context_token_threshold":272000,"input":{"usd":75,"diem":75},"output":{"usd":337.5,"diem":337.5}}},"availableContextTokens":1000000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning for complex, high-stakes tasks. It provides a 1M+ token context window (922K input, 128K output) and supports text and image inputs.","name":"GPT-5.4 Pro","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1774569600,"id":"openai-gpt-54-mini","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.9375,"diem":0.9375},"cache_input":{"usd":0.09375,"diem":0.09375},"output":{"usd":5.625,"diem":5.625}},"availableContextTokens":400000,"maxCompletionTokens":128000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT-5.4 Mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding, and tool use.","name":"GPT-5.4 Mini","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1772236800,"id":"openai-gpt-4o-2024-11-20","model_spec":{"pricing":{"input":{"usd":3.125,"diem":3.125},"output":{"usd":12.5,"diem":12.5}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"OpenAI's multimodal flagship model with vision capabilities, strong reasoning, and broad knowledge. Popular for its balanced performance across tasks. Version: 2024-11-20.","name":"GPT-4o","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1772236800,"id":"openai-gpt-4o-mini-2024-07-18","model_spec":{"pricing":{"input":{"usd":0.1875,"diem":0.1875},"cache_input":{"usd":0.09375,"diem":0.09375},"output":{"usd":0.75,"diem":0.75}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"OpenAI's cost-efficient small model that delivers GPT-4 level intelligence at a fraction of the cost. Ideal for high-volume applications requiring strong reasoning. Version: 2024-07-18.","name":"GPT-4o Mini","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1764547200,"id":"minimax-m21","model_spec":{"pricing":{"input":{"usd":0.35,"diem":0.35},"cache_input":{"usd":0.04,"diem":0.04},"output":{"usd":1.5,"diem":1.5}},"availableContextTokens":198000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":true,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development.","name":"MiniMax M2.1","modelSource":"https://huggingface.co/MiniMaxAI/MiniMax-M2.1","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1770854400,"id":"minimax-m25","model_spec":{"pricing":{"input":{"usd":0.34,"diem":0.34},"cache_input":{"usd":0.04,"diem":0.04},"output":{"usd":1.19,"diem":1.19}},"availableContextTokens":198000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"MiniMax-M2.5 is a state-of-the-art large language model optimized for coding, agentic workflows, and modern application development with enhanced reasoning capabilities.","name":"MiniMax M2.5","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"minimax-m27","model_spec":{"pricing":{"input":{"usd":0.375,"diem":0.375},"cache_input":{"usd":0.075,"diem":0.075},"output":{"usd":1.5,"diem":1.5}},"availableContextTokens":198000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":false,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity with advanced agentic capabilities through multi-agent collaboration.","name":"MiniMax M2.7","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1771545600,"id":"mercury-2","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.3125,"diem":0.3125},"cache_input":{"usd":0.03125,"diem":0.03125},"output":{"usd":0.9375,"diem":0.9375}},"availableContextTokens":128000,"maxCompletionTokens":50000,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":true,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Mercury 2 is a diffusion-based reasoning LLM from Inception, delivering over 1,000 tokens per second — 5x faster than leading speed-optimized models — with strong reasoning, tool use, and structured output capabilities.","name":"Mercury 2","modelSource":"","offline":false,"privacy":"anonymized","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1769472000,"id":"nvidia-nemotron-3-nano-30b-a3b","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.075,"diem":0.075},"output":{"usd":0.3,"diem":0.3}},"availableContextTokens":128000,"maxCompletionTokens":16384,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"NVIDIA Nemotron 3 Nano 30B is a compact and efficient language model from NVIDIA, optimized for fast inference while maintaining strong performance across diverse tasks.","name":"NVIDIA Nemotron 3 Nano 30B","modelSource":"https://huggingface.co/nvidia/Nemotron-3-Nano-30B-A3B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1774310400,"id":"nvidia-nemotron-cascade-2-30b-a3b","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.14,"diem":0.14},"output":{"usd":0.8,"diem":0.8}},"availableContextTokens":256000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":true,"supportsTeeAttestation":false,"supportsE2EE":false,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Nemotron Cascade 2 30B A3B is a reasoning-optimized language model from NVIDIA, designed for efficient inference with strong reasoning capabilities across complex tasks.","name":"Nemotron Cascade 2 30B A3B","modelSource":"https://huggingface.co/nvidia/Nemotron-Cascade-2-30B-A3B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-venice-uncensored-24b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.25,"diem":0.25},"output":{"usd":1.15,"diem":1.15}},"availableContextTokens":32000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Venice Uncensored 1.1 running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.","name":"Venice Uncensored 1.1","modelSource":"https://huggingface.co/cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-gemma-3-27b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.14,"diem":0.14},"output":{"usd":0.5,"diem":0.5}},"availableContextTokens":40000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Gemma 3 27B running in a Trusted Execution Environment (TEE). Google's multimodal model supporting vision-language input with 140+ language understanding, with hardware attestation evidence available for independent verification.","name":"Gemma 3 27B","modelSource":"https://huggingface.co/google/gemma-3-27b-it","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-glm-4-7-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":1.1,"diem":1.1},"output":{"usd":4.15,"diem":4.15}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM 4.7 running in a Trusted Execution Environment (TEE). Z.AI's flagship model with enhanced programming capabilities and stable multi-step reasoning, with hardware attestation evidence available for independent verification.","name":"GLM 4.7","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-glm-4-7-flash-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.13,"diem":0.13},"output":{"usd":0.55,"diem":0.55}},"availableContextTokens":198000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":true,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM 4.7 Flash running in a Trusted Execution Environment (TEE). A 30B-class model optimized for agentic coding with strong long-horizon task planning, with hardware attestation evidence available for independent verification.","name":"GLM 4.7 Flash","modelSource":"","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-gpt-oss-20b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.05,"diem":0.05},"output":{"usd":0.19,"diem":0.19}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT OSS 20B running in a Trusted Execution Environment (TEE). OpenAI's compact open-weight 21B MoE model with 3.6B active parameters, optimized for lower-latency inference, with hardware attestation evidence available for independent verification.","name":"GPT OSS 20B","modelSource":"https://huggingface.co/openai/gpt-oss-20b","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-gpt-oss-120b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.13,"diem":0.13},"output":{"usd":0.65,"diem":0.65}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GPT OSS 120B running in a Trusted Execution Environment (TEE). OpenAI's open-weight 117B-parameter MoE model with configurable reasoning depth and native tool use, with hardware attestation evidence available for independent verification.","name":"GPT OSS 120B","modelSource":"https://huggingface.co/openai/gpt-oss-120b","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-qwen-2-5-7b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.05,"diem":0.05},"output":{"usd":0.13,"diem":0.13}},"availableContextTokens":32000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen 2.5 7B Instruct running in a Trusted Execution Environment (TEE). A compact model with strong coding, math, and multilingual capabilities supporting 29+ languages, with hardware attestation evidence available for independent verification.","name":"Qwen 2.5 7B","modelSource":"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-qwen3-30b-a3b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.19,"diem":0.19},"output":{"usd":0.69,"diem":0.69}},"availableContextTokens":256000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen3 30B A3B running in a Trusted Execution Environment (TEE). A MoE model with 30.5B total parameters and 3.3B activated per inference, supporting ultra-long 256K context, with hardware attestation evidence available for independent verification.","name":"Qwen3 30B A3B","modelSource":"https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-qwen3-vl-30b-a3b-p","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.25,"diem":0.25},"output":{"usd":0.9,"diem":0.9}},"availableContextTokens":128000,"maxCompletionTokens":4096,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":false,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen3 VL 30B A3B running in a Trusted Execution Environment (TEE). A multimodal model unifying text generation with visual understanding for images and videos, with hardware attestation evidence available for independent verification.","name":"Qwen3 VL 30B A3B","modelSource":"https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-glm-5","model_spec":{"betaModel":true,"pricing":{"input":{"usd":1.1,"diem":1.1},"output":{"usd":4.15,"diem":4.15}},"availableContextTokens":198000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"fp8","supportsAudioInput":false,"supportsFunctionCalling":false,"supportsLogProbs":false,"supportsMultipleImages":false,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":false,"supportsVision":false,"supportsWebSearch":true,"supportsXSearch":false},"description":"GLM 5 running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.","name":"GLM 5","modelSource":"https://huggingface.co/zai-org/GLM-5","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"},{"created":1773792000,"id":"e2ee-qwen3-5-122b-a10b","model_spec":{"betaModel":true,"pricing":{"input":{"usd":0.5,"diem":0.5},"output":{"usd":4,"diem":4}},"availableContextTokens":128000,"maxCompletionTokens":32768,"capabilities":{"optimizedForCode":false,"quantization":"not-available","supportsAudioInput":false,"supportsFunctionCalling":true,"supportsLogProbs":false,"supportsMultipleImages":true,"maxImages":10,"supportsReasoning":true,"supportsReasoningEffort":false,"supportsResponseSchema":false,"supportsTeeAttestation":true,"supportsE2EE":true,"supportsVideoInput":true,"supportsVision":true,"supportsWebSearch":true,"supportsXSearch":false},"description":"Qwen3.5 122B A10B running in a Trusted Execution Environment (TEE). Hardware attestation evidence is available for independent verification of enclave identity and configuration.","name":"Qwen3.5 122B A10B","modelSource":"https://huggingface.co/Qwen/Qwen3.5-122B-A10B","offline":false,"privacy":"private","traits":[]},"object":"model","owned_by":"venice.ai","type":"text"}],"object":"list","type":"text"}