From 004a32bdf012f8fa12bf04a1b2c293ec156b71c2 Mon Sep 17 00:00:00 2001 From: babayaga Date: Sat, 22 Feb 2025 14:03:11 +0100 Subject: [PATCH] maintainence love:) --- packages/osr-code-bot/dist/package-lock.json | 4 +- packages/osr-code-bot/dist/package.json | 2 +- .../osr-code-bot/dist/stats/statistics.html | 2 +- .../models/data/openai_models.json | 2 +- .../models/data/openrouter_models.json | 1320 ++++++++--------- 5 files changed, 665 insertions(+), 665 deletions(-) diff --git a/packages/osr-code-bot/dist/package-lock.json b/packages/osr-code-bot/dist/package-lock.json index 4465406..ab3ffff 100644 --- a/packages/osr-code-bot/dist/package-lock.json +++ b/packages/osr-code-bot/dist/package-lock.json @@ -1,12 +1,12 @@ { "name": "@plastichub/kbot", - "version": "1.1.16", + "version": "1.1.17", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@plastichub/kbot", - "version": "1.1.16", + "version": "1.1.17", "license": "ISC", "dependencies": { "node-emoji": "^2.2.0" diff --git a/packages/osr-code-bot/dist/package.json b/packages/osr-code-bot/dist/package.json index a5a46e0..1a64c21 100644 --- a/packages/osr-code-bot/dist/package.json +++ b/packages/osr-code-bot/dist/package.json @@ -1,6 +1,6 @@ { "name": "@plastichub/kbot", - "version": "1.1.16", + "version": "1.1.17", "main": "main_node.js", "author": "", "license": "ISC", diff --git a/packages/osr-code-bot/dist/stats/statistics.html b/packages/osr-code-bot/dist/stats/statistics.html index 8763a46..3ba075a 100644 --- a/packages/osr-code-bot/dist/stats/statistics.html +++ b/packages/osr-code-bot/dist/stats/statistics.html @@ -188,7 +188,7 @@ footer h2 {
- + diff --git a/packages/osr-code-bot/models/data/openai_models.json b/packages/osr-code-bot/models/data/openai_models.json index 76771d8..098be29 100644 --- a/packages/osr-code-bot/models/data/openai_models.json +++ b/packages/osr-code-bot/models/data/openai_models.json @@ -1,5 +1,5 @@ { - "timestamp": 1740229118016, + "timestamp": 1740229355624, "models": [ { "id": "gpt-4o-mini-audio-preview-2024-12-17", diff --git a/packages/osr-code-bot/models/data/openrouter_models.json b/packages/osr-code-bot/models/data/openrouter_models.json index da498f7..62398a0 100644 --- a/packages/osr-code-bot/models/data/openrouter_models.json +++ b/packages/osr-code-bot/models/data/openrouter_models.json @@ -1,5 +1,5 @@ { - "timestamp": 1740229118229, + "timestamp": 1740229355965, "models": [ { "id": "perplexity/r1-1776", @@ -1657,54 +1657,6 @@ }, "per_request_limits": null }, - { - "id": "anthropic/claude-3.5-haiku-20241022:beta", - "name": "Anthropic: Claude 3.5 Haiku (2024-10-22) (self-moderated)", - "created": 1730678400, - "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000008", - "completion": "0.000004", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "anthropic/claude-3.5-haiku-20241022", - "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", - "created": 1730678400, - "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000008", - "completion": "0.000004", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 8192, - "is_moderated": true - }, - "per_request_limits": null - }, { "id": "anthropic/claude-3.5-haiku:beta", "name": "Anthropic: Claude 3.5 Haiku (self-moderated)", @@ -1754,29 +1706,53 @@ "per_request_limits": null }, { - "id": "neversleep/llama-3.1-lumimaid-70b", - "name": "NeverSleep: Lumimaid v0.2 70B", - "created": 1729555200, - "description": "Lumimaid v0.2 70B is a finetune of [Llama 3.1 70B](/meta-llama/llama-3.1-70b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 16384, + "id": "anthropic/claude-3.5-haiku-20241022:beta", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22) (self-moderated)", + "created": 1730678400, + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "context_length": 200000, "architecture": { "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" + "tokenizer": "Claude", + "instruct_type": null }, "pricing": { - "prompt": "0.000003375", - "completion": "0.0000045", + "prompt": "0.0000008", + "completion": "0.000004", "image": "0", "request": "0" }, "top_provider": { - "context_length": 16384, - "max_completion_tokens": 2048, + "context_length": 200000, + "max_completion_tokens": 8192, "is_moderated": false }, "per_request_limits": null }, + { + "id": "anthropic/claude-3.5-haiku-20241022", + "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "created": 1730678400, + "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "context_length": 200000, + "architecture": { + "modality": "text->text", + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000008", + "completion": "0.000004", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 200000, + "max_completion_tokens": 8192, + "is_moderated": true + }, + "per_request_limits": null + }, { "id": "anthracite-org/magnum-v4-72b", "name": "Magnum v4 72B", @@ -1849,6 +1825,30 @@ }, "per_request_limits": null }, + { + "id": "neversleep/llama-3.1-lumimaid-70b", + "name": "NeverSleep: Lumimaid v0.2 70B", + "created": 1729555200, + "description": "Lumimaid v0.2 70B is a finetune of [Llama 3.1 70B](/meta-llama/llama-3.1-70b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "context_length": 16384, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.000003375", + "completion": "0.0000045", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16384, + "max_completion_tokens": 2048, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "x-ai/grok-beta", "name": "xAI: Grok Beta", @@ -1993,30 +1993,6 @@ }, "per_request_limits": null }, - { - "id": "inflection/inflection-3-pi", - "name": "Inflection: Inflection 3 Pi", - "created": 1728604800, - "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", - "context_length": 8000, - "architecture": { - "modality": "text->text", - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000025", - "completion": "0.00001", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8000, - "max_completion_tokens": 1024, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "inflection/inflection-3-productivity", "name": "Inflection: Inflection 3 Productivity", @@ -2041,6 +2017,30 @@ }, "per_request_limits": null }, + { + "id": "inflection/inflection-3-pi", + "name": "Inflection: Inflection 3 Pi", + "created": 1728604800, + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", + "context_length": 8000, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000025", + "completion": "0.00001", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8000, + "max_completion_tokens": 1024, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "google/gemini-flash-1.5-8b", "name": "Google: Gemini Flash 1.5 8B", @@ -2066,10 +2066,10 @@ "per_request_limits": null }, { - "id": "anthracite-org/magnum-v2-72b", - "name": "Magnum v2 72B", + "id": "thedrummer/rocinante-12b", + "name": "Rocinante 12B", "created": 1727654400, - "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the seventh in a family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "context_length": 32768, "architecture": { "modality": "text->text", @@ -2077,8 +2077,8 @@ "instruct_type": "chatml" }, "pricing": { - "prompt": "0.000003", - "completion": "0.000003", + "prompt": "0.00000025", + "completion": "0.0000005", "image": "0", "request": "0" }, @@ -2114,10 +2114,10 @@ "per_request_limits": null }, { - "id": "thedrummer/rocinante-12b", - "name": "Rocinante 12B", + "id": "anthracite-org/magnum-v2-72b", + "name": "Magnum v2 72B", "created": 1727654400, - "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", + "description": "From the maker of [Goliath](https://openrouter.ai/models/alpindale/goliath-120b), Magnum 72B is the seventh in a family of models designed to achieve the prose quality of the Claude 3 models, notably Opus & Sonnet.\n\nThe model is based on [Qwen2 72B](https://openrouter.ai/models/qwen/qwen-2-72b-instruct) and trained with 55 million tokens of highly curated roleplay (RP) data.", "context_length": 32768, "architecture": { "modality": "text->text", @@ -2125,8 +2125,8 @@ "instruct_type": "chatml" }, "pricing": { - "prompt": "0.00000025", - "completion": "0.0000005", + "prompt": "0.000003", + "completion": "0.000003", "image": "0", "request": "0" }, @@ -2161,78 +2161,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.2-1b-instruct:free", - "name": "Meta: Llama 3.2 1B Instruct (free)", - "created": 1727222400, - "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "meta-llama/llama-3.2-1b-instruct", - "name": "Meta: Llama 3.2 1B Instruct", - "created": 1727222400, - "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00000001", - "completion": "0.00000001", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "meta-llama/llama-3.2-90b-vision-instruct", - "name": "Meta: Llama 3.2 90B Vision Instruct", - "created": 1727222400, - "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 4096, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.0000008", - "completion": "0.0000016", - "image": "0.0051456", - "request": "0" - }, - "top_provider": { - "context_length": 4096, - "max_completion_tokens": 2048, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.2-11b-vision-instruct:free", "name": "Meta: Llama 3.2 11B Vision Instruct (free)", @@ -2281,6 +2209,78 @@ }, "per_request_limits": null }, + { + "id": "meta-llama/llama-3.2-90b-vision-instruct", + "name": "Meta: Llama 3.2 90B Vision Instruct", + "created": 1727222400, + "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "context_length": 4096, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.0000008", + "completion": "0.0000016", + "image": "0.0051456", + "request": "0" + }, + "top_provider": { + "context_length": 4096, + "max_completion_tokens": 2048, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "meta-llama/llama-3.2-1b-instruct:free", + "name": "Meta: Llama 3.2 1B Instruct (free)", + "created": 1727222400, + "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "meta-llama/llama-3.2-1b-instruct", + "name": "Meta: Llama 3.2 1B Instruct", + "created": 1727222400, + "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.00000001", + "completion": "0.00000001", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", @@ -2354,8 +2354,8 @@ "per_request_limits": null }, { - "id": "openai/o1-mini-2024-09-12", - "name": "OpenAI: o1-mini (2024-09-12)", + "id": "openai/o1-mini", + "name": "OpenAI: o1-mini", "created": 1726099200, "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.\n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", "context_length": 128000, @@ -2426,8 +2426,8 @@ "per_request_limits": null }, { - "id": "openai/o1-mini", - "name": "OpenAI: o1-mini", + "id": "openai/o1-mini-2024-09-12", + "name": "OpenAI: o1-mini (2024-09-12)", "created": 1726099200, "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding.\n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", "context_length": 128000, @@ -2473,30 +2473,6 @@ }, "per_request_limits": null }, - { - "id": "cohere/command-r-08-2024", - "name": "Cohere: Command R (08-2024)", - "created": 1724976000, - "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", - "context_length": 128000, - "architecture": { - "modality": "text->text", - "tokenizer": "Cohere", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000001425", - "completion": "0.00000057", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 128000, - "max_completion_tokens": 4000, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "cohere/command-r-plus-08-2024", "name": "Cohere: Command R+ (08-2024)", @@ -2522,25 +2498,49 @@ "per_request_limits": null }, { - "id": "qwen/qwen-2-vl-7b-instruct", - "name": "Qwen2-VL 7B Instruct", - "created": 1724803200, - "description": "Qwen2 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "context_length": 4096, + "id": "cohere/command-r-08-2024", + "name": "Cohere: Command R (08-2024)", + "created": 1724976000, + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", + "context_length": 128000, "architecture": { - "modality": "text+image->text", - "tokenizer": "Qwen", + "modality": "text->text", + "tokenizer": "Cohere", "instruct_type": null }, "pricing": { - "prompt": "0.0000001", - "completion": "0.0000001", - "image": "0.0001445", + "prompt": "0.0000001425", + "completion": "0.00000057", + "image": "0", "request": "0" }, "top_provider": { - "context_length": 4096, - "max_completion_tokens": null, + "context_length": 128000, + "max_completion_tokens": 4000, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "google/gemini-flash-1.5-8b-exp", + "name": "Google: Gemini Flash 1.5 8B Experimental", + "created": 1724803200, + "description": "Gemini Flash 1.5 8B Experimental is an experimental, 8B parameter version of the [Gemini Flash 1.5](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", + "context_length": 1000000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 8192, "is_moderated": false }, "per_request_limits": null @@ -2570,25 +2570,25 @@ "per_request_limits": null }, { - "id": "google/gemini-flash-1.5-8b-exp", - "name": "Google: Gemini Flash 1.5 8B Experimental", + "id": "qwen/qwen-2-vl-7b-instruct", + "name": "Qwen2-VL 7B Instruct", "created": 1724803200, - "description": "Gemini Flash 1.5 8B Experimental is an experimental, 8B parameter version of the [Gemini Flash 1.5](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is currently experimental and not suitable for production use-cases, and may be heavily rate-limited.", - "context_length": 1000000, + "description": "Qwen2 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "context_length": 4096, "architecture": { "modality": "text+image->text", - "tokenizer": "Gemini", + "tokenizer": "Qwen", "instruct_type": null }, "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", + "prompt": "0.0000001", + "completion": "0.0000001", + "image": "0.0001445", "request": "0" }, "top_provider": { - "context_length": 1000000, - "max_completion_tokens": 8192, + "context_length": 4096, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null @@ -2761,30 +2761,6 @@ }, "per_request_limits": null }, - { - "id": "sao10k/l3-lunaris-8b", - "name": "Sao10K: Llama 3 8B Lunaris", - "created": 1723507200, - "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00000003", - "completion": "0.00000006", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "aetherwiing/mn-starcannon-12b", "name": "Aetherwiing: Starcannon 12B", @@ -2809,6 +2785,30 @@ }, "per_request_limits": null }, + { + "id": "sao10k/l3-lunaris-8b", + "name": "Sao10K: Llama 3 8B Lunaris", + "created": 1723507200, + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "context_length": 8192, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0.00000003", + "completion": "0.00000006", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "openai/gpt-4o-2024-08-06", "name": "OpenAI: GPT-4o (2024-08-06)", @@ -2833,30 +2833,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.1-405b", - "name": "Meta: Llama 3.1 405B (base)", - "created": 1722556800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "none" - }, - "pricing": { - "prompt": "0.000002", - "completion": "0.000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "nothingiisreal/mn-celeste-12b", "name": "Mistral Nemo 12B Celeste", @@ -2881,6 +2857,30 @@ }, "per_request_limits": null }, + { + "id": "meta-llama/llama-3.1-405b", + "name": "Meta: Llama 3.1 405B (base)", + "created": 1722556800, + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "none" + }, + "pricing": { + "prompt": "0.000002", + "completion": "0.000002", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "perplexity/llama-3.1-sonar-small-128k-chat", "name": "Perplexity: Llama 3.1 Sonar 8B", @@ -2905,30 +2905,6 @@ }, "per_request_limits": null }, - { - "id": "perplexity/llama-3.1-sonar-large-128k-chat", - "name": "Perplexity: Llama 3.1 Sonar 70B", - "created": 1722470400, - "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000001", - "completion": "0.000001", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "perplexity/llama-3.1-sonar-large-128k-online", "name": "Perplexity: Llama 3.1 Sonar 70B Online", @@ -2953,6 +2929,30 @@ }, "per_request_limits": null }, + { + "id": "perplexity/llama-3.1-sonar-large-128k-chat", + "name": "Perplexity: Llama 3.1 Sonar 70B", + "created": 1722470400, + "description": "Llama 3.1 Sonar is Perplexity's latest model family. It surpasses their earlier Sonar models in cost-efficiency, speed, and performance.\n\nThis is a normal offline LLM, but the [online version](/models/perplexity/llama-3.1-sonar-large-128k-online) of this model has Internet access.", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000001", + "completion": "0.000001", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "perplexity/llama-3.1-sonar-small-128k-online", "name": "Perplexity: Llama 3.1 Sonar 8B Online", @@ -3073,6 +3073,30 @@ }, "per_request_limits": null }, + { + "id": "mistralai/codestral-mamba", + "name": "Mistral: Codestral Mamba", + "created": 1721347200, + "description": "A 7.3B parameter Mamba-based model designed for code and reasoning tasks.\n\n- Linear time inference, allowing for theoretically infinite sequence lengths\n- 256k token context window\n- Optimized for quick responses, especially beneficial for code productivity\n- Performs comparably to state-of-the-art transformer models in code and reasoning tasks\n- Available under the Apache 2.0 license for free use, modification, and distribution", + "context_length": 256000, + "architecture": { + "modality": "text->text", + "tokenizer": "Mistral", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000025", + "completion": "0.00000025", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 256000, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "mistralai/mistral-nemo:free", "name": "Mistral: Mistral Nemo (free)", @@ -3122,32 +3146,8 @@ "per_request_limits": null }, { - "id": "mistralai/codestral-mamba", - "name": "Mistral: Codestral Mamba", - "created": 1721347200, - "description": "A 7.3B parameter Mamba-based model designed for code and reasoning tasks.\n\n- Linear time inference, allowing for theoretically infinite sequence lengths\n- 256k token context window\n- Optimized for quick responses, especially beneficial for code productivity\n- Performs comparably to state-of-the-art transformer models in code and reasoning tasks\n- Available under the Apache 2.0 license for free use, modification, and distribution", - "context_length": 256000, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000025", - "completion": "0.00000025", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 256000, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "openai/gpt-4o-mini", - "name": "OpenAI: GPT-4o-mini", + "id": "openai/gpt-4o-mini-2024-07-18", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", "created": 1721260800, "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "context_length": 128000, @@ -3170,8 +3170,8 @@ "per_request_limits": null }, { - "id": "openai/gpt-4o-mini-2024-07-18", - "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "id": "openai/gpt-4o-mini", + "name": "OpenAI: GPT-4o-mini", "created": 1721260800, "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "context_length": 128000, @@ -3457,6 +3457,30 @@ }, "per_request_limits": null }, + { + "id": "mistralai/mistral-7b-instruct-v0.3", + "name": "Mistral: Mistral 7B Instruct v0.3", + "created": 1716768000, + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "tokenizer": "Mistral", + "instruct_type": "mistral" + }, + "pricing": { + "prompt": "0.00000003", + "completion": "0.000000055", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "mistralai/mistral-7b-instruct:free", "name": "Mistral: Mistral 7B Instruct (free)", @@ -3505,30 +3529,6 @@ }, "per_request_limits": null }, - { - "id": "mistralai/mistral-7b-instruct-v0.3", - "name": "Mistral: Mistral 7B Instruct v0.3", - "created": 1716768000, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0.00000003", - "completion": "0.000000055", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", @@ -3673,6 +3673,30 @@ }, "per_request_limits": null }, + { + "id": "deepseek/deepseek-chat-v2.5", + "name": "DeepSeek V2.5", + "created": 1715644800, + "description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions. For model details, please visit [DeepSeek-V2 page](https://github.com/deepseek-ai/DeepSeek-V2) for more information.", + "context_length": 8192, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000002", + "completion": "0.000002", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "google/gemini-flash-1.5", "name": "Google: Gemini Flash 1.5", @@ -3698,19 +3722,19 @@ "per_request_limits": null }, { - "id": "deepseek/deepseek-chat-v2.5", - "name": "DeepSeek V2.5", - "created": 1715644800, - "description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions. For model details, please visit [DeepSeek-V2 page](https://github.com/deepseek-ai/DeepSeek-V2) for more information.", + "id": "meta-llama/llama-guard-2-8b", + "name": "Meta: LlamaGuard 2 8B", + "created": 1715558400, + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "context_length": 8192, "architecture": { "modality": "text->text", - "tokenizer": "Other", - "instruct_type": null + "tokenizer": "Llama3", + "instruct_type": "none" }, "pricing": { - "prompt": "0.000002", - "completion": "0.000002", + "prompt": "0.0000002", + "completion": "0.0000002", "image": "0", "request": "0" }, @@ -3745,30 +3769,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-guard-2-8b", - "name": "Meta: LlamaGuard 2 8B", - "created": 1715558400, - "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "none" - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "openai/gpt-4o", "name": "OpenAI: GPT-4o", @@ -4033,30 +4033,6 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-pro-1.5", - "name": "Google: Gemini Pro 1.5", - "created": 1712620800, - "description": "Google's latest multimodal model, supports image and video[0] in text or chat prompts.\n\nOptimized for language tasks including:\n\n- Code generation\n- Text generation\n- Text editing\n- Problem solving\n- Recommendations\n- Information extraction\n- Data extraction or generation\n- AI agents\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n* [0]: Video input is not available through OpenRouter at this time.", - "context_length": 2000000, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000125", - "completion": "0.000005", - "image": "0.0006575", - "request": "0" - }, - "top_provider": { - "context_length": 2000000, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "openai/gpt-4-turbo", "name": "OpenAI: GPT-4 Turbo", @@ -4081,6 +4057,30 @@ }, "per_request_limits": null }, + { + "id": "google/gemini-pro-1.5", + "name": "Google: Gemini Pro 1.5", + "created": 1712620800, + "description": "Google's latest multimodal model, supports image and video[0] in text or chat prompts.\n\nOptimized for language tasks including:\n\n- Code generation\n- Text generation\n- Text editing\n- Problem solving\n- Recommendations\n- Information extraction\n- Data extraction or generation\n- AI agents\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n* [0]: Video input is not available through OpenRouter at this time.", + "context_length": 2000000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000125", + "completion": "0.000005", + "image": "0.0006575", + "request": "0" + }, + "top_provider": { + "context_length": 2000000, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "cohere/command-r-plus", "name": "Cohere: Command R+", @@ -4273,54 +4273,6 @@ }, "per_request_limits": null }, - { - "id": "anthropic/claude-3-opus:beta", - "name": "Anthropic: Claude 3 Opus (self-moderated)", - "created": 1709596800, - "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", - "context_length": 200000, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000015", - "completion": "0.000075", - "image": "0.024", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 4096, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "anthropic/claude-3-opus", - "name": "Anthropic: Claude 3 Opus", - "created": 1709596800, - "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", - "context_length": 200000, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000015", - "completion": "0.000075", - "image": "0.024", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 4096, - "is_moderated": true - }, - "per_request_limits": null - }, { "id": "anthropic/claude-3-sonnet:beta", "name": "Anthropic: Claude 3 Sonnet (self-moderated)", @@ -4369,6 +4321,54 @@ }, "per_request_limits": null }, + { + "id": "anthropic/claude-3-opus:beta", + "name": "Anthropic: Claude 3 Opus (self-moderated)", + "created": 1709596800, + "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", + "context_length": 200000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000015", + "completion": "0.000075", + "image": "0.024", + "request": "0" + }, + "top_provider": { + "context_length": 200000, + "max_completion_tokens": 4096, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "anthropic/claude-3-opus", + "name": "Anthropic: Claude 3 Opus", + "created": 1709596800, + "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", + "context_length": 200000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000015", + "completion": "0.000075", + "image": "0.024", + "request": "0" + }, + "top_provider": { + "context_length": 200000, + "max_completion_tokens": 4096, + "is_moderated": true + }, + "per_request_limits": null + }, { "id": "cohere/command-r-03-2024", "name": "Cohere: Command R (03-2024)", @@ -4513,30 +4513,6 @@ }, "per_request_limits": null }, - { - "id": "mistralai/mistral-small", - "name": "Mistral Small", - "created": 1704844800, - "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between (Mistral NeMo 12B)[/mistralai/mistral-nemo] and (Mistral Large 2)[/mistralai/mistral-large], providing a cost-effective solution that can be deployed across various platforms and environments. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multiligual, supporting English, French, German, Italian, and Spanish.", - "context_length": 32000, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000006", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 32000, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "mistralai/mistral-tiny", "name": "Mistral Tiny", @@ -4561,6 +4537,30 @@ }, "per_request_limits": null }, + { + "id": "mistralai/mistral-small", + "name": "Mistral Small", + "created": 1704844800, + "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between (Mistral NeMo 12B)[/mistralai/mistral-nemo] and (Mistral Large 2)[/mistralai/mistral-large], providing a cost-effective solution that can be deployed across various platforms and environments. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multiligual, supporting English, French, German, Italian, and Spanish.", + "context_length": 32000, + "architecture": { + "modality": "text->text", + "tokenizer": "Mistral", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000002", + "completion": "0.0000006", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 32000, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "mistralai/mistral-medium", "name": "Mistral Medium", @@ -4609,30 +4609,6 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-pro-vision", - "name": "Google: Gemini Pro Vision 1.0", - "created": 1702425600, - "description": "Google's flagship multimodal model, supporting image and video in text or chat prompts for a text or code response.\n\nSee the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", - "context_length": 16384, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000005", - "completion": "0.0000015", - "image": "0.0025", - "request": "0" - }, - "top_provider": { - "context_length": 16384, - "max_completion_tokens": 2048, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "google/gemini-pro", "name": "Google: Gemini Pro 1.0", @@ -4657,6 +4633,30 @@ }, "per_request_limits": null }, + { + "id": "google/gemini-pro-vision", + "name": "Google: Gemini Pro Vision 1.0", + "created": 1702425600, + "description": "Google's flagship multimodal model, supporting image and video in text or chat prompts for a text or code response.\n\nSee the benchmarks and prompting guidelines from [Deepmind](https://deepmind.google/technologies/gemini/).\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", + "context_length": 16384, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000005", + "completion": "0.0000015", + "image": "0.0025", + "request": "0" + }, + "top_provider": { + "context_length": 16384, + "max_completion_tokens": 2048, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "mistralai/mixtral-8x7b", "name": "Mistral: Mixtral 8x7B (base)", @@ -4777,54 +4777,6 @@ }, "per_request_limits": null }, - { - "id": "anthropic/claude-2:beta", - "name": "Anthropic: Claude v2 (self-moderated)", - "created": 1700611200, - "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000008", - "completion": "0.000024", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 4096, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "anthropic/claude-2", - "name": "Anthropic: Claude v2", - "created": 1700611200, - "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "tokenizer": "Claude", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000008", - "completion": "0.000024", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 4096, - "is_moderated": true - }, - "per_request_limits": null - }, { "id": "anthropic/claude-2.1:beta", "name": "Anthropic: Claude v2.1 (self-moderated)", @@ -4873,6 +4825,54 @@ }, "per_request_limits": null }, + { + "id": "anthropic/claude-2:beta", + "name": "Anthropic: Claude v2 (self-moderated)", + "created": 1700611200, + "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", + "context_length": 200000, + "architecture": { + "modality": "text->text", + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000008", + "completion": "0.000024", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 200000, + "max_completion_tokens": 4096, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "anthropic/claude-2", + "name": "Anthropic: Claude v2", + "created": 1700611200, + "description": "Claude 2 delivers advancements in key capabilities for enterprises—including an industry-leading 200K token context window, significant reductions in rates of model hallucination, system prompts and a new beta feature: tool use.", + "context_length": 200000, + "architecture": { + "modality": "text->text", + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000008", + "completion": "0.000024", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 200000, + "max_completion_tokens": 4096, + "is_moderated": true + }, + "per_request_limits": null + }, { "id": "teknium/openhermes-2.5-mistral-7b", "name": "OpenHermes 2.5 Mistral 7B", @@ -4993,30 +4993,6 @@ }, "per_request_limits": null }, - { - "id": "openai/gpt-3.5-turbo-1106", - "name": "OpenAI: GPT-3.5 Turbo 16k (older v1106)", - "created": 1699228800, - "description": "An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.", - "context_length": 16385, - "architecture": { - "modality": "text->text", - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000001", - "completion": "0.000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 16385, - "max_completion_tokens": 4096, - "is_moderated": true - }, - "per_request_limits": null - }, { "id": "openai/gpt-4-1106-preview", "name": "OpenAI: GPT-4 Turbo (older v1106)", @@ -5041,6 +5017,30 @@ }, "per_request_limits": null }, + { + "id": "openai/gpt-3.5-turbo-1106", + "name": "OpenAI: GPT-3.5 Turbo 16k (older v1106)", + "created": 1699228800, + "description": "An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.", + "context_length": 16385, + "architecture": { + "modality": "text->text", + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000001", + "completion": "0.000002", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16385, + "max_completion_tokens": 4096, + "is_moderated": true + }, + "per_request_limits": null + }, { "id": "google/palm-2-chat-bison-32k", "name": "Google: PaLM 2 Chat 32k", @@ -5305,30 +5305,6 @@ }, "per_request_limits": null }, - { - "id": "mancer/weaver", - "name": "Mancer: Weaver (alpha)", - "created": 1690934400, - "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", - "context_length": 8000, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama2", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.0000015", - "completion": "0.00000225", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8000, - "max_completion_tokens": 1000, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "huggingfaceh4/zephyr-7b-beta:free", "name": "Hugging Face: Zephyr 7B (free)", @@ -5353,6 +5329,30 @@ }, "per_request_limits": null }, + { + "id": "mancer/weaver", + "name": "Mancer: Weaver (alpha)", + "created": 1690934400, + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", + "context_length": 8000, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama2", + "instruct_type": "alpaca" + }, + "pricing": { + "prompt": "0.0000015", + "completion": "0.00000225", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8000, + "max_completion_tokens": 1000, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "anthropic/claude-2.0:beta", "name": "Anthropic: Claude v2.0 (self-moderated)", @@ -5425,30 +5425,6 @@ }, "per_request_limits": null }, - { - "id": "google/palm-2-chat-bison", - "name": "Google: PaLM 2 Chat", - "created": 1689811200, - "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.", - "context_length": 9216, - "architecture": { - "modality": "text->text", - "tokenizer": "PaLM", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000001", - "completion": "0.000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 9216, - "max_completion_tokens": 1024, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "google/palm-2-codechat-bison", "name": "Google: PaLM 2 Code Chat", @@ -5473,6 +5449,30 @@ }, "per_request_limits": null }, + { + "id": "google/palm-2-chat-bison", + "name": "Google: PaLM 2 Chat", + "created": 1689811200, + "description": "PaLM 2 is a language model by Google with improved multilingual, reasoning and coding capabilities.", + "context_length": 9216, + "architecture": { + "modality": "text->text", + "tokenizer": "PaLM", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000001", + "completion": "0.000002", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 9216, + "max_completion_tokens": 1024, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "gryphe/mythomax-l2-13b:free", "name": "MythoMax 13B (free)", @@ -5569,54 +5569,6 @@ }, "per_request_limits": null }, - { - "id": "openai/gpt-3.5-turbo", - "name": "OpenAI: GPT-3.5 Turbo", - "created": 1685232000, - "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", - "context_length": 16385, - "architecture": { - "modality": "text->text", - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000005", - "completion": "0.0000015", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 16385, - "max_completion_tokens": 4096, - "is_moderated": true - }, - "per_request_limits": null - }, - { - "id": "openai/gpt-3.5-turbo-0125", - "name": "OpenAI: GPT-3.5 Turbo 16k", - "created": 1685232000, - "description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.\n\nThis version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.", - "context_length": 16385, - "architecture": { - "modality": "text->text", - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000005", - "completion": "0.0000015", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 16385, - "max_completion_tokens": 4096, - "is_moderated": true - }, - "per_request_limits": null - }, { "id": "openai/gpt-4", "name": "OpenAI: GPT-4", @@ -5664,6 +5616,54 @@ "is_moderated": true }, "per_request_limits": null + }, + { + "id": "openai/gpt-3.5-turbo-0125", + "name": "OpenAI: GPT-3.5 Turbo 16k", + "created": 1685232000, + "description": "The latest GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Sep 2021.\n\nThis version has a higher accuracy at responding in requested formats and a fix for a bug which caused a text encoding issue for non-English language function calls.", + "context_length": 16385, + "architecture": { + "modality": "text->text", + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000005", + "completion": "0.0000015", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16385, + "max_completion_tokens": 4096, + "is_moderated": true + }, + "per_request_limits": null + }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "created": 1685232000, + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", + "context_length": 16385, + "architecture": { + "modality": "text->text", + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000005", + "completion": "0.0000015", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16385, + "max_completion_tokens": 4096, + "is_moderated": true + }, + "per_request_limits": null } ] } \ No newline at end of file