diff --git a/packages/osr-code-bot/README.md b/packages/osr-code-bot/README.md index 57e8492..e9eb307 100644 --- a/packages/osr-code-bot/README.md +++ b/packages/osr-code-bot/README.md @@ -130,6 +130,7 @@ When creating content ### Initialize folder ```kbot init``` + # Command Line Parameters This document describes all available command line parameters. diff --git a/packages/osr-code-bot/dist/README.md b/packages/osr-code-bot/dist/README.md index 6c1614f..e9eb307 100644 --- a/packages/osr-code-bot/dist/README.md +++ b/packages/osr-code-bot/dist/README.md @@ -1,4 +1,4 @@ -# @plastichub/code-bot +# @plastichub/kbot AI-powered command-line tool for code modifications and project management that supports multiple AI models and routers. @@ -42,7 +42,7 @@ KBot supports both OpenRouter and OpenAI APIs. You'll need at least one of these ### Installation using Node NPM package manager ```bash -npm install -g @plastichub/code-bot +npm install -g @plastichub/kbot ``` ## Configuration @@ -78,6 +78,9 @@ Create configuration at `$HOME/.osr/.config.json` (or export OSR_CONFIG with pat "serpapi": { "key": "your SerpAPI key (optional, used for web searches(places, google maps))" }, + "deepseek": { + "key": "your DeepSeek API key (optional)" + }, } ``` @@ -106,6 +109,28 @@ When creating content - always add links - when sending emails, always add 'Best regards, [Your Name]' ``` +## Commands + +### Prompt + +```kbot "create Astro minimal boilerplate, use starlight theme. Install dependencies via NPM tool"``` + +### Fetch latest models + +```kbot fetch``` + +### Print examples + +```kbot examples``` + +### Print extended help + +```kbot help-md``` + +### Initialize folder + +```kbot init``` + # Command Line Parameters This document describes all available command line parameters. @@ -117,11 +142,18 @@ This document describes all available command line parameters. | `path` | Target directory | `.` | No | | `prompt` | The prompt. 
Supports file paths and environment variables | `./prompt.md` | No | | `output` | Optional output path for modified files (Tool mode only) | - | No | -| `dst` | Optional destination path for the result, will substitute ${MODEL} and ${ROUTER} in the path. Used for "completion" mode | - | No | +| `dst` | Optional destination path for the result, will substitute ${MODEL} and ${ROUTER} in the path. | - | No | | `model` | AI model to use for processing | `anthropic/claude-3.5-sonnet` | No | | `router` | Router to use: openai or openrouter | `openrouter` | No | | `mode` | Chat completion mode: "completion" (without tools) or "tools" | `tools` | No | +## Advanced Parameters + +| Parameter | Description | Default | Required | +|-----------|-------------|---------|----------| +| `each` | Target directory | `.` | No | +| `dry` | Dry run - only write out parameters without making API calls | `false` | No | + ## File Selection & Tools | Parameter | Description | Default | Required | @@ -146,35 +178,7 @@ This document describes all available command line parameters. | `logLevel` | Logging level for the application (0-4) | `2` | No | | `logs` | Logging directory | `./.kbot` | No | | `dump` | Create a script | - | No | -| `dry` | Dry run - only write out parameters without making API calls | `false` | No | -## Examples - -```bash -# Basic usage with default parameters -kbot --prompt="What are the changes needed?" 
- -# Specify model and router -kbot --model="gpt-4" --router="openai" --prompt="Analyze this code" - -# Process specific files -kbot --include="src/*.ts" --include="package.json" --prompt="Check for security issues" - -# Dry run with custom logging -kbot --dry=true --logLevel=4 --prompt="Test run" -``` - -## Environment Variables - -Many path-based parameters support environment variables using the `${VARIABLE}` syntax: - -- `${POLYMECH-ROOT}` - Root directory for Polymech -- `${OSR-ROOT}` - Root directory for OSR - -Example: -```bash -kbot --profile="${POLYMECH-ROOT}/custom-profile.json" -``` # Working on Larger Directories diff --git a/packages/osr-code-bot/dist/package-lock.json b/packages/osr-code-bot/dist/package-lock.json index 33b58c5..178752e 100644 --- a/packages/osr-code-bot/dist/package-lock.json +++ b/packages/osr-code-bot/dist/package-lock.json @@ -1,12 +1,12 @@ { "name": "@plastichub/kbot", - "version": "1.1.9", + "version": "1.1.12", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@plastichub/kbot", - "version": "1.1.9", + "version": "1.1.12", "license": "ISC", "dependencies": { "node-emoji": "^2.2.0" diff --git a/packages/osr-code-bot/dist/package.json b/packages/osr-code-bot/dist/package.json index a6a889f..4832020 100644 --- a/packages/osr-code-bot/dist/package.json +++ b/packages/osr-code-bot/dist/package.json @@ -1,6 +1,6 @@ { "name": "@plastichub/kbot", - "version": "1.1.9", + "version": "1.1.12", "main": "main_node.js", "author": "", "license": "ISC", diff --git a/packages/osr-code-bot/dist/stats/statistics.html b/packages/osr-code-bot/dist/stats/statistics.html index bd9cd46..70421ef 100644 --- a/packages/osr-code-bot/dist/stats/statistics.html +++ b/packages/osr-code-bot/dist/stats/statistics.html @@ -188,7 +188,7 @@ footer h2 {
- + diff --git a/packages/osr-code-bot/models/data/openai_models.json b/packages/osr-code-bot/models/data/openai_models.json index be68856..1804a4e 100644 --- a/packages/osr-code-bot/models/data/openai_models.json +++ b/packages/osr-code-bot/models/data/openai_models.json @@ -1,30 +1,6 @@ { - "timestamp": 1738438200064, + "timestamp": 1739276098767, "models": [ - { - "id": "gpt-4o-audio-preview-2024-10-01", - "object": "model", - "created": 1727389042, - "owned_by": "system" - }, - { - "id": "gpt-4o-mini-audio-preview", - "object": "model", - "created": 1734387424, - "owned_by": "system" - }, - { - "id": "gpt-4o-mini-audio-preview-2024-12-17", - "object": "model", - "created": 1734115920, - "owned_by": "system" - }, - { - "id": "gpt-4o-mini-realtime-preview", - "object": "model", - "created": 1734387380, - "owned_by": "system" - }, { "id": "dall-e-2", "object": "model", @@ -32,23 +8,17 @@ "owned_by": "system" }, { - "id": "gpt-4o-mini", + "id": "o1-mini-2024-09-12", "object": "model", - "created": 1721172741, + "created": 1725648979, "owned_by": "system" }, { - "id": "gpt-4o-mini-2024-07-18", + "id": "gpt-4o-mini-realtime-preview-2024-12-17", "object": "model", - "created": 1721172717, + "created": 1734112601, "owned_by": "system" }, - { - "id": "gpt-3.5-turbo", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, { "id": "o1-preview-2024-09-12", "object": "model", @@ -56,9 +26,9 @@ "owned_by": "system" }, { - "id": "gpt-3.5-turbo-0125", + "id": "o1-mini", "object": "model", - "created": 1706048358, + "created": 1725649008, "owned_by": "system" }, { @@ -68,15 +38,21 @@ "owned_by": "system" }, { - "id": "text-embedding-ada-002", + "id": "gpt-4o-mini-realtime-preview", "object": "model", - "created": 1671217299, - "owned_by": "openai-internal" + "created": 1734387380, + "owned_by": "system" }, { - "id": "o1-mini-2024-09-12", + "id": "gpt-4o-mini-audio-preview-2024-12-17", "object": "model", - "created": 1725648979, + "created": 1734115920, + 
"owned_by": "system" + }, + { + "id": "chatgpt-4o-latest", + "object": "model", + "created": 1723515131, "owned_by": "system" }, { @@ -86,16 +62,22 @@ "owned_by": "openai-internal" }, { - "id": "dall-e-3", + "id": "gpt-4-turbo", "object": "model", - "created": 1698785189, + "created": 1712361441, "owned_by": "system" }, { - "id": "gpt-4", + "id": "gpt-4-turbo-preview", "object": "model", - "created": 1687882411, - "owned_by": "openai" + "created": 1706037777, + "owned_by": "system" + }, + { + "id": "gpt-4o-mini-audio-preview", + "object": "model", + "created": 1734387424, + "owned_by": "system" }, { "id": "babbage-002", @@ -103,12 +85,6 @@ "created": 1692634615, "owned_by": "system" }, - { - "id": "gpt-4-1106-preview", - "object": "model", - "created": 1698957206, - "owned_by": "system" - }, { "id": "omni-moderation-latest", "object": "model", @@ -128,15 +104,27 @@ "owned_by": "system" }, { - "id": "gpt-4o-2024-05-13", + "id": "gpt-4o-audio-preview-2024-12-17", "object": "model", - "created": 1715368132, + "created": 1734034239, "owned_by": "system" }, { - "id": "gpt-4o-2024-08-06", + "id": "gpt-4o-audio-preview", "object": "model", - "created": 1722814719, + "created": 1727460443, + "owned_by": "system" + }, + { + "id": "text-embedding-3-large", + "object": "model", + "created": 1705953180, + "owned_by": "system" + }, + { + "id": "gpt-4o-2024-05-13", + "object": "model", + "created": 1715368132, "owned_by": "system" }, { @@ -145,36 +133,6 @@ "created": 1699046015, "owned_by": "system" }, - { - "id": "gpt-4o", - "object": "model", - "created": 1715367049, - "owned_by": "system" - }, - { - "id": "gpt-3.5-turbo-instruct", - "object": "model", - "created": 1692901427, - "owned_by": "system" - }, - { - "id": "gpt-3.5-turbo-instruct-0914", - "object": "model", - "created": 1694122472, - "owned_by": "system" - }, - { - "id": "gpt-4-0613", - "object": "model", - "created": 1686588896, - "owned_by": "openai" - }, - { - "id": "chatgpt-4o-latest", - "object": "model", - 
"created": 1723515131, - "owned_by": "system" - }, { "id": "gpt-4o-2024-11-20", "object": "model", @@ -182,9 +140,9 @@ "owned_by": "system" }, { - "id": "gpt-4-turbo", + "id": "gpt-4-turbo-2024-04-09", "object": "model", - "created": 1712361441, + "created": 1712601677, "owned_by": "system" }, { @@ -193,6 +151,12 @@ "created": 1681940951, "owned_by": "openai-internal" }, + { + "id": "gpt-3.5-turbo-16k", + "object": "model", + "created": 1683758102, + "owned_by": "openai-internal" + }, { "id": "tts-1-1106", "object": "model", @@ -205,36 +169,6 @@ "created": 1692634301, "owned_by": "system" }, - { - "id": "gpt-4-turbo-preview", - "object": "model", - "created": 1706037777, - "owned_by": "system" - }, - { - "id": "gpt-4o-mini-realtime-preview-2024-12-17", - "object": "model", - "created": 1734112601, - "owned_by": "system" - }, - { - "id": "gpt-4o-audio-preview", - "object": "model", - "created": 1727460443, - "owned_by": "system" - }, - { - "id": "text-embedding-3-small", - "object": "model", - "created": 1705948997, - "owned_by": "system" - }, - { - "id": "gpt-4-turbo-2024-04-09", - "object": "model", - "created": 1712601677, - "owned_by": "system" - }, { "id": "gpt-3.5-turbo-1106", "object": "model", @@ -242,27 +176,33 @@ "owned_by": "system" }, { - "id": "gpt-4o-realtime-preview-2024-10-01", + "id": "gpt-4o-mini", "object": "model", - "created": 1727131766, + "created": 1721172741, "owned_by": "system" }, { - "id": "gpt-3.5-turbo-16k", + "id": "gpt-3.5-turbo-instruct", "object": "model", - "created": 1683758102, - "owned_by": "openai-internal" - }, - { - "id": "gpt-4o-audio-preview-2024-12-17", - "object": "model", - "created": 1734034239, + "created": 1692901427, "owned_by": "system" }, { - "id": "text-embedding-3-large", + "id": "dall-e-3", "object": "model", - "created": 1705953180, + "created": 1698785189, + "owned_by": "system" + }, + { + "id": "gpt-3.5-turbo-instruct-0914", + "object": "model", + "created": 1694122472, + "owned_by": "system" + }, + { + "id": 
"gpt-3.5-turbo-0125", + "object": "model", + "created": 1706048358, "owned_by": "system" }, { @@ -271,12 +211,30 @@ "created": 1733945430, "owned_by": "system" }, + { + "id": "gpt-3.5-turbo", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, { "id": "gpt-4o-realtime-preview", "object": "model", "created": 1727659998, "owned_by": "system" }, + { + "id": "gpt-4o-mini-2024-07-18", + "object": "model", + "created": 1721172717, + "owned_by": "system" + }, + { + "id": "text-embedding-3-small", + "object": "model", + "created": 1705948997, + "owned_by": "system" + }, { "id": "gpt-4-0125-preview", "object": "model", @@ -284,9 +242,51 @@ "owned_by": "system" }, { - "id": "o1-mini", + "id": "gpt-4", "object": "model", - "created": 1725649008, + "created": 1687882411, + "owned_by": "openai" + }, + { + "id": "text-embedding-ada-002", + "object": "model", + "created": 1671217299, + "owned_by": "openai-internal" + }, + { + "id": "gpt-4-1106-preview", + "object": "model", + "created": 1698957206, + "owned_by": "system" + }, + { + "id": "gpt-4o-audio-preview-2024-10-01", + "object": "model", + "created": 1727389042, + "owned_by": "system" + }, + { + "id": "gpt-4o-2024-08-06", + "object": "model", + "created": 1722814719, + "owned_by": "system" + }, + { + "id": "gpt-4o", + "object": "model", + "created": 1715367049, + "owned_by": "system" + }, + { + "id": "gpt-4-0613", + "object": "model", + "created": 1686588896, + "owned_by": "openai" + }, + { + "id": "gpt-4o-realtime-preview-2024-10-01", + "object": "model", + "created": 1727131766, "owned_by": "system" } ] diff --git a/packages/osr-code-bot/models/data/openrouter_models.json b/packages/osr-code-bot/models/data/openrouter_models.json index a6c70ce..47562ad 100644 --- a/packages/osr-code-bot/models/data/openrouter_models.json +++ b/packages/osr-code-bot/models/data/openrouter_models.json @@ -1,11 +1,227 @@ { - "timestamp": 1738438200259, + "timestamp": 1739276098996, "models": [ { - "id": 
"qwen/qwen-turbo-2024-11-01", + "id": "allenai/llama-3.1-tulu-3-405b", + "name": "Llama 3.1 Tulu 3 405b", + "created": 1739053421, + "description": "Tülu 3 405B is the largest model in the Tülu 3 family, applying fully open post-training recipes at a 405B parameter scale. Built on the Llama 3.1 405B base, it leverages Reinforcement Learning with Verifiable Rewards (RLVR) to enhance instruction following, MATH, GSM8K, and IFEval performance. As part of Tülu 3’s fully open-source approach, it offers state-of-the-art capabilities while surpassing prior open-weight models like Llama 3.1 405B Instruct and Nous Hermes 3 405B on multiple benchmarks. To read more, [click here.](https://allenai.org/blog/tulu-3-405B)", + "context_length": 16000, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000005", + "completion": "0.00001", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16000, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "deepseek/deepseek-r1-distill-llama-8b", + "name": "DeepSeek: R1 Distill Llama 8B", + "created": 1738937718, + "description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on [Llama-3.1-8B-Instruct](/meta-llama/llama-3.1-8b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). 
The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 50.4\n- MATH-500 pass@1: 89.1\n- CodeForces Rating: 1205\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.\n\nHugging Face: \n- [Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) \n- [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) |", + "context_length": 32000, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000004", + "completion": "0.00000004", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 32000, + "max_completion_tokens": 32000, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "google/gemini-2.0-flash-001", + "name": "Google: Gemini Flash 2.0", + "created": 1738769413, + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. 
These advancements come together to deliver more seamless and robust agentic experiences.", + "context_length": 1000000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000001", + "completion": "0.0000004", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "google/gemini-2.0-flash-lite-preview-02-05:free", + "name": "Google: Gemini Flash Lite 2.0 Preview (free)", + "created": 1738768262, + "description": "Gemini Flash Lite 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](google/gemini-pro-1.5). Because it's currently in preview, it will be **heavily rate-limited** by Google. This model will move from free to paid pending a general rollout on February 24th, at $0.075 / $0.30 per million input / ouput tokens respectively.", + "context_length": 1000000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "google/gemini-2.0-pro-exp-02-05:free", + "name": "Google: Gemini Pro 2.0 Experimental (free)", + "created": 1738768044, + "description": "Gemini 2.0 Pro Experimental is a bleeding-edge version of the Gemini 2.0 Pro model. 
Because it's currently experimental, it will be **heavily rate-limited** by Google.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", + "context_length": 2000000, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 2000000, + "max_completion_tokens": 8192, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "qwen/qwen-vl-plus:free", + "name": "Qwen: Qwen VL Plus (free)", + "created": 1738731255, + "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n", + "context_length": 7500, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Qwen", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 7500, + "max_completion_tokens": 1500, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "aion-labs/aion-1.0", + "name": "AionLabs: Aion-1.0", + "created": 1738697557, + "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). 
It is Aion Lab's most powerful reasoning model.", + "context_length": 16384, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000004", + "completion": "0.000012", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16384, + "max_completion_tokens": 16384, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "aion-labs/aion-1.0-mini", + "name": "AionLabs: Aion-1.0-Mini", + "created": 1738697107, + "description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results available on its [Hugging Face page](https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview), independently replicated for verification.", + "context_length": 16384, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000008", + "completion": "0.0000024", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 16384, + "max_completion_tokens": 16384, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "aion-labs/aion-rp-llama-3.1-8b", + "name": "AionLabs: Aion-RP 1.0 (8B)", + "created": 1738696718, + "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. 
It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000002", + "completion": "0.0000002", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 32768, + "max_completion_tokens": 32768, + "is_moderated": false + }, + "per_request_limits": null + }, + { + "id": "qwen/qwen-turbo", "name": "Qwen: Qwen-Turbo", "created": 1738410974, - "description": "Qwen-Turbo is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", + "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", "context_length": 1000000, "architecture": { "modality": "text->text", @@ -25,11 +241,35 @@ }, "per_request_limits": null }, + { + "id": "qwen/qwen2.5-vl-72b-instruct:free", + "name": "Qwen: Qwen2.5 VL 72B Instruct (free)", + "created": 1738410311, + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. 
It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "context_length": 131072, + "architecture": { + "modality": "text+image->text", + "tokenizer": "Qwen", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": 2048, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "qwen/qwen-plus", "name": "Qwen: Qwen-Plus", "created": 1738409840, - "description": "Qwen-Plus is a 131K context model with a balanced performance, speed, and cost combination.", + "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", "context_length": 131072, "architecture": { "modality": "text->text", @@ -53,7 +293,7 @@ "id": "qwen/qwen-max", "name": "Qwen: Qwen-Max ", "created": 1738402289, - "description": "Qwen-Max, with 32K context, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks.", + "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion tokens and further post-trained with curated Supervised Fine-Tuning (SFT) and Reinforcement Learning from Human Feedback (RLHF) methodologies. 
The parameter count is unknown.", "context_length": 32768, "architecture": { "modality": "text->text", @@ -99,7 +339,7 @@ }, { "id": "deepseek/deepseek-r1-distill-qwen-1.5b", - "name": "Deepseek: Deepseek R1 Distill Qwen 1.5B", + "name": "DeepSeek: R1 Distill Qwen 1.5B", "created": 1738328067, "description": "DeepSeek R1 Distill Qwen 1.5B is a distilled large language model based on [Qwen 2.5 Math 1.5B](https://huggingface.co/Qwen/Qwen2.5-Math-1.5B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It's a very small and efficient model which outperforms [GPT 4o 0513](/openai/gpt-4o-2024-05-13) on Math Benchmarks.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 28.9\n- AIME 2024 cons@64: 52.7\n- MATH-500 pass@1: 83.9\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "context_length": 131072, @@ -116,7 +356,7 @@ }, "top_provider": { "context_length": 131072, - "max_completion_tokens": 2048, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null @@ -147,7 +387,7 @@ }, { "id": "deepseek/deepseek-r1-distill-qwen-32b", - "name": "DeepSeek: DeepSeek R1 Distill Qwen 32B", + "name": "DeepSeek: R1 Distill Qwen 32B", "created": 1738194830, "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). 
It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 72.6\n- MATH-500 pass@1: 94.3\n- CodeForces Rating: 1691\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "context_length": 131072, @@ -171,24 +411,24 @@ }, { "id": "deepseek/deepseek-r1-distill-qwen-14b", - "name": "DeepSeek: DeepSeek R1 Distill Qwen 14B", + "name": "DeepSeek: R1 Distill Qwen 14B", "created": 1738193940, "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", - "context_length": 131072, + "context_length": 64000, "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": null }, "pricing": { - "prompt": "0.0000016", - "completion": "0.0000016", + "prompt": "0.00000015", + "completion": "0.00000015", "image": "0", "request": "0" }, "top_provider": { - "context_length": 131072, - "max_completion_tokens": 2048, + "context_length": 64000, + "max_completion_tokens": 64000, "is_moderated": false }, "per_request_limits": null @@ -289,9 +529,33 @@ }, "per_request_limits": null }, + { + "id": "deepseek/deepseek-r1-distill-llama-70b:free", + "name": "DeepSeek: R1 Distill Llama 70B (free)", + "created": 1737663169, + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on 
[Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "context_length": 8192, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 8192, + "max_completion_tokens": 32768, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "deepseek/deepseek-r1-distill-llama-70b", - "name": "DeepSeek: DeepSeek R1 Distill Llama 70B", + "name": "DeepSeek: R1 Distill Llama 70B", "created": 1737663169, "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "context_length": 131072, @@ -339,10 +603,10 @@ }, { "id": "deepseek/deepseek-r1:free", - "name": "DeepSeek: DeepSeek R1 (free)", + "name": "DeepSeek: R1 (free)", "created": 1737381095, "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. 
It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", - "context_length": 128000, + "context_length": 163840, "architecture": { "modality": "text->text", "tokenizer": "DeepSeek", @@ -355,56 +619,32 @@ "request": "0" }, "top_provider": { - "context_length": 128000, - "max_completion_tokens": 4096, + "context_length": 163840, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null }, { "id": "deepseek/deepseek-r1", - "name": "DeepSeek: DeepSeek R1", + "name": "DeepSeek: R1", "created": 1737381095, "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", - "context_length": 16000, + "context_length": 128000, "architecture": { "modality": "text->text", "tokenizer": "DeepSeek", "instruct_type": null }, "pricing": { - "prompt": "0.00000075", + "prompt": "0.0000008", "completion": "0.0000024", "image": "0", "request": "0" }, "top_provider": { - "context_length": 16000, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "deepseek/deepseek-r1:nitro", - "name": "DeepSeek: DeepSeek R1 (nitro)", - "created": 1737381095, - "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. 
It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", - "context_length": 163840, - "architecture": { - "modality": "text->text", - "tokenizer": "DeepSeek", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000007", - "completion": "0.000007", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 163840, - "max_completion_tokens": 32768, + "context_length": 128000, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null @@ -529,12 +769,36 @@ }, "per_request_limits": null }, + { + "id": "deepseek/deepseek-chat:free", + "name": "DeepSeek: DeepSeek V3 (free)", + "created": 1735241320, + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "context_length": 128000, + "architecture": { + "modality": "text->text", + "tokenizer": "DeepSeek", + "instruct_type": null + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 128000, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "deepseek/deepseek-chat", "name": "DeepSeek: DeepSeek V3", "created": 1735241320, "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. 
Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", - "context_length": 16000, + "context_length": 32768, "architecture": { "modality": "text->text", "tokenizer": "DeepSeek", @@ -547,7 +811,7 @@ "request": "0" }, "top_provider": { - "context_length": 16000, + "context_length": 32768, "max_completion_tokens": null, "is_moderated": false }, @@ -558,7 +822,7 @@ "name": "Qwen: QvQ 72B Preview", "created": 1735088567, "description": "QVQ-72B-Preview is an experimental research model developed by the [Qwen](/qwen) team, focusing on enhancing visual reasoning capabilities.\n\n## Performance\n\n| | **QVQ-72B-Preview** | o1-2024-12-17 | gpt-4o-2024-05-13 | Claude3.5 Sonnet-20241022 | Qwen2VL-72B |\n|----------------|-----------------|---------------|-------------------|----------------------------|-------------|\n| MMMU(val) | 70.3 | 77.3 | 69.1 | 70.4 | 64.5 |\n| MathVista(mini) | 71.4 | 71.0 | 63.8 | 65.3 | 70.5 |\n| MathVision(full) | 35.9 | – | 30.4 | 35.6 | 25.9 |\n| OlympiadBench | 20.4 | – | 25.9 | – | 11.2 |\n\n\n## Limitations\n\n1. **Language Mixing and Code-Switching:** The model might occasionally mix different languages or unexpectedly switch between them, potentially affecting the clarity of its responses.\n2. **Recursive Reasoning Loops:** There's a risk of the model getting caught in recursive reasoning loops, leading to lengthy responses that may not even arrive at a final answer.\n3. **Safety and Ethical Considerations:** Robust safety measures are needed to ensure reliable and safe performance. Users should exercise caution when deploying this model.\n4. 
**Performance and Benchmark Limitations:** Despite the improvements in visual reasoning, QVQ doesn’t entirely replace the capabilities of [Qwen2-VL-72B](/qwen/qwen-2-vl-72b-instruct). During multi-step visual reasoning, the model might gradually lose focus on the image content, leading to hallucinations. Moreover, QVQ doesn’t show significant improvement over [Qwen2-VL-72B](/qwen/qwen-2-vl-72b-instruct) in basic recognition tasks like identifying people, animals, or plants.\n\nNote: Currently, the model only supports single-round dialogues and image outputs. It does not support video inputs.", - "context_length": 128000, + "context_length": 32000, "architecture": { "modality": "text+image->text", "tokenizer": "Qwen", @@ -571,7 +835,7 @@ "request": "0" }, "top_provider": { - "context_length": 128000, + "context_length": 32000, "max_completion_tokens": 4096, "is_moderated": false }, @@ -725,7 +989,7 @@ "id": "cohere/command-r7b-12-2024", "name": "Cohere: Command R7B (12-2024)", "created": 1734158152, - "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.", + "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. 
It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -793,6 +1057,30 @@ }, "per_request_limits": null }, + { + "id": "meta-llama/llama-3.3-70b-instruct:free", + "name": "Meta: Llama 3.3 70B Instruct (free)", + "created": 1733506137, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": 2048, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "meta-llama/llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", @@ -913,30 +1201,6 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-exp-1121:free", - "name": "Google: Gemini Experimental 1121 (free)", - "created": 1732216725, - "description": "Experimental release (November 21st, 2024) of Gemini.", - "context_length": 40960, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" 
- }, - "top_provider": { - "context_length": 40960, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "google/learnlm-1.5-pro-experimental:free", "name": "Google: LearnLM 1.5 Pro Experimental (free)", @@ -1105,50 +1369,26 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-exp-1114:free", - "name": "Google: Gemini Experimental 1114 (free)", - "created": 1731714740, - "description": "Gemini 11-14 (2024) experimental model features \"quality\" improvements.", - "context_length": 40960, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 40960, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "infermatic/mn-inferor-12b", "name": "Infermatic: Mistral Nemo Inferor 12B", "created": 1731464428, "description": "Inferor 12B is a merge of top roleplay models, expert on immersive narratives and storytelling.\n\nThis model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [anthracite-org/magnum-v4-12b](https://openrouter.ai/anthracite-org/magnum-v4-72b) as a base.\n", - "context_length": 32000, + "context_length": 16384, "architecture": { "modality": "text->text", "tokenizer": "Mistral", "instruct_type": "mistral" }, "pricing": { - "prompt": "0.00000025", - "completion": "0.0000005", + "prompt": "0.0000008", + "completion": "0.0000012", "image": "0", "request": "0" }, "top_provider": { - "context_length": 32000, - "max_completion_tokens": null, + "context_length": 16384, + "max_completion_tokens": 4096, "is_moderated": false }, "per_request_limits": null @@ -1537,6 +1777,30 @@ }, "per_request_limits": null }, + { + "id": "nvidia/llama-3.1-nemotron-70b-instruct:free", + "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct (free)", + "created": 
1728950400, + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama3", + "instruct_type": "llama3" + }, + "pricing": { + "prompt": "0", + "completion": "0", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "nvidia/llama-3.1-nemotron-70b-instruct", "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", @@ -1705,30 +1969,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.2-3b-instruct:free", - "name": "Meta: Llama 3.2 3B Instruct (free)", - "created": 1727222400, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. 
Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 4096, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 4096, - "max_completion_tokens": 2048, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", @@ -1753,30 +1993,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.2-1b-instruct:free", - "name": "Meta: Llama 3.2 1B Instruct (free)", - "created": 1727222400, - "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. 
Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 1.3B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 4096, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 4096, - "max_completion_tokens": 2048, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", @@ -1802,8 +2018,8 @@ "per_request_limits": null }, { - "id": "meta-llama/llama-3.2-90b-vision-instruct:free", - "name": "Meta: Llama 3.2 90B Vision Instruct (free)", + "id": "meta-llama/llama-3.2-90b-vision-instruct", + "name": "Meta: Llama 3.2 90B Vision Instruct", "created": 1727222400, "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. 
Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "context_length": 4096, @@ -1813,9 +2029,9 @@ "instruct_type": "llama3" }, "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", + "prompt": "0.0000008", + "completion": "0.0000016", + "image": "0.00128", "request": "0" }, "top_provider": { @@ -1825,30 +2041,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.2-90b-vision-instruct", - "name": "Meta: Llama 3.2 90B Vision Instruct", - "created": 1727222400, - "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. 
Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "context_length": 131072, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.0000009", - "completion": "0.0000009", - "image": "0.001301", - "request": "0" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.2-11b-vision-instruct:free", "name": "Meta: Llama 3.2 11B Vision Instruct (free)", @@ -1902,21 +2094,21 @@ "name": "Qwen2.5 72B Instruct", "created": 1726704000, "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. 
More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "context_length": 32768, + "context_length": 128000, "architecture": { "modality": "text->text", "tokenizer": "Qwen", "instruct_type": "chatml" }, "pricing": { - "prompt": "0.00000023", + "prompt": "0.00000013", "completion": "0.0000004", "image": "0", "request": "0" }, "top_provider": { - "context_length": 32768, - "max_completion_tokens": 4096, + "context_length": 128000, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null @@ -2093,7 +2285,7 @@ "id": "cohere/command-r-08-2024", "name": "Cohere: Command R (08-2024)", "created": 1724976000, - "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. 
More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -2117,7 +2309,7 @@ "id": "cohere/command-r-plus-08-2024", "name": "Cohere: Command R+ (08-2024)", "created": 1724976000, - "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -2161,30 +2353,6 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-flash-1.5-exp", - "name": "Google: Gemini Flash 1.5 Experimental", - "created": 1724803200, - "description": "Gemini 1.5 Flash Experimental is an experimental version of the [Gemini 1.5 Flash](/models/google/gemini-flash-1.5) model.\n\nUsage of Gemini is subject to Google's [Gemini 
Terms of Use](https://ai.google.dev/terms).\n\n#multimodal\n\nNote: This model is experimental and not suited for production use-cases. It may be removed or redirected to another model in the future.", - "context_length": 1000000, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 1000000, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "sao10k/l3.1-euryale-70b", "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", @@ -2545,30 +2713,6 @@ }, "per_request_limits": null }, - { - "id": "google/gemini-pro-1.5-exp", - "name": "Google: Gemini Pro 1.5 Experimental", - "created": 1722470400, - "description": "Gemini 1.5 Pro Experimental is a bleeding-edge version of the [Gemini 1.5 Pro](/models/google/gemini-pro-1.5) model. Because it's currently experimental, it will be **heavily rate-limited** by Google.\n\nUsage of Gemini is subject to Google's [Gemini Terms of Use](https://ai.google.dev/terms).\n\n#multimodal", - "context_length": 1000000, - "architecture": { - "modality": "text+image->text", - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 1000000, - "max_completion_tokens": 8192, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "perplexity/llama-3.1-sonar-large-128k-chat", "name": "Perplexity: Llama 3.1 Sonar 70B", @@ -2641,30 +2785,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.1-405b-instruct:free", - "name": "Meta: Llama 3.1 405B Instruct (free)", - "created": 1721692800, - "description": "The highly anticipated 400B class of Llama3 is here! 
Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8000, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8000, - "max_completion_tokens": 4000, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.1-405b-instruct", "name": "Meta: Llama 3.1 405B Instruct", @@ -2689,54 +2809,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.1-405b-instruct:nitro", - "name": "Meta: Llama 3.1 405B Instruct (nitro)", - "created": 1721692800, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8000, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00001462", - "completion": "0.00001462", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8000, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "meta-llama/llama-3.1-8b-instruct:free", - "name": "Meta: Llama 3.1 8B Instruct (free)", - "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": 4096, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", @@ -2761,30 +2833,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.1-70b-instruct:free", - "name": "Meta: Llama 3.1 70B Instruct (free)", - "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0", - "completion": "0", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": 4096, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", @@ -2809,30 +2857,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3.1-70b-instruct:nitro", - "name": "Meta: Llama 3.1 70B Instruct (nitro)", - "created": 1721692800, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 64000, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00000325", - "completion": "0.00000325", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 64000, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "mistralai/mistral-nemo", "name": "Mistral: Mistral Nemo", @@ -3229,14 +3253,14 @@ "instruct_type": "chatml" }, "pricing": { - "prompt": "0.00000034", - "completion": "0.00000039", + "prompt": "0.0000009", + "completion": "0.0000009", "image": "0", "request": "0" }, "top_provider": { "context_length": 32768, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "is_moderated": false }, "per_request_limits": null @@ -3289,30 +3313,6 @@ }, "per_request_limits": null }, - { - "id": "mistralai/mistral-7b-instruct:nitro", - "name": "Mistral: Mistral 7B Instruct (nitro)", - "created": 1716768000, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0.00000007", - "completion": "0.00000007", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "mistralai/mistral-7b-instruct-v0.3", "name": "Mistral: Mistral 7B Instruct v0.3", @@ -3673,6 +3673,30 @@ }, "per_request_limits": null }, + { + "id": "sao10k/fimbulvetr-11b-v2", + "name": "Fimbulvetr 11B v2", + "created": 1713657600, + "description": 
"Creative writing model, routed with permission. It's fast, it keeps the conversation going, and it stays in character.\n\nIf you submit a raw prompt, you can use Alpaca or Vicuna formats.", + "context_length": 4096, + "architecture": { + "modality": "text->text", + "tokenizer": "Llama2", + "instruct_type": "alpaca" + }, + "pricing": { + "prompt": "0.0000008", + "completion": "0.0000012", + "image": "0", + "request": "0" + }, + "top_provider": { + "context_length": 4096, + "max_completion_tokens": 4096, + "is_moderated": false + }, + "per_request_limits": null + }, { "id": "meta-llama/llama-3-8b-instruct:free", "name": "Meta: Llama 3 8B Instruct (free)", @@ -3721,54 +3745,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3-8b-instruct:extended", - "name": "Meta: Llama 3 8B Instruct (extended)", - "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 16384, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.0000001875", - "completion": "0.000001125", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 16384, - "max_completion_tokens": 2048, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "meta-llama/llama-3-8b-instruct:nitro", - "name": "Meta: Llama 3 8B Instruct (nitro)", - "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. 
This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", @@ -3793,30 +3769,6 @@ }, "per_request_limits": null }, - { - "id": "meta-llama/llama-3-70b-instruct:nitro", - "name": "Meta: Llama 3 70B Instruct (nitro)", - "created": 1713398400, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama3", - "instruct_type": "llama3" - }, - "pricing": { - "prompt": "0.00000088", - "completion": "0.00000088", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", @@ -3941,7 +3893,7 @@ "id": "cohere/command-r-plus", "name": "Cohere: Command R+", "created": 1712188800, - "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -3965,7 +3917,7 @@ "id": "cohere/command-r-plus-04-2024", "name": "Cohere: Command R+ (04-2024)", "created": 1712016000, - "description": "Command R+ is a new, 104B-parameter LLM from Cohere. 
It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "Command R+ is a new, 104B-parameter LLM from Cohere. It's useful for roleplay, general consumer usecases, and Retrieval Augmented Generation (RAG).\n\nIt offers multilingual support for ten key languages to facilitate global business operations. See benchmarks and the launch post [here](https://txt.cohere.com/command-r-plus-microsoft-azure/).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -4037,7 +3989,7 @@ "id": "cohere/command", "name": "Cohere: Command", "created": 1710374400, - "description": "Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "Command is an instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 4096, "architecture": { "modality": "text->text", @@ -4061,7 +4013,7 @@ "id": "cohere/command-r", "name": "Cohere: Command R", "created": 1710374400, - "description": "Command-R is a 35B 
parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -4229,7 +4181,7 @@ "id": "cohere/command-r-03-2024", "name": "Cohere: Command R (03-2024)", "created": 1709341200, - "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy).", + "description": "Command-R is a 35B parameter model that performs conversational language tasks at a higher quality, more reliably, and with a longer context than previous models. 
It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.\n\nRead the launch post [here](https://txt.cohere.com/command-r/).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "context_length": 128000, "architecture": { "modality": "text->text", @@ -4561,30 +4513,6 @@ }, "per_request_limits": null }, - { - "id": "mistralai/mixtral-8x7b-instruct:nitro", - "name": "Mistral: Mixtral 8x7B Instruct (nitro)", - "created": 1702166400, - "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0.0000005", - "completion": "0.0000005", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "openchat/openchat-7b:free", "name": "OpenChat 3.5 7B (free)", @@ -4772,7 +4700,7 @@ }, "top_provider": { "context_length": 4096, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "is_moderated": false }, "per_request_limits": null @@ -4801,30 +4729,6 @@ }, "per_request_limits": null }, - { - "id": "undi95/toppy-m-7b:nitro", - "name": "Toppy M 7B (nitro)", - "created": 1699574400, - "description": "A wild 7B parameter model that merges several models using the new task_arithmetic merge method from mergekit.\nList of merged models:\n- NousResearch/Nous-Capybara-7B-V1.9\n- [HuggingFaceH4/zephyr-7b-beta](/models/huggingfaceh4/zephyr-7b-beta)\n- lemonilia/AshhLimaRP-Mistral-7B\n- 
Vulkane/120-Days-of-Sodom-LoRA-Mistral-7b\n- Undi95/Mistral-pippa-sharegpt-7b-qlora\n\n#merge #uncensored", - "context_length": 4096, - "architecture": { - "modality": "text->text", - "tokenizer": "Mistral", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.00000007", - "completion": "0.00000007", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 4096, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "undi95/toppy-m-7b", "name": "Toppy M 7B", @@ -4877,7 +4781,7 @@ "id": "openrouter/auto", "name": "Auto Router", "created": 1699401600, - "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). 
Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)", + "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). 
Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/o1-preview-2024-09-12](/openai/o1-preview-2024-09-12)\n- [openai/o1-mini-2024-09-12](/openai/o1-mini-2024-09-12)\n- [anthropic/claude-3.5-sonnet](/anthropic/claude-3.5-sonnet)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3-opus](/anthropic/claude-3-opus)\n- [anthropic/claude-2.1](/anthropic/claude-2.1)\n- [google/gemini-pro-1.5](/google/gemini-pro-1.5)\n- [google/gemini-flash-1.5](/google/gemini-flash-1.5)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)", "context_length": 2000000, "architecture": { "modality": "text->text", @@ -5329,30 +5233,6 @@ }, "per_request_limits": null }, - { - "id": "undi95/remm-slerp-l2-13b:extended", - "name": "ReMM SLERP 13B (extended)", - "created": 1689984000, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. 
#merge", - "context_length": 6144, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama2", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.000001125", - "completion": "0.000001125", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 6144, - "max_completion_tokens": 512, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "google/palm-2-chat-bison", "name": "Google: PaLM 2 Chat", @@ -5449,54 +5329,6 @@ }, "per_request_limits": null }, - { - "id": "gryphe/mythomax-l2-13b:nitro", - "name": "MythoMax 13B (nitro)", - "created": 1688256000, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "context_length": 4096, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama2", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 4096, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null - }, - { - "id": "gryphe/mythomax-l2-13b:extended", - "name": "MythoMax 13B (extended)", - "created": 1688256000, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. 
#merge", - "context_length": 8192, - "architecture": { - "modality": "text->text", - "tokenizer": "Llama2", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.000001125", - "completion": "0.000001125", - "image": "0", - "request": "0" - }, - "top_provider": { - "context_length": 8192, - "max_completion_tokens": 512, - "is_moderated": false - }, - "per_request_limits": null - }, { "id": "meta-llama/llama-2-13b-chat", "name": "Meta: Llama 2 13B Chat", diff --git a/packages/osr-code-bot/schema.json b/packages/osr-code-bot/schema.json index 8526655..639ef34 100644 --- a/packages/osr-code-bot/schema.json +++ b/packages/osr-code-bot/schema.json @@ -26,7 +26,7 @@ }, "each": { "type": "string", - "description": "Iterate over items, supported GLOB | Path to JSON File | array of strings (comma separated). To test different models, use --each=\"gpt-3.5-turbo,gpt-4o\", the actual string will exposed as variable `ITEM`, eg: --dst=${ITEM}-output.md" + "description": "Iterate over items, supported: GLOB | Path to JSON File | array of strings (comma separated). To test different models, use --each=\"gpt-3.5-turbo,gpt-4o\", the actual string will exposed as variable `ITEM`, eg: --dst=\"${ITEM}-output.md\"" }, "disable": { "type": "array", diff --git a/packages/osr-code-bot/schema_ui.json b/packages/osr-code-bot/schema_ui.json index 02e6c99..7c3bc01 100644 --- a/packages/osr-code-bot/schema_ui.json +++ b/packages/osr-code-bot/schema_ui.json @@ -27,7 +27,7 @@ "ui:title": "Dst" }, "each": { - "ui:description": "Iterate over items, supported GLOB | Path to JSON File | array of strings (comma separated). To test different models, use --each=\"gpt-3.5-turbo,gpt-4o\", the actual string will exposed as variable `ITEM`, eg: --dst=${ITEM}-output.md", + "ui:description": "Iterate over items, supported: GLOB | Path to JSON File | array of strings (comma separated). 
To test different models, use --each=\"gpt-3.5-turbo,gpt-4o\", the actual string will exposed as variable `ITEM`, eg: --dst=\"${ITEM}-output.md\"", "ui:title": "Each" }, "disable": { diff --git a/packages/osr-code-bot/scripts/build.sh b/packages/osr-code-bot/scripts/build.sh index 1a8e492..b465819 100644 --- a/packages/osr-code-bot/scripts/build.sh +++ b/packages/osr-code-bot/scripts/build.sh @@ -1,6 +1,10 @@ npm run webpack +kbotd types +kbotd fetch sh scripts/update-readme.sh cp README.md dist/README.md cd dist npm version patch npm publish +cd .. +git commit -m "maintainence love:)" . \ No newline at end of file diff --git a/packages/osr-code-bot/src/client.ts b/packages/osr-code-bot/src/client.ts index b74ef8d..e2a2775 100644 --- a/packages/osr-code-bot/src/client.ts +++ b/packages/osr-code-bot/src/client.ts @@ -3,58 +3,90 @@ import { logger } from './index' import { loadConfig } from './config' import { IKBotOptions } from './zod_types' +/** + * Router types supported by the client + */ +type RouterType = 'openrouter' | 'openai' | 'deepseek' | 'huggingface'; + +/** + * Default base URLs for different routers + */ +const ROUTER_BASE_URLS: Record<RouterType, string> = { + openrouter: 'https://openrouter.ai/api/v1', + openai: '', // OpenAI uses default URL + deepseek: 'https://api.deepseek.com', + huggingface: 'https://api-inference.huggingface.co' +}; + +/** + * Default models for different routers + */ +const DEFAULT_MODELS: Record<RouterType, string> = { + openrouter: 'anthropic/claude-3.5-sonnet', + openai: 'gpt-4o', + deepseek: 'deepseek-chat', + huggingface: 'meta-llama/2' +}; + +/** + * Creates an OpenAI client instance based on the provided options. 
+ * @param options - Configuration options for the client + * @returns OpenAI client instance or undefined if configuration is invalid + */ export const createClient = (options: IKBotOptions) => { + // Load configuration from file const config = loadConfig(options) - let apiKey: string = options.api_key if (!config) { logger.error( "Config not found in $HOME/.osr/config.json. " + "Optionally, export OSR_CONFIG with the path to the configuration file, " ); - return undefined + return undefined; } - const router = options.router ?? "openrouter" - let baseURL = options.baseURL - if (!options.baseURL) { + + // Determine router to use (defaults to 'openrouter') + const router: RouterType = (options.router ?? 'openrouter') as RouterType; + + // Initialize API key and baseURL + let apiKey = options.api_key; + + // Set API key based on router if not provided in options + if (!apiKey) { switch (router) { - case "openrouter": - apiKey = apiKey || config?.openrouter?.key; - if (!options.baseURL) { - baseURL = "https://openrouter.ai/api/v1" - } + case 'openrouter': + apiKey = config?.openrouter?.key; break; - case "openai": - apiKey = apiKey || config?.openai?.key; + case 'openai': + apiKey = config?.openai?.key; break; - case "deepseek": - apiKey = apiKey || config?.deepseek?.key; - if (!options.baseURL) { - baseURL = "https://api.deepseek.com" - } + case 'deepseek': + apiKey = config?.deepseek?.key; + break; + case 'huggingface': + apiKey = config?.huggingface?.key; break; } } + // Validate API key if (!apiKey) { logger.error(`No ${router} key found. Please provide an "api_key", set it in the config, or pass it via JSON config.`); return undefined; } - if (router === "openrouter" && !options.model) { - options.model = "anthropic/claude-3.5-sonnet" + // Set default baseURL if not provided + const baseURL = options.baseURL ?? 
ROUTER_BASE_URLS[router]; + + // Set default model if not provided + if (!options.model) { + options.model = DEFAULT_MODELS[router]; } - if (router === "openai" && !options.model) { - options.model = "gpt-4o" - } + logger.info(`Creating client with ${router} router, model ${options.model}, and API key ${apiKey} at ${baseURL}`); - if (router === "deepseek" && !options.model) { - options.model = "deepseek-chat" - } - - logger.info(`Creating client with ${router} router, model ${options.model}, and API key ${apiKey} at ${baseURL}`) + // Create and return the OpenAI client instance return new OpenAI({ apiKey, baseURL, - }) + }); } \ No newline at end of file diff --git a/packages/osr-code-bot/src/zod_types.ts b/packages/osr-code-bot/src/zod_types.ts index 4460516..065348b 100644 --- a/packages/osr-code-bot/src/zod_types.ts +++ b/packages/osr-code-bot/src/zod_types.ts @@ -7,7 +7,7 @@ export interface IKBotOptions { output?: string | undefined; /** Optional destination path for the result, will substitute ${MODEL_NAME} and ${ROUTER} in the path. Optional, used for "completion" mode */ dst?: string | undefined; - /** Iterate over items, supported GLOB | Path to JSON File | array of strings (comma separated). To test different models, use --each="gpt-3.5-turbo,gpt-4o", the actual string will exposed as variable `ITEM`, eg: --dst=${ITEM}-output.md */ + /** Iterate over items, supported: GLOB | Path to JSON File | array of strings (comma separated). To test different models, use --each="gpt-3.5-turbo,gpt-4o", the actual string will exposed as variable `ITEM`, eg: --dst="${ITEM}-output.md" */ each?: string | undefined; /** Disable tools categories, eg: --disable=fs,git,interact,terminal,search,web,email,user */ disable?: string[];