xref: /plugin/aichat/Model/Groq/models.json (revision b971544330dac8e132930c58cba9d7442ef90031)
1{
2    "chat": {
3        "llama3-8b-8192": {
4            "description": "LLaMA3 8b",
5            "inputTokens": 8192,
6            "inputTokenPrice": 0.05,
7            "outputTokens": 8192,
8            "outputTokenPrice": 0.08
9        },
10        "llama3-70b-8192": {
11            "description": "LLaMA3 70b",
12            "inputTokens": 8192,
13            "inputTokenPrice": 0.59,
14            "outputTokens": 8192,
15            "outputTokenPrice": 0.79
16        },
17        "llama2-70b-4096": {
18            "description": "LLaMA2 70b",
19            "inputTokens": 4096,
20            "inputTokenPrice": 0.7,
21            "outputTokens": 4096,
22            "outputTokenPrice": 0.8
23        },
24        "mixtral-8x7b-32768": {
25            "description": "Mixtral 8x7b",
26            "inputTokens": 32768,
27            "inputTokenPrice": 0.24,
28            "outputTokens": 32768,
29            "outputTokenPrice": 0.24
30        },
31        "llama-3.1-8b-instant": {
32            "description": "",
33            "inputTokens": 128000,
34            "inputTokenPrice": 0.05,
35            "outputTokens": 8192,
36            "outputTokenPrice": 0.08
37        },
38        "llama-3.1-405b-reasoning": {
39            "description": "",
40            "inputTokens": 8192,
41            "inputTokenPrice": 0.59,
42            "outputTokens": 8192,
43            "outputTokenPrice": 0.79
44        },
45        "gemma2-9b-it": {
46            "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.",
47            "inputTokens": 8192,
48            "inputTokenPrice": 0.2,
49            "outputTokens": 8192,
50            "outputTokenPrice": 0.2
51        },
52        "deepseek-r1-distill-llama-70b": {
53            "description": "",
54            "inputTokens": 131072,
55            "inputTokenPrice": 0.75,
56            "outputTokens": 131072,
57            "outputTokenPrice": 0.99
58        },
59        "llama-3.3-70b-versatile": {
60            "description": "",
61            "inputTokens": 128000,
62            "inputTokenPrice": 0.59,
63            "outputTokens": 32768,
64            "outputTokenPrice": 0.79
65        },
66        "llama-3.3-70b-specdec": {
67            "description": "",
68            "inputTokens": 8192,
69            "inputTokenPrice": 0.59,
70            "outputTokens": 8192,
71            "outputTokenPrice": 0.99
72        },
73        "llama-3.1-70b-versatile": {
74            "description": "",
75            "inputTokens": 8192,
76            "inputTokenPrice": 0.59,
77            "outputTokens": 8192,
78            "outputTokenPrice": 0.79
79        },
80        "gemma-7b-it": {
81            "description": "",
82            "inputTokens": 8192,
83            "inputTokenPrice": 0.05,
84            "outputTokens": 8192,
85            "outputTokenPrice": 0.08
86        },
87        "llama-guard-4-12b": {
88            "description": "",
89            "inputTokens": 8192,
90            "inputTokenPrice": 0.2,
91            "outputTokens": 8192,
92            "outputTokenPrice": 0.2
93        },
94        "llama-4-maverick-17b-128e-instruct": {
95            "description": "",
96            "inputTokens": 131072,
97            "inputTokenPrice": 0.2,
98            "outputTokens": 8192,
99            "outputTokenPrice": 0.6
100        },
101        "llama-4-scout-17b-16e-instruct": {
102            "description": "",
103            "inputTokens": 131072,
104            "inputTokenPrice": 0.11,
105            "outputTokens": 8192,
106            "outputTokenPrice": 0.34
107        },
108        "kimi-k2-instruct-0905": {
109            "description": "",
110            "inputTokens": 262144,
111            "inputTokenPrice": 1.0,
112            "outputTokens": 16384,
113            "outputTokenPrice": 3.0
114        },
115        "gpt-oss-120b": {
116            "description": "",
117            "inputTokens": 131072,
118            "inputTokenPrice": 0.15,
119            "outputTokens": 32766,
120            "outputTokenPrice": 0.6
121        },
122        "gpt-oss-20b": {
123            "description": "",
124            "inputTokens": 131072,
125            "inputTokenPrice": 0.08,
126            "outputTokens": 32768,
127            "outputTokenPrice": 0.3
128        },
129        "gpt-oss-safeguard-20b": {
130            "description": "",
131            "inputTokens": 131072,
132            "inputTokenPrice": 0.08,
133            "outputTokens": 65536,
134            "outputTokenPrice": 0.3
135        },
136        "qwen3-32b": {
137            "description": "",
138            "inputTokens": 131000,
139            "inputTokenPrice": 0.29,
140            "outputTokens": 131000,
141            "outputTokenPrice": 0.59
142        }
143    }
144}