Gemini OpenAI format (Chat)

# Send a POST with no headers and no body to the Chat Completions endpoint.
curl --request POST \
  --url https://api.gravitex.ai/v1/chat/completions

import requests

# Chat Completions endpoint of the OpenAI-compatible gateway.
url = "https://api.gravitex.ai/v1/chat/completions"

# Issue a POST with no headers or body and print the raw response text.
print(requests.post(url).text)

// Request settings: a bare POST with no headers and no body.
const options = {method: 'POST'};

// Call the Chat Completions endpoint, log the parsed JSON reply, and
// report any failure (network, JSON parsing, logging) on stderr.
(async () => {
  try {
    const res = await fetch('https://api.gravitex.ai/v1/chat/completions', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Bare POST to the Chat Completions endpoint. Prints the raw response body,
// or the cURL error message when the transfer reports one.
$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.gravitex.ai/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
]);

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// A non-empty error string takes precedence over whatever body came back.
echo $failure ? ("cURL Error #:" . $failure) : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends a POST with no headers and no body to the Chat Completions
// endpoint and prints the raw response body.
//
// Every step that can fail is checked: on error a short message is printed
// and the program returns instead of continuing with unusable values.
func main() {
	url := "https://api.gravitex.ai/v1/chat/completions"

	req, err := http.NewRequest("POST", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// POST with no headers and no body; the response body is read as a String.
HttpResponse<String> response = Unirest.post("https://api.gravitex.ai/v1/chat/completions")
  .asString();

require 'uri'
require 'net/http'

# Chat Completions endpoint.
url = URI("https://api.gravitex.ai/v1/chat/completions")

# HTTPS client bound to the endpoint's host and port.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Send a bare POST (no headers, no body) and print the response body.
puts http.request(Net::HTTP::Post.new(url)).read_body

POST

chat

completions

Gemini OpenAI format (Chat)

# Send a POST with no headers and no body to the Chat Completions endpoint.
curl --request POST \
  --url https://api.gravitex.ai/v1/chat/completions

import requests

# Chat Completions endpoint of the OpenAI-compatible gateway.
url = "https://api.gravitex.ai/v1/chat/completions"

# Issue a POST with no headers or body and print the raw response text.
print(requests.post(url).text)

// Request settings: a bare POST with no headers and no body.
const options = {method: 'POST'};

// Call the Chat Completions endpoint, log the parsed JSON reply, and
// report any failure (network, JSON parsing, logging) on stderr.
(async () => {
  try {
    const res = await fetch('https://api.gravitex.ai/v1/chat/completions', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Bare POST to the Chat Completions endpoint. Prints the raw response body,
// or the cURL error message when the transfer reports one.
$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.gravitex.ai/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
]);

$body = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// A non-empty error string takes precedence over whatever body came back.
echo $failure ? ("cURL Error #:" . $failure) : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main sends a POST with no headers and no body to the Chat Completions
// endpoint and prints the raw response body.
//
// Every step that can fail is checked: on error a short message is printed
// and the program returns instead of continuing with unusable values.
func main() {
	url := "https://api.gravitex.ai/v1/chat/completions"

	req, err := http.NewRequest("POST", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// POST with no headers and no body; the response body is read as a String.
HttpResponse<String> response = Unirest.post("https://api.gravitex.ai/v1/chat/completions")
  .asString();

require 'uri'
require 'net/http'

# Chat Completions endpoint.
url = URI("https://api.gravitex.ai/v1/chat/completions")

# HTTPS client bound to the endpoint's host and port.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Send a bare POST (no headers, no body) and print the response body.
puts http.request(Net::HTTP::Post.new(url)).read_body

For Google Gemini native protocol, see Gemini Native. For general multi-model Chat Completions, see OpenAI Chat Completions.

Endpoint: POST https://api.gravitex.ai/v1/chat/completions

1. Model categories

Category	Example models	Routing	Notes
Chat / multimodal	`gemini-3.5-flash`, `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-3.1-flash-lite-preview`	`:generateContent` or `:streamGenerateContent`	Streaming follows client `stream` flag

2. Endpoint and authentication

POST https://api.gravitex.ai/v1/chat/completions
Authorization: Bearer sk-<your-token>
Content-Type: application/json

3. OpenAI field → Gemini mapping

OpenAI field	Gemini field	Description
`model`	`models/<model>` in the URL path	Model name passed through to Gemini
`messages`	`contents[]` + `systemInstruction`	`system`/`developer` roles → `systemInstruction`; `assistant` → `model`; `tool`/`function` → `functionResponse`
`stream`	URL `:streamGenerateContent` vs `:generateContent`
`temperature`	`generationConfig.temperature`
`top_p`	`generationConfig.topP`
`max_tokens` / `max_completion_tokens`	`generationConfig.maxOutputTokens`
`seed`	`generationConfig.seed`
`stop`	`generationConfig.stopSequences`	Max 5; extra entries truncated
`response_format.type = "json_schema"/"json_object"`	`generationConfig.responseMimeType = "application/json"` + `responseSchema`	Fields Gemini does not recognize, such as `additionalProperties`, are stripped automatically
`function` entries in `tools`	`tools[].functionDeclarations`	See §4
Three special names in `tools` (`googleSearch` / `codeExecution` / `urlContext`)	`tools[].googleSearch` / `tools[].codeExecution` / `tools[].urlContext`	See §4
`tool_choice`	`toolConfig.functionCallingConfig`	`"auto"→AUTO`, `"none"→NONE`, `"required"→ANY`; the object form `{type:"function",function:{name:"X"}}` → `ANY` + `allowedFunctionNames=["X"]`
`frequency_penalty`	`generationConfig.frequencyPenalty`
`presence_penalty`	`generationConfig.presencePenalty`
`top_k`	`generationConfig.topK`
`n`	`generationConfig.candidateCount`	Only takes effect when `n > 1`; controls the number of candidate responses
`logprobs`	`generationConfig.responseLogprobs`	Whether to return logprobs
`top_logprobs`	`generationConfig.logprobs`	Number of top logprobs
`modalities`	`generationConfig.responseModalities`	JSON array (e.g. `["text","audio"]`)
`audio`	`generationConfig.speechConfig`	TTS voice configuration, passed straight through to Gemini `speechConfig`

messages.content supports multimodal arrays (OpenAI v2):

type:"text" → parts[].text
type:"image_url" / type:"input_audio" / type:"file" → Downloaded/decoded into parts[].inlineData with MIME allowlist:
- Images: image/png, image/jpeg, image/jpg, image/webp, image/heic, image/heif
- Audio: audio/mpeg, audio/mp3, audio/wav
- Video: video/mp4, video/mov, video/mpeg, video/mpg, video/avi, video/wmv, video/mpegps, video/flv
- Documents: application/pdf, text/plain
Markdown images of the form ![alt](data:image/...;base64,...) embedded in a content string become inlineData parts (same as image_url).

4. Tools passthrough

"tools": [
  { "type": "function", "function": { "name": "googleSearch" } },     // Enable Google Search
  { "type": "function", "function": { "name": "codeExecution" } },    // Enable code execution
  { "type": "function", "function": { "name": "urlContext" } },       // Enable URL context
  { "type": "function", "function": {                                  // Standard function calling
      "name": "get_weather",
      "description": "Get weather",
      "parameters": { "type": "object", "properties": { "city": { "type": "string" } }, "required": ["city"] }
    }
  }
]

Three special names map to native Gemini tools; other function entries use functionDeclarations.

5. extra_body — passthrough Gemini native parameters

extra_body.google.*: all fields under this namespace are passed through to the Gemini native API.

{
  "model": "gemini-3.5-flash",
  "messages": [{ "role": "user", "content": "Draw a shiba inu" }],
  "extra_body": {
    "google": {
      "generationConfig": { /* ... */ },
      "safetySettings":   [ /* ... */ ],
      "tools":            [ /* ... */ ],
      "systemInstruction": { /* ... */ },
      "thinking_config":  { /* ... */ }
    }
  }
}

5.1 Two passthrough paths

Path	Field	Behavior
① snake_case allowlist (legacy)	`extra_body.google.thinking_config`	Parsed explicitly and written into the corresponding field. Only snake_case keys are accepted; a value whose type does not match is silently skipped (no error) and left for ② to handle as a fallback.
② Full passthrough (no schema validation)	Any field under `extra_body.google.*` other than ①	The whole `extra_body.google` subtree (minus `thinking_config`) is deep-merged into the final request JSON sent to Gemini. Write field names in Gemini's official native camelCase (`generationConfig`, `safetySettings`, `tools`, `systemInstruction`, `toolConfig`, `cachedContent`, `responseModalities`, `responseSchema`, `responseJsonSchema`, etc.).

5.2 thinking_config (snake_case allowlist)

Field	Type	Gemini field	Description
`thinking_budget`	int	`thinkingBudget`	Thinking token budget. > 0 sets `include_thoughts` true; 0 or negative disables thinking
`include_thoughts`	bool	`includeThoughts`	Return thinking trace in `reasoning_content`
`thinking_level`	string	`thinkingLevel`	Thinking level (e.g. `"HIGH"`)

Whenever extra_body.google is passed, the system's automatic thinking adaptation is turned off and all thinking behavior is controlled by the caller.

5.3 Deep-merge rules

Treat extra_body.google (minus the one snake_case key above) as patch.
Treat the Gemini request already built from the OpenAI fields as base.
deep merge:
- Same key, both maps → recursive merge;
- Any other type (scalar, array, null) → patch overwrites base;
- Keys only in base are kept.
Merged body is sent upstream — equivalent to a native Gemini call.

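In other words: you can use extra_body.google.generationConfig.maxOutputTokens to override the value set through the OpenAI field max_tokens, use extra_body.google.safetySettings to completely replace the platform's default safety settings, and use newly added Gemini fields (such as ones released in the future) directly, without any code change.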

5.4 Passthrough example

{
  "model": "gemini-3.5-flash",
  "messages": [{ "role": "user", "content": "Write an article about AI" }],
  "extra_body": {
    "google": {
      "generationConfig": {
        "temperature": 1,
        "topP": 0.95,
        "maxOutputTokens": 32768
      },
      "safetySettings": [
        { "category": "HARM_CATEGORY_HATE_SPEECH",       "threshold": "OFF" },
        { "category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF" },
        { "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF" },
        { "category": "HARM_CATEGORY_HARASSMENT",        "threshold": "OFF" }
      ]
    }
  }
}

6. Response format

6.1 Non-streaming `chat.completion`

{
  "id": "sXIFar39H4K0694P2MWmWQ",
  "object": "chat.completion",
  "created": 1747299537,
  "model": "gemini-3.5-flash",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello, how can I help?",
        "reasoning_content": "User is greeting..."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": { /* see §7 */ }
}

id = Upstream responseId (matches log request_id); falls back to chatcmpl-* if missing.
reasoning_content：Thinking text (only when include_thoughts:true).
executable_code / code_execution_result：Embedded as markdown code blocks in text.
Non-image media (audio, etc.) embedded as markdown [media](data:...).
finish_reason Mapping: STOP→stop, MAX_TOKENS→length, safety/recitation/…→content_filter, functionCall→tool_calls.

6.2 Streaming `chat.completion.chunk`

In streaming, delta.content is a string (not an array).
Images embedded in delta.content as ![image](data:...) markdown.
id is stable across streaming chunks.

7. Usage

Full set of response.usage fields:

{
  "prompt_tokens": 1127,
  "completion_tokens": 2050,     // includes reasoning tokens
  "total_tokens": 3177,          // prompt_tokens + completion_tokens

  "prompt_tokens_details": {
    "cached_tokens": 0,
    "text_tokens":   7,
    "audio_tokens":  0,
    "image_tokens":  1120
  },

  "completion_tokens_details": {
    "text_tokens":      26,
    "audio_tokens":     0,
    "image_tokens":     1120,
    "reasoning_tokens": 904      // thinking tokens, shown separately
  }
}

7.1 How thinking tokens are counted

reasoning_tokens is shown separately for visibility.
completion_tokens includes reasoning_tokens (OpenAI semantics; billing uses completion_tokens).

7.2 Output token breakdown

The system automatically buckets tokens by type based on the output:

Output	Bucket
Image output	→ `image_tokens`
Text only	→ `text_tokens`

Bucketing is based on the output, not the model name — even if the model name contains "image", a text-only answer is still billed as text.

7.3 Modality case handling

Accepts image / IMAGE case variants.

8. Logging and reconciliation

Upstream token usage is logged per request:

responseId matches response.id and the log request_id.

9. Examples

9.1 Text chat with thinking

curl -X POST https://api.gravitex.ai/v1/chat/completions \
  -H "Authorization: Bearer sk-xxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3.5-flash",
    "messages": [{"role": "user", "content": "Prove Fermat's Last Theorem"}],
    "extra_body": {
      "google": {
        "thinking_config": { "thinking_level": "HIGH", "include_thoughts": true }
      }
    }
  }'

9.2 Multimodal input (text + image URL)

curl -X POST https://api.gravitex.ai/v1/chat/completions \
  -H "Authorization: Bearer sk-xxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3.5-flash",
    "messages": [
      { "role": "user", "content": [
          { "type": "text", "text": "What is in this image?" },
          { "type": "image_url", "image_url": { "url": "https://example.com/cat.jpg" } }
        ]
      }
    ]
  }'

9.3 Google Search + URL context

curl -X POST https://api.gravitex.ai/v1/chat/completions \
  -H "Authorization: Bearer sk-xxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3.5-flash",
    "messages": [{"role": "user", "content": "Big tech news today?"}],
    "tools": [
      { "type": "function", "function": { "name": "googleSearch" } },
      { "type": "function", "function": { "name": "urlContext" } }
    ]
  }'

9.4 Streaming chat

curl -N -X POST https://api.gravitex.ai/v1/chat/completions \
  -H "Authorization: Bearer sk-xxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3.5-flash",
    "messages": [{"role": "user", "content": "Write a five-character quatrain"}],
    "stream": true,
    "stream_options": { "include_usage": true }
  }'

10. OpenAI-only parameters (ignored for Gemini)

The following standard OpenAI parameters have no corresponding field in the Gemini API; when passed they are silently dropped and no error is returned:

OpenAI field	Description
`logit_bias`	Gemini has no logit bias
`prediction`	Predicted output (no Gemini equivalent)
`user`	OpenAI end-user identifier (no Gemini equivalent)
`parallel_tool_calls`	No parallel tool call control
`verbosity`	GPT-5 specific
`service_tier`	OpenAI service tier
`safety_identifier`	OpenAI safety identifier
`store`	OpenAI store flag
`prompt_cache_key` / `prompt_cache_retention`	OpenAI cache control
`web_search_options`	Use `googleSearch` / `urlContext` in `tools` instead
`functions` / `function_call`	Legacy functions API — use `tools` + `tool_choice`

11. FAQ — Thinking / Reasoning

Q1：Can I control Gemini thinking length in OpenAI format?

Yes. Three approaches:

Option 1: `reasoning_effort` (standard OpenAI field, simplest)

Pass OpenAI reasoning_effort; mapped to Gemini thinking config:

{
  "model": "gemini-3.5-flash",
  "messages": [{"role": "user", "content": "Prove Fermat's Last Theorem"}],
  "reasoning_effort": "high"
}

Mapping (automatic):

`reasoning_effort`	Gemini 3 series
`"low"`	`thinkingLevel = "LOW"`
`"medium"`	`thinkingLevel = "MEDIUM"`
`"high"`	`thinkingLevel = "HIGH"`

Option 2: model name suffix

Suffix	Behavior
`-thinking`	Enables thinking + `includeThoughts: true`; budget from `max_tokens` percent
`-thinking-<number>`	Thinking on; budget = number (clamped)
`-nothinking`	Disables thinking (`thinkingBudget = 0`); Gemini 2.5 only

Example: gemini-3.5-flash-thinking-16384 → thinking on, budget 16384.

Option 3: `extra_body.google.thinking_config` (full control)

{
  "model": "gemini-3.5-flash",
  "messages": [{"role": "user", "content": "..."}],
  "extra_body": {
    "google": {
      "thinking_config": {
        "thinking_level": "HIGH",
        "include_thoughts": true
      }
    }
  }
}

The Gemini 3 series uses thinking_level instead of thinking_budget:

{
  "extra_body": {
    "google": {
      "thinking_config": {
        "thinking_level": "HIGH",
        "include_thoughts": true
      }
    }
  }
}

Priority: extra_body.google.thinking_config > reasoning_effort > suffix. Once extra_body.google is passed, the system's automatic thinking adaptation is turned off and all thinking behavior is fully controlled by the caller.

Q2：How do I read thinking output?

With include_thoughts: true set, the thinking process is returned in the response's reasoning_content field:

{
  "choices": [{
    "message": {
      "role": "assistant",
      "content": "Final answer...",
      "reasoning_content": "Thinking trace..."
    }
  }]
}

In streaming, thinking arrives via delta.reasoning_content.

Q3：Gemini 2.5 vs 3 thinking differences

Feature	Gemini 2.5	Gemini 3
Control	`thinkingBudget`（integer token budget）	`thinkingLevel`（enum MINIMAL/LOW/MEDIUM/HIGH）
Can disable thinking	Yes (`thinkingBudget = 0`)	Cannot disable (platform limit)
`reasoning_effort: "none"`	Supported (disable thinking)	Not supported

12. Known limitations

Limitation	Description
Streaming images as markdown	Streaming embeds `![image](data:...)`; non-streaming may return multimodal array
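Non-image media as markdown text	Non-image media such as audio is embedded as markdown
`extra_body.google.*` is passed through in full, with no field validation	A mistyped field (e.g. a typo or a value of the wrong type) is sent upstream as-is and Gemini returns the error; the caller is responsible for field correctness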
Default safety from platform	Override via `extra_body.google.safetySettings`

13. Troubleshooting

Issue	What to check
Usage looks wrong	Check logs vs upstream usage
`response.id` mismatch with logs	Should not happen — contact support
Why images are markdown not array	Streaming uses markdown; non-streaming may use multimodal array
`extra_body.google.xxx` not applied	Check that 1) `extra_body` is a JSON object, not a string; 2) `xxx` is under the `google` namespace; 3) `thinking_config` uses snake_case and all other fields use Gemini native camelCase
Override default safety	Set `extra_body.google.safetySettings`

Native OpenAI Format (ChatCompletions) | Moderations

API documentation

Chat & text

Safety & audio

Image Series

Video Series

Completions & Embeddings

Interface module

Gemini OpenAI format (Chat)

1. Model categories

2. Endpoint and authentication

3. OpenAI field → Gemini mapping

4. Tools passthrough

5. extra_body — passthrough Gemini native parameters

5.1 Two passthrough paths

5.2 thinking_config (snake_case allowlist)

5.3 Deep-merge rules

5.4 Passthrough example

6. Response format

6.1 Non-streaming `chat.completion`

6.2 Streaming `chat.completion.chunk`

7. Usage

7.1 How thinking tokens are counted

7.2 Output token breakdown

7.3 Modality case handling

8. Logging and reconciliation

9. Examples

9.1 Text chat with thinking

9.2 Multimodal input (text + image URL)

9.3 Google Search + URL context

9.4 Streaming chat

10. OpenAI-only parameters (ignored for Gemini)

11. FAQ — Thinking / Reasoning

Q1：Can I control Gemini thinking length in OpenAI format?

Option 1: `reasoning_effort` (standard OpenAI field, simplest)

Option 2: model name suffix

Option 3: `extra_body.google.thinking_config` (full control)

Q2：How do I read thinking output?

Q3：Gemini 2.5 vs 3 thinking differences

12. Known limitations

13. Troubleshooting

​1. Model categories

​2. Endpoint and authentication

​3. OpenAI field → Gemini mapping

​4. Tools passthrough

​5. extra_body — passthrough Gemini native parameters

​5.1 Two passthrough paths

​5.2 thinking_config (snake_case allowlist)

​5.3 Deep-merge rules

​5.4 Passthrough example

​6. Response format

​6.1 Non-streaming chat.completion

​6.2 Streaming chat.completion.chunk

​7. Usage

​7.1 How thinking tokens are counted

​7.2 Output token breakdown

​7.3 Modality case handling

​8. Logging and reconciliation

​9. Examples

​9.1 Text chat with thinking

​9.2 Multimodal input (text + image URL)

​9.3 Google Search + URL context

​9.4 Streaming chat

​10. OpenAI-only parameters (ignored for Gemini)

​11. FAQ — Thinking / Reasoning

​Q1：Can I control Gemini thinking length in OpenAI format?

​方式一：reasoning_effort（OpenAI 标准Field，最简单）

​Option 2: model name suffix

​Option 3: extra_body.google.thinking_config (full control)

​Q2：How do I read thinking output?

​Q3：Gemini 2.5 vs 3 thinking differences

​12. Known limitations

​13. Troubleshooting

1. Model categories

2. Endpoint and authentication

3. OpenAI field → Gemini mapping

4. Tools passthrough

5. extra_body — passthrough Gemini native parameters

5.1 Two passthrough paths

5.2 thinking_config (snake_case allowlist)

5.3 Deep-merge rules

5.4 Passthrough example

6. Response format

6.1 Non-streaming `chat.completion`

6.2 Streaming `chat.completion.chunk`

7. Usage

7.1 How thinking tokens are counted

7.2 Output token breakdown

7.3 Modality case handling

8. Logging and reconciliation

9. Examples

9.1 Text chat with thinking

9.2 Multimodal input (text + image URL)

9.3 Google Search + URL context

9.4 Streaming chat

10. OpenAI-only parameters (ignored for Gemini)

11. FAQ — Thinking / Reasoning

Q1：Can I control Gemini thinking length in OpenAI format?

Option 1: `reasoning_effort` (standard OpenAI field, simplest)

Option 2: model name suffix

Option 3: `extra_body.google.thinking_config` (full control)

Q2：How do I read thinking output?

Q3：Gemini 2.5 vs 3 thinking differences

12. Known limitations

13. Troubleshooting