Native OpenAI Format (ChatCompletions)

curl --request POST \
  --url https://api.gravitex.ai/v1/chat/completions \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "model": "<string>",
  "messages": [
    {}
  ],
  "temperature": 123,
  "stream": true,
  "max_tokens": 123,
  "top_p": 123
}
'

import requests

url = "https://api.gravitex.ai/v1/chat/completions"

payload = {
    "model": "<string>",
    "messages": [{}],
    "temperature": 123,
    "stream": True,
    "max_tokens": 123,
    "top_p": 123
}
headers = {
    "Authorization": "<authorization>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    model: '<string>',
    messages: [{}],
    temperature: 123,
    stream: true,
    max_tokens: 123,
    top_p: 123
  })
};

fetch('https://api.gravitex.ai/v1/chat/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.gravitex.ai/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'model' => '<string>',
    'messages' => [
        [
                
        ]
    ],
    'temperature' => 123,
    'stream' => true,
    'max_tokens' => 123,
    'top_p' => 123
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample chat-completion request to the Gravitex API and prints
// the raw response body to stdout.
//
// A failure while building the request, sending it, or reading the response
// is printed and ends the program early instead of being silently ignored.
func main() {
	url := "https://api.gravitex.ai/v1/chat/completions"

	payload := strings.NewReader("{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do returns an error, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.gravitex.ai/v1/chat/completions")
  .header("Authorization", "<authorization>")
  .header("Content-Type", "application/json")
  .body("{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.gravitex.ai/v1/chat/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = '<authorization>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}"

response = http.request(request)
puts response.read_body

{
  "id": "chatcmpl-xxx",
  "object": "chat.completion",
  "created": 1234567890,
  "model": "glm-5",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Artificial intelligence is a branch of computer science that aims to create intelligent machines..."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 100,
    "total_tokens": 125
  }
}

POST

chat

completions

Native OpenAI Format (ChatCompletions)

curl --request POST \
  --url https://api.gravitex.ai/v1/chat/completions \
  --header 'Authorization: <authorization>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "model": "<string>",
  "messages": [
    {}
  ],
  "temperature": 123,
  "stream": true,
  "max_tokens": 123,
  "top_p": 123
}
'

import requests

url = "https://api.gravitex.ai/v1/chat/completions"

payload = {
    "model": "<string>",
    "messages": [{}],
    "temperature": 123,
    "stream": True,
    "max_tokens": 123,
    "top_p": 123
}
headers = {
    "Authorization": "<authorization>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: '<authorization>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    model: '<string>',
    messages: [{}],
    temperature: 123,
    stream: true,
    max_tokens: 123,
    top_p: 123
  })
};

fetch('https://api.gravitex.ai/v1/chat/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.gravitex.ai/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'model' => '<string>',
    'messages' => [
        [
                
        ]
    ],
    'temperature' => 123,
    'stream' => true,
    'max_tokens' => 123,
    'top_p' => 123
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: <authorization>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample chat-completion request to the Gravitex API and prints
// the raw response body to stdout.
//
// A failure while building the request, sending it, or reading the response
// is printed and ends the program early instead of being silently ignored.
func main() {
	url := "https://api.gravitex.ai/v1/chat/completions"

	payload := strings.NewReader("{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "<authorization>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do returns an error, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.gravitex.ai/v1/chat/completions")
  .header("Authorization", "<authorization>")
  .header("Content-Type", "application/json")
  .body("{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.gravitex.ai/v1/chat/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = '<authorization>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"model\": \"<string>\",\n  \"messages\": [\n    {}\n  ],\n  \"temperature\": 123,\n  \"stream\": true,\n  \"max_tokens\": 123,\n  \"top_p\": 123\n}"

response = http.request(request)
puts response.read_body

{
  "id": "chatcmpl-xxx",
  "object": "chat.completion",
  "created": 1234567890,
  "model": "glm-5",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Artificial intelligence is a branch of computer science that aims to create intelligent machines..."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 100,
    "total_tokens": 125
  }
}

Introduction

Universal text chat API supporting OpenAI-compatible large language models for generating conversational responses. Through a unified API interface, you can call multiple mainstream large models including OpenAI, Claude, DeepSeek, Grok, and Tongyi Qianwen.

Authentication

string

required

Bearer Token, e.g. Bearer sk-xxxxxxxxxx

Request Parameters

string

required

Model identifier, supported models include:

OpenAI series: gpt-5.5, gpt-5.4, gpt-5.4-pro, gpt-5.4-mini, gpt-5.4-nano, gpt-4o, etc.
Claude series: claude-opus-4-8, claude-opus-4-7, claude-opus-4-6, claude-sonnet-4-5-20250929, claude-haiku-4-5-20251001, etc.
DeepSeek series: deepseek-v4-pro, deepseek-v4-flash, deepseek-v3-1-250821, deepseek-v3, deepseek-r1, etc.
Grok series: grok-4, grok-4-fast-reasoning, grok-3, etc.
Gemini series: gemini-3.1-pro-preview, gemini-3-pro-preview, gemini-3-flash-preview, nano-banana-pro, plus the -thinking / -nothinking / -thinking-<budget> / -thinking-low / -thinking-high suffix variants
Domestic models: glm-5, glm-4.7, doubao-seed-1-8-251228 (Doubao Seed series), qwen3-coder-plus, kimi-k2.5, etc.

array

required

Conversation message list, each element contains role (user/system/assistant) and content

number

default:"0.7"

Randomness control, 0-2, higher values = more random responses

boolean

default:"false"

Whether to enable streaming output, returns SSE format chunked data

number

Maximum number of tokens to generate, controls response length

number

Nucleus sampling parameter, 0-1, controls generation diversity

Basic Examples

Non-Streaming Request
Streaming Request (SSE)
Python Example

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "glm-5",
    "messages": [
      {"role": "system", "content": "You are a helpful assistant"},
      {"role": "user", "content": "Briefly introduce artificial intelligence"}
    ],
    "temperature": 0.7
  }'

curl -N -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "doubao-seed-1-8-251228",
    "stream": true,
    "messages": [
      {"role": "system", "content": "You are a helpful assistant"},
      {"role": "user", "content": "Briefly introduce artificial intelligence"}
    ]
  }'

from openai import OpenAI

# Point the official OpenAI SDK at the gateway instead of api.openai.com.
client = OpenAI(
    api_key="sk-xxxxxxxxxx",
    base_url="https://api.gravitex.ai/v1"
)

# Non-streaming: the full reply is returned in a single response object.
completion = client.chat.completions.create(
    model="glm-5",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Briefly introduce artificial intelligence"}
    ],
    temperature=0.7
)
print(completion.choices[0].message.content)

# Streaming: the reply arrives as incremental chunks.
stream = client.chat.completions.create(
    model="doubao-seed-1-8-251228",
    messages=[
        {"role": "user", "content": "Briefly introduce artificial intelligence"}
    ],
    stream=True
)
for chunk in stream:
    # A chunk may carry an empty choices list (for example a usage-only
    # chunk); skip it rather than raise IndexError on choices[0].
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    if text:
        print(text, end="")

{
  "id": "chatcmpl-xxx",
  "object": "chat.completion",
  "created": 1234567890,
  "model": "glm-5",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Artificial intelligence is a branch of computer science that aims to create intelligent machines..."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 100,
    "total_tokens": 125
  }
}

Advanced Features

Tool Calling (Functions / Tools)

Supports OpenAI-compatible tool calling format, applicable to GPT, Claude, DeepSeek, Grok, Tongyi Qianwen, and other models.

Phase 1: Model Returns Tool Call
Phase 2: Return Tool Execution Result

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "glm-5",
    "messages": [
      {"role": "user", "content": "What'\''s the weather in Shanghai?"}
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_weather",
          "description": "Get weather information by city",
          "parameters": {
            "type": "object",
            "properties": {
              "city": {"type": "string"}
            },
            "required": ["city"]
          }
        }
      }
    ],
    "tool_choice": "auto"
  }'

After the model returns tool_calls, you need to execute the tool and pass the result back to the model:

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "glm-5",
    "messages": [
      {"role": "user", "content": "What'\''s the weather in Shanghai?"},
      {
        "role": "assistant",
        "tool_calls": [
          {
            "id": "call_xxx",
            "type": "function",
            "function": {
              "name": "get_weather",
              "arguments": "{\"city\":\"Shanghai\"}"
            }
          }
        ]
      },
      {
        "role": "tool",
        "tool_call_id": "call_xxx",
        "content": "{\"temp\":\"22°C\",\"condition\":\"Cloudy\",\"aqi\":53}"
      }
    ]
  }'

tool_call_id must match the ID returned in Phase 1
If tool execution fails, return readable error information to avoid blocking subsequent completions
Phase 2 also supports streaming output

Structured Output (JSON Schema)

Supports controlling output format through response_format parameter, applicable to GPT, Claude, Grok, and other models.

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "doubao-seed-1-8-251228",
    "response_format": {
      "type": "json_schema",
      "json_schema": {
        "name": "Answer",
        "schema": {
          "type": "object",
          "properties": {
            "summary": {"type": "string"}
          },
          "required": ["summary"]
        }
      }
    },
    "messages": [
      {"role": "user", "content": "Return a JSON containing a summary field"}
    ]
  }'

For strict structured output, it is recommended to lower the temperature value (e.g., 0.1-0.3) and set an appropriate max_tokens to improve consistency.

Thinking Capability

Some models support thinking capability (Thinking/Reasoning), which can display the reasoning process when generating responses. Different models implement this differently:

DeepSeek
Tongyi Qianwen
Gemini

DeepSeek models support enabling thinking capability through the thinking field:

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "deepseek-v3-1-250821",
    "messages": [
      {"role": "system", "content": "You are a helpful assistant"},
      {"role": "user", "content": "Give a medium-difficulty geometry problem and solve it step by step"}
    ],
    "thinking": {"type": "enabled"}
  }'

thinking.type defaults to "disabled"; set it explicitly to "enabled" to turn thinking on
The output form of thinking capability may vary by model version
It is recommended to use this together with stream: true for a better interactive experience

Tongyi Qianwen supports deep thinking functionality, which requires streaming output:

curl -N -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "qwen3-omni-flash",
    "stream": true,
    "enable_thinking": true,
    "parameters": {
      "incremental_output": true
    },
    "messages": [
      {"role": "system", "content": "You are an excellent mathematician"},
      {"role": "user", "content": "What is the formula for Tower of Hanoi"}
    ]
  }'

Inline reasoning process into content: If the client does not display reasoning_content, you can set gravitex_thinking_to_content: true to inline the reasoning content into content:

curl -N -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "qwen3-omni-flash",
    "stream": true,
    "enable_thinking": true,
    "gravitex_thinking_to_content": true,
    "parameters": {
      "incremental_output": true
    },
    "messages": [
      {"role": "user", "content": "What is the formula for Tower of Hanoi"}
    ]
  }'

Tongyi Qianwen’s deep thinking functionality must be used with stream: true. If enable_thinking: true is set but stream: false, the system will automatically disable deep thinking to avoid upstream errors.

Full Gemini OpenAI-compatible docs (field mapping, extra_body, usage, thinking) are in Gemini OpenAI format (Chat). Quick reference:

Model suffix: -thinking (auto budget); -thinking-<number> precise budget (e.g., gemini-2.5-flash-thinking-8192); -nothinking disable; gemini-3-pro-preview-thinking-low/high specify level directly
extra_body config: extra_body.google.thinking_config.thinking_budget + include_thoughts; special values: -1 auto-enable, 0 disable, >0 specific budget; requires stream: true
reasoning_effort: usable when using -thinking and max_tokens is not set (low/medium/high ≈ 20%/50%/80% budget)
Gemini 3 Pro Preview: uses thinking_level (LOW/HIGH, default HIGH), can be combined with search
Enable search: recommended OpenAI-compatible tool "tools":[{"type":"function","function":{"name":"googleSearch"}}]; or pass through extra_body.google.tools:[{"googleSearch":{}}]
Notes: thinking adapter must be enabled server-side; thinking budget counts toward output tokens; use stream: true to view reasoning_content

Example (specific thinking budget):

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "gemini-3-flash-preview",
    "messages": [
      {"role":"user","content":"Give a medium-difficulty geometry problem and analyze it step by step."}
    ],
    "extra_body": {
      "google": {
        "thinking_config": { "thinking_budget": 6000, "include_thoughts": true }
      }
    },
    "stream": true
  }'

Example (3 Pro Preview thinking + search):

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "gemini-3-pro-preview",
    "messages": [
      {"role":"user","content":"Google search the weather in Guangzhou today"}
    ],
    "generationConfig": {
      "thinkingConfig": { "thinkingLevel": "LOW" }
    },
    "tools": [
      { "type": "function", "function": { "name": "googleSearch" } }
    ],
    "stream": true
  }'

Tongyi Qianwen Extended Features

Tongyi Qianwen models support extended features such as search, speech recognition, etc. All extended parameters need to be placed in the parameters object.

Search Feature
Speech Recognition

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "qwen3-omni-flash",
    "messages": [
      {"role": "user", "content": "Please first search for recent common misconceptions about Fermat'\''s Last Theorem, then answer"}
    ],
    "stream": true,
    "enable_thinking": true,
    "parameters": {
      "enable_search": true,
      "search_options": {
        "region": "CN",
        "recency_days": 30
      },
      "incremental_output": true
    }
  }'

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "qwen3-omni-flash",
    "messages": [
      {"role": "user", "content": "Hello"}
    ],
    "parameters": {
      "asr_options": {
        "language": "zh"
      }
    }
  }'

All extended parameters for Tongyi Qianwen (such as enable_search, search_options, asr_options, temperature, top_p, etc.) need to be placed in the parameters object, not at the top level of the request body.

Web Search Features

Some models support real-time web search, allowing access to the latest information and including citation sources in responses.

Claude Web Search
Grok Live Search

Claude models do not support enabling web search functionality through the web_search_options parameter, so it can only be implemented through tool calls, and may be unstable due to network and prompt reasons. For details, see Tool Calling (Functions / Tools) above.

Basic Example (showing tool call flow):

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "glm-5",
    "messages": [
      {"role": "user", "content": "What are the latest news about artificial intelligence?"},
      {
        "role": "assistant",
        "content": "I'\''ll help you search for the latest news about artificial intelligence.",
        "tool_calls": [
          {
            "id": "toolu_xxx",
            "type": "function",
            "function": {
              "name": "WebSearch",
              "arguments": "{\"query\": \"artificial intelligence latest news 2025\"}"
            }
          }
        ]
      },
      {
        "role": "tool",
        "tool_call_id": "toolu_xxx",
        "name": "WebSearch",
        "content": "Web search results for query: \"artificial intelligence latest news 2025\"..."
      }
    ],
    "web_search_options": {
      "search_context_size": "medium"
    }
  }'

Example with Location Information (showing tool call flow):

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "glm-5",
    "messages": [
      {"role": "user", "content": "What'\''s the weather in Shanghai today?"},
      {
        "role": "assistant",
        "content": "I'\''ll help you search for today'\''s weather in Shanghai.",
        "tool_calls": [
          {
            "id": "toolu_xxx",
            "type": "function",
            "function": {
              "name": "WebSearch",
              "arguments": "{\"query\": \"Shanghai today weather\"}"
            }
          }
        ]
      },
      {
        "role": "tool",
        "tool_call_id": "toolu_xxx",
        "name": "WebSearch",
        "content": "Web search results for query: \"Shanghai today weather\"..."
      }
    ],
    "web_search_options": {
      "search_context_size": "medium",
      "user_location": {
        "approximate": {
          "timezone": "Asia/Shanghai",
          "country": "CN",
          "region": "Shanghai",
          "city": "Shanghai"
        }
      }
    }
  }'

Search functionality will increase response time and token consumption (including search result content)
Search results will automatically include citation sources in the response
Supported models include Claude Sonnet 4, Claude 3 Opus, etc.
In multi-turn conversations, tool calls and results will be visible in message history, and the model can continue the conversation based on previous search results

Stability Notice:

Web search functionality depends on upstream proxy services and external search services, and may have the following instabilities:
- Network fluctuations: Network connection issues may cause search requests to timeout or fail
- Service limitations: Search services may have rate limits, timeout limits, or temporary unavailability
- Search result quality: Some queries may not find relevant information, or search results may be of poor quality
- Model judgment: The model will automatically determine whether a search is needed based on the question, and in some cases may not trigger a search
This is an inherent characteristic of web search functionality. It is recommended to:
- Implement retry mechanisms in critical scenarios
- Handle search failures with graceful degradation (e.g., using the model’s knowledge base to answer)
- Avoid relying entirely on web search in scenarios with extremely high real-time requirements

Grok models support real-time search through the search_parameters parameter.

object

Search parameter configuration

mode (optional): Search mode, options:
- "off": Disable search
- "auto": Model automatically determines if search is needed (recommended)
- "on": Force search usage
return_citations (optional): Whether to return citation links in the response, defaults to true

Basic Example:

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "grok-4",
    "messages": [
      {"role": "user", "content": "What are the latest developments in AI in 2026?"}
    ],
    "search_parameters": {
      "mode": "auto"
    }
  }'

Force Search Example:

curl -X POST "https://api.gravitex.ai/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-xxxxxxxxxx" \
  -d '{
    "model": "grok-4",
    "messages": [
      {"role": "user", "content": "What are the latest tech news?"}
    ],
    "search_parameters": {
      "mode": "on",
      "return_citations": true
    }
  }'

Python Example:

from openai import OpenAI

client = OpenAI(
    api_key="sk-xxxxxxxxxx",
    base_url="https://api.gravitex.ai/v1"
)

completion = client.chat.completions.create(
    model="grok-4",
    messages=[
        {"role": "user", "content": "What are the latest developments in AI in 2026?"}
    ],
    extra_body={
        "search_parameters": {
            "mode": "auto"
        }
    }
)
print(completion.choices[0].message.content)

It is recommended to use "auto" mode to let the model automatically determine if search is needed
Search functionality will increase response time but provides access to the latest real-time information
Supported models include grok-4, grok-3 series, etc.
Search results will include citation sources in the response

GPT File Input (Responses API)

GPT-5 and other models support file input functionality, which needs to be called through the /v1/responses endpoint, not /v1/chat/completions.

Upload via File URL
Upload via Base64 Encoding

You can upload PDF files by linking external URLs:

from openai import OpenAI

# The SDK appends the endpoint path ("/responses") to base_url, so base_url
# is the API root. The api-version query parameter is supplied through
# default_query instead of being embedded in the URL.
client = OpenAI(
    api_key="sk-xxxxxxxxxx",
    base_url="https://api.gravitex.ai/v1",
    default_query={"api-version": "2025-03-01-preview"}
)

# Ask the model to read a PDF referenced by an external URL.
response = client.responses.create(
    model="gpt-5.2",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": "Analyze this letter and summarize its key points"
                },
                {
                    "type": "input_file",
                    "file_url": "https://www.example.com/document.pdf"
                }
            ]
        }
    ]
)
print(response.output_text)

Send as Base64-encoded input:

import base64
from openai import OpenAI

client = OpenAI(
    api_key="sk-xxxxxxxxxx",
    base_url="https://api.gravitex.ai/v1"
)

with open("document.pdf", "rb") as f:
    data = f.read()

base64_string = base64.b64encode(data).decode("utf-8")

response = client.responses.create(
    model="gpt-5.2",
    input=[
        {
            "role": "user",
            "content": [
                {
                    "type": "input_file",
                    "filename": "document.pdf",
                    "file_data": f"data:application/pdf;base64,{base64_string}"
                },
                {
                    "type": "input_text",
                    "text": "What is the main content of this document?"
                }
            ]
        }
    ]
)
print(response.output_text)

File size limit: each file must not exceed 50 MB, and the total size of all files in a single request must not exceed 50 MB
Supported models: gpt-4o, gpt-4o-mini, gpt-5-chat, and other models that support text and image input

Grok Reasoning Capability

Grok models (especially grok-4-fast-reasoning) support reasoning capability. When enabled, usage.completion_tokens_details.reasoning_tokens shows the token count consumed by the reasoning process. See usage field reference below for details.

usage field reference

When calling /v1/chat/completions, the usage object in the response contains token usage statistics. This section first covers common fields (typical chat model scenarios), then scenario-specific fields that only appear with non-zero values in certain cases.

Common fields

Applies to: GPT series, Claude chat/thinking models, Gemini, DeepSeek, and other text chat scenarios via /v1/chat/completions. Does not include fields that only appear when the gateway internally calls Claude Messages protocol or image generation models — see Scenario-specific fields.

{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "created": 1752345600,
  "model": "gpt-5.6",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "This is the model's reply"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 120,
    "completion_tokens": 85,
    "total_tokens": 205,

    "prompt_tokens_details": {
      "cached_tokens": 30,
      "cache_write_tokens": 5,
      "text_tokens": 0,
      "audio_tokens": 0,
      "image_tokens": 0
    },
    "completion_tokens_details": {
      "text_tokens": 0,
      "audio_tokens": 0,
      "image_tokens": 0,
      "reasoning_tokens": 42,
      "accepted_prediction_tokens": 0,
      "rejected_prediction_tokens": 0
    },

    "input_tokens": 120,
    "output_tokens": 85,
    "input_tokens_details": null
  }
}

Field	When it has a value	Description
`prompt_tokens`	Always	Total input tokens
`completion_tokens`	Always	Total output tokens
`total_tokens`	Always	`prompt_tokens + completion_tokens`
`prompt_tokens_details.cached_tokens`	When model/channel supports prompt caching	Input tokens served from cache and billed at cache rate (OpenAI native cache, Gemini implicit cache, and Claude cache reads all appear in this field)
`prompt_tokens_details.cache_write_tokens`	GPT-5.6+ with explicit prompt cache enabled	Tokens newly written to cache in this request, billed at cache-write rate
`prompt_tokens_details.audio_tokens`	Audio input models only (e.g. `gpt-4o-audio-preview`)	Input tokens consumed by audio content
`prompt_tokens_details.text_tokens` / `image_tokens`	Usually `0` for plain text chat	Breakdown of text/image input tokens; typically not populated in plain text chat, but the fields are always present
`completion_tokens_details.reasoning_tokens`	Reasoning models only (OpenAI o1/o3/GPT-5 series, Claude extended thinking, Gemini thinking)	Tokens consumed by internal reasoning; not shown in the final reply text but billed at output rate
`completion_tokens_details.audio_tokens`	Audio output models only	Output tokens consumed by audio content
`completion_tokens_details.accepted_prediction_tokens` / `rejected_prediction_tokens`	OpenAI Predicted Outputs only	Tokens that matched / did not match predicted content; unmatched tokens are still billed at output rate
`completion_tokens_details.text_tokens` / `image_tokens`	`0` in plain text chat	See the note on `image_tokens` below this table
`input_tokens` / `output_tokens`	Always	Aliases for `prompt_tokens` / `completion_tokens`, retained for upstream protocol compatibility
`input_tokens_details`	Usually `null` in chat scenarios	Reserved field; not populated for ordinary chat requests

prompt_tokens_details and completion_tokens_details are always present in the response, even when all sub-fields are 0; 0 does not mean “unsupported”, only “not used in this request”.
completion_tokens_details.image_tokens (and prompt_tokens_details.image_tokens) are listed here because the structure is always present, but non-zero values only occur in special scenarios (Claude / image generation models). See Scenario-specific fields.
reasoning_tokens is a cross-model concept: whether the upstream is an OpenAI reasoning model, Claude extended thinking, or Gemini thinking, if “thinking” was enabled for that call, it appears in this single field.

Scenario-specific fields

The following fields only have non-zero values in specific cases: the request goes through /v1/chat/completions, but the gateway internally converts it to another protocol to call upstream (Claude Messages protocol / image generation models). The converted usage then includes these “native protocol-specific” fields.

Scenario 1: Underlying Claude call (`/v1/chat/completions` → `/v1/messages`)

When you request a Claude model in OpenAI format, the gateway converts the request to Anthropic Messages protocol, then maps Claude’s usage back to OpenAI format. Claude’s cache mechanism is finer-grained than OpenAI’s (5-minute / 1-hour TTL tiers), carried by these dedicated fields:

"usage": {
  "prompt_tokens": 120,
  "completion_tokens": 85,
  "total_tokens": 205,
  "prompt_tokens_details": {
    "cached_tokens": 30,
    "cached_creation_tokens": 10
  },
  "completion_tokens_details": {
    "reasoning_tokens": 42
  },
  "claude_cache_creation_5_m_tokens": 8,
  "claude_cache_creation_1_h_tokens": 2
}

Field	Description
`prompt_tokens_details.cached_creation_tokens`	Maps to Claude’s `cache_creation_input_tokens`: extra input tokens consumed to write prompt cache in this request (this total is split by TTL into the two fields below)
`claude_cache_creation_5_m_tokens`	Portion of `cached_creation_tokens` written to cache at 5-minute TTL tier
`claude_cache_creation_1_h_tokens`	Portion of `cached_creation_tokens` written to cache at 1-hour TTL tier (higher unit price)

prompt_tokens_details.cached_tokens and completion_tokens_details.reasoning_tokens also have values when calling Claude models (mapping to Claude cache-read tokens and extended-thinking tokens respectively), but these are cross-model common fields already covered in Common fields.

Scenario 2: Underlying image generation model (`/v1/chat/completions` → `/v1/images/generations` semantics)

Some image generation models (e.g. Gemini native image output, gpt-image series) normally use the official /v1/images/generations usage structure (input_tokens/output_tokens + modality breakdown). When users call these models via /v1/chat/completions for conversational image generation, the gateway maps that information into chat-format usage:

"usage": {
  "prompt_tokens": 50,
  "completion_tokens": 1290,
  "total_tokens": 1340,
  "prompt_tokens_details": {
    "text_tokens": 50,
    "image_tokens": 0
  },
  "completion_tokens_details": {
    "text_tokens": 0,
    "image_tokens": 1290
  },
  "generated_images": 1
}

Field	Description
`completion_tokens_details.image_tokens`	Image output tokens from official `/v1/images/generations`, mapped into chat format and billed at image ratio (`ImageCompletionRatio`)
`prompt_tokens_details.image_tokens`	Image tokens consumed on the input side (e.g. image editing / image-to-image)
`generated_images`	Actual number of images generated upstream (per-image billing models use this count instead of token count to avoid overcharging when e.g. 4 images were requested but only 1 was produced)

Scenario 3: Channel protocol differences

These two fields are not “converted” — they are passed through as-is from specific channel upstream responses. You are unlikely to encounter them with mainstream models:

Field	When it appears	Description
`prompt_cache_hit_tokens`	DeepSeek channel, when upstream returns cache info under DeepSeek’s own field name	DeepSeek’s official API uses `prompt_cache_hit_tokens` instead of `cached_tokens` for cache hits; the gateway also maps this into the common `prompt_tokens_details.cached_tokens`, but the original field is retained at the top level
`cost`	OpenRouter channel, when upstream includes USD cost in the response	OpenRouter-specific field; does not appear on ordinary channels (official OpenAI/Claude/Gemini, etc.)

Response Format

Non-Streaming Response
Streaming Response

{
  "id": "chatcmpl-xxx",
  "object": "chat.completion",
  "created": 1234567890,
  "model": "glm-5",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Response content..."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 100,
    "total_tokens": 125
  }
}

For full usage field reference, see usage field reference above.

Streaming responses are returned in SSE (Server-Sent Events) format; each chunk contains partial content:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1234567890,"model":"doubao-seed-1-8-251228","choices":[{"index":0,"delta":{"content":"回"},"finish_reason":null}]}

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","created":1234567890,"model":"doubao-seed-1-8-251228","choices":[{"index":0,"delta":{"content":"复"},"finish_reason":null}]}

data: [DONE]

The last chunk usually contains usage statistics.

Error Handling

Exception Type	Trigger Scenario	Return Message
AuthenticationError	Invalid or unauthorized API key	Error: Invalid or unauthorized API key
NotFoundError	Model does not exist or is not supported	Error: Model [model] does not exist or is not supported
APIConnectionError	Network interruption or server not responding	Error: Cannot connect to API server
APIError	Request format error and other server-side exceptions	API request failed: [error details]

Supported Model Series

OpenAI Series

GPT-5.5, GPT-5.4 family (5.4 / Pro / Mini / Nano), GPT-4o, GPT-4o Mini

Claude Series (Anthropic)

Claude Sonnet 4, Claude 3 Opus, Claude 3 Haiku

DeepSeek Series

DeepSeek V3, DeepSeek R1

Grok Series (xAI)

Grok-4, Grok-3, Grok-3-fast, Grok-4-fast-reasoning

Tongyi Qianwen Series (Qwen)

Qwen3-omni-flash, etc.

Doubao Seed Series

doubao-seed-1-8-251228, etc.

Other Models

Gemini series, GLM series (including glm-5), Kimi series, etc.

For the complete model list, please see the Model Information Page.

Notes

In the messages list, the system role is used to set model behavior, and the user role is for user questions
Multi-turn conversations require appending history (including assistant role responses)
Requires openai library: pip install openai
Different models may have different levels of support for certain features; it is recommended to check the specific model documentation before use

Using streaming output can improve first token response time and interactive experience
Tool calling requires proper timeout and retry mechanisms to avoid blocking model responses
Tongyi Qianwen extended parameters must be placed in the parameters object

FAQ

View FAQ for chat interface

Model List

View all supported model information

Native OpenAI Format (Responses) | Gemini OpenAI format (Chat)

API documentation

Chat & text

Safety & audio

Image Series

Video Series

Completions & Embeddings

Interface module

Native OpenAI Format (ChatCompletions)

Introduction

Authentication

Request Parameters

Basic Examples

Advanced Features

Tool Calling (Functions / Tools)

Structured Output (JSON Schema)

Thinking Capability

Tongyi Qianwen Extended Features

Web Search Features

GPT File Input (Responses API)

Grok Reasoning Capability

usage field reference

Common fields

Scenario-specific fields

Scenario 1: Underlying Claude call (`/v1/chat/completions` → `/v1/messages`)

Scenario 2: Underlying image generation model (`/v1/chat/completions` → `/v1/images/generations` semantics)

Scenario 3: Channel protocol differences

Response Format

Error Handling

Supported Model Series

OpenAI Series

Claude Series (Anthropic)

DeepSeek Series

Grok Series (xAI)

Tongyi Qianwen Series (Qwen)

Doubao Seed Series

Other Models

Notes

FAQ

Model List

Introduction

Authentication

Request Parameters

Basic Examples

Advanced Features

Tool Calling (Functions / Tools)

Structured Output (JSON Schema)

Thinking Capability

Tongyi Qianwen Extended Features

Web Search Features

GPT File Input (Responses API)

Grok Reasoning Capability

usage field reference

Common fields

Scenario-specific fields

Scenario 1: Underlying Claude call (/v1/chat/completions → /v1/messages)

Scenario 2: Underlying image generation model (/v1/chat/completions → /v1/images/generations semantics)

Scenario 3: Channel protocol differences

Response Format

Error Handling

Supported Model Series

OpenAI Series

Claude Series (Anthropic)

DeepSeek Series

Grok Series (xAI)

Tongyi Qianwen Series (Qwen)

Doubao Seed Series

Other Models

Notes

Related Resources

FAQ

Model List

Introduction

Authentication

Request Parameters

Basic Examples

Advanced Features

Tool Calling (Functions / Tools)

Structured Output (JSON Schema)

Thinking Capability

Tongyi Qianwen Extended Features

Web Search Features

GPT File Input (Responses API)

Grok Reasoning Capability

usage field reference

Common fields

Scenario-specific fields

Scenario 1: Underlying Claude call (`/v1/chat/completions` → `/v1/messages`)

Scenario 2: Underlying image generation model (`/v1/chat/completions` → `/v1/images/generations` semantics)

Scenario 3: Channel protocol differences

Response Format

Error Handling

Supported Model Series

OpenAI Series

Claude Series (Anthropic)

DeepSeek Series

Grok Series (xAI)

Tongyi Qianwen Series (Qwen)

Doubao Seed Series

Other Models

Notes

Related Resources