一、下载Ollama
下载 Ollama(通过应用商店或官网下载),然后通过命令行 ollama run qwen3:8b 下载千问(推理)模型;使用 ollama run qwen2.5vl:7b 下载千问(图像识别)模型。
二、下载chatbox
下载chatbox(同上),方便ai的使用,打开设置,选择模型提供方(ollama),新建,ID:qwen3:8b,勾选推理,工具使用
三、调用
编写python程序,自动调用ai模型,同时预留提示词,方便后续设置ai人设。 示例代码:
import requests
import json
# OpenAI-compatible API version of the model call.
def _delta_content(chunk):
    """Return the incremental text carried by one streamed chunk, or None.

    A streaming chunk looks like {"choices": [{"delta": {"content": "..."}}]};
    keep-alive or reasoning-only chunks carry no "content" key.
    """
    choices = chunk.get('choices')
    if choices:
        delta = choices[0].get('delta', {})
        if 'content' in delta:
            return delta['content']
    return None


def call_8b_model_openai(prompt, model="qwen3:8b", stream=True, timeout=300):
    """
    Call the local Ollama model through its OpenAI-compatible API.

    :param prompt: user message sent to the model
    :param model: Ollama model tag to query
    :param stream: whether to enable streaming (SSE) output
    :param timeout: request timeout in seconds (added; previous behavior could
        hang forever on a stalled server)
    :return: the model's full reply text, or a Chinese error-message string
        when the request fails
    """
    url = "http://localhost:11434/v1/chat/completions"
    headers = {
        'Content-Type': 'application/json',
        # Ollama does not check the key, but the OpenAI-style header is expected.
        'Authorization': 'Bearer ollama'
    }
    data = {
        "model": model,
        # Several system messages stack up the persona for later tuning.
        "messages": [
            {"role": "system", "content": "你是一位病人"},
            {"role": "system", "content": "说话冗长,"},
            {"role": "system", "content": "你是一个性格暴躁的人"},
            {"role": "user", "content": prompt}
        ],
        "stream": stream,  # enable streaming output
        "temperature": 0.7
    }
    try:
        if stream:
            print("开始流式输出:")
            print("-" * 50)
            response = requests.post(url, headers=headers, json=data,
                                     stream=True, timeout=timeout)
            response.raise_for_status()
            full_response = ""
            for line in response.iter_lines():
                if not line:
                    continue
                line = line.decode('utf-8')
                # SSE frames are prefixed with "data: "; strip the prefix when
                # present, otherwise try to parse the raw line directly.
                if line.startswith('data: '):
                    line = line[6:]
                    if line == '[DONE]':
                        break
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue  # skip non-JSON keep-alives / partial frames
                content = _delta_content(chunk)
                if content is not None:
                    print(content, end='', flush=True)
                    full_response += content
            print("\n" + "-" * 50)
            print("流式输出结束")
            return full_response
        # Non-streaming: one JSON document with the complete answer.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.ConnectionError:
        return "无法连接到Ollama服务,请确保Ollama正在运行"
    except Exception as e:
        return f"调用失败:{str(e)}"
# Quick manual test of the API wrapper.
if __name__ == "__main__":
    question = "你哪里不舒服啊?"
    print("测试OpenAI兼容API(流式输出):")
    # Streamed call; the full reply text is also returned for later use.
    reply = call_8b_model_openai(question, stream=True)
    # Non-streaming variant, kept for reference:
    # print("\n\n测试非流式输出:")
    # full_reply = call_8b_model_openai(question, stream=False)
    # print("完整回答:\n", full_reply)