一个华科软件人的起跑:本地部署ai模型

49 阅读2分钟

一、下载Ollama

下载 Ollama(可通过应用商店或官网获取),然后在命令行执行 ollama run qwen3:8b 下载千问(推理)模型;执行 ollama run qwen2.5vl:7b 下载千问(图像识别)模型。

二、下载chatbox

下载 Chatbox(获取方式同上),方便与 AI 交互。打开设置,选择模型提供方(Ollama),新建模型,ID 填 qwen3:8b,并勾选"推理"与"工具使用"。

三、调用

编写 Python 程序,自动调用 AI 模型,同时预留系统提示词,方便后续设置 AI 人设。示例代码:

import requests
import json


# Version that talks to a local Ollama server through its OpenAI-compatible API.
def _delta_content(payload):
    """Parse one streaming JSON payload and return its text delta, or None.

    Returns None when the payload is not valid JSON, is not an object, or
    carries no ``choices[0].delta.content`` field (e.g. role-only chunks).
    """
    try:
        chunk = json.loads(payload)
    except json.JSONDecodeError:
        return None
    if not isinstance(chunk, dict):
        return None
    choices = chunk.get('choices')
    if not choices:
        return None
    return choices[0].get('delta', {}).get('content')


def call_8b_model_openai(prompt, model="qwen3:8b", stream=True, timeout=None):
    """
    Call a local Ollama model via the OpenAI-compatible chat-completions API.

    :param prompt: user message sent to the model
    :param model: Ollama model tag to query (default "qwen3:8b")
    :param stream: whether to stream the answer chunk by chunk (printed live)
    :param timeout: optional requests timeout in seconds; None (the default)
        waits indefinitely, matching the original behavior — pass a number
        to avoid hanging on an unresponsive server
    :return: the model's full answer string, or an error message on failure
    """
    url = "http://localhost:11434/v1/chat/completions"

    headers = {
        'Content-Type': 'application/json',
        # Ollama ignores the token value but expects the header to be present.
        'Authorization': 'Bearer ollama'
    }

    data = {
        "model": model,
        "messages": [
            # System prompts reserved for shaping the AI persona; tweak later.
            {
                "role": "system",
                "content": "你是一位病人"
            },
            {
                "role": "system",
                "content": "说话冗长,"
            },
            {
                "role": "system",
                "content": "你是一个性格暴躁的人"
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "stream": stream,  # enable server-side streaming when True
        "temperature": 0.7
    }

    try:
        if stream:
            print("开始流式输出:")
            print("-" * 50)

            response = requests.post(url, headers=headers, json=data,
                                     stream=True, timeout=timeout)
            response.raise_for_status()

            full_response = ""
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                # SSE frames carry a "data: " prefix; some servers emit bare
                # JSON lines instead, so fall back to parsing the line as-is.
                if line.startswith('data: '):
                    payload = line[6:]  # strip the "data: " prefix
                    if payload == '[DONE]':
                        break
                else:
                    payload = line
                content = _delta_content(payload)
                if content is not None:
                    print(content, end='', flush=True)
                    full_response += content

            print("\n" + "-" * 50)
            print("流式输出结束")
            return full_response

        else:
            # Non-streaming: single blocking request, answer in the JSON body.
            response = requests.post(url, headers=headers, json=data,
                                     timeout=timeout)
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]

    except requests.exceptions.ConnectionError:
        return "无法连接到Ollama服务,请确保Ollama正在运行"
    except Exception as e:
        return f"调用失败:{str(e)}"


# Manual smoke test for the local model call.
if __name__ == "__main__":
    question = "你哪里不舒服啊?"
    print("测试OpenAI兼容API(流式输出):")

    # Streaming mode: the function prints tokens live and returns the full text.
    streamed_answer = call_8b_model_openai(question, stream=True)

    # Non-streaming variant, kept for reference:
    # print("\n\n测试非流式输出:")
    # full_answer = call_8b_model_openai(question, stream=False)
    # print("完整回答:\n", full_answer)