1. 使用 langchain(需要较新的版本)
下面的参数请根据实际情况进行调整;这里我使用的是 Azure OpenAI 服务。
**
# Configure the OpenAI settings for Azure through environment variables.
# The key and endpoint values are placeholders; replace them with your own.
# NOTE(review): these are set before the langchain import below, presumably so
# the library picks them up when it initializes — confirm before reordering.
import os
os.environ["OPENAI_API_KEY"] = "you key"
os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
os.environ["OPENAI_API_TYPE"] = 'azure'
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
import chromadb
from langchain.embeddings import OpenAIEmbeddings
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
# Chroma client used below to create the collection.
chroma_client = chromadb.Client()
# LangChain embeddings object bound to the "xxx-embedding" deployment
# (placeholder deployment name).
AzureEmbeddings = OpenAIEmbeddings(deployment="xxx-embedding")
class AzureEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function backed by the module-level ``AzureEmbeddings``."""

    def __call__(self, texts: Documents) -> Embeddings:
        """Embed every document in *texts* and return the vectors in input order.

        Each document is passed to ``AzureEmbeddings.embed_query`` on its own,
        so one embedding call is made per text.
        """
        return [AzureEmbeddings.embed_query(text) for text in texts]
# Create the collection with the custom Azure embedding function attached,
# then print it.
collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=AzureEmbeddingFunction(),
)
print(collection)
2. 使用 openai 库直接调用 Azure OpenAI 服务
**
# Configure the OpenAI settings for Azure through environment variables.
# The key and endpoint values are placeholders; replace them with your own.
# NOTE(review): these are set before `import openai` below, presumably so the
# library reads them when it is imported — confirm before reordering.
import os
os.environ["OPENAI_API_KEY"] = "you key"
os.environ["OPENAI_API_BASE"] = "https://xxx.openai.azure.com/"
os.environ["OPENAI_API_TYPE"] = 'azure'
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
import openai
import chromadb
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
# Chroma client used below to create the collection.
chroma_client = chromadb.Client()
def embed_query(content):
    """Request an embedding for *content* from the "xxx-embedding" deployment.

    Returns the response of ``openai.Embedding.create`` unchanged; the caller
    in this file reads the vector from ``['data'][0]['embedding']``.
    """
    return openai.Embedding.create(
        engine="xxx-embedding",
        input=content,
    )
class AzureEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function that calls the module-level ``embed_query``."""

    def __call__(self, texts: Documents) -> Embeddings:
        """Embed every document in *texts* and return the vectors in input order.

        One ``embed_query`` request is made per text; the vector is taken from
        the first entry of the response's ``data`` list.
        """
        return [embed_query(text)['data'][0]['embedding'] for text in texts]
# Create the collection with the custom Azure embedding function attached,
# then print it.
collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=AzureEmbeddingFunction(),
)
print(collection)
3. 使用本地模型生成 Embeddings
text2vec-base-chinese 模型需要自行从 Hugging Face(huggingface.co/shibing624/text2vec-base-chinese)下载到本地。
**
import chromadb
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings
from sentence_transformers import SentenceTransformer

# Local directory of the text2vec-base-chinese model.
model_path = r'D:\PycharmProjects\example\models\text2vec-base-chinese'

# Chroma client used below to create the collection.
chroma_client = chromadb.Client()
# SentenceTransformer model loaded from the local path above.
model = SentenceTransformer(model_name_or_path=model_path)
class MyEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function backed by the module-level SentenceTransformer ``model``."""

    def __call__(self, texts: Documents) -> Embeddings:
        """Encode *texts* in one batch and return one float list per text.

        The vectors are returned in the same order as the input documents.
        """
        # Encoding the whole list at once lets the model batch the work;
        # tolist() turns the resulting NumPy array into plain nested lists of
        # floats, which is what the Embeddings return type declares.
        return model.encode(list(texts), convert_to_numpy=True).tolist()
# Create the collection with the local-model embedding function attached,
# then print it.
collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=MyEmbeddingFunction(),
)
print(collection)
4. 使用 Chroma 内置的 SentenceTransformerEmbeddingFunction 直接加载本地模型
**
import chromadb
from chromadb.utils import embedding_functions

# Local directory of the text2vec-base-chinese model.
model_path = r'D:\PycharmProjects\example\models\text2vec-base-chinese'

chroma_client = chromadb.Client()

# Chroma's built-in SentenceTransformer embedding function, pointed at the
# local model directory instead of a hand-written EmbeddingFunction subclass.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=model_path,
)

collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=sentence_transformer_ef,
)
print(collection)