在之前的文章 “Jina-VLM:小型多语言视觉语言模型” 中,我们介绍了 Jina-VLM 小型多语言视觉语言模型。在今天的文章中,我们将使用一个 Python 应用来展示如何使用它。
下载代码
首先我们克隆代码:
`git clone https://github.com/liu-xiao-guo/jina_vlm_demo`
`
1. $ tree -L 3
2. .
3. ├── README.md
4. ├── app.py
5. ├── images
6. │ ├── chenglong.png
7. │ ├── carjpg.jpg
8. │ ├── girl.jpeg
9. │ └── mask.png
10. ├── pics
11. │ ├── car.png
12. │ ├── girl.png
13. │ └── jackie.png
14. └── requirements.txt
`
如上所示,我们有一个目录叫做 images,它里面含有我们需要进行对话的图片。app.py 是我们的应用。在项目目录中,还有一个叫做 .env 的配置文件:
.env
`JINA_API_KEY=<Your Jina API Key at jina.ai>`
我们需要在 jina.ai 里申请一个免费的 key。
应用设计
我们的 app.py 代码是这样的:
app.py
`
# Streamlit demo: pick an image from the local "images" folder in the sidebar
# and chat about it with the Jina VLM chat-completions endpoint.
#
# NOTE: the explanatory header is written as comments (not a module docstring)
# so that st.set_page_config() stays the first thing Streamlit evaluates.
import base64
import io
import os

import requests
import streamlit as st
from dotenv import load_dotenv
from PIL import Image

# --- Page Configuration ---
st.set_page_config(
    page_title="Jina VLM Image Chat",
    page_icon="🤖",
    layout="wide"
)

# --- Main Application UI ---
st.title("🖼️ Jina VLM Image Chat")
st.write("Select an image from the sidebar and ask questions about it!")

# --- API Key Loading ---
load_dotenv()  # Load variables from .env file
JINA_API_KEY = os.getenv("JINA_API_KEY")

JINA_API_URL = "https://api-beta-vlm.jina.ai/v1/chat/completions"

# Upper bound (seconds) for one API call. Without a timeout, requests waits
# forever on a stalled connection and the Streamlit run never finishes.
REQUEST_TIMEOUT_SECONDS = 60

if not JINA_API_KEY:
    st.error("JINA_API_KEY not found. Please create a .env file and add your key.")
    st.info("Example .env file content: JINA_API_KEY=\"your_api_key_here\"")
    st.stop()

# --- Image Selection Sidebar ---
st.sidebar.title("Image Selection")
IMAGE_DIR = "images"

# Check if the image directory exists to avoid errors.
if not os.path.isdir(IMAGE_DIR):
    st.sidebar.error(f"Image directory '{IMAGE_DIR}' not found.")
    st.sidebar.info("Please create an 'images' folder and add some pictures to it.")
    image_files = []
else:
    # Get a list of valid image files from the directory. os.listdir() returns
    # entries in arbitrary order, so sort them to keep the selectbox (and its
    # default choice) stable between runs.
    image_files = sorted(
        f for f in os.listdir(IMAGE_DIR)
        if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
    )

if not image_files:
    st.sidebar.warning("No images found in the 'images' directory.")
    selected_image_name = None
else:
    selected_image_name = st.sidebar.selectbox(
        "Choose an image:",
        image_files
    )

# --- Image and Chat State Management ---
# Clear chat history if the selected image changes.
if "last_seen_image" not in st.session_state:
    st.session_state.last_seen_image = None

if selected_image_name != st.session_state.last_seen_image:
    st.session_state.messages = []  # Reset chat history
    st.session_state.last_seen_image = selected_image_name
    # Clear image bytes if no image is selected
    if selected_image_name is None:
        st.session_state.current_image_bytes = None
    else:
        # Load and store the new image in session state
        image_path = os.path.join(IMAGE_DIR, selected_image_name)
        try:
            # The context manager closes the underlying file handle once the
            # pixel data has been copied into the converted image.
            with Image.open(image_path) as source_image:
                image = source_image.convert("RGB")
            # Convert image to PNG bytes to store in session state; the same
            # bytes are later sent to the API as a data:image/png URL.
            img_byte_arr = io.BytesIO()
            image.save(img_byte_arr, format='PNG')
            st.session_state.current_image_bytes = img_byte_arr.getvalue()
        except Exception as e:
            # Best-effort: report the problem in the sidebar and carry on
            # without an image instead of crashing the whole app.
            st.sidebar.error(f"Error opening image: {e}")
            st.session_state.current_image_bytes = None
    # No explicit st.rerun() here: the history was just reset to an empty
    # list above, so a "rerun if there are messages" check could never fire,
    # and the rest of this run already renders from the updated state.

# Display the selected image in the sidebar
if st.session_state.get("current_image_bytes"):
    st.sidebar.image(
        st.session_state.current_image_bytes,
        caption=st.session_state.last_seen_image,
        width='stretch'  # Make image fill the sidebar width
    )

# --- Chat Interface ---
# Initialize chat history in session state if it doesn't exist.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display past messages from the chat history.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle user input from the chat box.
if prompt := st.chat_input("Ask a question about the image..."):
    # Check if an image is selected before proceeding.
    if not st.session_state.get("current_image_bytes"):
        st.warning("Please select an image from the sidebar first.")
    else:
        # Add user message to history and display it.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Prepare for and generate the model's response.
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            message_placeholder.markdown("Thinking... 🤔")

            try:
                # Retrieve the image from session state and base64 encode it.
                image_bytes = st.session_state.current_image_bytes
                base64_image = base64.b64encode(image_bytes).decode('utf-8')
                image_url = f"data:image/png;base64,{base64_image}"

                # Prepare headers and payload for the Jina API
                headers = {
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {JINA_API_KEY}"
                }
                # Only the current question and the image are sent; earlier
                # turns of the chat history are not part of the request.
                payload = {
                    "model": "jina-vlm",
                    "messages": [{
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": image_url}}
                        ]
                    }],
                    "max_tokens": 1024
                }

                # Make the API request
                response = requests.post(
                    JINA_API_URL,
                    headers=headers,
                    json=payload,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                response.raise_for_status()  # Raise an exception for bad status codes

                # Extract the response content
                response_data = response.json()
                assistant_response = response_data['choices'][0]['message']['content']

                message_placeholder.markdown(assistant_response)

                # Add assistant response to chat history.
                st.session_state.messages.append({"role": "assistant", "content": assistant_response})

            except Exception as e:
                # UI boundary: network errors, HTTP errors, timeouts and
                # malformed responses are all shown in the chat instead of
                # aborting the script run.
                error_message = f"An error occurred while generating the response: {e}"
                message_placeholder.error(error_message)
                st.session_state.messages.append({"role": "assistant", "content": error_message})
`
运行应用
我们可以使用如下的命令来运行:
`(.venv) $ streamlit run app.py`
运行后,我们可以看到如下的画面:
在左边,我们选中所需要的图片。在右边,我们可以看到聊天的记录。我们可以针对图片来进行提问:
祝大家学习愉快!