因为需要将我之前在掘金上发布的部分笔记备份一份到GitHub上,所以我编写了一个简单的python脚本,用于将markdown文本中引用的在线图片批量下载到本地,并且使用图片保存到本地的相对路径替换掉相应的markdown文本中的图片URL。
import re
import os
import hashlib
import requests
def download_and_replace_image_links(md_file_path, cookie_dict, save_folder='images'):
    """Download remote images referenced by a Markdown file and relink them locally.

    The file is scanned for Markdown image links whose URL starts with
    ``http://`` or ``https://`` and contains one of the extensions ``awebp``,
    ``webp``, ``jpg``, ``png`` or ``gif`` immediately followed by a ``?``
    query string. Each distinct URL is fetched once, stored in
    ``save_folder`` under the SHA-256 hex digest of its bytes (so identical
    images share one file), and every occurrence of the URL in the text is
    replaced with ``"<save_folder>/<digest>.<extension>"``.

    Args:
        md_file_path: Path of the UTF-8 encoded Markdown file to update in place.
        cookie_dict: Cookies sent with every image request.
        save_folder: Directory that receives the downloaded images; it is
            created if missing. The same string is used as the prefix of the
            rewritten links.

    Download failures are reported on stdout and the affected link is left
    untouched; the remaining images are still processed.
    """
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(save_folder, exist_ok=True)

    # newline='' disables newline translation so the file keeps its own line
    # endings (e.g. CRLF) when it is written back.
    with open(md_file_path, 'r', encoding='utf-8', newline='') as file:
        original = file.read()

    pattern = r'!\[.*?\]\((https?://.*?\.(awebp|webp|jpg|png|gif)\?.*?)\)'
    # dict.fromkeys drops repeated (url, extension) pairs while keeping their
    # first-seen order, so an image used several times is downloaded only once.
    unique_matches = dict.fromkeys(re.findall(pattern, original))

    content = original
    for url, file_extension in unique_matches:
        try:
            # Without a timeout a stalled server would block the script forever.
            response = requests.get(url, cookies=cookie_dict, timeout=30)
            response.raise_for_status()
            image_bytes = response.content
        except requests.RequestException as e:
            print(f"Failed to download {url}: {e}")
            continue

        # Content hash as file name: stable across runs and collision-free
        # for different images.
        file_hash = hashlib.sha256(image_bytes).hexdigest()
        filename = f'{file_hash}.{file_extension}'
        with open(os.path.join(save_folder, filename), 'wb') as f:
            f.write(image_bytes)

        content = content.replace(url, f'{save_folder}/{filename}')

    # Only touch the Markdown file when at least one link was rewritten.
    if content != original:
        with open(md_file_path, 'w', encoding='utf-8', newline='') as file:
            file.write(content)
def convert_cookie_to_dict(cookies):
    """Parse a ``Cookie``-header style string into a ``{name: value}`` dict.

    Args:
        cookies: Text such as ``"A=a; B=b; C=c"``. Pairs are separated by
            ``;`` with optional surrounding whitespace.

    Returns:
        A dict mapping each cookie name to its value. Empty segments (for
        example from an empty string or a trailing ``;``) are ignored. A
        segment without ``=`` maps the whole segment to an empty string.
        When a name repeats, the last value wins.
    """
    result = {}
    for record in cookies.split(';'):
        record = record.strip()
        if not record:
            continue
        # Split on the first '=' only: cookie values (e.g. base64 tokens)
        # may themselves contain '='.
        key, _, value = record.partition('=')
        result[key] = value
    return result
# Replace the string below with the cookie header needed to access the image URLs.
cookie_dict = convert_cookie_to_dict("A=a; B=b; C=c")
# Replace the path below with the markdown file whose images should be backed up.
download_and_replace_image_links(md_file_path="my_note.md", cookie_dict=cookie_dict)
注意:使用 Python 的 requests 库时,需要事先关闭电脑上的网络加速器等软件,否则可能导致下载失败!