Django 从 HDFS 下载文件

126 阅读1分钟

从 hdfs 下载到本地

# Read the whole 'features' file from HDFS into memory.
with client.read('features') as reader:
    features = reader.read()

# Write through a context manager so the local file is flushed and closed
# even if the write fails (the bare open() call leaked the handle).
with open('1.jpg', 'wb') as f:
    f.write(features)

通过web下载
文件信息在mysql中有备份,从mysql中得到文件的名字,再从hdfs找到该文件
settings

# HDFS configuration
# URL passed to the HDFS client below.
HDFSCLI_TEST_URL = 'http://172.17.0.1:50070' # HDFS here is started with Docker
# Client instance created once at import time and imported by the view; root is set to '/'.
CLIENT = Client(HDFSCLI_TEST_URL, root='/')

view

import ast
from contextlib import ExitStack

from django.http import StreamingHttpResponse

from filesystem.settings import CLIENT
class FileDownLoad(View):
    """Download a file stored in HDFS through a Django view.

    File metadata is looked up through the ``File`` model; the file content
    is streamed from HDFS via the shared ``CLIENT``.
    """

    # Number of bytes requested from HDFS per streamed chunk.
    CHUNK_SIZE = 64 * 1024

    def get(self, request):
        """Return the first requested file as a streamed attachment.

        The ``filesId`` query parameter is either a single integer id
        (``?filesId=3``) or a list/tuple literal of ids (``?filesId=[3, 4]``).
        The response is returned as soon as the first id has been resolved,
        so only the first file is actually sent.

        Raises:
            ValueError: ``filesId`` is missing, malformed, or empty.
            Exception: no ``File`` row exists for the id ('文件不存在'), or
                the file cannot be opened in HDFS ('下载失败').
        """
        fileIdList = self._parse_file_ids(request.GET.get('filesId'))

        for fileId in fileIdList:
            fileObj = File.objects.filter(id=fileId).first()
            if fileObj is None:
                raise Exception('文件不存在')

            hdfs_name = fileObj.hdfsName
            the_file_name = fileObj.name

            # Only opening the HDFS stream is expected to fail here, so the
            # try body is kept to that single call.
            try:
                chunks = self._open_hdfs_stream(hdfs_name, self.CHUNK_SIZE)
            except Exception:
                raise Exception(u'下载失败')

            # Build the streaming response from the chunk generator.
            response = StreamingHttpResponse(chunks)
            # Mind the content type.
            response['Content-Type'] = 'image/jpeg'
            # The filename here is the name the downloaded file will get.
            response['Content-Disposition'] = 'attachment;filename=%s' % the_file_name
            return response

        # An empty id list would otherwise make the view return None.
        raise ValueError('filesId must contain at least one id')

    @staticmethod
    def _parse_file_ids(raw):
        """Turn the raw ``filesId`` query string into a list of ids."""
        if not raw:
            raise ValueError('filesId is required')
        # SECURITY: this value comes straight from the request. The previous
        # eval() call allowed arbitrary code execution; ast.literal_eval only
        # accepts Python literals (ints, lists, tuples, ...).
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            raise ValueError('filesId is not a valid id or list of ids')
        # A single id is wrapped in a list.
        if type(parsed) is int:
            return [parsed]
        return list(parsed)

    @staticmethod
    def _open_hdfs_stream(hdfs_name, chunk_size):
        """Open ``hdfs_name`` now and return a generator over its chunks.

        The HDFS read context is entered eagerly so that open errors surface
        to the caller, but it is only exited once the generator finishes.
        Returning the response from inside the ``with`` block closed the
        reader before Django had streamed any of the body.
        """
        stack = ExitStack()
        reader = stack.enter_context(
            CLIENT.read(hdfs_name, chunk_size=chunk_size)
        )

        def stream():
            # Closes the HDFS reader when streaming ends or is aborted.
            with stack:
                for chunk in reader:
                    yield chunk

        return stream()

用postman测试
在这里插入图片描述

参考
hdfscli.readthedocs.io/en/latest/q…
blog.csdn.net/weixin_4213…
www.cnblogs.com/zhangshengx…