批量下载远程图片到本地

57 阅读1分钟
  • 准备json:将图片URL数组保存为 ./finalresult_converted.json
["http://xxxxx/xxxxx.jpg", ...]
  • 运行脚本
node download.js
// download.js
const fs = require('fs');
const http = require('http');
const https = require('https');
const path = require('path');
const { pipeline } = require('stream');

// Download settings read by the functions in this script.
const CONFIG = {
  maxConcurrent: 10,  // maximum number of downloads running at the same time
  timeout: 30000,     // timeout passed to http(s).get, in milliseconds
  retryCount: 3,      // default number of retries after a failed attempt
  outputDir: './image' // directory the downloaded files are written to
};

/**
 * Loads the list of image URLs from ./finalresult_converted.json.
 *
 * The file must contain a JSON array. If the file cannot be read, is not
 * valid JSON, or its top-level value is not an array, the problem is logged
 * and the process exits with code 1.
 *
 * @returns {Array} The parsed array of URLs.
 */
function loadUrls() {
  try {
    const data = fs.readFileSync('./finalresult_converted.json', 'utf8');
    const urls = JSON.parse(data);
    // Reject valid-but-wrong JSON (object, string, null, ...) here, where the
    // cause is clear, instead of letting callers fail on a missing .map/.length.
    if (!Array.isArray(urls)) {
      throw new TypeError('JSON内容必须是URL数组');
    }
    return urls;
  } catch (error) {
    console.error('读取URL文件失败:', error.message);
    process.exit(1);
  }
}

/**
 * Derives a file name from the last segment of a URL's path.
 * The query string and fragment are not part of the result, and
 * percent-encoded characters are kept as they appear in the URL.
 *
 * @param {string} url - Absolute URL of the remote file.
 * @returns {string} The last path segment, e.g. "a.jpg" for
 *   "http://host/dir/a.jpg?x=1".
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 * @throws {Error} If the URL path has no final segment (e.g. "http://host/").
 */
function getFileNameFromUrl(url) {
  const { pathname } = new URL(url);
  const fileName = path.basename(pathname);
  if (fileName === '') {
    // An empty name joined onto a directory yields the directory itself, which
    // would then look like a file that already exists. Fail loudly instead.
    throw new Error(`URL中没有文件名: ${url}`);
  }
  return fileName;
}

/**
 * Downloads a single URL to a local file, retrying on failure.
 *
 * An attempt fails when the request errors or times out, when the server
 * answers with a status other than 200, or when the body cannot be written
 * completely. A partially written file is removed before the attempt is
 * reported as failed. After a failure the download is retried following a
 * 1 second pause, up to `retries` more times.
 *
 * @param {string} url - http: or https: URL to fetch.
 * @param {string} filePath - Destination path of the downloaded file.
 * @param {number} [retries=CONFIG.retryCount] - Remaining retry attempts.
 * @returns {Promise<void>} Resolves once the file is fully written; rejects
 *   with the last error when every attempt has failed.
 */
function downloadFile(url, filePath, retries = CONFIG.retryCount) {
  return new Promise((resolve, reject) => {
    const protocol = url.startsWith('https:') ? https : http;

    // Set when the timeout fires so that whichever error surfaces afterwards
    // (socket hang up, premature close, ...) is reported as a timeout.
    let timeoutError = null;
    const fail = (error) => reject(timeoutError || error);

    const request = protocol.get(url, {
      timeout: CONFIG.timeout
    }, (response) => {
      if (response.statusCode !== 200) {
        // Discard the unwanted body so the socket is released.
        response.resume();
        fail(new Error(`HTTP ${response.statusCode}: ${response.statusMessage}`));
        return;
      }

      // pipeline (unlike pipe) reports errors and premature close from either
      // side and destroys both streams, so the promise always settles.
      pipeline(response, fs.createWriteStream(filePath), (error) => {
        if (!error) {
          resolve();
          return;
        }
        // Remove the incomplete file before failing so that neither a retry
        // nor a later run mistakes it for a finished download. Unlink errors
        // are ignored on purpose: the file may never have been created.
        fs.unlink(filePath, () => fail(error));
      });
    });

    request.on('error', fail);

    request.on('timeout', () => {
      // 'timeout' only signals inactivity; the request must be torn down
      // explicitly. The resulting error reaches `fail` via the request or
      // the pipeline, depending on how far the transfer got.
      timeoutError = new Error('请求超时');
      request.destroy(timeoutError);
    });
  }).catch((error) => {
    if (retries > 0) {
      console.log(`下载失败,重试中... (剩余重试次数: ${retries}): ${url}`);
      return new Promise(resolve => setTimeout(resolve, 1000))
        .then(() => downloadFile(url, filePath, retries - 1));
    }
    throw error;
  });
}

/**
 * Runs queued downloads with a cap on how many are in flight at once and
 * keeps a tally of the outcomes in `results`.
 */
class DownloadManager {
  /**
   * @param {number} maxConcurrent - Upper bound on simultaneously running tasks.
   */
  constructor(maxConcurrent) {
    this.maxConcurrent = maxConcurrent;
    this.running = 0;
    this.queue = [];
    this.results = { success: 0, failed: 0, errors: [] };
  }

  /**
   * Queues a URL for download.
   *
   * @param {string} url - URL to download.
   * @returns {Promise<void>} Resolves when the task has finished, whether it
   *   succeeded or failed; the outcome is recorded in `results`.
   */
  async add(url) {
    return new Promise((settle) => {
      this.queue.push({ url, resolve: settle });
      this.process();
    });
  }

  /**
   * Starts the next queued task if one is waiting and a slot is free, then
   * calls itself again once that task is done.
   */
  async process() {
    const atCapacity = this.running >= this.maxConcurrent;
    const nothingQueued = this.queue.length === 0;
    if (atCapacity || nothingQueued) {
      return;
    }

    const task = this.queue.shift();
    this.running += 1;

    try {
      const fileName = getFileNameFromUrl(task.url);
      const filePath = path.join(CONFIG.outputDir, fileName);
      const alreadyOnDisk = fs.existsSync(filePath);

      if (alreadyOnDisk) {
        // An existing file counts as a success without downloading again.
        console.log(`文件已存在,跳过: ${fileName}`);
      } else {
        console.log(`开始下载: ${fileName}`);
        await downloadFile(task.url, filePath);
        console.log(`下载完成: ${fileName}`);
      }
      this.results.success += 1;
    } catch (error) {
      console.error(`下载失败: ${task.url} - ${error.message}`);
      this.results.failed += 1;
      this.results.errors.push({ url: task.url, error: error.message });
    }

    this.running -= 1;
    task.resolve();

    // A slot just became free: pick up the next queued task, if any.
    this.process();
  }

  /**
   * Resolves once nothing is running and nothing is queued, checking every
   * 100 ms.
   */
  async waitForCompletion() {
    const hasWork = () => this.running > 0 || this.queue.length > 0;
    while (hasWork()) {
      await new Promise((wake) => setTimeout(wake, 100));
    }
  }
}

/**
 * Script entry point: ensures the output directory exists, loads the URL
 * list, downloads every URL through a DownloadManager and prints a summary
 * with the elapsed time, the success/failure counts and the failed URLs.
 *
 * @returns {Promise<void>} Resolves after the summary has been printed.
 */
async function main() {
  const { maxConcurrent, timeout, retryCount } = CONFIG;
  console.log('开始批量下载图片...');
  console.log(`配置: 最大并发数=${maxConcurrent}, 超时=${timeout}ms, 重试次数=${retryCount}`);

  // Create the output directory (and any missing parents) on first use.
  const outputDirExists = fs.existsSync(CONFIG.outputDir);
  if (!outputDirExists) {
    fs.mkdirSync(CONFIG.outputDir, { recursive: true });
  }

  const urls = loadUrls();
  console.log(`共找到 ${urls.length} 个图片URL`);

  const downloadManager = new DownloadManager(CONFIG.maxConcurrent);
  const startedAt = Date.now();

  // Queue everything at once; the manager limits how many run concurrently.
  const pendingDownloads = urls.map((imageUrl) => downloadManager.add(imageUrl));
  await Promise.all(pendingDownloads);
  await downloadManager.waitForCompletion();

  const elapsedSeconds = (Date.now() - startedAt) / 1000;
  const { success, failed, errors } = downloadManager.results;

  console.log('\n=== 下载完成 ===');
  console.log(`总耗时: ${elapsedSeconds.toFixed(2)} 秒`);
  console.log(`成功下载: ${success} 个文件`);
  console.log(`下载失败: ${failed} 个文件`);

  if (errors.length > 0) {
    console.log('\n失败的下载:');
    for (const { url, error } of errors) {
      console.log(`- ${url}: ${error}`);
    }
  }

  console.log(`\n所有图片已保存到: ${path.resolve(CONFIG.outputDir)}`);
}

// Run main() only when this file is executed directly rather than loaded via
// require(); if main() rejects, log the error and exit with code 1.
if (require.main === module) {
  main().catch(error => {
    console.error('程序执行出错:', error);
    process.exit(1);
  });
}