mongodb 管道聚合详解

459 阅读3分钟

聚合

聚合(Aggregation)可以将多个 documents 组合在一起,并对其进行各种操作,最后,返回单个结果。Mongodb 提供了三种使用聚合的方式:

  1. 管道聚合
  2. map-redece 函数
  3. 单一目的聚合方法

管道聚合

pipeline(管道线)类似工厂的流水线,将材料从管道的一端送入,进行多阶段的加工成为有用的产品,最后从管道的另一端送出。对于 mongodb ,则是将 多个 documents 推入流中,我们借助各种 聚合操作 进行处理,比如多表查询、过滤、计算、排序、统计,分组等等。最后,得到我们需要的数据。

$addFields

将新字段添加到 documents 中,这个方法会修改原 documents。($set$addFields 的别名。)

// scores 集合

{
  _id: 1,
  student: "Maya",
  homework: [ 10, 5, 10 ],
  quiz: [ 10, 8 ],
  extraCredit: 0
}
{
  _id: 2,
  student: "Ryan",
  homework: [ 5, 6, 5 ],
  quiz: [ 8, 8 ],
  extraCredit: 8
}
db.scores.aggregate( [
   {
     $addFields: {
        // homework 数组中的所有元素相加
        totalHomework: { $sum: "$homework" } ,
        totalQuiz: { $sum: "$quiz" }
     }
   },
   // 此时,第一阶段处理完成
   // 每个 documents 上新添加了 totalHomework 和 totalQuiz 字段
   // 接下来,进入第二阶段处理
   {
     $addFields: { totalScore:
       // 将以下三个字段的值相加
       { $add: [ "$totalHomework", "$totalQuiz", "$extraCredit" ] } }
   }
] )
// 返回的数据

{
  "_id" : 1,
  "student" : "Maya",
  "homework" : [ 10, 5, 10 ],
  "quiz" : [ 10, 8 ],
  "extraCredit" : 0,
  "totalHomework" : 25,
  "totalQuiz" : 18,
  "totalScore" : 43
}
{
  "_id" : 2,
  "student" : "Ryan",
  "homework" : [ 5, 6, 5 ],
  "quiz" : [ 8, 8 ],
  "extraCredit" : 8,
  "totalHomework" : 16,
  "totalQuiz" : 16,
  "totalScore" : 40
}

将新字段添加到对象中。

// vehicles 集合

{ _id: 1, type: "car", specs: { doors: 4, wheels: 4 } }
db.vehicles.aggregate( [
        {
           $addFields: {
              // 使用点表示法
              "specs.fuel_type": "unleaded"
           }
        }
   ] )
// 返回的数据


{ _id: 1, type: "car",
   specs: { doors: 4, wheels: 4, fuel_type: "unleaded" } }

将新字段添加到数组中。

// scores  集合

{ _id: 1, student: "Maya", homework: [ 10, 5, 10 ], quiz: [ 10, 8 ], extraCredit: 0 },
db.scores.aggregate([
    // $concatArrays 类似于 js 数组的 concat 方法
    // 将 $homework(homework 字段的引用)与 [7] 合并一起
    // 然后,再覆盖掉原 homework
   { $addFields: { homework: { $concatArrays: [ "$homework", [ 7 ] ] } } }
])
// 返回的数据


{ _id: 1, type: "car",
   specs: { doors: 4, wheels: 4, fuel_type: "unleaded" } }
{ _id: 2, type: "motorcycle",
   specs: { doors: 0, wheels: 2, fuel_type: "unleaded" } }

$count

统计 documents 的数量。

{ "_id" : 1, "subject" : "History", "score" : 88 }
{ "_id" : 2, "subject" : "History", "score" : 92 }
{ "_id" : 3, "subject" : "History", "score" : 97 }
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }
{ "_id" : 6, "subject" : "History", "score" : 83 }
db.scores.aggregate(
  [
    {
      $match: {
       // score > 80
        score: {
          $gt: 80
        }
      }
    },
    {
      $count: "passing_scores"
    }
  ]
)
{ "passing_scores" : 4 }

$limit $ $skip

$limit$skip 类似于 mysql 的 limitoffset,常用于 分页 的实现。

{ "_id" : 1, "subject" : "History", "score" : 88 }
{ "_id" : 2, "subject" : "History", "score" : 92 }
{ "_id" : 3, "subject" : "History", "score" : 97 }
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }
{ "_id" : 6, "subject" : "History", "score" : 83 }
db.scores.aggregate(
  [
   // 跳过前 3 个
   {$skip: 3},
   // 截取前 2 个
   {$limit: 2},
  ]
)
{
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }

$group

按照指定的 _id 字段对输入 documents 进行分组。

 { "_id" : 1, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("2"), "date" : ISODate("2014-03-01T08:00:00Z") },
  { "_id" : 2, "item" : "jkl", "price" : NumberDecimal("20"), "quantity" : NumberInt("1"), "date" : ISODate("2014-03-01T09:00:00Z") },
  { "_id" : 3, "item" : "xyz", "price" : NumberDecimal("5"), "quantity" : NumberInt( "10"), "date" : ISODate("2014-03-15T09:00:00Z") },
  { "_id" : 4, "item" : "xyz", "price" : NumberDecimal("5"), "quantity" :  NumberInt("20") , "date" : ISODate("2014-04-04T11:21:39.736Z") },
  { "_id" : 5, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("10") , "date" : ISODate("2014-04-04T21:23:13.331Z") },
  { "_id" : 6, "item" : "def", "price" : NumberDecimal("7.5"), "quantity": NumberInt("5" ) , "date" : ISODate("2015-06-04T05:08:13Z") },
  { "_id" : 7, "item" : "def", "price" : NumberDecimal("7.5"), "quantity": NumberInt("10") , "date" : ISODate("2015-09-10T08:43:00Z") },
  { "_id" : 8, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("5" ) , "date" : ISODate("2016-02-06T20:20:13Z") },
db.sales.aggregate( [
  {
    $group: {
       _id: null,
       count: { $sum: 1 }
    }
  }
] )
db.sales.aggregate(
  [
    {
      $group :
        {
          // 分组的依据是 item 字段
          // 拥有相同 item 名值的 documents 分在一组中
          _id : "$item",
          // 价格和数量相乘,然后加在一起
          totalSaleAmount: { $sum: { $multiply: [ "$price", "$quantity" ] } }
        }
     },
    
     {
      // totalSaleAmount <= 100
       $match: { "totalSaleAmount": { $gte: 100 } }
     }
   ]
 )
{ "_id" : "abc", "totalSaleAmount" : NumberDecimal("170") }
{ "_id" : "xyz", "totalSaleAmount" : NumberDecimal("150") }
{ "_id" : "def", "totalSaleAmount" : NumberDecimal("112.5") }

$match

过滤,相当于 mysql 的 where 查询语句。

 "_id" : ObjectId("512bc95fe835e68f199c8686"), "author" : "dave", "score" : 80, "views" : 100 }
{ "_id" : ObjectId("512bc962e835e68f199c8687"), "author" : "dave", "score" : 85, "views" : 521 }
{ "_id" : ObjectId("55f5a192d4bede9ac365b257"), "author" : "ahn", "score" : 60, "views" : 1000 }
{ "_id" : ObjectId("55f5a192d4bede9ac365b258"), "author" : "li", "score" : 55, "views" : 5000 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b259"), "author" : "annT", "score" : 60, "views" : 50 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b25a"), "author" : "li", "score" : 94, "views" : 999 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b25b"), "author" : "ty", "score" : 95, "views" : 1000 }
db.articles.aggregate(
   // 仅返回 author = "dave" 的 documents
    [ { $match : { author : "dave" } } ]
);
{ "_id" : ObjectId("512bc95fe835e68f199c8686"), "author" : "dave", "score" : 80, "views" : 100 }
{ "_id" : ObjectId("512bc962e835e68f199c8687"), "author" : "dave", "score" : 85, "views" : 521 }

$project

字段的增删改。

{
  "_id" : 1,
  title: "abc123",
  isbn: "0001122223334",
  author: { last: "zzz", first: "aaa" },
  copies: 5
}
// 只返回 title 和 author 字段
db.books.aggregate( [ { $project : { title : 1 , author : 1 } } ] )
// { "_id" : 1, "title" : "abc123", "author" : { "last" : "zzz", "first" : "aaa" } }

// 排除 author 里的 first 字段
db.books.aggregate( [ { $project : { author.first : 0 } } ] )
// { "_id" : 1, title: "abc123", isbn: "0001122223334", author: { last: "zzz", }, copies: 5 }


生成新的结构。

{
  "_id" : 1,
  title: "abc123",
  isbn: "0001122223334",
  author: { last: "zzz", first: "aaa" },
  copies: 5
}
db.books.aggregate(
   [
      {
         $project: {
            title: 1,
            isbn: {
               prefix: { $substr: [ "$isbn", 0, 3 ] },
               group: { $substr: [ "$isbn", 3, 2 ] },
               publisher: { $substr: [ "$isbn", 5, 4 ] },
               title: { $substr: [ "$isbn", 9, 3 ] },
               checkDigit: { $substr: [ "$isbn", 12, 1] }
            },
            lastName: "$author.last",
            copiesSold: "$copies"
         }
      }
   ]
)
{
   "_id" : 1,
   "title" : "abc123",
   "isbn" : {
      "prefix" : "000",
      "group" : "11",
      "publisher" : "2222",
      "title" : "333",
      "checkDigit" : "4"
   },
   "lastName" : "zzz",
   "copiesSold" : 5
}

$sort

排序。

db.users.aggregate(
   [
     // 按照 age 字段降序
     // 然后,再按照 posts 字段升序
     { $sort : { age : -1, posts: 1 } }
   ]
)

$unwind

解构一个数组字段

{ "_id" : 1, "item" : "ABC1", sizes: [ "S", "M", "L", null] }
db.inventory.aggregate( [
  {
    $unwind:
      {
        path: "$sizes",
        // 保留对应的索引
        includeArrayIndex: "arrayIndex",
        // 保留值为 null 的数值
        preserveNullAndEmptyArrays: true
      }
   }])
{ "_id" : 1, "item" : "ABC1", "sizes" : "S""arrayIndex" : NumberLong(0)}
{ "_id" : 1, "item" : "ABC1", "sizes" : "M""arrayIndex" : NumberLong(1)}
{ "_id" : 1, "item" : "ABC1", "sizes" : "L""arrayIndex" : NumberLong(2) }
{ "_id" : 1, "item" : "ABC1", "arrayIndex" : NumberLong(3)}

$lookup

$lookup 可以实现 mysql 中的关联查询。它将 documents 中指定字段,用另外一个 documents 进行替换。

// orders
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 },
{ "_id" : 3  }

// inventory  
{ "_id" : 1, "sku" : "almonds", description: "product 1", "instock" : 120 },
{ "_id" : 2, "sku" : "bread", description: "product 2", "instock" : 80 },
{ "_id" : 3, "sku" : "cashews", description: "product 3", "instock" : 60 },
{ "_id" : 4, "sku" : "pecans", description: "product 4", "instock" : 70 },
{ "_id" : 5, "sku": null, description: "Incomplete" },
{ "_id" : 6 }
db.orders.aggregate([
   {
     $lookup:
       {
         // 关联 documents
         from: "inventory",
         // orders 中需要替换的字段
         localField: "item",
         // inventory 中需要查询的字段
         foreignField: "sku",
         // 别名
         as: "inventory_docs"
       }
  }
])
{
   "_id" : 1,
   "item" : "almonds",
   "price" : 12,
   "quantity" : 2,
   "inventory_docs" : [
      { "_id" : 1, "sku" : "almonds", "description" : "product 1", "instock" : 120 }
   ]
}
{
   "_id" : 2,
   "item" : "pecans",
   "price" : 20,
   "quantity" : 1,
   "inventory_docs" : [
      { "_id" : 4, "sku" : "pecans", "description" : "product 4", "instock" : 70 }
   ]
}
{
   "_id" : 3,
   "inventory_docs" : [
      { "_id" : 5, "sku" : null, "description" : "Incomplete" },
      { "_id" : 6 }
   ]
}