聚合
聚合(Aggregation)可以将多个 documents
组合在一起,并对其进行各种操作,最后,返回单个结果。Mongodb 提供了三种使用聚合的方式:
管道聚合
pipeline(管道线)类似工厂的流水线,将材料从管道的一端送入,进行多阶段的加工成为有用的产品,最后从管道的另一端送出。对于 mongodb ,则是将 多个 documents
推入流中,我们借助各种 聚合操作
进行处理,比如多表查询、过滤、计算、排序、统计,分组等等。最后,得到我们需要的数据。
$addFields
将新字段添加到 documents
中,这个方法会修改原 documents
。($set
是 $addFields
的别名。)
// scores 集合
{
_id: 1,
student: "Maya",
homework: [ 10, 5, 10 ],
quiz: [ 10, 8 ],
extraCredit: 0
}
{
_id: 2,
student: "Ryan",
homework: [ 5, 6, 5 ],
quiz: [ 8, 8 ],
extraCredit: 8
}
db.scores.aggregate( [
{
$addFields: {
// homework 数组中的所有元素相加
totalHomework: { $sum: "$homework" } ,
totalQuiz: { $sum: "$quiz" }
}
},
// 此时,第一阶段处理完成
// 每个 documents 上新添加了 totalHomework 和 totalQuiz 字段
// 接下来,进入第二阶段处理
{
$addFields: { totalScore:
// 将以下三个字段的值相加
{ $add: [ "$totalHomework", "$totalQuiz", "$extraCredit" ] } }
}
] )
// 返回的数据
{
"_id" : 1,
"student" : "Maya",
"homework" : [ 10, 5, 10 ],
"quiz" : [ 10, 8 ],
"extraCredit" : 0,
"totalHomework" : 25,
"totalQuiz" : 18,
"totalScore" : 43
}
{
"_id" : 2,
"student" : "Ryan",
"homework" : [ 5, 6, 5 ],
"quiz" : [ 8, 8 ],
"extraCredit" : 8,
"totalHomework" : 16,
"totalQuiz" : 16,
"totalScore" : 40
}
将新字段添加到对象中。
// vehicles 集合
{ _id: 1, type: "car", specs: { doors: 4, wheels: 4 } }
db.vehicles.aggregate( [
{
$addFields: {
// 使用点表示法
"specs.fuel_type": "unleaded"
}
}
] )
// 返回的数据
{ _id: 1, type: "car",
specs: { doors: 4, wheels: 4, fuel_type: "unleaded" } }
将新字段添加到数组中。
// scores 集合
{ _id: 1, student: "Maya", homework: [ 10, 5, 10 ], quiz: [ 10, 8 ], extraCredit: 0 },
db.scores.aggregate([
// $concatArrays 类似于 js 数组的 concat 方法
// 将 $homework(homework 字段的引用)与 [7] 合并一起
// 然后,再覆盖掉原 homework
{ $addFields: { homework: { $concatArrays: [ "$homework", [ 7 ] ] } } }
])
// 返回的数据
{ _id: 1, type: "car",
specs: { doors: 4, wheels: 4, fuel_type: "unleaded" } }
{ _id: 2, type: "motorcycle",
specs: { doors: 0, wheels: 2, fuel_type: "unleaded" } }
$count
统计 documents
的数量。
{ "_id" : 1, "subject" : "History", "score" : 88 }
{ "_id" : 2, "subject" : "History", "score" : 92 }
{ "_id" : 3, "subject" : "History", "score" : 97 }
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }
{ "_id" : 6, "subject" : "History", "score" : 83 }
db.scores.aggregate(
[
{
$match: {
// score > 80
score: {
$gt: 80
}
}
},
{
$count: "passing_scores"
}
]
)
{ "passing_scores" : 4 }
$limit $ $skip
$limit
和 $skip
类似于 mysql 的 limit
和 offset
,常用于 分页
的实现。
{ "_id" : 1, "subject" : "History", "score" : 88 }
{ "_id" : 2, "subject" : "History", "score" : 92 }
{ "_id" : 3, "subject" : "History", "score" : 97 }
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }
{ "_id" : 6, "subject" : "History", "score" : 83 }
db.scores.aggregate(
[
// 跳过前 3 个
{$skip: 3},
// 截取前 2 个
{$limit: 2},
]
)
{
{ "_id" : 4, "subject" : "History", "score" : 71 }
{ "_id" : 5, "subject" : "History", "score" : 79 }
$group
按照指定的 _id
字段对输入 documents
进行分组。
{ "_id" : 1, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("2"), "date" : ISODate("2014-03-01T08:00:00Z") },
{ "_id" : 2, "item" : "jkl", "price" : NumberDecimal("20"), "quantity" : NumberInt("1"), "date" : ISODate("2014-03-01T09:00:00Z") },
{ "_id" : 3, "item" : "xyz", "price" : NumberDecimal("5"), "quantity" : NumberInt( "10"), "date" : ISODate("2014-03-15T09:00:00Z") },
{ "_id" : 4, "item" : "xyz", "price" : NumberDecimal("5"), "quantity" : NumberInt("20") , "date" : ISODate("2014-04-04T11:21:39.736Z") },
{ "_id" : 5, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("10") , "date" : ISODate("2014-04-04T21:23:13.331Z") },
{ "_id" : 6, "item" : "def", "price" : NumberDecimal("7.5"), "quantity": NumberInt("5" ) , "date" : ISODate("2015-06-04T05:08:13Z") },
{ "_id" : 7, "item" : "def", "price" : NumberDecimal("7.5"), "quantity": NumberInt("10") , "date" : ISODate("2015-09-10T08:43:00Z") },
{ "_id" : 8, "item" : "abc", "price" : NumberDecimal("10"), "quantity" : NumberInt("5" ) , "date" : ISODate("2016-02-06T20:20:13Z") },
db.sales.aggregate( [
{
$group: {
_id: null,
count: { $sum: 1 }
}
}
] )
db.sales.aggregate(
[
{
$group :
{
// 分组的依据是 item 字段
// 拥有相同 item 名值的 documents 分在一组中
_id : "$item",
// 价格和数量相乘,然后加在一起
totalSaleAmount: { $sum: { $multiply: [ "$price", "$quantity" ] } }
}
},
{
// totalSaleAmount <= 100
$match: { "totalSaleAmount": { $gte: 100 } }
}
]
)
{ "_id" : "abc", "totalSaleAmount" : NumberDecimal("170") }
{ "_id" : "xyz", "totalSaleAmount" : NumberDecimal("150") }
{ "_id" : "def", "totalSaleAmount" : NumberDecimal("112.5") }
$match
过滤,相当于 mysql 的 where 查询语句。
"_id" : ObjectId("512bc95fe835e68f199c8686"), "author" : "dave", "score" : 80, "views" : 100 }
{ "_id" : ObjectId("512bc962e835e68f199c8687"), "author" : "dave", "score" : 85, "views" : 521 }
{ "_id" : ObjectId("55f5a192d4bede9ac365b257"), "author" : "ahn", "score" : 60, "views" : 1000 }
{ "_id" : ObjectId("55f5a192d4bede9ac365b258"), "author" : "li", "score" : 55, "views" : 5000 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b259"), "author" : "annT", "score" : 60, "views" : 50 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b25a"), "author" : "li", "score" : 94, "views" : 999 }
{ "_id" : ObjectId("55f5a1d3d4bede9ac365b25b"), "author" : "ty", "score" : 95, "views" : 1000 }
db.articles.aggregate(
// 仅返回 author = "dave" 的 documents
[ { $match : { author : "dave" } } ]
);
{ "_id" : ObjectId("512bc95fe835e68f199c8686"), "author" : "dave", "score" : 80, "views" : 100 }
{ "_id" : ObjectId("512bc962e835e68f199c8687"), "author" : "dave", "score" : 85, "views" : 521 }
$project
字段的增删改。
{
"_id" : 1,
title: "abc123",
isbn: "0001122223334",
author: { last: "zzz", first: "aaa" },
copies: 5
}
// 只返回 title 和 author 字段
db.books.aggregate( [ { $project : { title : 1 , author : 1 } } ] )
// { "_id" : 1, "title" : "abc123", "author" : { "last" : "zzz", "first" : "aaa" } }
// 排除 author 里的 first 字段
db.books.aggregate( [ { $project : { author.first : 0 } } ] )
// { "_id" : 1, title: "abc123", isbn: "0001122223334", author: { last: "zzz", }, copies: 5 }
生成新的结构。
{
"_id" : 1,
title: "abc123",
isbn: "0001122223334",
author: { last: "zzz", first: "aaa" },
copies: 5
}
db.books.aggregate(
[
{
$project: {
title: 1,
isbn: {
prefix: { $substr: [ "$isbn", 0, 3 ] },
group: { $substr: [ "$isbn", 3, 2 ] },
publisher: { $substr: [ "$isbn", 5, 4 ] },
title: { $substr: [ "$isbn", 9, 3 ] },
checkDigit: { $substr: [ "$isbn", 12, 1] }
},
lastName: "$author.last",
copiesSold: "$copies"
}
}
]
)
{
"_id" : 1,
"title" : "abc123",
"isbn" : {
"prefix" : "000",
"group" : "11",
"publisher" : "2222",
"title" : "333",
"checkDigit" : "4"
},
"lastName" : "zzz",
"copiesSold" : 5
}
$sort
排序。
db.users.aggregate(
[
// 按照 age 字段降序
// 然后,再按照 posts 字段升序
{ $sort : { age : -1, posts: 1 } }
]
)
$unwind
解构一个数组字段
{ "_id" : 1, "item" : "ABC1", sizes: [ "S", "M", "L", null] }
db.inventory.aggregate( [
{
$unwind:
{
path: "$sizes",
// 保留对应的索引
includeArrayIndex: "arrayIndex",
// 保留值为 null 的数值
preserveNullAndEmptyArrays: true
}
}])
{ "_id" : 1, "item" : "ABC1", "sizes" : "S", "arrayIndex" : NumberLong(0)}
{ "_id" : 1, "item" : "ABC1", "sizes" : "M", "arrayIndex" : NumberLong(1)}
{ "_id" : 1, "item" : "ABC1", "sizes" : "L", "arrayIndex" : NumberLong(2) }
{ "_id" : 1, "item" : "ABC1", "arrayIndex" : NumberLong(3)}
$lookup
$lookup 可以实现 mysql 中的关联查询。它将 documents
中指定字段,用另外一个 documents
进行替换。
// orders
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 },
{ "_id" : 3 }
// inventory
{ "_id" : 1, "sku" : "almonds", description: "product 1", "instock" : 120 },
{ "_id" : 2, "sku" : "bread", description: "product 2", "instock" : 80 },
{ "_id" : 3, "sku" : "cashews", description: "product 3", "instock" : 60 },
{ "_id" : 4, "sku" : "pecans", description: "product 4", "instock" : 70 },
{ "_id" : 5, "sku": null, description: "Incomplete" },
{ "_id" : 6 }
db.orders.aggregate([
{
$lookup:
{
// 关联 documents
from: "inventory",
// orders 中需要替换的字段
localField: "item",
// inventory 中需要查询的字段
foreignField: "sku",
// 别名
as: "inventory_docs"
}
}
])
{
"_id" : 1,
"item" : "almonds",
"price" : 12,
"quantity" : 2,
"inventory_docs" : [
{ "_id" : 1, "sku" : "almonds", "description" : "product 1", "instock" : 120 }
]
}
{
"_id" : 2,
"item" : "pecans",
"price" : 20,
"quantity" : 1,
"inventory_docs" : [
{ "_id" : 4, "sku" : "pecans", "description" : "product 4", "instock" : 70 }
]
}
{
"_id" : 3,
"inventory_docs" : [
{ "_id" : 5, "sku" : null, "description" : "Incomplete" },
{ "_id" : 6 }
]
}