0. 环境
- mongodb 4.2.0
- spring-data-mongodb 3.0.0
- jdk1.8
1. 增删改查
1.1 增
- insert
- insertOne
- inertMany 举例1:在shell中先定义函数,再执行插入
document = ({"Tome":"work"})
db.abmlz.insert(document)
举例2:在shell中批量插入
db.abmlz.insertMany([{"Job":"work"}, {"Tome":"Dance"}])
对应java:
//单条插入
Map<String, Object> map = new HashMap<>();
mongoTemplate.insert(map, "集合名");
//多条插入
List<Document> primaryIdList = new ArrayList<Document>();
ppidList.add(Document.parse("json字符串"));
mongoTemplate.insert(primaryIdList, "ppidTable");
1.2 删
1.3 查
1.3.1 简单查询及排序
mongo查询格式及举例:
//第一个括号:查询条件; 第二个括号:查询返回字段限制; 第三个括号: limit; 第四个括号skip;
db.getCollection('abmlz').find({ "name" : "malize", "loc.type": "Point"}, {},{} ,{})
//简单查询:
db.getCollection('amlz').find({"name" : "malize"})
或者
db.'amlz'.find({"name" : "malize"})
//查询内嵌文档(带.):
db.getCollection('amlz').find({"loc.coordinates" : [
52.37,
5.217
]})
//只返回patient_id和visit_id两个字段
db.getCollection('zsyyIBD').find({}, {patient_id: 1,visit_id: 1})
//不返回patient_id和visit_id两个字段
db.getCollection('zsyyIBD').find({}, {patient_id: 0,visit_id: 0})
//排序分页(跳过1条,取出10条):
db.getCollection('abmlz').find().sort({'name': -1 }).limit(10).skip(1)
//shell快捷跳过(跳过1条,查出2条):
db.getCollection('abmlz').find({'name': 'lifei' },{}, limit, skip)
db.getCollection('abmlz').find({'name': 'lifei' },{},2,1)
对应java:
Query query = new Query();
//统计总数
long totalCount = mongoTemplate.count(query, Object.class, remarkPatientsParam.getDiseaseCode());
query.with(Sort.by(Sort.Order.asc("排序字段")));
query.skip((remarkPatientsParam.getCurrentPage() - 1) * remarkPatientsParam.getPageSize());
query.limit(remarkPatientsParam.getPageSize());
List<GroupPatientList> patientLists = mongoTemplate.find(query, GroupPatientList.class, "表名");
固定大小集合(严格保证数据顺序是按插入顺序排)
用于日志, 大小超过设定的集合后, 自动删除最先插入的数据
//设定固定大小20480字节集合,最大文档数为100,只要满足一条,文档就会触发删除最老数据
db.createCollection("fixedCollecion",{capped:true, size:20480, max: 100})
//按照自然插入顺序倒序,取出10条
db.getCollection('fixedCollecion').find({}).sort({$natural: -1}).limit(10)
1.3.2 条件操作符
$gt $gte $lt $lte $in $nin $ne $all(全部都满足)
//查询年龄大于30岁的:
db.getCollection('zsyyIBD').find({'info_age':{$gte: '30'}})
1.4 地理搜索...
导入数据
db.getCollection('amlz').insertMany([
{ "_id" : ObjectId("611cba5e04b2d2815aecb101"), "name" : "malize", "loc" : { "type" : "Point", "coordinates" : [ 52.37, 5.217 ] } } ,
{ "_id" : ObjectId("611cbb8a04b2d2815aecb102"), "name" : "jack_Line", "loc" : { "type" : "LineString", "coordinates" : [ [ 52.36881, 4.890286 ], [ 52.36871, 4.890021 ] ] } } ,
{ "_id" : ObjectId("611cbedc04b2d2815aecb103"), "name" : "lifei", "loc" : { "type" : "Point", "coordinates" : [ 51.21154451, 13.0217 ] } } ,
{ "_id" : ObjectId("611cbee004b2d2815aecb104"), "name" : "lifei", "loc" : { "type" : "Point", "coordinates" : [ 51.9125, 4.472 ] } } ,
{ "_id" : ObjectId("611cbf0904b2d2815aecb105"), "name" : "wangqi", "loc" : { "type" : "Point", "coordinates" : [ 52.3678, 4.89 ] } }])
建索引(地理搜索基于索引)
db.amlz.ensureIndex({'loc':'2dsphere'})
执行查询
db.getCollection('amlz').find({
'loc':{
$geoNear:{
$geometry:{type:"Point", "coordinates" : [ 52.37, 5.217]},
$maxDistance:40000
}
}
})
查询结果
我的理解:
建索引的时候,mongo会把经纬度与公里距离映射,以此来实现地址位置搜索
1.4 元素整体匹配:elementMatch
需求:要求成组数据内的每一组{}数据都符合$and查询结果
嵌套两层逻辑符后,进行数组内元素整体匹配:
db.getCollection('zsyyIBD').find(
{
"$or": [{
"$and": [
{
'lab': {
$elemMatch: {
"routineblood_norlabsubitem": {
$regex: "细胞*"
},
"routineblood_norlabsubitem": "红细胞体积分布宽度变异系数"
}
}
},
{
'pathologygroup': {
$elemMatch: {
"pathology_examdiag": {
$regex: "胃角粘膜*"
},
"pathology_examtime": {
$gt: "2021-04-14 15:11:00"
}
}
}
}
]
}]
},
{
"lab": 0,
"advice": 0,
"unstructuredgroup": 0
}
).limit(10).skip(1)
2. 聚合操作(count、distinct、group)
2.1 count统计总数
//统计总数(忽略skip,limit条件):
db.getCollection('abmlz').find({}).skip(2).limit(10).count()
//统计总数(满足skip,limit条件后进行统计):
db.getCollection('abmlz').find({}).skip(2).limit(10).count(true)
2.2 distinct
去重统计
db.getCollection('zsyyIBD').distinct('unstructuredgroup.followdiagnosis_firstpagediag')
//结果
[
"克罗恩病(回结肠型 非狭窄非穿透型 活动期中度)",
"慢性胃炎",
"克罗恩病(回结肠+上消化道型 非狭窄非穿通型 缓解期)",
"地中海贫血",
"胆囊息肉",
"克罗恩病(慢性复发型 回肠型 狭窄型 缓解期 轻度)",
"泌尿系感染",
"克罗恩病并肠瘘"
]
指定 Query 条件的distinct
db.zsyyIBD.distinct( "info_name", { "info_sex": "男" } )
2.3 group分组(db.zsyyIBD.group()方法已失效,现在都用aggregate里的group分组)
官方回我邮件:
统计重复的数据
db.getCollection('datatemp').aggregate([
{ $group: { _id : '$primaryId', count: { $sum : 1 } } },
{ $match: { count: { $gt : 1} } }
])
2.4 slice函数
数组切割后返回指定元素个数,减少返回的数据总量,查询更高效.数组内以元素为单位切割
//从前开始返回前五项
db.getCollection('amlz').find({}, {'advice':{$slice:5}})
//从最后五项开始返回五项
db.getCollection('amlz').find({}, {'advice':{$slice:-5}})
//从最后五项开始返回3条
db.getCollection('amlz').find({}, {'advice':{$slice: [-5,3]}})
//从前位置为2处(第3条数据)开始,取出3条
db.getCollection('amlz').find({}, {'advice':{$slice: [2,3]}})
全部数据:
查询结果案例数据:
2.5 $mod 整数数字字段取模后值筛选
//取出该字段对2取模后为0的所有文档
db.getCollection('amlz').find({'baseinfofirst_firstoptionage': {$mod: [2,0] }})
3. 常用sql查询
只返回部分字段:patinet:1
db.getCollection('shouyeshoushu').find({"$or" :
[
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*残胃切除术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*胃大部切除.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*腹腔镜下腹膜活检术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*胃近端切除术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*腹腔镜下全胃切除.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}}
]
}, {patient_id: 1,visit_id: 1});
全部数据只返回两个字段
db.getCollection('zsyyIBD').find({}, {patient_id: 1,visit_id: 1})
获取最小值,命名为minScore
db.getCollection('jianchabaogao').aggregate(
[{$project: {minScore: {$min: "$jianchabaogao.exam_report.exam_time"}}}]
)
嵌套逻辑操作符查询
db.getCollection('zsyyIBD').find(
{
"$and": [
{
"$or": [
{
"visit_times": {
"$nin": [
"0"
]
},
"info_sex": "女"
}
]
},
{
"$or": [
{
"visit_times": {
"$nin": [
"0"
]
},
"form_updator" : "qiuyun"
}
]
}
]
})
正则表达式匹配
db.getCollection('jianchabaogao').aggregate([{
"$or": [
{
"jianchabaogao.exam_report.standard_name": {
"$regex":
"^.*检查.*$"
}
},
{
"jianchabaogao.exam_report.standard_name": {
"$regex":
"^.*扫描.*$"
}
}
]
}])
java代码内正则同脚本中的正则
switch (fieldRelation) {
case "包含":
return Pattern.compile("^.*" + 扫描 + ".*$", Pattern.CASE_INSENSITIVE);
case "不包含":
return Pattern.compile("^((?!" + fieldValue + ").)*$", Pattern.CASE_INSENSITIVE);
case "开头是":
return Pattern.compile("^" + fieldValue + ".*$", Pattern.CASE_INSENSITIVE);
case "开头不是":
return Pattern.compile("^(?!" + fieldValue + ").*", Pattern.CASE_INSENSITIVE);
case "等于":
return Pattern.compile(fieldValue, Pattern.CASE_INSENSITIVE);
case "不等于":
return Pattern.compile("^(?!" + fieldValue + ").*$", Pattern.CASE_INSENSITIVE);
case "结尾是":
return Pattern.compile("^.*" + fieldValue + "$", Pattern.CASE_INSENSITIVE);
case "结尾不是":
return Pattern.compile("^.*(?!" + fieldValue + ")$", Pattern.CASE_INSENSITIVE);
}
switch (exp) {
case "等于":
Criteria criteriaInnner = criteriaVisitTimes.and(enPath).is(values);
criteriaInner.add(criteriaInnner);
break;
case "不等于":
Criteria criteriaInnnerNot = criteriaVisitTimes.and(enPath).nin(values);
criteriaInner.add(criteriaInnnerNot);
break;
case "大于":
Criteria criteriaInnnerGt = criteriaVisitTimes.and(enPath).gt(values);
criteriaInner.add(criteriaInnnerGt);
break;
case "小于":
Criteria criteriaInnnerLt = criteriaVisitTimes.and(enPath).lt(values);
criteriaInner.add(criteriaInnnerLt);
break;
case "大于等于":
Criteria criteriaInnnerGte = criteriaVisitTimes.and(enPath).gte(values);
criteriaInner.add(criteriaInnnerGte);
break;
case "小于等于":
Criteria criteriaInnnerLte = criteriaVisitTimes.and(enPath).lte(values);
criteriaInner.add(criteriaInnnerLte);
break;
case "包含":
Pattern pattern = Pattern.compile("^.*" + values + ".*$", Pattern.CASE_INSENSITIVE);
criteriaVisitTimes.and(enPath).regex(pattern);
criteriaInner.add(criteriaVisitTimes);
break;
case "不包含":
Pattern patternNot = Pattern.compile("^((?!" + values + ").)*$", Pattern.CASE_INSENSITIVE);
criteriaVisitTimes.and(enPath).regex(patternNot);
criteriaInner.add(criteriaVisitTimes);
break;
}
}
聚合后切割
db.getCollection('binganshouye').aggregate([
{
$group : {
_id : { $substr: [ "$_id", 6, 12] },
total : {$sum : 1}
}
}
]);
删除字段的值
db.getCollection('bysyweicancer').update(
{
},
{
$unset:{'lab':''}
},
{
"multi" : true
}
);
管道聚合,相对时间后的的聚合
db.getCollection('jianyanbaogao').aggregate(
[
{
"$unwind": "$jianyanbaogao.lab_report"
},
{
"$match": {
"jianyanbaogao.lab_report.apply_time": {
"$gte": "2014-11-22 11:01:25",
"$lte": "2021-11-27 11:01:25"
}
}
},
{
"$match": {
"jianyanbaogao.lab_report.lab_sub_item_name": "乙型肝炎核心总抗体"
}
},
{
"$project": {
"_id": 1,
"patient_id": 1,
"data": "$jianyanbaogao.lab_report.lab_result_value",
"relationData": "$jianyanbaogao.lab_report.apply_time",
"lab_sub_item_name":"$jianyanbaogao.lab_report.lab_sub_item_name"
}
}
]
)
值域取值
db.getCollection('jianyanbaogao').distinct('age')
4.Linux环境常用指令
进入shell命令行
mongo IP:27017/db_specialized_disease_cohort -u jhadmin -p jh1234
mongo IP:27017/admin -u bysy -p bysyadmin
重命名集合名:
db.bysyweicancer_malize.renameCollection("bysyweicancer");
导入mongo文件
mongoimport -h 127.0.0.1:27017 -ubysy -pbysyadmin --authenticationDatabase admin -d zbk2021-2 /data/1/sddap3/bysyweicancer.json
导出mongo文件
mongoexport -h 127.0.0.1:27017 -ubysy -pbysyadmin --authenticationDatabase admin -d zbk2021-2 -c bysyweicancer -o /data/1/sddap3/bysyweicancer.json
mongodump -h 127.0.0.1:27017 -ubysy -pbysyadmin --authenticationDatabase admin -d zbk2021-2 -o /data/1/sddap3/
恢复:
mongorestore -h 127.0.0.1:27017 -ubysy -pbysyadmin --authenticationDatabase admin -d zbk2021test --dir /data/1/sddap3/zbk2021-2/
把查询结果作为新数据插入新表
var result = db.getCollection('shouyeshoushu').find({"$or" :
[
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*残胃切除术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*胃大部切除.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*腹腔镜下腹膜活检术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*胃近端切除术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*腹腔镜下全胃切除.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}},
{ "shouyeshoushu.operation_name" : { "$regex" : "^.*内镜下胃黏膜下剥离术.*$"}}
]
}, {patient_id: 1});
while(result.hasNext()) db.shouyeshoushu_end.insert(result.next())
5.常见问题
1.线上mongo大的页码报错,无法正常分页
Caused by: com.mongodb.MongoQueryException: Query failed with error code 96 and error message 'Executor error during find command :: caused by :: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.' on server 168.168.251.48:20000
原因:
mongo是取出所有的数据,在内存进行分页排序,默认设置32m,很容易导致内存不够用
解决:
把mongo库排序最大内存设置为320m:
db.adminCommand({setParameter:1,internalQueryExecMaxBlockingSortBytes:335544320});
给高频,核心字段(特别是排序字段)加索引:
db.zsyynxgbzbk.createIndex({"baseinfolast_lastvisittime":1}); // -1是倒序
2.深坑:代码中写的排序字段在mongo表里没有,mongo也不会报错
query.with(Sort.by(Sort.Order.desc("lastvisittime"))); //库里的字段是:baseinfolast_lastvisittime