com.mongodb.MongoCursorNotFoundException: Query failed with error code -5 and error message 'Cursor 405128352514 not found on server 127.0.0.1:27017' on server 127.0.0.1:27017
这个错误 MongoCursorNotFoundException 是 MongoDB 客户端在尝试访问一个已在服务器上失效的游标时抛出的。以下是详细的原因分析和解决方案:
1、错误原因
- 游标超时:MongoDB 默认会在 10 分钟未使用后自动关闭游标以释放资源。
- 游标已关闭:手动调用 cursor.close() 或迭代完所有结果后,游标会被关闭。
- 服务器重启:MongoDB 服务器重启后,所有之前的游标都会失效。
- 集合被修改:游标创建后,如果对应集合被 drop、rename 或重建索引,游标会失效。
2、游标为什么10分钟没有再去读取数据?
try {
    MongoCollection<BasicDBObject> coll = formSwitchboardIdentityRelationMongo.getCollection(
            dbname, "forrelation", BasicDBObject.class);
    FindIterable<BasicDBObject> cur = coll.find(query);
    mongoCursor = cur.cursor();
    // No batch size is configured here, so the driver/server defaults decide how many
    // documents each round trip returns.
    while (mongoCursor.hasNext()) {
        bean = mongoCursor.next();
        // ... business logic for one record ...
    }
} catch (Exception e) {
    throw e;
} finally {
    // Always release the server-side cursor, even when processing fails.
    if (mongoCursor != null) {
        mongoCursor.close();
    }
}
从游标中获取记录,难道每一条都需要执行 10 分钟这么久吗?显然不是。
mongoCursor 是从 MongoDB 服务端批量获取数据的,而不是一条一条获取:第一次 find 默认返回 101 条,之后通过 getMore 继续拉取;getMore 每批并不限于 101 条(默认只受 16MB 批大小上限约束),一批可能多达上万条(例如 10000 条)。客户端要处理完这一整批数据才会发起下一次 getMore,如果整批数据的处理耗时超过 10 分钟,游标就会因闲置超时被服务端清理——并不是单条记录的处理超过了 10 分钟。
3、查看和修改cursorTimeoutMillis 配置值
cursorTimeoutMillis 是 MongoDB 控制游标闲置超时的核心参数(默认 600000 毫秒,即 10 分钟)
db.adminCommand({ getParameter: 1, cursorTimeoutMillis: 1 });
/* 1 */
{
"cursorTimeoutMillis" : NumberLong(600000),
"ok" : 1.0
}
运行时临时修改(立即生效,重启失效)
db.adminCommand({ setParameter: 1, cursorTimeoutMillis: 3600000 });
/* 1 */
{
"was" : NumberLong(600000),
"ok" : 1.0
}
验证修改结果
db.adminCommand({ getParameter: 1, cursorTimeoutMillis: 1 });
/* 1 */
{
"cursorTimeoutMillis" : NumberLong(3600000),
"ok" : 1.0
}
4、源码关闭游标逻辑
com.mongodb.MongoCursorNotFoundException: Query failed with error code -5 and error message 'Cursor 405128352514 not found on server 127.0.0.1:27017' on server 127.0.0.1:27017。
重要内容是说游标找不到了,游标已经被关闭了。游标怎么被关闭的呢?分析一下源代码。
MongoDB 还会启动后台线程,定时扫描并关闭超时游标(避免闲置游标长期占用内存)。
ClientCursorMonitor 回收超时游标
ClientCursorMonitor 后台线程(继承 BackgroundJob)是游标超时清理的定时触发入口;结合后文 cursor_manager.cpp 中的 timeoutCursors() 清理逻辑,共同实现了「每 N 秒扫描一次,关闭闲置超 10 分钟的游标」。
#include "mongo/platform/basic.h"
#include "mongo/db/clientcursor.h"
#include <string>
#include <time.h>
#include <vector>
#include "mongo/base/counter.h"
#include "mongo/db/audit.h"
#include "mongo/db/auth/action_set.h"
#include "mongo/db/auth/action_type.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/auth/privilege.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/client.h"
#include "mongo/db/commands.h"
#include "mongo/db/commands/server_status.h"
#include "mongo/db/commands/server_status_metric.h"
#include "mongo/db/cursor_manager.h"
#include "mongo/db/cursor_server_params.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/query/explain.h"
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/util/background.h"
#include "mongo/util/concurrency/idle_thread_block.h"
#include "mongo/util/exit.h"
namespace mongo {
using std::string;
using std::stringstream;
// File-local cursor statistics. The counters tagged "gauge" are incremented and later
// decremented elsewhere in this file (cursor construction/destruction, pin/unpin), so they
// track a current level rather than a running total.
static Counter64 cursorStatsOpen; // gauge
static Counter64 cursorStatsOpenPinned; // gauge
static Counter64 cursorStatsOpenNoTimeout; // gauge
// Only ever incremented in this file: the ClientCursorMonitor thread adds the number of
// cursors reaped by each timeout sweep.
static Counter64 cursorStatsTimedOut;
// Bind each counter to a metric name.
// NOTE(review): presumably these names surface under serverStatus "metrics" -- confirm against
// ServerStatusMetricField.
static ServerStatusMetricField<Counter64> dCursorStatsOpen("cursor.open.total", &cursorStatsOpen);
static ServerStatusMetricField<Counter64> dCursorStatsOpenPinned("cursor.open.pinned",
&cursorStatsOpenPinned);
static ServerStatusMetricField<Counter64> dCursorStatsOpenNoTimeout("cursor.open.noTimeout",
&cursorStatsOpenNoTimeout);
static ServerStatusMetricField<Counter64> dCursorStatusTimedout("cursor.timedOut",
&cursorStatsTimedOut);
// Returns the current value of the open-cursor gauge (cursorStatsOpen), which is incremented
// by the ClientCursor constructor and decremented by its destructor.
long long ClientCursor::totalOpen() {
return cursorStatsOpen.get();
}
// Builds a cursor from 'params' under the id 'cursorId'.
//
// The cursor is created already in use by 'operationUsingCursor' (which must be non-null), and
// both its creation date and its last-use date are set to 'now'. The logical session id and
// transaction number are taken from 'operationUsingCursor'. 'params.exec' must be non-null.
ClientCursor::ClientCursor(ClientCursorParams params,
CursorId cursorId,
OperationContext* operationUsingCursor,
Date_t now)
: _cursorid(cursorId),
_nss(std::move(params.nss)),
_authenticatedUsers(std::move(params.authenticatedUsers)),
_lsid(operationUsingCursor->getLogicalSessionId()),
_txnNumber(operationUsingCursor->getTxnNumber()),
_writeConcernOptions(std::move(params.writeConcernOptions)),
_readConcernArgs(std::move(params.readConcernArgs)),
_originatingCommand(params.originatingCommandObj),
_originatingPrivileges(std::move(params.originatingPrivileges)),
_queryOptions(params.queryOptions),
_lockPolicy(params.lockPolicy),
_needsMerge(params.needsMerge),
_exec(std::move(params.exec)),
_operationUsingCursor(operationUsingCursor),
_lastUseDate(now),
_createdDate(now),
// Reads '_exec', which is initialized earlier in this list.
_planSummary(Explain::getPlanSummary(_exec.get())) {
invariant(_exec);
invariant(_operationUsingCursor);
// Every live cursor counts towards the open-cursor gauge; the destructor undoes this.
cursorStatsOpen.increment();
if (isNoTimeout()) {
// Cursors normally time out after a period of inactivity to prevent excess memory use.
// A no-timeout cursor is exempt from that, so it is tracked in a separate gauge.
cursorStatsOpenNoTimeout.increment();
}
}
ClientCursor::~ClientCursor() {
    // A cursor may only be destroyed once no operation is using it (it is unpinned) and after
    // dispose() has run; it must also have been deregistered from its cursor manager.
    invariant(!_operationUsingCursor);
    invariant(_disposed);

    // Reverse the gauge bookkeeping done by the constructor.
    const bool exemptFromTimeout = isNoTimeout();
    cursorStatsOpen.decrement();
    if (exemptFromTimeout) {
        cursorStatsOpenNoTimeout.decrement();
    }
}
// Forwards 'killStatus' to the cursor's plan executor ('_exec'), marking it as killed.
void ClientCursor::markAsKilled(Status killStatus) {
_exec->markAsKilled(killStatus);
}
/**
 * Disposes of the underlying plan executor exactly once. Calling this again after the cursor
 * has been disposed is a no-op.
 */
void ClientCursor::dispose(OperationContext* opCtx) {
    if (!_disposed) {
        _exec->dispose(opCtx);
        _disposed = true;
    }
}
/**
 * Produces a GenericCursor snapshot describing this cursor: id, namespace, progress counters,
 * option flags, originating command, session id, timestamps and plan summary. The id of the
 * operation currently using the cursor is included only when there is one.
 */
GenericCursor ClientCursor::toGenericCursor() const {
    GenericCursor snapshot;

    // Identity and progress.
    snapshot.setCursorId(cursorid());
    snapshot.setNs(nss());
    snapshot.setNDocsReturned(nReturnedSoFar());

    // Option flags.
    snapshot.setTailable(isTailable());
    snapshot.setAwaitData(isAwaitData());
    snapshot.setNoCursorTimeout(isNoTimeout());

    // Provenance and timing.
    snapshot.setOriginatingCommand(getOriginatingCommandObj());
    snapshot.setLsid(getSessionId());
    snapshot.setLastAccessDate(getLastUseDate());
    snapshot.setCreatedDate(getCreatedDate());
    snapshot.setNBatchesReturned(getNBatches());
    snapshot.setPlanSummary(getPlanSummary());

    // Only a cursor that is currently in use has an operation id to report.
    auto activeOp = _operationUsingCursor;
    if (activeOp) {
        snapshot.setOperationUsingCursorId(activeOp->getOpID());
    }

    return snapshot;
}
//
// Pin methods
//
// Creates a pin on 'cursor' for 'opCtx', remembering the 'cursorManager' used later to unpin or
// deregister it. 'cursor' must be non-null, must already record an operation using it, and
// must not have been disposed.
ClientCursorPin::ClientCursorPin(OperationContext* opCtx,
ClientCursor* cursor,
CursorManager* cursorManager)
: _opCtx(opCtx), _cursor(cursor), _cursorManager(cursorManager) {
invariant(_cursor);
invariant(_cursor->_operationUsingCursor);
invariant(!_cursor->_disposed);
// We keep track of the number of cursors currently pinned. The cursor can become unpinned
// either by being released back to the cursor manager or by being deleted. A cursor may be
// transferred to another pin object via move construction or move assignment, but in this case
// it is still considered pinned.
cursorStatsOpenPinned.increment();
}
// Move constructor: takes over the cursor pinned by 'other' and leaves 'other' empty. The
// pinned-cursor gauge is not touched, because the cursor stays pinned throughout the transfer.
ClientCursorPin::ClientCursorPin(ClientCursorPin&& other)
: _opCtx(other._opCtx), _cursor(other._cursor), _cursorManager(other._cursorManager) {
// The pinned cursor is being transferred to us from another pin. The 'other' pin must have a
// pinned cursor.
invariant(other._cursor);
invariant(other._cursor->_operationUsingCursor);
// Be sure to set the 'other' pin's cursor to null in order to transfer ownership to ourself.
// This must happen after the invariants above, which still read 'other._cursor'.
other._cursor = nullptr;
other._opCtx = nullptr;
other._cursorManager = nullptr;
}
/**
 * Move assignment: adopts the cursor pinned by 'other' and leaves 'other' empty. This pin must
 * not currently hold a cursor, and 'other' must hold a pinned one. Self-assignment is a no-op.
 */
ClientCursorPin& ClientCursorPin::operator=(ClientCursorPin&& other) {
    if (this != &other) {
        // Preconditions: we are empty, and the source pin really holds a pinned cursor.
        invariant(!_cursor);
        invariant(other._cursor);
        invariant(other._cursor->_operationUsingCursor);

        // Take over all three fields from the source pin...
        _cursor = other._cursor;
        _opCtx = other._opCtx;
        _cursorManager = other._cursorManager;

        // ...then clear them on the source so that it no longer has the cursor pinned.
        other._cursor = nullptr;
        other._opCtx = nullptr;
        other._cursorManager = nullptr;
    }
    return *this;
}
// Releases the pinned cursor, if this pin still holds one (release() is a no-op otherwise).
ClientCursorPin::~ClientCursorPin() {
release();
}
/**
 * Hands the pinned cursor back to the cursor manager and empties this pin. Does nothing when
 * the pin holds no cursor.
 */
void ClientCursorPin::release() {
    if (_cursor) {
        invariant(_cursor->_operationUsingCursor);
        invariant(_cursorManager);

        // Unpinning has to go through the cursor manager, because it is the manager that takes
        // the mutex needed to perform the unpin safely.
        _cursorManager->unpin(_opCtx,
                              std::unique_ptr<ClientCursor, ClientCursor::Deleter>(_cursor));

        cursorStatsOpenPinned.decrement();
        _cursor = nullptr;
    }
}
// Destroys the pinned cursor outright instead of returning it to the cursor manager, and leaves
// this pin empty. Requires that the pin currently holds a pinned cursor and a cursor manager.
// The order of the steps below is significant; see the notes inside.
void ClientCursorPin::deleteUnderlying() {
invariant(_cursor);
invariant(_cursor->_operationUsingCursor);
invariant(_cursorManager);
// Note the following subtleties of this method's implementation:
// - We must unpin the cursor (by clearing the '_operationUsingCursor' field) before
// destruction, since it is an error to delete a pinned cursor.
// - In addition, we must deregister the cursor before clearing the '_operationUsingCursor'
// field, since it is an error to unpin a registered cursor without holding the appropriate
// cursor manager mutex. By first deregistering the cursor, we ensure that no other thread can
// access '_cursor', meaning that it is safe for us to write to '_operationUsingCursor'
// without holding the CursorManager mutex.
_cursorManager->deregisterCursor(_cursor);
// Make sure the cursor is disposed and unpinned before being destroyed.
_cursor->dispose(_opCtx);
_cursor->_operationUsingCursor = nullptr;
delete _cursor;
// The cursor no longer exists, so it no longer counts as pinned.
cursorStatsOpenPinned.decrement();
_cursor = nullptr;
}
// Returns the pinned cursor, or nullptr if this pin has been released, moved from, or had its
// cursor deleted.
ClientCursor* ClientCursorPin::getCursor() const {
return _cursor;
}
namespace {
//
// ClientCursorMonitor
//
/**
* Thread for timing out inactive cursors.
*/
class ClientCursorMonitor : public BackgroundJob {
public:
    /** Name identifying this background job. */
    std::string name() const {
        return "ClientCursorMonitor";
    }

    /**
     * Runs until shutdown is signalled. Each pass asks the CursorManager to time out idle
     * cursors, adds the number reaped to the 'cursorStatsTimedOut' counter, and then sleeps for
     * getClientCursorMonitorFrequencySecs() seconds.
     */
    void run() {
        ThreadClient threadClient("clientcursormon", getGlobalServiceContext());

        while (!globalInShutdownDeprecated()) {
            {
                // The operation context is confined to this scope, so it is destroyed before
                // the thread goes to sleep.
                const ServiceContext::UniqueOperationContext opCtx = cc().makeOperationContext();
                auto* const rawOpCtx = opCtx.get();

                const auto now = rawOpCtx->getServiceContext()->getPreciseClockSource()->now();
                const auto numTimedOut =
                    CursorManager::get(rawOpCtx)->timeoutCursors(rawOpCtx, now);
                cursorStatsTimedOut.increment(numTimedOut);
            }

            MONGO_IDLE_THREAD_BLOCK;
            sleepsecs(getClientCursorMonitorFrequencySecs());
        }
    }
};

// Only one instance of the ClientCursorMonitor exists
ClientCursorMonitor clientCursorMonitor;
// Appends the cursor counters to 'b' under their legacy field names, together with a note that
// this output is deprecated in favour of the server status metrics. "clientCursors_size" and
// "totalOpen" both report the open-cursor gauge; each is read with its own get() call.
void _appendCursorStats(BSONObjBuilder& b) {
b.append("note", "deprecated, use server status metrics");
b.appendNumber("clientCursors_size", cursorStatsOpen.get());
b.appendNumber("totalOpen", cursorStatsOpen.get());
b.appendNumber("pinned", cursorStatsOpenPinned.get());
b.appendNumber("totalNoTimeout", cursorStatsOpenNoTimeout.get());
b.appendNumber("timedOut", cursorStatsTimedOut.get());
}
} // namespace
// Starts the single file-local ClientCursorMonitor background job via BackgroundJob's go().
void startClientCursorMonitor() {
clientCursorMonitor.go();
}
} // namespace mongo
ClientCursorMonitor 后台线程是定时清理任务的核心,它继承自 MongoDB 基础的 BackgroundJob 类。
定时器中调用 CursorManager::get(opCtx.get())->timeoutCursors(opCtx.get(), now) 清理超时的游标
游标超时关闭
编辑
这里是 MongoDB 服务层控制的游标超时时间,不是 WiredTiger 存储引擎控制的游标超时时间。
CursorManager是MongoDB 4.0.7 游标管理的核心实现,负责游标注册、查找、超时清理、销毁、权限校验等全生命周期管理,也是 MongoCursorNotFoundException(游标未找到)的核心触发源头
路径:src/mongo/db/cursor_manager.h + src/mongo/db/cursor_manager.cpp
// 1. Timeout predicate (core logic)
//
// Returns true when 'cursor' is eligible to be reaped at time 'now': it has not opted out of
// timeouts, no operation is currently using it, and it has been idle for at least the
// configured cursor timeout.
bool CursorManager::cursorShouldTimeout_inlock(const ClientCursor* cursor, Date_t now) {
    // Cursors that disabled timeouts are never reaped.
    if (cursor->isNoTimeout()) {
        return false;
    }

    // A cursor that an operation is currently using is not idle.
    if (cursor->_operationUsingCursor) {
        return false;
    }

    // Idle time is measured from the last use; the threshold comes from getCursorTimeoutMillis().
    const auto idleFor = now - cursor->_lastUseDate;
    const auto timeoutThreshold = Milliseconds(getCursorTimeoutMillis());
    return idleFor >= timeoutThreshold;
}
// 2. Bulk reaping of timed-out cursors
//
// Scans every partition of the cursor map, removes each cursor for which
// cursorShouldTimeout_inlock() returns true, then logs and disposes of the removed cursors.
// Returns the number of cursors that were timed out.
std::size_t CursorManager::timeoutCursors(OperationContext* opCtx, Date_t now) {
    // Expired cursors are parked here so that they can be disposed of after the partition locks
    // have been released.
    std::vector<std::unique_ptr<ClientCursor, ClientCursor::Deleter>> toDisposeWithoutMutex;

    // Phase 1: under each partition's lock, unlink the expired cursors from the map.
    for (size_t partitionId = 0; partitionId < kNumPartitions; ++partitionId) {
        auto lockedPartition = _cursorMap->lockOnePartitionById(partitionId);

        auto it = lockedPartition->begin();
        while (it != lockedPartition->end()) {
            auto* candidate = it->second;
            if (!cursorShouldTimeout_inlock(candidate, now)) {
                ++it;
                continue;
            }

            // Take ownership of the cursor, then erase its entry. The iterator is advanced
            // before the erase takes effect, so the loop continues with the next entry.
            toDisposeWithoutMutex.emplace_back(candidate);
            lockedPartition->erase(it++);
        }
    }

    // Phase 2: with no partition lock held, log and release each expired cursor's resources.
    for (auto& expired : toDisposeWithoutMutex) {
        log() << "Cursor id " << expired->cursorid() << " timed out, idle since "
              << expired->getLastUseDate();
        expired->dispose(opCtx);
    }

    return toDisposeWithoutMutex.size();
}
清理流程:「判断超时 → 从哈希表移除 → 释放资源」,移除后客户端再操作该游标 ID 就会触发 CursorNotFound
5、解决方案
5.1、禁用游标超时后必须手动关闭
使用 noCursorTimeout(true) 后,游标不会自动关闭,若忘记 cursor.close() 会导致服务端游标泄漏,最终耗尽资源;这个方案有风险。
// Java 示例:禁用游标自动超时(使用完毕后必须手动调用 close() 关闭游标)
FindIterable<Document> iterable = collection.find()
.noCursorTimeout(true); // 禁用自动超时
5.2、避免长时间阻塞游标遍历
遍历过程中不要执行耗时操作(如远程调用、大量计算),可先拉取数据到本地再处理;
本案例并不是一条业务数据处理超过10分钟,是10000条数据处理超过10分钟,cur.batchSize(100);限制游标mongoCursor每次批量获取数据,这批数据处理完毕,再获取批量数据,这样两次getMore命令间隔不会超过10分钟了。
try {
    MongoCollection<BasicDBObject> coll = formSwitchboardIdentityRelationMongo.getCollection(
            dbname, "forrelation", BasicDBObject.class);
    FindIterable<BasicDBObject> cur = coll.find(query);
    // Cap every batch at 100 documents so that the gap between two consecutive getMore calls
    // stays well below the server's idle-cursor timeout; without this cap the loop failed with
    // com.mongodb.MongoCursorNotFoundException (error code -5, "Cursor ... not found").
    cur.batchSize(100);
    mongoCursor = cur.cursor();
    while (mongoCursor.hasNext()) {
        bean = mongoCursor.next();
        // ... business logic for one record ...
    }
} catch (Exception e) {
    throw e;
} finally {
    // Always release the server-side cursor, even when processing fails.
    if (mongoCursor != null) {
        mongoCursor.close();
    }
}
6、总结
本文分析了 MongoDB MongoCursorNotFoundException(错误码 -5)的成因与解决方案。该异常是因为客户端访问的游标在服务端已失效,主因是游标闲置超过 10 分钟被自动清理。
MongoDB 通过 ClientCursorMonitor 后台线程按 clientCursorMonitorFrequencySecs 设定的周期(默认 4 秒)定时扫描,CursorManager 的 timeoutCursors 方法依据 cursorShouldTimeout_inlock 逻辑,判定闲置超过阈值(默认 10 分钟)且未被使用的游标,将其从哈希表移除并释放资源,后续客户端再操作该游标即触发异常。
解决方案包括:一是设置noCursorTimeout(true)禁用自动超时,但需手动关闭游标防止泄漏;二是优化遍历逻辑,通过batchSize限制单次获取数据量,缩短getMore间隔,避免游标长期闲置;三是避免遍历中执行耗时操作,减少游标占用时间。