Skip to content

Commit a6205ef

Browse files
author
askvorcov
committed
fix mongo: deadline propagation stale snapshot on connection creation
When a new MongoDB connection needs to be created (no idle connections in pool), Create() runs a ping to MongoDB without any deadline enforcement — it is bounded only by SOCKETTIMEOUTMS from the URI config (typically several seconds). Meanwhile, MakeRequestContext() captures the remaining deadline as a snapshot *before* calling GetClient(), so if Create() takes several seconds the snapshot becomes stale. The stale value is then used as maxTimeMS for the actual query, causing requests to run far beyond the original deadline. Example: deadline=999ms, Create() takes ~6.9s ping, then query runs another 999ms → total 7.883s with "operation exceeded time limit" from MongoDB. Fix in collection_impl.cpp: After GetClient() returns, re-evaluate the remaining deadline with a fresh GetDeadlineTimeLeft() call. If the deadline has already expired, throw CancelledException immediately. Pass the fresh value as inherited_deadline so maxTimeMS is computed correctly. commit_hash:6a4eeb4aa39628bf4bae31a00048927e735e0a3c
1 parent 86a10ab commit a6205ef

3 files changed

Lines changed: 68 additions & 10 deletions

File tree

mongo/src/storages/mongo/cdriver/collection_impl.cpp

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,20 @@ void SetMaxServerTime(
134134
}
135135
}
136136

137+
std::optional<std::chrono::milliseconds> GetTimeoutOrThrow(
138+
const dynamic_config::Snapshot& dynamic_config,
139+
stats::OperationStatisticsItem& stats,
140+
tracing::Span& span
141+
) {
142+
const auto time_left = GetDeadlineTimeLeft(dynamic_config);
143+
if (time_left && time_left <= std::chrono::seconds{0}) {
144+
stats.Account(stats::ErrorType::kCancelled);
145+
span.AddTag(kCancelledByDeadlineTag, true);
146+
throw CancelledException(CancelledException::ByDeadlinePropagation{});
147+
}
148+
return time_left;
149+
}
150+
137151
} // namespace
138152

139153
CDriverCollectionImpl::CDriverCollectionImpl(
@@ -616,28 +630,29 @@ RequestContext CDriverCollectionImpl::MakeRequestContext(std::string&& span_name
616630
auto stats = statistics_->items[stats_key];
617631
auto dynamic_config = pool_impl_->GetConfig();
618632

619-
const auto inherited_deadline = GetDeadlineTimeLeft(dynamic_config);
620-
if (inherited_deadline && inherited_deadline <= std::chrono::seconds{0}) {
621-
stats->Account(stats::ErrorType::kCancelled);
622-
span.AddTag(kCancelledByDeadlineTag, true);
623-
throw CancelledException(CancelledException::ByDeadlinePropagation{});
624-
}
625-
626-
if (inherited_deadline) {
627-
span.AddTag(tracing::kTimeoutMs, inherited_deadline->count());
633+
// first deadline check, to make sure we dont get/wait for client if deadline is already reached.
634+
auto timeout_ms = GetTimeoutOrThrow(dynamic_config, *stats, span);
635+
if (timeout_ms) {
636+
span.AddTag(tracing::kTimeoutMs, timeout_ms->count());
628637
}
629638

630639
auto client = GetClient(*stats);
631640
cdriver::CollectionPtr
632641
collection(mongoc_client_get_collection(client.get(), GetDatabaseName().c_str(), GetCollectionName().c_str()));
633642

643+
// The second deadline check, to make sure we did not hit deadline after waiting or creating a new client.
644+
timeout_ms = timeout_ms ? GetTimeoutOrThrow(dynamic_config, *stats, span) : timeout_ms;
645+
if (timeout_ms) {
646+
span.AddTag(tracing::kTimeoutMs, timeout_ms->count());
647+
}
648+
634649
return RequestContext{
635650
std::move(stats),
636651
std::move(dynamic_config),
637652
std::move(client),
638653
std::move(collection),
639654
std::move(span),
640-
inherited_deadline,
655+
timeout_ms,
641656
};
642657
}
643658

mongo/src/storages/mongo/cdriver/pool_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ CDriverPoolImpl::ConnPtr CDriverPoolImpl::Pop() {
495495
<< "' has too many establishing connections. " << MakeQueueDeadlineMessage(inherited_timeout);
496496
}
497497
conn = Create();
498+
TESTPOINT("mongo-connection-created", {});
498499
}
499500
}
500501

mongo/src/storages/mongo/deadline_mongotest.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
#include <userver/utest/utest.hpp>
22

3+
#include <userver/engine/sleep.hpp>
34
#include <userver/engine/task/cancel.hpp>
45
#include <userver/server/request/task_inherited_data.hpp>
6+
#include <userver/testsuite/testpoint.hpp>
7+
#include <userver/testsuite/testpoint_control.hpp>
58

69
#include <storages/mongo/util_mongotest.hpp>
710
#include <userver/formats/bson.hpp>
811
#include <userver/storages/mongo/collection.hpp>
912
#include <userver/storages/mongo/exception.hpp>
1013
#include <userver/storages/mongo/pool.hpp>
14+
#include <userver/tracing/span.hpp>
1115

1216
using namespace std::chrono_literals;
1317

@@ -84,4 +88,42 @@ UTEST_F(DeadlinePropagation, AlreadyCancelled) {
8488
UEXPECT_THROW(coll.InsertOne(bson::MakeDoc("_id", 2)), mongo::CancelledException);
8589
}
8690

91+
UTEST_F(DeadlinePropagation, CancelledByDeadlineAfterGetClient) {
92+
auto pool_config = MakeTestPoolConfig();
93+
pool_config.pool_settings.initial_size = 0;
94+
pool_config.pool_settings.idle_limit = 1;
95+
pool_config.pool_settings.max_size = 1;
96+
pool_config.queue_timeout = utest::kMaxTestWaitTime;
97+
98+
{
99+
// check positive case, connection creation is fast
100+
auto pool = MakePool({}, pool_config);
101+
auto coll = pool.GetCollection("dp");
102+
server::request::kTaskInheritedData.Set(MakeRequestData(engine::Deadline::FromDuration(100ms)));
103+
UASSERT_NO_THROW(coll.InsertOne(bson::MakeDoc("_id", 1)));
104+
}
105+
{
106+
// check negative case, connection creation is too slow
107+
auto pool = MakePool({}, pool_config);
108+
auto coll = pool.GetCollection("dp");
109+
// delay in connection create
110+
testsuite::TestpointControl testpoint_control;
111+
class SlowConnectionClient final : public testsuite::TestpointClientBase {
112+
public:
113+
mutable size_t times_called = 0;
114+
~SlowConnectionClient() override { Unregister(); }
115+
void Execute(std::string_view, const formats::json::Value&, Callback) const override {
116+
engine::SleepFor(std::chrono::milliseconds{200});
117+
times_called++;
118+
}
119+
} slow_client;
120+
testpoint_control.SetClient(slow_client);
121+
testpoint_control.SetEnabledNames({"mongo-connection-created"});
122+
123+
server::request::kTaskInheritedData.Set(MakeRequestData(engine::Deadline::FromDuration(100ms)));
124+
UEXPECT_THROW(coll.InsertOne(bson::MakeDoc("_id", 2)), mongo::CancelledException);
125+
EXPECT_EQ(slow_client.times_called, 1);
126+
}
127+
}
128+
87129
USERVER_NAMESPACE_END

0 commit comments

Comments
 (0)