I am struggling to understand MongoDB performance in relation to filter, sort and limit.
I have a collection with 32 million documents, size 5 GB.
I have this query:
db.report_meter_device_audit.find({
"$and": [{ "receivedTime": { "$gt": new ISODate("2020-12-31T00:00:00Z") } },
{ "receivedTime": { "$lt": new ISODate("2021-05-31T00:00:00Z") } }]
}).sort({ meterid: 1 }).limit(100);
and built this index:
(meterid:1 , receivedTime:-1)
This query returns very fast, and explain.executionStats shows this:
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"meterid" : [
"[MinKey, MaxKey]"
],
"receivedTime" : [
"[MaxKey, MinKey]"
]
}
}
}
},
.
.
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 3,
"totalKeysExamined" : 100,
"totalDocsExamined" : 100,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 101,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 1,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 100,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 0,
"docsExamined" : 100,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 100,
"executionTimeMillisEstimate" : 0,
"works" : 100,
"advanced" : 100,
"needTime" : 0,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 0,
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"meterid" : [
"[MinKey, MaxKey]"
],
"receivedTime" : [
"[MaxKey, MinKey]"
]
},
"keysExamined" : 100,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"indexDef" : {
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"direction" : "forward"
}
}
}
}
},
However, if I reverse the sort order ({ meterid: -1 }), the query is very slow, and I can see that the same index is used, but the number of documents scanned is huge:
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"meterid" : [
"[MaxKey, MinKey]"
],
"receivedTime" : [
"[MinKey, MaxKey]"
]
}
}
}
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 100,
"executionTimeMillis" : 185544,
"totalKeysExamined" : 10292501,
"totalDocsExamined" : 10292501,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 100,
"executionTimeMillisEstimate" : 19783,
"works" : 10292502,
"advanced" : 100,
"needTime" : 10292401,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 1,
"limitAmount" : 100,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"receivedTime" : {
"$lt" : ISODate("2021-05-31T03:00:00.000+03:00")
}
},
{
"receivedTime" : {
"$gt" : ISODate("2020-12-31T02:00:00.000+02:00")
}
}
]
},
"nReturned" : 100,
"executionTimeMillisEstimate" : 19698,
"works" : 10292501,
"advanced" : 100,
"needTime" : 10292401,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 0,
"docsExamined" : 10292501,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 10292501,
"executionTimeMillisEstimate" : 1945,
"works" : 10292501,
"advanced" : 10292501,
"needTime" : 0,
"needYield" : 0,
"saveState" : 229475,
"restoreState" : 229475,
"isEOF" : 0,
"keyPattern" : {
"meterid" : 1,
"receivedTime" : -1
},
"indexName" : "meterid_time_idx",
"isMultiKey" : false,
"multiKeyPaths" : {
"meterid" : [ ],
"receivedTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"meterid" : [
"[MaxKey, MinKey]"
],
"receivedTime" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 10292501,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
even if I try to add an index such as this:
(meterid:-1 , receivedTime:-1)
I can see that it is not used. The previous index is still used.
So the questions are:
- Why is MongoDB behaving like this, and
- How can I achieve better performance for the descending sort too?
What is meterid exactly? Is it randomly generated or monotonically increasing?