Couchbase count performance


#1

Hi
We are currently running performance test and the below query times out in index
scan for ~100K records.
I changed to GROUP BY and then COUNT but in vain, any help is much appreciated.
Basically the order document consists of shipments and orderlines arrays which needs to be combined on certain dynamic predicates

Query

SELECT count(distinct t.orderKey)
FROM WCI as t
UNNEST t.shipmentLines AS shpl
UNNEST t.orderLines AS orderLine
WHERE ( meta(t).id LIKE “ccisvt1:8200:Sales1:odm:%” ) AND ( t.fulfillmentStatus IN [“PENDING”,“ASSIGNED”] )
AND ( t.type = “odm” ) AND ( t.absMinShipStatus < 1400000000 )
AND (orderLine.orderLineKey = shpl.orderLineKey ) AND (orderLine.deliveryMethod = “SHP” )
AND ( ( shpl.expectedShipmentDate IS VALUED ) AND ( shpl.expectedShipmentDate != “” )
AND ( ( DATE_DIFF_STR(NOW_UTC(), STR_TO_TZ(shpl.expectedShipmentDate, ‘UTC’), ‘second’)
>= 0 )
OR ( DATE_DIFF_STR(STR_TO_TZ(shpl.expectedShipmentDate, ‘UTC’),STR_TO_TZ(NOW_UTC(), ‘UTC’) ,‘second’)
< 86400 ))) AND t.orderKey IS NOT MISSING

Covering Index

CREATE INDEX idx_sla_risk_orders_01 ON WCI(orderKey,(meta().id),type,fulfillmentStatus,absMinShipStatus,(distinct (array shpl for shpl in shipmentLines end)),shipmentLines,orderLines) PARTITION BY hash(orderKey,absMinShipStatus) WHERE ((type = “odm”) and (fulfillmentStatus in [“PENDING”, “ASSIGNED”]))

Query Plan
{
“plan”: {
#operator”: “Sequence”,
“~children”: [
{
#operator”: “DistinctScan”,
“scan”: {
#operator”: “IndexScan3”,
“as”: “t”,
“covers”: [
“cover ((t.orderKey))”,
“cover ((meta(t).id))”,
“cover ((t.type))”,
“cover ((t.fulfillmentStatus))”,
“cover ((t.absMinShipStatus))”,
“cover ((distinct (array shpl for shpl in (t.shipmentLines) end)))”,
“cover ((t.shipmentLines))”,
“cover ((t.orderLines))”,
“cover ((meta(t).id))”
],
“filter_covers”: {
“cover (((t.fulfillmentStatus) in [“PENDING”, “ASSIGNED”]))”: true,
“cover ((t.type))”: “odm”
},
“index”: “idx_sla_risk_orders_01”,
“index_id”: “acbfc6ffe108addc”,
“index_projection”: {
“entry_keys”: [
0,
2,
3,
4,
6,
7
],
“primary_key”: true
},
“keyspace”: “WCI”,
“namespace”: “default”,
“spans”: [
{
“exact”: true,
“range”: [
{
“inclusion”: 0,
“low”: “null”
},
{
“high”: ““ccisvt1:8200:Sales1:odm;””,
“inclusion”: 1,
“low”: ““ccisvt1:8200:Sales1:odm:””
},
{
“high”: ““odm””,
“inclusion”: 3,
“low”: ““odm””
},
{
“high”: ““ASSIGNED””,
“inclusion”: 3,
“low”: ““ASSIGNED””
},
{
“high”: “1400000000”,
“inclusion”: 0,
“low”: “null”
}
]
},
{
“exact”: true,
“range”: [
{
“inclusion”: 0,
“low”: “null”
},
{
“high”: ““ccisvt1:8200:Sales1:odm;””,
“inclusion”: 1,
“low”: ““ccisvt1:8200:Sales1:odm:””
},
{
“high”: ““odm””,
“inclusion”: 3,
“low”: ““odm””
},
{
“high”: ““PENDING””,
“inclusion”: 3,
“low”: ““PENDING””
},
{
“high”: “1400000000”,
“inclusion”: 0,
“low”: “null”
}
]
}
],
“using”: “gsi”
}
},
{
#operator”: “Parallel”,
“~child”: {
#operator”: “Sequence”,
“~children”: [
{
#operator”: “Unnest”,
“as”: “shpl”,
“expr”: “cover ((t.shipmentLines))”
}
]
}
},
{
#operator”: “Parallel”,
“~child”: {
#operator”: “Sequence”,
“~children”: [
{
#operator”: “Unnest”,
“as”: “orderLine”,
“expr”: “cover ((t.orderLines))”
}
]
}
},
{
#operator”: “Parallel”,
“~child”: {
#operator”: “Sequence”,
“~children”: [
{
#operator”: “Filter”,
“condition”: “((((((((cover ((meta(t).id)) like “ccisvt1:8200:Sales1:odm:%”) and cover (((t.fulfillmentStatus) in [“PENDING”, “ASSIGNED”]))) and (cover ((t.type)) = “odm”)) and (cover ((t.absMinShipStatus)) < 1400000000)) and ((orderLine.orderLineKey) = (shpl.orderLineKey))) and ((orderLine.deliveryMethod) = “SHP”)) and ((((shpl.expectedShipmentDate) is valued) and (not ((shpl.expectedShipmentDate) = “”))) and ((0 <= date_diff_str(now_utc(), str_to_zone_name((shpl.expectedShipmentDate), “UTC”), “second”)) or (date_diff_str(str_to_zone_name((shpl.expectedShipmentDate), “UTC”), str_to_zone_name(now_utc(), “UTC”), “second”) < 86400)))) and (cover ((t.orderKey)) is not missing))”
},
{
#operator”: “InitialGroup”,
“aggregates”: [
“count(cover ((t.orderKey)))”
],
“group_keys”:
}
]
}
},
{
#operator”: “IntermediateGroup”,
“aggregates”: [
“count(cover ((t.orderKey)))”
],
“group_keys”:
},
{
#operator”: “FinalGroup”,
“aggregates”: [
“count(cover ((t.orderKey)))”
],
“group_keys”:
},
{
#operator”: “Parallel”,
“~child”: {
#operator”: “Sequence”,
“~children”: [
{
#operator”: “InitialProject”,
“result_terms”: [
{
“expr”: “count(cover ((t.orderKey)))”
}
]
},
{
#operator”: “FinalProject”
}
]
}
}
]
},
“text”: “SELECT count( t.orderKey)\nFROM WCI as t\nUNNEST t.shipmentLines AS shpl\nUNNEST t.orderLines AS orderLine\n WHERE ( meta(t).id LIKE “ccisvt1:8200:Sales1:odm:%” ) AND ( t.fulfillmentStatus IN [“PENDING”,“ASSIGNED”] )\n AND ( t.type = “odm” ) AND ( t.absMinShipStatus < 1400000000 )\n AND (orderLine.orderLineKey = shpl.orderLineKey ) AND (orderLine.deliveryMethod = “SHP” )\n AND ( ( shpl.expectedShipmentDate IS VALUED ) AND ( shpl.expectedShipmentDate != “” )\n AND ( ( DATE_DIFF_STR(NOW_UTC(), STR_TO_TZ(shpl.expectedShipmentDate, ‘UTC’), ‘second’)\n >= 0 )\n OR ( DATE_DIFF_STR(STR_TO_TZ(shpl.expectedShipmentDate, ‘UTC’),STR_TO_TZ(NOW_UTC(), ‘UTC’) ,‘second’)\n < 86400 ))) AND t.orderKey IS NOT MISSING”
}

Thanks
Jothi


#2
CREATE INDEX ix1 ON default(fulfillmentStatus,
                   DISTINCT ARRAY STR_TO_TZ(shpl.expectedShipmentDate, "UTC") FOR shpl IN shipmentLines END,
                   absMinShipStatus,
                   META().id, shipmentLines, orderLines, orderKey)
PARTITION BY HASH(META().id)
WHERE type = "odm";

SELECT COUNT(t.orderKey) AS cnt
FROM default as t
WHERE  t.type = "odm"
       AND t.fulfillmentStatus IN ["PENDING","ASSIGNED"]
       AND t.absMinShipStatus < 1400000000
       AND META(t).id LIKE "ccisvt1:8200:Sales1:odm:%"
       AND (ANY shpl IN t.shipmentLines
            SATISFIES STR_TO_TZ(shpl.expectedShipmentDate, "UTC") >= NOW_UTC()
                      AND STR_TO_TZ(shpl.expectedShipmentDate, "UTC") < DATE_ADD_STR(NOW_UTC(),86400,"second")
                      AND (ANY ol IN t.orderLines
                           SATISFIES ol.orderLineKey = shpl.orderLineKey AND ol.deliveryMethod = "SHP"
                           END)
           END);

NOTE: Partition key must be immutable.

https://blog.couchbase.com/a-guide-to-n1ql-features-in-couchbase-5-5-special-edition/


#3

Thanks for the suggestions I modified the query as below, but still it takes 30 secs for 100K records. We need the response in sub seconds…
Modified Count query:
SELECT count( t.orderKey)
FROM WCI AS t
WHERE ( meta(t).id LIKE “ccisvt1:8200:Sales1:odm:%” ) AND ( t.fulfillmentStatus IN [“PENDING”,“ASSIGNED”] )
AND ( t.type = “odm” ) AND ( t.absMinShipStatus < 1400000000 )
AND (ANY shpl IN t.shipmentLines SATISFIES
shpl.expectedShipmentDate != “”
AND (STR_TO_TZ(shpl.expectedShipmentDate, “UTC”) < DATE_ADD_STR(NOW_UTC(),86400,“second”))
AND (ANY ol IN t.orderLines SATISFIES
ol.orderLineKey = shpl.orderLineKey AND ol.deliveryMethod = “SHP” END)
END)
Index
CREATE INDEX idx_sla_risk_orders_01 ON WCI
(meta().id, fulfillmentStatus,
DISTINCT ARRAY STR_TO_TZ(shpl.expectedShipmentDate, “UTC”) FOR shpl IN shipmentLines
WHEN shpl.expectedShipmentDate != “”
END,
absMinShipStatus,
shipmentLines, orderLines, orderKey)
PARTITION BY HASH(META().id)
WHERE type = “odm”;

Questions

  1. Does the sequence of the index fields and the predicates should match?
  2. We are expected to return count and the next 100 records per request, avoided UNNEST to avoid the GROUP BY
    Records fetching query
    SELECT t.orderKey, ( ARRAY {“orderLineKey” : shpl.orderLineKey, “shipmentKey” : shpl.shipmentKey, “shipmentLinekey” : shpl.shipmentLineKey}
    FOR shpl IN t.shipmentLines WHEN ( ANY ol IN t.orderLines SATISFIES
    ol.orderLineKey = shpl.orderLineKey AND ol.deliveryMethod = “SHP” END )
    AND ( shpl.expectedShipmentDate != “” )
    AND (STR_TO_TZ(shpl.expectedShipmentDate, “UTC”) < DATE_ADD_STR(NOW_UTC(),86400,“second”))
    END ) as alertData
    FROM WCI AS t
    WHERE ( meta(t).id LIKE “ccisvt1:8200:Sales1:odm:%” ) AND ( t.fulfillmentStatus IN [“PENDING”,“ASSIGNED”] )
    AND ( t.type = “odm” )
    AND ( t.absMinShipStatus < 1400000000 )
    AND (ANY shpl IN t.shipmentLines SATISFIES
    shpl.expectedShipmentDate != “”
    AND (STR_TO_TZ(shpl.expectedShipmentDate, “UTC”) < DATE_ADD_STR(NOW_UTC(),86400,“second”))
    AND (ANY ol IN t.orderLines SATISFIES
    ol.orderLineKey = shpl.orderLineKey AND ol.deliveryMethod = “SHP” END)END)

ORDER BY t.orderKey ASC
LIMIT 100 OFFSET 74300
Thanks
Jothi


#4

I faced with the same problem, count queries work slow I did not understand why, can you try with the ENTERPRISE edition and check the result if u can? Because I did it with that way and it works within miliseconds. I know this is not a good solution.


#5

We are already on the enterprise edition :frowning:
image


#6

Why did you move Meta().id to leading key?
If you need sort and pagination add orderKey to leading index key and query predicate t.orderKey IS NOT NULL .

Also explore keyset pagination https://blog.couchbase.com/offset-keyset-pagination-n1ql-query-couchbase/


#7

I did not see any difference in performance moving meta().id to the leading key. Does it matter to remove the LIKE key from leading keys?

Thanks
Jothi


#8

I see your reply in the other post
Index key order makes difference, Recommended order will be equal, IN predicates first in leading keys, followed by less than/less than equal, followed by grater than/grater than equal fields.
But why didn’t I see any performance difference with moving the meta().id with the recommended order?
Should the predicate order match with the index order?
Thanks
Jothi