Improving aggregate group by query

query
n1ql

#1

I have a fairly involved query that gets top scores for players in a leaderboard used by multiple apps (games). Each player can have multiple scores, but I am only pulling their top score.

Using 4.5 beta.

The query looks like this:

select 
            max([score,meta(scorebeast).id])[0] as score,
            max([score,meta(scorebeast).id])[1] as scoreId,
            max([score,date])[1] as date,
            max([score,nick])[1] as nick
        from scorebeast
        where score is not null and app = 'some_game' and meta(scorebeast).id like 'score_%'
        group by userId
        order by score desc, date asc
        limit 10 offset 100

I am currently getting execution time of 6 seconds (improvement from 12 seconds after creating basic indexes), but I am wondering if this can be pushed further.

How can I improve this query?

explain here:

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "IntersectScan",
              "scans": [
                {
                  "#operator": "IndexScan",
                  "index": "score_idx",
                  "index_id": "22237429400cc486",
                  "keyspace": "scorebeast",
                  "namespace": "default",
                  "spans": [
                    {
                      "Range": {
                        "Inclusion": 0,
                        "Low": [
                          "null"
                        ]
                      }
                    }
                  ],
                  "using": "gsi"
                },
                {
                  "#operator": "IndexScan",
                  "index": "app_idx",
                  "index_id": "b58177828b6774d0",
                  "keyspace": "scorebeast",
                  "namespace": "default",
                  "spans": [
                    {
                      "Range": {
                        "High": [
                          "\"some_game\""
                        ],
                        "Inclusion": 3,
                        "Low": [
                          "\"some_game\""
                        ]
                      }
                    }
                  ],
                  "using": "gsi"
                },
                {
                  "#operator": "IndexScan",
                  "index": "scorebeast-primary-index",
                  "index_id": "351355ff755686da",
                  "keyspace": "scorebeast",
                  "namespace": "default",
                  "spans": [
                    {
                      "Range": {
                        "High": [
                          "\"scorf\""
                        ],
                        "Inclusion": 1,
                        "Low": [
                          "\"score\""
                        ]
                      }
                    }
                  ],
                  "using": "gsi"
                }
              ]
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "Fetch",
                    "keyspace": "scorebeast",
                    "namespace": "default"
                  },
                  {
                    "#operator": "Filter",
                    "condition": "((((`scorebeast`.`score`) is not null) and ((`scorebeast`.`app`) = \"some_game\")) and ((meta(`scorebeast`).`id`) like \"score_%\"))"
                  },
                  {
                    "#operator": "InitialGroup",
                    "aggregates": [
                      "max([(`scorebeast`.`score`), (`scorebeast`.`date`)])",
                      "max([(`scorebeast`.`score`), (`scorebeast`.`nick`)])",
                      "max([(`scorebeast`.`score`), (meta(`scorebeast`).`id`)])"
                    ],
                    "group_keys": [
                      "(`scorebeast`.`userId`)"
                    ]
                  }
                ]
              }
            },
            {
              "#operator": "IntermediateGroup",
              "aggregates": [
                "max([(`scorebeast`.`score`), (`scorebeast`.`date`)])",
                "max([(`scorebeast`.`score`), (`scorebeast`.`nick`)])",
                "max([(`scorebeast`.`score`), (meta(`scorebeast`).`id`)])"
              ],
              "group_keys": [
                "(`scorebeast`.`userId`)"
              ]
            },
            {
              "#operator": "FinalGroup",
              "aggregates": [
                "max([(`scorebeast`.`score`), (`scorebeast`.`date`)])",
                "max([(`scorebeast`.`score`), (`scorebeast`.`nick`)])",
                "max([(`scorebeast`.`score`), (meta(`scorebeast`).`id`)])"
              ],
              "group_keys": [
                "(`scorebeast`.`userId`)"
              ]
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "InitialProject",
                    "result_terms": [
                      {
                        "as": "score",
                        "expr": "(max([(`scorebeast`.`score`), (meta(`scorebeast`).`id`)])[0])"
                      },
                      {
                        "as": "scoreId",
                        "expr": "(max([(`scorebeast`.`score`), (meta(`scorebeast`).`id`)])[1])"
                      },
                      {
                        "as": "date",
                        "expr": "(max([(`scorebeast`.`score`), (`scorebeast`.`date`)])[1])"
                      },
                      {
                        "as": "nick",
                        "expr": "(max([(`scorebeast`.`score`), (`scorebeast`.`nick`)])[1])"
                      }
                    ]
                  }
                ]
              }
            }
          ]
        },
        {
          "#operator": "Order",
          "limit": "10",
          "offset": "100",
          "sort_terms": [
            {
              "desc": true,
              "expr": "`score`"
            },
            {
              "expr": "`date`"
            }
          ]
        },
        {
          "#operator": "Offset",
          "expr": "100"
        },
        {
          "#operator": "Limit",
          "expr": "10"
        },
        {
          "#operator": "FinalProject"
        }
      ]
    },
    "text": "\nselect \n            max([score,meta(scorebeast).id])[0] as score,\n            max([score,meta(scorebeast).id])[1] as scoreId,\n            max([score,date])[1] as date,\n            max([score,nick])[1] as nick\n        from scorebeast\n        where score is not null and app = 'some_game' and meta(scorebeast).id like 'score_%'\n        group by userId\n        order by score desc, date asc\n        limit 10 offset 100"
  }
]

#2

Hi @enko

Yes, you need a covering index. Please create the following index and
then post the following EXPLAIN.

CREATE INDEX idx_leaderbeast ON scorebeast(score, date, nick, userId)
WHERE app = 'some_game' AND META().id LIKE 'score_%';

EXPLAIN select
            max([score,meta(scorebeast).id])[0] as score,
            max([score,meta(scorebeast).id])[1] as scoreId,
            max([score,date])[1] as date,
            max([score,nick])[1] as nick
	from scorebeast USE INDEX (idx_leaderbeast)
	where score is not null and app = 'some_game' and meta(scorebeast).id like 'score_%'
	group by userId
	order by score desc, date asc
	limit 10 offset 100
;

#3

@geraldss thank you! Down to 3.x seconds.
I’ll keep experimenting further.

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "IndexScan",
              "covers": [
                "cover ((`scorebeast`.`score`))",
                "cover ((`scorebeast`.`date`))",
                "cover ((`scorebeast`.`nick`))",
                "cover ((`scorebeast`.`userId`))",
                "cover ((meta(`scorebeast`).`id`))"
              ],
              "filter_covers": {
                "cover ((\"score\" <= (meta(`scorebeast`).`id`)))": true,
                "cover (((meta(`scorebeast`).`id`) < \"scorf\"))": true,
                "cover ((`scorebeast`.`app`))": "machrace_pc_alpha"
              },
              "index": "idx_leaderbeast",
              "index_id": "6612212b820ac79",
              "keyspace": "scorebeast",
              "namespace": "default",
              "spans": [
                {
                  "Range": {
                    "Inclusion": 0,
                    "Low": [
                      "null"
                    ]
                  }
                }
              ],
              "using": "gsi"
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "Filter",
                    "condition": "(((cover ((`scorebeast`.`score`)) is not null) and (cover ((`scorebeast`.`app`)) = \"machrace_pc_alpha\")) and (cover ((meta(`scorebeast`).`id`)) like \"score_%\"))"
                  },
                  {
                    "#operator": "InitialGroup",
                    "aggregates": [
                      "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`date`))])",
                      "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`nick`))])",
                      "max([cover ((`scorebeast`.`score`)), cover ((meta(`scorebeast`).`id`))])"
                    ],
                    "group_keys": [
                      "cover ((`scorebeast`.`userId`))"
                    ]
                  }
                ]
              }
            },
            {
              "#operator": "IntermediateGroup",
              "aggregates": [
                "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`date`))])",
                "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`nick`))])",
                "max([cover ((`scorebeast`.`score`)), cover ((meta(`scorebeast`).`id`))])"
              ],
              "group_keys": [
                "cover ((`scorebeast`.`userId`))"
              ]
            },
            {
              "#operator": "FinalGroup",
              "aggregates": [
                "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`date`))])",
                "max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`nick`))])",
                "max([cover ((`scorebeast`.`score`)), cover ((meta(`scorebeast`).`id`))])"
              ],
              "group_keys": [
                "cover ((`scorebeast`.`userId`))"
              ]
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "InitialProject",
                    "result_terms": [
                      {
                        "as": "score",
                        "expr": "(max([cover ((`scorebeast`.`score`)), cover ((meta(`scorebeast`).`id`))])[0])"
                      },
                      {
                        "as": "scoreId",
                        "expr": "(max([cover ((`scorebeast`.`score`)), cover ((meta(`scorebeast`).`id`))])[1])"
                      },
                      {
                        "as": "date",
                        "expr": "(max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`date`))])[1])"
                      },
                      {
                        "as": "nick",
                        "expr": "(max([cover ((`scorebeast`.`score`)), cover ((`scorebeast`.`nick`))])[1])"
                      }
                    ]
                  }
                ]
              }
            }
          ]
        },
        {
          "#operator": "Order",
          "limit": "10",
          "offset": "100",
          "sort_terms": [
            {
              "desc": true,
              "expr": "`score`"
            },
            {
              "expr": "`date`"
            }
          ]
        },
        {
          "#operator": "Offset",
          "expr": "100"
        },
        {
          "#operator": "Limit",
          "expr": "10"
        },
        {
          "#operator": "FinalProject"
        }
      ]
    },
    "text": "select\n            max([score,meta(scorebeast).id])[0] as score,\n            max([score,meta(scorebeast).id])[1] as scoreId,\n            max([score,date])[1] as date,\n            max([score,nick])[1] as nick\n\tfrom scorebeast USE INDEX (idx_leaderbeast)\n\twhere score is not null and app = 'machrace_pc_alpha' and meta(scorebeast).id like 'score_%'\n\tgroup by userId\n\torder by score desc, date asc\n\tlimit 10 offset 100"
  }
]

#4

In cbq shell, can you try setting max_parallelism to your number of cores? For example

\set -max-parallelism 16


#5

@geraldss Looks like it went down to 2.6 - 2.8 sec.

This is a 4 core VM with 1GB of ram and minimum allocation of 256mb within cb server.

Also wanted to mention, I am getting identical performance when i remove use index from the query.

select
            max([score,meta(scorebeast).id])[0] as score,
            max([score,meta(scorebeast).id])[1] as scoreId,
            max([score,date])[1] as date,
            max([score,nick])[1] as nick
    from scorebeast
    where score is not null and app = 'machrace_pc_alpha' and meta(scorebeast).id like 'score_%'
    group by userId
    order by score desc, date asc
    limit 10 offset 800

#6

You can use EXPLAIN to see if the optimizer is making the same decision with and without the USE INDEX. In this case, it is the same decision, because the index is covering.