Slow Count Query Using N!QL

mustanish · February 19, 2019, 2:16pm

Hi,

I have 63k records in my bucket and I am trying to fetch counts based on type. Below is my query

SELECT COUNT(type) AS count, type FROM events USE INDEX(my-contribution USING GSI) WHERE user_id IS NOT MISSING AND type IN[‘added’, ‘edited’] GROUP BY type;

Running this query takes more than 500ms.

Explain for the same is

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "IndexScan3",
        "covers": [
          "cover ((`events`.`user_id`))",
          "cover ((`events`.`type`))",
          "cover ((meta(`events`).`id`))"
        ],
        "index": "my-contribution",
        "index_id": "7e05677ff098f217",
        "index_projection": {
          "entry_keys": [
            0,
            1
          ]
        },
        "keyspace": "events",
        "namespace": "default",
        "spans": [
          {
            "exact": true,
            "range": [
              {
                "inclusion": 1,
                "low": "null"
              },
              {
                "high": "\"added\"",
                "inclusion": 3,
                "low": "\"added\""
              }
            ]
          },
          {
            "exact": true,
            "range": [
              {
                "inclusion": 1,
                "low": "null"
              },
              {
                "high": "\"edited\"",
                "inclusion": 3,
                "low": "\"edited\""
              }
            ]
          }
        ],
        "using": "gsi"
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "Filter",
              "condition": "((cover ((`events`.`user_id`)) is not missing) and (cover ((`events`.`type`)) in [\"added\", \"edited\"]))"
            },
            {
              "#operator": "InitialGroup",
              "aggregates": [
                "count(cover ((`events`.`type`)))"
              ],
              "group_keys": [
                "cover ((`events`.`type`))"
              ]
            }
          ]
        }
      },
      {
        "#operator": "IntermediateGroup",
        "aggregates": [
          "count(cover ((`events`.`type`)))"
        ],
        "group_keys": [
          "cover ((`events`.`type`))"
        ]
      },
      {
        "#operator": "FinalGroup",
        "aggregates": [
          "count(cover ((`events`.`type`)))"
        ],
        "group_keys": [
          "cover ((`events`.`type`))"
        ]
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "InitialProject",
              "result_terms": [
                {
                  "as": "count",
                  "expr": "count(cover ((`events`.`type`)))"
                },
                {
                  "expr": "cover ((`events`.`type`))"
                }
              ]
            },
            {
              "#operator": "FinalProject"
            }
          ]
        }
      }
    ]
  },
  "text": "SELECT COUNT(type) AS count, type FROM events USE INDEX(`my-contribution` USING GSI) WHERE user_id IS NOT MISSING AND \ntype IN['added', 'edited'] GROUP BY type;"
}

What should I do improve the performance of this query

vsr1 · February 19, 2019, 2:21pm

Use this index

CREATE INDEX ix1 ON events (type, user_id);

Follow https://blog.couchbase.com/understanding-index-grouping-aggregation-couchbase-n1ql-query/

mustanish · February 20, 2019, 6:21am

I am using Community Edition 6.0.0 build 1693 on my local machine and on production I am using Community Edition 5.0.1 build 5003.

I created above INDEX on both servers but I do not see any difference in performance. It is still taking around 500ms. I even imported demo travel-sample bucket, created mentioned index and ran mentioned query , The blog claims that it will take somewhere around 13ms but it is taking more than 250ms.

Explain for my query using index suggested by you

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "IndexScan3",
        "covers": [
          "cover ((`events`.`type`))",
          "cover ((`events`.`user_id`))",
          "cover ((meta(`events`).`id`))"
        ],
        "index": "ix1",
        "index_id": "c22d2a2a7cc6fbae",
        "index_projection": {
          "entry_keys": [
            0,
            1
          ]
        },
        "keyspace": "events",
        "namespace": "default",
        "spans": [
          {
            "exact": true,
            "range": [
              {
                "high": "\"added\"",
                "inclusion": 3,
                "low": "\"added\""
              },
              {
                "inclusion": 1,
                "low": "null"
              }
            ]
          },
          {
            "exact": true,
            "range": [
              {
                "high": "\"edited\"",
                "inclusion": 3,
                "low": "\"edited\""
              },
              {
                "inclusion": 1,
                "low": "null"
              }
            ]
          }
        ],
        "using": "gsi"
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "Filter",
              "condition": "((cover ((`events`.`user_id`)) is not missing) and (cover ((`events`.`type`)) in [\"added\", \"edited\"]))"
            },
            {
              "#operator": "InitialGroup",
              "aggregates": [
                "count(cover ((`events`.`type`)))"
              ],
              "group_keys": [
                "cover ((`events`.`type`))"
              ]
            }
          ]
        }
      },
      {
        "#operator": "IntermediateGroup",
        "aggregates": [
          "count(cover ((`events`.`type`)))"
        ],
        "group_keys": [
          "cover ((`events`.`type`))"
        ]
      },
      {
        "#operator": "FinalGroup",
        "aggregates": [
          "count(cover ((`events`.`type`)))"
        ],
        "group_keys": [
          "cover ((`events`.`type`))"
        ]
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "InitialProject",
              "result_terms": [
                {
                  "as": "count",
                  "expr": "count(cover ((`events`.`type`)))"
                },
                {
                  "expr": "cover ((`events`.`type`))"
                }
              ]
            },
            {
              "#operator": "FinalProject"
            }
          ]
        }
      }
    ]
  },
  "text": "SELECT COUNT(type) AS count, type FROM events USE INDEX(`ix1` USING GSI) WHERE user_id IS NOT MISSING AND \ntype IN['added', 'edited'] GROUP BY type;"
}

Query Which I ran from blog

SELECT t.type, COUNT(type) AS cnt FROM travel-sample AS t WHERE t.type IS NOT NULL GROUP BY t.type;

Index I used from blog

CREATE INDEX idx_ts_type_country_city ON travel-sample (type, country, city);

Explain for the query

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "IndexScan3",
        "as": "t",
        "covers": [
          "cover ((`t`.`type`))",
          "cover ((meta(`t`).`id`))"
        ],
        "index": "def_type",
        "index_id": "e6315299f80e4c8f",
        "index_projection": {
          "entry_keys": [
            0
          ]
        },
        "keyspace": "travel-sample",
        "namespace": "default",
        "spans": [
          {
            "exact": true,
            "range": [
              {
                "inclusion": 0,
                "low": "null"
              }
            ]
          }
        ],
        "using": "gsi"
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "Filter",
              "condition": "(cover ((`t`.`type`)) is not null)"
            },
            {
              "#operator": "InitialGroup",
              "aggregates": [
                "count(cover ((`t`.`type`)))"
              ],
              "group_keys": [
                "cover ((`t`.`type`))"
              ]
            }
          ]
        }
      },
      {
        "#operator": "IntermediateGroup",
        "aggregates": [
          "count(cover ((`t`.`type`)))"
        ],
        "group_keys": [
          "cover ((`t`.`type`))"
        ]
      },
      {
        "#operator": "FinalGroup",
        "aggregates": [
          "count(cover ((`t`.`type`)))"
        ],
        "group_keys": [
          "cover ((`t`.`type`))"
        ]
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "InitialProject",
              "result_terms": [
                {
                  "expr": "cover ((`t`.`type`))"
                },
                {
                  "as": "cnt",
                  "expr": "count(cover ((`t`.`type`)))"
                }
              ]
            },
            {
              "#operator": "FinalProject"
            }
          ]
        }
      }
    ]
  },
  "text": "SELECT t.type, COUNT(type) AS cnt FROM `travel-sample` AS t WHERE t.type IS NOT NULL GROUP BY t.type;"
}

vsr1 · February 20, 2019, 6:26am

Few line above Performance section mentioned this. Index grouping and aggregation is supported in Enterprise Edition only.

mustanish · February 20, 2019, 6:38am

So there is no way to improve performance on community edition. Thanks for your help