How to best investigate what index to create

n1ql

#1

I have a bucket (named data) and want to find out how to define index(es) to optimize these two queries:

SELECT COUNT(1) AS size FROM data WHERE type='FishingTrip' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))

and

SELECT SUM(count) as count FROM data WHERE type='Catch' AND SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10)

I already have an index for type:

CREATE INDEX `def_type` ON `data`(`type`)

and I have tried (by guessing…) to create one over the type, date, and count:

CREATE INDEX `def_type_date_count` ON `data`(`type`,`date`,`count`)

… but is this the best way of doing it? I can see that the “FishingTrip” query runs around 175ms whereas the “Catch” query runs around 15ms… - although I would have thought that the later was the “heaviest” due to the SUM() function. The count field does not exist on the FishingTrip - could that be a reason?

How do I find out how to best translate the “Explain” to the index I need to create?

For reference the output of Explain for FishingTrip is:

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "IndexScan2",
        "covers": [
          "cover ((`data`.`type`))",
          "cover ((`data`.`date`))",
          "cover ((`data`.`count`))",
          "cover ((meta(`data`).`id`))"
        ],
        "index": "def_type_date_count",
        "index_id": "9f49efa27e901715",
        "index_projection": {
          "entry_keys": [
            0,
            1
          ]
        },
        "keyspace": "data",
        "namespace": "default",
        "spans": [
          {
            "range": [
              {
                "high": "\"FishingTrip\"",
                "inclusion": 3,
                "low": "\"FishingTrip\""
              },
              {
                "inclusion": 0,
                "low": "null"
              }
            ]
          }
        ],
        "using": "gsi"
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "Filter",
              "condition": "((cover ((`data`.`type`)) = \"FishingTrip\") and (substr0(date_add_str(now_str(), (-300), \"day\"), 0, 10) < substr0(cover ((`data`.`date`)), 0, 10)))"
            },
            {
              "#operator": "InitialGroup",
              "aggregates": [
                "count(1)"
              ],
              "group_keys": []
            }
          ]
        }
      },
      {
        "#operator": "IntermediateGroup",
        "aggregates": [
          "count(1)"
        ],
        "group_keys": []
      },
      {
        "#operator": "FinalGroup",
        "aggregates": [
          "count(1)"
        ],
        "group_keys": []
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "InitialProject",
              "result_terms": [
                {
                  "as": "size",
                  "expr": "count(1)"
                }
              ]
            },
            {
              "#operator": "FinalProject"
            }
          ]
        }
      }
    ]
  },
  "text": "SELECT COUNT(1) AS size FROM data WHERE type='FishingTrip' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))"
}

and for Catch it is:

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "IndexScan2",
        "covers": [
          "cover ((`data`.`type`))",
          "cover ((`data`.`date`))",
          "cover ((`data`.`count`))",
          "cover ((meta(`data`).`id`))"
        ],
        "index": "def_type_date_count",
        "index_id": "9f49efa27e901715",
        "index_projection": {
          "entry_keys": [
            0,
            1,
            2
          ]
        },
        "keyspace": "data",
        "namespace": "default",
        "spans": [
          {
            "range": [
              {
                "high": "\"Catch\"",
                "inclusion": 3,
                "low": "\"Catch\""
              },
              {
                "inclusion": 0,
                "low": "null"
              },
              {
                "inclusion": 0,
                "low": "null"
              }
            ]
          }
        ],
        "using": "gsi"
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "Filter",
              "condition": "((cover ((`data`.`type`)) = \"Catch\") and (substr0(date_add_str(now_str(), (-300), \"day\"), 0, 10) < substr0(cover ((`data`.`date`)), 0, 10)))"
            },
            {
              "#operator": "InitialGroup",
              "aggregates": [
                "sum(cover ((`data`.`count`)))"
              ],
              "group_keys": []
            }
          ]
        }
      },
      {
        "#operator": "IntermediateGroup",
        "aggregates": [
          "sum(cover ((`data`.`count`)))"
        ],
        "group_keys": []
      },
      {
        "#operator": "FinalGroup",
        "aggregates": [
          "sum(cover ((`data`.`count`)))"
        ],
        "group_keys": []
      },
      {
        "#operator": "Parallel",
        "~child": {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "InitialProject",
              "result_terms": [
                {
                  "as": "count",
                  "expr": "sum(cover ((`data`.`count`)))"
                }
              ]
            },
            {
              "#operator": "FinalProject"
            }
          ]
        }
      }
    ]
  },
  "text": "SELECT SUM(count) AS count FROM data WHERE type='Catch' AND (SUBSTR(date,0,10) > SUBSTR(date_add_str(now_str(), -300, 'day'),0,10))"
}

#2

CREATE INDEX ix1 ON data(type,SUBSTR(date,0,10), count);

Also check out Index Aggregation in A Guide to N1QL In couchbase 5.5