Appropriate index for query with unnest

I have this query to find nearest points from an array of points (geo search using “bounding box” concept):

SELECT d.type,
       d.name,
       d.`key`,
       p.*,
       (ACOS(SIN( RADIANS(55.55)) * SIN(RADIANS(p.lat)) + COS( RADIANS(55.55 )) * COS(RADIANS(p.lat)) * COS(RADIANS(p.lon) - RADIANS( 11.25))) * 6371) distance
FROM data d
UNNEST points p
WHERE d.type IN ['Lake','Stream','PutTakeLake','CoastLocalArea','SeaLocalArea']
    AND p.lat BETWEEN 55.5 AND 55.6
    AND p.lon BETWEEN 11.0 AND 11.5
ORDER BY distance ASC
LIMIT 100

It works - but I would like to speed it up.
The data structure looks like this:

{
  "key": "8642",
  "name": "The lake",
  "points": [
    {
      "lat": 55.5747937187,
      "lon": 11.2856648546
    },
    {
      "lat": 55.5930342762,
      "lon": 11.2790527292
    },
   :
   :
    {
      "lat": 55.5566764979,
      "lon": 11.3007332022
    }
  ],
  "type": "Lake"
}

I have looked at various samples for building the index and have come up with:

CREATE INDEX `def_nearest1` ON `data`(`type`,`name`,`key`,(all (array [(`p`.`lat`), (`p`.`lon`)] for `p` in `points` end))) WHERE (`type` in ["Lake", "Stream", "PutTakeLake", "CoastLocalArea", "SeaLocalArea"])

The query uses the index according to this “explains” - but is slow, so I obviously haven’t found the right way to do this…

{
    "#operator": "Sequence",
    "~children": [
        {
            "#operator": "Sequence",
            "~children": [
                {
                    "#operator": "DistinctScan",
                    "scan": {
                        "#operator": "IndexScan3",
                        "as": "d",
                        "index": "def_nearest1",
                        "index_id": "ecb344c87d7ff413",
                        "index_projection": {
                            "primary_key": true
                        },
                        "keyspace": "data",
                        "namespace": "default",
                        "spans": [
                            {
                                "exact": true,
                                "range": [
                                    {
                                        "high": "\"CoastLocalArea\"",
                                        "inclusion": 3,
                                        "low": "\"CoastLocalArea\""
                                    }
                                ]
                            },
                            {
                                "exact": true,
                                "range": [
                                    {
                                        "high": "\"Lake\"",
                                        "inclusion": 3,
                                        "low": "\"Lake\""
                                    }
                                ]
                            },
                            {
                                "exact": true,
                                "range": [
                                    {
                                        "high": "\"PutTakeLake\"",
                                        "inclusion": 3,
                                        "low": "\"PutTakeLake\""
                                    }
                                ]
                            },
                            {
                                "exact": true,
                                "range": [
                                    {
                                        "high": "\"SeaLocalArea\"",
                                        "inclusion": 3,
                                        "low": "\"SeaLocalArea\""
                                    }
                                ]
                            },
                            {
                                "exact": true,
                                "range": [
                                    {
                                        "high": "\"Stream\"",
                                        "inclusion": 3,
                                        "low": "\"Stream\""
                                    }
                                ]
                            }
                        ],
                        "using": "gsi"
                    }
                },
                {
                    "#operator": "Fetch",
                    "as": "d",
                    "keyspace": "data",
                    "namespace": "default"
                },
                {
                    "#operator": "Parallel",
                    "~child": {
                        "#operator": "Sequence",
                        "~children": [
                            {
                                "#operator": "Unnest",
                                "as": "p",
                                "expr": "(`d`.`points`)"
                            }
                        ]
                    }
                },
                {
                    "#operator": "Parallel",
                    "~child": {
                        "#operator": "Sequence",
                        "~children": [
                            {
                                "#operator": "Filter",
                                "condition": "((((`d`.`type`) in [\"Lake\", \"Stream\", \"PutTakeLake\", \"CoastLocalArea\", \"SeaLocalArea\"]) and ((`p`.`lat`) between 55.5 and 55.6)) and ((`p`.`lon`) between 11 and 11.5))"
                            },
                            {
                                "#operator": "InitialProject",
                                "result_terms": [
                                    {
                                        "expr": "(`d`.`type`)"
                                    },
                                    {
                                        "expr": "(`d`.`name`)"
                                    },
                                    {
                                        "expr": "(`d`.`key`)"
                                    },
                                    {
                                        "expr": "`p`",
                                        "star": true
                                    },
                                    {
                                        "as": "distance",
                                        "expr": "(acos(((sin(radians(55.55)) * sin(radians((`p`.`lat`)))) + ((cos(radians(55.55)) * cos(radians((`p`.`lat`)))) * cos((radians((`p`.`lon`)) - radians(11.25)))))) * 6371)"
                                    }
                                ]
                            }
                        ]
                    }
                }
            ]
        },
        {
            "#operator": "Order",
            "limit": "100",
            "sort_terms": [
                {
                    "expr": "`distance`"
                }
            ]
        },
        {
            "#operator": "Limit",
            "expr": "100"
        },
        {
            "#operator": "FinalProject"
        }
    ]
}

I’m trying this on: Community Edition 6.6.0 build 7909

Try this. Query should cover with index. You need to use [p.lat, p.lon] as pair, include array index key as leading key

CREATE INDEX `ix1` ON `data`((all (array [(`p`.`lat`), (`p`.`lon`)] for `p` in `points` end)), `type`,`name`,`key`) WHERE (`type` in ["Lake", "Stream", "PutTakeLake", "CoastLocalArea", "SeaLocalArea"]);

SELECT d.type,
       d.name,
       d.`key`,
       lat,
       lon,
       (ACOS(SIN( RADIANS(55.55)) * SIN(RADIANS(lat)) + COS( RADIANS(55.55 )) * COS(RADIANS(lat)) * COS(RADIANS(lon) - RADIANS( 11.25))) * 6371) distance
FROM data d
UNNEST d.points p
LET  fltr = [p.lat, p.lon], lat = fltr[0], lon = fltr[1]
WHERE d.type IN ['Lake','Stream','PutTakeLake','CoastLocalArea','SeaLocalArea']
    AND lat BETWEEN 55.5 AND 55.6 AND lon BETWEEN 11.0 AND 11.5
    AND fltr >= [55.5, 11.0] AND fltr <  [SUCCESSOR(55.6), 11.5]
ORDER BY distance ASC
LIMIT 100

FYI: How to Speed Up Spatial Search in Couchbase N1QL - DZone Performance

1 Like

Bingo!

Now it runs in 40-150 ms which is fine.

First, I thought the “trick” was to create separate “fields” for lat and lon… I had the feeling I was missing something like that - but couldn’t figure it out. But playing a little more with it I found that the order of the fields in the index alone improved my own query by a factor 2.

Thank you! - and again I’m positively surprised with the possibilities in Couchbase although I cannot control it all myself.

Have a nice weekend!

/John

PS. I had looked at the awesome article you refer to - and that gave me the basis of how to get to the query . I just had to unnest the points as I have many points and not just one POI point as in the samples I could find.

… just a quick question. I tried to find some info about the SUCCESSOR()method in the docs… but I couldn’t? The search method didn’t find it and trying to look through all the functions and search for the word didn’t give any result.

Could you perhaps give me a pointer or put a couple of words on what it does?

Thanks!

Successor give next order of value.

https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/datatypes.html#collation

Within the data types, integer adds +1, float64 src/math/nextafter.go - The Go Programming Language
string next ascii string comes after.

SELECT SUCCESSOR(1), SUCCESSOR(1.1), SUCCESSOR("abc");
{
        "$1": 2,
        "$2": 1.1000000000000003,
        "$3": "abc "
    }

The following two links have some details why need successor. These are complex.

1 Like

In your case you have both elements you can use the following. fltr is used for IndexScan any false positives (this predicate must be such that it will not give false negatives to avoid wrong results), eliminated by lat, lon predicate by post indexScan.

 AND lat BETWEEN 55.5 AND 55.6 AND lon BETWEEN 11.0 AND 11.5
AND fltr BETWEEN [55.5, 11.0] AND [55.6, 11.5]
1 Like

Great, then I think I understand it! But good to have a heads up

Just a quick question. I have a local test server that is only version 6.0 (as it runs CentOS 6) - where this query doesn’t work. Could it simply be that this older version does not handle the

LET  fltr = [p.lat, p.lon], lat = fltr[0], lon = fltr[1]

and usage in the Where clause?
I think I have the index and the query right… I’m adding index creation to code so that’s what I test locally prior to modifying the service it should call :innocent:

Case cade let might supported in 6.5+

Try this pre 6.5

LET fltr = [p.lat, p.lon], lat = [p.lat, p.lon][0], lon = [p.lat, p.lon][1]

LET variable in WHERE clause during plan it will be inline and decide plan.
LET syntax only allows AFTER FROM-JOIN and before WHERE. If never used in WHERE it will process after WHERE (optimization to avoid compute and through away)

Thanks. It works… I had moved the distance calculation to the LET line as well - but it doesn’t show up in the result but that is probably due to the same issue.

I suppose I need to find some time to update my local dev. server :slight_smile:

Same issue. I would recommend leave distance in projection to avoid unnecessary calculation. The above optimization is all or nothing (i.e even one let variable used in WHERE it will compute before WHERE).

1 Like

I found an unexpected side effect of narrowing down my search scope. If I limit the types to less than the entire list, e.g.

SELECT d.type,
       d.name,
       d.`key`,
       lat,
       lon,
       distance
FROM data d
UNNEST d.points p
LET fltr = [p.lat, p.lon],
lat = [p.lat, p.lon][0], lon = [p.lat, p.lon][1],
distance = (ACOS(SIN( RADIANS(55.55)) * SIN(RADIANS([p.lat, p.lon][0])) + COS( RADIANS(55.55 )) * COS(RADIANS([p.lat, p.lon][0])) * COS(RADIANS([p.lat, p.lon][1]) - RADIANS( 11.25))) * 6371)
WHERE d.type IN ['Lake','Stream']
    AND lat BETWEEN 55.5 AND 55.6
    AND lon BETWEEN 11.0 AND 11.5
    AND fltr BETWEEN [55.5, 11.0] AND [55.6, 11.5]
    AND distance < 3
ORDER BY distance ASC
LIMIT 100

Instead of the full list:

WHERE d.type IN ['Lake','Stream','PutTakeLake','CoastLocalArea','SeaLocalArea']

then the query doesn’t use the newly created index but falls back to index defined on type and scans that - resulting in a slower query. Is there a way I can define the index and query to overcome this issue? Otherwise, I can always filter the data out using code - but it seems kind of wrong :innocent: :slight_smile:

The explain looks like this with the narrowed search:

{
  "plan": {
    "#operator": "Sequence",
    "~children": [
      {
        "#operator": "Sequence",
        "~children": [
          {
            "#operator": "IndexScan3",
            "as": "d",
            "index": "def_type",
            "index_id": "9994ebedc9163379",
            "index_projection": {
              "primary_key": true
            },
            "keyspace": "data",
            "namespace": "default",
            "spans": [
              {
                "exact": true,
                "range": [
                  {
                    "high": "\"Lake\"",
                    "inclusion": 3,
                    "low": "\"Lake\""
                  }
                ]
              },
              {
                "exact": true,
                "range": [
                  {
                    "high": "\"Stream\"",
                    "inclusion": 3,
                    "low": "\"Stream\""
                  }
                ]
              }
            ],
            "using": "gsi"
          },
          {
            "#operator": "Fetch",
            "as": "d",
            "keyspace": "data",
            "namespace": "default"
          },
          {
            "#operator": "Parallel",
            "~child": {
              "#operator": "Sequence",
              "~children": [
                {
                  "#operator": "Unnest",
                  "as": "p",
                  "expr": "(`d`.`points`)"
                }
              ]
            }
          },
          {
            "#operator": "Parallel",
            "~child": {
              "#operator": "Sequence",
              "~children": [
                {
                  "#operator": "Let",
                  "bindings": [
                    {
                      "expr": "[(`p`.`lat`), (`p`.`lon`)]",
                      "var": "fltr"
                    },
                    {
                      "expr": "([(`p`.`lat`), (`p`.`lon`)][0])",
                      "var": "lat"
                    },
                    {
                      "expr": "([(`p`.`lat`), (`p`.`lon`)][1])",
                      "var": "lon"
                    },
                    {
                      "expr": "(acos(((sin(radians(55.55)) * sin(radians(([(`p`.`lat`), (`p`.`lon`)][0])))) + ((cos(radians(55.55)) * cos(radians(([(`p`.`lat`), (`p`.`lon`)][0])))) * cos((radians(([(`p`.`lat`), (`p`.`lon`)][1])) - radians(11.25)))))) * 6371)",
                      "var": "distance"
                    }
                  ]
                },
                {
                  "#operator": "Filter",
                  "condition": "((((((`d`.`type`) in [\"Lake\", \"Stream\"]) and (`lat` between 55.5 and 55.6)) and (`lon` between 11 and 11.5)) and (`fltr` between [55.5, 11] and [55.6, 11.5])) and (`distance` < 3))"
                },
                {
                  "#operator": "InitialProject",
                  "result_terms": [
                    {
                      "expr": "(`d`.`type`)"
                    },
                    {
                      "expr": "(`d`.`name`)"
                    },
                    {
                      "expr": "(`d`.`key`)"
                    },
                    {
                      "expr": "`lat`"
                    },
                    {
                      "expr": "`lon`"
                    },
                    {
                      "expr": "`distance`"
                    }
                  ]
                }
              ]
            }
          }
        ]
      },
      {
        "#operator": "Order",
        "limit": "100",
        "sort_terms": [
          {
            "expr": "`distance`"
          }
        ]
      },
      {
        "#operator": "Limit",
        "expr": "100"
      },
      {
        "#operator": "FinalProject"
      }
    ]
  },
  "text": "SELECT d.type,\n       d.name,\n       d.`key`,\n       lat,\n       lon,\n        distance\nFROM data d\nUNNEST d.points p\nLET fltr = [p.lat, p.lon],\nlat = [p.lat, p.lon][0], lon = [p.lat, p.lon][1],\ndistance = (ACOS(SIN( RADIANS(55.55)) * SIN(RADIANS([p.lat, p.lon][0])) + COS( RADIANS(55.55 )) * COS(RADIANS([p.lat, p.lon][0])) * COS(RADIANS([p.lat, p.lon][1]) - RADIANS( 11.25))) * 6371)\nWHERE d.type IN ['Lake','Stream']\n    AND lat BETWEEN 55.5 AND 55.6\n    AND lon BETWEEN 11.0 AND 11.5\n    AND fltr BETWEEN [55.5, 11.0] AND [55.6, 11.5]\n    AND distance < 3\nORDER BY distance ASC\nLIMIT 100"
}

@jda ,

It should work in 6.0.1+ (MB-32306 ), If not Add double predicate

One index Selection (exactly same as index where clause and other subset what you need)

WHERE d.type IN ['Lake','Stream','PutTakeLake','CoastLocalArea','SeaLocalArea']
                    AND d.type IN ['Lake','Stream']

Ah! Neat way to “cheat” the index :+1: :grin:

Thanks!