Text search on sub-objects/arrays in travel-sample data

I’ve been building up a useful index (index JSON at end of post) for some of the travel-sample bucket data, including review content, hotel location, cleanliness ratings, and dates. It took me a while to get a couple of these figured out (thanks, @sreek), so I thought I’d share it back to the forums for reference.


It includes a custom date parser:
image

And note that cleanliness is numeric.
You can do a ton of interesting stuff with this index including date ranges:
{
  "query": {
    "start": "2014-10-12",
    "end": "2015-10-09",
    "field": "reviews.date"
  }
}

geospatial bounding boxes:
{
  "query": {
    "top_left": [-2.235143, 53.482358],
    "bottom_right": [29, 40],
    "field": "geo"
  }
}
And all types of facet summaries:

Term facet:
{
  "query": {
    "query": "horrible experience"
  },
  "facets": {
    "type": {
      "size": 5,
      "field": "type"
    }
  }
}

Numeric ranges:
{
  "query": {
    "query": "rustic style"
  },
  "facets": {
    "reviews.ratings.cleanliness": {
      "size": 5,
      "field": "reviews.ratings.cleanliness",
      "numeric_ranges": [{"name": "high", "min": 3}, {"name": "low", "max": 2}]
    }
  }
}

Date ranges:
{
  "query": {
    "query": "horrible experience"
  },
  "facets": {
    "reviews.date": {
      "size": 5,
      "field": "reviews.date",
      "date_ranges": [{"name": "recent", "start": "2013-01-01"}, {"name": "old", "start": "2001-01-01"}]
    }
  }
}

JSON Index definition:

{
 "name": "hotels",
 "type": "fulltext-index",
 "params": {
  "doc_config": {
   "docid_prefix_delim": "",
   "docid_regexp": "",
   "mode": "type_field",
   "type_field": "type"
  },
  "mapping": {
   "analysis": {
    "analyzers": {
     "letterAnalyzer": {
      "tokenizer": "letter",
      "type": "custom"
     }
    },
    "date_time_parsers": {
     "custom_date": {
      "layouts": [
       "2006-01-02 15:04:05 +0300"
      ],
      "type": "flexiblego"
     }
    }
   },
   "default_analyzer": "standard",
   "default_datetime_parser": "dateTimeOptional",
   "default_field": "_all",
   "default_mapping": {
    "dynamic": true,
    "enabled": false
   },
   "default_type": "_default",
   "docvalues_dynamic": true,
   "index_dynamic": true,
   "store_dynamic": false,
   "type_field": "_type",
   "types": {
    "hotel": {
     "dynamic": true,
     "enabled": true,
     "properties": {
      "reviews": {
       "dynamic": true,
       "enabled": true,
       "properties": {
        "ratings": {
         "dynamic": true,
         "enabled": true,
         "properties": {
          "Cleanliness": {
           "enabled": true,
           "dynamic": false,
           "fields": [
            {
             "docvalues": true,
             "include_in_all": true,
             "include_term_vectors": true,
             "index": true,
             "name": "cleanliness",
             "store": true,
             "type": "number"
            }
           ]
          }
         }
        },
        "content": {
         "enabled": true,
         "dynamic": false,
         "fields": [
          {
           "docvalues": true,
           "include_in_all": true,
           "include_term_vectors": true,
           "index": true,
           "name": "content",
           "store": true,
           "type": "text"
          }
         ]
        },
        "date": {
         "enabled": true,
         "dynamic": false,
         "fields": [
          {
           "date_format": "custom_date",
           "docvalues": true,
           "include_in_all": true,
           "include_term_vectors": true,
           "index": true,
           "name": "date",
           "store": true,
           "type": "datetime"
          }
         ]
        }
       }
      },
      "geo": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "docvalues": true,
         "include_in_all": true,
         "include_term_vectors": true,
         "index": true,
         "name": "geo",
         "store": true,
         "type": "geopoint"
        }
       ]
      }
     }
    }
   }
  },
  "store": {
   "indexType": "scorch"
  }
 },
 "sourceType": "couchbase",
 "sourceName": "travel-sample",
 "sourceUUID": "",
 "sourceParams": {},
 "planParams": {
  "maxPartitionsPerPIndex": 171,
  "indexPartitions": 6,
  "numReplicas": 0
 },
 "uuid": ""
}