Flattening and listing all field names in a bucket


#1

Hi all,
I have a bucket containing various sorts of data. Each document exists of multiple JSON objects. Below, is the output for SELECT * FROM test statement. How may I list every distinct field name for each sub-document?
For the output below, I want to be able to get data1, data2, data3, data4, data5, data6

[
{
	"test": [
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3",
			"data4": "data4"
		}
	]
},
{
	"test": [
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3",
			"data4": "data4"
		}
	]
},
{
	"test": [
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3",
			"data4": "data4",
			"data5": "data5"
		}
	]
},
{
	"test": [
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3"
		}
	]
},
{
	"test": [
		{
			"data1": "data1"
		},
		{
			"data1": "data1",
			"data2": "data2"
		},
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3",
			"data4": "data4",
			"data5": "data5",
			"data6": "data6"
		},
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3",
			"data4": "data4"
		},
		{
			"data1": "data1",
			"data2": "data2",
			"data3": "data3"
		}
	]
}

]


#2
INSERT INTO default VALUES
("k01",{ "test": [ { "data1": "data1val", "data2": "data2val", "data3": "data3val", "data4k01": "data4k01val" } ] }),
("k02",{ "test": [ { "data1": "data1val", "data2": "data2val", "data3": "data3val", "data4k02": "data4k02val" } ] }),
("k03",{ "test": [ { "data1": "data1val", "data2": "data2val", "data3": "data3val", "data4k03": "data4k03val", "data5": "data5val" } ] }),
("k04",{ "test": [ { "data1": "data1val", "data2": "data2val", "data3": "data3val" , "data4k04": "data4k04val" } ] }),
("k05",{ "test": [ { "data1": "data1val" }, { "data1": "data1val", "data2": "data2val" }, { "data1": "data1val", "data2": "data2val", "data3": "data3val", "data4": "data4val", "data5": "data5val", "data6": "data6val" }, { "data1": "data1val", "data2": "data2val", "data3": "data3val", "data4": "data4val" }, { "data1": "data1val", "data2": "data2val", "data3": "data3val" }, { "data4k05": "data4k05val" } ] });

SELECT  OBJECT o.name:o.val FOR o IN  (ARRAY_FLATTEN(ARRAY OBJECT_PAIRS(v) FOR v IN d.test END,3)) AS data END FROM default d USE KEYS "k05";
SELECT  OBJECT o.name:o.val FOR o IN  ARRAY_FLATTEN(ag,4) END AS data FROM default d
LETTING ag = ARRAY_AGG(ARRAY OBJECT_PAIRS(v) FOR v IN d.test END);

If you need only DISTINCT field names

SELECT  ARRAY_DISTINCT(ARRAY_FLATTEN(ag,3)) AS data FROM default d
LETTING ag = ARRAY_AGG(ARRAY OBJECT_NAMES(v) FOR v IN d.test END);

List field names (NOT PATHs) nested object/nested arrays etc.

SELECT  ARRAY_DISTINCT(ARRAY_FLATTEN(ag,3)) AS data 
FROM default d LETTING ag = ARRAY_AGG(ARRAY v[0] FOR v IN PAIRS(d) END);