COUNT Condition: Data Format Discussion

Query:

cbq> SELECT {SUBSTR(t, 0, 10) : {
   >         did : {
   >                 "foull_entries" : COUNT(NULLIF(ARRAY_LENGTH(bcn), 0)),
   >                 "total_entries" : COUNT(*),
   >                 "nulld_entries" : COUNT(CASE WHEN ARRAY_LENGTH(bcn)==0 THEN 1 ELSE NULL END)
   >         }
   > }}
   > FROM sync_gateway
   > WHERE type="bcn_scan" AND SUBSTR(t, 0, 10) BETWEEN "2016-11-01" AND "2016-12-15"
   > GROUP BY SUBSTR(t, 0, 10), did
   > ORDER BY SUBSTR(t, 0, 10) ASC, did ASC;

Result:

{
    "2016-12-14": {
        "5fe03c918f85052bcd4b4094f1fae051": {
            "foull_entries": 0,
            "nulld_entries": 426,
            "total_entries": 426
        }
    }
},
{
    "2016-12-14": {
        "8dadc3de40bda851abdb4982eebb21fc": {
            "foull_entries": 320,
            "nulld_entries": 0,
            "total_entries": 320
        }
    }
},
{
    "2016-12-14": {
        "8f2d11c831dc6eaa1144c9db5409437e": {
            "foull_entries": 0,
            "nulld_entries": 2880,
            "total_entries": 2880
        }
    }
},
{
    "2016-12-14": {
        "abc7ff93c3f3c4bcde0baa28f2fa688c": {
            "foull_entries": 1425,
            "nulld_entries": 216,
            "total_entries": 1641
        }
    }
},
{
    "2016-12-14": {
        "c6a995f8d8f360d21dcc2c5f5f2a7fd4": {
            "foull_entries": 0,
            "nulld_entries": 1321,
            "total_entries": 1321
        }
    }
},
{
    "2016-12-15": {
        "5fe03c918f85052bcd4b4094f1fae051": {
            "foull_entries": 0,
            "nulld_entries": 673,
            "total_entries": 673
        }
    }
},
{
    "2016-12-15": {
        "8dadc3de40bda851abdb4982eebb21fc": {
            "foull_entries": 247,
            "nulld_entries": 0,
            "total_entries": 247
        }
    }
},
{
    "2016-12-15": {
        "8f2d11c831dc6eaa1144c9db5409437e": {
            "foull_entries": 0,
            "nulld_entries": 1485,
            "total_entries": 1485
        }
    }
},
{
    "2016-12-15": {
        "abc7ff93c3f3c4bcde0baa28f2fa688c": {
            "foull_entries": 924,
            "nulld_entries": 0,
            "total_entries": 924
        }
    }
}

Desired:

{
    "2016-12-14": {[
        "5fe03c918f85052bcd4b4094f1fae051": {
            "foull_entries": 0,
            "nulld_entries": 426,
            "total_entries": 426
        },
        "8dadc3de40bda851abdb4982eebb21fc": {
            "foull_entries": 320,
            "nulld_entries": 0,
            "total_entries": 320
        },
        "8f2d11c831dc6eaa1144c9db5409437e": {
            "foull_entries": 0,
            "nulld_entries": 2880,
            "total_entries": 2880
        },
        "abc7ff93c3f3c4bcde0baa28f2fa688c": {
            "foull_entries": 1425,
            "nulld_entries": 216,
            "total_entries": 1641
        },"c6a995f8d8f360d21dcc2c5f5f2a7fd4": {
            "foull_entries": 0,
            "nulld_entries": 1321,
            "total_entries": 1321
        }
    ]}
},
{
    "2016-12-15": {[
        "5fe03c918f85052bcd4b4094f1fae051": {
            "foull_entries": 0,
            "nulld_entries": 673,
            "total_entries": 673
        },
        "8dadc3de40bda851abdb4982eebb21fc": {
            "foull_entries": 247,
            "nulld_entries": 0,
            "total_entries": 247
        },
        "8f2d11c831dc6eaa1144c9db5409437e": {
            "foull_entries": 0,
            "nulld_entries": 1485,
            "total_entries": 1485
        },
        "abc7ff93c3f3c4bcde0baa28f2fa688c": {
            "foull_entries": 924,
            "nulld_entries": 0,
            "total_entries": 924
        }
    ]}
}
1 Like

Try this.

SELECT RAW
    {
        SUBSTR(t, 0, 10) :
      	ARRAY_SORT( ARRAY_AGG( {
                did : {
                    "foull_entries" : COUNT(NULLIF(ARRAY_LENGTH(bcn), 0)),
                    "total_entries" : COUNT(*),
                    "nulld_entries" : COUNT(CASE WHEN ARRAY_LENGTH(bcn)==0 THEN 1 ELSE NULL END)
                }
	    } ) )   
    }
FROM sync_gateway
WHERE
    type="bcn_scan"
    AND SUBSTR(t, 0, 10) BETWEEN $start AND $end
    AND did IS NOT NULL
GROUP BY SUBSTR(t, 0, 10)
ORDER BY SUBSTR(t, 0, 10) ASC;

Line by line:

cbq> SELECT RAW
   >     {
   >         SUBSTR(t, 0, 10) :
   >         ARRAY_SORT( ARRAY_AGG( {
   >                 did : {
   >                     "foull_entries" : COUNT(NULLIF(ARRAY_LENGTH(bcn), 0)),
   >                     "total_entries" : COUNT(*),
   >                     "nulld_entries" : COUNT(CASE WHEN ARRAY_LENGTH(bcn)==0 THEN 1 ELSE NULL END)
   >                 }
   >         } ) )
   >     }
   > FROM sync_gateway
   > WHERE
   >     type="bcn_scan"
   >     AND SUBSTR(t, 0, 10) BETWEEN "2016-11-01" AND "2016-12-15"
   >     AND did IS NOT NULL
   > GROUP BY SUBSTR(t, 0, 10)
   > ORDER BY SUBSTR(t, 0, 10) ASC;
{
    "requestID": "656bb5a4-3470-43b6-ba1c-fcd0fdf32555",
    "signature": "object",
    "results": [
    ],
    "errors": [
        {
            "code": 5020,
            "msg": "Error updating initial GROUP value. - cause: Aggregate count(nullif(array_length((`sync_gateway`.`bcn`)), 0)) not found."
        }
    ],
    "status": "stopped",
    "metrics": {
        "elapsedTime": "20.98872ms",
        "executionTime": "20.95454ms",
        "resultCount": 0,
        "resultSize": 0,
        "errorCount": 1
    }
}
cbq>

My bad. Try this:

SELECT RAW
    {
      	date :
      	ARRAY_SORT( ARRAY_AGG( counts ) )
    }
FROM (
    SELECT
        SUBSTR(t, 0, 10) AS date,
        {
            did : {
                "foull_entries" : COUNT(NULLIF(ARRAY_LENGTH(bcn), 0)),
                "total_entries" : COUNT(*),
                "nulld_entries" : COUNT(CASE WHEN ARRAY_LENGTH(bcn)==0 THEN 1 ELSE NULL END)
            }
        } AS counts
    FROM sync_gateway
    WHERE
        type="bcn_scan"
        AND SUBSTR(t, 0, 10) BETWEEN $start AND $end
        AND did IS NOT NULL
    GROUP BY SUBSTR(t, 0, 10), did
) AS s
GROUP BY date
ORDER BY date;

Query is exactly what I’ve wanted and also indexable by bcn_scan-date-did

It runs in about 2m0.714s — close to the "executionTime": "2m3.604919887s" error from COUNT condition: INDEX Speed-up improvements

Should I be worried? (I remind you that bucket has ~1.5M entries)

Set max_parallelism to 2x # of cores.

Set pipeline_batch and pipeline_cap to 64K or 256K.

I cannot find any of these options anywhere.

curl -X GET -u u:p http://localhost:9102/settings 

{
	"indexer.settings.bufferPoolBlockSize": 16384,
	"indexer.settings.compaction.abort_exceed_interval": false,
	"indexer.settings.compaction.check_period": 30,
	"indexer.settings.compaction.compaction_mode": "circular",
	"indexer.settings.compaction.days_of_week": "Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday",
	"indexer.settings.compaction.interval": "00:00,00:00",
	"indexer.settings.compaction.min_frag": 30,
	"indexer.settings.compaction.min_size": 524288000,
	"indexer.settings.cpuProfFname": "",
	"indexer.settings.cpuProfile": false,
	"indexer.settings.fast_flush_mode": true,
	"indexer.settings.gc_percent": 100,
	"indexer.settings.inmemory_snapshot.fdb.interval": 200,
	"indexer.settings.inmemory_snapshot.interval": 200,
	"indexer.settings.inmemory_snapshot.moi.interval": 20,
	"indexer.settings.largeSnapshotThreshold": 200,
	"indexer.settings.log_level": "info",
	"indexer.settings.maxVbQueueLength": 0,
	"indexer.settings.max_array_seckey_size": 10240,
	"indexer.settings.max_cpu_percent": 0,
	"indexer.settings.max_writer_lock_prob": 20,
	"indexer.settings.memProfFname": "",
	"indexer.settings.memProfile": false,
	"indexer.settings.memory_quota": 536870912,
	"indexer.settings.minVbQueueLength": 250,
	"indexer.settings.moi.debug": false,
	"indexer.settings.moi.persistence_threads": 8,
	"indexer.settings.moi.recovery_threads": 4,
	"indexer.settings.persisted_snapshot.fdb.interval": 5000,
	"indexer.settings.persisted_snapshot.interval": 5000,
	"indexer.settings.persisted_snapshot.moi.interval": 600000,
	"indexer.settings.persisted_snapshot_init_build.fdb.interval": 5000,
	"indexer.settings.persisted_snapshot_init_build.interval": 5000,
	"indexer.settings.persisted_snapshot_init_build.moi.interval": 600000,
	"indexer.settings.recovery.max_rollbacks": 5,
	"indexer.settings.scan_getseqnos_retries": 30,
	"indexer.settings.scan_timeout": 120000,
	"indexer.settings.send_buffer_size": 1024,
	"indexer.settings.sliceBufSize": 50000,
	"indexer.settings.smallSnapshotThreshold": 30,
	"indexer.settings.statsLogDumpInterval": 60,
	"indexer.settings.storage_mode": "forestdb",
	"indexer.settings.wal_size": 4096,
	"projector.settings.log_level": "info",
	"queryport.client.settings.poolOverflow": 30,
	"queryport.client.settings.poolSize": 1000
}

The only options I found is submitting it as a POST parameter through curl, and still, it says:

If a request includes max_parallelism, it is **capped** by the server max_parallelism.
If a request does not include max_parallelism, **the server wide max_parallelism parameter is used**.

So, that tells me it’s some “hidden” option that’s set to 1 by now.


First of all, I think that this post was perfectly fine. I know this is not a classroom, but it feels like there is no middle ground here (between the documentation, query.pub.*, and this forum). It’s either simple SELECTs, or you are an SQL intermediate and you pull off half-way amazing stuff.
Or you are geralds and you pull rabbits out of a tindermatch box :joy:

I don’t know in what format I should present the data. As I told many times, I just took a big breath, updated the database (to use N1QL instead of the really alien Map/Reduce Views) and dived into the unknown.

So, maybe they will have to be this stringy - maybe they will come with red ribbons on them :slight_smile:

{
	"date" : "2016-12-14",
	"phones" : [{
		"did" : "5fe03c918f85052bcd4b4094f1fae051",
		"entries" : {
			"full" : 0,
			"null" : 426,
			"sums" : 426
		}
	},
	{"..."}
	]
},
{"..."}

All offers are welcome

Ok. Keeping your current format, try this in cbq shell:

\set -max_parallelism (2x # cores);

Then issue your query. This is in 4.5.0+.

Your format is fine if that’s what your application wants. If you want to use index order, then use the original query above, but the output will be in a different format, and you may have to change your app.

cbq> SELECT {SUBSTR(t, 0, 10) : {
   >         did : {
   >                 "foull_entries" : COUNT(NULLIF(ARRAY_LENGTH(bcn), 0)),
   >                 "total_entries" : COUNT(*),
   >                 "nulld_entries" : COUNT(CASE WHEN ARRAY_LENGTH(bcn)==0 THEN 1 ELSE NULL END)
   >         }
   > }}
   > FROM sync_gateway
   > WHERE type="bcn_scan" AND SUBSTR(t, 0, 10) BETWEEN "2016-11-01" AND "2016-12-15"
   > GROUP BY SUBSTR(t, 0, 10), did
   > ORDER BY SUBSTR(t, 0, 10) ASC, did ASC;