N1Ql with cover index performance very slow

rkumar · September 11, 2016, 10:30am

Currently i’m using CB v 4.5 with linux 16.04 and having approx 1 million of records, with enable FTS as well.
Below is the structure of doc(small snippet)

{
“ACTIVE_FLAG”: true,
“ANONYMOUS_FLAG”: false,
“TYPE”: “DOCUMENT”,
“AUTHOR”: {
“AUTHOR_ID”: “Jazzy4md”,
“AUTHOR_NAME”: “Jazzy4md”
},
“AUTHOR_POST_FLAG”: false,
“AVGSCORE”: [
{
“CATEGORY_ID”: 4000,
“INSIGHT”: 2,
“SCORE”: -1
},
{
“CATEGORY_ID”: 4001,
“INSIGHT”: 3,
“SCORE”: 0
},
{
“CATEGORY_ID”: 4002,
“INSIGHT”: 4,
“SCORE”: 1
},
{
“CATEGORY_ID”: 4003,
“INSIGHT”: 3,
“SCORE”: 0
},
{
“CATEGORY_ID”: 4004,
“INSIGHT”: 4,
“SCORE”: 1
},
{
“CATEGORY_ID”: 4005,
“INSIGHT”: 3,
“SCORE”: 0
}
],
“CONTENTS”: [
{
“ACTIVE_FLAG”: true,
“CREATED_BY”: "thread86@bfountain.com",
“CREATED_TIME”: “2016-03-08T04:05:17Z”,
“ID”: 95218520,
“MD5”: “a6105c0a611b41b08f1209506350279e”,
“MODELS”: “*”,
“MODIFIED_BY”: "thread86@bfountain.com",
“MODIFIED_TIME”: “2016-03-08T09:35:17Z”,
“NLP_FLAG”: false,
“OPTION_ID”: 1000100000,
“PHI_FLAG”: false,
“PREDEFINED”: false,
“PROFANITY_FLAG”: false,
“PUBLISHING_API_STATUS”: “H”,
“QUESTION_ID”: 1000000075,
“RESPONSE”: “yes”,
“RESPONSE_EN”: “yes”,
“STATUS”: “Q”,
“WORD_COUNT”: 0
}]
}

so we put most of statics data on AVGSCORE,

so create index is:
cbq> CREATE INDEX idx_ns_t ON default (DISTINCT ARRAY i.CATEGORY_ID FOR i IN AVGSCORE END, AVGSCORE,PERSON_ID) WHERE MENTION_TIME>=‘2001-01-01’ AND (array_length(AVGSCORE) >0) and TYPE=“DOCUMENT”;

and select query is below:
SELECT avg(i.SCORE) FROM default t use index(idx_ns_t) UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) >0) AND (t.MENTION_TIME>=‘2001-01-01’) and t.TYPE=“DOCUMENT”;

but its very much slow ,time to execute is near a minute. as well some time it give me
“msg”: “Index scan timed out - cause: Index scan timed out”.

below is the explain as well,please let me know ,how can i optimized this query

explain SELECT avg(i.SCORE) FROM default t UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) >0) AND (t.MENTION_TIME>=‘2001-01-01’) and t.TYPE=“DOCUMENT”;
{
“requestID”: “7a73401d-edad-4dab-a2db-230b8d1fef30”,
“signature”: “json”,
“results”: [
{
“plan”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “DistinctScan”,
“scan”: {
“#operator”: “IndexScan”,
“covers”: [
“cover ((distinct (array (i.CATEGORY_ID) for i in (t.AVGSCORE) end)))”,
“cover ((t.AVGSCORE))”,
“cover ((t.PERSON_ID))”,
“cover ((meta(t).id))”
],
“filter_covers”: {
“cover ((“2001-01-01” \u003c= (t.MENTION_TIME)))”: true,
“cover ((0 \u003c array_length((t.AVGSCORE))))”: true,
“cover ((t.TYPE))”: “DOCUMENT”
},
“index”: “idx_ns_t”,
“index_id”: “66b173f89fd66046”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“4000”
],
“Inclusion”: 3,
“Low”: [
“4000”
]
}
}
],
“using”: “gsi”
}
},
{
“#operator”: “Parallel”,
“~child”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “Unnest”,
“as”: “i”,
“expr”: “cover ((t.AVGSCORE))”
},
{
“#operator”: “Filter”,
“condition”: “(((((i.CATEGORY_ID) = 4000) and cover ((0 \u003c array_length((t.AVGSCORE))))) and cover ((“2001-01-01” \u003c= (t.MENTION_TIME)))) and (cover ((t.TYPE)) = “DOCUMENT”))”
},
{
“#operator”: “InitialGroup”,
“aggregates”: [
“avg((i.SCORE))”
],
“group_keys”:
}
]
}
},
{
“#operator”: “IntermediateGroup”,
“aggregates”: [
“avg((i.SCORE))”
],
“group_keys”:
},
{
“#operator”: “FinalGroup”,
“aggregates”: [
“avg((i.SCORE))”
],
“group_keys”:
},
{
“#operator”: “Parallel”,
“~child”: {
“#operator”: “Sequence”,
“~children”: [
{
“#operator”: “InitialProject”,
“result_terms”: [
{
“expr”: “avg((i.SCORE))”
}
]
},
{
“#operator”: “FinalProject”
}
]
}
}
]
},
“text”: “SELECT avg(i.SCORE) FROM default t UNNEST t.AVGSCORE AS i WHERE (i.CATEGORY_ID =4000) AND (array_length(t.AVGSCORE) \u003e0) AND (t.MENTION_TIME\u003e=‘2001-01-01’) and t.TYPE=“DOCUMENT””
}
],
“status”: “success”,
“metrics”: {
“elapsedTime”: “10.377563ms”,
“executionTime”: “10.339567ms”,
“resultCount”: 1,
“resultSize”: 4622
}

keshav_m · September 14, 2016, 4:33pm

Hi @rkumar

How many documents do you have? And what’s the average number of objects in each AVGSCORE array?

Try creating the indexing without the second key (AVGSCORE)
How many documents do you expect to qualify for this query? Long running queries timeout after 120 seconds of index scan now.
Try the query below as well after the index is rebuilt without AVGSCORE (second key). This should be logically same as what you have.

SELECT avg(i.SCORE) FROM default t use index(idx_ns_t)
WHERE ANY i in t.AVGSCORE SATISFIES (i.CATEGORY_ID =4000) END
AND (array_length(t.AVGSCORE) >0)
AND (t.MENTION_TIME>=‘2001-01-01’)
and t.TYPE=“DOCUMENT”;

rkumar · October 3, 2016, 1:43pm

Hi @keshav_m,@geraldss,
Currently i’m using CB v 4.5 with centOS on AWS and having approx 1 million of records

For below query it will take 6.3 s to execute,which i need to be execute in MS.

explain SELECT round(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t use index(idx_score_adv)
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"and t.SOURCE_ID in [100000,100001,100011,100021,100031,100032,100100,100101,100102,100103,100104,100105,100106,100107,100108,100109,100110,100111,100112,100113,100114,100115,100116,100117,100118,100119,100120,100121,100122,100123]
AND t.PERSON_ID=143124 AND s.CATEGORY_ID=4015
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

similarly,
SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t use index(idx_score_adv)
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"
AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013]
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

for both query execution time is 6+ seconds.

below is the index query( for both query is approx same ):
CREATE INDEX idx_score_adv ON default (PERSON_ID,DISTINCT ARRAY (DISTINCT ARRAY s.SCORE AND s.CATEGORY_ID FOR s IN c.SENTIMENTS END) FOR c IN CONTENTS END) WHERE MENTION_TIME>=‘2001-01-01’ and TYPE=“DOCUMENT”;

below is the explain query:
[
{
“plan”: {
"#operator": “Sequence”,
"~children": [
{
"#operator": “DistinctScan”,
“scan”: {
"#operator": “IndexScan”,
“index”: “idx_score_adv”,
“index_id”: “63fe4493cc84b279”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“successor(143124)”
],
“Inclusion”: 1,
“Low”: [
“143124”
]
}
}
],
“using”: “gsi”
}
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “Fetch”,
“as”: “t”,
“keyspace”: “default”,
“namespace”: “default”
},
{
"#operator": “Unnest”,
“as”: “c”,
“expr”: “(t.CONTENTS)”
},
{
"#operator": “Unnest”,
“as”: “s”,
“expr”: “(c.SENTIMENTS)”
},
{
"#operator": “Filter”,
“condition”: “((((((((t.TYPE) = “DOCUMENT”) and ((t.PERSON_ID) = 143124)) and ((t.LOCATION_ID) = 5204)) and ((s.CATEGORY_ID) in [4000, 4001, 4002, 4003, 4004, 4005, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040])) and (“2001-01-01” <= (t.MENTION_TIME))) and ((t.MENTION_TIME) < “2015-01-01”)) and any client in (t.CLIENT) satisfies (client = “nspc”) end)”
},
{
"#operator": “InitialGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
}
]
}
},
{
"#operator": “IntermediateGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “FinalGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “InitialProject”,
“result_terms”: [
{
“expr”: “round(avg((s.SCORE)), 1)”
},
{
“expr”: “count((c.SENTIMENTS))”
},
{
“expr”: “count((t.ID))”
}
]
},
{
"#operator": “FinalProject”
}
]
}
}
]
},
“text”: “SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)\nFROM default t use index(idx_score_adv)\nUNNEST t.CONTENTS c \nUNNEST c.SENTIMENTS s \nWHERE t.TYPE=“DOCUMENT” \n AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017]\nand t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;”
}
]

structure of the document is(small snippet)

{
“AUTHOR”: {
“AUTHOR_ID”: “262008347153404”,
“AUTHOR_NAME”: “Sundance Creek”
},
“SURVEY_TYPE”: “FEEDBACK”,
“AUTHOR_POST_FLAG”: true,
“PRODUCT_ID”: “apartment”,

“MENTION_TIME”: “2016-03-07T22:54:26Z”,
“SURVEY_MODE”: “HARVESTER”,
“HAS_TESTIMONIAL”: true,
“ENGAGEMENTS”: [],
“AVGSCORE”: [],
“MODIFIED_TIME”: “2016-04-07T03:12:57Z”,
“CREATED_BY”: "thread121@bfountain.com",
“ID”: 10063686,
“MODIFIED_BY”: "thread91@bfountain.com",
“URL”: “https://www.facebook.com/262008347153404/posts/1066877086666522”,
“ANONYMOUS_FLAG”: false,
“TYPE”: “DOCUMENT”,
“PUBLISH_FLAG”: false,
“ACTIVE_FLAG”: true,
“SOURCE_ID”: 100110,
“CONTENTS”: [
{
“ACTIVE_FLAG”: true,
“PROFANITY_FLAG”: false,
“RESPONSE_EN”: “Manager”,
“PREDEFINED”: false,
“MD5”: “ae94be3cd532ce4a025884819eb08c98”,
“PUBLISHING_API_STATUS”: “H”,
“STATUS”: “Q”,
“QUESTION_ID”: 7768,
“OPTION_ID”: 1,
“MODIFIED_TIME”: “2016-03-08T05:50:25Z”,
“CREATED_BY”: "thread121@bfountain.com",
“PHI_FLAG”: false,
“RESPONSE”: “Manager”,
“ID”: 95192433,
“MODELS”: “",
“MODIFIED_BY”: "thread121@bfountain.com",
“WORD_COUNT”: 0,
“NLP_FLAG”: false,
“CREATED_TIME”: “2016-03-08T00:20:25Z”
“SENTIMENTS”: [
{
“ACTIVE_FLAG”: true,
“PROFANITY_FLAG”: false,
“RESPONSE_EN”: “Manager”,
“PREDEFINED”: false,
“MD5”: “ae94be3cd532ce4a025884819eb08c98”,
“PUBLISHING_API_STATUS”: “H”,
“STATUS”: “Q”,
“QUESTION_ID”: 7768,
“OPTION_ID”: 1,
“MODIFIED_TIME”: “2016-03-08T05:50:25Z”,
“CREATED_BY”: "thread121@bfountain.com",
“PHI_FLAG”: false,
“RESPONSE”: “Manager”,
“ID”: 95192433,
“MODELS”: "”,
“MODIFIED_BY”: "thread121@bfountain.com",
“WORD_COUNT”: 0,
“NLP_FLAG”: false,
“CREATED_TIME”: “2016-03-08T00:20:25Z”
}
}

],
“REVIEW_COUNT”: 1,
“LOCATION_ID”: 158942,
“PERSON_ID”: 0,

}

Could you please guide me to improve this query or index so that execution time will be in ms

thank you!

geraldss · October 3, 2016, 3:31pm

Can you post the CREATE INDEX definitions.

rkumar · October 3, 2016, 6:17pm

hi @geraldss,

Thanks for reply,below is the create index definition.

CREATE INDEX idx_score_adv ON default (PERSON_ID,DISTINCT ARRAY
(DISTINCT ARRAY s.SCORE AND s.CATEGORY_ID FOR s IN c.SENTIMENTS END)
FOR c IN CONTENTS END) WHERE MENTION_TIME>=‘2001-01-01’ and
TYPE=“DOCUMENT”;

Even in between i’m trying to create index in better way,so that execution time will be less.
One more thing,is that count is expensive in CBv4.5,or how to optimized the count query

Thank you!

geraldss · October 3, 2016, 6:53pm

Hi @rkumar,

Try the following indexes for 4.5.

CREATE INDEX idx2 ON default
(PERSON_ID, SOURCE_ID)
WHERE MENTION_TIME>='2001-01-01' AND TYPE="DOCUMENT";

CREATE INDEX idx3 ON default
(DISTINCT ARRAY (DISTINCT ARRAY s.CATEGORY_ID FOR s IN c.SENTIMENTS END) FOR c IN CONTENTS END)
WHERE MENTION_TIME>='2001-01-01' AND TYPE="DOCUMENT";

CREATE INDEX idx4 ON default
(PERSON_ID, LOCATION_ID)
WHERE MENTION_TIME>='2001-01-01' AND TYPE="DOCUMENT";

rkumar · October 3, 2016, 6:58pm

Hi @geraldss,

Thanks for quick reply,ok i will test this now,but will that indexs improve the count-query,because in most queries we use count ,to show the user dash board ,that why we need optimized or quick .

one more thing,if i break my index n three parts,will they satisfies all the query which based on that index what i send you.
i will revert you the timing and explain very soon.

Thank you.

rkumar · October 3, 2016, 8:07pm

hi @geraldss,

Thank you for that,off course it reduce the execution time from 6+ seconds to ~1.6 seconds,but still they are in seconds
Below is the query and EXPLAIN
QUERY:
explain SELECT AVG( s.SCORE),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.PERSON_ID=143124 AND t.TYPE=“DOCUMENT” and t.SOURCE_ID=445 AND t.LOCATION_ID=5204
AND c.QUESTION_ID=2471 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

HIS EXPLAIN:
[
{
“plan”: {
"#operator": “Sequence”,
"~children": [
{
"#operator": “IntersectScan”,
“scans”: [
{
"#operator": “IndexScan”,
“index”: “idx2”,
“index_id”: “afabd553bb6b4fe2”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“143124”,
“445”
],
“Inclusion”: 3,
“Low”: [
“143124”,
“445”
]
}
}
],
“using”: “gsi”
},
{
"#operator": “IndexScan”,
“index”: “idx4”,
“index_id”: “4f5d24afe7f0370c”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“143124”,
“5204”
],
“Inclusion”: 3,
“Low”: [
“143124”,
“5204”
]
}
}
],
“using”: “gsi”
}
]
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “Fetch”,
“as”: “t”,
“keyspace”: “default”,
“namespace”: “default”
},
{
"#operator": “Unnest”,
“as”: “c”,
“expr”: “(t.CONTENTS)”
},
{
"#operator": “Unnest”,
“as”: “s”,
“expr”: “(c.SENTIMENTS)”
},
{
"#operator": “Filter”,
“condition”: “(((((((((t.PERSON_ID) = 143124) and ((t.TYPE) = “DOCUMENT”)) and ((t.SOURCE_ID) = 445)) and ((t.LOCATION_ID) = 5204)) and ((c.QUESTION_ID) = 2471)) and ((s.CATEGORY_ID) in [4000, 4001, 4002, 4003, 4004, 4005, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040])) and (“2001-01-01” <= (t.MENTION_TIME))) and ((t.MENTION_TIME) < “2015-01-01”))”
},
{
"#operator": “InitialGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
}
]
}
},
{
"#operator": “IntermediateGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “FinalGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “InitialProject”,
“result_terms”: [
{
“expr”: “avg((s.SCORE))”
},
{
“expr”: “count((c.SENTIMENTS))”
},
{
“expr”: “count((t.ID))”
}
]
},
{
"#operator": “FinalProject”
}
]
}
}
]
},
“text”: “SELECT AVG( s.SCORE),COUNT(c.SENTIMENTS),count(t.ID)\nFROM default t \nUNNEST t.CONTENTS c \nUNNEST c.SENTIMENTS s \n\nWHERE t.PERSON_ID=143124 AND t.TYPE=“DOCUMENT” and t.SOURCE_ID=445 AND t.LOCATION_ID=5204 \nAND c.QUESTION_ID=2471 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;”
}
].

Still it using “IndexScan” rather than IndexCountScan

geraldss · October 3, 2016, 8:39pm

How long do both queries take. Please post the timings for each query.

rkumar · October 3, 2016, 8:43pm

QUERY 1.
SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"
AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

TIME:1.77S

QUERY 2.
SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"
AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4000,4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

TIME:1.82S

EXPLAIN I ALREADY MENTION IN ABOVE REPLY.

geraldss · October 3, 2016, 8:50pm

Ok, that is good progress. Next, we need to get you on 4.5.1 so that we can combine the two indexes into one.

rkumar · October 3, 2016, 8:55pm

@geraldss,

Thank you for that,we will install and move everything in our staging server and will see,
but still this count give me results in seconds,and with increase in records each day(Approx 1 lacks/day),so worried later it will more slow down.
can you guide me to make it more optimized so that get result in ms.or its not possible till version,because we making our performance report, and need to decide whether we use cassandra or CB .

what is that mean to combine the two indexs into one.?

geraldss · October 3, 2016, 10:14pm

Can you try the same query, with two different versions.

(1) USE INDEX (idx2)

(2) USE INDEX (idx4)

Can you run the same query, and post both execution times.

rkumar · October 4, 2016, 5:08am

Hi @geraldss,

Not a significant result.
QUERY:
SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t use index(idx4)
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"
AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

TIME: 173s
And if i use use index(idx2) ,execution time is: 6.08s.

Even how i can assure that my query will use indexcountscan,where it using indexscan

geraldss · October 4, 2016, 9:49am

Do COUNT(c.SENTIMENTS) and COUNT(t.ID) return different values?

rkumar · October 4, 2016, 10:13am

Hi @geraldss,

Yes,in 95% it give the same count,below is the result

result is [
{
"$1": 0.9244186046511628,
"$2": 172,
"$3": 172
}
]

or

[
{
"$1": 0.9,
"$2": 240,
"$3": 240
}
]

geraldss · October 4, 2016, 10:24am

Ok, try USE INDEX( idx3 ) and post the EXPLAIN and execution time.

CREATE INDEX idx3 ON default
(DISTINCT ARRAY (DISTINCT ARRAY s.CATEGORY_ID FOR s IN c.SENTIMENTS END) FOR c IN CONTENTS END, PERSON_ID, LOCATION_ID)
WHERE MENTION_TIME>='2001-01-01' AND TYPE="DOCUMENT";

rkumar · October 4, 2016, 7:11pm

Hi @geraldss,

QUERY : explain SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)
FROM default t use index(idx3_latest)
UNNEST t.CONTENTS c
UNNEST c.SENTIMENTS s
WHERE t.TYPE="DOCUMENT"
AND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]
and t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;

EXPLAIN:
[
{
“plan”: {
"#operator": “Sequence”,
"~children": [
{
"#operator": “DistinctScan”,
“scan”: {
"#operator": “IndexScan”,
“index”: “idx3_latest”,
“index_id”: “77c9d979371f51d1”,
“keyspace”: “default”,
“namespace”: “default”,
“spans”: [
{
“Range”: {
“High”: [
“4001”
],
“Inclusion”: 3,
“Low”: [
“4001”
]
}
},
{
“Range”: {
“High”: [
“4002”
],
“Inclusion”: 3,
“Low”: [
“4002”
]
}
},
{
“Range”: {
“High”: [
“4003”
],
“Inclusion”: 3,
“Low”: [
“4003”
]
}
},
{
“Range”: {
“High”: [
“4004”
],
“Inclusion”: 3,
“Low”: [
“4004”
]
}
},
{
“Range”: {
“High”: [
“4005”
],
“Inclusion”: 3,
“Low”: [
“4005”
]
}
},
{
“Range”: {
“High”: [
“4010”
],
“Inclusion”: 3,
“Low”: [
“4010”
]
}
},
{
“Range”: {
“High”: [
“4011”
],
“Inclusion”: 3,
“Low”: [
“4011”
]
}
},
{
“Range”: {
“High”: [
“4012”
],
“Inclusion”: 3,
“Low”: [
“4012”
]
}
},
{
“Range”: {
“High”: [
“4013”
],
“Inclusion”: 3,
“Low”: [
“4013”
]
}
},
{
“Range”: {
“High”: [
“4014”
],
“Inclusion”: 3,
“Low”: [
“4014”
]
}
},
{
“Range”: {
“High”: [
“4015”
],
“Inclusion”: 3,
“Low”: [
“4015”
]
}
},
{
“Range”: {
“High”: [
“4016”
],
“Inclusion”: 3,
“Low”: [
“4016”
]
}
},
{
“Range”: {
“High”: [
“4017”
],
“Inclusion”: 3,
“Low”: [
“4017”
]
}
},
{
“Range”: {
“High”: [
“4018”
],
“Inclusion”: 3,
“Low”: [
“4018”
]
}
},
{
“Range”: {
“High”: [
“4019”
],
“Inclusion”: 3,
“Low”: [
“4019”
]
}
},
{
“Range”: {
“High”: [
“4020”
],
“Inclusion”: 3,
“Low”: [
“4020”
]
}
},
{
“Range”: {
“High”: [
“4021”
],
“Inclusion”: 3,
“Low”: [
“4021”
]
}
},
{
“Range”: {
“High”: [
“4022”
],
“Inclusion”: 3,
“Low”: [
“4022”
]
}
},
{
“Range”: {
“High”: [
“4023”
],
“Inclusion”: 3,
“Low”: [
“4023”
]
}
},
{
“Range”: {
“High”: [
“4024”
],
“Inclusion”: 3,
“Low”: [
“4024”
]
}
},
{
“Range”: {
“High”: [
“4025”
],
“Inclusion”: 3,
“Low”: [
“4025”
]
}
},
{
“Range”: {
“High”: [
“4026”
],
“Inclusion”: 3,
“Low”: [
“4026”
]
}
},
{
“Range”: {
“High”: [
“4027”
],
“Inclusion”: 3,
“Low”: [
“4027”
]
}
},
{
“Range”: {
“High”: [
“4028”
],
“Inclusion”: 3,
“Low”: [
“4028”
]
}
},
{
“Range”: {
“High”: [
“4029”
],
“Inclusion”: 3,
“Low”: [
“4029”
]
}
},
{
“Range”: {
“High”: [
“4030”
],
“Inclusion”: 3,
“Low”: [
“4030”
]
}
},
{
“Range”: {
“High”: [
“4031”
],
“Inclusion”: 3,
“Low”: [
“4031”
]
}
},
{
“Range”: {
“High”: [
“4032”
],
“Inclusion”: 3,
“Low”: [
“4032”
]
}
},
{
“Range”: {
“High”: [
“4033”
],
“Inclusion”: 3,
“Low”: [
“4033”
]
}
},
{
“Range”: {
“High”: [
“4034”
],
“Inclusion”: 3,
“Low”: [
“4034”
]
}
},
{
“Range”: {
“High”: [
“4035”
],
“Inclusion”: 3,
“Low”: [
“4035”
]
}
},
{
“Range”: {
“High”: [
“4036”
],
“Inclusion”: 3,
“Low”: [
“4036”
]
}
},
{
“Range”: {
“High”: [
“4037”
],
“Inclusion”: 3,
“Low”: [
“4037”
]
}
},
{
“Range”: {
“High”: [
“4038”
],
“Inclusion”: 3,
“Low”: [
“4038”
]
}
},
{
“Range”: {
“High”: [
“4039”
],
“Inclusion”: 3,
“Low”: [
“4039”
]
}
},
{
“Range”: {
“High”: [
“4040”
],
“Inclusion”: 3,
“Low”: [
“4040”
]
}
}
],
“using”: “gsi”
}
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “Fetch”,
“as”: “t”,
“keyspace”: “default”,
“namespace”: “default”
},
{
"#operator": “Unnest”,
“as”: “c”,
“expr”: “(t.CONTENTS)”
},
{
"#operator": “Unnest”,
“as”: “s”,
“expr”: “(c.SENTIMENTS)”
},
{
"#operator": “Filter”,
“condition”: “(((((((t.TYPE) = “DOCUMENT”) and ((t.PERSON_ID) = 143124)) and ((t.LOCATION_ID) = 5204)) and ((s.CATEGORY_ID) in [4001, 4002, 4003, 4004, 4005, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040])) and (“2001-01-01” <= (t.MENTION_TIME))) and ((t.MENTION_TIME) < “2015-01-01”))”
},
{
"#operator": “InitialGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
}
]
}
},
{
"#operator": “IntermediateGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “FinalGroup”,
“aggregates”: [
“avg((s.SCORE))”,
“count((c.SENTIMENTS))”,
“count((t.ID))”
],
“group_keys”: []
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “InitialProject”,
“result_terms”: [
{
“expr”: “round(avg((s.SCORE)), 1)”
},
{
“expr”: “count((c.SENTIMENTS))”
},
{
“expr”: “count((t.ID))”
}
]
},
{
"#operator": “FinalProject”
}
]
}
}
]
},
“text”: “SELECT ROUND(AVG( s.SCORE),1),COUNT(c.SENTIMENTS),count(t.ID)\nFROM default t use index(idx3_latest) \nUNNEST t.CONTENTS c\nUNNEST c.SENTIMENTS s\nWHERE t.TYPE=“DOCUMENT”\nAND t.PERSON_ID=143124 AND t.LOCATION_ID=5204 AND s.CATEGORY_ID IN [4001,4002,4003,4004,4005,4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,4024,4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040]\nand t.MENTION_TIME >=‘2001-01-01’ and t.MENTION_TIME<‘2015-01-01’ ;”
}
]

EXECUTION TIME IS VERY HIGH ,SO THAT I INTERRUPT.

geraldss · October 4, 2016, 7:22pm

Ok. So idx4 is the best so far. We can continue testing with 4.5.1, which will be available soon.

rkumar · October 4, 2016, 7:25pm

Yes,idx2 and idx4 are good w.r.t what the time previously i had from 6+ sec to 1.7s,bu still with only 1M of records it give me results in 1.7s,which i need to pin down within milli second.

one more thing which make me crazy that ,the parameter IN,if i give small no like category_id in[1,2,3] results in MS and if i increase the no like IN[1,2,3,…50],it will take seconds.

SO please let me know when 4.5.1 is releasing,so that same i can convey to my management.

Thank you!