Client-Side timeout exceeded for operation. C client with durability async

Hi,

I have a client in C like the libeventdirect example (https://github.com/couchbase/libcouchbase/blob/master/example/libeventdirect/main.c)
I made two modifications though to the client:
I added a for loop in bootstrap_callback to insert 10000 entries.(this doesn’t affect the behaviour because with only one insert i get the same error)
I also added a durability check in the store callback before reading the entries. So the read is moved to the durability_callback.

The problem is that when the durability callback is called i get the following error(almost instantly).
"Response was not successful for durability: Client-Side timeout exceeded for operation. Inspect network conditions or increase the timeout"
If i add a timeout in the durability options then the durability callback is not called.

The code is compiled as:
gcc durability.c -o durability -I/opt/couchbase/include -lcouchbase -lcouchbase_libevent

My code is shown below as also the output with LCB_LOGLEVEL=5.
Any ideas?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libcouchbase/couchbase.h>

#include <event2/event.h>

int storecallbacks=0;
int getcallbacks=0;
int durcallbacks=0;

static void
bootstrap_callback(lcb_t instance, lcb_error_t err)
{
 lcb_store_cmd_t cmd;
 const lcb_store_cmd_t *cmds[1];
 char key[50];
 char value[5000];

if (err != LCB_SUCCESS) {
    fprintf(stderr, "ERROR: %s\n", lcb_strerror(instance, err));
    exit(EXIT_FAILURE);
}

printf("bootstrap_callback\n");
/* Since we've got our configuration, let's go ahead and store a value */
int i=0;
for (i=0;i<1;i++)
{
    sprintf(key,"%030d",i+1);
    memset(value,(i+1)%128,2100);
    printf("send %.30s key\n",key);
    cmds[0] = &cmd;
    memset(&cmd, 0, sizeof(cmd));
    cmd.v.v0.key = key;
    cmd.v.v0.nkey = 30;
    cmd.v.v0.bytes = value;
    cmd.v.v0.nbytes = strlen(value);
    cmd.v.v0.operation = LCB_SET;
    err = lcb_store(instance, NULL, 1, cmds);
    if (err != LCB_SUCCESS) {
        fprintf(stderr, "Failed to set up store request: %s\n", lcb_strerror(instance, err));
        exit(EXIT_FAILURE);
    }
  }
  }

static void
durability_callback(
lcb_t instance,
const void *cookie,
lcb_error_t error,
const lcb_durability_resp_t *resp)
{
durcallbacks++;
printf("(%d)durability_callback %.30s\n",durcallbacks,resp->v.v0.key);

lcb_get_cmd_t cmd;
const lcb_get_cmd_t *cmds[1];
cmds[0] = &cmd;

if (error != LCB_SUCCESS) {
    fprintf(stderr, "Response was not successful for durability: %s\n", lcb_strerror(instance,        error));
    exit(EXIT_FAILURE);
 }

 fprintf(stdout, "Needed %d responses to poll\n", resp->v.v0.nresponses);
    /* Time to read it back */
    memset(&cmd, 0, sizeof(cmd));
    cmd.v.v0.key = resp->v.v0.key;
    cmd.v.v0.nkey = resp->v.v0.nkey;
    error = lcb_get(instance, NULL, 1, cmds);

    if (error != LCB_SUCCESS) {
        fprintf(stderr, "Failed to setup get request: %s\n", lcb_strerror(instance, error));
        exit(EXIT_FAILURE);
    }

  (void)cookie; (void)resp;
 }

  static void
 get_callback(lcb_t instance, const void *cookie, lcb_error_t error,
  const lcb_get_resp_t *resp)
 {
  getcallbacks++;
printf("(%d)get_callback %.30s\n",getcallbacks, resp->v.v0.key);
if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to get key: %s\n", lcb_strerror(instance, error));
    exit(EXIT_FAILURE);
}

fprintf(stdout, "I stored and retrieved the key '%.30s'.\n",resp->v.v0.key);

event_base_loopbreak((void *)lcb_get_cookie(instance));
(void)cookie; (void)resp;
}

static void
store_callback(lcb_t instance, const void *cookie, lcb_storage_t op,
lcb_error_t error, const lcb_store_resp_t *resp)
{
storecallbacks++;
printf("(%d)store_callback %.30s\n",storecallbacks,resp->v.v0.key);

if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to store key: %s\n", lcb_strerror(instance, error));
    exit(EXIT_FAILURE);
}
 /* lcb_get_cmd_t cmd;
const lcb_get_cmd_t *cmds[1];
cmds[0] = &cmd;

if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to store key: %s\n", lcb_strerror(instance, error));
    exit(EXIT_FAILURE);
}

// Time to read it back
memset(&cmd, 0, sizeof(cmd));
cmd.v.v0.key = resp->v.v0.key;
cmd.v.v0.nkey = resp->v.v0.nkey;
error = lcb_get(instance, NULL, 1, cmds);

if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to setup get request: %s\n", lcb_strerror(instance, error));
    exit(EXIT_FAILURE);
}
*/

lcb_durability_opts_t options;
lcb_durability_cmd_t cmd = { 0 };
const lcb_durability_cmd_t *cmdp = &cmd;

cmd.v.v0.key = resp->v.v0.key;
cmd.v.v0.nkey = resp->v.v0.nkey;
//cmd.v.v0.cas = resp->v.v0.cas;

options.v.v0.persist_to = 1;
options.v.v0.replicate_to = -1;
options.v.v0.cap_max = 1;
//options.v.v0.timeout = 1000000000;

error = lcb_durability_poll(instance, cookie, &options, 1, &cmdp);
if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to setup durability poll: %s\n", lcb_strerror(instance, error));
    exit(EXIT_FAILURE);
}

fprintf(stderr, "Durability is set\n");
(void)cookie; (void)op; (void)resp;
}

static lcb_io_opt_t
create_libevent_io_ops(struct event_base *evbase)
{
struct lcb_create_io_ops_st ciops;
lcb_io_opt_t ioops;
lcb_error_t error;
printf("create_libevent_io_ops\n");
memset(&ciops, 0, sizeof(ciops));
ciops.v.v0.type = LCB_IO_OPS_LIBEVENT;
ciops.v.v0.cookie = evbase;

error = lcb_create_io_ops(&ioops, &ciops);
if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to create an IOOPS structure for libevent: %s\n", lcb_strerror(NULL, error));
    exit(EXIT_FAILURE);
}

return ioops;
}

static lcb_t
create_libcouchbase_handle(lcb_io_opt_t ioops)
{
lcb_t instance;
lcb_error_t error;
struct lcb_create_st copts;

memset(&copts, 0, sizeof(copts));

/* If NULL, will default to localhost */
copts.version=3;
copts.v.v3.connstr = "couchbase://127.0.0.1/default?operation_timeout=1000000000";
copts.v.v3.io = ioops;
error = lcb_create(&instance, &copts);

if (error != LCB_SUCCESS) {
    fprintf(stderr, "Failed to create a libcouchbase instance: %s\n", lcb_strerror(NULL, error));
    exit(EXIT_FAILURE);
}

/* Set up the callbacks */
lcb_set_bootstrap_callback(instance, bootstrap_callback);
lcb_set_get_callback(instance, get_callback);
lcb_set_store_callback(instance, store_callback);
lcb_set_durability_callback(instance, durability_callback);
printf("create_libcouchbase_handle\n");
if ((error = lcb_connect(instance)) != LCB_SUCCESS) {
    fprintf(stderr, "Failed to connect libcouchbase instance: %s\n", lcb_strerror(NULL, error));
    lcb_destroy(instance);
    exit(EXIT_FAILURE);
}


return instance;
}

/* This example shows how we can hook ourself into an external event loop.
* You may find more information in the blogpost: http://goo.gl/fCTrX */
int main(void)
{
struct event_base *evbase = event_base_new();
lcb_io_opt_t ioops = create_libevent_io_ops(evbase);
lcb_t instance = create_libcouchbase_handle(ioops);

/*Store the event base as the user cookie in our instance so that
 * we may terminate the program when we're done */
lcb_set_cookie(instance, evbase);

/* Run the event loop */
event_base_loop(evbase, 0);

/* Cleanup */
event_base_free(evbase);
lcb_destroy(instance);
exit(EXIT_SUCCESS);
}

OUTPUT:

create_libevent_io_ops
0ms [I0] {5494} [INFO] (instance - L:374) Version=2.4.9, Changeset=6d032d3d01eedd6034b364db29b2e865d8f78511
0ms [I0] {5494} [INFO] (instance - L:375) Effective connection string: couchbase://127.0.0.1/default?operation_timeout=1000000000. Bucket=default
0ms [I0] {5494} [DEBUG] (instance - L:234) Applying initial cntl operation_timeout=1000000000
0ms [I0] {5494} [DEBUG] (instance - L:65) Adding host 127.0.0.1:8091 to initial HTTP bootstrap list
0ms [I0] {5494} [DEBUG] (instance - L:65) Adding host 127.0.0.1:11210 to initial CCCP bootstrap list
create_libcouchbase_handle
0ms [I0] {5494} [DEBUG] (confmon - L:89) Preparing providers (this may be called multiple times)
0ms [I0] {5494} [DEBUG] (confmon - L:99) Provider FILE is DISABLED
0ms [I0] {5494} [DEBUG] (confmon - L:96) Provider CCCP is ENABLED
0ms [I0] {5494} [DEBUG] (confmon - L:96) Provider HTTP is ENABLED
0ms [I0] {5494} [DEBUG] (confmon - L:99) Provider MCRAW is DISABLED
0ms [I0] {5494} [TRACE] (confmon - L:292) Start refresh requested
0ms [I0] {5494} [TRACE] (confmon - L:271) Current provider is CCCP
0ms [I0] {5494} [INFO] (cccp - L:118) Requesting connection to node 127.0.0.1:11210 for CCCP configuration
0ms [I0] {5494} [DEBUG] (lcbio_mgr - L:416) <127.0.0.1:11210> (HE=0x20c1600) Creating new connection because none are available in the pool
0ms [I0] {5494} [DEBUG] (lcbio_mgr - L:321) <127.0.0.1:11210> (HE=0x20c1600) Starting connection on I=0x20c1c10
0ms [I0] {5494} [INFO] (connection - L:450) <127.0.0.1:11210> (SOCK=0x20c1d50) Starting. Timeout=2000000us
0ms [I0] {5494} [TRACE] (connection - L:267) <127.0.0.1:11210> (SOCK=0x20c1d50) Got event handler for new connection
0ms [I0] {5494} [TRACE] (connection - L:314) <127.0.0.1:11210> (SOCK=0x20c1d50) Scheduling asynchronous watch for socket.
0ms [I0] {5494} [TRACE] (connection - L:267) <127.0.0.1:11210> (SOCK=0x20c1d50) Got event handler for new connection
0ms [I0] {5494} [INFO] (connection - L:116) <127.0.0.1:11210> (SOCK=0x20c1d50) Connected
0ms [I0] {5494} [DEBUG] (connection - L:123) <127.0.0.1:11210> (SOCK=0x20c1d50) Successfuly set TCP_NODELAY
0ms [I0] {5494} [DEBUG] (lcbio_mgr - L:271) <127.0.0.1:11210> (HE=0x20c1600) Received result for I=0x20c1c10,C=0x20c1d50; E=0x0
0ms [I0] {5494} [DEBUG] (lcbio_mgr - L:223) <127.0.0.1:11210> (HE=0x20c1600) Assigning R=0x20c15b0 SOCKET=0x20c1d50
0ms [I0] {5494} [DEBUG] (ioctx - L:101) <127.0.0.1:11210> (CTX=0x20c2710,unknown) Pairing with SOCK=0x20c1d50
0ms [I0] {5494} [DEBUG] (ioctx - L:151) <127.0.0.1:11210> (CTX=0x20c2710,sasl) Destroying. PND=0,ENT=1,SORC=1
0ms [I0] {5494} [DEBUG] (ioctx - L:101) <127.0.0.1:11210> (CTX=0x20cac20,unknown) Pairing with SOCK=0x20c1d50
0ms [I0] {5494} [DEBUG] (ioctx - L:151) <127.0.0.1:11210> (CTX=0x20cac20,bc_cccp) Destroying. PND=0,ENT=1,SORC=1
0ms [I0] {5494} [INFO] (lcbio_mgr - L:491) <127.0.0.1:11210> (HE=0x20c1600) Placing socket back into the pool. I=0x20c1c10,C=0x20c1d50
1ms [I0] {5494} [INFO] (confmon - L:174) Setting new configuration. Received via CCCP
1ms [I0] {5494} [DEBUG] (bootstrap - L:55) Instance configured!
1ms [I0] {5494} [DEBUG] (confmon - L:89) Preparing providers (this may be called multiple times)
1ms [I0] {5494} [DEBUG] (confmon - L:99) Provider FILE is DISABLED
1ms [I0] {5494} [DEBUG] (confmon - L:96) Provider CCCP is ENABLED
1ms [I0] {5494} [DEBUG] (confmon - L:99) Provider HTTP is DISABLED
1ms [I0] {5494} [DEBUG] (confmon - L:99) Provider MCRAW is DISABLED
bootstrap_callback
send 000000000000000000000000000001 key
1ms [I0] {5494} [DEBUG] (lcbio_mgr - L:416) <10.70.6.172:11210> (HE=0x20de680) Creating new connection because none are available in the pool
1ms [I0] {5494} [DEBUG] (lcbio_mgr - L:321) <10.70.6.172:11210> (HE=0x20de680) Starting connection on I=0x20dec00
1ms [I0] {5494} [INFO] (connection - L:450) <10.70.6.172:11210> (SOCK=0x20ded50) Starting. Timeout=2764472320us
1ms [I0] {5494} [TRACE] (connection - L:267) <10.70.6.172:11210> (SOCK=0x20ded50) Got event handler for new connection
1ms [I0] {5494} [TRACE] (connection - L:314) <10.70.6.172:11210> (SOCK=0x20ded50) Scheduling asynchronous watch for socket.
1ms [I0] {5494} [TRACE] (connection - L:267) <10.70.6.172:11210> (SOCK=0x20ded50) Got event handler for new connection
1ms [I0] {5494} [INFO] (connection - L:116) <10.70.6.172:11210> (SOCK=0x20ded50) Connected
1ms [I0] {5494} [DEBUG] (connection - L:123) <10.70.6.172:11210> (SOCK=0x20ded50) Successfuly set TCP_NODELAY
1ms [I0] {5494} [DEBUG] (lcbio_mgr - L:271) <10.70.6.172:11210> (HE=0x20de680) Received result for I=0x20dec00,C=0x20ded50; E=0x0
1ms [I0] {5494} [DEBUG] (lcbio_mgr - L:223) <10.70.6.172:11210> (HE=0x20de680) Assigning R=0x20c2440 SOCKET=0x20ded50
1ms [I0] {5494} [TRACE] (server - L:516) <10.70.6.172:11210> (SRV=0x20d3390) Session not yet negotiated. Negotiating
1ms [I0] {5494} [DEBUG] (ioctx - L:101) <10.70.6.172:11210> (CTX=0x20c2c10,unknown) Pairing with SOCK=0x20ded50
2ms [I0] {5494} [DEBUG] (ioctx - L:151) <10.70.6.172:11210> (CTX=0x20c2c10,sasl) Destroying. PND=0,ENT=1,SORC=1
2ms [I0] {5494} [DEBUG] (ioctx - L:101) <10.70.6.172:11210> (CTX=0x20cb060,unknown) Pairing with SOCK=0x20ded50
2ms [I0] {5494} [DEBUG] (server - L:537) <10.70.6.172:11210> (SRV=0x20d3390,IX=0) Setting initial timeout=2764471ms
(1)store_callback 000000000000000000000000000001
Durability is set
2ms [I0] {5494} [DEBUG] (lcbio_mgr - L:416) <10.70.6.191:11210> (HE=0x20cca40) Creating new connection because none are available in the pool
2ms [I0] {5494} [DEBUG] (lcbio_mgr - L:321) <10.70.6.191:11210> (HE=0x20cca40) Starting connection on I=0x20c2c10
2ms [I0] {5494} [INFO] (connection - L:450) <10.70.6.191:11210> (SOCK=0x20c2d40) Starting. Timeout=2764472320us
2ms [I0] {5494} [TRACE] (connection - L:267) <10.70.6.191:11210> (SOCK=0x20c2d40) Got event handler for new connection
2ms [I0] {5494} [TRACE] (connection - L:314) <10.70.6.191:11210> (SOCK=0x20c2d40) Scheduling asynchronous watch for socket.
2ms [I0] {5494} [TRACE] (connection - L:267) <10.70.6.191:11210> (SOCK=0x20c2d40) Got event handler for new connection
2ms [I0] {5494} [INFO] (connection - L:116) <10.70.6.191:11210> (SOCK=0x20c2d40) Connected
2ms [I0] {5494} [DEBUG] (connection - L:123) <10.70.6.191:11210> (SOCK=0x20c2d40) Successfuly set TCP_NODELAY
2ms [I0] {5494} [DEBUG] (lcbio_mgr - L:271) <10.70.6.191:11210> (HE=0x20cca40) Received result for I=0x20c2c10,C=0x20c2d40; E=0x0
2ms [I0] {5494} [DEBUG] (lcbio_mgr - L:223) <10.70.6.191:11210> (HE=0x20cca40) Assigning R=0x20deda0 SOCKET=0x20c2d40
2ms [I0] {5494} [TRACE] (server - L:516) <10.70.6.191:11210> (SRV=0x20d41d0) Session not yet negotiated. Negotiating
2ms [I0] {5494} [DEBUG] (ioctx - L:101) <10.70.6.191:11210> (CTX=0x20cd8d0,unknown) Pairing with SOCK=0x20c2d40
3ms [I0] {5494} [DEBUG] (ioctx - L:151) <10.70.6.191:11210> (CTX=0x20cd8d0,sasl) Destroying. PND=0,ENT=1,SORC=1
3ms [I0] {5494} [DEBUG] (ioctx - L:101) <10.70.6.191:11210> (CTX=0x20cd600,unknown) Pairing with SOCK=0x20c2d40
3ms [I0] {5494} [DEBUG] (server - L:537) <10.70.6.191:11210> (SRV=0x20d41d0,IX=1) Setting initial timeout=2764470ms
4ms [I0] {5494} [WARN] (endure - L:572) Polling durability timed out!
(1)durability_callback 000000000000000000000000000001
Response was not successful for durability: Client-Side timeout exceeded for operation. Inspect network conditions or increase the timeout

Since version 2.4.9, the operation_timeout is intended to mean “seconds”. In your case, you are specifying a very large number which is likely resulting in an integer overflow (internally these still get converted to microseconds). I see you are setting the timeout to 1000 seconds. You can either do operation_timeout=1000.0 (compatible with versions prior to 2.4.9) or you can simply use operation_timeout=1000.

This was a design decision since many users assumed seconds.

I changed operation timeout as also the durability options timeout to 1000 but the result was again “client-side timeout…”.
It didn’t seem to make any difference unless i use a very high number as 100000000

Your memset looks funny as well, and likely ends up having a buffer overrun (at least here with valgrind). This is because none of the bytes in the value buffer are actually NULL:

send 000000000000000000000000000001 key
==79169== Conditional jump or move depends on uninitialised value(s)
==79169==    at 0x10000AD32: strlen (in /usr/local/Cellar/valgrind/HEAD/lib/valgrind/vgpreload_memcheck-amd64-darwin.so)
==79169==    by 0x1000015C0: bootstrap_callback (libevent-sample.c:38)
==79169==    by 0x1000653FD: config_callback (in /usr/local/Cellar/libcouchbase/2.5.0/lib/libcouchbase.2.dylib)

Your code seems to function perfectly when fixing the memory access errors (also, don’t call event_base_free() before lcb_destroy().). In my machine, this worked even with the memory access errors, but undefined behavior means uniform-behavior is not guaranteed :slight_smile:

Your example shows only a single item being stored/operated on, but it’s simple enough to extrapolate for many such items.