Can someone help and point me towards the logical conditions that may cause this failure, so I can try to write code that reproduces it around that area?
I think this failure can occur when many fsync() calls are made in a short time for a file that is being written. For example, something like the following should reproduce the issue:
int fd = open("my_file", O_WRONLY | O_CREAT | O_APPEND, 0600);
while (1) {
    write(fd, buffer, 4096); /* 4 KB per write */
    fsync(fd);
}
I'm trying with this code, but have yet to make it fail. I'll keep trying different buffer sizes.
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/random.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(int argc, char *argv[]) {
    char *fn = "test_fsync.data";
    size_t num_bytes = 4096;
    unsigned char *buffer = malloc(num_bytes);
    getrandom(buffer, num_bytes, 0);

    int fd = open(fn, O_WRONLY|O_CREAT|O_APPEND, S_IWUSR|S_IRUSR);
    if (fd < 0) {
        printf("open(%s): ret(%d) errno(%d) errstr(%s)\n", fn, fd, errno, strerror(errno));
        exit(1);
    }

    ssize_t write_ret = 0;
    int fsync_ret = 0;
    /* append 4 KB blocks and fsync after every write, until a call fails */
    while (1) {
        write_ret = write(fd, buffer, num_bytes);
        if (write_ret < 0)
            printf("write(%s, %zu): ret(%zd) errno(%d) errstr(%s)\n", fn, num_bytes, write_ret, errno, strerror(errno));
        fsync_ret = fsync(fd);
        if (fsync_ret < 0)
            printf("fsync(%s): ret(%d) errno(%d) errstr(%s)\n", fn, fsync_ret, errno, strerror(errno));
        if (write_ret < 0 || fsync_ret < 0)
            break;
    }
    close(fd);
    free(buffer);
    return 0;
}
Thinking more about it, it looks like triggering the failure requires multiple fsync() calls in parallel, so if the above while(1) loop is executed by multiple threads (more than 8) running in parallel and operating on the same file, the assertion failure should eventually occur.
Yep, this does it
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/random.h>
#include <sys/stat.h>
#include <sys/types.h>

#define NUM_THREADS 16

struct thread_data {
    int fd;
    unsigned char *buffer;
    size_t num_bytes;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* each thread appends to the shared fd and fsyncs it in a loop */
void *f_write(void *threadarg) {
    ssize_t write_ret = 0;
    int fsync_ret = 0;
    struct thread_data *t_data = (struct thread_data *) threadarg;
    while (1) {
        pthread_mutex_lock(&lock);
        write_ret = write(t_data->fd, t_data->buffer, t_data->num_bytes);
        pthread_mutex_unlock(&lock); // wrong place
        fsync_ret = fsync(t_data->fd);
        // pthread_mutex_unlock(&lock); // correct place
        if (write_ret < 0 || fsync_ret < 0)
            break;
    }
    pthread_exit(NULL);
}

int main(int argc, char *argv[]) {
    char *fn = "test_fsync.data";
    size_t num_bytes = 4096;
    int fd = open(fn, O_WRONLY|O_CREAT|O_APPEND, S_IWUSR|S_IRUSR);
    if (fd < 0) {
        printf("open(%s): ret(%d) errno(%d) errstr(%s)\n", fn, fd, errno, strerror(errno));
        exit(1);
    }
    unsigned char *buffer = malloc(num_bytes);
    getrandom(buffer, num_bytes, 0);

    struct thread_data td = {
        .fd = fd,
        .buffer = buffer,
        .num_bytes = num_bytes,
    };
    pthread_t threads[NUM_THREADS];
    int rc = 0, t = 0;
    for (t = 0; t < NUM_THREADS; t++) {
        rc = pthread_create(&threads[t], NULL, f_write, (void *) &td);
        if (rc) {
            printf("pthread_create(%d): ret(%d) errno(%d) errstr(%s)\n", t, rc, rc, strerror(rc));
            exit(-1);
        }
    }
    for (t = 0; t < NUM_THREADS; t++)
        pthread_join(threads[t], NULL);
    pthread_mutex_destroy(&lock);
    close(fd);
    free(buffer);
    exit(0);
}
Additionally, when using a smaller number of threads so that it doesn't crash the kernel, e.g. #define NUM_THREADS 2 (any value > 1 should do; it seems unrelated to SMP, though with --smp=1 it might reproduce faster), the process seems to deadlock on nanos (not 100% of the time). The behavior is consistent across all nanos versions (ci stable release build, ci nightly build, local from-source build). I have yet to debug it properly, but it looks like it happens around the mutex unlock ops (unlikely) or fsync (most probably).
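For reference, the // wrong place / // correct place comments in f_write above mark the unlock placements I experimented with. Below is a minimal sketch of the "correct place" variant, reusing the same lock and struct thread_data from the reproducer; the name f_write_serialized is just mine for illustration. It keeps write() and fsync() inside one critical section so only one thread flushes at a time, which is an application-side workaround, not a fix for the kernel issue.

/* sketch: unlock moved after fsync(), so write()+fsync() form one critical
 * section and concurrent fsync() calls on the same file are avoided */
void *f_write_serialized(void *threadarg) {
    struct thread_data *t_data = (struct thread_data *) threadarg;
    while (1) {
        pthread_mutex_lock(&lock);
        ssize_t write_ret = write(t_data->fd, t_data->buffer, t_data->num_bytes);
        int fsync_ret = fsync(t_data->fd);
        pthread_mutex_unlock(&lock); // correct place: after fsync()
        if (write_ret < 0 || fsync_ret < 0)
            break;
    }
    pthread_exit(NULL);
}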
Is there anything else I can help with and/or provide to troubleshoot this?
No, I just haven't had the time to look into this yet, will try to do so soon. Thanks
There is no rush, thanks. I checked again today and landed at https://github.com/nanovms/nanos/pull/1752, where this magic number 8 was added: https://github.com/nanovms/nanos/blob/ad2c7a3abc5f7fab3ae82a7677e9fa01d21f5856/src/kernel/pagecache.c#L1801
That explains why https://github.com/nanovms/nanos/blob/ad2c7a3abc5f7fab3ae82a7677e9fa01d21f5856/src/kernel/pagecache.c#L1088 fails: it gets back a "queue full" from https://github.com/nanovms/nanos/blob/ad2c7a3abc5f7fab3ae82a7677e9fa01d21f5856/src/runtime/queue.h#L43-L45
which I guess is to be expected on busy/slow storage devices. One could increase that value to match the needs of the deployment service/environment, but that doesn't feel right (although it does alleviate the queue-full issues).
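To make the "queue full" part concrete: the pagecache's completion queue is sized with that magic number 8, and the enqueue in runtime/queue.h simply reports failure once the queue is full, which the assertion at pagecache.c#L1088 then trips over. The snippet below is only my own illustration of that generic bounded-queue pattern (tiny_queue / tiny_enqueue / TINY_QUEUE_CAP are made-up names, not nanos APIs), showing how the 9th in-flight item gets rejected, which would line up with needing more than 8 parallel fsync() callers to hit the assertion.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* illustration only (not nanos code): a fixed-capacity queue whose enqueue
 * fails once all 8 slots are taken, mirroring the "queue full" result the
 * pagecache gets under heavy concurrent fsync() load */
#define TINY_QUEUE_CAP 8

struct tiny_queue {
    void *items[TINY_QUEUE_CAP];
    size_t tail, count;
};

static bool tiny_enqueue(struct tiny_queue *q, void *item) {
    if (q->count == TINY_QUEUE_CAP)
        return false; /* queue full: the caller has to handle this */
    q->items[q->tail] = item;
    q->tail = (q->tail + 1) % TINY_QUEUE_CAP;
    q->count++;
    return true;
}

int main(void) {
    struct tiny_queue q = {0};
    int items[TINY_QUEUE_CAP + 1];
    /* the 9th enqueue (index 8) fails, just like the 9th in-flight flush */
    for (int i = 0; i <= TINY_QUEUE_CAP; i++)
        printf("enqueue %d: %s\n", i, tiny_enqueue(&q, &items[i]) ? "ok" : "queue full");
    return 0;
}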
The other thing that "bothers" me, and is still an issue, is the deadlock. I'll keep trying to figure it out.
Tested and works as expected. Thank you.
Note: I have yet to figure out how to reproduce it through a simple service, hence no extra info available atm.