HarryWei / cloudxy

Automatically exported from code.google.com/p/cloudxy
6 stars 3 forks source link

[BUG]hlfs_open error when first start #13

Closed GoogleCodeExporter closed 9 years ago

GoogleCodeExporter commented 9 years ago
bug 概述
========
当我们第一次启动hlfs时,我们会调用find_lastest_alive_snapshot(...),
但是这时候我们还没有alive_snapshot.txt文件,
所以第一次启动时find_lastest_alive_snapshot会报错。

测试用例
========
#include <glib.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "api/hlfs.h"
#include "hlfs_log.h"
#include "storage.h"
#include "storage_helper.h"

#define REQ_SIZE 4096
#define TOTAL_SIZE 40960

typedef struct {
    struct hlfs_ctrl *ctrl;
    char *uri;
} Fixture;

/*
 * Fixture setup: builds the "local://<test_dir>/testfs" URI, runs the
 * external ../mkfs.hlfs tool against it, then initialises and opens an
 * HLFS controller on that URI.
 *
 * fixture: receives the heap-allocated URI and the opened controller.
 * data:    NUL-terminated path of the directory the test runs in.
 *
 * Aborts (g_assert) if the URI or command line would be truncated, if
 * init_hlfs() returns NULL, or if hlfs_open() does not return 0.
 */
static void
hlfs_take_snapshot_setup(Fixture *fixture, const void *data) {
    enum { URI_MAX = 128, CMD_MAX = 256 };
    const char *test_dir = (const char *)data;
    g_print("test env dir is %s\n", test_dir);

    char *fs_dir = g_build_filename(test_dir, "testfs", NULL);
    char *uri = g_malloc0(URI_MAX);
    int len = snprintf(uri, URI_MAX, "%s%s", "local://", fs_dir);
    /* A truncated URI would silently point at the wrong directory. */
    g_assert(len > 0 && len < URI_MAX);
    g_free(fs_dir);
    g_print("uri is %s\n", uri);

    /* Writable buffer: the command line is formatted into it below. */
    char cmd[CMD_MAX] = {0};
    len = snprintf(cmd, sizeof cmd, "%s %s %s %s %d %s %d %s %d", "../mkfs.hlfs",
                                "-u", uri,
                                "-b", 8192,
                                "-s", 67108864,
                                "-m", 1024);
    g_assert(len > 0 && len < CMD_MAX);
    g_message("cmd is [%s]", cmd);
    int status = system(cmd);
    if (status != 0) {
        /* Best effort: report it, hlfs_open() below decides pass/fail. */
        g_message("system() returned %d for mkfs command", status);
    }

    fixture->uri = uri;
    g_print("fixture->uri is %s\n", fixture->uri);
    fixture->ctrl = init_hlfs(fixture->uri);
    g_assert(fixture->ctrl != NULL);
    int ret = hlfs_open(fixture->ctrl, 1);
    g_message("ret is %d", ret);
    g_assert(ret == 0);
    return ;
}

/*
 * Takes the i-th snapshot of the first test pass.
 *
 * Indices 0..9 map to a fixed snapshot name; any other index is a no-op.
 * Index 8 deliberately reuses the name "T5" and expects
 * hlfs_take_snapshot() to return -2 (presumably a duplicate-name error —
 * TODO confirm); every other index expects 0.
 */
static void
do_snapshot(Fixture *fixture, int i) {
    static const char *const snapshot_names[] = {
        "T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T5", "T9",
    };
    const int name_count = (int)(sizeof snapshot_names / sizeof snapshot_names[0]);
    char buffer[128] = {0};

    if (i < 0 || i >= name_count) {
        return ;
    }

    snprintf(buffer, sizeof buffer, "%s", snapshot_names[i]);
    g_message("%d buffer is [%s]", i, buffer);
    int ret = hlfs_take_snapshot(fixture->ctrl, buffer);
    if (8 == i) {
        /* Re-used name: the call is expected to be rejected. */
        g_message("ret is %d", ret);
        g_assert(ret == -2);
    } else {
        g_assert(ret == 0);
    }
    return ;
}

/*
 * Takes the i-th snapshot of the second test pass (after the reopen).
 *
 * Indices 0..9 select names "T10".."T19", except index 8 which repeats
 * "T15" and expects hlfs_take_snapshot() to return -2 (presumably a
 * duplicate-name error — TODO confirm). Other indices are ignored.
 */
static void
do_snapshot1(Fixture *fixture, int i) {
    const char *chosen = NULL;

    switch (i) {
    case 0: chosen = "T10"; break;
    case 1: chosen = "T11"; break;
    case 2: chosen = "T12"; break;
    case 3: chosen = "T13"; break;
    case 4: chosen = "T14"; break;
    case 5: chosen = "T15"; break;
    case 6: chosen = "T16"; break;
    case 7: chosen = "T17"; break;
    case 8: chosen = "T15"; break; /* duplicate of index 5 on purpose */
    case 9: chosen = "T19"; break;
    default:
        return ;
    }

    char buffer[128] = {0};
    snprintf(buffer, sizeof buffer, "%s", chosen);
    g_message("%d buffer is [%s]", i, buffer);
    int ret = hlfs_take_snapshot(fixture->ctrl, buffer);
    if (8 != i) {
        g_assert(ret == 0);
        return ;
    }
    g_message("ret is %d", ret);
    g_assert(ret == -2);
    return ;
}

/*
 * Test body: writes REQ_SIZE-byte zero blocks at increasing offsets and
 * takes a snapshot after each write, closes and reopens the controller,
 * then repeats the pass with the second set of snapshot names.
 *
 * The loop bound is inclusive, so each pass performs
 * TOTAL_SIZE / REQ_SIZE + 1 writes; the final write (index 10) has no
 * snapshot because do_snapshot()/do_snapshot1() ignore indices above 9.
 *
 * fixture: holds the opened controller created by setup.
 * data:    unused.
 */
static void
test_hlfs_take_snapshot(Fixture *fixture, const void *data) {
    char content[REQ_SIZE];
    (void)data;

    memset(content, 0, sizeof content);
    for (int offset = 0, i = 0; offset <= TOTAL_SIZE; offset += REQ_SIZE, i++) {
        int ret1 = hlfs_write(fixture->ctrl, content, REQ_SIZE, offset);
        g_assert_cmpint(ret1, ==, REQ_SIZE);
        do_snapshot(fixture, i);
    }

    hlfs_close(fixture->ctrl);
    /* A failed reopen must stop the test here, not surface later as a
     * confusing write failure. */
    int ret = hlfs_open(fixture->ctrl, 1);
    g_assert(ret == 0);

    memset(content, 0, sizeof content);
    for (int offset = 0, i = 0; offset <= TOTAL_SIZE; offset += REQ_SIZE, i++) {
        int ret1 = hlfs_write(fixture->ctrl, content, REQ_SIZE, offset);
        g_assert_cmpint(ret1, ==, REQ_SIZE);
        do_snapshot1(fixture, i);
    }

    return ;
}

/*
 * Fixture tear-down: closes and de-initialises the controller, then
 * releases the URI string allocated in setup.
 *
 * The on-disk "testfs" directory is NOT removed here; only in-memory
 * resources are released.
 *
 * fixture: state produced by setup; its pointers are cleared on return.
 * data:    NUL-terminated path of the test directory (only logged).
 */
static void
hlfs_take_snapshot_tear_down(Fixture *fixture, const void *data) {
    const char *test_dir = (const char *) data;
    g_print("clean dir path: %s\n", test_dir);

    /* Shut the controller down before freeing the URI it was created
     * from, in case the controller still refers to that string. */
    hlfs_close(fixture->ctrl);
    deinit_hlfs(fixture->ctrl);
    fixture->ctrl = NULL;

    g_free(fixture->uri);
    fixture->uri = NULL;
    return;
}

/*
 * Test entry point: initialises logging and the GLib test framework,
 * registers the snapshot test with the current working directory as its
 * user data, and runs it.
 *
 * Returns the value of g_test_run().
 */
int main(int argc, char **argv) {
    if (log4c_init()) {
        /* Logging is best effort; the test still runs without it. */
        g_message("log4c init error!");
    }
    g_test_init(&argc, &argv, NULL);

    /* g_get_current_dir() returns a newly allocated string; keep the
     * pointer so it can be released once the tests have finished. */
    char *test_dir = g_get_current_dir();
    g_test_add("/misc/hlfs_take_snapshot",
                Fixture,
                test_dir,
                hlfs_take_snapshot_setup,
                test_hlfs_take_snapshot,
                hlfs_take_snapshot_tear_down);
    int result = g_test_run();
    g_free(test_dir);
    return result;
}

输出结果
========
jiawei@jiawei-laptop:~/workshop15/cloudxy1/branches/snapshot/src/snapshot/unitte
st/build$ ./test_hlfs_take_snapshot
/misc/hlfs_take_snapshot: test env dir is 
/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest/build
uri is 
local:///home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest
/build/testfs
** Message: cmd is [../mkfs.hlfs -u 
local:///home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest
/build/testfs -b 8192 -s 67108864 -m 1024]
** Message: can not mkdir for our fs 
local:///home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest
/build/testfs
fixture->uri is 
local:///home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest
/build/testfs
** Message: ret is -1
**
ERROR:/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest/t
est_hlfs_take_snapshot.c:67:hlfs_take_snapshot_setup: assertion failed: (ret == 
0)
已放弃

hlfs日志输出
==========
[snipped]
[110]full path 
/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/unittest/build/t
estfs/alive_snapshot.txt
20120130 06:36:06.591 ERROR    hlfslog- 
[/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/utils/storage_helper.c][
file_get_contents][551]file is not exist
20120130 06:36:06.591 DEBUG    hlfslog- 
[/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/backend/local_storage.c]
[local_file_close][95]local -- enter func local_file_close
20120130 06:36:06.591 DEBUG    hlfslog- 
[/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/backend/local_storage.c]
[local_file_close][99]local -- leave func local_file_close
20120130 06:36:06.591 ERROR    hlfslog- 
[/home/jiawei/workshop15/cloudxy1/branches/snapshot/src/snapshot/snapshot_helper
.c][load_all_alive_snapshot][274]can not read snapshot content!

bug分析
=======
就是因为我们第一次启动就去判断alive_snapshot.txt是否存在,
如果不存在就返回错误,
这时候肯定不存在了,所以hlfs日志就给出了如上错误。

修复方案
========
1, 提前先创建这个文件
2, 如果发现这个文件不存在,那么就说明是第一次启动,我们就把alive_ss_name置为NULL
......
我目前采用方案2,当发现这个文件不存在时就把alive_ss_name置为NULL,
那就说明目前还没快照,如果第一个快照发生,那么它的up name就是NULL,
然后再更新alive_ss_name。
但是这种方案可能存在问题,因为这个文件不存在的情况可能比较多,不仅仅是第一次启动时。

Original issue reported on code.google.com by harryxi...@gmail.com on 30 Jan 2012 at 7:29

GoogleCodeExporter commented 9 years ago
该问题的修复方案应该这样更合理:
hlfs_open时,首先判断是否存在snapshot.txt文件,如果该文件存在,
则alive_snapshot.txt必然存在,而且有内容——如果不成立则报错退出。
如果snapshot.txt不存在,则不用检查alive_snapshot.txt了,直接启动即可。

Original comment by kanghua...@gmail.com on 30 Jan 2012 at 10:36

GoogleCodeExporter commented 9 years ago
这个bug,我增加了一个判断snapshot.txt和alive_snapshot.txt是否存在的函数,
在需要的地方进行提前判断,并做相应的处理即可解决问题,具体修复如下:

1, snapshot_helper.c 中增加如下公共函数
==================================

/*
 * Classifies the snapshot bookkeeping state of a storage backend.
 *
 * storage:             backend whose bs_file_is_exist() callback is used.
 * snapshot_file:       name of the snapshot list file.
 * alive_snapshot_file: name of the alive-snapshot file.
 *
 * Returns:
 *   HLFS_FS       - neither file exists (first start).
 *   EHLFS_UNKNOWN - exactly one of the two files exists (inconsistent).
 *   0             - both files exist.
 *
 * Any bs_file_is_exist() result other than EHLFS_NOFILE is treated as
 * "file exists". Each file is probed exactly once so the decision is
 * made on a single consistent observation of the backend.
 */
int is_first_start(struct back_storage *storage,
                const char * snapshot_file,
                const char *alive_snapshot_file) {
    const int snapshot_missing =
        (EHLFS_NOFILE == storage->bs_file_is_exist(storage, snapshot_file));
    const int alive_missing =
        (EHLFS_NOFILE == storage->bs_file_is_exist(storage, alive_snapshot_file));

    if (snapshot_missing && alive_missing) {
        HLOG_DEBUG("first start hlfs ...");
        return HLFS_FS;
    }
    if (alive_missing) {
        /* The snapshot file is present but its alive companion is not. */
        HLOG_ERROR("Can not find alive snapshot file!");
        return EHLFS_UNKNOWN;
    }
    if (snapshot_missing) {
        /* The alive file is present but the snapshot list is not. */
        HLOG_ERROR("Can not find snapshot file!");
        return EHLFS_UNKNOWN;
    }
    return 0;
}

2, hlfs_open 增加如下代码:
========================
[...]
    if (HLFS_FS == (ret = is_first_start(ctrl->storage, SNAPSHOT_FILE, ALIVE_SNAPSHOT_FILE))) {
        ss = (struct snapshot *)g_malloc0(sizeof(struct snapshot));
        if (NULL == ss) {
            HLOG_ERROR("Allocate Error!");
            return EHLFS_MEM;
        }
        sprintf(ss->sname, "%s", FIRST_UP_NAME);
        goto out;
    } else if (EHLFS_UNKNOWN == ret) {
        HLOG_ERROR("is first start error");
        return EHLFS_UNKNOWN;
    }
[...]

测试正常。

Original comment by harryxi...@gmail.com on 31 Jan 2012 at 6:06