sysprog21 / simplefs

A simple native file system for Linux kernel
Other
362 stars 91 forks source link

Reimplement the read and the write operations #57

Closed jserv closed 2 months ago

jserv commented 2 months ago

Currently, simplefs does not implement read and write operations and therefore uses the default implementation provided by the Linux kernel. Unfortunately, this implementation assumes that the data blocks are filled contiguously, which leads to potential performance and usability problems.

The simplefs does not store the size of data within each block, making it challenging to manage partially filled blocks. To address this issue, we need to modify the data structure used for storing data blocks. At present, simplefs utilizes an index_block to store up to 1024 block numbers, each encoded using 4 bytes. An ideal solution could be to limit the partition size to 4 GiB and repurpose the 12 bits of the block number to store the effective size of data within each block.

To simplify this step, we would implement read and write operations that behave like the default implementation, but that will not use the page cache. To do this, we will need to use the sb_bread and brelse functions provided by the Linux kernel to read data directly from the disk. We will also need to use the mark_buffer_dirty and sync_dirty_buffer functions to write data to the disk.

The possible implementation:

static ssize_t simplefs_read(struct file *file,
                             char __user *data,
                             size_t len,
                             loff_t *pos)
{
    unsigned long to_be_copied = 0;

    struct super_block *sb = file->f_inode->i_sb;
    sector_t iblock = *pos / SIMPLEFS_BLOCK_SIZE;
    struct simplefs_file_index_block *index;
    struct buffer_head *bh_index;
    struct simplefs_inode_info *ci = SIMPLEFS_INODE(file->f_inode);

    /* If block number exceeds filesize, fail */
    if (iblock >= SIMPLEFS_BLOCK_SIZE >> 2)
        return -EFBIG;

    /* Read index block from disk */
    bh_index = sb_bread(sb, ci->index_block);
    if (!bh_index)
        return -EIO;
    index = (struct simplefs_file_index_block *) bh_index->b_data;

    /* Get the block number for the current iblock */
    int bno = index->blocks[iblock];

    if (bno == 0) {
        brelse(bh_index);
        return -EIO;
    }

    struct buffer_head *bh = sb_bread(sb, bno);
    if (!bh) {
        brelse(bh_index);
        return -EIO;
    }
    struct simplefs_block_info *bi = (struct simplefs_block_info *) bh->b_data;

    /* get data from the buffer from the current position */
    size_t remain = strlen(bi->data) < SIMPLEFS_BLOCK_SIZE
                        ? strlen(bi->data)
                        : SIMPLEFS_BLOCK_SIZE;
    to_be_copied = len < remain ? len : remain;
    if (copy_to_user(data, bi->data + *pos % SIMPLEFS_BLOCK_SIZE,
                     to_be_copied)) {
        brelse(bh);
        brelse(bh_index);
        return -EFAULT;
    }
    brelse(bh);
    brelse(bh_index);
    if (to_be_copied == remain) {
        *pos += SIMPLEFS_BLOCK_SIZE - *pos % SIMPLEFS_BLOCK_SIZE;
    } else {
        *pos += to_be_copied;
    }
    file->f_pos += to_be_copied;

    return to_be_copied;
}
/*
 * The function returns an int that represents different cases
 * 0: it does not need a new block
 * 1: this block is not allocated
 * 2: there is not enough space to write len bits of data
 */
static inline int need_new_block(struct simplefs_file_index_block *index,
                          sector_t iblock,
                          size_t len,
                          size_t remain_length)
{
    if (index->blocks[iblock] == 0) /* block is empty */
        return 1;
    if (remain_length < len) /* need new block */
        return 2;
    return 0;
}

static ssize_t simplefs_write(struct file *file,
                              const char __user *data,
                              size_t len,
                              loff_t *pos)
{
    struct inode *inode = file->f_inode;
    struct super_block *sb = inode->i_sb;
    struct simplefs_inode_info *ci = SIMPLEFS_INODE(inode);
    struct simplefs_file_index_block *index;
    struct buffer_head *bh_index;
    sector_t iblock = *pos / SIMPLEFS_BLOCK_SIZE;

    bh_index = sb_bread(sb, ci->index_block);
    if (!bh_index)
        return -EIO;
    index = (struct simplefs_file_index_block *) bh_index->b_data;

    int bno;

    struct buffer_head *bh_data = sb_bread(sb, index->blocks[iblock]);
    struct simplefs_block_info *bi =
        (struct simplefs_block_info *) bh_data->b_data;

    size_t length = strlen(bi->data) < SIMPLEFS_BLOCK_SIZE
                        ? strlen(bi->data)
                        : SIMPLEFS_BLOCK_SIZE;
    size_t remain_length = SIMPLEFS_BLOCK_SIZE - length;
    int ret = need_new_block(index, iblock, len, remain_length);
    switch (ret) {
    case 0:
        bno = index->blocks[iblock];
        break;
    case 1:
        bno = get_free_block(SIMPLEFS_SB(sb));
        if (!bno)
            return -ENOSPC;
        index->blocks[iblock] = bno;
        break;
    case 2:
        bno = get_free_block(SIMPLEFS_SB(sb));
        if (!bno)
            return -ENOSPC;
        if (*pos % SIMPLEFS_BLOCK_SIZE == 0) {
            int prev_block = index->blocks[iblock];
            index->blocks[iblock] = bno;
            for (int i = iblock + 1; i < SIMPLEFS_BLOCK_SIZE / sizeof(uint32_t);
                 i++) {
                int should_break = 0;
                if (index->blocks[i] == 0)
                    should_break = 1;
                int temp = index->blocks[i];
                index->blocks[i] = prev_block;
                prev_block = temp;
                if (should_break)
                    break;
            }
        } else {
            int next_block = index->blocks[iblock + 1];
            index->blocks[iblock + 1] = bno;
            char *buffer_content = kmalloc(len - remain_length, GFP_KERNEL);
            if (!buffer_content)
                return -ENOMEM;
            int start = *pos % SIMPLEFS_BLOCK_SIZE;
            memcpy(buffer_content, &bi->data[length - len + remain_length],
                   len - remain_length);
            char *tmp = kmalloc(length - start, GFP_KERNEL);
            if (!tmp) {
                kfree(buffer_content);
                return -ENOMEM;
            }
            memcpy(tmp, &bi->data[start], length - start);
            if (copy_from_user(bi->data + start, data, len)) {
                kfree(buffer_content);
                return -EFAULT;
            }
            memcpy(bi->data + start + len, tmp, length - start);
            kfree(tmp);
            for (int i = iblock + 2; i < SIMPLEFS_BLOCK_SIZE / sizeof(uint32_t);
                 i++) {
                int should_break = 0;
                if (index->blocks[i] == 0)
                    should_break = 1;
                int tmp = index->blocks[i];
                index->blocks[i] = next_block;
                next_block = tmp;
                if (should_break)
                    break;
            }
            struct buffer_head *bh = sb_bread(sb, bno);
            char *buffer = bh->b_data;
            memcpy(buffer, buffer_content, len - remain_length);
            kfree(buffer_content);
            *pos += len;
            file->f_pos = *pos;
            data += len;
            brelse(bh);
            brelse(bh_index);
            inode->i_size += len;
            inode->i_blocks = inode->i_size / SIMPLEFS_BLOCK_SIZE + 2;
            inode->i_mtime = inode->i_ctime = current_time(inode);
            mark_inode_dirty(inode);
            return len;
        }
        break;
    }

    struct buffer_head *bh = sb_bread(sb, bno);
    if (!bh) {
        brelse(bh_index);
        return -EIO;
    }
    char *buffer = bh->b_data;
    int start = *pos % SIMPLEFS_BLOCK_SIZE;
    if (ret == 0 && start > length) {
        start = length;
        if (copy_from_user(buffer + start, data, len)) {
            brelse(bh);
            brelse(bh_index);
            return -EFAULT;
        }
        mark_buffer_dirty(bh);
        sync_dirty_buffer(bh);
        *pos = length + len;
        file->f_pos = *pos;
        data += len;

        brelse(bh);
        brelse(bh_index);

        inode->i_size += len;
        inode->i_mtime = inode->i_ctime = current_time(inode);

        mark_inode_dirty(inode);
        return len;
    } else if (ret == 0 && start < length) {
        char *tmp = kmalloc(length - start, GFP_KERNEL);
        memcpy(tmp, &bi->data[start], length - start);
        if (copy_from_user(bi->data + start, data, len)) {
            kfree(tmp);
            brelse(bh);
            brelse(bh_index);
            return -EFAULT;
        }
        memcpy(bi->data + start + len, tmp, length - start);
        kfree(tmp);
        mark_buffer_dirty(bh);
        sync_dirty_buffer(bh);
        *pos += len;
        file->f_pos = *pos;
        data += len;

        brelse(bh);
        brelse(bh_index);

        inode->i_size += len;
        inode->i_mtime = inode->i_ctime = current_time(inode);
        mark_inode_dirty(inode);
        return len;
    }
    uint32_t remain = len < SIMPLEFS_BLOCK_SIZE - *pos % SIMPLEFS_BLOCK_SIZE
                          ? len
                          : SIMPLEFS_BLOCK_SIZE - *pos % SIMPLEFS_BLOCK_SIZE;
    if (copy_from_user(buffer + *pos % SIMPLEFS_BLOCK_SIZE, data, remain)) {
        brelse(bh);
        brelse(bh_index);
        return -EFAULT;
    }

    mark_buffer_dirty(bh);
    sync_dirty_buffer(bh);

    *pos += remain;
    data += remain;
    file->f_pos = *pos;
    len -= remain;
    brelse(bh);
    brelse(bh_index);

    inode->i_size += remain;
    inode->i_blocks = inode->i_size / SIMPLEFS_BLOCK_SIZE + 2;
    inode->i_mtime = inode->i_ctime = current_time(inode);
    mark_inode_dirty(inode);

    return len;
}
HotMercury commented 2 months ago

At present, simplefs utilizes an index_block to store up to 1024 block numbers, each encoded using 4 bytes.

I have some issues here. Currently, simplefs should be using an extent block to allocate blocks, with each extent being 8 blocks. Therefore, the maximum space should be SIMPLEFS_FILES_PER_BLOCK(341) * SIMPLEFS_MAX_BLOCKS_PER_EXTENT(8) rather than 1024.

Another question is, if we implement read and write operations ourselves without using the page cache, does that mean we won't enter simplefs_write_begin?

jserv commented 2 months ago

Another question is, if we implement read and write operations ourselves without using the page cache, does that mean we won't enter simplefs_write_begin?"

It is intentionally independent from page cache for read and write operations.