LekKit / RVVM

The RISC-V Virtual Machine
GNU General Public License v3.0
929 stars 65 forks source link

Asking for support for split disk images #117

Open fish4terrisa-MSDSM opened 11 months ago

fish4terrisa-MSDSM commented 11 months ago

It's a bit difficult for users (including me) to use a large disk image (for example, 512G) in RVVM on a network-based filesystem (NFS, rclone FUSE, etc.) or a filesystem with a limited file size (FAT32). It causes trouble for users on a VPS with low disk space and on embedded devices (like the MP3 player I'm currently porting RVVM to). Maybe we can support it with the ability to use all files listed in a config file as a whole drive. For example, File: archriscv.list

/data/archriscv.img.chunk.0
/data/archriscv.img.chunk.1
/data/archriscv.img.chunk.2
...

And the user can run RVVM with rvvm -image_list archriscv.list ... to run the machine just like using rvvm -image archriscv.img ...

Sorry for my poor English :D

LekKit commented 11 months ago

It's a bit difficult for users (including me) to use a large disk image (for example, 512G)

If you are possibly talking about sparse images (Those that aren't consuming host space for unused guest space), then try allocating an image with fallocate rather than dd or similar tools (Assuming the host FS supports sparse files). RVVM will also trim unused blocks from the image. This however doesn't solve anything on FAT32 so see possible solution below

like the MP3 I'm currently porting RVVM to

Sounds cool! Care to share some details? (I'm interested in porting efforts as RVVM codebase should be very portable outside the windowing implementations)

I made a quick blk_dev layer implementation of what you'd like perhaps, tested with 1M files, 16G image and an Arch linux install. Works & performs fairly well, hope I did all the math right. This lacks blk_trim/blk_sync features, as FAT32 doesn't have TRIM & is not power resilient anyways.

Note: for now this is not very customizable at runtime. It also hooks the blockdev subsystem into handling a special filename as a split image, but if we want to merge this upstream I'd have to come up with a better idea.

Feel free to improve/come up with ideas)

src/blk_split.c

/*
blk_split.c - Split-file block images
Copyright (C) 2023  LekKit <github.com/LekKit>

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at https://mozilla.org/MPL/2.0/.

Alternatively, the contents of this file may be used under the terms
of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or any later version.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

#include "blk_io.h"
#include "vector.h"

// Size in bytes of every backing chunk file (1 << 20 = 1 MiB).
// The image is stored as a swarm of such files; customize as needed.
#define CHUNK_SIZE (1 << 20)

// Private state of a split-file block device
typedef struct {
    // One open file handle per chunk, in order: element n backs device
    // bytes [n * CHUNK_SIZE, (n + 1) * CHUNK_SIZE)
    vector_t(rvfile_t*) files;
} blk_split_ctx_t;

static void blk_split_close(void* dev)
{
    blk_split_ctx_t* split = dev;
    vector_foreach(split->files, i) {
        rvclose(vector_at(split->files, i));
    }
    vector_free(split->files);
    free(split);
}

// Read count bytes starting at device offset into dst.
// The request is cut at CHUNK_SIZE boundaries and each piece is read from
// the chunk file that backs it. Returns the number of bytes actually read;
// the transfer stops at the first chunk file that returns a short read.
static size_t blk_split_read(void* dev, void* dst, size_t count, uint64_t offset)
{
    blk_split_ctx_t* ctx = dev;
    uint8_t* out = dst;
    size_t done = 0;

    while (done < count) {
        // Absolute device position of the next byte to transfer
        uint64_t pos = offset + done;
        // Which chunk file holds it, and where inside that file
        size_t file_idx = pos / CHUNK_SIZE;
        size_t file_off = pos % CHUNK_SIZE;
        // Never cross the end of the current chunk file in one call
        size_t span = EVAL_MIN(count - done, CHUNK_SIZE - file_off);

        rvfile_t* chunk_file = vector_at(ctx->files, file_idx);
        size_t got = rvread(chunk_file, out + done, span, file_off);
        done += got;

        if (got != span) {
            // Short read: report what was transferred so far
            return done;
        }
    }

    return done;
}

// Write count bytes from src to the device starting at offset.
// The request is cut at CHUNK_SIZE boundaries and each piece is written to
// the chunk file that backs it. Returns the number of bytes actually
// written; the transfer stops at the first chunk file that returns a short
// write.
static size_t blk_split_write(void* dev, const void* src, size_t count, uint64_t offset)
{
    blk_split_ctx_t* ctx = dev;
    // Keep the const qualifier of src instead of casting it away
    const uint8_t* in = src;
    size_t done = 0;

    while (done < count) {
        // Absolute device position of the next byte to transfer
        uint64_t pos = offset + done;
        // Which chunk file holds it, and where inside that file
        size_t file_idx = pos / CHUNK_SIZE;
        size_t file_off = pos % CHUNK_SIZE;
        // Never cross the end of the current chunk file in one call
        size_t span = EVAL_MIN(count - done, CHUNK_SIZE - file_off);

        rvfile_t* chunk_file = vector_at(ctx->files, file_idx);
        size_t put = rvwrite(chunk_file, in + done, span, file_off);
        done += put;

        if (put != span) {
            // Short write: report what was transferred so far
            break;
        }
    }

    return done;
}

// Block device type descriptor for split images. Only the close, read and
// write handlers are filled in; no other callbacks are set here.
static blkdev_type_t blk_split_type = {
    .name = "split",
    .close = blk_split_close,
    .read  = blk_split_read,
    .write = blk_split_write,
};

// Set up dev as a split-file block device of fixed size (16 GiB) backed by
// files named "blk_split_<n>" (n = 0, 1, 2, ...), each CHUNK_SIZE bytes.
//
// dev    - block device to initialize; only written to on success
// create - when true, RVFILE_CREAT is added to the open flags
//
// Returns true on success. On failure every chunk file opened so far is
// closed, the context is freed, dev is left untouched and false is returned.
bool blk_split_init(blkdev_t* dev, bool create)
{
    const uint64_t image_size = 16ULL << 30;
    const size_t chunk_count = image_size / CHUNK_SIZE;
    blk_split_ctx_t* split = safe_new_obj(blk_split_ctx_t);

    for (size_t i=0; i<chunk_count; ++i) {
        // Build the chunk filename: fixed prefix followed by the chunk index
        char filename[256] = {0};
        size_t len = rvvm_strlcpy(filename, "blk_split_", sizeof(filename));
        int_to_str_dec(filename + len, sizeof(filename) - len, i);
        rvfile_t* file = rvopen(filename, RVFILE_RW | RVFILE_EXCL | (create ? RVFILE_CREAT : 0));

        // Validate the handle before using it in any way
        if (!file) {
            rvvm_error("Failed to open %s!", filename);
            // Closes the chunk files collected so far and frees the context
            blk_split_close(split);
            return false;
        }

        // Force every chunk file to be exactly CHUNK_SIZE bytes long
        rvtruncate(file, CHUNK_SIZE);

        vector_push_back(split->files, file);
    }

    // Publish into dev only once all chunks are open, so a failed init
    // never leaves dev->data pointing at a freed context
    dev->type = &blk_split_type;
    dev->size = image_size;
    dev->data = split;
    return true;
}

Patch src/blk_io.c@500

// Defined in src/blk_split.c; declared here so blk_open() can call it
bool blk_split_init(blkdev_t* dev, bool create);

// Open a block device by filename.
//
// filename - path of the image; the special name "blk_split" is first tried
//            as a split-file image (presumably rvvm_strcmp() is truthy on
//            equality - confirm against its definition)
// opts     - BLKDEV_RW requests read-write, exclusive access to a raw image
//
// Returns a newly allocated device, or NULL if the file could not be opened.
blkdev_t* blk_open(const char* filename, uint8_t opts)
{
    uint8_t filemode = (opts & BLKDEV_RW) ? (RVFILE_RW | RVFILE_EXCL) : 0;
    blkdev_t* dev = safe_new_obj(blkdev_t);
    if (rvvm_strcmp(filename, "blk_split") && blk_split_init(dev, false)) {
        return dev;
    }

    // Fall back to treating filename as a plain raw image
    rvfile_t* file = rvopen(filename, filemode);
    if (!file) {
        // dev was allocated above; release it so a failed open does not leak
        free(dev);
        return NULL;
    }

    blk_init_raw(dev, file);

    return dev;
}
fish4terrisa-MSDSM commented 11 months ago

It's a bit difficult for users (including me) to use a large disk image (for example, 512G)

If you are possibly talking about sparse images (Those that aren't consuming host space for unused guest space), then try allocating an image with fallocate rather than dd or similar tools (Assuming the host FS supports sparse files). RVVM will also trim unused blocks from the image. This however doesn't solve anything on FAT32 so see possible solution below

NFS and some other network-based filesystems don't have full support for sparse files. And some of them (rclone FUSE) will try to cache the whole file in RAM and on disk, which will eat up all the memory and free disk space. Rclone chunker didn't work well, as my poor account will be banned for uploading too many files at one time when creating a chunked file inside the chunker. And a FUSE-based chunker (chunkfs) also has a huge performance loss even on ext4.

like the MP3 I'm currently porting RVVM to

Sounds cool! Care to share some details? (I'm interested in porting efforts as RVVM codebase should be very portable outside the windowing implementations)

Currently, I'm working on a Sony NX-ZW300 walkman. It runs glibc Linux, with many other open source binaries. I'm trying to run RVVM with SDL-1.2, SDL's fbcon driver and tslib. Although it uses Qt5's eglfs to run its GUI, I haven't found a way to combine SDL or RVVM with it. With zhangboyang/llusbdac, I can enable adbd on the device and get a tty.

I made a quick blk_dev layer implementation of what you'd like perhaps, tested with 1M files, 16G image and an Arch linux install. Works & performs fairly well, hope I did all the math right. This lacks blk_trim/blk_sync features, as FAT32 doesn't have TRIM & is not power resilient anyways.

Note: for now this is not very customizable at runtime. It also hooks the blockdev subsystem into handling a special filename as a split image, but if we want to merge this upstream I'd have to come up with a better idea.

Feel free to improve/come up with ideas)

src/blk_split.c

/*
blk_split.c - Split-file block images
Copyright (C) 2023  LekKit <github.com/LekKit>

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at https://mozilla.org/MPL/2.0/.

Alternatively, the contents of this file may be used under the terms
of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or any later version.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

#include "blk_io.h"
#include "vector.h"

// Size in bytes of every backing chunk file (1 << 20 = 1 MiB).
// The image is stored as a swarm of such files; customize as needed.
#define CHUNK_SIZE (1 << 20)

// Private state of a split-file block device
typedef struct {
    // One open file handle per chunk, in order: element n backs device
    // bytes [n * CHUNK_SIZE, (n + 1) * CHUNK_SIZE)
    vector_t(rvfile_t*) files;
} blk_split_ctx_t;

static void blk_split_close(void* dev)
{
    blk_split_ctx_t* split = dev;
    vector_foreach(split->files, i) {
        rvclose(vector_at(split->files, i));
    }
    vector_free(split->files);
    free(split);
}

// Read count bytes starting at device offset into dst.
// The request is cut at CHUNK_SIZE boundaries and each piece is read from
// the chunk file that backs it. Returns the number of bytes actually read;
// the transfer stops at the first chunk file that returns a short read.
static size_t blk_split_read(void* dev, void* dst, size_t count, uint64_t offset)
{
    blk_split_ctx_t* ctx = dev;
    uint8_t* out = dst;
    size_t done = 0;

    while (done < count) {
        // Absolute device position of the next byte to transfer
        uint64_t pos = offset + done;
        // Which chunk file holds it, and where inside that file
        size_t file_idx = pos / CHUNK_SIZE;
        size_t file_off = pos % CHUNK_SIZE;
        // Never cross the end of the current chunk file in one call
        size_t span = EVAL_MIN(count - done, CHUNK_SIZE - file_off);

        rvfile_t* chunk_file = vector_at(ctx->files, file_idx);
        size_t got = rvread(chunk_file, out + done, span, file_off);
        done += got;

        if (got != span) {
            // Short read: report what was transferred so far
            return done;
        }
    }

    return done;
}

// Write count bytes from src to the device starting at offset.
// The request is cut at CHUNK_SIZE boundaries and each piece is written to
// the chunk file that backs it. Returns the number of bytes actually
// written; the transfer stops at the first chunk file that returns a short
// write.
static size_t blk_split_write(void* dev, const void* src, size_t count, uint64_t offset)
{
    blk_split_ctx_t* ctx = dev;
    // Keep the const qualifier of src instead of casting it away
    const uint8_t* in = src;
    size_t done = 0;

    while (done < count) {
        // Absolute device position of the next byte to transfer
        uint64_t pos = offset + done;
        // Which chunk file holds it, and where inside that file
        size_t file_idx = pos / CHUNK_SIZE;
        size_t file_off = pos % CHUNK_SIZE;
        // Never cross the end of the current chunk file in one call
        size_t span = EVAL_MIN(count - done, CHUNK_SIZE - file_off);

        rvfile_t* chunk_file = vector_at(ctx->files, file_idx);
        size_t put = rvwrite(chunk_file, in + done, span, file_off);
        done += put;

        if (put != span) {
            // Short write: report what was transferred so far
            break;
        }
    }

    return done;
}

// Block device type descriptor for split images. Only the close, read and
// write handlers are filled in; no other callbacks are set here.
static blkdev_type_t blk_split_type = {
    .name = "split",
    .close = blk_split_close,
    .read  = blk_split_read,
    .write = blk_split_write,
};

// Set up dev as a split-file block device of fixed size (16 GiB) backed by
// files named "blk_split_<n>" (n = 0, 1, 2, ...), each CHUNK_SIZE bytes.
//
// dev    - block device to initialize; only written to on success
// create - when true, RVFILE_CREAT is added to the open flags
//
// Returns true on success. On failure every chunk file opened so far is
// closed, the context is freed, dev is left untouched and false is returned.
bool blk_split_init(blkdev_t* dev, bool create)
{
    const uint64_t image_size = 16ULL << 30;
    const size_t chunk_count = image_size / CHUNK_SIZE;
    blk_split_ctx_t* split = safe_new_obj(blk_split_ctx_t);

    for (size_t i=0; i<chunk_count; ++i) {
        // Build the chunk filename: fixed prefix followed by the chunk index
        char filename[256] = {0};
        size_t len = rvvm_strlcpy(filename, "blk_split_", sizeof(filename));
        int_to_str_dec(filename + len, sizeof(filename) - len, i);
        rvfile_t* file = rvopen(filename, RVFILE_RW | RVFILE_EXCL | (create ? RVFILE_CREAT : 0));

        // Validate the handle before using it in any way
        if (!file) {
            rvvm_error("Failed to open %s!", filename);
            // Closes the chunk files collected so far and frees the context
            blk_split_close(split);
            return false;
        }

        // Force every chunk file to be exactly CHUNK_SIZE bytes long
        rvtruncate(file, CHUNK_SIZE);

        vector_push_back(split->files, file);
    }

    // Publish into dev only once all chunks are open, so a failed init
    // never leaves dev->data pointing at a freed context
    dev->type = &blk_split_type;
    dev->size = image_size;
    dev->data = split;
    return true;
}

Patch src/blk_io.c@500

// Defined in src/blk_split.c; declared here so blk_open() can call it
bool blk_split_init(blkdev_t* dev, bool create);

// Open a block device by filename.
//
// filename - path of the image; the special name "blk_split" is first tried
//            as a split-file image (presumably rvvm_strcmp() is truthy on
//            equality - confirm against its definition)
// opts     - BLKDEV_RW requests read-write, exclusive access to a raw image
//
// Returns a newly allocated device, or NULL if the file could not be opened.
blkdev_t* blk_open(const char* filename, uint8_t opts)
{
    uint8_t filemode = (opts & BLKDEV_RW) ? (RVFILE_RW | RVFILE_EXCL) : 0;
    blkdev_t* dev = safe_new_obj(blkdev_t);
    if (rvvm_strcmp(filename, "blk_split") && blk_split_init(dev, false)) {
        return dev;
    }

    // Fall back to treating filename as a plain raw image
    rvfile_t* file = rvopen(filename, filemode);
    if (!file) {
        // dev was allocated above; release it so a failed open does not leak
        free(dev);
        return NULL;
    }

    blk_init_raw(dev, file);

    return dev;
}

As for this code, I will try to test it later. Maybe we can use config files ending in .rvdisksplit as the index of the split files, and let the user choose which files are parts of the disk, as well as the chunk_size.