NifTK / NiftyNetModelZoo

[unmaintained] This repository hosts NiftyNet networks pre-trained for specific tasks
http://niftynet.io
Apache License 2.0
50 stars 26 forks source link

Reorganise entries and use Git LFS for zipped files #7

Closed fepegar closed 5 years ago

fepegar commented 5 years ago

Status

READY

Description

Reorganise entries and use Git Large File Storage (LFS) for gzipped files such as data, code or model weights. Using LFS, uploaded files can be larger than 100 MB.

Issues

Closes #5.

Script used for the modifications:

#!/usr/bin/env python3

import os
import urllib.request
from pathlib import Path
from subprocess import call
from configparser import ConfigParser

# File name of an entry's main configuration file.
# NOTE(review): this constant is not referenced in the visible script, which
# spells out the 'main.ini' literal instead.
MAIN = 'main.ini'
# Base URL to which '<entry>/<archive name>' is appended when the 'url'
# options of the configuration files are rewritten.
ZOO_URL = 'https://github.com/NifTK/NiftyNetModelZoo/blob/master'

# Local clone of the repository. The script changes into it at import time, so
# the relative paths used by the functions below resolve inside the clone.
repo_dir = Path('~/git/NiftyNetModelZoo').expanduser()
os.chdir(repo_dir)

def git_mv(src, dst):
    """
    Move files using git

    src and dst may be strings or path-like objects; they are converted to
    strings and handed to ``git mv``. The exit status of git is not checked,
    so a failed move does not raise.
    """
    # Build the argument list directly instead of formatting a command string
    # and splitting it on whitespace, which broke any path containing a space.
    call(['git', 'mv', str(src), str(dst)])

def download(section, out_dir, force=False):
    """
    Download the archive referenced by a configuration section

    section: configparser section proxy. Its 'url' option, if present, is the
        download source and its name gives the output file name.
    out_dir: pathlib.Path of the directory receiving '<section name>.tar.gz'.
    force: download even if the output file already exists.

    Returns the path of the archive, or None if the section has no 'url'.
    Errors raised by urllib propagate to the caller.
    """
    if 'url' not in section:
        return None

    # https://blogs.dropbox.com/developers/2013/08/programmatically-download-content-from-share-links/
    url = section['url'].replace('?dl=1', '?raw=1')
    out_path = out_dir / f'{section.name}.tar.gz'
    if force or not out_path.is_file():
        print(f'Downloading from {url}\n'
              f'to {out_path}')
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a complete archive by
        # the is_file() check on the next run.
        tmp_path = out_path.with_name(out_path.name + '.part')
        try:
            urllib.request.urlretrieve(url, tmp_path)
        except BaseException:
            # Also covers KeyboardInterrupt: remove the partial file, then
            # let the original error continue.
            if tmp_path.exists():
                tmp_path.unlink()
            raise
        tmp_path.replace(out_path)
        print()
    return out_path

def make_dirs():
    """
    Move entries into individual directories

    Files named '<entry>_model_zoo<suffix>' are moved into '<entry>/':
    '.md' becomes 'README.md', '.ini' becomes 'main.ini', and any other
    suffix is kept with its first character dropped. For any other file, a
    directory named after the file's stem is created and '<stem>.ini', if it
    exists, is moved to '<stem>/main.ini'. Hidden files and files whose name
    contains 'README' are left alone.
    """
    # testing entry is an exception
    testing_dir = repo_dir / 'testing'
    testing_main = repo_dir / 'testing.ini'
    testing_data = repo_dir / 'testing_data.ini'
    testing_dir.mkdir(exist_ok=True)
    git_mv(testing_main, testing_dir / 'main.ini')
    git_mv(testing_data, testing_dir / 'data.ini')

    # Other files. The listing is taken once, after the testing files have
    # been moved, so later moves do not affect the iteration.
    for path in sorted(Path().glob('*.*')):
        if path.name.startswith('.'):
            continue
        if 'README' in path.name:
            continue

        # partition() always yields exactly three parts, so a name with more
        # than one '_model_zoo' is split at the first occurrence instead of
        # leaving entry_name/suffix unset or stale from the last iteration.
        entry_name, marker, suffix = path.name.partition('_model_zoo')
        has_marker = bool(marker)
        if not has_marker:
            entry_name = path.stem
        entry_dir = Path(entry_name)
        entry_dir.mkdir(exist_ok=True)

        if not has_marker:
            src = Path(entry_name).with_suffix('.ini')
            dst = entry_dir / 'main.ini'
            # Only move when there is something to move and no main.ini yet
            if src.is_file() and not dst.is_file():
                git_mv(src, dst)
        else:
            if suffix == '.md':
                dst = entry_dir / 'README.md'
            elif suffix == '.ini':
                dst = entry_dir / 'main.ini'
            else:
                # e.g. '_data.ini' -> 'data.ini'
                dst = entry_dir / suffix[1:]
            git_mv(path, dst)

def update_readme():
    """
    Remove every occurrence of '.md' from the repository README
    """
    readme = repo_dir / 'README.md'
    pieces = readme.read_text().split('.md')
    # Joining the pieces with nothing in between drops each '.md'
    readme.write_text(''.join(pieces))

def update_urls():
    """
    Update URLs and download zipped files

    For each non-hidden directory of the current working directory that
    contains a main.ini, download the archive of every section that has a
    'url' option into that directory, point the option at the archive's
    location under ZOO_URL, and write the configuration back.
    """
    entries = [
        path for path in Path().glob('*')
        if path.is_dir() and not path.name.startswith('.')
    ]

    for entry_dir in entries:
        main_path = entry_dir / 'main.ini'
        if not main_path.is_file():
            # ConfigParser.read() silently ignores missing files, so writing
            # the config back would create an empty main.ini in a directory
            # that is not a zoo entry.
            continue

        config = ConfigParser()
        config.read(main_path)
        for section_name in config.sections():
            section = config[section_name]
            out_path = download(section, entry_dir)
            if out_path is not None:
                # as_posix() keeps forward slashes in the URL on any platform
                section['url'] = (
                    f'{ZOO_URL}/{entry_dir.as_posix()}/{out_path.name}'
                )
        with open(main_path, 'w') as configfile:
            config.write(configfile)

def main():
    """
    Run the steps in order: directories, README, then URLs
    """
    steps = (make_dirs, update_readme, update_urls)
    for step in steps:
        step()

if __name__ == '__main__':
    main()
wyli commented 5 years ago

I've checked that the relevant changes here work with the latest net_download command from NifTK/NiftyNet:model-zoo-lfs. Let's keep the files on this branch (until NiftyNet 1.0), so that previous versions of NiftyNet still have access to the previous model zoo format.

wyli commented 5 years ago

Yes, simply replace all occurrences of 5-reorganising-with-lfs with master.