scdl-org / scdl

Soundcloud Music Downloader
GNU General Public License v2.0
3.29k stars 331 forks source link

sanitize_filename fix #481

Closed adithayyil closed 3 months ago

adithayyil commented 3 months ago

Sanitizes a string for use as a folder or filename while preserving the extension.

Handles the cases below; most importantly, it handles hidden files/folders (dotfiles) and fixes a folder sanitization issue (#479).

class TestSanitizeStr(unittest.TestCase):
    """Table-driven expectations for ``sanitize_str``.

    Every call passes the replacement character as the second positional
    argument; the first test additionally passes an extension-like string
    (e.g. ``'.mp3'``) as the third positional argument.
    """

    def test_sanitize_str(self):
        """Check results when a third (extension) argument is supplied."""
        rc = '�'
        # Each row: (name, extension, expected result).
        rows = [
            ('...', '.mp3', '���'),
            ('..bruh..', '.mp3', 'bruh..'),
            ('.99999223hd"""', '.mp3', '99999223hd���'),
            ('....', '.flac', '����'),
            ('file.name', '.txt', 'file.name'),
            ('   leading_space', '.pdf', 'leading_space'),
            ('trailing_space   ', '.docx', 'trailing_space   '),
            ('mix"ed...c/hars!!$$$', '.csv', 'mix�ed...c�hars!!$$$'),
            ('..hiddenfile', '.hidden', 'hiddenfile'),
            ('..', '.mp3', '��'),
            ('a..b..c', '.txt', 'a..b..c'),
            ('a!@#b$%^c&*()d', '.txt', 'a!@#b$%^c&�()d'),
            ('multiple   spaces', '.txt', 'multiple   spaces'),
            ('already_sanitized', '.txt', 'already_sanitized'),
        ]
        # No subTest on purpose: the run stops at the first failing row.
        for name, ext, wanted in rows:
            self.assertEqual(sanitize_str(name, rc, ext), wanted)

    def test_sanitize_str_no_ext(self):
        """Check results when only the name and replacement character are given."""
        rc = '�'
        # Each row: (name, expected result).
        rows = [
            ('...', '���'),
            # original behavior except for stripping leading dots
            ('..bruh..', 'bruh'),
            ('.99999223hd"""', '99999223hd���'),
            ('....', '����'),
            ('filename', 'filename'),
            ('    leading_space', 'leading_space'),
            ('trailing_space    ', 'trailing_space'),
            ('  mix"ed...c/hars!!$$$', 'mix�ed...c�hars!!$$$'),
            ('....hiddenfile', 'hiddenfile'),
            ('', ''),
            ('..', '��'),
            ('a..b..c', 'a..b..c'),
            ('a!@#b$%^c&*()d', 'a!@#b$%^c&�()d'),
            ('multiple   spaces', 'multiple   spaces'),
            ('already_sanitized', 'already_sanitized'),
            ('.hiddenfile', 'hiddenfile'),
        ]
        # No subTest on purpose: the run stops at the first failing row.
        for name, wanted in rows:
            self.assertEqual(sanitize_str(name, rc), wanted)
Ran 2 tests in 0.001s

OK
7x11x13 commented 3 months ago

I made some changes so that it preserves dots as much as possible instead of replacing them (e.g. "..." turns into "_..._")