gtk-rs / gtk-rs-core

Rust bindings for GNOME libraries
https://gtk-rs.org/gtk-rs-core
MIT License
293 stars 115 forks source link

Regression in FilenameCollateKey change #1504

Closed ebassi closed 2 months ago

ebassi commented 2 months ago

Turns out that #1329 breaks code that does:

// f is a gio::File and set elsewhere
let basename = f.basename().unwrap();
let key = glib::FilenameCollationKey::from(basename.to_string_lossy());

which now generates a debugging assertion:

assertion failed: cstr.to_str().is_ok()

and a backtrace like this one taken from Amberol:

   0:     0x558f2b400cf5 - std::backtrace_rs::backtrace::libunwind::trace::h58eed11393533053
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/../../backtrace/src/backtrace/libunwind.rs:116:5
   1:     0x558f2b400cf5 - std::backtrace_rs::backtrace::trace_unsynchronized::h6af9bae28ebb6388
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
   2:     0x558f2b400cf5 - std::sys_common::backtrace::_print_fmt::hb6748916642a4fb2
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:68:5
   3:     0x558f2b400cf5 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h3692694645b1bb6a
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:44:22
   4:     0x558f2b4283eb - core::fmt::rt::Argument::fmt::h7aa93977ba74ae0f
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/fmt/rt.rs:165:63
   5:     0x558f2b4283eb - core::fmt::write::h5131d80b4c69b88d
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/fmt/mod.rs:1168:21
   6:     0x558f2b3fd97f - std::io::Write::write_fmt::h1fb327a7d8b0eb36
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/io/mod.rs:1835:15
   7:     0x558f2b400ace - std::sys_common::backtrace::_print::he6ebb7b9d89f4456
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:47:5
   8:     0x558f2b400ace - std::sys_common::backtrace::print::h998d75b840f75a73
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:34:9
   9:     0x558f2b401e69 - std::panicking::default_hook::{{closure}}::h18ec7fe6a38b9da0
  10:     0x558f2b401c0a - std::panicking::default_hook::hfb3f22c2e4075a6a
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:298:9
  11:     0x558f2b402303 - std::panicking::rust_panic_with_hook::h51af00bcb4660c4e
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:795:13
  12:     0x558f2b4021ab - std::panicking::begin_panic_handler::{{closure}}::h39f76aa863fbe8ce
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:656:13
  13:     0x558f2b4011b9 - std::sys_common::backtrace::__rust_end_short_backtrace::h4d10fc2251b89840
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:171:18
  14:     0x558f2b401f17 - rust_begin_unwind
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:652:5
  15:     0x558f2a83cbe3 - core::panicking::panic_fmt::h319840fcbcd912ef
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/panicking.rs:72:14
  16:     0x558f2a83cc8c - core::panicking::panic::h19def44c80243eda
                               at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/panicking.rs:146:5
  17:     0x558f2b38da93 - <glib::gstring::GString as glib::translate::FromGlibPtrFull<*mut u8>>::from_glib_full::hb2fc78a09742dc2c
                               at /var/home/ebassi/.var/app/org.gnome.Builder/cache/gnome-builder/projects/amberol/builds/io.bassi.Amberol.json-flatpak-org.gnome.Platform-master-x86_64-main/cargo-home/registry/src/index.crates.io-6f17d22bba15001f/glib-0.20.2/src/gstring.rs:2021:9
  18:     0x558f2b3a7ac6 - glib::translate::from_glib_full::h0b764bfbb493f071
                               at /var/home/ebassi/.var/app/org.gnome.Builder/cache/gnome-builder/projects/amberol/builds/io.bassi.Amberol.json-flatpak-org.gnome.Platform-master-x86_64-main/cargo-home/registry/src/index.crates.io-6f17d22bba15001f/glib-0.20.2/src/translate.rs:1635:5
  19:     0x558f2b38db07 - <glib::gstring::GString as glib::translate::FromGlibPtrFull<*mut i8>>::from_glib_full::h6bb551910ca65e16
                               at /var/home/ebassi/.var/app/org.gnome.Builder/cache/gnome-builder/projects/amberol/builds/io.bassi.Amberol.json-flatpak-org.gnome.Platform-master-x86_64-main/cargo-home/registry/src/index.crates.io-6f17d22bba15001f/glib-0.20.2/src/gstring.rs:2033:9
  20:     0x558f2b3a7b16 - glib::translate::from_glib_full::h2f271d1e47673c7c
                               at /var/home/ebassi/.var/app/org.gnome.Builder/cache/gnome-builder/projects/amberol/builds/io.bassi.Amberol.json-flatpak-org.gnome.Platform-master-x86_64-main/cargo-home/registry/src/index.crates.io-6f17d22bba15001f/glib-0.20.2/src/translate.rs:1635:5
  21:     0x558f2aa61da1 - <glib::unicollate::FilenameCollationKey as core::convert::From<T>>::from::h9745ab62a0598ee3
                               at /var/home/ebassi/.var/app/org.gnome.Builder/cache/gnome-builder/projects/amberol/builds/io.bassi.Amberol.json-flatpak-org.gnome.Platform-master-x86_64-main/cargo-home/registry/src/index.crates.io-6f17d22bba15001f/glib-0.20.2/src/unicollate.rs:42:13

Originally posted by @ebassi in https://github.com/gtk-rs/gtk-rs-core/issues/1329#issuecomment-2336176266

ebassi commented 2 months ago

The assertion happens when trying to collate a file using Japanese Unicode glyphs on a UTF-8 file system; create a file named 立秋 feat.ちょこ - プナイプナイせんそう.mp3.

sdroege commented 2 months ago

I can't reproduce this here. Unclear what's happening. I tried

use gio::prelude::*;

fn main() {
    let path = std::path::PathBuf::from("立秋 feat.ちょこ - プナイプナイせんそう.mp3");
    let file = gio::File::for_path(&path);
    let _ = std::fs::File::create(&path).unwrap();

    assert!(path.exists());
    let basename = file.basename().unwrap();
    let key = glib::FilenameCollationKey::from(basename.to_string_lossy());
    println!("{} {} {:?}", file.parse_name(), basename.display(), key);
}

and

diff --git a/glib/src/unicollate.rs b/glib/src/unicollate.rs
index 8e43bff5bac..39223532b58 100644
--- a/glib/src/unicollate.rs
+++ b/glib/src/unicollate.rs
@@ -71,6 +71,25 @@ mod tests {
         assert_eq!(unsorted, sorted);
     }

+    #[test]
+    fn collate_non_ascii() {
+        let mut unsorted = vec![
+            String::from("猫の手も借りたい"),
+            String::from("日本語は難しい"),
+            String::from("ありがとう"),
+        ];
+
+        let sorted = vec![
+            String::from("ありがとう"),
+            String::from("日本語は難しい"),
+            String::from("猫の手も借りたい"),
+        ];
+
+        unsorted.sort_by(|s1, s2| CollationKey::from(&s1).cmp(&CollationKey::from(&s2)));
+
+        assert_eq!(unsorted, sorted);
+    }
+
     #[test]
     fn collate_filenames() {
         let mut unsorted = vec![
@@ -91,4 +110,51 @@ mod tests {

         assert_eq!(unsorted, sorted);
     }
+
+    #[test]
+    fn collate_filenames_non_ascii() {
+        let mut unsorted = vec![
+            String::from("猫の手も借りたい.foo"),
+            String::from("日本語は難しい.bar"),
+            String::from("ありがとう.baz"),
+        ];
+
+        let sorted = vec![
+            String::from("ありがとう.baz"),
+            String::from("日本語は難しい.bar"),
+            String::from("猫の手も借りたい.foo"),
+        ];
+
+        unsorted.sort_by(|s1, s2| {
+            FilenameCollationKey::from(&s1).cmp(&FilenameCollationKey::from(&s2))
+        });
+
+        assert_eq!(unsorted, sorted);
+    }
+
+    #[test]
+    fn collate_filenames_from_path() {
+        use std::path::PathBuf;
+
+        let mut unsorted = vec![
+            PathBuf::from("猫の手も借りたい.foo"),
+            PathBuf::from("日本語は難しい.bar"),
+            PathBuf::from("ありがとう.baz"),
+            PathBuf::from("立秋 feat.ちょこ - プナイプナイせんそう.baz"),
+        ];
+
+        let sorted = vec![
+            PathBuf::from("ありがとう.baz"),
+            PathBuf::from("日本語は難しい.bar"),
+            PathBuf::from("猫の手も借りたい.foo"),
+            PathBuf::from("立秋 feat.ちょこ - プナイプナイせんそう.baz"),
+        ];
+
+        unsorted.sort_by(|s1, s2| {
+            FilenameCollationKey::from(&s1.to_string_lossy())
+                .cmp(&FilenameCollationKey::from(&s2.to_string_lossy()))
+        });
+
+        assert_eq!(unsorted, sorted);
+    }
 }
sdroege commented 2 months ago

Also importing directories in Amberol git main with lots of files with Japanese filenames works fine here.

The error looks like GLib is returning us something that is not valid UTF-8.

ebassi commented 2 months ago

Testing with Python:

>>> import locale
>>> locale.strxfrm("立秋 feat.ちょこ - プナイプナイせん.mp3")
'立秋 feat.ちょこ - プナイプナイせん.mp3'
>>> locale.setlocale(locale.LC_ALL, "en_GB.UTF-8")
'en_GB.UTF-8'
>>> locale.strxfrm("立秋 feat.ちょこ - プナイプナイせん.mp3")
'\udcc9\udbc9ƥƎšɨὰᾆὩύὴὠύὴὠὭᾐǵȪŚ\x01\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d5\x1d\x1d\x1d5\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x01\x02\x02\x02\x02\r\x0c\r\x10\x02\x10\x10\x10\x02\x10\x10\r\r\x02\x02\x02\x01\x03㵛\x01懘\x01憊\x01悝\x01擞\x01㵨\x01醉\x01鈺\x01酑\x01㵛\x01㵧\x01㵛\x01釢\x01醪\x01鄀\x01釢\x01醪\x01鄀\x01酴\x01鉶\x01㵨\x01挹\x01搒\x01嶼'

GLib calls strxfrm() inside g_utf8_collate_key_for_filename(), and apparently strxfrm() does not return a UTF-8 encoded string. Ideally, the API documentation should make it clear that the input has to be UTF-8, but not the output.

sdroege commented 2 months ago

See https://github.com/gtk-rs/gtk-rs-core/pull/1505