ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License
33.67k stars 2.47k forks source link

remove realpath() from the standard library #19353

Open andrewrk opened 5 months ago

andrewrk commented 5 months ago

This function is not portable, relies on a legacy permissions model, cannot handle arbitrarily nested file system paths, and is therefore typically a bug to call. Yet, many programmers reach for this function out of naivety. To prevent it from cropping up in various codebases, and causing an overall degradation of software quality, omit it from the Zig Standard Library.

diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig
index 11fbc13c41..65922d3a0a 100644
--- a/lib/std/fs/Dir.zig
+++ b/lib/std/fs/Dir.zig
@@ -1201,126 +1201,21 @@ pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOp
     };
 }

-pub const RealPathError = posix.RealPathError;
-
-///  This function returns the canonicalized absolute pathname of
-/// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this
-/// `Dir` handle and returns the canonicalized absolute pathname of `pathname`
-/// argument.
-/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
-/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
-/// This function is not universally supported by all platforms.
-/// Currently supported hosts are: Linux, macOS, and Windows.
-/// See also `Dir.realpathZ`, `Dir.realpathW`, and `Dir.realpathAlloc`.
-pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) RealPathError![]u8 {
-    if (builtin.os.tag == .wasi) {
-        @compileError("realpath is not available on WASI");
-    }
-    if (builtin.os.tag == .windows) {
-        const pathname_w = try std.os.windows.sliceToPrefixedFileW(self.fd, pathname);
-        return self.realpathW(pathname_w.span(), out_buffer);
-    }
-    const pathname_c = try posix.toPosixPath(pathname);
-    return self.realpathZ(&pathname_c, out_buffer);
-}
-
-/// Same as `Dir.realpath` except `pathname` is null-terminated.
-/// See also `Dir.realpath`, `realpathZ`.
-pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathError![]u8 {
-    if (builtin.os.tag == .windows) {
-        const pathname_w = try posix.windows.cStrToPrefixedFileW(self.fd, pathname);
-        return self.realpathW(pathname_w.span(), out_buffer);
-    }
-
-    const flags: posix.O = switch (builtin.os.tag) {
-        .linux => .{
-            .NONBLOCK = true,
-            .CLOEXEC = true,
-            .PATH = true,
-        },
-        else => .{
-            .NONBLOCK = true,
-            .CLOEXEC = true,
-        },
-    };
-
-    const fd = posix.openatZ(self.fd, pathname, flags, 0) catch |err| switch (err) {
-        error.FileLocksNotSupported => return error.Unexpected,
-        error.FileBusy => return error.Unexpected,
-        error.WouldBlock => return error.Unexpected,
-        error.InvalidUtf8 => unreachable, // WASI-only
-        else => |e| return e,
-    };
-    defer posix.close(fd);
-
-    // Use of MAX_PATH_BYTES here is valid as the realpath function does not
-    // have a variant that takes an arbitrary-size buffer.
-    // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
-    // NULL out parameter (GNU's canonicalize_file_name) to handle overelong
-    // paths. musl supports passing NULL but restricts the output to PATH_MAX
-    // anyway.
-    var buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
-    const out_path = try posix.getFdPath(fd, &buffer);
-
-    if (out_path.len > out_buffer.len) {
-        return error.NameTooLong;
-    }
-
-    const result = out_buffer[0..out_path.len];
-    @memcpy(result, out_path);
-    return result;
-}
-
-/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 LE encoded.
-/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// See also `Dir.realpath`, `realpathW`.
-pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathError![]u8 {
-    const w = std.os.windows;
-
-    const access_mask = w.GENERIC_READ | w.SYNCHRONIZE;
-    const share_access = w.FILE_SHARE_READ;
-    const creation = w.FILE_OPEN;
-    const h_file = blk: {
-        const res = w.OpenFile(pathname, .{
-            .dir = self.fd,
-            .access_mask = access_mask,
-            .share_access = share_access,
-            .creation = creation,
-            .filter = .any,
-        }) catch |err| switch (err) {
-            error.WouldBlock => unreachable,
-            else => |e| return e,
-        };
-        break :blk res;
-    };
-    defer w.CloseHandle(h_file);
-
-    var wide_buf: [w.PATH_MAX_WIDE]u16 = undefined;
-    const wide_slice = try w.GetFinalPathNameByHandle(h_file, .{}, &wide_buf);
-    var big_out_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-    const end_index = std.unicode.wtf16LeToWtf8(&big_out_buf, wide_slice);
-    if (end_index > out_buffer.len)
-        return error.NameTooLong;
-    const result = out_buffer[0..end_index];
-    @memcpy(result, big_out_buf[0..end_index]);
-    return result;
-}
-
-pub const RealPathAllocError = RealPathError || Allocator.Error;
-
-/// Same as `Dir.realpath` except caller must free the returned memory.
-/// See also `Dir.realpath`.
-pub fn realpathAlloc(self: Dir, allocator: Allocator, pathname: []const u8) RealPathAllocError![]u8 {
-    // Use of MAX_PATH_BYTES here is valid as the realpath function does not
-    // have a variant that takes an arbitrary-size buffer.
-    // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
-    // NULL out parameter (GNU's canonicalize_file_name) to handle overelong
-    // paths. musl supports passing NULL but restricts the output to PATH_MAX
-    // anyway.
-    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-    return allocator.dupe(u8, try self.realpath(pathname, buf[0..]));
+/// Returns the canonicalized absolute path of `pathname` relative to this `Dir`.
+///
+/// If `pathname` is absolute, ignores this `Dir` handle and returns the
+/// canonicalized absolute pathname of `pathname` argument.
+///
+/// This function is not portable, relies on a legacy permissions model, cannot
+/// handle arbitrarily nested file system paths, and is therefore typically a
+/// bug to call. Yet, many programmers reach for this function out of naivety.
+/// To prevent it from cropping up in various codebases, and causing an overall
+/// degradation of software quality, it is omitted from the Zig Standard
+/// Library.
+pub fn realpath(dir: Dir, pathname: []const u8) []const u8 {
+    _ = dir;
+    _ = pathname;
+    @compileError("zig strongly recommends not using realpath");
 }

 /// Changes the current working directory to the open directory handle.
diff --git a/lib/std/os.zig b/lib/std/os.zig
index 2138f5cd20..bc9a4fdfea 100644
--- a/lib/std/os.zig
+++ b/lib/std/os.zig
@@ -5483,310 +5483,6 @@ pub fn flock(fd: fd_t, operation: i32) FlockError!void {
     }
 }

-pub const RealPathError = error{
-    FileNotFound,
-    AccessDenied,
-    NameTooLong,
-    NotSupported,
-    NotDir,
-    SymLinkLoop,
-    InputOutput,
-    FileTooBig,
-    IsDir,
-    ProcessFdQuotaExceeded,
-    SystemFdQuotaExceeded,
-    NoDevice,
-    SystemResources,
-    NoSpaceLeft,
-    FileSystem,
-    BadPathName,
-    DeviceBusy,
-
-    SharingViolation,
-    PipeBusy,
-
-    /// Windows-only; file paths provided by the user must be valid WTF-8.
-    /// https://simonsapin.github.io/wtf-8/
-    InvalidWtf8,
-
-    /// On Windows, `\\server` or `\\server\share` was not found.
-    NetworkNotFound,
-
-    PathAlreadyExists,
-
-    /// On Windows, antivirus software is enabled by default. It can be
-    /// disabled, but Windows Update sometimes ignores the user's preference
-    /// and re-enables it. When enabled, antivirus software on Windows
-    /// intercepts file system operations and makes them significantly slower
-    /// in addition to possibly failing with this error code.
-    AntivirusInterference,
-
-    /// On Windows, the volume does not contain a recognized file system. File
-    /// system drivers might not be loaded, or the volume may be corrupt.
-    UnrecognizedVolume,
-} || UnexpectedError;
-
-/// Return the canonicalized absolute pathname.
-/// Expands all symbolic links and resolves references to `.`, `..`, and
-/// extra `/` characters in `pathname`.
-/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
-/// The return value is a slice of `out_buffer`, but not necessarily from the beginning.
-/// See also `realpathZ` and `realpathW`.
-/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
-/// Calling this function is usually a bug.
-pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
-    if (builtin.os.tag == .windows) {
-        const pathname_w = try windows.sliceToPrefixedFileW(null, pathname);
-        return realpathW(pathname_w.span(), out_buffer);
-    } else if (builtin.os.tag == .wasi and !builtin.link_libc) {
-        @compileError("WASI does not support os.realpath");
-    }
-    const pathname_c = try toPosixPath(pathname);
-    return realpathZ(&pathname_c, out_buffer);
-}
-
-/// Same as `realpath` except `pathname` is null-terminated.
-/// Calling this function is usually a bug.
-pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
-    if (builtin.os.tag == .windows) {
-        const pathname_w = try windows.cStrToPrefixedFileW(null, pathname);
-        return realpathW(pathname_w.span(), out_buffer);
-    } else if (builtin.os.tag == .wasi and !builtin.link_libc) {
-        return realpath(mem.sliceTo(pathname, 0), out_buffer);
-    }
-    if (!builtin.link_libc) {
-        const flags: O = switch (builtin.os.tag) {
-            .linux => .{
-                .NONBLOCK = true,
-                .CLOEXEC = true,
-                .PATH = true,
-            },
-            else => .{
-                .NONBLOCK = true,
-                .CLOEXEC = true,
-            },
-        };
-        const fd = openZ(pathname, flags, 0) catch |err| switch (err) {
-            error.FileLocksNotSupported => unreachable,
-            error.WouldBlock => unreachable,
-            error.FileBusy => unreachable, // not asking for write permissions
-            error.InvalidUtf8 => unreachable, // WASI-only
-            else => |e| return e,
-        };
-        defer close(fd);
-
-        return getFdPath(fd, out_buffer);
-    }
-    const result_path = std.c.realpath(pathname, out_buffer) orelse switch (@as(E, @enumFromInt(std.c._errno().*))) {
-        .SUCCESS => unreachable,
-        .INVAL => unreachable,
-        .BADF => unreachable,
-        .FAULT => unreachable,
-        .ACCES => return error.AccessDenied,
-        .NOENT => return error.FileNotFound,
-        .OPNOTSUPP => return error.NotSupported,
-        .NOTDIR => return error.NotDir,
-        .NAMETOOLONG => return error.NameTooLong,
-        .LOOP => return error.SymLinkLoop,
-        .IO => return error.InputOutput,
-        else => |err| return unexpectedErrno(err),
-    };
-    return mem.sliceTo(result_path, 0);
-}
-
-/// Same as `realpath` except `pathname` is WTF16LE-encoded.
-/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// Calling this function is usually a bug.
-pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
-    const w = windows;
-
-    const dir = std.fs.cwd().fd;
-    const access_mask = w.GENERIC_READ | w.SYNCHRONIZE;
-    const share_access = w.FILE_SHARE_READ;
-    const creation = w.FILE_OPEN;
-    const h_file = blk: {
-        const res = w.OpenFile(pathname, .{
-            .dir = dir,
-            .access_mask = access_mask,
-            .share_access = share_access,
-            .creation = creation,
-            .filter = .any,
-        }) catch |err| switch (err) {
-            error.WouldBlock => unreachable,
-            else => |e| return e,
-        };
-        break :blk res;
-    };
-    defer w.CloseHandle(h_file);
-
-    return getFdPath(h_file, out_buffer);
-}
-
-pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool {
-    return switch (os.tag) {
-        .windows,
-        .macos,
-        .ios,
-        .watchos,
-        .tvos,
-        .linux,
-        .solaris,
-        .illumos,
-        .freebsd,
-        => true,
-
-        .dragonfly => os.version_range.semver.max.order(.{ .major = 6, .minor = 0, .patch = 0 }) != .lt,
-        .netbsd => os.version_range.semver.max.order(.{ .major = 10, .minor = 0, .patch = 0 }) != .lt,
-        else => false,
-    };
-}
-
-/// Return canonical path of handle `fd`.
-/// This function is very host-specific and is not universally supported by all hosts.
-/// For example, while it generally works on Linux, macOS, FreeBSD or Windows, it is
-/// unsupported on WASI.
-/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
-/// Calling this function is usually a bug.
-pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
-    if (!comptime isGetFdPathSupportedOnTarget(builtin.os)) {
-        @compileError("querying for canonical path of a handle is unsupported on this host");
-    }
-    switch (builtin.os.tag) {
-        .windows => {
-            var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined;
-            const wide_slice = try windows.GetFinalPathNameByHandle(fd, .{}, wide_buf[0..]);
-
-            const end_index = std.unicode.wtf16LeToWtf8(out_buffer, wide_slice);
-            return out_buffer[0..end_index];
-        },
-        .macos, .ios, .watchos, .tvos => {
-            // On macOS, we can use F.GETPATH fcntl command to query the OS for
-            // the path to the file descriptor.
-            @memset(out_buffer[0..MAX_PATH_BYTES], 0);
-            switch (errno(system.fcntl(fd, F.GETPATH, out_buffer))) {
-                .SUCCESS => {},
-                .BADF => return error.FileNotFound,
-                .NOSPC => return error.NameTooLong,
-                // TODO man pages for fcntl on macOS don't really tell you what
-                // errno values to expect when command is F.GETPATH...
-                else => |err| return unexpectedErrno(err),
-            }
-            const len = mem.indexOfScalar(u8, out_buffer[0..], 0) orelse MAX_PATH_BYTES;
-            return out_buffer[0..len];
-        },
-        .linux => {
-            var procfs_buf: ["/proc/self/fd/-2147483648\x00".len]u8 = undefined;
-            const proc_path = std.fmt.bufPrintZ(procfs_buf[0..], "/proc/self/fd/{d}", .{fd}) catch unreachable;
-
-            const target = readlinkZ(proc_path, out_buffer) catch |err| {
-                switch (err) {
-                    error.NotLink => unreachable,
-                    error.BadPathName => unreachable,
-                    error.InvalidUtf8 => unreachable, // WASI-only
-                    error.InvalidWtf8 => unreachable, // Windows-only
-                    error.UnsupportedReparsePointType => unreachable, // Windows-only
-                    error.NetworkNotFound => unreachable, // Windows-only
-                    else => |e| return e,
-                }
-            };
-            return target;
-        },
-        .solaris, .illumos => {
-            var procfs_buf: ["/proc/self/path/-2147483648\x00".len]u8 = undefined;
-            const proc_path = std.fmt.bufPrintZ(procfs_buf[0..], "/proc/self/path/{d}", .{fd}) catch unreachable;
-
-            const target = readlinkZ(proc_path, out_buffer) catch |err| switch (err) {
-                error.UnsupportedReparsePointType => unreachable,
-                error.NotLink => unreachable,
-                else => |e| return e,
-            };
-            return target;
-        },
-        .freebsd => {
-            if (comptime builtin.os.isAtLeast(.freebsd, .{ .major = 13, .minor = 0, .patch = 0 }) orelse false) {
-                var kfile: system.kinfo_file = undefined;
-                kfile.structsize = system.KINFO_FILE_SIZE;
-                switch (errno(system.fcntl(fd, system.F.KINFO, @intFromPtr(&kfile)))) {
-                    .SUCCESS => {},
-                    .BADF => return error.FileNotFound,
-                    else => |err| return unexpectedErrno(err),
-                }
-                const len = mem.indexOfScalar(u8, &kfile.path, 0) orelse MAX_PATH_BYTES;
-                if (len == 0) return error.NameTooLong;
-                const result = out_buffer[0..len];
-                @memcpy(result, kfile.path[0..len]);
-                return result;
-            } else {
-                // This fallback implementation reimplements libutil's `kinfo_getfile()`.
-                // The motivation is to avoid linking -lutil when building zig or general
-                // user executables.
-                var mib = [4]c_int{ CTL.KERN, KERN.PROC, KERN.PROC_FILEDESC, system.getpid() };
-                var len: usize = undefined;
-                sysctl(&mib, null, &len, null, 0) catch |err| switch (err) {
-                    error.PermissionDenied => unreachable,
-                    error.SystemResources => return error.SystemResources,
-                    error.NameTooLong => unreachable,
-                    error.UnknownName => unreachable,
-                    else => return error.Unexpected,
-                };
-                len = len * 4 / 3;
-                const buf = std.heap.c_allocator.alloc(u8, len) catch return error.SystemResources;
-                defer std.heap.c_allocator.free(buf);
-                len = buf.len;
-                sysctl(&mib, &buf[0], &len, null, 0) catch |err| switch (err) {
-                    error.PermissionDenied => unreachable,
-                    error.SystemResources => return error.SystemResources,
-                    error.NameTooLong => unreachable,
-                    error.UnknownName => unreachable,
-                    else => return error.Unexpected,
-                };
-                var i: usize = 0;
-                while (i < len) {
-                    const kf: *align(1) system.kinfo_file = @ptrCast(&buf[i]);
-                    if (kf.fd == fd) {
-                        len = mem.indexOfScalar(u8, &kf.path, 0) orelse MAX_PATH_BYTES;
-                        if (len == 0) return error.NameTooLong;
-                        const result = out_buffer[0..len];
-                        @memcpy(result, kf.path[0..len]);
-                        return result;
-                    }
-                    i += @intCast(kf.structsize);
-                }
-                return error.FileNotFound;
-            }
-        },
-        .dragonfly => {
-            @memset(out_buffer[0..MAX_PATH_BYTES], 0);
-            switch (errno(system.fcntl(fd, F.GETPATH, out_buffer))) {
-                .SUCCESS => {},
-                .BADF => return error.FileNotFound,
-                .RANGE => return error.NameTooLong,
-                else => |err| return unexpectedErrno(err),
-            }
-            const len = mem.indexOfScalar(u8, out_buffer[0..], 0) orelse MAX_PATH_BYTES;
-            return out_buffer[0..len];
-        },
-        .netbsd => {
-            @memset(out_buffer[0..MAX_PATH_BYTES], 0);
-            switch (errno(system.fcntl(fd, F.GETPATH, out_buffer))) {
-                .SUCCESS => {},
-                .ACCES => return error.AccessDenied,
-                .BADF => return error.FileNotFound,
-                .NOENT => return error.FileNotFound,
-                .NOMEM => return error.SystemResources,
-                .RANGE => return error.NameTooLong,
-                else => |err| return unexpectedErrno(err),
-            }
-            const len = mem.indexOfScalar(u8, out_buffer[0..], 0) orelse MAX_PATH_BYTES;
-            return out_buffer[0..len];
-        },
-        else => unreachable, // made unreachable by isGetFdPathSupportedOnTarget above
-    }
-}
-
 /// Spurious wakeups are possible and no precision of timing is guaranteed.
 pub fn nanosleep(seconds: u64, nanoseconds: u64) void {
     var req = timespec{

Currently used by the zig compiler in several places:

../src/link/Elf.zig:                if (fs.realpath(scr_obj.path, &buffer)) |path| {
../src/link/Dwarf.zig:    const realpath = if (std.fs.path.isAbsolute(root_dir_path)) r: {
../src/link/Dwarf.zig:    } else std.fs.realpath(root_dir_path, buffer) catch return root_dir_path;
../src/link/Dwarf.zig:    const len = realpath.len + 1 + sub_path.len;
../src/link/Dwarf.zig:    buffer[realpath.len] = '/';
../src/link/Dwarf.zig:    @memcpy(buffer[realpath.len + 1 ..][0..sub_path.len], sub_path);
../src/link/Dwarf.zig:        // TODO re-investigate if realpath is needed here
../src/link/Dwarf.zig:            std.os.realpath(dir_path, &buffer) catch dir_path
../src/link/MachO.zig:                        const full_path = std.fs.realpath(rel_path, &buffer) catch continue;
../src/link/MachO.zig:                if (std.fs.realpath(id.name, &buffer)) |full_path| {
../src/link/Wasm/Archive.zig:        const path = try std.os.realpath(archive.name, &buffer);
../src/codegen/llvm.zig:                    break :blk std.fs.realpathAlloc(arena, d) catch break :blk d;
../src/codegen/llvm.zig:                const abs_path = std.fs.realpath(dir_path, &abs_buffer) catch
../src/Sema.zig:        // The compiler must not call realpath anywhere.

All of these instances have to do with debug info, except for the last line which I kept for the humor value. They are indeed all bugs. The problem is that the debug info in an executable needs to refer to the file paths of source locations, and if it uses relative paths, then a debugger looking at a moved executable will not be able to find the source files.

However, it is still incorrect to attempt a transformation of a relative path to an absolute path. Instead, the compiler needs to take note of the file paths passed in by the user. These strings must be retained whether absolute or relative, and used, rather than trying to resolve the path based on the file descriptor.

This pushes the problem back into the build system which constructs the CLI for the compiler. But that's precisely how to solve this problem - push it as far back as possible, until there is exactly one realpath() transformation done at the very beginning of the software, ideally by the user themselves.

I expect this to be a controversal proposal.

matklad commented 5 months ago

+1

Using this function is almost always a bug, but it is not a common knowledge (someone needs to write "realpath considered harmful" blog post). At the same time, this function lies on the path of the least resistance for a lot of problems, and routinely sneaks into all kinds of places where it is completely inappropriate.

I like that the function is preserved in the source code as compileError with an explnanatory comment. I think the comment could be better though: I'd remove the bit about "naivety", as knowing that realpath is not good is expert knowledge: in my career, I've seen this clearly articulated only twice: once in an off-hand remark by alexcrichton, and the second time in the Zig community. And I would include a bit of positive guidance here, which can be lifted straight out of the PR description. So, something along the lines of

This function is not portable, relies on a legacy permissions model, cannot handle arbitrarily nested file system paths, and is therefore typically a bug to call.

The correct behavior is usually to take note of the file paths passed in by the user. These strings must be retained whether absolute or relative, and used, rather than trying to resolve the path based on the file descriptor.

To improve software quality, Zig Standard Library omits this function and provides this educational note instead.

If we remove this function, I'd love to see https://github.com/ziglang/zig/issues/5190 fixed, as it is one of the cases where realpath is needed as a work-around for an unrelated bug.

RossComputerGuy commented 5 months ago

Using this function is almost always a bug, but it is not a common knowledge (someone needs to write "realpath considered harmful" blog post).

I could certainly write a blog post but I'd need some sources lol. On another note, I don't really understand what makes realpath not portable and a problem. Is it mainly a Windows issue?

nektro commented 5 months ago

on the portability, my guess is openbsd because windows can do it and https://github.com/ziglang/zig/issues/6718 exists; so it wouldn't surprise me if there was also difficulty in inspecting arbitrary fd's

squeek502 commented 5 months ago

I have no real opinion on removing the API from std.fs, but want to note that the std.os implementation (std.os.getFdPath in particular) is used for testing absolute paths in std/fs/test.zig, see https://github.com/ziglang/zig/pull/16847 for context.

There's also std.fs.selfExePath which relies on some realpath-like functionality for certain platforms (MacOS/OpenBSD/Haiku call std.os.realpathZ, Windows calls std.fs.Dir.realpathW).

GalaxySnail commented 5 months ago

How to prevent path traversal attack without realpath?

silversquirl commented 5 months ago

@GalaxySnail std.fs.path.resolve and check for ../ at the beginning of the result

ikskuh commented 5 months ago

What would be an alternative to realpath when talking to foreign code like C libraries or other executables without changing cwd?

Assuming i only have a std.fs.Dir and a relative path available, and i want to call into a foreign function that only takes char const * path, what is the Zig way to do that then? The only way i know of right now would be using ´setAsCwd()` and then pass the relative path, but there might be some secret knowledge here.

I guess if we deprecate that function, we should provide the users with a set of better alternatives instead of just claiming why it's a bug and that we do not support it.

nektro commented 5 months ago

I interpreted the messaging as that the use cases that still need it should use a 3rd party module

castholm commented 5 months ago

Assuming i only have a std.fs.Dir and a relative path available, and i want to call into a foreign function that only takes char const * path, what is the Zig way to do that then?

My understanding of how a "proper" program is supposed to handle this situation (please correct me if I'm mistaken) is that because you have a std.fs.Dir handle, you presumably know where it came from. Either it's the cwd, or you opened it from a path (either relative to the cwd, or absolute). If it's the latter, the idea is that you keep the original path string in memory for as long as you need to do these kinds of operations.

If the path you want to pass to the C library function is absolute, just pass that path verbatim.

If the path you want to pass is relative, if your dir handle is the cwd you pass that path verbatim, otherwise, you pass the result of try std.fs.path.join(allocator, &.{ original_dir_path, rel_path }) where original_dir_path is the path string the directory was opened from. If you don't need to be cautious of symlinks you could also use fs.path.resolve.

This should work for well-behaving external libraries. If the external library is poorly designed and only accepts absolute paths or paths relative to some hard-coded but unknown directory, you have to implement the required path transformation logic yourself in a way that makes the most sense for the OSes you're targeting.

ikskuh commented 5 months ago

My understanding of how a "proper" program is supposed to handle this situation (please correct me if I'm mistaken) is that because you have a std.fs.Dir handle, you presumably know where it came from.

That's exactly the point. If i'm writing a library, i don't. I just receive a dirhandle+path, but no information if it's cwd-relative, root or anything, so the only two options are the ones i stated, at least to my knowledge. I cannot work on a "string" level of paths, because i don't know the root