fusepy / fusepy

Simple ctypes bindings for FUSE
ISC License
786 stars 190 forks source link

read not returning 0 to client #115

Open medakk opened 6 years ago

medakk commented 6 years ago

I am implementing a file system using fusepy. In my filesystem, the size of the file is not known at the time of getattr being called, so I return -1.

Later, in the read call, I return however much data is requested in each call as long as data is available, and then an empty bytes object to signify that there is no more data. However, fusepy seems to return something to the client every time. Here is a minimal example, with the rest of the logic removed, to show what I mean:

import os
import sys

from fuse import FUSE, Operations

class CurlFS(Operations):
    """Minimal filesystem used to reproduce the never-ending read.

    Every path reports a size of -1 from getattr, and every read returns
    an empty bytes object.
    """

    def __init__(self):
        # Value handed out by the next open() call.
        self.fd = 0

    def getattr(self, path, fh=None):
        """Return the stat dictionary for *path*.

        The root path '/' is reported as a directory; every other path is
        reported as a regular file. All other fields are the same constants
        for every path.
        """
        is_root = path == '/'
        return {
            'st_atime': 0,
            'st_ctime': 0,
            'st_gid': 0,         # Owned by root
            'st_mtime': 0,
            'st_nlink': 1,
            'st_size': -1,       # Size is not known at getattr time
            'st_uid': 0,         # Owned by root
            # directory or regular file, rwx permissions to all
            'st_mode': 0o040777 if is_root else 0o100777,
        }

    def open(self, path, flags):
        """Return the current counter value as the handle, then bump it."""
        handle, self.fd = self.fd, self.fd + 1
        return handle

    def read(self, path, length, offset, fh):
        """Log the path being read and return no data."""
        print('read:', path)
        no_data = b''
        return no_data

if __name__ == '__main__':
    # The first command-line argument is passed to FUSE as the mount point.
    mountpoint = sys.argv[1]
    FUSE(CurlFS(), mountpoint, nothreads=True, foreground=True)

then, in another shell:

mkdir dragons
python example.py dragons/
cat dragons/fire

The console on the shell running fusepy keeps printing:

read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire
read: /fire

and the cat command never returns. What is the correct way to tell the client that there is no data remaining in fusepy?

I've tried using both Python 2.7 and Python 3.6, as well as the latest version of fusepy from GitHub.

terencehonles commented 6 years ago

I'm not quite sure this is possible. Double checking my initial thought I don't think returning -1 is well defined. It's quite possible something somewhere on your system (FUSE) is casting -1 to an unsigned value and that means it is actually very large.

The definition of read expects you to return a buffer of exactly `length` bytes, and if you don't, it will zero-fill the data to the expected length. You can check whether the direct_io mount option achieves your desired result, but you are sailing into uncharted waters.

See: fuse_operations::read

terencehonles commented 6 years ago

Looking at Debian and Ubuntu's man pages for fuse.mount (see direct_io#2) this option is probably what you want

medakk commented 6 years ago

I looked at the docs for fuse_operations::read:

Read should return exactly the number of bytes requested except on EOF or error

So in the case of EOF, it would be valid to return less than the number of bytes asked, or even 0 bytes. What should I use as st_size in this case, however?

Here is the problematic part, when using strace on my Python application (I disabled the print calls used in the sample code to keep the strace output cleaner):

writev(3, [{iov_base="\20\0\0\0\0\0\0\0~\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\177\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\177\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\200\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\200\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\201\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\201\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\202\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\202\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\203\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\203\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\204\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\204\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\205\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\205\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\206\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\206\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\207\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\207\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\210\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\210\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\211\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\211\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\212\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\212\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16
read(3, "P\0\0\0\17\0\0\0\213\5\0\0\0\0\0\0\2\0\0\0\0\0\0\0\350\3\0\0\350\3\0\0"..., 135168) = 80
writev(3, [{iov_base="\20\0\0\0\0\0\0\0\213\5\0\0\0\0\0\0", iov_len=16}, {iov_base="", iov_len=0}], 2) = 16

this goes on until I Ctrl-C the process.

As far as I can see, some data is always sent back, even though my application always returns empty bytes every time read is called.

In the meantime, I will look at direct_io.

terencehonles commented 6 years ago

I must have overlooked the EOF condition. However, you're definitely seeing an issue with the -1. I modified memory.py to do the same and it hung. If I instead return a constant st_size of 1 but have read return a full buffer, the output is truncated to 1 character:

diff --git a/examples/memory.py b/examples/memory.py
index 419a343..6764536 100755
--- a/examples/memory.py
+++ b/examples/memory.py
@@ -54,7 +54,9 @@ class Memory(LoggingMixIn, Operations):
         if path not in self.files:
             raise FuseOSError(ENOENT)

-        return self.files[path]
+        data = dict(self.files[path])
+        data['st_size'] = 1
+        return data

     def getxattr(self, path, name, position=0):
         attrs = self.files[path].get('attrs', {})
@@ -84,6 +86,7 @@ class Memory(LoggingMixIn, Operations):
         return self.fd

     def read(self, path, size, offset, fh):
+        return '' if offset else 'b' * min(4096, size)
         return self.data[path][offset:offset + size]

     def readdir(self, path, fh):

If you enable direct_io then cat works as expected:

diff --git a/examples/memory.py b/examples/memory.py
index 419a343..6764536 100755
--- a/examples/memory.py
+++ b/examples/memory.py
@@ -54,7 +54,9 @@ class Memory(LoggingMixIn, Operations):
         if path not in self.files:
             raise FuseOSError(ENOENT)

-        return self.files[path]
+        data = dict(self.files[path])
+        data['st_size'] = -1
+        return data

     def getxattr(self, path, name, position=0):
         attrs = self.files[path].get('attrs', {})
@@ -84,6 +86,7 @@ class Memory(LoggingMixIn, Operations):
         return self.fd

     def read(self, path, size, offset, fh):
+        return '' if offset else 'b' * min(4096, size)
         return self.data[path][offset:offset + size]

     def readdir(self, path, fh):
@@ -159,4 +174,4 @@ if __name__ == '__main__':
     args = parser.parse_args()

     logging.basicConfig(level=logging.DEBUG)
-    fuse = FUSE(Memory(), args.mount, foreground=True, allow_other=True)
+    fuse = FUSE(Memory(), args.mount, foreground=True, allow_other=True, direct_io=True)
terencehonles commented 6 years ago

oh, and I should mention this is what I am getting via ls

fusepy/examples> ls -la test/t
-rw-r--r-- 1 root root 18446744073709551615 Apr 17 00:38 test/t