gilbertchen / duplicacy

A new generation cloud backup tool
https://duplicacy.com
Other
5.14k stars 334 forks source link

Duplicacy saves backup data in a way that kills the performance of usb 3.0 SMR hard drives. #670

Open pb2004 opened 3 months ago

pb2004 commented 3 months ago

Sorry to report this here, but the fix for the problem seems simple. Let me start from the beginning. External HDDs are mostly made with SMR technology. This means that they have a small CMR buffer into which they can write 32 KiB blocks directly. When this buffer fills up, in order to write 32 KiB the firmware of the drive must read, e.g., 512 KiB from the SMR part — just like an SSD without TRIM — and write it back with the changed 32 KiB. Duplicacy, on the other hand, issues these 32 KiB writes hundreds of thousands of times for a large backup. Depending on the quality of the HDD firmware, this can even make it impossible to complete the backup. The solution seems to be to write 1 MiB in one operation. Example patch here

pb2004 commented 3 months ago

Here is the patch that allowed me to back up to my 2TB external usb 3.0 drive:

From 20fa2649fa28bbfeca604d345214340eba8c86ce Mon Sep 17 00:00:00 2001
From: Piotr Borkowski <piotrborkowski@outlook.com>
Date: Sun, 21 Jul 2024 21:24:11 +0200
Subject: [PATCH] Changing the way backup files are saved to allow bearable
 operation on SMR hard disks. The way it works mimics robocopy: open the file
 with the FILE_FLAG_SEQUENTIAL_SCAN flag, set the file size with SetEndOfFile
 and write 1024KiB each in a single call to the WriteFile function. In
 addition, the use of FlushFileBuffers has been dropped, as there is a very high
 performance penalty on SMR disks. Instead, FILE_FLAG_WRITE_THROUGH is used. The
 effect of the changes is that I can back up 400GiB to my new 2 TiB usb 3.0
 drive from the company on the letter S at an almost acceptable speed. Before
 the changes, after about 20GiB the hard drive would effectively hang in
 firmware. The changes affect Windows and storage.UploadRateLimit <= 0.

---
 src/duplicacy_filestorage.go         |   2 +
 src/duplicacy_filestorage_windows.go | 286 +++++++++++++++++++++++++++
 2 files changed, 288 insertions(+)
 create mode 100644 src/duplicacy_filestorage_windows.go

diff --git a/src/duplicacy_filestorage.go b/src/duplicacy_filestorage.go
index fe9387b..a2b522e 100644
--- a/src/duplicacy_filestorage.go
+++ b/src/duplicacy_filestorage.go
@@ -1,3 +1,5 @@
+//go:build !windows
+
 // Copyright (c) Acrosync LLC. All rights reserved.
 // Free for personal use and commercial trial
 // Commercial use requires per-user licenses available from https://duplicacy.com
diff --git a/src/duplicacy_filestorage_windows.go b/src/duplicacy_filestorage_windows.go
new file mode 100644
index 0000000..29b299e
--- /dev/null
+++ b/src/duplicacy_filestorage_windows.go
@@ -0,0 +1,286 @@
+// Copyright (c) Acrosync LLC. All rights reserved.
+// Free for personal use and commercial trial
+// Commercial use requires per-user licenses available from https://duplicacy.com
+
+package duplicacy
+
+import (
+   "fmt"
+   "io"
+   "io/ioutil"
+   "math/rand"
+   "os"
+   "path"
+   "strings"
+   "golang.org/x/sys/windows"
+   "time"
+)
+
+// FileStorage is a local on-disk file storage implementing the Storage interface.
+type FileStorage struct {
+   StorageBase
+
+   isCacheNeeded   bool   // Network storages require caching
+   storageDir      string // Root directory; every path passed to the methods is joined onto it
+   numberOfThreads int    // Divisor applied to the download/upload rate limits
+}
+
+// CreateFileStorage creates a file storage.
+func CreateFileStorage(storageDir string, isCacheNeeded bool, threads int) (storage *FileStorage, err error) {
+
+   var stat os.FileInfo
+
+   stat, err = os.Stat(storageDir)
+   if err != nil {
+       if os.IsNotExist(err) {
+           err = os.MkdirAll(storageDir, 0744)
+           if err != nil {
+               return nil, err
+           }
+       } else {
+           return nil, err
+       }
+   } else {
+       if !stat.IsDir() {
+           return nil, fmt.Errorf("The storage path %s is a file", storageDir)
+       }
+   }
+
+   for storageDir[len(storageDir)-1] == '/' {
+       storageDir = storageDir[:len(storageDir)-1]
+   }
+
+   storage = &FileStorage{
+       storageDir:      storageDir,
+       isCacheNeeded:   isCacheNeeded,
+       numberOfThreads: threads,
+   }
+
+   // Random number fo generating the temporary chunk file suffix.
+   rand.Seed(time.Now().UnixNano())
+
+   storage.DerivedStorage = storage
+   storage.SetDefaultNestingLevels([]int{2, 3}, 2)
+   return storage, nil
+}
+
+// ListFiles returns the names and sizes of the entries directly under 'dir' (non-recursively).
+// Directories and symlinks are reported with a trailing '/'; a missing 'dir' yields empty results and no error.
+func (storage *FileStorage) ListFiles(threadIndex int, dir string) (files []string, sizes []int64, err error) {
+
+   entries, err := ioutil.ReadDir(path.Join(storage.storageDir, dir))
+   if os.IsNotExist(err) {
+       return nil, nil, nil
+   }
+   if err != nil {
+       return nil, nil, err
+   }
+
+   for _, entry := range entries {
+       name := entry.Name()
+       isDirLike := entry.IsDir() || entry.Mode()&os.ModeSymlink != 0
+       if isDirLike && name[len(name)-1] != '/' {
+           name += "/"
+       }
+       files = append(files, name)
+       sizes = append(sizes, entry.Size())
+   }
+
+   return files, sizes, nil
+}
+
+// DeleteFile removes the file or directory at 'filePath'; a path that does not exist is not an error.
+func (storage *FileStorage) DeleteFile(threadIndex int, filePath string) (err error) {
+   target := path.Join(storage.storageDir, filePath)
+   if err = os.Remove(target); err != nil && !os.IsNotExist(err) {
+       return err
+   }
+   // Either the removal succeeded or there was nothing to remove.
+   return nil
+}
+
+// MoveFile renames the file 'from' to 'to'; both paths are joined onto the storage directory.
+func (storage *FileStorage) MoveFile(threadIndex int, from string, to string) (err error) {
+   return os.Rename(path.Join(storage.storageDir, from), path.Join(storage.storageDir, to))
+}
+
+// CreateDirectory creates the directory 'dir' under the storage directory.
+// A path that already exists is not treated as an error.
+func (storage *FileStorage) CreateDirectory(threadIndex int, dir string) (err error) {
+   err = os.Mkdir(path.Join(storage.storageDir, dir), 0744)
+   if os.IsExist(err) {
+       return nil
+   }
+   return err
+}
+
+// GetFileInfo reports whether 'filePath' exists and, if it does, whether it is a directory and its size.
+// A missing path is reported as exist == false together with a nil error.
+func (storage *FileStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) {
+   info, err := os.Stat(path.Join(storage.storageDir, filePath))
+   switch {
+   case err == nil:
+       return true, info.IsDir(), info.Size(), nil
+   case os.IsNotExist(err):
+       return false, false, 0, nil
+   default:
+       return false, false, 0, err
+   }
+}
+
+// DownloadFile copies the content of the file at 'filePath' into 'chunk'.
+// The copy goes through RateLimitedCopy; any error from opening or copying the file is returned.
+func (storage *FileStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) {
+
+   source, err := os.Open(path.Join(storage.storageDir, filePath))
+   if err != nil {
+       return err
+   }
+   defer source.Close()
+
+   // The configured download rate limit is divided by the number of threads.
+   perThreadLimit := storage.DownloadRateLimit / storage.numberOfThreads
+   if _, err = RateLimitedCopy(chunk, source, perThreadLimit); err != nil {
+       return err
+   }
+   return nil
+}
+
+// UploadFile writes 'content' to the file at 'filePath'
+func (storage *FileStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
+
+   fullPath := path.Join(storage.storageDir, filePath)
+
+   if len(strings.Split(filePath, "/")) > 2 {
+       dir := path.Dir(fullPath)
+       // Use Lstat() instead of Stat() since 1) Stat() doesn't work for deduplicated disks on Windows and 2) there isn't
+       // really a need to follow the link if filePath is a link.
+       stat, err := os.Lstat(dir)
+       if err != nil {
+           if !os.IsNotExist(err) {
+               return err
+           }
+           err = os.MkdirAll(dir, 0744)
+           if err != nil {
+               return err
+           }
+       } else {
+           if !stat.IsDir() && stat.Mode() & os.ModeSymlink == 0 {
+               return fmt.Errorf("The path %s is not a directory or symlink", dir)
+           }
+       }
+   }
+
+   letters := "abcdefghijklmnopqrstuvwxyz"
+   suffix := make([]byte, 8)
+   for i := range suffix {
+       suffix[i] = letters[rand.Intn(len(letters))]
+   }
+
+   temporaryFile := fullPath + "." + string(suffix) + ".tmp"
+
+   //fh windows.Handle
+   fh, err := windows.CreateFile(
+       windows.StringToUTF16Ptr(temporaryFile),
+       windows.GENERIC_WRITE,
+       0,
+       nil,
+       windows.CREATE_ALWAYS,
+       windows.FILE_ATTRIBUTE_NORMAL|windows.FILE_FLAG_SEQUENTIAL_SCAN|windows.FILE_FLAG_WRITE_THROUGH,
+       0,
+   )
+   if err != nil {
+       return err
+   }
+           
+   _, err = windows.SetFilePointer(fh, int32(len(content)), nil, windows.FILE_BEGIN)
+   if err != nil {
+       return err
+   }
+   
+   err = windows.SetEndOfFile(fh)
+   if err != nil {
+       return err
+   }
+   
+   _, err = windows.SetFilePointer(fh, 0, nil, windows.FILE_BEGIN)
+   if err != nil {
+       return err
+   }
+   
+   file := os.NewFile(uintptr(fh), temporaryFile)
+
+   //file, err := os.OpenFile(temporaryFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+   //if err != nil {
+   //  return err
+   //}
+
+   if storage.UploadRateLimit <= 0 {
+       // fmt.Println("My code")
+       // buf := make([]byte, 1024 * 1024)
+       // _, err = io.CopyBuffer(file, bytes.NewReader(content), buf)
+       blockSize := 1024 * 1024
+       for i := 0; i < len(content); i += blockSize {
+           end := i + blockSize
+           if end > len(content) {
+               end = len(content) // Dostosuj rozmiar ostatniego bloku
+           }
+           _, err := file.Write(content[i:end])
+           if err != nil {
+               file.Close()
+               return err
+           }
+       }
+   } else {
+       reader := CreateRateLimitedReader(content, storage.UploadRateLimit/storage.numberOfThreads)
+       _, err = io.Copy(file, reader)
+       if err != nil {
+           file.Close()
+           return err
+       }
+   }
+
+   //if err = file.Sync(); err != nil {
+   //  pathErr, ok := err.(*os.PathError)
+   //  isNotSupported := ok && pathErr.Op == "sync" && pathErr.Err == syscall.ENOTSUP
+   //  if !isNotSupported {
+   //      _ = file.Close()
+   //      return err
+   //  }
+   //}
+
+   err = file.Close()
+   if err != nil {
+       return err
+   }
+
+   err = os.Rename(temporaryFile, fullPath)
+   if err != nil {
+
+       if _, e := os.Stat(fullPath); e == nil {
+           os.Remove(temporaryFile)
+           return nil
+       } else {
+           return err
+       }
+   }
+
+   return nil
+
+}
+
+// IsCacheNeeded reports whether a local snapshot cache is needed for the storage to avoid
+// downloading/uploading chunks too often when managing snapshots.
+func (storage *FileStorage) IsCacheNeeded() bool { return storage.isCacheNeeded }
+
+// IsMoveFileImplemented reports whether the 'MoveFile' method is implemented; always true for FileStorage.
+func (storage *FileStorage) IsMoveFileImplemented() bool { return true }
+
+// IsStrongConsistent reports whether the storage can guarantee strong consistency; always true for FileStorage.
+func (storage *FileStorage) IsStrongConsistent() bool { return true }
+
+// IsFastListing reports whether the storage supports fast listing of file names; always false for FileStorage.
+func (storage *FileStorage) IsFastListing() bool { return false }
+
+// EnableTestMode enables the test mode; it is a no-op for FileStorage.
+func (storage *FileStorage) EnableTestMode() {}
-- 
2.45.2.windows.1

0001-Changing-the-way-backup-files-are-saved-to-allow-bea.patch