Closed cuducos closed 1 year ago
Reescrevi a estratégia do got
de uma forma que teremos mais controle sobre os downloads. Vou colar aqui para não perder, mas a ideia passa a ser remover o got
e implementar essa lógica (com testes e tudo mais).
Salve esse arquivo como main.go
em algum diretório, e use com go run main.go TAMANHO-DE-CADA-PEDAÇO URL DESTINO
, por exemplo go run main.go 1000000 http://200.152.38.155/CNPJ/Paises.zip Paises.zip
para baixar o arquivo em pedaços de 1 MB.
package main
import (
"fmt"
"io"
"log"
"math"
"net/http"
"os"
"strconv"
)
// retries is the total attempt budget for each chunk before the whole
// download is aborted.
const retries = 7

// chunk is the unit of work for the ranged download: the byte range
// [start, end] (inclusive) of the remote file, plus the downloaded
// payload and any error produced while fetching it.
type chunk struct {
	idx      int    // zero-based position of this chunk in the file
	retries  int    // remaining attempts for this chunk
	start    uint64 // first byte of the range (inclusive)
	end      uint64 // last byte of the range (inclusive)
	size     uint64 // expected payload size: end - start + 1
	err      error  // set by downloadChunk on failure, nil on success
	contents []byte // downloaded bytes, valid only when err is nil
}

// newChunk builds a chunk for the inclusive byte range [start, end] with
// a full retry budget.
func newChunk(idx int, start, end uint64) chunk {
	c := chunk{idx: idx, retries: retries, start: start, end: end}
	c.size = end - start + 1
	return c
}

// newChunkFrom clones a failed chunk for a retry, consuming one attempt
// from its budget. (Previously the budget was reset to the maximum on
// every retry, so a persistently failing chunk retried forever.)
func newChunkFrom(chunk chunk) chunk {
	c := newChunk(chunk.idx, chunk.start, chunk.end)
	c.retries = chunk.retries - 1
	return c
}
// chunckDowloader holds the shared state of one chunked download: the
// source URL, the destination path, the HTTP client used for every
// request, and the channel on which every finished chunk (successful or
// not) is reported back to the coordinating download loop.
// NOTE(review): the type name has typos — "chunkDownloader" was likely
// intended — but renaming it would touch every method, so it stays as-is.
type chunckDowloader struct {
	url    string       // remote file being downloaded
	path   string       // local destination path
	client *http.Client // shared client for the HEAD and ranged GET requests
	done   chan chunk   // downloadChunk always sends its chunk here, with err set on failure
}
// downloadChunk fetches the byte range [chunk.start, chunk.end] of c.url
// with an HTTP Range request and stores the payload in chunk.contents.
// Failures are recorded in chunk.err rather than returned; either way the
// chunk is always sent back on c.done (via the deferred closure, which
// sees all mutations made to chunk) so the coordinator can count or retry it.
func (c *chunckDowloader) downloadChunk(chunk chunk) {
	log.Output(2, fmt.Sprintf("starting to download chunk %d…", chunk.idx)) // TODO: remove
	defer log.Output(2, fmt.Sprintf("done to download chunk %d…", chunk.idx)) // TODO: remove
	defer func() { c.done <- chunk }()
	req, err := http.NewRequest("GET", c.url, nil)
	if err != nil {
		chunk.err = fmt.Errorf("could not create a request: %w", err)
		return
	}
	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", chunk.start, chunk.end))
	resp, err := c.client.Do(req)
	if err != nil {
		chunk.err = fmt.Errorf("error sending the http request: %w", err)
		return
	}
	defer resp.Body.Close()
	// A ranged request should answer 206; a 200 (full body) is tolerated
	// because the Content-Length check below rejects it unless the chunk
	// covers the whole file anyway. Anything else (404, 416, 5xx…) is an
	// error — previously its body could be mistaken for chunk data.
	if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
		chunk.err = fmt.Errorf("got http status %d downloading chunk %d", resp.StatusCode, chunk.idx)
		return
	}
	if resp.ContentLength != int64(chunk.size) {
		chunk.err = fmt.Errorf("got wrong content-length, expected %d, got %d", chunk.size, resp.ContentLength)
		return
	}
	chunk.contents, err = io.ReadAll(resp.Body)
	if err != nil {
		chunk.err = fmt.Errorf("could not read chunk response body: %w", err)
	}
	// (Removed: an unreachable duplicate `if err != nil` block — err was
	// already checked above, and its "could not write chunk to file"
	// message was a leftover from when this function wrote to disk.)
}
// getSize issues a HEAD request for c.url and returns the remote file
// size in bytes. It errors out when the size is unknown (non-positive
// Content-Length) or when the server explicitly refuses range requests,
// since the chunked strategy depends on both.
func (c *chunckDowloader) getSize() (uint64, error) {
	r, err := c.client.Head(c.url)
	if err != nil {
		// %w (was %s) so callers can unwrap the underlying error.
		return 0, fmt.Errorf("error sending a http head request: %w", err)
	}
	defer r.Body.Close()
	if r.ContentLength <= 0 {
		// Typo fixed: "content-lenght" → "content-length".
		return 0, fmt.Errorf("got content-length %d", r.ContentLength)
	}
	// An absent Accept-Ranges header is inconclusive, so only an explicit
	// "none" is treated as a refusal.
	if r.Header.Get("Accept-Ranges") == "none" {
		return 0, fmt.Errorf("server does not accept http range requests")
	}
	return uint64(r.ContentLength), nil
}
// download fetches url into path by splitting it into ranged requests of
// chunkSize bytes, downloading them concurrently and writing each one at
// its offset in a pre-allocated file. Failed chunks are retried up to
// their retry budget; the first chunk that exhausts it aborts the whole
// download with an error.
func download(client *http.Client, chunkSize uint64, url, path string) error {
	// Guard against division by zero below and an infinite spawn loop.
	if chunkSize == 0 {
		return fmt.Errorf("chunk size must be greater than zero")
	}
	c := chunckDowloader{
		url:    url,
		path:   path,
		client: client,
		done:   make(chan chunk),
	}
	size, err := c.getSize()
	if err != nil {
		return fmt.Errorf("could not get the size of %s: %w", url, err)
	}
	log.Output(2, fmt.Sprintf("file size = %d", size)) // TODO: remove
	count := uint64(math.Ceil(float64(size) / float64(chunkSize)))
	log.Output(2, fmt.Sprintf("total chunks = %d", count)) // TODO: remove
	// Create and pre-size the destination BEFORE spawning goroutines, so a
	// failure here does not leave workers blocked forever on c.done.
	dest, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("could not create %s: %w", path, err)
	}
	defer dest.Close()
	if err := dest.Truncate(int64(size)); err != nil {
		return fmt.Errorf("could not truncate %s to %d bytes: %w", path, size, err)
	}
	// Spawn exactly `count` non-overlapping chunks. The previous version
	// advanced start by chunkSize-1, making every chunk overlap its
	// predecessor by one byte, and its `start > size` guard spawned a bogus
	// extra chunk when size was an exact multiple of chunkSize.
	idx := 0
	for start := uint64(0); start < size; start += chunkSize {
		end := start + chunkSize - 1
		if end > size-1 {
			end = size - 1
		}
		log.Output(2, fmt.Sprintf("chunk %d from %d to %d", idx, start, end)) // TODO: remove
		go c.downloadChunk(newChunk(idx, start, end))
		idx++
	}
	var completed uint64
	for completed < count {
		chk := <-c.done
		if chk.err != nil {
			// Log the chunk's own error (the old code logged the stale,
			// always-nil `err` from getSize).
			log.Output(2, fmt.Sprintf("error downloading chunk #%d: %s", chk.idx+1, chk.err)) // TODO: remove
			if chk.retries <= 0 {
				return fmt.Errorf("could not download %s: %w", url, chk.err)
			}
			log.Output(2, fmt.Sprintf("retrying chunk #%d…", chk.idx+1)) // TODO: remove
			go c.downloadChunk(newChunkFrom(chk))
			// Do NOT fall through: a failed chunk must not be written nor
			// counted as completed while its retry is still in flight.
			continue
		}
		if _, err := dest.WriteAt(chk.contents, int64(chk.start)); err != nil {
			return fmt.Errorf("could not write chunk %d to %s: %w", chk.idx, path, err)
		}
		completed++
	}
	close(c.done)
	return nil
}
// main parses the CLI arguments (chunk size in bytes, source URL and
// destination path) and runs the chunked download, aborting with a fatal
// log message on any error.
func main() {
	if len(os.Args) != 4 {
		// Hyphenated "CHUNK-SIZE" so the usage line reads as three
		// arguments, not four.
		log.Fatal("missing CHUNK-SIZE and/or URL and/or DESTINATION.\nUsage: chunk CHUNK-SIZE URL DESTINATION")
	}
	chunkSize, err := strconv.ParseUint(os.Args[1], 10, 64)
	if err != nil || chunkSize == 0 {
		// Typo fixed ("vaklid"), and zero is rejected up front: a zero
		// chunk size can never produce a valid ranged download.
		log.Fatalf("%s is not a valid CHUNK-SIZE", os.Args[1])
	}
	if err := download(&http.Client{}, chunkSize, os.Args[2], os.Args[3]); err != nil {
		log.Fatal(err)
	}
}
O PR #145 tinha boa intenção mas, depois de utilizar o
got
integrado por 2 semanas, não consegui completar o download de todos os arquivos. Acredito que valha a pena remover o got
.