nabijaczleweli / cargo-update

A cargo subcommand for checking and applying updates to installed executables
MIT License
1.22k stars 42 forks source link

Support the new cargo sparse protocol #223

Closed orium closed 1 year ago

orium commented 1 year ago

Rust 1.68 now supports cargo's sparse protocol for crate registries. This avoids downloading the big git repo where the registry index usually lives. Rust 1.70 should also make this new protocol the default.

It would be really nice to have cargo-update support this protocol (it currently downloads the git registry even if the sparse protocol is enabled in the cargo config).

nabijaczleweli commented 1 year ago

I've set CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse and validated that, indeed, cargo dials whatever index.crates.io resolved to directly, but the .crates.toml line is still

"b3sum 1.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = ["b3sum"]

so..? Is this just string replacement if the mode is on? Will the environment variable be set for cargo subprocesses once this "becomes the default" or do I need to exec cargo? The documentation link doesn't appear to document anything.

nabijaczleweli commented 1 year ago

And the on-disk format is different than the on-air and git formats, the ^A at the start looks like an attempt at versioning, and is of course not documented. Lovely.

nabijaczleweli commented 1 year ago
$ CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse  target/debug/cargo-install-update install-update -lai  jot | awk '{print strftime() "\t" $0}'
[src\ops\mod.rs:730] CargoConfig {
    net_git_fetch_with_cli: env::var("CARGO_NET_GIT_FETCH_WITH_CLI").ok().and_then(|e|
                        if e.is_empty() {
                                Some(toml::Value::String(String::new()))
                            } else {
                               e.parse::<toml::Value>().ok()
                           }).or_else(||
                    {
                        cfg.as_mut()?.as_table_mut()?.remove("net")?.as_table_mut()?.remove("git-fetch-with-cli")
                    }).map(CargoConfig::truthy).unwrap_or(false),
    registries_crates_io_protocol_sparse: env::var("CARGO_REGISTRIES_CRATES_IO_PROTOCOL").map(|s|
                        s ==
                            "sparse").ok().or_else(||
                {
                    Some(cfg.as_mut()?.as_table_mut()?.remove("registries")?.as_table_mut()?.remove("crates-io")?.as_table_mut()?.remove("protocol")?.as_str()?
                            == "sparse")
                }).unwrap_or(false),
} = CargoConfig {
    net_git_fetch_with_cli: true,
    registries_crates_io_protocol_sparse: true,
}
Fri Mar 10 06:02:14 CEST 2023       Polling registry 'https://index.crates.io/'
Fri Mar 10 06:02:14 CEST 2023
Fri Mar 10 06:02:14 CEST 2023   Package         Installed  Latest   Needs update
Fri Mar 10 06:02:14 CEST 2023   cargo-count     v0.2.2     v0.2.4   Yes
Fri Mar 10 06:02:14 CEST 2023   cargo-deb       v1.21.0    v1.42.2  Yes
Fri Mar 10 06:02:14 CEST 2023   cargo-graph     v0.3.0     v0.3.1   Yes
Fri Mar 10 06:02:14 CEST 2023   cargo-outdated  v0.3.0     v0.11.2  Yes
Fri Mar 10 06:02:14 CEST 2023   cargo-update    v0.8.1     v11.1.2  Yes
Fri Mar 10 06:02:14 CEST 2023   checksums       v0.5.3     v0.9.1   Yes
Fri Mar 10 06:02:14 CEST 2023   gen-epub-book   v1.0.0     v2.3.2   Yes
Fri Mar 10 06:02:14 CEST 2023   identicon       v0.1.1     v0.2.1   Yes
Fri Mar 10 06:02:14 CEST 2023   jot             No         v0.1.0   Yes
Fri Mar 10 06:02:14 CEST 2023   racer           v2.1.42    v2.2.2   Yes
Fri Mar 10 06:02:14 CEST 2023   ripgrep         v11.0.2    v13.0.0  Yes
Fri Mar 10 06:02:14 CEST 2023   treesize        v0.3.0     ^0.4     Yes
Fri Mar 10 06:02:14 CEST 2023   cargo-navigate  v0.1.1     v0.1.1   No
Fri Mar 10 06:02:14 CEST 2023

:)

Please try this provisional patch and see if it works for you:

diff --git a/Cargo.toml b/Cargo.toml
index d70c14c733..efc295ccc3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -65,6 +65,10 @@ features = ["serde"]
 version = "=2.33.3"
 features = ["wrap_help"]

+[dependencies.curl]
+version = "0.4"
+features = ["http2"]
+
 [target."cfg(all(unix, not(target_os = \"macos\")))".dependencies]  # Matches https://github.com/rust-lang/git2-rs/blob/0.13.25/Cargo.toml
 # https://app.travis-ci.com/github/nabijaczleweli/cargo-update/jobs/548138871#L272 vs https://app.travis-ci.com/github/nabijaczleweli/cargo-update/jobs/550595562#L279
 openssl-sys = "=0.9.70"
@@ -74,8 +78,9 @@ embed-resource = "1.7"

 [features]
 default = []
-vendored-openssl = ["git2/vendored-openssl"]
+vendored-openssl = ["git2/vendored-openssl", "curl/static-ssl"]
 vendored-libgit2 = ["git2/vendored-libgit2"]
+vendored-libcurl = ["curl/static-curl"]

 [[bin]]
 name = "cargo-install-update"
diff --git a/src/lib.rs b/src/lib.rs
index c953369599..db995b9306 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -376,6 +376,7 @@ extern crate regex;
 extern crate git2;
 #[macro_use]
 extern crate clap;
+extern crate curl;
 extern crate dirs;
 extern crate toml;
 extern crate json;
diff --git a/src/main.rs b/src/main.rs
index a43c6f3f1c..ccda5ba3b2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,7 +3,6 @@ extern crate tabwriter;
 extern crate lazysort;
 extern crate git2;

-use git2::{Repository, ErrorCode as GitErrorCode};
 use std::io::{ErrorKind as IoErrorKind, Write, stdout, sink};
 use std::process::{Command, exit};
 use std::collections::BTreeMap;
@@ -72,7 +71,7 @@ fn actual_main() -> Result<(), i32> {
     // These are all in the same order and (item => [package names]) maps
     let mut registry_urls = BTreeMap::<_, Vec<_>>::new();
     for package in &packages {
-        registry_urls.entry(cargo_update::ops::get_index_url(&crates_file, &package.registry).map_err(|e| {
+        registry_urls.entry(cargo_update::ops::get_index_url(&crates_file, &package.registry, cargo_config.registries_crates_io_protocol_sparse).map_err(|e| {
                     eprintln!("Couldn't get registry for {}: {}.", package.name, e);
                     2
                 })?)
@@ -82,30 +81,32 @@ fn actual_main() -> Result<(), i32> {
     let registry_urls: Vec<_> = registry_urls.into_iter().collect();

     let registries: Vec<_> = Result::from_iter(registry_urls.iter()
-        .map(|((registry_url, _), pkg_names)| {
-            cargo_update::ops::assert_index_path(&opts.cargo_dir, &registry_url[..])
-                .map(|path| (path, &pkg_names[..]))
+        .map(|((registry_url, sparse, _), pkg_names)| {
+            cargo_update::ops::assert_index_path(&opts.cargo_dir, &registry_url[..], *sparse)
+                .map(|path| (path, *sparse, &pkg_names[..]))
                 .map_err(|e| {
                     eprintln!("Couldn't get package repository: {}.", e);
                     2
                 })
         }))?;
-    let mut registry_repos: Vec<_> = Result::from_iter(registries.iter().map(|(registry, _)| {
-        Repository::open(&registry).or_else(|e| if e.code() == GitErrorCode::NotFound {
-            Repository::init(&registry).map_err(|e| {
-                eprintln!("Failed to initialise fresh registry repository at {}: {}.\nTry running 'cargo search cargo-update' to initialise the repository.",
-                          registry.display(),
-                          e);
-                2
-            })
-        } else {
-            eprintln!("Failed to open registry repository at {}: {}.", registry.display(), e);
-            Err(2)
+    let mut registry_repos: Vec<_> = Result::from_iter(registries.iter().map(|(registry, sparse, _)| {
+        cargo_update::ops::open_index_repository(registry, *sparse).map_err(|(init, e)| {
+            match init {
+                true => {
+                    eprintln!("Failed to initialise fresh registry repository at {}: {}.\nTry running 'cargo search cargo-update' to initialise the \
+                               repository.",
+                              registry.display(),
+                              e)
+                }
+                false => eprintln!("Failed to open registry repository at {}: {}.", registry.display(), e),
+            }
+            2
         })
     }))?;
     for (i, mut registry_repo) in registry_repos.iter_mut().enumerate() {
         cargo_update::ops::update_index(&mut registry_repo,
                                         &(registry_urls[i].0).0,
+                                        registry_urls[i].1.iter(),
                                         http_proxy.as_ref().map(String::as_str),
                                         cargo_config.net_git_fetch_with_cli,
                                         &mut if !opts.quiet {
@@ -117,16 +118,16 @@ fn actual_main() -> Result<(), i32> {
                 2
             })?;
     }
-    let latest_registries: Vec<_> = Result::from_iter(registry_repos.iter().zip(registries.iter()).map(|(registry_repo, (registry, _))| {
-        registry_repo.revparse_single("FETCH_HEAD").or_else(|_| registry_repo.revparse_single("origin/HEAD"))
-            .map_err(|e| {
-                eprintln!("Failed to read remote HEAD of registry repository at {}: {}.", registry.display(), e);
-                2
-            })
+
+    let latest_registries: Vec<_> = Result::from_iter(registry_repos.iter().zip(registries.iter()).map(|(registry_repo, (registry, ..))| {
+        cargo_update::ops::parse_registry_head(registry_repo).map_err(|e| {
+            eprintln!("Failed to read remote HEAD of registry repository at {}: {}.", registry.display(), e);
+            2
+        })
     }))?;

     for package in &mut packages {
-        let registry_idx = match registries.iter().position(|(_, pkg_names)| pkg_names.contains(&package.name)) {
+        let registry_idx = match registries.iter().position(|(.., pkg_names)| pkg_names.contains(&package.name)) {
             Some(i) => i,
             None => {
                 panic!("Couldn't find registry for package {} (please report to http://github.com/nabijaczleweli/cargo-update)",
@@ -135,9 +136,7 @@ fn actual_main() -> Result<(), i32> {
         };

         let install_prereleases = configuration.get(&package.name).and_then(|c| c.install_prereleases);
-        package.pull_version(&latest_registries[registry_idx].as_commit().unwrap().tree().unwrap(),
-                             &registry_repos[registry_idx],
-                             install_prereleases);
+        package.pull_version(&latest_registries[registry_idx], &registry_repos[registry_idx], install_prereleases);
     }

     if !opts.quiet {
@@ -218,7 +217,7 @@ fn actual_main() -> Result<(), i32> {
                     }

                     let registry_name = match registry_urls.iter().find(|(_, pkg_names)| pkg_names.contains(&package.name)) {
-                        Some(u) => &(u.0).1,
+                        Some(u) => &(u.0).2,
                         None => {
                             panic!("Couldn't find registry URL for package {} (please report to http://github.com/nabijaczleweli/cargo-update)",
                                    &package.name[..])
diff --git a/src/ops/mod.rs b/src/ops/mod.rs
index 4d8282f19e..707d8f68b8 100644
--- a/src/ops/mod.rs
+++ b/src/ops/mod.rs
@@ -6,15 +6,20 @@
 //! continue with doing whatever you wish.

-use git2::{self, Config as GitConfig, Error as GitError, Cred as GitCred, RemoteCallbacks, CredentialType, FetchOptions, ProxyOptions, Repository, Tree, Oid};
+use git2::{self, ErrorCode as GitErrorCode, Config as GitConfig, Error as GitError, Cred as GitCred, RemoteCallbacks, CredentialType, FetchOptions,
+           ProxyOptions, Repository, Tree, Oid};
+use curl::easy::{WriteError as CurlWriteError, Handler as CurlHandler, Easy2 as CurlEasy};
 use semver::{VersionReq as SemverReq, Version as Semver};
 use std::io::{ErrorKind as IoErrorKind, Write, Read};
+use curl::multi::Multi as CurlMulti;
+use std::{cmp, env, mem, str, fs};
 use std::ffi::{OsString, OsStr};
 use std::collections::BTreeMap;
 use std::path::{PathBuf, Path};
 use std::hash::{Hasher, Hash};
-use std::{cmp, env, mem, fs};
+use std::iter::FromIterator;
 use std::process::Command;
+use std::time::Duration;
 use std::borrow::Cow;
 use regex::Regex;
 use url::Url;
@@ -73,11 +78,19 @@ pub struct RegistryPackage {
     ///
     /// Go to `https://crates.io/crates/{name}` to get the crate info, if available on the main repository.
     pub name: String,
+    // /// The registry the package is available from,
+    // /// and if it's [sparse](https://doc.rust-lang.org/stable/cargo/reference/registry-index.html#sparse-protocol).
+    // ///
+    // /// Can be a name from ~/.cargo/config.
+    // ///
+    // /// The main repository is `https://github.com/rust-lang/crates.io-index`.
+    // /// If sparse mode is configured, that URL is automatically replaced with `https://index.crates.io/`.
+    // pub registry: (String, bool),
     /// The registry the package is available from.
     ///
     /// Can be a name from ~/.cargo/config.
     ///
-    /// The main repository is `https://github.com/rust-lang/crates.io-index`
+    /// The main repository is `https://github.com/rust-lang/crates.io-index`, or `sparse+https://index.crates.io/`.
     pub registry: String,
     /// The package's locally installed version.
     pub version: Option<Semver>,
@@ -201,6 +214,10 @@ impl RegistryPackage {
             RegistryPackage {
                 name: c.get(1).unwrap().as_str().to_string(),
                 registry: c.get(3).unwrap().as_str().to_string(),
+                //registry: {
+                //    let reg = c.get(3).unwrap().as_str();
+                //    (reg.strip_prefix("sparse+").unwrap_or(reg).to_string(), reg.starts_with("sparse+"))
+                //},
                 version: Some(Semver::parse(c.get(2).unwrap().as_str()).unwrap()),
                 newest_version: None,
                 alternative_version: None,
@@ -211,23 +228,32 @@ impl RegistryPackage {
     }

     /// Download the version list for this crate off the specified repository tree and set the latest and alternative versions.
-    pub fn pull_version<'t>(&mut self, registry: &Tree<'t>, registry_parent: &'t Repository, install_prereleases: Option<bool>) {
-        let mut vers =
-            crate_versions(&mut &find_package_data(&self.name, registry, registry_parent).ok_or_else(|| format!("package {} not found", self.name)).unwrap()
-                                     [..]);
-        vers.sort();
+    pub fn pull_version(&mut self, registry: &RegistryTree, registry_parent: &Registry, install_prereleases: Option<bool>) {
+        let mut vers_git;
+        let vers = match (registry, registry_parent) {
+            (RegistryTree::Git(registry), Registry::Git(registry_parent)) => {
+                vers_git = crate_versions(&mut &find_package_data(&self.name, registry, registry_parent)
+                    .ok_or_else(|| format!("package {} not found", self.name))
+                    .unwrap()
+                                                     [..]);
+                vers_git.sort();
+                &vers_git
+            }
+            (RegistryTree::Sparse(()), Registry::Sparse(registry_parent)) => &registry_parent[&self.name],
+            _ => unreachable!(),
+        };

         self.newest_version = None;
         self.alternative_version = None;

-        let mut vers = vers.into_iter().rev();
+        let mut vers = vers.iter().rev();
         if let Some(newest) = vers.next() {
-            self.newest_version = Some(newest);
+            self.newest_version = Some(newest.clone());

             if self.newest_version.as_ref().unwrap().is_prerelease() && !install_prereleases.unwrap_or(false) {
                 if let Some(newest_nonpre) = vers.find(|v| !v.is_prerelease()) {
                     mem::swap(&mut self.alternative_version, &mut self.newest_version);
-                    self.newest_version = Some(newest_nonpre);
+                    self.newest_version = Some(newest_nonpre.clone());
                 }
             }
         }
@@ -689,11 +715,19 @@ impl PackageFilterElement {
 #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct CargoConfig {
     pub net_git_fetch_with_cli: bool,
+    /// https://blog.rust-lang.org/2023/03/09/Rust-1.68.0.html#cargos-sparse-protocol
+    /// https://doc.rust-lang.org/stable/cargo/reference/registry-index.html#sparse-protocol
+    pub registries_crates_io_protocol_sparse: bool,
 }

 impl CargoConfig {
     pub fn load(crates_file: &Path) -> CargoConfig {
-        CargoConfig {
+        let mut cfg = fs::read_to_string(crates_file.with_file_name("config"))
+            .or_else(|_| fs::read_to_string(crates_file.with_file_name("config.toml")))
+            .ok()
+            .and_then(|s| s.parse::<toml::Value>().ok());
+
+        dbg!(CargoConfig {
             net_git_fetch_with_cli: env::var("CARGO_NET_GIT_FETCH_WITH_CLI")
                 .ok()
                 .and_then(|e| if e.is_empty() {
@@ -702,11 +736,7 @@ impl CargoConfig {
                     e.parse::<toml::Value>().ok()
                 })
                 .or_else(|| {
-                    fs::read_to_string(crates_file.with_file_name("config"))
-                        .or_else(|_| fs::read_to_string(crates_file.with_file_name("config.toml")))
-                        .ok()?
-                        .parse::<toml::Value>()
-                        .ok()?
+                    cfg.as_mut()?
                         .as_table_mut()?
                         .remove("net")?
                         .as_table_mut()?
@@ -714,7 +744,21 @@ impl CargoConfig {
                 })
                 .map(CargoConfig::truthy)
                 .unwrap_or(false),
-        }
+            registries_crates_io_protocol_sparse: env::var("CARGO_REGISTRIES_CRATES_IO_PROTOCOL")
+                .map(|s| s == "sparse")
+                .ok()
+                .or_else(|| {
+                    Some(cfg.as_mut()?
+                        .as_table_mut()?
+                        .remove("registries")?
+                        .as_table_mut()?
+                        .remove("crates-io")?
+                        .as_table_mut()?
+                        .remove("protocol")?
+                        .as_str()? == "sparse")
+                })
+                .unwrap_or(false /* TODO: check if cargo 1.70+ */),
+        })
     }

     fn truthy(v: toml::Value) -> bool {
@@ -922,10 +966,11 @@ pub fn intersect_packages(installed: &[RegistryPackage], to_update: &[(String, O
 pub fn crate_versions<R: Read>(package_desc: &mut R) -> Vec<Semver> {
     let mut buf = String::new();
     package_desc.read_to_string(&mut buf).unwrap();
-    crate_versions_impl(buf)
+    crate_versions_str(&buf)
 }

-fn crate_versions_impl(buf: String) -> Vec<Semver> {
+/// Likewise, but from a blob
+pub fn crate_versions_str(buf: &str) -> Vec<Semver> {
     buf.lines()
         .map(|p| json::parse(p).unwrap())
         .filter(|j| !j["yanked"].as_bool().unwrap())
@@ -938,6 +983,8 @@ fn crate_versions_impl(buf: String) -> Vec<Semver> {
 /// As odd as it may be, this [can happen (if rarely) and is a supported
 /// configuration](https://github.com/nabijaczleweli/cargo-update/issues/150).
 ///
+/// Sparse registries return a meaningless value.
+///
 /// # Examples
 ///
 /// ```
@@ -945,12 +992,17 @@ fn crate_versions_impl(buf: String) -> Vec<Semver> {
 /// # use std::env::temp_dir;
 /// # let cargo_dir = temp_dir().join("cargo_update-doctest").join("assert_index_path-0");
 /// # let idx_dir = cargo_dir.join("registry").join("index").join("github.com-1ecc6299db9ec823");
-/// let index = assert_index_path(&cargo_dir, "https://github.com/rust-lang/crates.io-index").unwrap();
+/// let index = assert_index_path(&cargo_dir, "https://github.com/rust-lang/crates.io-index", false).unwrap();
 ///
 /// // Use find_package_data() to look for packages
 /// # assert_eq!(index, idx_dir);
+/// # assert_eq!(assert_index_path(&cargo_dir, "https://index.crates.io/", true).unwrap(),
+/// $            cargo_dir.join("registry").join("index").join("index.crates.io-6f17d22bba15001f").join(".cache"));
 /// ```
-pub fn assert_index_path(cargo_dir: &Path, registry_url: &str) -> Result<PathBuf, Cow<'static, str>> {
+pub fn assert_index_path(cargo_dir: &Path, registry_url: &str, sparse: bool) -> Result<PathBuf, Cow<'static, str>> {
+    if sparse {
+        return Ok(PathBuf::from("/ENOENT"));
+    }
     let path = cargo_dir.join("registry").join("index").join(registry_shortname(registry_url));
     match path.metadata() {
         Ok(meta) => {
@@ -968,6 +1020,19 @@ pub fn assert_index_path(cargo_dir: &Path, registry_url: &str) -> Result<PathBuf
     }
 }

+pub fn open_index_repository(registry: &Path, sparse: bool) -> Result<Registry, (bool, GitError)> {
+    match sparse {
+        false => {
+            Repository::open(&registry).map(Registry::Git).or_else(|e| if e.code() == GitErrorCode::NotFound {
+                Repository::init(&registry).map(Registry::Git).map_err(|e| (true, e))
+            } else {
+                Err((false, e))
+            })
+        }
+        true => Ok(Registry::Sparse(BTreeMap::new())),
+    }
+}
+
 /// Update the specified index repository from the specified URL.
 ///
 /// Historically, `cargo search` was used, first of an
@@ -1010,38 +1075,129 @@ pub fn assert_index_path(cargo_dir: &Path, registry_url: &str) -> Result<PathBuf
 ///
 /// Sometimes, however, even this isn't enough (see https://github.com/nabijaczleweli/cargo-update/issues/163),
 /// hence `fork_git`, which actually runs `$GIT` (default: `git`).
-pub fn update_index<W: Write>(index_repo: &mut Repository, repo_url: &str, http_proxy: Option<&str>, fork_git: bool, out: &mut W) -> Result<(), String> {
-    writeln!(out, "    Updating registry '{}'", repo_url).map_err(|_| "failed to write updating message".to_string())?;
-    if fork_git {
-        Command::new(env::var_os("GIT").as_ref().map(OsString::as_os_str).unwrap_or(OsStr::new("git"))).arg("-C")
-            .arg(index_repo.path())
-            .args(&["fetch", "-f", repo_url, "HEAD:refs/remotes/origin/HEAD"])
-            .status()
-            .map_err(|e| e.to_string())
-            .and_then(|e| if e.success() {
-                Ok(())
+///
+/// # Sparse indices
+///
+/// Have a `.cache` under the obvious path, then the usual `ca/rg/cargo-update`, but *the file is different than the standard
+/// format*: it starts with a ^A or ^C (I'm assuming these are versions, and if I looked at more files I would also've seen
+/// ^C), then Some Binary Data, then the ETag(?), then {NUL, version, NUL, usual JSON blob line} repeats.
+///
+/// I do not wanna be touching that shit. Just suck off all the files.<br />
+/// Shoulda stored the blobs verbatim and used `If-Modified-Since`. Too me.
+///
+/// Only in this mode is the package list used.
+pub fn update_index<W: Write, A: AsRef<str>, I: Iterator<Item = A>>(index_repo: &mut Registry, repo_url: &str, packages: I, http_proxy: Option<&str>,
+                                                                    fork_git: bool, out: &mut W)
+                                                                    -> Result<(), String> {
+    writeln!(out,
+             "    {} registry '{}'",
+             ["Updating", "Polling"][matches!(index_repo, Registry::Sparse(_)) as usize],
+             repo_url).map_err(|_| "failed to write updating message".to_string())?;
+    match index_repo {
+        Registry::Git(index_repo) => {
+            if fork_git {
+                Command::new(env::var_os("GIT").as_ref().map(OsString::as_os_str).unwrap_or(OsStr::new("git"))).arg("-C")
+                    .arg(index_repo.path())
+                    .args(&["fetch", "-f", repo_url, "HEAD:refs/remotes/origin/HEAD"])
+                    .status()
+                    .map_err(|e| e.to_string())
+                    .and_then(|e| if e.success() {
+                        Ok(())
+                    } else {
+                        Err(e.to_string())
+                    })?;
             } else {
-                Err(e.to_string())
-            })?;
-    } else {
-        index_repo.remote_anonymous(repo_url)
-            .and_then(|mut r| {
-                with_authentication(repo_url, |creds| {
-                    let mut cb = RemoteCallbacks::new();
-                    cb.credentials(|a, b, c| creds(a, b, c));
-
-                    r.fetch(&["HEAD:refs/remotes/origin/HEAD"],
-                            Some(&mut fetch_options_from_proxy_url_and_callbacks(repo_url, http_proxy, cb)),
-                            None)
-                })
-            })
-            .map_err(|e| e.message().to_string())?;
+                index_repo.remote_anonymous(repo_url)
+                    .and_then(|mut r| {
+                        with_authentication(repo_url, |creds| {
+                            let mut cb = RemoteCallbacks::new();
+                            cb.credentials(|a, b, c| creds(a, b, c));
+
+                            r.fetch(&["HEAD:refs/remotes/origin/HEAD"],
+                                    Some(&mut fetch_options_from_proxy_url_and_callbacks(repo_url, http_proxy, cb)),
+                                    None)
+                        })
+                    })
+                    .map_err(|e| e.message().to_string())?;
+            }
+        }
+        Registry::Sparse(registry) => {
+            // TODO: proxy
+            let mut sucker = CurlMulti::new();
+            sucker.pipelining(true, true).map_err(|e| format!("pipelining: {}", e))?;
+
+            let conns: Vec<_> = Result::from_iter(packages.map(|pkg| {
+                let mut conn = CurlEasy::new(SparseHandler(pkg.as_ref().to_string(), vec![]));
+                conn.url(&split_package_path(pkg.as_ref()).into_iter().fold(repo_url.to_string(), |mut u, s| {
+                        if !u.ends_with('/') {
+                            u.push('/');
+                        }
+                        u.push_str(&s);
+                        u
+                    }))
+                    .map_err(|e| format!("url: {}", e))?;
+                conn.pipewait(true).map_err(|e| format!("pipewait: {}", e))?;
+                sucker.add2(conn).map_err(|e| format!("add2: {}", e))
+            }))?;
+
+            while sucker.perform().map_err(|e| format!("perform: {}", e))? > 0 {
+                sucker.wait(&mut [], Duration::from_millis(200)).map_err(|e| format!("wait: {}", e))?;
+            }
+
+            for mut c in conns {
+                let pkg = mem::take(&mut c.get_mut().0);
+                match c.response_code().map_err(|e| format!("response_code: {}", e))? {
+                    200 => {}
+                    rc @ 404 | rc @ 410 | rc @ 451 => panic!("package {} doesn't exist: HTTP {}", pkg, rc),
+                    rc => panic!("package {}: HTTP {}", pkg, rc),
+                }
+                let mut resp = crate_versions_str(str::from_utf8(&c.get_ref().1).map_err(|e| format!("package {}: {}", pkg, e))?);
+                resp.sort();
+                registry.insert(pkg, resp);
+            }
+        }
     }
     writeln!(out).map_err(|_| "failed to write post-update newline".to_string())?;

     Ok(())
 }

+// Could we theoretically parse the semvers on the fly? Yes. Is it more trouble than it's worth? Also probably yes; there
+// doesn't appear to be a good way to bubble errors.
+// Same applies to just waiting instead of processing via .messages()
+struct SparseHandler(String, Vec<u8>);
+
+impl CurlHandler for SparseHandler {
+    fn write(&mut self, data: &[u8]) -> Result<usize, CurlWriteError> {
+        self.1.extend(data);
+        Ok(data.len())
+    }
+}
+
+
+pub enum Registry {
+    Git(Repository),
+    Sparse(BTreeMap<String, Vec<Semver>>),
+}
+
+pub enum RegistryTree<'a> {
+    Git(Tree<'a>),
+    Sparse(()),
+}
+
+pub fn parse_registry_head(registry_repo: &Registry) -> Result<RegistryTree, GitError> {
+    match registry_repo {
+        Registry::Git(registry_repo) => {
+            registry_repo.revparse_single("FETCH_HEAD")
+                .or_else(|_| registry_repo.revparse_single("origin/HEAD"))
+                .map(|h| h.as_commit().unwrap().tree().unwrap())
+                .map(RegistryTree::Git)
+        }
+        Registry::Sparse(_) => Ok(RegistryTree::Sparse(())),
+    }
+}
+
+
 fn fetch_options_from_proxy_url_and_callbacks<'a>(repo_url: &str, proxy_url: Option<&str>, callbacks: RemoteCallbacks<'a>) -> FetchOptions<'a> {
     let mut ret = FetchOptions::new();
     if let Some(proxy_url) = proxy_url {
@@ -1068,7 +1224,8 @@ fn fetch_options_from_proxy_url_and_callbacks<'a>(repo_url: &str, proxy_url: Opt
     ret
 }

-/// Get the URL to update index from and the cargo name for it from the config file parallel to the specified crates file
+/// Get the URL to update index from, whether it's "sparse", and the cargo name for it from the config file parallel to the
+/// specified crates file
 ///
 /// First gets the source name corresponding to the given URL, if appropriate,
 /// then chases the `source.$SRCNAME.replace-with` chain,
@@ -1077,10 +1234,14 @@ fn fetch_options_from_proxy_url_and_callbacks<'a>(repo_url: &str, proxy_url: Opt
 /// Prepopulates with `source.crates-io.registry = "https://github.com/rust-lang/crates.io-index"`,
 /// as specified in the book
 ///
+/// If `registries_crates_io_protocol_sparse`, `https://github.com/rust-lang/crates.io-index` is replaced with
+/// `sparse+https://index.crates.io/`.
+///
 /// Consult [#107](https://github.com/nabijaczleweli/cargo-update/issues/107) and
 /// the Cargo Book for details: https://doc.rust-lang.org/cargo/reference/source-replacement.html,
 /// https://doc.rust-lang.org/cargo/reference/registries.html.
-pub fn get_index_url(crates_file: &Path, registry: &str) -> Result<(String, Cow<'static, str>), Cow<'static, str>> {
+pub fn get_index_url(crates_file: &Path, registry: &str, registries_crates_io_protocol_sparse: bool)
+                     -> Result<(String, bool, Cow<'static, str>), Cow<'static, str>> {
     let mut config_file = crates_file.with_file_name("config");
     let config = if let Ok(cfg) = fs::read_to_string(&config_file).or_else(|_| {
         config_file.set_file_name("config.toml");
@@ -1089,7 +1250,11 @@ pub fn get_index_url(crates_file: &Path, registry: &str) -> Result<(String, Cow<
         toml::from_str::<toml::Value>(&cfg).map_err(|e| format!("{} not TOML: {}", config_file.display(), e))?
     } else {
         if registry == "https://github.com/rust-lang/crates.io-index" {
-            return Ok((registry.to_string(), "crates-io".into()));
+            if registries_crates_io_protocol_sparse {
+                return Ok(("https://index.crates.io/".to_string(), true, "crates-io".into()));
+            } else {
+                return Ok((registry.to_string(), false, "crates-io".into()));
+            }
         } else {
             Err(format!("Non-crates.io registry specified and no config file found at {} or {}. \
                          Due to a Cargo limitation we will not be able to install from there \
@@ -1104,8 +1269,13 @@ pub fn get_index_url(crates_file: &Path, registry: &str) -> Result<(String, Cow<
     let mut cur_source = Cow::from(registry);

     // Special case, always present
-    registries.insert("crates-io", Cow::from("https://github.com/rust-lang/crates.io-index"));
-    if cur_source == "https://github.com/rust-lang/crates.io-index" {
+    registries.insert("crates-io",
+                      Cow::from(if registries_crates_io_protocol_sparse {
+                          "sparse+https://index.crates.io/"
+                      } else {
+                          "https://github.com/rust-lang/crates.io-index"
+                      }));
+    if cur_source == "https://github.com/rust-lang/crates.io-index" || cur_source == "sparse+https://index.crates.io/" {
         cur_source = "crates-io".into();
     }

@@ -1149,13 +1319,15 @@ pub fn get_index_url(crates_file: &Path, registry: &str) -> Result<(String, Cow<
         cur_source = Cow::from(&repl[..]);
     }

-    registries.get(&cur_source[..]).map(|reg| (reg.to_string(), cur_source.to_string().into())).ok_or_else(|| {
-        format!("Couldn't find appropriate source URL for {} in {} (resolved to {:?})",
-                registry,
-                config_file.display(),
-                cur_source)
-            .into()
-    })
+    registries.get(&cur_source[..])
+        .map(|reg| (reg.strip_prefix("sparse+").unwrap_or(reg).to_string(), reg.starts_with("sparse+"), cur_source.to_string().into()))
+        .ok_or_else(|| {
+            format!("Couldn't find appropriate source URL for {} in {} (resolved to {:?})",
+                    registry,
+                    config_file.display(),
+                    cur_source)
+                .into()
+        })
 }

 /// Based on
@@ -1267,30 +1439,35 @@ fn with_authentication<T, F>(url: &str, mut f: F) -> Result<T, GitError>
 }

-/// Find package data in the specified cargo index tree.
-pub fn find_package_data<'t>(cratename: &str, registry: &Tree<'t>, registry_parent: &'t Repository) -> Option<Vec<u8>> {
-    let clen = cratename.len().to_string();
+/// Split `cargo-update` into `[ca, rg, cargo-update]`, `jot` into `[3, j, jot]`, &c.
+pub fn split_package_path(cratename: &str) -> Vec<Cow<str>> {
     let mut elems = Vec::new();
     if cratename.len() <= 3 {
-        elems.push(&clen[..]);
+        elems.push(cratename.len().to_string().into());
     }
     match cratename.len() {
         0 => panic!("0-length cratename"),
         1 | 2 => {}
-        3 => elems.push(&cratename[0..1]),
+        3 => elems.push(cratename[0..1].into()),
         _ => {
-            elems.push(&cratename[0..2]);
-            elems.push(&cratename[2..4]);
+            elems.push(cratename[0..2].into());
+            elems.push(cratename[2..4].into());
         }
     }
-    elems.push(cratename);
+    elems.push(cratename.into());
+    elems
+}
+
+/// Find package data in the specified cargo index tree.
+pub fn find_package_data<'t>(cratename: &str, registry: &Tree<'t>, registry_parent: &'t Repository) -> Option<Vec<u8>> {
+    let elems = split_package_path(cratename);

-    let ent = registry.get_name(elems[0])?;
+    let ent = registry.get_name(&elems[0])?;
     let obj = ent.to_object(registry_parent).ok()?;
-    let ent = obj.as_tree()?.get_name(elems[1])?;
+    let ent = obj.as_tree()?.get_name(&elems[1])?;
     let obj = ent.to_object(registry_parent).ok()?;
     if elems.len() == 3 {
-        let ent = obj.as_tree()?.get_name(elems[2])?;
+        let ent = obj.as_tree()?.get_name(&elems[2])?;
         let obj = ent.to_object(registry_parent).ok()?;
         Some(obj.as_blob()?.content().into())
     } else {
orium commented 1 year ago

Works for me! Thanks for investigating and implementing this so quickly @nabijaczleweli

I can now remove ~/.cargo/registry :)

nabijaczleweli commented 1 year ago

Can you try the latest master branch (at least d6477fd7286000f5a858034274160ba59fd0bdf5)?

nabijaczleweli commented 1 year ago

Released in v12.0.0 – works for me, and an earlier version worked for the reporter.