Closed ArtemisDicoTiar closed 2 years ago
I have a function that downloads the metadata first then calls the normal download if the data is valid and the file doesn't exist. Might be useful either way for ideas...
`url_set` contains a list of file IDs.
`dest_path` contains the target directory with the trailing directory separator already appended; the directory already exists.
def fetch_file_metadata_from_google_drive(id):
    """Probe a Google Drive file id and collect metadata about it.

    Issues a single streaming GET against the Drive download URL and
    inspects the response headers instead of saving the file body.

    Args:
        id: Google Drive file id as a string (it is concatenated into the URL).

    Returns:
        A dict that always contains:
            'id'       -- the file id passed in
            'valid'    -- True only when a filename was parsed from the
                          'Content-Disposition' response header
            'filename' -- the sanitized filename, or '' when not valid
            'url'      -- the download URL that was requested
            'token'    -- whatever get_confirm_token(response) returned
        plus one entry per HTTP response header (value converted to str).
        Headers are copied after the fixed keys, so a header with the same
        name as a fixed key would overwrite it.
        When there is no 'Content-Disposition' header, either 'redirect_url'
        (confirmation URL extracted from the page) or 'browser_url' (when
        extraction raised RuntimeError) is added.
    """
    URL = "https://drive.google.com/uc?id="+id+"&authuser=0&export=download&confirm=t"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"  # NOQA
    }
    meta = {
        'id': id,
        'valid': False,
        'filename': '',
        'url': URL,
    }

    # The session is closed on every exit path so pooled connections are
    # released even when this function is called many times in a loop.
    with requests.Session() as session:
        response = session.get(
            URL,
            allow_redirects=True,
            headers=headers,
            stream=True,
            verify=True,
        )
        try:
            # NOTE(review): get_confirm_token is defined elsewhere; presumably
            # it extracts Drive's download-confirmation token -- confirm.
            meta['token'] = get_confirm_token(response)
            for k in response.headers:
                meta[k] = str(response.headers[k])

            if 'Content-Disposition' in response.headers:
                # Get the filename from the response header 'Content-Disposition'
                match = re.search(r'filename="(?P<filename>.+)"', response.headers['Content-Disposition'])
                # Make sure we found the filename field inside Content-Disposition
                if match is None:
                    print('\n\nERROR: Unable to retrieve `dest_path` from `file_id`, please set it manually.')
                    return meta

                if platform == 'win32':
                    # Make it Windows safe, stripping: \/<>:"|?*
                    remove_characters = dict((ord(char), None) for char in '\\/<>:"|?*')
                else:
                    # Make it macOS and linux safe, stripping: /
                    remove_characters = dict((ord(char), None) for char in '/')

                meta['filename'] = match['filename'].translate(remove_characters)
                meta['valid'] = True
                print(json.dumps(meta, indent=4))
            else:
                # No attachment header: the body is expected to be an HTML
                # page that needs a redirect with confirmation.
                try:
                    url = get_url_from_gdrive_confirmation(response.text)
                except RuntimeError as e:
                    print("Access denied with the following error:")
                    error = "\n".join(textwrap.wrap(str(e)))
                    error = indent(error, "\t")
                    print("\n", error, "\n", file=sys.stderr)
                    print(
                        "You may still be able to access the file from the browser:",
                        file=sys.stderr,
                    )
                    print("\n\t", URL, "\n", file=sys.stderr)
                    meta['browser_url'] = URL
                    return meta
                print("redirect_url:", id, "-->", url)
                meta['redirect_url'] = url
            return meta
        finally:
            # With stream=True the connection stays checked out until the
            # body is consumed or the response is closed; on the success
            # path the body is never read, so close explicitly.
            response.close()
# Download every file in url_set whose metadata is valid and that is not
# already present in dest_path.
for file_id in url_set:
    meta = fetch_file_metadata_from_google_drive(file_id)

    # Metadata lookup failed: report and move on to the next id.
    if not meta['valid']:
        print("INVALID:", file_id)
        continue

    # dest_path already ends with a directory separator, so plain
    # concatenation yields the full target path.
    epath = dest_path + meta['filename']
    if os.path.exists(epath):
        print("ALREADY HAVE:", meta['filename'])
        continue

    gdown.download(meta['url'], output=dest_path, quiet=False)
Downloading a file that already exists at the destination path results in a redundant download. I hope there will be an option to handle this in both the CLI and the Python package.