Azure / azure-storage-python

Microsoft Azure Storage Library for Python
https://azure-storage.readthedocs.io
MIT License

Large File Upload Max_Concurrency #698

Open · gismcieri opened this issue 2 years ago

gismcieri commented 2 years ago

Hi,

I am using the following code from one of the other posts in the Azure SDK repo, and I keep seeing mentions of a max_concurrency (or max_connections) parameter. I was wondering whether it can be used in this code to upload more chunks at the same time.

import os
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.storage.blob import ContentSettings, ContainerClient, BlobBlock
import uuid

# Instantiate a new BlobServiceClient using a connection string
blob_service_client = BlobServiceClient.from_connection_string("Conn String")

# Instantiate a new ContainerClient
container_client = blob_service_client.get_container_client('cctvvideo')
blob_client = container_client.get_blob_client(r"file.mp4")

# Upload data
block_list = []
chunk_size = 4000000
with open(r'C:\Temp\file.mp4', 'rb') as f:
    while True:
        read_data = f.read(chunk_size)
        if not read_data:
            break  # done
        blk_id = str(uuid.uuid4())
        blob_client.stage_block(block_id=blk_id, data=read_data)
        block_list.append(BlobBlock(block_id=blk_id))
        print(uuid.uuid4())

blob_client.commit_block_list(block_list)

Thanks in advance.

ljluestc commented 10 months ago
import os
from azure.storage.blob import BlobServiceClient, BlobClient, BlobBlock
import uuid
from concurrent.futures import ThreadPoolExecutor

# Instantiate a new BlobServiceClient using a connection string
blob_service_client = BlobServiceClient.from_connection_string("Conn String")

# Instantiate a new ContainerClient
container_client = blob_service_client.get_container_client('cctvvideo')
blob_client = container_client.get_blob_client(r"file.mp4")

# Stage one chunk under a block ID that was assigned in read order
def upload_chunk(blk_id, chunk_data):
    blob_client.stage_block(block_id=blk_id, data=chunk_data)
    print(blk_id)

# Upload data: append each BlobBlock to block_list *before* handing the
# chunk to the pool, so the list stays in file order even though the
# uploads finish out of order
block_list = []
futures = []
chunk_size = 4000000
with open(r'C:\Temp\file.mp4', 'rb') as f:
    with ThreadPoolExecutor(max_workers=5) as executor:  # adjust max_workers based on your needs
        while True:
            read_data = f.read(chunk_size)
            if not read_data:
                break  # done
            blk_id = str(uuid.uuid4())
            block_list.append(BlobBlock(block_id=blk_id))
            futures.append(executor.submit(upload_chunk, blk_id, read_data))

# Surface any staging error before committing the block list
for future in futures:
    future.result()

blob_client.commit_block_list(block_list)

ThreadPoolExecutor runs upload_chunk concurrently for each chunk, and max_workers controls how many uploads are in flight at once. Each BlobBlock is appended to block_list before its chunk is submitted, so commit_block_list reassembles the blob in the original file order even though the uploads complete out of order; calling future.result() on each future surfaces any staging failure before the commit.
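
To answer the original question directly: if you don't need manual block management, the v12 azure-storage-blob package (which the snippets above already use) can parallelize the chunking for you. BlobClient.upload_blob accepts the max_concurrency parameter the issue title asks about. A minimal sketch, reusing the same placeholder connection string, container, and file paths as the code above:

from azure.storage.blob import BlobServiceClient

# Same placeholder connection string and names as the snippets above
blob_service_client = BlobServiceClient.from_connection_string("Conn String")
container_client = blob_service_client.get_container_client('cctvvideo')
blob_client = container_client.get_blob_client("file.mp4")

with open(r'C:\Temp\file.mp4', 'rb') as f:
    # For data above the single-put threshold, the client splits the
    # stream into blocks and uploads up to max_concurrency in parallel
    blob_client.upload_blob(f, overwrite=True, max_concurrency=5)

This stages and commits the blocks internally, so there is no block_list to maintain.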