K0nkere / DL_Dice-detection-project

DnD dice detection with CNN and transfer learning / Project for ML Bookcamp
0 stars 0 forks source link

How to: S3 / Minio / boto3 #15

Open K0nkere opened 1 year ago

K0nkere commented 1 year ago

S3

import boto3

# Build an S3 client pointed at the Yandex Object Storage endpoint.
# Credentials are not passed explicitly in this snippet.
session = boto3.session.Session()
s3 = session.client('s3', endpoint_url='https://storage.yandexcloud.net')

# Create a new bucket
s3.create_bucket(Bucket='bucket-name')

# Upload objects into the bucket

## From a string (stored with the COLD storage class)
s3.put_object(
    Bucket='bucket-name',
    Key='object_name',
    Body='TEST',
    StorageClass='COLD',
)

## From a local file: once at the bucket root, once under the "script/" prefix
for target_key in ('py_script.py', 'script/py_script.py'):
    s3.upload_file('this_script.py', 'bucket-name', target_key)

# List the objects in the bucket.
# A paginator is used because a single list_objects call returns at most
# 1000 keys; .get() is used because the response carries no 'Contents'
# entry when the bucket is empty, which would otherwise raise KeyError.
for page in s3.get_paginator('list_objects').paginate(Bucket='bucket-name'):
    for key in page.get('Contents', []):
        print(key['Key'])

# Delete several objects in a single request
forDeletion = [{'Key': name} for name in ('object_name', 'script/py_script.py')]
response = s3.delete_objects(
    Bucket='bucket-name',
    Delete={'Objects': forDeletion},
)

# Fetch an object and print its raw content
get_object_response = s3.get_object(Bucket='bucket-name', Key='py_script.py')
payload = get_object_response['Body'].read()
print(payload)

# Load an object from S3 into a pd.DataFrame
import io

import pandas as pd

obj = s3.get_object(
    Bucket='external-source-parsing',
    Key='telegram_polls_2023-08-11 05:00:45.517697.csv',
)
# Read the whole body into memory and hand it to pandas as a file-like buffer.
csv_buffer = io.BytesIO(obj['Body'].read())
pd.read_csv(csv_buffer)
K0nkere commented 5 months ago

Minio

import io

from minio import Minio
from minio.error import S3Error

# Connection settings for the MinIO client; the MINIO_* names must be defined
# before this point. secure=True is passed through unchanged.
connection_settings = {
    'endpoint': MINIO_ENDPOINT,
    'access_key': MINIO_ACCESS_KEY,
    'secret_key': MINIO_SECRET_KEY,
    'secure': True,
}
minio_client = Minio(**connection_settings)
# Work out the payload size used by the upload call further down;
# -1 marks "length unknown" for content that does not support len().
try:
    file_length = len(file_content)
except TypeError:
    # Only the "object has no len()" case is expected here. A bare except
    # would also hide unrelated problems (e.g. file_content not being
    # defined, or KeyboardInterrupt), so those are left to surface.
    file_length = -1

# List the objects whose names start with "<directory>/<filename_prefix>"
bucket_name = 'datahub-dbt-files'
directory = 'oil'
filename_prefix = ''

listing_prefix = f'{directory}/{filename_prefix}'
for entry in minio_client.list_objects(bucket_name=bucket_name, prefix=listing_prefix):
    print(entry.object_name)

# Read an object into a DataFrame
obj = 'filename.csv'
# Fetch the object BEFORE entering the try block: if get_object raises there
# is no response to clean up, and touching `response` in `finally` would
# raise a second error that hides the real one.
response = minio_client.get_object(bucket_name=bucket_name, object_name=f'{directory}/{obj}')
try:
    df = pd.read_csv(
        io.BytesIO(response.read()),
        parse_dates=['ts_start', 'ts_end'],
    )
finally:
    # Always close the stream and release the underlying connection,
    # even when parsing the CSV fails.
    response.close()
    response.release_conn()

# Read a parameters object into a dict
import json

obj = 'params/model_params.yml'
response = minio_client.get_object(bucket_name=bucket_name, object_name=f'{directory}/{obj}')
try:
    if response.status == 200:
        params_str = response.data.decode()
        # Parse the decoded text (the original passed the undefined name
        # `params` here).
        # NOTE(review): json.loads only succeeds when the .yml content is
        # JSON-compatible; general YAML needs a YAML parser — confirm the
        # actual file format.
        params_dict = json.loads(params_str)
finally:
    # Close the stream and release the connection whether or not parsing
    # succeeded.
    response.close()
    response.release_conn()

# Delete an object ("<filename>" is a placeholder for the real object name)
object_to_remove = f'{directory}/<filename>'
minio_client.remove_object(bucket_name, object_to_remove)

# Write (upload) an object from an in-memory payload.
# object_name, file_content, file_length and content_type must be defined
# before this call.
minio_client.put_object(
    bucket_name=bucket_name,
    object_name=object_name,
    data=io.BytesIO(file_content),
    length=file_length,
    content_type=content_type,
)