MinIO#
MinIO is a high-performance, distributed object storage system. It is built for large-scale AI/ML, data lake and database workloads. It is software-defined, runs on any cloud or on-premises infrastructure, and is 100% open source under the Apache V2 license. MinIO is designed to be used as private cloud storage, to store and share files and data. It is Amazon S3 compatible.
In this tutorial we will see how to use MinIO with Python and with the MinIO client CLI tool (mc).
MinIO Python API#
[ ]:
!pip install minio
[ ]:
from minio import Minio
import os
[ ]:
# Create the client used by every Python cell in this notebook. It targets the
# endpoint "minio:80"; the credentials are read from the MINIO_ACCESS_KEY and
# MINIO_SECRET_KEY environment variables (a KeyError is raised if one is unset).
# NOTE(review): secure=False presumably selects plain HTTP instead of TLS,
# matching the http:// URL used for the mc alias in this notebook -- confirm.
client = Minio("minio:80",
access_key=os.environ["MINIO_ACCESS_KEY"],
secret_key=os.environ["MINIO_SECRET_KEY"],
secure=False)
List buckets#
[ ]:
client.list_buckets()
List Objects#
[ ]:
# List objects information.
objects = client.list_objects("my-bucket")
for obj in objects:
print(obj)
Create a bucket#
[ ]:
client.make_bucket("BUCKET_NAME")
Copy a file to MinIO#
[ ]:
client.fput_object("BUCKET_NAME", "BUCKET_PATH", "LOCAL_FILE")
Copy a folder to MinIO#
[ ]:
import glob
def upload_local_directory_to_minio(local_path, bucket_name, minio_path):
    """Recursively upload the contents of a local directory to a bucket.

    Each regular file below ``local_path`` is sent with
    ``client.fput_object`` under the key ``minio_path/<relative path>``,
    always using "/" as the key separator. Entries whose names start with a
    dot are not uploaded, because the ``*`` glob pattern does not match them.

    Args:
        local_path: Existing local directory whose contents are uploaded.
        bucket_name: Name of the destination bucket.
        minio_path: Key prefix inside the bucket; may be an empty string.

    Raises:
        NotADirectoryError: If ``local_path`` is not an existing directory.
    """
    if not os.path.isdir(local_path):
        # Raise explicitly: an ``assert`` disappears when Python runs with -O.
        raise NotADirectoryError(f"{local_path!r} is not a directory")

    # Escape the directory part so characters such as "[" are taken literally.
    pattern = os.path.join(glob.escape(local_path), "*")
    for local_file in glob.glob(pattern):
        # Build the key from the entry name rather than slicing by
        # len(local_path): the slice dropped the first character of the name
        # whenever local_path ended with a path separator.
        entry_name = os.path.basename(local_file)
        remote_path = os.path.join(minio_path, entry_name)
        remote_path = remote_path.replace(os.sep, "/")  # "\" -> "/" on Windows
        local_file = local_file.replace(os.sep, "/")  # "\" -> "/" on Windows
        if os.path.isfile(local_file):
            client.fput_object(bucket_name, remote_path, local_file)
        else:
            # Sub-directory: descend with the extended key prefix.
            upload_local_directory_to_minio(
                local_file, bucket_name, remote_path)
[ ]:
upload_local_directory_to_minio("LOCAL_FOLDER","BUCKET","BUCKET_PATH")
Copy a file from MinIO#
[ ]:
client.fget_object("BUCKET_NAME", "FILENAME", "LOCAL_FILE")
Copy a folder from MinIO#
[ ]:
def download_minio_directory_to_local(minio_path, local_path, bucket_name):
    """Download every object below a key prefix into a local directory.

    Objects are listed with ``client.list_objects(..., prefix=minio_path,
    recursive=True)``. Each one is fetched with ``client.fget_object`` into
    ``local_path``, at the path formed by the part of its key that follows
    ``minio_path``. Missing local directories are created on the way.

    Args:
        minio_path: Key prefix inside the bucket to download.
        local_path: Local directory that receives the files.
        bucket_name: Name of the source bucket.
    """
    objects = client.list_objects(bucket_name, prefix=minio_path, recursive=True)
    for obj in objects:
        remote_path = obj.object_name
        # Drop the prefix and any "/" left in front of the remainder. Without
        # the lstrip, a prefix given without a trailing "/" leaves "/name",
        # and os.path.join would then discard local_path and write to the
        # filesystem root.
        relative_name = remote_path[len(minio_path):].lstrip("/")
        local_file = os.path.join(local_path, relative_name)
        local_file = local_file.replace(os.sep, "/")  # "\" -> "/" on Windows
        local_dir = os.path.dirname(local_file)
        if local_dir:
            # An empty dirname means the current directory; makedirs("")
            # would raise. exist_ok avoids the check-then-create race.
            os.makedirs(local_dir, exist_ok=True)
        client.fget_object(bucket_name, remote_path, local_file)
MinIO CLI#
[ ]:
%%bash
# Download the mc client binary to $HOME/minio-binaries/mc (curl creates the
# directory if needed) and make it executable.
curl https://dl.min.io/client/mc/release/linux-amd64/mc \
--create-dirs \
-o $HOME/minio-binaries/mc
chmod +x $HOME/minio-binaries/mc
# NOTE(review): this export only changes PATH inside the shell started for this
# %%bash cell. The later `!mc ...` cells run in separate shells, so they will
# presumably not find mc unless $HOME/minio-binaries is added to the notebook
# kernel's own PATH (or mc is already installed) -- confirm.
export PATH=$PATH:$HOME/minio-binaries/
[ ]:
!mc alias set minio http://minio:80 $MINIO_ACCESS_KEY $MINIO_SECRET_KEY
List buckets#
[ ]:
!mc ls minio
Create a bucket#
[ ]:
!mc mb minio/BUCKET_NAME
Copy a file or folder to MinIO#
[ ]:
!mc cp [--recursive] LOCAL_FILE minio/BUCKET_NAME/BUCKET_PATH
Copy a file or folder from MinIO#
[ ]:
!mc cp [--recursive] minio/BUCKET_NAME/BUCKET_PATH LOCAL_FILE
Sync a folder from MinIO#
[ ]:
!mc mirror [--watch] minio/BUCKET LOCAL_FOLDER
Create a link to externally upload files to MinIO#
[ ]:
!mc share upload --recursive --expire TIME minio/BUCKET
[ ]:
from pathlib import Path
import os
from tqdm import tqdm
import subprocess
if __name__ == "__main__":
    # Upload every file found below ``rootdir`` by running one curl command
    # per file. Each file is sent with the form fields ``key`` (object key
    # under ``bucket_path``) and ``file`` (the local file content).
    rootdir = 'LOCAL_PATH'
    bucket_path = 'BUCKET_PATH'

    # NOTE(review): the placeholders below are presumably meant to be replaced
    # with the curl command printed by `mc share upload` -- confirm.
    curl_command = "curl <MINIO_URL>/<BUCKET>"
    curl_command += " <LINK>"
    main_curl_command = curl_command.split(" ")

    for subdir, _dirs, files in os.walk(rootdir):
        subdir_path = Path(subdir)
        for file_name in tqdm(files):
            local_file = subdir_path.joinpath(file_name)
            # Object keys always use "/", so render the relative path in POSIX
            # form; the platform form would put "\" into the key on Windows.
            relative_key = local_file.relative_to(rootdir).as_posix()
            command = [
                *main_curl_command,
                "-F", f"key={bucket_path}/{relative_key}",
                "-F", f"file=@{local_file}",
            ]
            return_code = subprocess.call(command)
            if return_code != 0:
                # Keep going with the remaining files, but do not hide the
                # failure.
                print(f"curl exited with status {return_code} for {local_file}")