Module files

Sync client for files workflow.

upload

def upload(paths: List[Path],
           api_key: Optional[str] = None,
           api_url: Optional[str] = None,
           workspace_name: str = DEFAULT_WORKSPACE_NAME,
           write_mode: WriteMode = WriteMode.KEEP,
           blocking: bool = True,
           timeout_s: Optional[int] = None,
           show_progress: bool = True,
           recursive: bool = False,
           desired_file_types: Optional[List[str]] = None,
           enable_parallel_processing: bool = False) -> S3UploadSummary

Upload a folder to deepset Cloud.

Arguments:

paths: List of paths to the folders to upload. If a folder contains unsupported file types, they're skipped. deepset Cloud supports csv, docx, html, json, md, txt, pdf, pptx, xlsx, xml.
api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
blocking: Whether to wait for the files to be uploaded and displayed in deepset Cloud.
timeout_s: Timeout in seconds for the blocking parameter.
show_progress: Shows the upload progress.
recursive: Uploads files from subfolders as well.
desired_file_types: A list of allowed file types to upload, defaults to ".txt, .pdf".
enable_parallel_processing: If True, deepset Cloud ingests the files in parallel. Use this to speed up the upload process, but only if you are not running concurrent uploads for the same files.

download

def download(workspace_name: str = DEFAULT_WORKSPACE_NAME,
             file_dir: Optional[Union[Path, str]] = None,
             name: Optional[str] = None,
             odata_filter: Optional[str] = None,
             include_meta: bool = True,
             batch_size: int = 50,
             api_key: Optional[str] = None,
             api_url: Optional[str] = None,
             show_progress: bool = True,
             timeout_s: Optional[int] = None) -> None

Download a folder from deepset Cloud.

Downloads all files from a workspace to a local folder.

Arguments:

workspace_name: Name of the workspace to download the files from. It uses the workspace from the .ENV file by default.
file_dir: Path to the local folder to download the files to.
name: Name of the file to filter by.
odata_filter: OData filter to apply to the file metadata.
include_meta: Whether to include the file meta in the folder.
batch_size: Batch size for the listing.
api_key: API key to use for authentication.
api_url: API URL to use for authentication.
show_progress: Shows the download progress.
timeout_s: Timeout in seconds for the API requests.

upload_texts

def upload_texts(files: List[DeepsetCloudFile],
                 api_key: Optional[str] = None,
                 api_url: Optional[str] = None,
                 workspace_name: str = DEFAULT_WORKSPACE_NAME,
                 write_mode: WriteMode = WriteMode.KEEP,
                 blocking: bool = True,
                 timeout_s: Optional[int] = None,
                 show_progress: bool = True,
                 enable_parallel_processing: bool = False) -> S3UploadSummary

Upload texts to deepset Cloud.

Arguments:

files: List of DeepsetCloudFiles to upload.
api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
blocking: Whether to wait for the files to be uploaded and listed in deepset Cloud.
timeout_s: Timeout in seconds for the blocking parameter.
show_progress: Shows the upload progress.
enable_parallel_processing: If True, deepset Cloud ingests the files in parallel. Use this to speed up the upload process, but only if you are not running concurrent uploads for the same files.

Example:

from deepset_cloud_sdk.workflows.sync_client.files import upload_texts, DeepsetCloudFile

upload_texts(
    api_key="<deepsetCloud_API_key>",
    workspace_name="<default_workspace>", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used
    files=[
        DeepsetCloudFile(
            name="example.txt",
            text="this is text",
            meta={"key": "value"},  # optional
        )
    ],
    blocking=True,  # optional, by default True
    timeout_s=300,  # optional, by default 300
)

upload_bytes

def upload_bytes(files: List[DeepsetCloudFileBytes],
                 api_key: Optional[str] = None,
                 api_url: Optional[str] = None,
                 workspace_name: str = DEFAULT_WORKSPACE_NAME,
                 write_mode: WriteMode = WriteMode.KEEP,
                 blocking: bool = True,
                 timeout_s: Optional[int] = None,
                 show_progress: bool = True,
                 enable_parallel_processing: bool = False) -> S3UploadSummary

Upload any supported file types to deepset Cloud. These include .csv, .docx, .html, .json, .md, .txt, .pdf, .pptx, .xlsx and .xml.

Arguments:

files: List of DeepsetCloudFileBytes to upload.
api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
blocking: Whether to wait for the files to be uploaded and listed in deepset Cloud.
timeout_s: Timeout in seconds for the blocking parameter.
show_progress: Shows the upload progress.
enable_parallel_processing: If True, deepset Cloud ingests the files in parallel. Use this to speed up the upload process, but only if you are not running concurrent uploads for the same files.

get_upload_session

def get_upload_session(
        session_id: UUID,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME) -> UploadSessionStatus

Get the status of an upload session.

Arguments:

session_id: ID of the upload session to get the status for.
api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace that the upload session belongs to.

list_files

def list_files(
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME,
        name: Optional[str] = None,
        odata_filter: Optional[str] = None,
        batch_size: int = 100,
        timeout_s: Optional[int] = None) -> Generator[List[File], None, None]

List files in a deepset Cloud workspace.

Arguments:

api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace to list the files from. It uses the workspace from the .ENV file by default.
name: Name of the file to filter for.
odata_filter: OData filter to apply to the file list. For example, odata_filter="category eq 'news'" lists files with the metadata {"meta": {"category": "news"}}.
batch_size: Batch size to use for the file list.
timeout_s: Timeout in seconds for the API requests.

list_upload_sessions

def list_upload_sessions(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    workspace_name: str = DEFAULT_WORKSPACE_NAME,
    is_expired: Optional[bool] = False,
    batch_size: int = 100,
    timeout_s: Optional[int] = None
) -> Generator[List[UploadSessionDetail], None, None]

List the details of all upload sessions, including the closed ones.

Arguments:

api_key: deepset Cloud API key to use for authentication.
api_url: API URL to use for authentication.
workspace_name: Name of the workspace whose sessions you want to list. It uses the workspace from the .ENV file by default.
is_expired: Lists expired sessions.
batch_size: Batch size to use for the session list.
timeout_s: Timeout in seconds for the API request.