Skip to content

Module files

Sync client for files workflow.

upload

def upload(paths: List[Path],
           api_key: Optional[str] = None,
           api_url: Optional[str] = None,
           workspace_name: str = DEFAULT_WORKSPACE_NAME,
           write_mode: WriteMode = WriteMode.KEEP,
           blocking: bool = True,
           timeout_s: Optional[int] = None,
           show_progress: bool = True,
           recursive: bool = False,
           desired_file_types: Optional[List[str]] = None,
           enable_parallel_processing: bool = False,
           safe_mode: bool = False) -> S3UploadSummary

Upload a folder to deepset Cloud.

Arguments:

  • paths: List of paths to the folders to upload. If a folder contains unsupported file types, they're skipped. deepset Cloud supports csv, docx, html, json, md, txt, pdf, pptx, xlsx, xml.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
  • write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
  • blocking: Whether to wait for the files to be uploaded and displayed in deepset Cloud.
  • timeout_s: Timeout in seconds for the blocking parameter.
  • show_progress: Shows the upload progress.
  • recursive: Uploads files from subfolders as well.
  • desired_file_types: A list of allowed file types to upload, defaults to [".txt", ".pdf", ".docx", ".pptx", ".xlsx", ".xml", ".csv", ".html", ".md", ".json"]
  • enable_parallel_processing: If True, deepset Cloud ingests files in parallel. Use this to speed up the upload process. Make sure you are not running concurrent uploads for the same files.
  • safe_mode: If True, disables ingesting files in parallel.

download

def download(workspace_name: str = DEFAULT_WORKSPACE_NAME,
             file_dir: Optional[Union[Path, str]] = None,
             name: Optional[str] = None,
             odata_filter: Optional[str] = None,
             include_meta: bool = True,
             batch_size: int = 50,
             api_key: Optional[str] = None,
             api_url: Optional[str] = None,
             show_progress: bool = True,
             timeout_s: Optional[int] = None,
             safe_mode: bool = False) -> None

Download files from deepset Cloud.

Downloads all files from a workspace to a local folder.

Arguments:

  • workspace_name: Name of the workspace to download the files from. It uses the workspace from the .ENV file by default.
  • file_dir: Path to the local folder to download the files to.
  • name: Name of the file to filter by.
  • odata_filter: OData filter to apply to the file metadata.
  • include_meta: Whether to include the file meta in the folder.
  • batch_size: Batch size for the listing.
  • api_key: API key to use for authentication.
  • api_url: API URL to use for authentication.
  • show_progress: Shows the download progress.
  • timeout_s: Timeout in seconds for the API requests.
  • safe_mode: If True, disables processing files in parallel.

upload_texts

def upload_texts(files: List[DeepsetCloudFile],
                 api_key: Optional[str] = None,
                 api_url: Optional[str] = None,
                 workspace_name: str = DEFAULT_WORKSPACE_NAME,
                 write_mode: WriteMode = WriteMode.KEEP,
                 blocking: bool = True,
                 timeout_s: Optional[int] = None,
                 show_progress: bool = True,
                 enable_parallel_processing: bool = False) -> S3UploadSummary

Upload texts to deepset Cloud.

Arguments:

  • files: List of DeepsetCloudFiles to upload.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
  • write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
  • blocking: Whether to wait for the files to be uploaded and listed in deepset Cloud.
  • timeout_s: Timeout in seconds for the blocking parameter.
  • show_progress: Shows the upload progress.
  • enable_parallel_processing: If True, deepset Cloud ingests files in parallel. Use this to speed up the upload process. Make sure you are not running concurrent uploads for the same files.

Example:

from deepset_cloud_sdk.workflows.sync_client.files import upload_texts, DeepsetCloudFile

upload_texts(
    api_key="<deepsetCloud_API_key>",
    workspace_name="<default_workspace>", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used
    files=[
        DeepsetCloudFile(
            name="example.txt",
            text="this is text",
            meta={"key": "value"},  # optional
        )
    ],
    blocking=True,  # optional, by default True
    timeout_s=300,  # optional, by default 300
)

upload_bytes

def upload_bytes(files: List[DeepsetCloudFileBytes],
                 api_key: Optional[str] = None,
                 api_url: Optional[str] = None,
                 workspace_name: str = DEFAULT_WORKSPACE_NAME,
                 write_mode: WriteMode = WriteMode.KEEP,
                 blocking: bool = True,
                 timeout_s: Optional[int] = None,
                 show_progress: bool = True,
                 enable_parallel_processing: bool = False) -> S3UploadSummary

Upload any supported file types to deepset Cloud. These include .csv, .docx, .html, .json, .md, .txt, .pdf, .pptx, .xlsx and .xml.

Arguments:

  • files: List of DeepsetCloudFileBytes objects to upload.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
  • write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
  • blocking: Whether to wait for the files to be uploaded and listed in deepset Cloud.
  • timeout_s: Timeout in seconds for the blocking parameter.
  • show_progress: Shows the upload progress.
  • enable_parallel_processing: If True, deepset Cloud ingests files in parallel. Use this to speed up the upload process. Make sure you are not running concurrent uploads for the same files.

get_upload_session

def get_upload_session(
        session_id: UUID,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME) -> UploadSessionStatus

Get the status of an upload session.

Arguments:

  • session_id: ID of the upload session to get the status for.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace the upload session belongs to.

list_files

def list_files(
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME,
        name: Optional[str] = None,
        odata_filter: Optional[str] = None,
        batch_size: int = 100,
        timeout_s: Optional[int] = None) -> Generator[List[File], None, None]

List files in a deepset Cloud workspace.

Arguments:

  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to list the files from. It uses the workspace from the .ENV file by default.
  • name: Name of the file to filter for.
  • odata_filter: OData filter to apply to the file list. For example, odata_filter="category eq 'news'" lists files with the metadata {"meta": {"category": "news"}}.
  • batch_size: Batch size to use for the file list.
  • timeout_s: Timeout in seconds for the API requests.

list_upload_sessions

def list_upload_sessions(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    workspace_name: str = DEFAULT_WORKSPACE_NAME,
    is_expired: Optional[bool] = False,
    batch_size: int = 100,
    timeout_s: Optional[int] = None
) -> Generator[List[UploadSessionDetail], None, None]

List the details of all upload sessions, including the closed ones.

Arguments:

  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace whose sessions you want to list. It uses the workspace from the .ENV file by default.
  • is_expired: Lists expired sessions.
  • batch_size: Batch size to use for the session list.
  • timeout_s: Timeout in seconds for the API request.