Skip to content

Module files

This module contains async functions for listing, uploading, and downloading files and folders in deepset Cloud.

list_files

async def list_files(
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME,
        name: Optional[str] = None,
        content: Optional[str] = None,
        odata_filter: Optional[str] = None,
        batch_size: int = 100,
        timeout_s: Optional[int] = None) -> AsyncGenerator[List[File], None]

List all files in a workspace.

Arguments:

  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to list the files from. It uses the workspace from the .ENV file by default.
  • name: Name of the file to filter by.
  • content: Content of the file to filter by.
  • odata_filter: The OData filter to apply to the file list. For example, odata_filter="category eq 'news'" lists files with metadata {"meta": {"category": "news"}}.
  • timeout_s: The timeout in seconds for this API call.
  • batch_size: Batch size for the listing.

Returns:

An async generator that yields batches (lists) of files.

list_upload_sessions

async def list_upload_sessions(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    workspace_name: str = DEFAULT_WORKSPACE_NAME,
    is_expired: Optional[bool] = None,
    batch_size: int = 100,
    timeout_s: Optional[int] = None
) -> AsyncGenerator[List[UploadSessionDetail], None]

List the details of all upload sessions for a given workspace, including the closed sessions.

Arguments:

  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to list the upload sessions from. It uses the workspace from the .ENV file by default.
  • is_expired: Whether to list expired upload sessions.
  • batch_size: Batch size for the listing.
  • timeout_s: Timeout in seconds for the API requests.

Returns:

An async generator that yields batches (lists) of upload session details.

get_upload_session

async def get_upload_session(
        session_id: UUID,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        workspace_name: str = DEFAULT_WORKSPACE_NAME) -> UploadSessionStatus

Get the status of an upload session.

Arguments:

  • session_id: ID of the upload session to get the status for.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace that contains the upload session.

Returns:

Status of the upload session.

upload

async def upload(paths: List[Path],
                 api_key: Optional[str] = None,
                 api_url: Optional[str] = None,
                 workspace_name: str = DEFAULT_WORKSPACE_NAME,
                 write_mode: WriteMode = WriteMode.KEEP,
                 blocking: bool = True,
                 timeout_s: Optional[int] = None,
                 show_progress: bool = True,
                 recursive: bool = False) -> S3UploadSummary

Upload a folder to deepset Cloud.

Arguments:

  • paths: Paths to the files or folders to upload. If a folder contains unsupported files, they're skipped during the upload. Supported file formats are TXT and PDF.
  • api_key: API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
  • write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
  • blocking: Whether to wait for the upload to finish.
  • timeout_s: Timeout in seconds for the upload.
  • show_progress: Shows the upload progress.
  • recursive: Uploads files from subdirectories as well.

download

async def download(workspace_name: str = DEFAULT_WORKSPACE_NAME,
                   file_dir: Optional[Union[Path, str]] = None,
                   name: Optional[str] = None,
                   content: Optional[str] = None,
                   odata_filter: Optional[str] = None,
                   include_meta: bool = True,
                   batch_size: int = 50,
                   api_key: Optional[str] = None,
                   api_url: Optional[str] = None,
                   show_progress: bool = True,
                   timeout_s: Optional[int] = None) -> None

Download a folder from deepset Cloud.

Downloads all files from a workspace to a local folder.

Arguments:

  • workspace_name: Name of the workspace to download the files from. It uses the workspace from the .ENV file by default.
  • file_dir: Path to the local folder to download the files to. If the workspace contains unsupported files, they're skipped during the download. Supported file formats are TXT and PDF.
  • include_meta: Whether to include the file meta in the folder.
  • batch_size: Batch size for the listing.
  • api_key: API key to use for authentication.
  • api_url: API URL to use for authentication.
  • show_progress: Shows the download progress.

upload_texts

async def upload_texts(files: List[DeepsetCloudFile],
                       api_key: Optional[str] = None,
                       api_url: Optional[str] = None,
                       workspace_name: str = DEFAULT_WORKSPACE_NAME,
                       write_mode: WriteMode = WriteMode.KEEP,
                       blocking: bool = True,
                       timeout_s: Optional[int] = None,
                       show_progress: bool = True) -> S3UploadSummary

Upload raw texts to deepset Cloud.

Arguments:

  • files: List of DeepsetCloudFiles to upload.
  • api_key: deepset Cloud API key to use for authentication.
  • api_url: API URL to use for authentication.
  • workspace_name: Name of the workspace to upload the files to. It uses the workspace from the .ENV file by default.
  • write_mode: Specifies what to do when a file with the same name already exists in the workspace. Possible options are: KEEP - uploads the file with the same name and keeps both files in the workspace. OVERWRITE - overwrites the file that is in the workspace. FAIL - fails to upload the file with the same name.
  • blocking: Whether to wait for the files to be listed and displayed in deepset Cloud. This may take a couple of minutes.
  • timeout_s: Timeout in seconds for the blocking parameter.
  • show_progress: Shows the upload progress.