Source code for aws_resource_search.downloader

# -*- coding: utf-8 -*-

"""
This module provides utilities to download AWS resource data from AWS API.
"""

import typing as T
import dataclasses

import jmespath
from iterproxy import IterProxy

from .base_model import BaseModel

if T.TYPE_CHECKING:  # pragma: no cover
    from boto_session_manager import BotoSesManager
    import sayt.api as sayt

# The ``sayt.T_DOCUMENT`` member is written as a quoted forward reference
# because ``sayt`` is imported only under ``T.TYPE_CHECKING`` and is therefore
# not available as a name at runtime.
T_RESULT_DATA = T.Union["sayt.T_DOCUMENT", str]
"""
Type hint for boto3 API result data. Each one represents a single AWS resource.
"""


class ResourceIterproxy(IterProxy[T_RESULT_DATA]):
    """
    Iterator proxy over the AWS resource records pulled out of boto3 API
    responses.

    This is a plain specialization of ``IterProxy`` for :data:`T_RESULT_DATA`
    items; it adds no attributes or methods of its own, so every capability
    comes from the parent class.

    Ref: https://github.com/MacHu-GWU/iterproxy-project
    """
@dataclasses.dataclass
class ResultPath(BaseModel):
    """
    Defines how to extract list of AWS resource data from boto3 API call response.

    For example, the `s3_client.list_buckets <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/list_buckets.html>`_
    API call returns the following response:

    .. code-block:: python

        {
            'Buckets': [
                {
                    'Name': 'string',
                    'CreationDate': datetime(2015, 1, 1)
                },
            ],
            'Owner': {
                'DisplayName': 'string',
                'ID': 'string'
            }
        }

    We aim to extract the list of S3 bucket data from the ``Buckets`` field
    of the response.

    Similarly, for EC2 Instance, the result path for
    `ec2_client.describe_instances <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2/client/describe_instances.html>`_
    API call response is ``Reservations[].Instances[]``.

    :param path: the `jmespath <https://jmespath.org/>`_ notation to the
        result path. It will return an empty list if the result path
        doesn't exist in the response.
    :param _compiled: the compiled jmespath expression. It is not an
        ``__init__`` argument; it is derived from ``path`` in
        ``__post_init__``. This class will be created only once for each
        AWS resource type, so that we should cache it for better performance.
    """

    path: str = dataclasses.field()
    _compiled: jmespath.parser.ParsedResult = dataclasses.field(init=False)

    def __post_init__(self):
        # Append an "or empty list" fallback so that a response which lacks
        # the path evaluates to ``[]`` instead of ``None``; callers can then
        # always iterate over the result of :meth:`extract`.
        self._compiled = jmespath.compile(self.path + " || `[]`")

    def extract(self, response: dict) -> T.List[T_RESULT_DATA]:
        """
        Extract list of AWS resource data from boto3 API call response.

        :param response: original boto3 API response

        :return: the value found at ``path`` in ``response``, or an empty
            list when the path is absent. For example, for
            ``s3_client.list_buckets``, it will return:

        .. code-block:: python

            [
                {
                    'Name': 'string',
                    'CreationDate': datetime(2015, 1, 1)
                },
                ...
            ]
        """
        return self._compiled.search(response)
def list_resources(
    bsm: "BotoSesManager",
    service: str,
    method: str,
    is_paginator: bool,
    boto_kwargs: T.Optional[dict],
    result_path: ResultPath,
) -> ResourceIterproxy:
    """
    List AWS resources by calling a boto3 client method and flattening the
    response(s) into a single stream of resource records.

    The boto3 client is created and the API is called inside a generator,
    so that work starts only once the generator begins to be consumed.

    Example:

    .. code-block:: python

        >>> for iam_group_data in list_resources(
        ...     bsm=bsm,
        ...     service="iam",
        ...     method="list_groups",
        ...     is_paginator=True,
        ...     boto_kwargs=dict(
        ...         PaginationConfig=dict(
        ...             MaxItems=9999,
        ...             PageSize=1000,
        ...         )
        ...     ),
        ...     result_path=ResultPath(path="Groups"),
        ... ):
        ...     print(iam_group_data)

    :param bsm: the ``boto_session_manager.BotoSesManager`` object, used to
        create the boto3 client via ``bsm.get_client(service)``.
    :param service: the AWS service name for creating the boto3 client.
        for example, the AWS S3 service name is ``s3``.
    :param method: name of the boto3 client method that lists the resources,
        for example ``list_buckets`` for AWS S3 buckets or
        ``describe_instances`` for AWS EC2 instances.
    :param is_paginator: whether ``method`` should be called through
        ``client.get_paginator(method).paginate(...)`` (True) or invoked
        directly as a client method (False). For example, it is False for
        ``s3.list_buckets`` and True for ``ec2.describe_instances``.
    :param boto_kwargs: keyword arguments forwarded to the boto3 call;
        ``None`` means no arguments. For a paginator it often contains the
        ``PaginationConfig`` key.
    :param result_path: the :class:`ResultPath` object used to extract the
        list of AWS resource data from every response.

    :return: a :class:`ResourceIterproxy` wrapping the generator of
        extracted resource records.
    """

    def _iter_resources():
        call_kwargs = {} if boto_kwargs is None else boto_kwargs
        client = bsm.get_client(service)

        # Single-shot API: one call, one response.
        if not is_paginator:
            api_call = getattr(client, method)
            yield from result_path.extract(api_call(**call_kwargs))
            return

        # Paginated API: extract the records from each page in turn.
        pages = client.get_paginator(method).paginate(**call_kwargs)
        for page in pages:
            yield from result_path.extract(page)

    return ResourceIterproxy(_iter_resources())
def extract_tags(data: dict) -> T.Dict[str, str]:
    """
    Extract tags key value pair from boto3 API call response data.

    The tags are looked up under the first of these fields that is present
    in ``data``: ``tags``, ``Tags``, ``TagSet``, ``TagList``. The field value
    may be:

    - a ``dict``: returned as is (the same object, not a copy).
    - a ``list`` of dicts: each item is read as a key / value pair using
      the first matching naming convention among ``Key`` / ``Value``,
      ``key`` / ``value`` and ``TagKey`` / ``TagValue``. An item that has
      the key field but no value field maps to an empty string.
    - ``None``: treated the same as a missing field.

    :param data: it is the dictionary representation of one AWS resource,
        it could be the original boto3 API response, it also could be a
        nested dictionary. For example: in ``s3.get_bucket_tagging``
        response, the original response is the data, because it has a
        ``TagSet`` field. in ``iam.list_roles`` response, the dict in the
        ``Roles`` list is the data.

    :return: mapping of tag key to tag value; an empty dict when ``data``
        carries no tag field.

    :raises ValueError: a list item uses none of the known key fields.
    :raises TypeError: the tag field is neither a dict, a list nor ``None``.
    """
    container_fields = ("tags", "Tags", "TagSet", "TagList")
    pair_fields = (
        ("Key", "Value"),
        ("key", "value"),
        ("TagKey", "TagValue"),
    )

    for field in container_fields:
        if field in data:
            tag_data = data[field]
            break
    else:
        return {}

    # A tag field that is present but null means "no tags", not bad input.
    if tag_data is None:
        return {}
    if isinstance(tag_data, dict):
        return tag_data
    if not isinstance(tag_data, list):
        raise TypeError(f"unable to extract tags from {data}")

    tags = {}
    for item in tag_data:
        for key_field, value_field in pair_fields:
            if key_field in item:
                # Some services declare the tag value as optional; fall back
                # to an empty string rather than failing with KeyError.
                tags[item[key_field]] = item.get(value_field, "")
                break
        else:
            raise ValueError(f"unable to extract tags from {data}")
    return tags