Skip to content

ECS (Elastic Container Service)

aws-annoying ecs task-definition-lifecycle

Expire and delete ECS task definitions.

Expire and delete ECS task definitions for a given family, keeping revisions adhering to the given constraint. You can use this command to clean up old task definitions that are no longer needed.

Example usage:

aws-annoying ecs task-definition-lifecycle \
    --family <task-definition-family> \
    --keep-latest 5 \
    --delete
Source code in aws_annoying/cli/ecs/task_definition_lifecycle.py
@ecs_app.command()
def task_definition_lifecycle(
    ctx: typer.Context,
    *,
    family: str = typer.Option(
        ...,
        help="The name of the task definition family.",
        show_default=False,
    ),
    keep_latest: int = typer.Option(
        ...,
        help="Number of latest (revision) task definitions to keep.",
        show_default=False,
        min=1,
        max=100,
    ),
    delete: bool = typer.Option(
        False,  # noqa: FBT003
        help="Delete the task definition after deregistering it.",
    ),
) -> None:
    r"""Expire and delete ECS task definitions.

    Expire and delete ECS task definitions for a given family, keeping revisions adhering to
    the given constraint. You can use this command to clean up old task definitions that are no
    longer needed.

    Example usage:

    ```shell
    aws-annoying ecs task-definition-lifecycle \
        --family <task-definition-family> \
        --keep-latest 5 \
        --delete
    ```
    """
    # `dry_run` is read from the Typer context metadata; when truthy, the mutating
    # ECS calls below are skipped while the progress messages are still emitted.
    dry_run = ctx.meta["dry_run"]
    ecs = boto3.client("ecs")

    # Collect every ACTIVE revision ARN of the family across all result pages
    pages = ecs.get_paginator("list_task_definitions").paginate(
        familyPrefix=family,
        status="ACTIVE",
        sort="ASC",
    )
    task_definition_arns = []
    for page in pages:
        task_definition_arns.extend(page["taskDefinitionArns"])

    # Order by the numeric revision (last ":"-separated ARN field) so that the
    # slice below does not depend on the ordering returned by the API
    task_definition_arns.sort(key=lambda arn: int(arn.split(":")[-1]))

    # Everything except the newest `keep_latest` revisions is expired
    # (`keep_latest` is at least 1, so the negative slice bound is never `-0`)
    expired_taskdef_arns = task_definition_arns[:-keep_latest]
    logger.warning("Deregistering %d task definitions...", len(expired_taskdef_arns))
    for arn in expired_taskdef_arns:
        if not dry_run:
            ecs.deregister_task_definition(taskDefinition=arn)

        # ARN like: "arn:aws:ecs:<region>:<account-id>:task-definition/<family>:<revision>"
        _, family_revision = arn.split(":task-definition/")
        logger.warning("Deregistered task definition [yellow]%r[/yellow]", family_revision)

    if delete and expired_taskdef_arns:
        # Delete the expired task definitions in chunks due to API limitation
        logger.warning(
            "Deleting %d task definitions in chunks of size %d...",
            len(expired_taskdef_arns),
            _DELETE_CHUNK_SIZE,
        )
        for idx, chunk in enumerate(_chunker(expired_taskdef_arns, _DELETE_CHUNK_SIZE)):
            # The batch delete API reports per-ARN problems in the response body
            # instead of raising, so they must be inspected explicitly.
            failures = []
            if not dry_run:
                response = ecs.delete_task_definitions(taskDefinitions=chunk)
                failures = response.get("failures") or []

            for failure in failures:
                logger.error(
                    "Failed to delete task definition %r: %s (%s)",
                    failure.get("arn"),
                    failure.get("reason"),
                    failure.get("detail"),
                )

            # Report only the definitions that were actually deleted in this batch
            logger.warning(
                "Deleted %d task definitions in %d-th batch.",
                len(chunk) - len(failures),
                idx,
            )

aws-annoying ecs wait-for-deployment

Wait for ECS deployment for a specific service to start, complete and stabilize.

It's designed to be used after triggering a deployment (e.g., updating service, deploying new task definition), in conjunction with CI/CD pipelines or deployment scripts.

Below is an example of using this command in a GitHub Actions workflow:

  ...

  - name: Deploy to ECS service
    id: deploy-ecs
    uses: aws-actions/amazon-ecs-deploy-task-definition@v2
    with:
      task-definition: ${{ steps.render-task-definition.outputs.task-definition }}
      cluster: ${{ vars.AWS_ECS_CLUSTER }}
      service: ${{ vars.AWS_ECS_SERVICE }}
      wait-for-service-stability: false

  - name: Wait for deployment complete
    run: |
      pipx run aws-annoying \
        --verbose \
        ecs wait-for-deployment \
          --cluster '${{ vars.AWS_ECS_CLUSTER }}' \
          --service '${{ vars.AWS_ECS_SERVICE }}' \
          --wait-for-start \
          --wait-for-stability \
          --timeout-seconds 600 \
          --expected-task-definition '${{ steps.deploy-ecs.outputs.task-definition-arn }}'

  ...

--wait-for-start is necessary because a new deployment may not exist yet immediately after the deploy action finishes.

Source code in aws_annoying/cli/ecs/wait_for_deployment.py
@ecs_app.command()
def wait_for_deployment(  # noqa: PLR0913
    *,
    cluster: str = typer.Option(
        ...,
        help="The name of the ECS cluster.",
        show_default=False,
    ),
    service: str = typer.Option(
        ...,
        help="The name of the ECS service.",
        show_default=False,
    ),
    expected_task_definition: Optional[str] = typer.Option(
        None,
        help=(
            "The service's task definition expected after deployment."
            " If provided, it will be used to assert the service's task definition after deployment finished or timed out."  # noqa: E501
        ),
        show_default=False,
    ),
    polling_interval: int = typer.Option(
        5,
        help="The interval between any polling attempts, in seconds.",
        min=1,
    ),
    timeout_seconds: Optional[int] = typer.Option(
        None,
        help=(
            "The maximum time to wait for the deployment to complete, in seconds."
            " If not provided, it will wait indefinitely."
        ),
        min=1,
    ),
    wait_for_start: bool = typer.Option(
        True,  # noqa: FBT003
        help=(
            "Whether to wait for the deployment to start."
            " Because there could be no deployment right after the deploy,"
            " this option will wait for a new deployment to start if no running deployment is found."
        ),
    ),
    wait_for_stability: bool = typer.Option(
        False,  # noqa: FBT003
        help="Whether to wait for the service to be stable after the deployment.",
    ),
) -> None:
    r"""Wait for ECS deployment for a specific service to start, complete and stabilize.

    It's designed to be used after triggering a deployment (e.g., updating service, deploying new task definition),
    in conjunction with CI/CD pipelines or deployment scripts.

    Below is an example of using this command in GitHub Actions workflow:

    ```yaml
      ...

      - name: Deploy to ECS service
        id: deploy-ecs
        uses: aws-actions/amazon-ecs-deploy-task-definition@v2
        with:
          task-definition: ${{ steps.render-task-definition.outputs.task-definition }}
          cluster: ${{ vars.AWS_ECS_CLUSTER }}
          service: ${{ vars.AWS_ECS_SERVICE }}
          wait-for-service-stability: false

      - name: Wait for deployment complete
        run: |
          pipx run aws-annoying \
            --verbose \
            ecs wait-for-deployment \
              --cluster '${{ vars.AWS_ECS_CLUSTER }}' \
              --service '${{ vars.AWS_ECS_SERVICE }}' \
              --wait-for-start \
              --wait-for-stability \
              --timeout-seconds 600 \
              --expected-task-definition '${{ steps.deploy-ecs.outputs.task-definition-arn }}'

      ...
    ```

    `--wait-for-start` is necessary because there could be no deployment right after the deploy action.
    """
    started_at = datetime.now(tz=timezone.utc)

    def _elapsed_seconds() -> float:
        # Wall-clock seconds between command start and the moment of the call
        return (datetime.now(tz=timezone.utc) - started_at).total_seconds()

    try:
        # The whole wait runs under the (optional) timeout
        with Timeout(timeout_seconds):
            _wait_for_deployment(
                ECSServiceRef(cluster=cluster, service=service),
                wait_for_start=wait_for_start,
                polling_interval=polling_interval,
                wait_for_stability=wait_for_stability,
                expected_task_definition=expected_task_definition,
            )
    except OperationTimeoutError:
        # Timed out: report the configured limit and exit with a failure code
        logger.error(  # noqa: TRY400
            "Timeout reached after %s seconds. The deployment may not have finished.",
            timeout_seconds,
        )
        raise typer.Exit(1) from None
    except DeploymentFailedError as err:
        # Deployment failed: report how long it took and exit with a failure code
        logger.error(  # noqa: TRY400
            "Deployment failed in [bold]%.2f[/bold] seconds with error: %s",
            _elapsed_seconds(),
            err,
        )
        raise typer.Exit(1) from None

    # Reached only on success, since both handlers above raise
    logger.info(
        "Deployment completed in [bold]%.2f[/bold] seconds.",
        _elapsed_seconds(),
    )