aws / aws-cdk

The AWS Cloud Development Kit is a framework for defining cloud infrastructure in code
https://aws.amazon.com/cdk
Apache License 2.0
11.57k stars 3.88k forks source link

Shared stepfunction role causing (false) cyclic dependency #15861

Open nsaman opened 3 years ago

nsaman commented 3 years ago

:question: General Issue

When having parent stack StackA and child stack StackB, If StackA passes a Role used in StateMachine of StackB, a cyclic reference error is produced. I've create code repository SharedStepFunctionRoleDemo to show the below error

The Question

Is there a defect below? StackA.ts

export class StackA extends Stack {
    readonly stepFunctionRole: Role;

    constructor(scope: Construct, id: string, props: StackProps) {
        super(scope, id, props);

        this.stepFunctionRole = new Role(this, "StepFunctionRole", {
            roleName: "step-function-role",
            assumedBy: new ServicePrincipal(`states.us-east-1.amazonaws.com`)
        });
        const policy = new Policy(this, "StepFunctionRolePolicy", {
            roles: [this.stepFunctionRole],
            statements: [
                new PolicyStatement({
                    actions: [
                        'lambda:InvokeFunction',
                        'states:StartExecution',
                        'states:DescribeExecution',
                        'states:StopExecution'
                    ],
                    effect: Effect.ALLOW,
                    resources: ['*']
                }),
                new PolicyStatement({
                    actions: [
                        'events:PutTargets',
                        'events:PutRule',
                        'events:DescribeRule'
                    ],
                    effect: Effect.ALLOW,
                    resources: ['*']
                })
            ]
        });
    }
}

StackB.ts

export interface StackBProps extends StackProps {
    readonly stepFunctionRole: Role;
}
export class StackB extends Stack {
    constructor(scope: Construct, id: string, props: StackBProps) {
        super(scope, id, props);

        const done = new Succeed(this, "Done");
        const functionA = new Function(this, "FunctionA", {
            runtime: Runtime.PYTHON_3_8,
            code: Code.fromInline("print('Hey12345')"),
            handler: "Test",
            functionName: "FunctionAName",
            timeout: Duration.seconds(16),
            description: `Generated on : ${new Date().toISOString()}`
        });
        const invoke = new LambdaInvoke(this, "invoke", {
            lambdaFunction: functionA
        });
        const stateMachine = new StateMachine(this, "StateMachine", {
            definition: invoke.next(done),
            stateMachineName: "StateMachineB",
            role: props.stepFunctionRole
        });
    }
}

app.ts

const app = new App();

const bootstrap = PersonalBootstrap(app);

const stackA = new StackA(app, "StackA", {
  env: bootstrap.deploymentEnvironment,
  stackName: "StackA",
  softwareType: SoftwareType.INFRASTRUCTURE
});
const stackB = new StackB(app, "stackB", {
  env: bootstrap.deploymentEnvironment,
  stackName: "StackB",
  softwareType: SoftwareType.INFRASTRUCTURE,
  stepFunctionRole: stackA.stepFunctionRole
});

package.json

{
  "version": "1.0.0",
  "license": "UNLICENSED",
  "main": "dist/lib/app.js",
  "types": "dist/types/app.d.ts",
  "scripts": {
    "clean": "rm -rf dist && rm -rf cdk.out",
    "build": "tsc",
    "watch": "tsc -w",
    "prepare": "npm run-script build",
    "test": "jest --coverage",
    "test:clean": "npm run-script test -- -u"
  },
  "dependencies": {},
  "devDependencies": {
    "@types/node": "*",
    "constructs": "^3.2.0",
    "monocdk": "^1.72.0",
    "typescript": "~4.0.5",
    "@types/jest": "^26.0.23",
    "jest": "^26.6.3",
    "ts-jest": "^26.5.6",
    "ts-node": "^10.0.0",
    "@monocdk-experiment/assert": "^1.72.0"
  }
}

Environment

Other information

> jest --coverage

 FAIL  tst/stacks/StackTest.test.ts (15.496 s)
  ✕ Test StackA compiles (30 ms)
  ✕ Test StackB compiles (5 ms)

  ● Test StackA compiles

    'StackB' depends on 'StackA' ("StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/Resource", "StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/DefaultPolicy/Resource"). Adding this dependency (StackA -> StackB/FunctionA/Resource.Arn) would create a cyclic reference.

      29 |
      30 | test("Test StackA compiles", () => {
    > 31 |     expect(SynthUtils.toCloudFormation(StackTest.STACK_A)).toMatchSnapshot();
         |                       ^
      32 | });
      33 |
      34 | test("Test StackB compiles", () => {

      at StackA._addAssemblyDependency (node_modules/monocdk/lib/core/lib/stack.ts:330:19)
      at Object.addDependency (node_modules/monocdk/lib/core/lib/deps.ts:46:24)
      at StackA.addDependency (node_modules/monocdk/lib/core/lib/stack.ts:222:9)
      at resolveValue (node_modules/monocdk/lib/core/lib/private/refs.ts:83:14)
      at Object.resolveReferences (node_modules/monocdk/lib/core/lib/private/refs.ts:26:30)
      at Object.prepareApp (node_modules/monocdk/lib/core/lib/private/prepare-app.ts:28:5)
      at Object.synthesize (node_modules/monocdk/lib/core/lib/private/synthesis.ts:19:5)
      at App.synth (node_modules/monocdk/lib/core/lib/stage.ts:74:29)
      at synthesizeApp (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:76:15)
      at Function._synthesizeWithNested (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:53:22)
      at Function.toCloudFormation (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:20:24)
      at Object.<anonymous> (tst/stacks/StackTest.test.ts:31:23)

  ● Test StackB compiles

    'StackB' depends on 'StackA' ("StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/Resource", "StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/DefaultPolicy/Resource", "StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/Resource", "StackB/StateMachine/Resource" depends on "StackA/StepFunctionRole/DefaultPolicy/Resource"). Adding this dependency (StackA -> StackB/FunctionA/Resource.Arn) would create a cyclic reference.

      33 |
      34 | test("Test StackB compiles", () => {
    > 35 |     expect(SynthUtils.toCloudFormation(StackTest.STACK_B)).toMatchSnapshot();
         |                       ^
      36 | });

      at StackA._addAssemblyDependency (node_modules/monocdk/lib/core/lib/stack.ts:330:19)
      at Object.addDependency (node_modules/monocdk/lib/core/lib/deps.ts:46:24)
      at StackA.addDependency (node_modules/monocdk/lib/core/lib/stack.ts:222:9)
      at resolveValue (node_modules/monocdk/lib/core/lib/private/refs.ts:83:14)
      at Object.resolveReferences (node_modules/monocdk/lib/core/lib/private/refs.ts:26:30)
      at Object.prepareApp (node_modules/monocdk/lib/core/lib/private/prepare-app.ts:28:5)
      at Object.synthesize (node_modules/monocdk/lib/core/lib/private/synthesis.ts:19:5)
      at App.synth (node_modules/monocdk/lib/core/lib/stage.ts:74:29)
      at synthesizeApp (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:76:15)
      at Function._synthesizeWithNested (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:53:22)
      at Function.toCloudFormation (node_modules/@monocdk-experiment/assert/lib/synth-utils.ts:20:24)
      at Object.<anonymous> (tst/stacks/StackTest.test.ts:35:23)
peterwoodworth commented 3 years ago

I was able to reproduce this error. I'm not sure why this is occurring, let me investigate this

ryparker commented 3 years ago

I'm able to reproduce this error with the following code:

import * as cdk from '@aws-cdk/core';
import * as iam from '@aws-cdk/aws-iam';
import * as lambda from '@aws-cdk/aws-lambda';
import * as stft from '@aws-cdk/aws-stepfunctions-tasks';
import * as stf from '@aws-cdk/aws-stepfunctions';

export class StackA extends cdk.Stack {
  readonly stepFunctionRole: iam.Role;

  constructor(scope: cdk.Construct, id: string, props: cdk.StackProps) {
    super(scope, id, props);

    this.stepFunctionRole = new iam.Role(this, "StepFunctionRole", {
      roleName: "step-function-role",
      assumedBy: new iam.ServicePrincipal(`states.us-east-1.amazonaws.com`)
    });
  }
}

export class StackB extends cdk.Stack {
  constructor(scope: cdk.Construct, id: string, props: cdk.StackProps & {
    stepFunctionRole: iam.Role;
  }) {
    super(scope, id, props);

    const done = new stf.Succeed(this, "Done");

    const functionA = new lambda.Function(this, "FunctionA", {
      runtime: lambda.Runtime.PYTHON_3_8,
      code: lambda.Code.fromInline("print('Hey12345')"),
      handler: "Test",
    });

    const invoke = new stft.LambdaInvoke(this, "invoke", {
      lambdaFunction: functionA
    });

    new stf.StateMachine(this, "StateMachine", {
      definition: invoke.next(done),
      stateMachineName: "StateMachineB",
      role: props.stepFunctionRole // Relies on StackA. Causes cyclic reference error.
    });
  }
}

const app = new cdk.App();

const stackA = new StackA(app, "StackA", {
  stackName: "StackA"
});

const stackB = new StackB(app, "StackB", {
  stepFunctionRole: stackA.stepFunctionRole
});

possibly related to https://github.com/aws/aws-cdk/issues/11650

peterwoodworth commented 3 years ago

Pinging you here as well @BenChaimberg, can you take a look at this? Thank you

BenChaimberg commented 3 years ago

Essentially, the state machine construct attempts to add all the required permissions for executing the various tasks in the state machine definition to the execution role via role.addToPrincipalPolicy https://github.com/aws/aws-cdk/blob/9ba8387892cb789deb27bacbd7f2a495988304dd/packages/%40aws-cdk/aws-stepfunctions/lib/state-machine.ts#L406-L408 https://github.com/aws/aws-cdk/blob/9ba8387892cb789deb27bacbd7f2a495988304dd/packages/%40aws-cdk/aws-stepfunctions/lib/state-machine.ts#L429-L431

This, in turn, adds the policy statements to the role's default policy, creating the policy under the role's scope if it doesn't exist yet. https://github.com/aws/aws-cdk/blob/9ba8387892cb789deb27bacbd7f2a495988304dd/packages/%40aws-cdk/aws-iam/lib/role.ts#L380-L387

This means that the permissions for the state machine are added in the role stack, not the state machine stack. Because the state machine depends on the role (in order to specify which role is used as the state machine execution role) and the policy depends on the task resources (in order to specify which resources the execution role has access to), if the state machine and resources are in one stack and the role is in the other, then a cyclical dependency occurs.

This problem can be worked around by re-importing the role in the consuming stack, like so:

const stepFunctionRole = iam.Role.fromRoleArn(this, 'StepFunctionRole', props.stepFunctionRole.roleArn);

This means that when a new default policy for the role is created, it is done in the re-imported role's scope and thus the consuming stack's scope.

I believe this could be mitigated by creating a policy manually with the necessary statements and then attaching it to the role. However, I wonder if this should be behaviour that is mastered in the Role construct itself, as this problem seems larger than just Step Functions.

@rix0rrr is there a canonical way of permissions granting cross-stack that I'm missing or is there a place here for automatically creating a new policy if the permission is granted cross-stack?

github-actions[bot] commented 2 years ago

This issue has not received any attention in 1 year. If you want to keep this issue open, please leave a comment below and auto-close will be canceled.

spkapust commented 1 year ago

I was experiencing the same issue. I used the workaround suggested by BenChaimberg and it worked. Commenting so that this hopefully will be reopened

peterwoodworth commented 1 year ago

Thanks for letting us know @spkapust, reopening 🙂

keagansc2 commented 1 year ago

Adding here if anyone else encounters the issue. If you are going to run another stepfunction within another (this being creating a "StepFunctionsStartExecution" task) with all Step Functions using the same role the curcular dependencies error will always be seen if you import a Role (this being through either the from_role_arn or from_role_name functions).

Not entirely sure why this occurs however, if the same role is used all the "sub" State Machines the CDK automatically adds a "DependsOn" on the IAM policy created for the "main" state machine that would be calling the StartExecution API call. The same role can be used however, would need to be imported/assigned to a different "variable".

I am placing a very simple iteration on this below to showcase it. Such that if the same "imported" role assigned varialbe is used for the two state machine definitions the error will always be seen however, if we simply "import" the role again and use the "second" imported role through all aspects of a sub sequence of statemachines and the "first" imported role for the StateMachine which would contain the "StepFunctionsStartExecution" tasks then the error is not seen. ==============================Simple 1 depth StateMachine within another==============================

from aws_cdk import (
    aws_stepfunctions as _aws_stepfunctions,
    aws_stepfunctions_tasks as _aws_stepfunctions_tasks,
    App, Duration, Stack,
    aws_iam as iam,
)

import os

class StepFunctionCircularDependency(Stack):
    def __init__(self, app: App, id: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)

        account_id = os.environ["CDK_DEFAULT_ACCOUNT"]
        role_name = "MyRoleName"

        imported_role = iam.Role.from_role_arn(
            self,
            "MyImportedRole",
            role_arn=f"arn:aws:iam::{account_id}:role/{role_name}"
        )

        second_imported_role = iam.Role.from_role_arn(
            self,
            "MyImportedRole",
            role_arn=f"arn:aws:iam::{account_id}:role/{role_name}"
        )

        pass1 = _aws_stepfunctions.Pass(
            self,
            "Passing once"
        )

        pass2 = _aws_stepfunctions.Succeed(
            self,
            "Passing twice"
        )

        definition = pass1.next(pass2)
        sm1 = _aws_stepfunctions.StateMachine(
            self,
            f"My second StateMachine",
            state_machine_name=f"SecondStateMachine",
            definition=definition,
            timeout=Duration.minutes(5),
            role=imported_role
            #role=second_imported_role
        )

        execute_sm1 = _aws_stepfunctions_tasks.StepFunctionsStartExecution(
            self,
            "executesm1",
            state_machine=sm1,
            name="ExecutingSM1"
        )

        end_execute = _aws_stepfunctions.Succeed(
            self,
            "End"
        )

        final_definition = execute_sm1.next(end_execute)

        sm_definition = _aws_stepfunctions.StateMachine(
            self,
            f"My StateMachine",
            state_machine_name="MyStateMachine",
            definition=final_definition,
            timeout=Duration.minutes(5),
            role=imported_role
        )