Open snemir2 opened 6 months ago
config file
HeadNode:
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "HeadNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "HeadNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeUpdated:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "HeadNodeOnNodeUpdated"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/HeadNodeRole
InstanceType: t3.medium
Networking:
AdditionalSecurityGroups:
# posit workbench launcher callback
- sg-0e3cf05f5dc1a59da
SubnetId: subnet-00c65375744fb5ea5 ##public for now, but could/should be private same as compute
ElasticIp: false
Ssh:
KeyName: admin_ED25519
LocalStorage:
RootVolume:
Size: 500 # we allocate 2xRAM swap file on root filesystem
Dcv:
Enabled: true
Port: 8443
AllowedIps: 10.0.0.0/8 # only allow access from private IP space; accessed via cabby/proxy most of the time anyway
Image:
Os: ubuntu2204
CustomAmi: ami-06d0647411b892c8d
Region: us-east-2
Scheduling:
Scheduler: slurm
SlurmSettings:
QueueUpdateStrategy: DRAIN #Must be one of: DRAIN, COMPUTE_FLEET_STOP, TERMINATE.
Dns:
DisableManagedDns: False
#UseEc2Hostnames: True
Database:
Uri: slurmaccountingdb-slurmaccountingdb5933648d-brbj4rghp0xk.cluster-c8saxlvocbwo.us-east-2.rds.amazonaws.com:3306
UserName: admin
PasswordSecretArn: arn:aws:secretsmanager:us-east-2:654225707598:secret:/a2ai/a2ai-cloud/slurm-accounting-db-password-LLm2tb
SlurmQueues:
- Name: cpu2mem4gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-large
Instances:
- InstanceType: c6i.large
MaxCount: 100
MinCount: 0
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu4mem32gb
CapacityType: SPOT
ComputeResources:
- Name: r6i-xlarge
Instances:
- InstanceType: r6i.xlarge
MaxCount: 100
MinCount: 0
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu8mem64gb
CapacityType: SPOT
ComputeResources:
- Name: r6i-2xlarge
Instances:
- InstanceType: r6i.2xlarge
MaxCount: 100
MinCount: 0
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu2mem8gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-xlarge
Instances:
- InstanceType: c6i.xlarge
MaxCount: 100
MinCount: 0
DisableSimultaneousMultithreading: true
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu4mem16gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-2xlarge
Instances:
- InstanceType: c6i.2xlarge
MaxCount: 100
MinCount: 0
DisableSimultaneousMultithreading: true
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu16mem128gb
CapacityType: SPOT
ComputeResources:
- Name: r6i-4xlarge
Instances:
- InstanceType: r6i.4xlarge
MaxCount: 100
MinCount: 0
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu8mem32gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-4xlarge
Instances:
- InstanceType: c6i.4xlarge
MaxCount: 100
MinCount: 0
DisableSimultaneousMultithreading: true
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu16mem64gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-8xlarge
Instances:
- InstanceType: c6i.8xlarge
MaxCount: 100
MinCount: 0
DisableSimultaneousMultithreading: true
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
- Name: cpu32mem128gb
CapacityType: SPOT
ComputeResources:
- Name: c6i-16xlarge
Instances:
- InstanceType: c6i.16xlarge
MaxCount: 100
MinCount: 0
DisableSimultaneousMultithreading: true
CustomActions:
OnNodeStart:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeStart"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
OnNodeConfigured:
Script: "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/scripts/branch/release-v3/download_and_run_cookbook.sh"
Args:
- "ComputeNodeOnNodeConfigured"
- "s3://a2ai-cloud-build-artifacts-dev-654225707598-us-east-2/a2ai-cloud-cookbook/branch/release-v3/a2a-cloud.tar.gz"
- "s3://a2ai-cluster-provision-artifacts-dev-654225707598-us-east-2/A2AiClustersergey/config/a2ai.json"
Iam:
InstanceRole: arn:aws:iam::654225707598:role/ComputeFleetRole
Networking:
SubnetIds:
- "subnet-00c65375744fb5ea5"
- "subnet-080fb7908dc6c99df"
SharedStorage:
- MountDir: /data
Name: FsxSharedDataFilesystem
StorageType: FsxLustre
FsxLustreSettings:
FileSystemId: fs-0db92f52bb64ade74
- MountDir: /cluster-data
Name: ClusterSharedDataFilesystem
StorageType: Ebs
EbsSettings:
VolumeType: gp3
Size: 100
Throughput: 500
DeletionPolicy: Delete
Encrypted: true
DirectoryService:
DomainName: ad.dev.a2-ai.cloud
DomainAddr: ldaps://ad.dev.a2-ai.cloud
PasswordSecretArn: arn:aws:secretsmanager:us-east-2:654225707598:secret:/a2ai/a2ai-cloud/ad_bind_user_password-0oxi9e
DomainReadOnlyUser: CN=AD ReadOnly,OU=Users,OU=ad-dev,DC=ad,DC=dev,DC=a2-ai,DC=cloud
LdapTlsCaCert: /opt/parallelcluster/shared/directory_service/domain-certificate.crt
LdapTlsReqCert: demand
#LdapAccessFilter: string
GenerateSshKeysForUsers: true
#AdditionalSssdConfigs:
Tags:
- Key: A2AI:a2ai-cloud-version
Value: branch/release-v3
- Key: A2AI:a2ai-cloud-env
Value: dev
- Key: A2AI:creator
Value: sergey
describe-cluster output
pcluster describe-cluster -n A2AiClustersergey --region us-east-2
{
"creationTime": "2024-01-03T20:20:10.856Z",
"headNode": {
"launchTime": "2024-01-03T20:24:23.000Z",
"instanceId": "i-0a44846fde3aa054c",
"instanceType": "t3.medium",
"state": "running",
"privateIpAddress": "10.2.42.167"
},
"version": "3.8.0",
"clusterConfiguration": {
"url": "https://parallelcluster-97a8b56da16cbe1e-v1-do-not-delete.s3.us-east-2.amazonaws.com/parallelcluster/3.8.0/clusters/a2aiclustersergey-8blzpedizys4pybh/configs/cluster-config.yaml?versionId=EeeUlRSiAZpVZ2ADoj.2GL7keIHf1aqZ&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAZQUXECJHNEE4JIEM%2F20240103%2Fus-east-2%2Fs3%2Faws4_request&X-Amz-Date=20240103T204603Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEMz%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMiJIMEYCIQCn82A%2BYvjtedv3jOdnJkxl6QwYN46lBZ2NJel3jpeRrAIhAIwSIQCZ3mAmuqBN6E5sToZbqGq6C9qrujHbdFoFAdz%2FKv4CCGUQABoMNjU0MjI1NzA3NTk4IgyswAK689OXTeh%2B3sEq2wL%2B2gfLNIVpbFK1X3ZTnGhvzhuMg9tA5aWKE6c494QTf4dTjimy7oIA1uxi3O8ITg5fRw8V3MGKuiGPgx%2BIxYXUQNDyHEa2cIq4aFnpOTZzExMzJVjjDf7TY8BJz%2B9brhgy21ehRvEf5E7lgDSXbuwGvJC4b7fP3uzHA7pKEPbBzNOcOfow%2FSMDfsSGhNDjp7JK%2FrEh7v%2BK10W2Wgul0vomd1UoNGGzN3M8a4FOKSdE0OSOa6Z%2FrLFhEZnzb8LcT4hI3o2yuvywG7v50cngrmdlg7u1s0JFC6F8KGeUrK1d46BdQWzkaVZ7ev97EN8Bah4urfbmThict7Zv10%2Fc%2Fqa%2B7x6PCcg8V5arnNWHsjAP23VfCfpMGYGIk2my%2BvAVDR%2FA0AUBTDBAoOVwplxTLQCzCnXBVXIxW%2B%2Bul9gkLyAleX0ySGXxh5xbCRiAl2nzKo440zLjaHzlqfe7cjCvgtesBjqmAYz6w4z3fdbHcTCqbzqvnzrtYjkRp%2BkiGc1zAHfd%2FjI%2B4puJZYQe5GoTqTdmOAH5O1xWvAgFGG8BkX8cDkqo5xzU%2B6KB5rGG3Z5TDW5w2ERhbZLGJKUugo14IlntcgTpjO9%2BKcUURrqeZUnWrdadvnWauZ%2BqLga03xCgLPxzUoJc2%2FnfaJXnbi%2Fff%2FcbuzJf5CIxFT48w3Gu2faH1wbQCY%2FF2PbjKTk%3D&X-Amz-Signature=a3ae7089f1513eb8261041530e7dd0fe32b237ef2d695de0e47c17bbae5f8ef2"
},
"tags": [
{
"value": "branch/release-v3",
"key": "A2AI:a2ai-cloud-version"
},
{
"value": "3.8.0",
"key": "parallelcluster:version"
},
{
"value": "A2AiClustersergey",
"key": "parallelcluster:cluster-name"
},
{
"value": "sergey",
"key": "A2AI:creator"
},
{
"value": "dev",
"key": "A2AI:a2ai-cloud-env"
}
],
"cloudFormationStackStatus": "CREATE_COMPLETE",
"clusterName": "A2AiClustersergey",
"computeFleetStatus": "RUNNING",
"cloudformationStackArn": "arn:aws:cloudformation:us-east-2:654225707598:stack/A2AiClustersergey/7e9b0c10-aa75-11ee-b08b-0a01aa5f1f7b",
"lastUpdatedTime": "2024-01-03T20:20:10.856Z",
"region": "us-east-2",
"clusterStatus": "CREATE_COMPLETE",
"scheduler": {
"type": "slurm"
}
}
Hi @snemir2,
we performed some tests on some simple internal clusters and we were able to use the `sreport` command. Though, I admit that it's not as immediate as `sacct`.
Two things we noticed:
- […] `--account` option of `sbatch`). This seems to be present on your side;
- […] `hostname`, `uptime`, `whoami` and `qstat`) are commands that return immediately and won't keep nodes allocated for long.

For any other issue in using `sreport`, I would recommend contacting SchedMD.
Thank you so much for the feedback; the short demo jobs are just red herrings -- something that I run for demo purposes. We had the same problem in prod with days-long jobs.
A perfect solution for us would be to have a way to map an AD group to a Slurm account. Seemingly, there is no good way of doing that.
> A perfect solution for us would be to have a way to map an AD group to a Slurm account. Seemingly, there is no good way of doing that.
I would say that must be done manually when you set up the Slurm accounting database: in ParallelCluster you may have a post-install script that bootstraps the Slurm database from the AD information (although I'm not sure about how to best implement this).
Required Info:
AWS ParallelCluster version [e.g. 3.1.1]: 3.8.0
Full cluster configuration without any credentials or personal data. (See the next comment. The important point is that the Slurm accounting database on RDS/Aurora MySQL is configured.)
Cluster name: A2AiClustersergey
Output of the `pcluster describe-cluster` command.
[Optional] ARN of the cluster CloudFormation main stack: arn:aws:cloudformation:us-east-2:654225707598:stack/A2AiClustersergey/7e9b0c10-aa75-11ee-b08b-0a01aa5f1f7b
Bug description and how to reproduce: With slurm accounting enabled, sreport does not contain any entries at all.
BUT, sreport returns nothing
If you are reporting issues about scaling or job failure: We cannot work on issues without proper logs. We STRONGLY recommend following this guide and attach the complete cluster log archive with the ticket.
For issues with Slurm scheduler, please attach the following logs:
`/var/log/parallelcluster/clustermgtd`, `/var/log/parallelcluster/clusterstatusmgtd` (if version >= 3.2.0), `/var/log/parallelcluster/slurm_resume.log`, `/var/log/parallelcluster/slurm_suspend.log`, `/var/log/parallelcluster/slurm_fleet_status_manager.log` (if version >= 3.2.0) and `/var/log/slurmctld.log`.
`/var/log/parallelcluster/computemgtd.log` and `/var/log/slurmd.log`.
Additional context: Setup is basically per https://aws.amazon.com/blogs/hpc/leveraging-slurm-accounting-in-aws-parallelcluster/ with an RDS/Aurora database.
Any other context about the problem. E.g.:
~/.parallelcluster/pcluster-cli.log