Open brandonhilkert opened 2 years ago
I'll take a look.
Can you tell me your host operating system and other environment details?
AWS Fargate containers running Ruby 3.1 on Debian Bullseye. We reverted back to Ruby 2.7, which changed async back to 1.2.6, and the issues went away.
They seem to come in batches every 6-12 hours or so when it was live. It’d throw all kinds of different errors around Console.logger lines and then work just fine a minute later.
Ohhhhhhhh... this is interesting.
In my work, we use the same containers, and run into massive IO lag too... we could never figure it out... let me get more background...
My understanding is that on ECS, `stdout` and `stderr` are pipes to another container which processes logs - is that correct?
Hmmmm I don’t know honestly. Wouldn’t surprise me, but I’m not familiar enough with the underpinnings to say for sure.
@brandonhilkert is it possible to share your ECS task definition and the Docker image definitions for the Ruby 3.1 and 2.7 images?
FROM ruby:3.1-bullseye
ENV HOME /thing
RUN mkdir $HOME
WORKDIR $HOME
COPY Gemfile* $HOME/
COPY vendor/cache vendor/cache
RUN bundle install --local
COPY . .
CMD ["ruby", "./run.rb"]
EXPOSE 53
FROM ruby:2.7
ENV HOME /thing
RUN mkdir $HOME
WORKDIR $HOME
COPY Gemfile* $HOME/
COPY vendor/cache vendor/cache
RUN bundle install --local
COPY . .
CMD ["ruby", "./run.rb"]
EXPOSE 53
{
"ipcMode": null,
"executionRoleArn": "arn:aws:iam::XXXX:role/EcsExecutionRole",
"containerDefinitions": [
{
"dnsSearchDomains": null,
"environmentFiles": null,
"logConfiguration": {
"logDriver": "awslogs",
"secretOptions": null,
"options": {
"awslogs-group": "/ecs/services/dns/api",
"awslogs-region": "us-east-1",
"awslogs-stream-prefix": "ecs"
}
},
"entryPoint": null,
"portMappings": [
{
"hostPort": 5353,
"protocol": "udp",
"containerPort": 5353
}
],
"command": null,
"linuxParameters": null,
"cpu": 0,
"environment": [
],
"resourceRequirements": null,
"ulimits": [
{
"name": "nofile",
"softLimit": 10240,
"hardLimit": 10240
}
],
"dnsServers": null,
"mountPoints": [],
"workingDirectory": null,
"secrets": null,
"dockerSecurityOptions": null,
"memory": null,
"memoryReservation": null,
"volumesFrom": [],
"stopTimeout": null,
"image": "XXXX.dkr.ecr.us-east-1.amazonaws.com/XXX-dns:latest",
"startTimeout": null,
"firelensConfiguration": null,
"dependsOn": null,
"disableNetworking": null,
"interactive": null,
"healthCheck": null,
"essential": true,
"links": null,
"hostname": null,
"extraHosts": null,
"pseudoTerminal": null,
"user": null,
"readonlyRootFilesystem": null,
"dockerLabels": null,
"systemControls": null,
"privileged": null,
"name": "dns-api"
},
{
"dnsSearchDomains": null,
"environmentFiles": null,
"logConfiguration": {
"logDriver": "awslogs",
"secretOptions": null,
"options": {
"awslogs-group": "/ecs/services/dns/logs",
"awslogs-region": "us-east-1",
"awslogs-stream-prefix": "ecs"
}
},
"entryPoint": null,
"portMappings": [
{
"hostPort": 9000,
"protocol": "udp",
"containerPort": 9000
}
],
"command": null,
"linuxParameters": null,
"cpu": 0,
"environment": [],
"resourceRequirements": null,
"ulimits": null,
"dnsServers": null,
"mountPoints": [],
"workingDirectory": null,
"secrets": null,
"dockerSecurityOptions": null,
"memory": null,
"memoryReservation": null,
"volumesFrom": [],
"stopTimeout": null,
"image": "XXXX.dkr.ecr.us-east-1.amazonaws.com/dns-logs:latest",
"startTimeout": null,
"firelensConfiguration": null,
"dependsOn": null,
"disableNetworking": null,
"interactive": null,
"healthCheck": null,
"essential": true,
"links": null,
"hostname": null,
"extraHosts": null,
"pseudoTerminal": null,
"user": null,
"readonlyRootFilesystem": null,
"dockerLabels": null,
"systemControls": null,
"privileged": null,
"name": "dns-logs"
}
],
"placementConstraints": [],
"memory": "1024",
"taskRoleArn": "arn:aws:iam::XXXX:role/EcsDnsRole",
"compatibilities": [
"EC2",
"FARGATE"
],
"taskDefinitionArn": "arn:aws:ecs:us-east-1:XXXX:task-definition/dns-api:15",
"family": "dns-api",
"requiresAttributes": [
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.logging-driver.awslogs"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.execution-role-awslogs"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.ecr-auth"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.19"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.task-iam-role"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.execution-role-ecr-pull"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.18"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.task-eni"
}
],
"pidMode": null,
"requiresCompatibilities": [
"FARGATE"
],
"networkMode": "awsvpc",
"runtimePlatform": null,
"cpu": "512",
"revision": 15,
"status": "ACTIVE",
"inferenceAccelerators": null,
"proxyConfiguration": null,
"volumes": []
}
I don't see firelens configuration which is what I'm a little familiar with. I'll need to dig into this one.
Is there any chance you can try on Ruby head? There have been a number of bug fixes. Also, we identified an issue in `io-event`, which happens in high traffic scenarios, but we have not fixed it yet. There should be a fix within a month or so.
There have been a number of fixes in CRuby since this was originally discussed. I would be interested to know if Ruby 3.3.1 still produces the issues.
It may have been related to https://github.com/ruby/ruby/pull/9792
We recently updated a DNS app using `async-dns` to Ruby 3.1. That caused us to update lines like `Async.logger.debug!` with `Console.logger.debug!`. We're seeing errors from lines like:
Any idea what we should look for or adjust?