Closed stephanosio closed 11 months ago
conf t
# Port 1/12
int eth1/12
desc compute1-mtcol-el9p Port 1
channel-group 12 mode active
no shut
exit
# Port 1/13
int eth1/13
desc compute1-mtcol-el9p Port 2
channel-group 12 mode active
no shut
exit
# vPC 12 for Port 1/12 and 1/13
int po12
desc compute1-mtcol-el9p Port 1, 2
switchport mode trunk
switchport trunk native vlan 1
switchport trunk allow vlan 100,116,132,900,901
vpc 12
no shut
exit
conf t
# Port 1/12
int eth1/12
desc compute1-mtcol-el9p Port 3
channel-group 12 mode active
no shut
exit
# Port 1/13
int eth1/13
desc compute1-mtcol-el9p Port 4
channel-group 12 mode active
no shut
exit
# vPC 12 for Port 1/12 and Port 1/13
int po12
desc compute1-mtcol-el9p Port 3, 4
switchport mode trunk
switchport trunk native vlan 1
switchport trunk allow vlan 100,116,132,900,901
vpc 12
no shut
exit
coresw11# show int po12
port-channel12 is up
admin state is up,
vPC Status: Up, vPC number: 12
Hardware: Port-Channel, address: 84b8.02f0.72fc (bia 84b8.02f0.72fc)
Description: compute1-mtcol-el9p Port 1, 2
MTU 1500 bytes, BW 20000000 Kbit , DLY 10 usec
reliability 255/255, txload 1/255, rxload 1/255
Encapsulation ARPA, medium is broadcast
Port mode is trunk
full-duplex, 10 Gb/s
Input flow-control is off, output flow-control is off
Auto-mdix is turned off
Switchport monitor is off
EtherType is 0x8100
Members in this channel: Eth1/12, Eth1/13
Last clearing of "show interface" counters never
2 interface resets
Load-Interval #1: 30 seconds
30 seconds input rate 144 bits/sec, 0 packets/sec
30 seconds output rate 15096 bits/sec, 21 packets/sec
input rate 144 bps, 0 pps; output rate 15.10 Kbps, 21 pps
Load-Interval #2: 5 minute (300 seconds)
300 seconds input rate 360 bits/sec, 0 packets/sec
300 seconds output rate 25664 bits/sec, 16 packets/sec
input rate 360 bps, 0 pps; output rate 25.66 Kbps, 16 pps
RX
0 unicast packets 1421 multicast packets 19 broadcast packets
1440 input packets 199089 bytes
0 jumbo packets 0 storm suppression packets
0 runts 0 giants 0 CRC 0 no buffer
0 input error 0 short frame 0 overrun 0 underrun 0 ignored
0 watchdog 0 bad etype drop 0 bad proto drop 0 if down drop
0 input with dribble 0 input discard
34 Rx pause
TX
2512 unicast packets 2163 multicast packets 3038 broadcast packets
7713 output packets 1187025 bytes
184 jumbo packets
0 output error 0 collision 0 deferred 0 late collision
0 lost carrier 0 no carrier 0 babble 0 output discard
0 Tx pause
coresw11# show vpc 12
vPC status
----------------------------------------------------------------------------
Id Port Status Consistency Reason Active vlans
-- ------------ ------ ----------- ------ ---------------
12 Po12 up success success 100,116,132,
900-901
Please check "show vpc consistency-parameters vpc <vpc-num>" for the
consistency reason of down vpc and for type-2 consistency reasons for
any vpc.
coresw12# show int po12
port-channel12 is up
admin state is up,
vPC Status: Up, vPC number: 12
Hardware: Port-Channel, address: f4cf.e228.5357 (bia f4cf.e228.5357)
Description: compute1-mtcol-el9p Port 3, 4
MTU 1500 bytes, BW 20000000 Kbit , DLY 10 usec
reliability 255/255, txload 1/255, rxload 1/255
Encapsulation ARPA, medium is broadcast
Port mode is trunk
full-duplex, 10 Gb/s
Input flow-control is off, output flow-control is off
Auto-mdix is turned off
Switchport monitor is off
EtherType is 0x8100
Members in this channel: Eth1/12, Eth1/13
Last clearing of "show interface" counters never
2 interface resets
Load-Interval #1: 30 seconds
30 seconds input rate 88 bits/sec, 0 packets/sec
30 seconds output rate 1984 bits/sec, 3 packets/sec
input rate 88 bps, 0 pps; output rate 1.98 Kbps, 3 pps
Load-Interval #2: 5 minute (300 seconds)
300 seconds input rate 336 bits/sec, 0 packets/sec
300 seconds output rate 11920 bits/sec, 8 packets/sec
input rate 336 bps, 0 pps; output rate 11.92 Kbps, 8 pps
RX
0 unicast packets 1485 multicast packets 31 broadcast packets
1516 input packets 212142 bytes
0 jumbo packets 0 storm suppression packets
0 runts 0 giants 0 CRC 0 no buffer
0 input error 0 short frame 0 overrun 0 underrun 0 ignored
0 watchdog 0 bad etype drop 0 bad proto drop 0 if down drop
0 input with dribble 0 input discard
36 Rx pause
TX
2050 unicast packets 2836 multicast packets 8 broadcast packets
4894 output packets 967566 bytes
185 jumbo packets
0 output error 0 collision 0 deferred 0 late collision
0 lost carrier 0 no carrier 0 babble 0 output discard
0 Tx pause
coresw12# show vpc 12
vPC status
----------------------------------------------------------------------------
Id Port Status Consistency Reason Active vlans
-- ------------ ------ ----------- ------ ---------------
12 Po12 up success success 100,116,132,
900-901
Please check "show vpc consistency-parameters vpc <vpc-num>" for the
consistency reason of down vpc and for type-2 consistency reasons for
any vpc.
[root@compute1-mtcol-el9p ~]# ethtool bond0
Settings for bond0:
Supported ports: [ ]
Supported link modes: Not reported
Supported pause frame use: No
Supports auto-negotiation: No
Supported FEC modes: Not reported
Advertised link modes: Not reported
Advertised pause frame use: No
Advertised auto-negotiation: No
Advertised FEC modes: Not reported
Speed: 40000Mb/s
Duplex: Full
Auto-negotiation: off
Port: Other
PHYAD: 0
Transceiver: internal
Link detected: yes
[root@compute1-mtcol-el9p ~]# systool -c fc_host -v host0
Class = "fc_host"
Class Device = "host0"
Class Device path = "/sys/devices/pci0001:00/0001:00:01.0/0001:01:00.0/host0/fc_host/host0"
dev_loss_tmo = "16"
fabric_name = "0x10000005332ea9c2"
issue_lip = <store method only>
max_npiv_vports = "254"
node_name = "0x500143802426ad0d"
npiv_vports_inuse = "0"
port_id = "0x012600"
port_name = "0x500143802426ad0c"
port_state = "Online"
port_type = "NPort (fabric via point-to-point)"
speed = "8 Gbit"
supported_classes = "Class 3"
supported_speeds = "1 Gbit, 2 Gbit, 4 Gbit, 8 Gbit"
symbolic_name = "HPAJ764A FW:v8.07.00 DVR:v10.02.07.900-k"
system_hostname = ""
tgtid_bind_type = "wwpn (World Wide Port Name)"
uevent =
vport_create = <store method only>
vport_delete = <store method only>
Device = "host0"
Device path = "/sys/devices/pci0001:00/0001:00:01.0/0001:01:00.0/host0"
fw_dump =
issue_logo = <store method only>
nvram = "ISP "
optrom_ctl = <store method only>
optrom =
reset = <store method only>
sfp = ""
uevent = "DEVTYPE=scsi_host"
vpd = "$"
[root@compute1-mtcol-el9p ~]# systool -c fc_host -v host1
Class = "fc_host"
Class Device = "host1"
Class Device path = "/sys/devices/pci0001:00/0001:00:01.0/0001:01:00.1/host1/fc_host/host1"
dev_loss_tmo = "16"
fabric_name = "0x10000005332fea72"
issue_lip = <store method only>
max_npiv_vports = "254"
node_name = "0x500143802426ad0f"
npiv_vports_inuse = "0"
port_id = "0x022500"
port_name = "0x500143802426ad0e"
port_state = "Online"
port_type = "NPort (fabric via point-to-point)"
speed = "8 Gbit"
supported_classes = "Class 3"
supported_speeds = "1 Gbit, 2 Gbit, 4 Gbit, 8 Gbit"
symbolic_name = "HPAJ764A FW:v8.07.00 DVR:v10.02.07.900-k"
system_hostname = ""
tgtid_bind_type = "wwpn (World Wide Port Name)"
uevent =
vport_create = <store method only>
vport_delete = <store method only>
Device = "host1"
Device path = "/sys/devices/pci0001:00/0001:00:01.0/0001:01:00.1/host1"
fw_dump =
issue_logo = <store method only>
nvram = "ISP "
optrom_ctl = <store method only>
optrom =
reset = <store method only>
sfp = ""
uevent = "DEVTYPE=scsi_host"
vpd = "$"
[root@compute1-mtcol-el9p ~]# systool -c fc_host -v host2
Class = "fc_host"
Class Device = "host2"
Class Device path = "/sys/devices/pci0007:00/0007:00:01.0/0007:01:00.0/host2/fc_host/host2"
dev_loss_tmo = "16"
fabric_name = "0x10000005332fea72"
issue_lip = <store method only>
max_npiv_vports = "254"
node_name = "0x5001438024262ba5"
npiv_vports_inuse = "0"
port_id = "0x022600"
port_name = "0x5001438024262ba4"
port_state = "Online"
port_type = "NPort (fabric via point-to-point)"
speed = "8 Gbit"
supported_classes = "Class 3"
supported_speeds = "1 Gbit, 2 Gbit, 4 Gbit, 8 Gbit"
symbolic_name = "HPAJ764A FW:v8.07.00 DVR:v10.02.07.900-k"
system_hostname = ""
tgtid_bind_type = "wwpn (World Wide Port Name)"
uevent =
vport_create = <store method only>
vport_delete = <store method only>
Device = "host2"
Device path = "/sys/devices/pci0007:00/0007:00:01.0/0007:01:00.0/host2"
fw_dump =
issue_logo = <store method only>
nvram = "ISP "
optrom_ctl = <store method only>
optrom =
reset = <store method only>
sfp = ""
uevent = "DEVTYPE=scsi_host"
vpd = "$"
[root@compute1-mtcol-el9p ~]# systool -c fc_host -v host3
Class = "fc_host"
Class Device = "host3"
Class Device path = "/sys/devices/pci0007:00/0007:00:01.0/0007:01:00.1/host3/fc_host/host3"
dev_loss_tmo = "16"
fabric_name = "0x10000005332ea9c2"
issue_lip = <store method only>
max_npiv_vports = "254"
node_name = "0x5001438024262ba7"
npiv_vports_inuse = "0"
port_id = "0x012500"
port_name = "0x5001438024262ba6"
port_state = "Online"
port_type = "NPort (fabric via point-to-point)"
speed = "8 Gbit"
supported_classes = "Class 3"
supported_speeds = "1 Gbit, 2 Gbit, 4 Gbit, 8 Gbit"
symbolic_name = "HPAJ764A FW:v8.07.00 DVR:v10.02.07.900-k"
system_hostname = ""
tgtid_bind_type = "wwpn (World Wide Port Name)"
uevent =
vport_create = <store method only>
vport_delete = <store method only>
Device = "host3"
Device path = "/sys/devices/pci0007:00/0007:00:01.0/0007:01:00.1/host3"
fw_dump =
issue_logo = <store method only>
nvram = "ISP "
optrom_ctl = <store method only>
optrom =
reset = <store method only>
sfp = ""
uevent = "DEVTYPE=scsi_host"
vpd = "$"
[root@compute1-mtcol-el9p ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
loop0 7:0 0 11G 0 loop /var/lib/machines
sda 8:0 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdb 8:16 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdc 8:32 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdd 8:48 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sde 8:64 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdf 8:80 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdg 8:96 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdh 8:112 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdi 8:128 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdj 8:144 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdk 8:160 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdl 8:176 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdm 8:192 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdn 8:208 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdo 8:224 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
sdp 8:240 0 20G 0 disk
└─mpathk 253:2 0 20G 0 mpath
├─mpathk1 253:3 0 2.1G 0 part
└─mpathk15 253:4 0 99M 0 part
nvme0n1 259:0 0 894.3G 0 disk
├─nvme0n1p1 259:1 0 600M 0 part /boot/efi
├─nvme0n1p2 259:2 0 1G 0 part /boot
└─nvme0n1p3 259:3 0 892.7G 0 part
├─rl-root 253:0 0 877.7G 0 lvm /
└─rl-swap 253:1 0 15G 0 lvm [SWAP]
[root@compute1-mtcol-el9p ~]# multipath -ll
mpathk (360002ac000000000000005540000bb75) dm-2 3PARdata,VV
size=20G features='1 queue_if_no_path' hwhandler='1 alua' wp=rw
`-+- policy='service-time 0' prio=50 status=active
|- 3:0:5:0 sda 8:0 active ready running
|- 3:0:1:0 sdb 8:16 active ready running
|- 3:0:4:0 sdc 8:32 active ready running
|- 3:0:2:0 sdd 8:48 active ready running
|- 1:0:5:0 sde 8:64 active ready running
|- 1:0:3:0 sdf 8:80 active ready running
|- 1:0:4:0 sdg 8:96 active ready running
|- 2:0:5:0 sdi 8:128 active ready running
|- 1:0:1:0 sdh 8:112 active ready running
|- 2:0:4:0 sdj 8:144 active ready running
|- 2:0:2:0 sdk 8:160 active ready running
|- 2:0:1:0 sdl 8:176 active ready running
|- 0:0:4:0 sdn 8:208 active ready running
|- 0:0:3:0 sdo 8:224 active ready running
|- 0:0:5:0 sdm 8:192 active ready running
`- 0:0:2:0 sdp 8:240 active ready running
Re: Investigate why OSA configures AArch64 compute hosts with VNC disabled
The rationale is described in https://github.com/openstack/openstack-ansible-os_nova/commit/0087026132dea946214d6527298f45179a9ff764:
Currently there is no support for vnc or spice consoles on
arm64 architecture. Set the default to be serialconsole.
Note that it says "spice console" instead of vnc in the above commit because, at the time, the default console type was SPICE, which was later changed to VNC.
The novnc console type was tried out on AArch64 and currently works, so the above change no longer seems to be valid; for now, we are forcing nova_console_type: novnc for all architectures.
See https://gist.github.com/stephanosio/d1988ffa3e03039b5bae0f6eb2be32ea
https://github.com/Centrinix/openstack-ansible/commit/e882ae2e42b0cd5246a51ba50eede7e0fecdc13d https://github.com/Centrinix/openstack-ansible/commit/25008d0956954f3bff9189861547b0d7891647c3 https://github.com/Centrinix/openstack-ansible/commit/c92ff20c7e0e3c8ad19fee44ea615f33cc032332
https://github.com/Centrinix/openstack-ansible-os_nova/commit/f7e66193e5a156dbc8c033c7a81f33b5559e739f https://github.com/Centrinix/openstack-ansible-os_nova/commit/9b7160f4b3786c6a751bbe617a7e11d11c490f34
Timed LLVM Compilation 16.0:
pts/build-llvm-1.5.0 [Build System: Ninja]
Build System: Ninja:
1006.875
1000.19
1000.264
Average: 1002.443 Seconds
Deviation: 0.38%
Timed LLVM Compilation 16.0:
pts/build-llvm-1.5.0 [Build System: Ninja]
Build System: Ninja:
1219.646
1223.074
1230.368
Average: 1224.363 Seconds
Deviation: 0.45%
Timed LLVM Compilation 16.0:
pts/build-llvm-1.5.0 [Build System: Ninja]
Build System: Ninja:
1781.455
1843.532
1852.684
Average: 1825.890 Seconds
Deviation: 2.12%
Note that the Ampere Altra benchmark result was obtained with only 2 memory channels across 160 cores (and only 1GB of RAM per core, compared to 2GB of RAM per core for the rest -- kernel disk caching in RAM may have played a role). This was to be re-tested after the planned memory upgrade. Update: even after re-testing with the upgraded RAM (14 active channels), there was no discernible performance improvement.
Re: RAM Upgrade
Instead of the 16x 32GB DIMMs mentioned above, 28x 16GB PC4-2133P RDIMMs were installed for a total of 448GB because the server does not seem to be compatible with LRDIMMs, which the 32GB DIMMs were. The extra 64GB is mainly there to enable two additional channels on CPU0 (it turns out Altra supports a different number of channels on CPU0 and CPU1; CPU1 is still operating with 6 channels), which should give a bit of a performance boost as well as extra memory for hosting test instances.
Set up Ampere Mt. Collins 2U server
TODO
compute1-dl380g8-9ys5.
compute1-mtcol2u-el9p, short form el9p.
Investigate why OSA configures AArch64 compute hosts with VNC disabled -> see https://github.com/zephyrproject-rtos/infrastructure-private/issues/147#issuecomment-1821456409
pts/build-llvm test for comparison against the AWS EC2 c5a.4xlarge instances.
Hardware configuration
Initial configuration (as provided by Ampere)
Additional configuration (added by Stephanos)