cndaqiang / E5-PC-daily

服务器集群管理遇到的问题和总结
1 stars 0 forks source link

服务器安全和管理 #46

Open cndaqiang opened 3 years ago

cndaqiang commented 3 years ago

关闭ssh root 登录

# Open the sshd configuration file and set the directive shown on the next
# line, which refuses SSH logins for the root account.
vi /etc/ssh/sshd_config
PermitRootLogin no
# Restart the sshd service so the edited configuration is picked up.
service sshd restart
cndaqiang commented 3 years ago

ssh监听端口

vi /etc/ssh/sshd_config

填入

Port 22
Port 666

重启

systemctl restart sshd
cndaqiang commented 3 years ago

查看linux桌面

# List every running process whose name matches a known desktop-environment
# keyword (case-insensitive). `grep -E` replaces the deprecated `egrep` alias.
ps -A | grep -Ei "gnome|kde|mate|cinnamon|lx|xfce|jwm"
cndaqiang commented 3 years ago

mac地址

vi /etc/sysconfig/network-scripts/ifcfg-eno1
MACADDR=a4:bf:01:70:71:56
cndaqiang commented 3 years ago

linux查询命令

系统版本

root@pve:~# uname -a
Linux pve 5.4.106-1-pve #1 SMP PVE 5.4.106-1 (Fri, 19 Mar 2021 11:08:47 +0100) x86_64 GNU/Linux
#x86_64 即64位
#i386 即32位

硬件信息

How do you check RAM voltage from Linux (can't see that on BIOS)

dmidecode -t [type] (use 5 for voltage and 17 for memory details.)
 0   BIOS
 1   System
 2   Base Board
 3   Chassis
 4   Processor
 5   Memory Controller
 6   Memory Module
 7   Cache
 8   Port Connector
 9   System Slots
10   On Board Devices
11   OEM Strings
12   System Configuration Options
13   BIOS Language
14   Group Associations
15   System Event Log
16   Physical Memory Array
17   Memory Device
18   32-bit Memory Error
19   Memory Array Mapped Address
20   Memory Device Mapped Address
21   Built-in Pointing Device
22   Portable Battery
23   System Reset
24   Hardware Security
25   System Power Controls
26   Voltage Probe
27   Cooling Device
28   Temperature Probe
29   Electrical Current Probe
30   Out-of-band Remote Access
31   Boot Integrity Services
32   System Boot
33   64-bit Memory Error
34   Management Device
35   Management Device Component
36   Management Device Threshold Data
37   Memory Channel
38   IPMI Device
39   Power Supply

# 也可替换为下面的关键词, 如 dmidecode -t memory 可以查看支持的最大容量
       Keyword     Types
       ──────────────────────────────
       bios        0, 13
       system      1, 12, 15, 23, 32
       baseboard   2, 10, 41
       chassis     3
       processor   4
       memory      5, 6, 16, 17
       cache       7
       connector   8
       slot        9
#使用man查看更多参数

查看两个内存槽位, 下面的Part Number参数可以搜索到内存的具体型号

root@pve:~# dmidecode -t 17
# dmidecode 3.2
Getting SMBIOS data from sysfs.
SMBIOS 2.8 present.

Handle 0x000B, DMI type 17, 40 bytes
Memory Device
        Array Handle: 0x0009
        Error Information Handle: Not Provided
        Total Width: 64 bits
        Data Width: 64 bits
        Size: 4096 MB
        Form Factor: DIMM
        Set: None
        Locator: A1_DIMM0
        Bank Locator: A1_BANK0
        Type: DDR3
        Type Detail: Unknown
        Speed: 1333 MT/s
        Manufacturer: Hynix Semiconduc
        Serial Number: 00117F99  
        Asset Tag: A1_AssetTagNum0
        Part Number: HMT351S6CFR8C-PB  
        Rank: 2
        Configured Memory Speed: 1333 MT/s
        Minimum Voltage: 48.408 V
        Maximum Voltage: 38.992 V
        Configured Voltage: Unknown

Handle 0x000D, DMI type 17, 40 bytes
Memory Device
        Array Handle: 0x0009
        Error Information Handle: Not Provided
        Total Width: Unknown
        Data Width: 64 bits
        Size: No Module Installed
        Form Factor: DIMM
        Set: None
        Locator: A1_DIMM1
        Bank Locator: A1_BANK1
        Type: Unknown
        Type Detail: Unknown
        Speed: Unknown
        Manufacturer: A1_Manufacturer1
        Serial Number: A1_SerNum1
        Asset Tag: A1_AssetTagNum1
        Part Number: Array1_PartNumber1
        Rank: Unknown
        Configured Memory Speed: Unknown
        Minimum Voltage: 39.417 V
        Maximum Voltage: Unknown
        Configured Voltage: Unknown
cndaqiang commented 2 years ago

slurm统计用户机时脚本

只有slurm中已添加该用户时才可以统计,否则统计到的用户名是NULL

# Create the Slurm account "test1" on the cluster named "cluster", then add
# the user "test1" to that account.
[root@mgmt ~]# sacctmgr add account test1 Cluster=cluster
[root@mgmt ~]# sacctmgr add user test1 account=test1
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@Time    : 2021-02-06 @IOP
@Author  : cndaqiang
@Blog    : cndaqiang.github.io
@File    : Read per-account CPU time from the Slurm accounting database

Selects every job whose start time lies inside [start, end], multiplies the
job's wall-clock duration by its requested CPU count, sums that per account
and writes one line per account to a timestamped output file.
"""

import pandas as pd
import pymysql
import time
import numpy as np

# Reporting window, given as local-time strings and converted to Unix time.
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
start="2021-02-06 00:00:00"
end="2021-02-07 00:00:00"
#end=time.strftime(TIME_FORMAT,time.localtime(time.time())) # current time

unixstart   =   int(time.mktime(time.strptime(start, TIME_FORMAT)))
unixend     =   int(time.mktime(time.strptime(end  , TIME_FORMAT)))

fileout="TimeAccount"+time.strftime("%Y%m%d.%H.%M.%S",time.localtime(time.time()))+".csv"

# NOTE(review): the original comment said a charset argument is passed here to
# avoid garbled Chinese text, but none is given; add one if account names can
# contain non-ASCII characters.
dbconn=pymysql.connect(
  database="slurm_acct_db",
  user="mysqluserid",
  password="mysqlpasswd",
 )

# SQL statement: the window bounds are integers computed above, not user input.
sqlcmd=(
    "select  account, cpus_req, time_eligible, time_start, time_end, time_suspended "
    "from slurm_acct_db.cluster_job_table "
    "where time_start >= %d and time_start <= %d" % (unixstart, unixend)
)

# Load the query result into a DataFrame and always release the connection.
try:
    a=pd.read_sql(sqlcmd,dbconn)
finally:
    dbconn.close()

# Drop jobs that have not finished (or never started): their timestamps are
# still ~0, which would turn (time_end - time_start) into a huge negative value.
a=a[(a['time_end']>=10) & (a['time_start']>=10)].copy()

# CPU time of a job = wall-clock seconds * requested CPUs.
a['cputime']=(a['time_end']-a['time_start'])*a['cpus_req']
# Total CPU seconds per account.
sumuser=a.groupby('account')['cputime'].sum()

with open(fileout,'w') as f:
    f.write('%20s \t %20s \n'%('username', 'CPU Times') )
    for name, seconds in sumuser.items():
        # Break the total seconds down into days / hours / minutes / seconds.
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
        d, h = divmod(h, 24)
        time2read="%dday %dh %02dm %02ds" % (d, h, m, s)
        f.write('%20s \t %20s \n'%(name,time2read) )

print("Save Time account to file: "+fileout)

# Unix timestamp back to a readable time:
#time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(1612613913))

最终提交给财务的版本

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Per-account usage and cost report built from the Slurm accounting database.

Selects the jobs started between `start` and now, sums job count and
CPU-seconds per account and writes a CSV (UTF-8 with BOM) whose columns are
account, job count, CPU-hours, CPU-hours / 48 and cost.
"""

import pandas as pd
import pymysql
import time
import numpy as np

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
STAMP_FORMAT = "%Y%m%d.%H.%M.%S"
# Cost in yuan charged per CPU-hour (matches the cost column written below).
PRICE_PER_HOUR = 0.4
# NOTE(review): 48 looks like the number of cores per node — confirm.
HOURS_DIVISOR = 48

# Reporting window: fixed start, end = current time.
start="2021-02-01 00:00:00"
#end="2021-05-31 00:00:00"
end=time.strftime(TIME_FORMAT,time.localtime(time.time()))

unixstart   =   int(time.mktime(time.strptime(start, TIME_FORMAT)))
unixend     =   int(time.mktime(time.strptime(end  , TIME_FORMAT)))

# NOTE(review): the original comment said a charset argument is passed here to
# avoid garbled Chinese text, but none is given.
dbconn=pymysql.connect(
  database="slurm_acct_db",
  user="mysqluserid",
  password="mysqlpasswd",
 )

# SQL statement: the window bounds are integers computed above, not user input.
sqlcmd=(
    "select  account, cpus_req, time_eligible, time_start, time_end, time_suspended "
    "from slurm_acct_db.cluster_job_table "
    "where time_start >= %d and time_start <= %d ORDER BY time_start ;" % (unixstart, unixend)
)

# Load the query result into a DataFrame and always release the connection.
try:
    a=pd.read_sql(sqlcmd,dbconn)
finally:
    dbconn.close()

# Drop jobs that have not finished: their time_end is still ~0, so they would
# contribute a large negative duration and reduce the account's bill.
a=a[(a['time_end']>=10) & (a['time_start']>=10)]
# Drop rows whose account is NULL (users never registered in Slurm accounting).
# Passing both how= and thresh= to dropna raises TypeError on pandas >= 2.0.
a=a.dropna(subset=['account']).copy()

if a.empty:
    # min()/max() of an empty column is NaN, which time.localtime cannot convert.
    raise SystemExit("No finished jobs found between "+start+" and "+end)

minstart=time.strftime(STAMP_FORMAT,time.localtime(a['time_start'].min()))
maxstart=time.strftime(STAMP_FORMAT,time.localtime(a['time_start'].max()))

fileout="TimeAccount"+time.strftime(STAMP_FORMAT,time.localtime(time.time()))+"From"+minstart+"To"+maxstart+".csv"

# CPU time of a job = wall-clock seconds * requested CPUs.
a['cputime']=(a['time_end']-a['time_start'])*a['cpus_req']
# One row per job, so summing this column yields the job count.
a['tasknums']=1

# Per-account totals.
sumuser=a.groupby('account')[['cputime','tasknums']].sum()

# utf-8-sig writes a BOM at the start of the file.
with open(fileout,'w', encoding='utf-8-sig') as f:
    f.write('%20s , %20s , %20s , %20s,  %20s \n'%('用户', '作业数', '机时(hour)','CPU时间(hour)','费用(元)') )
    for name, seconds, jobs in zip(sumuser.index.values, sumuser['cputime'].values, sumuser['tasknums'].values):
        # Whole CPU-hours (minutes and seconds are truncated).
        h = seconds // 3600
        time2read="%d" % ( h)
        cputime="%d" %( h/HOURS_DIVISOR )
        money="%d" % ( h*PRICE_PER_HOUR)
        f.write('%20s , %20s , %20s , %20s , %20s \n'%(name,jobs,time2read,cputime, money) )

print("Save Time account to file: "+fileout)
print("start",minstart)
print("end",maxstart)
# Unix timestamp back to a readable time:
#time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(1612613913))

定期统计上月本日到今天的脚本

#!/usr/bin/env bash
# Wrapper for the periodic Slurm usage report: appends a timestamp to
# auto.log and runs mysql_auto.py from $HOME/SLURMTime.

# Load the user's shell environment (presumably so python3 and its packages
# resolve when started from a scheduler — confirm).
source "$HOME/.bashrc"
# Abort if the working directory is missing; otherwise the log and the report
# script would be looked up in whatever directory we happen to be in.
cd "$HOME/SLURMTime" || exit 1
date >> auto.log
./mysql_auto.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Periodic per-account usage report built from the Slurm accounting database.

Covers the window from this day of the previous month (00:00) to today
(00:00), sums job count, CPU-seconds and wall-clock seconds per account,
joins the real name and remark from a local CSV and writes the result to a
timestamped CSV file (UTF-8 with BOM).
"""

import calendar
import datetime

import pandas as pd
import pymysql
import time
import numpy as np

TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
STAMP_FORMAT = "%Y%m%d.%H.%M.%S"
# Cost in yuan charged per CPU-hour (matches the cost column written below).
PRICE_PER_HOUR = 0.4

# Reporting window: same day of the previous month -> today, both at 00:00.
# Subtracting a fixed 28 days does not reach the previous month on days 29-31
# and can produce dates that do not exist (e.g. 04-31), so the previous month
# is computed explicitly and the day is clamped to that month's length.
today=datetime.date.today()
if today.month == 1:
    prev_year, prev_month = today.year-1, 12
else:
    prev_year, prev_month = today.year, today.month-1
prev_day=min(today.day, calendar.monthrange(prev_year, prev_month)[1])

daystr=str(today.day)
start=datetime.date(prev_year, prev_month, prev_day).strftime("%Y-%m-%d 00:00:00")
end=today.strftime("%Y-%m-%d 00:00:00")

print(daystr,start,end)
unixstart   =  int(time.mktime(time.strptime(start, TIME_FORMAT)))
unixend     =  int(time.mktime(time.strptime(end  , TIME_FORMAT)))

# NOTE(review): the original comment said a charset argument is passed here to
# avoid garbled Chinese text, but none is given.
dbconn=pymysql.connect(
  database="slurm_acct_db",
  user="mysqluserid",
  password="mysqlpasswd",
 )

# SQL statement: the window bounds are integers computed above, not user input.
sqlcmd=(
    "select  account, cpus_req, time_eligible, time_start, time_end, time_suspended "
    "from slurm_acct_db.cluster_job_table "
    "where time_start >= %d and time_start <= %d ORDER BY time_start ;" % (unixstart, unixend)
)

# Load the query result into a DataFrame and always release the connection.
try:
    a=pd.read_sql(sqlcmd,dbconn)
finally:
    dbconn.close()

# Drop jobs that have not finished (timestamps still ~0).
a=a.drop(a[a['time_end']<10].index)
a=a.drop(a[a['time_start']<10].index)
# Drop rows whose account is NULL (users never registered in Slurm accounting).
# Passing both how= and thresh= to dropna raises TypeError on pandas >= 2.0.
a=a.dropna(subset=['account']).copy()

if a.empty:
    # min()/max() of an empty column is NaN, which time.localtime cannot convert.
    raise SystemExit("No finished jobs found between "+start+" and "+end)

minstart=time.strftime(STAMP_FORMAT,time.localtime(a['time_start'].min()))
maxstart=time.strftime(STAMP_FORMAT,time.localtime(a['time_start'].max()))

fileout="TimeAccount"+time.strftime(STAMP_FORMAT,time.localtime(time.time()))+"From"+minstart+"To"+maxstart+".csv"

# cputime: wall-clock seconds * requested CPUs; onecpu: wall-clock seconds only.
a['cputime']=(a['time_end']-a['time_start'])*a['cpus_req']
a['onecpu']=(a['time_end']-a['time_start'])
# One row per job, so summing this column yields the job count.
a['tasknums']=1

# Per-account totals.
sumuser=a.groupby('account')[['cputime','onecpu','tasknums']].sum()

# User table providing the real name and remark columns; the first line of the
# file is skipped so that the second line is used as the header.
info=pd.read_csv("用户和队列配置.csv",skiprows=1)

# utf-8-sig writes a BOM at the start of the file.
with open(fileout,'w', encoding='utf-8-sig') as f:
    f.write('%20s , %20s , %20s , %20s , %20s , %20s,  %20s \n'%('用户名', '姓名','作业数', '机时(hour)','CPU时间(hour)','费用(元)','备注') )
    for name, cpu_seconds, wall_seconds, jobs in zip(
            sumuser.index.values,
            sumuser['cputime'].values,
            sumuser['onecpu'].values,
            sumuser['tasknums'].values):
        # Whole hours summed over all CPUs; the cost is derived from this value.
        cpu_hours = cpu_seconds // 3600
        time2read="%d" % ( cpu_hours)
        money="%d" % ( cpu_hours*PRICE_PER_HOUR)
        # Whole wall-clock hours (not multiplied by the CPU count).
        cputime="%d" %( wall_seconds // 3600 )
        # Look up the real name and remark; blank when the account is not listed.
        match=info[info['username'] == name]
        if len(match) > 0:
          xingming=match['姓名'].values[0]
          beizhu=match['信息'].values[0]
        else:
          xingming=""
          beizhu=""
        f.write('%20s , %20s , %20s , %20s , %20s , %20s , %20s \n'%(name,xingming,jobs,time2read,cputime, money,beizhu) )

print("Save Time account to file: "+fileout)
print("start",minstart)
print("end",maxstart)
# Unix timestamp back to a readable time:
#time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(1612613913))