用python实现vmware esxi虚拟机断电自动关机
用python实现vmware esxi虚拟机断电自动关机
一、前言
机房服务器一般配备UPS,当市电停掉时,仍然能维持一段时间。但是,如果长时间停电,UPS也撑不住,此时就需要及时关机以确保数据安全。很多年以前用过WinPower软件:在目标系统上安装agent,并用WinPower连接UPS,当UPS电量低于设置的阈值时,给各个agent下发关机指令,以实现自动关机。这种方式可以充分利用UPS电量,较为精准地判断电池续航情况,并采取相应动作,但是操作起来有点麻烦。
另一种方式是采用自动化脚本:循环ping某个常接市电的设备IP,当失败超过一定次数时,触发powerOffVMs,最后shutdown esxi系统。这种方式无需与UPS通讯,只要市电断开超过一段时间就执行关机指令,操作起来比较方便。但是,这种方式可能存在误判,比如交换机挂了,此时ping设备必然失败,最终会触发关机。因此下面介绍一种更为稳妥的方案。
二、实现
原理
利用 esxcli 执行以下命令:
esxcli hardware ipmi sdr list | grep "Power\ Supply\ 1\ Status\|Power\ Supply\ 2\ Status"
来检测电源是否正常。当其中一路电源断开或异常时,调用powerOffVms.py
关闭各虚拟机,最后再调用poweroff
来关闭esxi。其实还可以利用snmp连接到idrac,也是通过检查电源状态来实现自动监控,但是这种方式又依赖网络了,跟上面提到的方式二差不多。
==注意,这里默认服务器都具备双路电源,如果只有一路电源,那不适用此方法。==
代码
源码如下(有点啰嗦,几年前写的,将就看吧)。racadm是为了兼容R710,因为R710的esxcli指令不支持获取电源状态。执行python脚本后,会修改/etc/rc.local.d/local.sh以实现开机自动执行脚本。本例中powerOffDelay=1200,即断电持续20分钟后触发自动关机指令;如果断电期间又恢复供电,则会重新计时。
本代码在R710至R750上测试通过,不代表你的也可以,仅作参考 ,请根据自己实际情况决定是否使用。
#!/usr/bin/python
"""Power-loss watchdog for a VMware ESXi host with two power supplies.

The script polls the power-supply sensors every ``interval`` seconds.  When a
supply is reported as lost for ``powerOffDelay`` seconds in a row, it runs the
companion ``powerOffVms_py3.0.py`` script and then powers the host off.  A
successful check in between resets the countdown.

On first start it also makes sure ``racadm`` is available (installing the
bundled VIB when it is not) and registers itself in
``/etc/rc.local.d/local.sh`` so that it is started at boot.
"""
from genericpath import exists, isfile
import os
import time
import subprocess
import fileinput
import re
import sys

# version 20190916 usage: add python autoShutdown.py to /etc/rc.local.d/local.sh and uncomment
# by root6819 Q:302777528 site: www.qipanet.com

# Directory that holds this script; the helper files below live next to it.
basePath = os.path.split(os.path.realpath(__file__))[0]
# Script that powers off the virtual machines before the host goes down.
fPowerOff = basePath + '/powerOffVms_py3.0.py'
# Offline bundle used to auto-install racadm when it is missing.
fRacadm = basePath + '/Racadm-Dell-EMC-Web-9.3.0-3379.VIB-ESX67i.zip'
# Boot script that must start this watchdog, and where its backup is written.
fLocalOld = '/etc/rc.local.d/local.sh'
fLocalNew = basePath + '/local.sh.bak'
# Seconds between two checks; should be >= 5 because a check itself takes time.
interval = 15
# Seconds of continuous power-supply failure before shutting down (20 minutes).
powerOffDelay = 1200
# Consecutive failed checks; reset to 0 by every successful check.
iFailTimes = 0
# Output of the platform query; contains 'R710' on an R710 (see getMachine).
machine = ""


def getMachine():
    """Query the hardware platform and remember whether it is an R710.

    The result is stored in the module-level ``machine`` so that ``checkPS``
    can pick the racadm-based check on that model.
    """
    # Without this declaration the assignment would only create a local and
    # checkPS() would never see the detected model.
    global machine
    cmd = 'localcli hardware platform get |grep R710'
    machine = os.popen(cmd).read().strip()
    showMsg("the machine is R710 ?" + machine)


def modify_file(file_name, pattern, value=""):
    """Insert ``value`` in front of every line of ``file_name`` matching ``pattern``.

    The file is edited in place.  ``pattern`` is a regular expression that is
    searched anywhere in the line; ``value`` is written verbatim, so it should
    carry its own trailing newline when a separate line is wanted.

    Returns True on success, False when the edit raised an exception.
    """
    writeLog('now modify file '+file_name+'...')
    try:
        # The context manager restores sys.stdout even if the loop fails, so
        # later log output cannot end up inside the edited file.
        with fileinput.input(file_name, inplace=True) as fh:
            for line in fh:
                if re.search(pattern, line):
                    sys.stdout.write(value)
                sys.stdout.write(line)
        writeLog('write Ok')
        return True
    except Exception as e:
        writeLog(e)
        return False


def myPopen(cmd):
    """Run ``cmd`` through the shell and return ``(stdout, stderr)`` as text."""
    p = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    ret, err = p.communicate()
    return ret.decode('utf-8', 'ignore'), err.decode('utf-8', 'ignore')


def writeLog(msg):
    """Print ``msg`` and append it, time-stamped, to ``Log/log.txt``.

    The log directory is created next to the script when it does not exist.
    A failure to write the log file is reported on stdout but not raised, so
    that a logging problem cannot stop the watchdog.
    """
    showMsg(msg)
    path = basePath + "/Log"
    strMsg = '[{0}]: {1} \n'.format(time.strftime('%Y-%m-%d %H:%M:%S'), msg)
    try:
        os.makedirs(path, exist_ok=True)
        with open(path + "/log.txt", mode='a+') as f:
            f.write(strMsg)
    except OSError as e:
        showMsg('writeLog failed>>' + str(e))


def showMsg(msg):
    """Print ``msg`` prefixed with the current local time."""
    strMsg = '[{0}]: {1} \n'.format(time.strftime('%Y-%m-%d %H:%M:%S'), msg)
    print(strMsg)


def checkLocalSh():
    """Return True when the boot script already starts this watchdog.

    The check looks for the text ``autoShutdown3.0.py &`` in ``fLocalOld``.
    Any error while reading the file is logged and reported as False.
    """
    try:
        with open(fLocalOld, mode='r+') as f:
            tmpStr = f.read()
        # expected entry: python /vmfs/volumes/datastore_ssd/tools/autoShutdown.py &
        return 'autoShutdown3.0.py &' in tmpStr
    except Exception as e:
        writeLog('checkLocalShErr>>')
        writeLog(e)
        return False


def shutDownAll():
    """Run the VM power-off script, log its output, then power off the host."""
    # esxi6.7 powerOffVms use old version script, so a rewritten script is used
    cmd = fPowerOff
    writeLog('powerOffVms path>>%s ' % cmd)
    result = os.popen(cmd).read()
    writeLog('powerOffVms result>>%s ' % result)
    writeLog('now poweroff')
    os.popen('poweroff')


def checkPS():
    """Return True when both power supplies look healthy, False otherwise.

    On an R710 the check counts the AC power-supply sensors that racadm
    reports as ``Present`` and requires exactly two.  On other machines it
    reads the "Power Supply 1/2 Status" IPMI sensor records and fails when
    the word ``lost`` appears in them.
    """
    if 'R710' in machine:
        cmd = 'racadm getsensorinfo |grep ^PS.*AC |grep -c Present'
        result = os.popen(cmd).read().strip()
        return result == '2'

    # Raw string: the backslashes belong to the grep pattern, not to Python.
    cmd = r'localcli hardware ipmi sdr list | grep "Power\ Supply\ 1\ Status\|Power\ Supply\ 2\ Status"'
    while True:
        result = os.popen(cmd).read().strip().lower()
        # A usable answer must mention at least one supply as 'detected';
        # anything else is treated as "no data" and the query is repeated.
        if 'detected' in result:
            break
        showMsg(str(iFailTimes)+'checkPS() result null,check again...')
        # Retry iteratively (not recursively) so a long sensor outage cannot
        # exhaust the call stack; the short pause avoids a busy loop.
        time.sleep(1)

    if 'lost' in result:
        showMsg(str(iFailTimes)+'times checkPs not OK>>'+result)
        return False
    return True


def checkRacadm():
    """Return True when the ``racadm`` command is available, logging its version."""
    _cmd = 'racadm version |grep -w version'
    _result, _err = myPopen(_cmd)
    if 'not found' in _err or '不是内部或外部命令' in _err:
        writeLog('racadm tool not install,pls install it first!')
        return False
    writeLog('racadm check oK,the Version is>>'+_result)
    return True


def main():
    """Prepare the host, then watch the power supplies until shutdown is due."""
    global iFailTimes

    # Make sure racadm exists; install the bundled VIB on first run if needed.
    if not checkRacadm():
        if not os.path.exists(fRacadm):
            writeLog(fRacadm + ' file not exists,could not auto instal!')
            sys.exit(1)
        writeLog(fRacadm + ' try auto install...')
        _result, _err = myPopen('esxcli software vib install -d '+fRacadm)
        writeLog('install result>>'+_result)
        if _err:
            writeLog('install _err>>'+_err)
        # check again
        if not checkRacadm():
            sys.exit(1)

    # Register this script in the boot script, keeping a backup of the original.
    if not checkLocalSh():
        _result, _err = myPopen('cp -f '+fLocalOld+' ' + fLocalNew)
        writeLog('cp Result>>'+_result)
        if _err:
            writeLog('cp err>>'+_err)
            sys.exit(1)
        _str = 'python '+basePath+'/autoShutdown3.0.py &\n'
        if not modify_file(fLocalOld, 'exit 0', _str):
            sys.exit(1)
        # check again
        if not checkLocalSh():
            writeLog('checkLocalSh twice failed,exit now!')
            sys.exit(1)
    writeLog('checkLocalSh Ok!')

    _fPath = basePath + '/powerOffVms_py3.0.py'
    if not os.path.exists(_fPath):
        writeLog(_fPath+' file not exists,pls confirm!')
        sys.exit(1)
    writeLog(_fPath+' check file OK!')

    getMachine()

    # isFirst limits the 'checkPS OK' log line to the first success after
    # start-up or after a failure, instead of one line per poll.
    isFirst = True
    while iFailTimes*interval < powerOffDelay:
        if checkPS():
            iFailTimes = 0
            if isFirst:
                isFirst = False
                writeLog('checkPS OK')
        else:
            isFirst = True
            iFailTimes += 1
            if iFailTimes == 2:
                writeLog('checkPS failed,maybe a power supply is lost!!!')
        # checkPS itself costs about 3 seconds; never pass a negative delay.
        time.sleep(max(0, interval-3))
    shutDownAll()


if __name__ == '__main__':
    main()
评论已关闭