玩命加载中 . . .

Monitor mds failed


Scripts

monitor_mds_failed.sh

#!/bin/bash

if read -t 5 -p "WARN: Have you confirmed that the maximum number of active-mds has been set for VS? [y|n] :" yn
then
    if [[ $yn == [Yy] ]];then
        echo -e 'Running...'
    elif [[ $yn == [Nn] ]];then
        echo -e "\e[0;31;1mExit ...\e[0m"
        exit
    else [[ $yn != [YyNn] ]]
        echo -e "\e[0;33;1mPlease check what you input! Only can input y or n\e[0m"
        exit
    fi
else
    echo " "
    echo -e "\e[0;33;1mTimeOut ...\e[0m"
    exit
fi


log_file="mds_failed.log"

rm -rf ${log_file}

echo -e "  To stop all of btmds-agent for all enabled GW nodes"
onnode -p all '/usr/bin/python /usr/local/bin/btmds-agent stop'

for i in {1..1000}; 
do
    echo "--------------------------- ${i} ----------------------" >> ${log_file}
    date_time1=`date '+%Y-%m-%d %H:%M:%S'`
    echo -e "  --  [${date_time1}]  restart ceph-mds.target on all nodes" >> ${log_file}
    onnode -p all systemctl restart ceph-mds.target;sleep 5

    date_time2=`date '+%Y-%m-%d %H:%M:%S'`
    echo -e "  --  [${date_time2}]  Check mds failed status" >> ${log_file}
    for j in {0..100000}
    do
        res=`ceph -s | grep 'mds:'`
        if [[ ${res} =~ 'failed' ]];then
            date_time3=`date '+%Y-%m-%d %H:%M:%S'`
            msg="[${date_time3}]  [ERROR]  Find failed ceph-mds, exit!!"
            echo -e ${msg} 
            echo -e ${msg} >> ${log_file}
            exit
        else
            date_time4=`date '+%Y-%m-%d %H:%M:%S'`
            echo -e "--  [${date_time4}]  [${j}]time(s), not find failed mds, sleep 5s will try again" >> ${log_file}
            sleep 5
            if [[ ${j} -gt 19 ]];then
                break
            fi
        fi
    done
done


文章作者: Gavin Wang
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 Gavin Wang !
  目录