
Finding the Ceph large omap object


Overview

A few days ago a customer reported that their vanilla Ceph cluster was showing a 'Large omap objects' health warning, and asked us (as free technical support) to help them clear it.

The warning information looked like this:
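(A representative ceph health detail excerpt; the pool name below is only an example, not taken from the customer's cluster.)

HEALTH_WARN 1 large omap objects
LARGE_OMAP_OBJECTS 1 large omap objects
    1 large objects found in pool 'default.rgw.buckets.index'
    Search the cluster log for 'Large omap object found' for more details.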

Procedure

To find out exactly which object carries the large omap, I used a script along the following lines:

#!/usr/bin/env python
# Locate the pool and PGs behind a LARGE_OMAP_OBJECTS health warning.

import os
import sys
import json
import rados

ceph_conf_path = '/etc/ceph/ceph.conf'
rados_connect_timeout = 5

if not os.path.exists(ceph_conf_path):
    print("[ERROR] ceph.conf not found at {0}, exit!!!".format(ceph_conf_path))
    sys.exit(1)

class RADOSClient(object):
    """Context manager that opens a RADOS connection and always closes it."""
    def __init__(self, driver, pool=None):
        self.driver = driver
        self.client, self.ioctx = driver._connect_to_rados(pool)
    def __enter__(self):
        return self
    def __exit__(self, type_, value, traceback):
        self.driver._disconnect_from_rados(self.client, self.ioctx)

class RBDDriver(object):
    """Thin wrapper around librados connection setup and teardown."""
    def __init__(self, ceph_conf_path, rados_connect_timeout, pool=None):
        self.ceph_conf_path = ceph_conf_path
        self.rados_connect_timeout = rados_connect_timeout
        self.pool = pool

    def _connect_to_rados(self, pool=None):
        client = rados.Rados(conffile=self.ceph_conf_path)
        try:
            if self.rados_connect_timeout >= 0:
                client.connect(timeout=self.rados_connect_timeout)
            else:
                client.connect()
            # Only open an ioctx when a pool was requested; mon commands do not need one.
            if self.pool is None:
                ioctx = None
            else:
                ioctx = client.open_ioctx(self.pool)
            return client, ioctx
        except rados.Error:
            print("[ERROR] Error connecting to ceph cluster.")
            client.shutdown()
            raise

    def _disconnect_from_rados(self, client, ioctx=None):
        if ioctx is not None:
            ioctx.close()
        client.shutdown()

class cmd_manager(object):
    """Issues mon commands to locate the pool and PGs with large omap objects."""
    def get_large_omap_obj_pool_name(self):
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "health", "detail": "detail", "format": "json"}'
            ret, outbuf, _ = dr.client.mon_command(cmd, b'')
            if ret != 0:
                return False
            res_ = json.loads(outbuf)
            if res_["checks"] and "LARGE_OMAP_OBJECTS" in res_["checks"]:
                # The detail message looks like: "N large objects found in pool 'xxx'"
                return res_["checks"]["LARGE_OMAP_OBJECTS"]["detail"][0]["message"].split("'")[1]
            return False

    def get_pg_list_by_pool(self, pool_name):
        with RADOSClient(RBDDriver(ceph_conf_path, rados_connect_timeout)) as dr:
            cmd = '{"prefix": "pg ls-by-pool", "poolstr": "' + pool_name + '", "format": "json"}'
            ret, outbuf, _ = dr.client.mon_command(cmd, b'')
            if ret == 0:
                return json.loads(outbuf)
            return False

cmd_ = cmd_manager()
pool_name = cmd_.get_large_omap_obj_pool_name()
if pool_name:
    print("Large omap objects pool_name = {0}".format(pool_name))
    res = cmd_.get_pg_list_by_pool(pool_name) or []
    # On Luminous "pg ls-by-pool" returns a plain list of PG stats;
    # newer releases wrap the list in {"pg_stats": [...]}.
    pg_stats = res["pg_stats"] if isinstance(res, dict) else res
    for i in pg_stats:
        num_large_objs = i["stat_sum"]["num_large_omap_objects"]
        if num_large_objs != 0:
            print("pgid={0} OSDs={1} num_large_omap_objects={2}".format(i["pgid"], i["acting"], num_large_objs))
else:
    print("[Cool] No pool with large omap objects found, as expected, so exit.")

Resolution

The root cause of the customer's problem was that the RGW bucket index shard count was too small (auto-resharding had been disabled), so a single bucket held more than 200,000 objects, exceeding osd_deep_scrub_large_omap_object_key_threshold. Raising the warning threshold (e.g. ceph daemon osd.* injectargs "--osd_deep_scrub_large_omap_object_key_threshold 300000") would have cleared the alert, but the customer insisted on not changing it, on the grounds that rules are rules and the application side should not be indulged. That left only one option: migrate part of the data out of the bucket and then run a deep scrub. (Increasing the shard count would also have worked, but then part of the index could no longer be placed on SSD, hurting overall performance.)
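For reference, the levers discussed above map to commands roughly like the following; the bucket name, shard count and PG id are placeholders, and the threshold bump (shown here via ceph tell) is the change the customer declined:

# Raise the per-object omap key threshold cluster-wide (the option the customer refused)
ceph tell osd.* injectargs '--osd_deep_scrub_large_omap_object_key_threshold 300000'

# Or, if resharding were allowed: split the bucket index into more shards
radosgw-admin bucket reshard --bucket=<bucket_name> --num-shards=<new_shard_count>

# After part of the data has been moved out of the bucket, deep-scrub the PGs
# reported by the script so the warning is re-evaluated and cleared
ceph pg deep-scrub <pgid>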

