Ceph maintenance: viewing default configuration, memory usage, and latency

Viewing the default configuration of Ceph services

# View the configuration of the entire cluster
    root@pve-ceph01:~# ceph-conf --show-config | grep osd_recovery
    osd_recovery_cost = 20971520
    osd_recovery_delay_start = 0.000000
    osd_recovery_max_active = 0
    osd_recovery_max_active_hdd = 3
    osd_recovery_max_active_ssd = 10
    osd_recovery_max_chunk = 8388608
    osd_recovery_max_omap_entries_per_chunk = 8096
    osd_recovery_max_single_start = 1
    osd_recovery_op_priority = 3
    osd_recovery_op_warn_multiple = 16
    osd_recovery_priority = 5
    osd_recovery_retry_interval = 30.000000
    osd_recovery_sleep = 0.000000
    osd_recovery_sleep_hdd = 0.100000
    osd_recovery_sleep_hybrid = 0.025000
    osd_recovery_sleep_ssd = 0.000000

# View the configuration of a specific OSD
    ceph config show osd.0
    ceph daemon osd.0 config show
    #ceph daemon /var/run/ceph/ceph-osd.0.asok config show

    root@pve-ceph01:~# ceph config show osd.0
    NAME                                             VALUE                             SOURCE    OVERRIDES  IGNORES
    auth_client_required                             cephx                             file                        
    auth_cluster_required                            cephx                             file                        
    auth_service_required                            cephx                             file                        
    cluster_network                                  10.99.99.1/24                     file                        
    daemonize                                        false                             override                    
    keyring                                          $osd_data/keyring                 default                     
    leveldb_log                                                                        default                     
    mon_allow_pool_delete                            true                              file                        
    mon_host                                         10.99.99.1 10.99.99.2 10.99.99.3  file                        
    ms_bind_ipv4                                     true                              file                        
    ms_bind_ipv6                                     false                             file                        
    no_config_file                                   false                             override                    
    osd_delete_sleep                                 0.000000                          override          
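
If only one or two options are of interest, they can be queried directly instead of grepping the full dump. A minimal sketch, assuming osd.0 is running and the option name is known:

# Query a single option (from the mon config database, or live from the daemon)
    ceph config get osd.0 osd_recovery_max_active
    ceph daemon osd.0 config get osd_recovery_max_active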

OSD memory

Viewing the actual memory usage of an OSD

root@fuse01:~# systemctl status ceph-osd@3
● ceph-osd@3.service - Ceph object storage daemon osd.3
     Loaded: loaded (/lib/systemd/system/ceph-osd@.service; enabled-runtime; vendor preset: enabled)
    Drop-In: /usr/lib/systemd/system/ceph-osd@.service.d
             └─ceph-after-pve-cluster.conf
     Active: active (running) since Mon 2023-07-03 20:09:25 CST; 3 days ago
    Process: 5043 ExecStartPre=/usr/libexec/ceph/ceph-osd-prestart.sh --cluster ${CLUSTER} --id 3 (code=exited, status=0/SUCCESS)
   Main PID: 5066 (ceph-osd)
      Tasks: 64
     Memory: 7.6G
        CPU: 4h 13min 30.816s
     CGroup: /system.slice/system-ceph\x2dosd.slice/ceph-osd@3.service
             └─5066 /usr/bin/ceph-osd -f --cluster ceph --id 3 --setuser ceph --setgroup ceph
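
For a quick overview of every OSD on a node, the same cgroup memory counter that systemctl status reports can be read in a loop. A rough sketch, assuming systemd-managed (non-containerized) OSDs and a systemd recent enough to support --value:

# Show the current memory usage (in bytes) of every OSD on this host
    for osd in $(ceph osd ls-tree $HOSTNAME); do
        echo -n "osd.$osd: "
        systemctl show ceph-osd@$osd --property=MemoryCurrent --value
    done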

Viewing the default OSD memory settings

# Two equivalent ways, pick either (provided the daemon is running on this host)
    #ceph daemon /var/run/ceph/ceph-osd.1.asok config show | grep -i osd_memory_target
    ceph daemon osd.1 config show | grep -i osd_memory_target

root@tkyhost002:~# ceph daemon /var/run/ceph/ceph-osd.1.asok  config show | grep osd_memory
    "osd_memory_base": "805306368",
    "osd_memory_cache_min": "134217728",
    "osd_memory_cache_resize_interval": "1.000000",
    "osd_memory_expected_fragmentation": "0.150000",
    "osd_memory_target": "4294967296",      # 单位B,字节
    "osd_memory_target_autotune": "false",
    "osd_memory_target_cgroup_limit_ratio": "0.800000",

Three ways to limit OSD memory (default 4 GB)

# Prerequisite: disable OSD memory autotuning (disabled by default in Ceph 16 and 17)
    ceph config set osd.0 osd_memory_target_autotune false      # disable autotuning for a specific OSD
    ceph config set osd osd_memory_target_autotune false        # disable autotuning for all OSDs
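
Before applying a manual limit, it is worth confirming that autotuning is really off. A small sketch:

# Verify the autotune switch
    ceph config get osd osd_memory_target_autotune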

# A. Limit OSD memory at runtime (lost after the OSD restarts; unit: bytes; default 4 GB = 4294967296)
    # Limit one OSD to 2 GB
    ceph tell osd.0 injectargs --osd_memory_target 2147483648
    # Limit all OSDs to 2 GB
    ceph tell osd.* injectargs --osd_memory_target 2147483648

    # Apply a 2 GB limit to every OSD on this node
    for osd in $(ceph osd ls-tree $HOSTNAME); do ceph tell osd.$osd injectargs --osd_memory_target 2147483648; done
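
Since injectargs only changes the running process, it helps to confirm the new value was actually picked up. A small sketch, assuming osd.0 runs on the local host:

# Confirm the injected value on the running daemon
    ceph daemon osd.0 config get osd_memory_target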

# B. Limit OSD memory persistently (survives OSD and host restarts, unless another config source overrides it)
    # ceph config set osd.0 osd_memory_target 2147483648
    ceph config set osd.0 osd_memory_target 2G

    # To target the OSDs on a specific host (rarely needed; kept here for reference)
    # ceph config set osd/host:HOSTNAME osd_memory_target 2G

    # Apply a 2 GB limit to every OSD on this node
    for osd in $(ceph osd ls-tree $HOSTNAME); do ceph config set osd.$osd osd_memory_target 2147483648; done
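
Values set with ceph config set live in the monitors' config database, where they can be reviewed or removed again. A small sketch:

# Review (or remove) the persistent override
    ceph config dump | grep osd_memory_target
    # ceph config rm osd.0 osd_memory_target     # drop the override if it is no longer wanted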

# C. Limit OSD memory via ceph.conf (does not take effect immediately; ceph-osd.target must be restarted)
    root@pve-ceph02:~# cat /etc/ceph/ceph.conf 
    [osd]
        osd_memory_target = 2147483648
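
A ceph.conf entry only takes effect after the OSD daemons restart and re-read the file, so the target mentioned above has to be restarted on that node; roughly:

# Apply the ceph.conf change on this node
    systemctl restart ceph-osd.target
    # or restart a single OSD: systemctl restart ceph-osd@0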

Using the OSD memory profiler

# Enable the memory profiler on osd.0 (reverts to off after the OSD restarts)
    ceph tell osd.0 heap start_profiler

# Print the heap usage statistics collected by the profiler
    # ceph tell osd.0 heap stats
    # ceph daemon osd.0 dump_mempools       # or inspect the memory pool breakdown

    root@3-ceph15-1:~# ceph daemon osd.0 heap stats
    osd.0 tcmalloc heap stats:------------------------------------------------
    MALLOC:       54264544 (   51.8 MiB) Bytes in use by application
    MALLOC: +            0 (    0.0 MiB) Bytes in page heap freelist
    MALLOC: +       656192 (    0.6 MiB) Bytes in central cache freelist
    MALLOC: +      2023424 (    1.9 MiB) Bytes in transfer cache freelist
    MALLOC: +      3275232 (    3.1 MiB) Bytes in thread cache freelists
    MALLOC: +      2752512 (    2.6 MiB) Bytes in malloc metadata
    MALLOC:   ------------
    MALLOC: =     62971904 (   60.1 MiB) Actual memory used (physical + swap)
    MALLOC: +     25116672 (   24.0 MiB) Bytes released to OS (aka unmapped)
    MALLOC:   ------------
    MALLOC: =     88088576 (   84.0 MiB) Virtual address space used
    MALLOC:
    MALLOC:            861              Spans in use
    MALLOC:             36              Thread heaps in use
    MALLOC:           8192              Tcmalloc page size
    ------------------------------------------------
    Call ReleaseFreeMemory() to release freelist memory to the OS (via madvise()).
    Bytes released to the OS take up virtual address space but no physical memory.

# Or dump to a file instead of printing
    # ceph tell osd.0 heap dump
    ceph daemon osd.0 heap dump    # writes /var/log/ceph/osd.0.profile.0001.heap; the sequence number increments on each run

# Analyze the heap dump
    # Requires google-perftools: yum install google-perftools / apt install google-perftools
    #/usr/bin/pprof --text /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0001.heap |more
    /usr/bin/pprof-symbolize --text /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0001.heap
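
The same pprof tool can also render the profile as a call graph, which is often easier to read than the text report for deep stacks. A sketch, assuming graphviz and ghostscript are installed (pprof needs them for PDF output):

# Render the heap profile graphically
    /usr/bin/pprof --pdf /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0001.heap > osd.0.heap.pdf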

# Release memory held by tcmalloc but not currently used by Ceph
    ceph tell osd.0 heap release

# Stop the memory profiler on osd.0
    ceph tell osd.0 heap stop_profiler

OSD latency

Viewing OSD latency, sorted

root@fuse01:~# ceph osd perf |sort -nk3
osd  commit_latency(ms)  apply_latency(ms)
  0                   5                  5
 14                   6                  6
  4                   7                  7
  5                   7                  7
 13                   8                  8
  3                   8                  8
  6                   9                  9
  9                   9                  9
  1                  10                 10
  7                  10                 10
  2                  12                 12
  8                  15                 15
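
A single snapshot of these counters can be misleading, so it helps to watch the sorted view for a while. A small sketch using watch:

# Refresh the sorted latency view every 5 seconds
    watch -n 5 "ceph osd perf | sort -nk3"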

Viewing op durations on an OSD

# Inspect an OSD's processing latency in detail (look at queued_for_pg: the op has been queued and is waiting for its PG, and reached_pg: the PG has started processing it; how large is the gap between these two event timestamps?)
    ceph daemon osd.2 dump_historic_ops | grep duration

# The field to watch is the op's duration, the total time spent handling the op; a complete lifecycle runs from initiated to done
    ceph daemon osd.2 dump_historic_ops_by_duration
"description": "osd_op(client.5710671.0:911515330 5.169as0 5:5969a3d9:::a6d19067-0ebc-4320-b176-6f902d463f77.298537.1__shadow_.hLHnv2N2acmbbO99OVJlov-DCq0_B6l_1:head [call refcount.put] snapc 0=[] ondisk+write+known_if_redirected e75969)",
            "initiated_at": "2022-05-17 14:56:02.920176",
            "age": 533.331712,
            "duration": 0.496794,
            "type_data": {
                "flag_point": "commit sent; apply or cleanup",
                "client_info": {
                    "client": "client.5710671",
                    "client_addr": "10.95.154.134:0/4118870958",
                    "tid": 911515330
                },
                "events": [
                    {
                        "time": "2022-05-17 14:56:02.920176",
                        "event": "initiated"
                    },
                    {
                        "time": "2022-05-17 14:56:02.920176",
                        "event": "header_read"
                    },
                    {
                        "time": "2022-05-17 14:56:02.920185",
                        "event": "throttled"
                    },
                    {
                        "time": "2022-05-17 14:56:02.920213",
                        "event": "all_read"
                    },
                    {
                        "time": "2022-05-17 14:56:02.920276",
                        "event": "dispatched"
                    },
                    {
                        "time": "2022-05-17 14:56:02.920286",
                        "event": "queued_for_pg"
                    },
                    {
                        "time": "2022-05-17 14:56:02.929867",
                        "event": "reached_pg"
                    },
                    {
                        "time": "2022-05-17 14:56:02.939236",
                        "event": "started"
                    },
                    {
                        "time": "2022-05-17 14:56:02.940487",
                        "event": "sub_op_started"
                    },
                    {
                        "time": "2022-05-17 14:56:02.942894",
                        "event": "sub_op_committed"
                    },
                    {
                        "time": "2022-05-17 14:56:03.416938",
                        "event": "commit_sent"
                    },
                    {
                        "time": "2022-05-17 14:56:03.416970",
                        "event": "done"
                    }
                ]
This is a complete op-processing trace. The field we care about most is duration: the total cost of handling the op from initiated to done, with the per-event timestamps showing how long each step took. This op took 0.496794 s from receipt to completion. Is that a lot? On the whole, somewhat: the OSD is an SSD+HDD hybrid, and roughly 0.5 s for a single operation is fairly slow, though not yet at the slow-request level. Walking through the events, most of the time falls around sub_op_committed, the stage covering submission of the data to disk until the acknowledgement comes back, which is quite slow; the next step is to check the state of that disk.
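
When many historic ops are recorded, filtering the JSON makes the slow ones easier to spot. A rough sketch, assuming jq is installed (the field names match the output shown above):

# List historic ops sorted by duration, slowest last
    ceph daemon osd.2 dump_historic_ops | jq '.ops | sort_by(.duration) | .[] | {duration, flag_point: .type_data.flag_point, description: .description[0:80]}'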