View the default configuration of Ceph services
# View the configuration of the entire cluster
root@pve-ceph01:~# ceph-conf --show-config | grep osd_recovery
osd_recovery_cost = 20971520
osd_recovery_delay_start = 0.000000
osd_recovery_max_active = 0
osd_recovery_max_active_hdd = 3
osd_recovery_max_active_ssd = 10
osd_recovery_max_chunk = 8388608
osd_recovery_max_omap_entries_per_chunk = 8096
osd_recovery_max_single_start = 1
osd_recovery_op_priority = 3
osd_recovery_op_warn_multiple = 16
osd_recovery_priority = 5
osd_recovery_retry_interval = 30.000000
osd_recovery_sleep = 0.000000
osd_recovery_sleep_hdd = 0.100000
osd_recovery_sleep_hybrid = 0.025000
osd_recovery_sleep_ssd = 0.000000
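# To read a single option without grepping the whole dump, the cluster configuration database
# can also be queried directly (a sketch; the option below is one of those listed above, and
# ceph config get returns the compiled-in default when nothing has been overridden)
ceph config get osd osd_recovery_max_active_hdd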
# View the configuration of a specific OSD
ceph config show osd.0
ceph daemon osd.0 config show
#ceph daemon /var/run/ceph/ceph-osd.0.asok config show
root@pve-ceph01:~# ceph config show osd.0
NAME VALUE SOURCE OVERRIDES IGNORES
auth_client_required cephx file
auth_cluster_required cephx file
auth_service_required cephx file
cluster_network 10.99.99.1/24 file
daemonize false override
keyring $osd_data/keyring default
leveldb_log default
mon_allow_pool_delete true file
mon_host 10.99.99.1 10.99.99.2 10.99.99.3 file
ms_bind_ipv4 true file
ms_bind_ipv6 false file
no_config_file false override
osd_delete_sleep 0.000000 override
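ceph config show osd.0 lists only the options whose value does not come from the compiled-in default. To see, for a running daemon, exactly which options differ from the defaults together with both values, the admin socket also offers a diff (a sketch, assuming osd.0 runs on the local host):
# Show only the options whose runtime value differs from the default, with both values
ceph daemon osd.0 config diff
# Or list every option, defaults included
# ceph config show-with-defaults osd.0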
OSD memory
View actual OSD memory usage
root@fuse01:~# systemctl status ceph-osd@3
● ceph-osd@3.service - Ceph object storage daemon osd.3
Loaded: loaded (/lib/systemd/system/ceph-osd@.service; enabled-runtime; vendor preset: enabled)
Drop-In: /usr/lib/systemd/system/ceph-osd@.service.d
└─ceph-after-pve-cluster.conf
Active: active (running) since Mon 2023-07-03 20:09:25 CST; 3 days ago
Process: 5043 ExecStartPre=/usr/libexec/ceph/ceph-osd-prestart.sh --cluster ${CLUSTER} --id 3 (code=exited, status=0/SUCCESS)
Main PID: 5066 (ceph-osd)
Tasks: 64
Memory: 7.6G
CPU: 4h 13min 30.816s
CGroup: /system.slice/system-ceph\x2dosd.slice/ceph-osd@3.service
└─5066 /usr/bin/ceph-osd -f --cluster ceph --id 3 --setuser ceph --setgroup ceph
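The Memory: 7.6G line above is the cgroup accounting of the systemd unit. To compare all OSDs on a host at a glance, the same figure can be read per unit through systemd's MemoryCurrent property; a minimal sketch (unit names follow the ceph-osd@<id> pattern shown above):
# Print the current cgroup memory usage, in bytes, of every ceph-osd unit on this host
for u in $(systemctl list-units --no-legend 'ceph-osd@*' | awk '{print $1}'); do
    echo "$u: $(systemctl show -p MemoryCurrent --value "$u")"
done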
View the default OSD memory settings
# Two equivalent ways, pick either (the daemon must be running on this host)
#ceph daemon /var/run/ceph/ceph-osd.1.asok config show | grep -i osd_memory_target
ceph daemon osd.1 config show | grep -i osd_memory_target
root@tkyhost002:~# ceph daemon /var/run/ceph/ceph-osd.1.asok config show | grep osd_memory
"osd_memory_base": "805306368",
"osd_memory_cache_min": "134217728",
"osd_memory_cache_resize_interval": "1.000000",
"osd_memory_expected_fragmentation": "0.150000",
"osd_memory_target": "4294967296", # 单位B,字节
"osd_memory_target_autotune": "false",
"osd_memory_target_cgroup_limit_ratio": "0.800000",
Three ways to limit OSD memory (default 4 GB)
# Prerequisite: the OSD memory autotune feature must be disabled (it is disabled by default in Ceph 16 and 17)
ceph config set osd.0 osd_memory_target_autotune false # disable autotuning for a specific OSD
ceph config set osd osd_memory_target_autotune false # disable autotuning for all OSDs
# A. Limit OSD memory at runtime (lost after the OSD restarts; unit is bytes; default 4 GB = 4294967296)
# Limit a single OSD to 2 GB
ceph tell osd.0 injectargs --osd_memory_target 2147483648
# Limit all OSDs to 2 GB (quote osd.* so the shell does not expand it as a glob)
ceph tell 'osd.*' injectargs --osd_memory_target 2147483648
# Limit all OSDs on a given host to 2 GB
for osd in $(ceph osd ls-tree $HOSTNAME); do ceph tell osd.$osd injectargs --osd_memory_target 2147483648; done
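# To verify that the injected value is active, read it back over the admin socket
# (a sketch, assuming osd.0 runs on this host)
ceph daemon osd.0 config get osd_memory_target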
# B. Limit OSD memory via the configuration database (survives OSD and host restarts, unless another config source overrides it)
# ceph config set osd.0 osd_memory_target 2147483648
ceph config set osd.0 osd_memory_target 2G
# To target all OSDs on a particular host with a config mask instead (rarely needed, kept here for reference):
# ceph config set osd/host:HOSTNAME osd_memory_target 2G
# Limit all OSDs on a given host to 2 GB
for osd in $(ceph osd ls-tree $HOSTNAME); do ceph config set osd.$osd osd_memory_target 2147483648; done
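# To check what is actually stored in the configuration database for a given OSD
# (a sketch; the default is returned if nothing has been set)
ceph config get osd.0 osd_memory_target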
# C. Limit OSD memory via ceph.conf (does not take effect immediately; requires restarting ceph-osd.target)
root@pve-ceph02:~# cat /etc/ceph/ceph.conf
[osd]
osd_memory_target = 2147483648
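For method C the daemons must re-read ceph.conf, so the OSDs have to be restarted; a minimal sketch (the single-OSD variant reuses osd id 3 from the status output above as an example):
# Restart all OSDs on this host so the new ceph.conf value takes effect
systemctl restart ceph-osd.target
# Or restart one OSD at a time to reduce the impact
systemctl restart ceph-osd@3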
Use the OSD memory profiler
# Enable the memory profiler for osd.0 (reverts to off after the OSD restarts)
ceph tell osd.0 heap start_profiler
# Print the heap usage statistics collected by the profiler
# ceph tell osd.0 heap stats
# ceph daemon osd.0 dump_mempools # or inspect the per-pool memory usage breakdown
root@3-ceph15-1:~# ceph daemon osd.0 heap stats
osd.0 tcmalloc heap stats:------------------------------------------------
MALLOC: 54264544 ( 51.8 MiB) Bytes in use by application
MALLOC: + 0 ( 0.0 MiB) Bytes in page heap freelist
MALLOC: + 656192 ( 0.6 MiB) Bytes in central cache freelist
MALLOC: + 2023424 ( 1.9 MiB) Bytes in transfer cache freelist
MALLOC: + 3275232 ( 3.1 MiB) Bytes in thread cache freelists
MALLOC: + 2752512 ( 2.6 MiB) Bytes in malloc metadata
MALLOC: ------------
MALLOC: = 62971904 ( 60.1 MiB) Actual memory used (physical + swap)
MALLOC: + 25116672 ( 24.0 MiB) Bytes released to OS (aka unmapped)
MALLOC: ------------
MALLOC: = 88088576 ( 84.0 MiB) Virtual address space used
MALLOC:
MALLOC: 861 Spans in use
MALLOC: 36 Thread heaps in use
MALLOC: 8192 Tcmalloc page size
------------------------------------------------
Call ReleaseFreeMemory() to release freelist memory to the OS (via madvise()).
Bytes released to the OS take up virtual address space but no physical memory.
# Or dump to a file instead of printing
# ceph tell osd.0 heap dump
ceph daemon osd.0 heap dump # writes /var/log/ceph/osd.0.profile.0001.heap; the sequence number increases with each dump
# Analyze the heap dump
# Requires google-perftools: yum install google-perftools ; apt install google-perftools
#/usr/bin/pprof --text /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0001.heap |more
/usr/bin/pprof-symbolize --text /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0001.heap
# Release memory held by tcmalloc but no longer used by the OSD
ceph tell osd.0 heap release
# Stop the memory profiler for osd.0
ceph tell osd.0 heap stop_profiler
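If the profiler stays enabled and several dumps are taken, pprof can also diff two dumps to show only what grew in between; a sketch, assuming the --base option of google-perftools' pprof and the auto-incremented file names mentioned above:
# Show only the allocations added between the first and the second heap dump
/usr/bin/pprof --text --base=/var/log/ceph/osd.0.profile.0001.heap \
    /usr/bin/ceph-osd /var/log/ceph/osd.0.profile.0002.heap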
OSD latency
View OSD latency, sorted
root@fuse01:~# ceph osd perf |sort -nk3
osd commit_latency(ms) apply_latency(ms)
0 5 5
14 6 6
4 7 7
5 7 7
13 8 8
3 8 8
6 9 9
9 9 9
1 10 10
7 10 10
2 12 12
8 15 15
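To watch only the outliers instead of the whole table, the same output can be filtered; a small sketch (the 10 ms threshold is arbitrary and should match the cluster's media):
# Print only OSDs whose commit latency (column 2) exceeds 10 ms, skipping the header line
ceph osd perf | awk 'NR > 1 && $2 > 10'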
View the duration of individual OSD ops
# Inspect per-op latency in detail. Pay particular attention to queued_for_pg (the op has been queued for its PG) and reached_pg (the PG has started processing it): the gap between those two timestamps is how long the op waited in the queue (see the jq sketch at the end of this section).
ceph daemon osd.2 dump_historic_ops | grep duration
# The key field is duration: the total time spent handling the op; a complete lifecycle runs from the initiated event to done
ceph daemon osd.2 dump_historic_ops_by_duration
"description": "osd_op(client.5710671.0:911515330 5.169as0 5:5969a3d9:::a6d19067-0ebc-4320-b176-6f902d463f77.298537.1__shadow_.hLHnv2N2acmbbO99OVJlov-DCq0_B6l_1:head [call refcount.put] snapc 0=[] ondisk+write+known_if_redirected e75969)",
"initiated_at": "2022-05-17 14:56:02.920176",
"age": 533.331712,
"duration": 0.496794,
"type_data": {
"flag_point": "commit sent; apply or cleanup",
"client_info": {
"client": "client.5710671",
"client_addr": "10.95.154.134:0/4118870958",
"tid": 911515330
},
"events": [
{
"time": "2022-05-17 14:56:02.920176",
"event": "initiated"
},
{
"time": "2022-05-17 14:56:02.920176",
"event": "header_read"
},
{
"time": "2022-05-17 14:56:02.920185",
"event": "throttled"
},
{
"time": "2022-05-17 14:56:02.920213",
"event": "all_read"
},
{
"time": "2022-05-17 14:56:02.920276",
"event": "dispatched"
},
{
"time": "2022-05-17 14:56:02.920286",
"event": "queued_for_pg"
},
{
"time": "2022-05-17 14:56:02.929867",
"event": "reached_pg"
},
{
"time": "2022-05-17 14:56:02.939236",
"event": "started"
},
{
"time": "2022-05-17 14:56:02.940487",
"event": "sub_op_started"
},
{
"time": "2022-05-17 14:56:02.942894",
"event": "sub_op_committed"
},
{
"time": "2022-05-17 14:56:03.416938",
"event": "commit_sent"
},
{
"time": "2022-05-17 14:56:03.416970",
"event": "done"
}
]
This is a complete op processing flow. The field we care about most is duration: the total cost of handling this op, from initiated to done, and the event list shows how long each individual step took. This op took 0.496794 s from being received to being acknowledged. Is that a lot? On the whole, yes: this is an SSD + HDD OSD, and roughly 0.5 s for a single operation is on the slow side. Looking at the timeline, most of the time falls between sub_op_committed and commit_sent, the stage that covers committing the data to disk and sending the acknowledgement back. That is slow, although not yet at the level of a slow request, so the next step is to check the underlying disk.
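To answer the question raised earlier (how large is the gap between queued_for_pg and reached_pg, and where exactly does the time go), the event timeline can be turned into per-step deltas with jq; a minimal sketch, assuming jq is installed and that the JSON layout and timestamp format match the sample above:
# For each recorded op, print its total duration and the time spent between consecutive events
ceph daemon osd.2 dump_historic_ops_by_duration | jq -r '
  # parse "2022-05-17 14:56:02.920176" into seconds since the epoch
  def ts: (split(".")[0] | strptime("%Y-%m-%d %H:%M:%S") | mktime)
          + (("0." + (split(".")[1] // "0")) | tonumber);
  .ops[]
  | .type_data.events as $e
  | "duration=\(.duration)s  \(.description[0:60])",
    ( range(1; $e | length) as $i
      | "  \($e[$i - 1].event) -> \($e[$i].event): \((($e[$i].time | ts) - ($e[$i - 1].time | ts)) * 1000 | round) ms" )'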