commit 321b8cb7e3a6e4690ce3aac090f9760ff312fffc Author: Chinasubbareddy Mallavarapu Date: Tue Oct 13 17:19:47 2020 +0000 [ceph-osd] Logic improvement for used osd disk detection This is to improve the logic to detect used osd disks so that scripts will not zap the osd disks aggressively. Also adding debugging mode for pvdisplay commands to capture more logs during failure scenarios along with reading osd force repair flag from values. Change-Id: Id2996211dd92ac963ad531f8671a7cc8f7b7d2d5 diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index abd9da7..fb625d3 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.5 +version: 0.1.6 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl index 8476e9a..7ee57a9 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl @@ -248,7 +248,7 @@ function disk_zap { locked lvremove -y ${logical_volume} fi done - local volume_group=$(pvdisplay ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") + local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") if [[ ${volume_group} ]]; then vgremove -y ${volume_group} pvremove -y ${device} @@ -262,6 +262,7 @@ function disk_zap { function udev_settle { osd_devices="${OSD_DEVICE}" + udevadm settle --timeout=600 partprobe "${OSD_DEVICE}" locked pvscan --cache locked vgscan --cache @@ -273,7 +274,7 @@ function udev_settle { local block_db="$BLOCK_DB" local db_vg="$(echo $block_db | cut -d'/' -f1)" if [ ! 
-z "$db_vg" ]; then - block_db=$(locked pvdisplay | grep -B1 "$db_vg" | awk '/PV Name/{print $3}') + block_db=$(locked pvdisplay -ddd -v | grep -B1 "$db_vg" | awk '/PV Name/{print $3}') fi locked partprobe "${block_db}" fi @@ -283,7 +284,7 @@ function udev_settle { local block_wal="$BLOCK_WAL" local wal_vg="$(echo $block_wal | cut -d'/' -f1)" if [ ! -z "$wal_vg" ]; then - block_wal=$(locked pvdisplay | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}') + block_wal=$(locked pvdisplay -ddd -v | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}') fi locked partprobe "${block_wal}" fi @@ -319,7 +320,7 @@ function udev_settle { function get_lv_from_device { device="$1" - locked pvdisplay -m ${device} | awk '/Logical volume/{print $3}' + locked pvdisplay -ddd -v -m ${device} | awk '/Logical volume/{print $3}' } # Helper function to get an lvm tag from a logical volume @@ -431,7 +432,7 @@ function get_lvm_path_from_device { function get_vg_name_from_device { device="$1" - pv_uuid=$(pvdisplay ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-vg-${pv_uuid}" @@ -441,7 +442,7 @@ function get_vg_name_from_device { function get_lv_name_from_device { device="$1" device_type="$2" - pv_uuid=$(pvdisplay ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-${device_type}-${pv_uuid}" diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl index 4bde715..deeec10 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl @@ -18,7 +18,7 @@ set -ex source /tmp/osd-common-ceph-volume.sh -: "${OSD_FORCE_REPAIR:=1}" +: "${OSD_FORCE_REPAIR:=0}" # We do not want to zap journal disk. Tracking this option seperatly. 
: "${JOURNAL_FORCE_ZAP:=0}" @@ -41,7 +41,7 @@ fi # Renames a single VG if necessary function rename_vg { local physical_disk=$1 - local old_vg_name=$(locked pvdisplay ${physical_disk} | awk '/VG Name/{print $3}') + local old_vg_name=$(locked pvdisplay -ddd -v ${physical_disk} | awk '/VG Name/{print $3}') local vg_name=$(get_vg_name_from_device ${physical_disk}) if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then @@ -52,7 +52,7 @@ function rename_vg { # Renames all LVs associated with an OSD as necesasry function rename_lvs { local data_disk=$1 - local vg_name=$(locked pvdisplay ${data_disk} | awk '/VG Name/{print $3}') + local vg_name=$(locked pvdisplay -ddd -v ${data_disk} | awk '/VG Name/{print $3}') if [[ "${vg_name}" ]]; then # Rename the OSD volume if necessary @@ -104,7 +104,7 @@ function rename_lvs { # renaming should be completed prior to calling this function update_lv_tags { local data_disk=$1 - local pv_uuid=$(pvdisplay ${data_disk} | awk '/PV UUID/{print $3}') + local pv_uuid=$(pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then local volumes="$(lvs --no-headings | grep -e "${pv_uuid}")" @@ -289,6 +289,8 @@ function osd_disk_prepare { elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') CEPH_DISK_USED=1 + elif [[ $(lsblk ${OSD_DEVICE}|grep -i ceph) ]]; then + CEPH_DISK_USED=1 else dm_lv_name="$(get_lv_name_from_device ${OSD_DEVICE} lv | sed 's/-/--/g')" if [[ ! -z "${dm_lv_name}" ]] && [[ ! 
-z "$(dmsetup ls | grep ${dm_lv_name})" ]]; then @@ -422,7 +424,7 @@ function osd_disk_prepare { global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" fi else - if pvdisplay ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then + if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then CEPH_LVM_PREPARE=0 fi fi diff --git a/ceph-osd/templates/daemonset-osd.yaml b/ceph-osd/templates/daemonset-osd.yaml index 2e3edd1..03c1080 100644 --- a/ceph-osd/templates/daemonset-osd.yaml +++ b/ceph-osd/templates/daemonset-osd.yaml @@ -187,6 +187,8 @@ spec: value: "ceph" - name: DEPLOY_TOOL value: {{ .Values.deploy.tool }} + - name: OSD_FORCE_REPAIR + value: {{ .Values.deploy.osd_force_repair | quote }} - name: CEPH_GET_ADMIN_KEY value: "1" - name: NAMESPACE diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml index a691d2a..c49b321 100644 --- a/ceph-osd/values.yaml +++ b/ceph-osd/values.yaml @@ -47,6 +47,8 @@ labels: # osds, need to change this after moving the gates to disk backed osd. deploy: tool: "ceph-volume" +# NOTE: set this to 1 if osd disk needs wiping in case of reusing from previous deployment + osd_force_repair: 1 pod: security_context: