commit c52a89ae04e1586afaed93929ffdcd0e5614c93d Author: Michal Nasiadka Date: Wed Aug 14 11:15:33 2019 +0000 Use Docker healthchecks for core services This change enables the use of Docker healthchecks for core OpenStack services. Also check-failures.sh has been updated to treat containers with unhealthy status as failed. Implements: blueprint container-health-check Change-Id: I79c6b11511ce8af70f77e2f6a490b59b477fefbb diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index 6646acb..17a18cf 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -152,6 +152,16 @@ docker_common_options: # Dimension options for Docker Containers default_container_dimensions: {} +##################### +# Healthcheck options +##################### +enable_container_healthchecks: "yes" +# Healthcheck options for Docker containers +# interval/timeout/start_period are in seconds +default_container_healthcheck_interval: 30 +default_container_healthcheck_timeout: 30 +default_container_healthcheck_retries: 3 +default_container_healthcheck_start_period: 5 ####################### # Extra volumes options @@ -289,6 +299,7 @@ glance_internal_fqdn: "{{ kolla_internal_fqdn }}" glance_external_fqdn: "{{ kolla_external_fqdn }}" glance_api_port: "9292" glance_api_listen_port: "{{ glance_api_port }}" +glance_tls_proxy_stats_port: "9293" gnocchi_internal_fqdn: "{{ kolla_internal_fqdn }}" gnocchi_external_fqdn: "{{ kolla_external_fqdn }}" diff --git a/ansible/roles/glance/defaults/main.yml b/ansible/roles/glance/defaults/main.yml index 7f70139..0e6191b 100644 --- a/ansible/roles/glance/defaults/main.yml +++ b/ansible/roles/glance/defaults/main.yml @@ -12,6 +12,7 @@ glance_services: privileged: "{{ enable_cinder | bool and enable_cinder_backend_iscsi | bool }}" volumes: "{{ glance_api_default_volumes + glance_api_extra_volumes }}" dimensions: "{{ glance_api_dimensions }}" + healthcheck: "{{ glance_api_healthcheck }}" haproxy: glance_api: enabled: "{{ enable_glance | 
bool and not glance_enable_tls_backend | bool }}" @@ -41,6 +42,7 @@ glance_services: image: "{{ glance_tls_proxy_image_full }}" volumes: "{{ glance_tls_proxy_default_volumes + glance_tls_proxy_extra_volumes }}" dimensions: "{{ glance_tls_proxy_dimensions }}" + healthcheck: "{{ glance_tls_proxy_healthcheck }}" haproxy: glance_tls_proxy: enabled: "{{ enable_glance | bool and glance_enable_tls_backend | bool }}" @@ -130,6 +132,32 @@ glance_tls_proxy_image_full: "{{ glance_tls_proxy_image }}:{{ glance_tls_proxy_t glance_api_dimensions: "{{ default_container_dimensions }}" glance_tls_proxy_dimensions: "{{ default_container_dimensions }}" +glance_api_enable_healthchecks: "{{ enable_container_healthchecks }}" +glance_api_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +glance_api_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +glance_api_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +glance_api_healthcheck_test: ["CMD-SHELL", "healthcheck_curl http://{% if glance_enable_tls_backend | bool %}localhost{% else %}{{ api_interface_address | put_address_in_context('url') }}{% endif %}:{{ glance_api_listen_port }}"] +glance_api_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +glance_api_healthcheck: + interval: "{{ glance_api_healthcheck_interval }}" + retries: "{{ glance_api_healthcheck_retries }}" + start_period: "{{ glance_api_healthcheck_start_period }}" + test: "{% if glance_api_enable_healthchecks | bool %}{{ glance_api_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ glance_api_healthcheck_timeout }}" + +glance_tls_proxy_enable_healthchecks: "{{ enable_container_healthchecks }}" +glance_tls_proxy_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +glance_tls_proxy_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +glance_tls_proxy_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" 
+glance_tls_proxy_healthcheck_test: ["CMD-SHELL", "healthcheck_curl -u {{ haproxy_user }}:{{ haproxy_password }} {{ api_interface_address | put_address_in_context('url') }}:{{ glance_tls_proxy_stats_port }}"] +glance_tls_proxy_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +glance_tls_proxy_healthcheck: + interval: "{{ glance_tls_proxy_healthcheck_interval }}" + retries: "{{ glance_tls_proxy_healthcheck_retries }}" + start_period: "{{ glance_tls_proxy_healthcheck_start_period }}" + test: "{% if glance_tls_proxy_enable_healthchecks | bool %}{{ glance_tls_proxy_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ glance_tls_proxy_healthcheck_timeout }}" + glance_api_default_volumes: - "{{ node_config_directory }}/glance-api/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff --git a/ansible/roles/glance/handlers/main.yml b/ansible/roles/glance/handlers/main.yml index 5abf923..a866553 100644 --- a/ansible/roles/glance/handlers/main.yml +++ b/ansible/roles/glance/handlers/main.yml @@ -13,6 +13,7 @@ environment: "{{ service.environment }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -28,5 +29,6 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/glance/tasks/check-containers.yml b/ansible/roles/glance/tasks/check-containers.yml index 548fff7..ae3dd22 100644 --- a/ansible/roles/glance/tasks/check-containers.yml +++ b/ansible/roles/glance/tasks/check-containers.yml @@ -10,6 +10,7 @@ environment: "{{ item.value.environment | default(omit) }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ 
item.value.healthcheck | default(omit) }}" when: - item.value.host_in_groups | bool - item.value.enabled | bool diff --git a/ansible/roles/glance/templates/glance-tls-proxy.cfg.j2 b/ansible/roles/glance/templates/glance-tls-proxy.cfg.j2 index ef86c22..18e29e9 100644 --- a/ansible/roles/glance/templates/glance-tls-proxy.cfg.j2 +++ b/ansible/roles/glance/templates/glance-tls-proxy.cfg.j2 @@ -30,6 +30,15 @@ defaults balance {{ glance_tls_proxy_defaults_balance }} maxconn {{ glance_tls_proxy_defaults_max_connections }} +listen stats + bind {{ api_interface_address }}:{{ glance_tls_proxy_stats_port }} + mode http + stats enable + stats uri / + stats refresh 15s + stats realm Haproxy\ Stats + stats auth {{ haproxy_user }}:{{ haproxy_password }} + frontend glance_backend_tls bind {{ api_interface_address }}:{{ glance_api_listen_port }} ssl crt /etc/glance/certs/glance-cert-and-key.pem default_backend glance_api diff --git a/ansible/roles/heat/defaults/main.yml b/ansible/roles/heat/defaults/main.yml index 5ec8c20..e725548 100644 --- a/ansible/roles/heat/defaults/main.yml +++ b/ansible/roles/heat/defaults/main.yml @@ -9,6 +9,7 @@ heat_services: image: "{{ heat_api_image_full }}" volumes: "{{ heat_api_default_volumes + heat_api_extra_volumes }}" dimensions: "{{ heat_api_dimensions }}" + healthcheck: "{{ heat_api_healthcheck }}" haproxy: heat_api: enabled: "{{ enable_heat }}" @@ -31,6 +32,7 @@ heat_services: image: "{{ heat_api_cfn_image_full }}" volumes: "{{ heat_api_cfn_default_volumes + heat_api_cfn_extra_volumes }}" dimensions: "{{ heat_api_cfn_dimensions }}" + healthcheck: "{{ heat_api_cfn_healthcheck }}" haproxy: heat_api_cfn: enabled: "{{ enable_heat }}" @@ -53,6 +55,7 @@ heat_services: image: "{{ heat_engine_image_full }}" volumes: "{{ heat_engine_default_volumes + heat_engine_extra_volumes }}" dimensions: "{{ heat_engine_dimensions }}" + healthcheck: "{{ heat_engine_healthcheck }}" #################### # Database @@ -84,6 +87,45 @@ heat_api_dimensions: "{{ 
default_container_dimensions }}" heat_api_cfn_dimensions: "{{ default_container_dimensions }}" heat_engine_dimensions: "{{ default_container_dimensions }}" +heat_api_enable_healthchecks: "{{ enable_container_healthchecks }}" +heat_api_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +heat_api_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +heat_api_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +heat_api_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if heat_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ heat_api_listen_port }}"] +heat_api_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +heat_api_healthcheck: + interval: "{{ heat_api_healthcheck_interval }}" + retries: "{{ heat_api_healthcheck_retries }}" + start_period: "{{ heat_api_healthcheck_start_period }}" + test: "{% if heat_api_enable_healthchecks | bool %}{{ heat_api_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ heat_api_healthcheck_timeout }}" + +heat_api_cfn_enable_healthchecks: "{{ enable_container_healthchecks }}" +heat_api_cfn_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +heat_api_cfn_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +heat_api_cfn_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +heat_api_cfn_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if heat_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ heat_api_cfn_listen_port }}"] +heat_api_cfn_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +heat_api_cfn_healthcheck: + interval: "{{ heat_api_cfn_healthcheck_interval }}" + retries: "{{ heat_api_cfn_healthcheck_retries }}" + start_period: "{{ heat_api_cfn_healthcheck_start_period }}" + test: "{% if heat_api_cfn_enable_healthchecks | bool %}{{ 
heat_api_cfn_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ heat_api_cfn_healthcheck_timeout }}" + +heat_engine_enable_healthchecks: "{{ enable_container_healthchecks }}" +heat_engine_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +heat_engine_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +heat_engine_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +heat_engine_healthcheck_test: ["CMD-SHELL", "healthcheck_port heat-engine {{ om_rpc_port }}"] +heat_engine_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +heat_engine_healthcheck: + interval: "{{ heat_engine_healthcheck_interval }}" + retries: "{{ heat_engine_healthcheck_retries }}" + start_period: "{{ heat_engine_healthcheck_start_period }}" + test: "{% if heat_engine_enable_healthchecks | bool %}{{ heat_engine_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ heat_engine_healthcheck_timeout }}" + heat_api_default_volumes: - "{{ node_config_directory }}/heat-api/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff --git a/ansible/roles/heat/handlers/main.yml b/ansible/roles/heat/handlers/main.yml index f2a4a47..a21064e 100644 --- a/ansible/roles/heat/handlers/main.yml +++ b/ansible/roles/heat/handlers/main.yml @@ -11,6 +11,7 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -26,6 +27,7 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -41,5 +43,6 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) 
}}" when: - kolla_action != "config" diff --git a/ansible/roles/heat/tasks/check-containers.yml b/ansible/roles/heat/tasks/check-containers.yml index 4852877..ba598bd 100644 --- a/ansible/roles/heat/tasks/check-containers.yml +++ b/ansible/roles/heat/tasks/check-containers.yml @@ -8,6 +8,7 @@ image: "{{ item.value.image }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/ansible/roles/horizon/defaults/main.yml b/ansible/roles/horizon/defaults/main.yml index 57d3f2b..c8698b1 100644 --- a/ansible/roles/horizon/defaults/main.yml +++ b/ansible/roles/horizon/defaults/main.yml @@ -36,6 +36,7 @@ horizon_services: FORCE_GENERATE: "{{ 'yes' if horizon_dev_mode | bool else 'no' }}" volumes: "{{ horizon_default_volumes + horizon_extra_volumes }}" dimensions: "{{ horizon_dimensions }}" + healthcheck: "{{ horizon_healthcheck }}" haproxy: horizon: enabled: "{{ enable_horizon }}" @@ -87,6 +88,19 @@ horizon_image_full: "{{ horizon_image }}:{{ horizon_tag }}" horizon_dimensions: "{{ default_container_dimensions }}" +horizon_enable_healthchecks: "{{ enable_container_healthchecks }}" +horizon_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +horizon_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +horizon_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +horizon_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if horizon_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ horizon_listen_port }}"] +horizon_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +horizon_healthcheck: + interval: "{{ horizon_healthcheck_interval }}" + retries: "{{ horizon_healthcheck_retries }}" + start_period: "{{ horizon_healthcheck_start_period }}" 
+ test: "{% if horizon_enable_healthchecks | bool %}{{ horizon_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ horizon_healthcheck_timeout }}" + horizon_default_volumes: - "{{ node_config_directory }}/horizon/:{{ container_config_directory }}/:ro" - "{{ kolla_dev_repos_directory ~ '/horizon/horizon:/var/lib/kolla/venv/lib/python' ~ distro_python_version ~ '/site-packages/horizon' if horizon_dev_mode | bool else '' }}" diff --git a/ansible/roles/horizon/handlers/main.yml b/ansible/roles/horizon/handlers/main.yml index c1f7110..51b41dc 100644 --- a/ansible/roles/horizon/handlers/main.yml +++ b/ansible/roles/horizon/handlers/main.yml @@ -12,5 +12,6 @@ environment: "{{ service.environment }}" volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/horizon/tasks/check-containers.yml b/ansible/roles/horizon/tasks/check-containers.yml index 24b0aae..8bec8a6 100644 --- a/ansible/roles/horizon/tasks/check-containers.yml +++ b/ansible/roles/horizon/tasks/check-containers.yml @@ -11,6 +11,7 @@ environment: "{{ horizon.environment }}" volumes: "{{ horizon.volumes }}" dimensions: "{{ horizon.dimensions }}" + healthcheck: "{{ horizon.healthcheck | default(omit) }}" when: - inventory_hostname in groups[horizon.group] - horizon.enabled | bool diff --git a/ansible/roles/keystone/defaults/main.yml b/ansible/roles/keystone/defaults/main.yml index 982d7e6..a8267f6 100644 --- a/ansible/roles/keystone/defaults/main.yml +++ b/ansible/roles/keystone/defaults/main.yml @@ -9,6 +9,7 @@ keystone_services: image: "{{ keystone_image_full }}" volumes: "{{ keystone_default_volumes + keystone_extra_volumes }}" dimensions: "{{ keystone_dimensions }}" + healthcheck: "{{ keystone_healthcheck }}" haproxy: keystone_internal: enabled: "{{ enable_keystone }}" @@ -43,6 +44,7 @@ keystone_services: - "kolla_logs:/var/log/kolla/" - 
"keystone_fernet_tokens:/etc/keystone/fernet-keys" dimensions: "{{ keystone_ssh_dimensions }}" + healthcheck: "{{ keystone_ssh_healthcheck }}" keystone-fernet: container_name: "keystone_fernet" group: "keystone" @@ -56,7 +58,6 @@ keystone_services: - "keystone_fernet_tokens:/etc/keystone/fernet-keys" dimensions: "{{ keystone_fernet_dimensions }}" - #################### # Database #################### @@ -94,6 +95,32 @@ keystone_dimensions: "{{ default_container_dimensions }}" keystone_fernet_dimensions: "{{ default_container_dimensions }}" keystone_ssh_dimensions: "{{ default_container_dimensions }}" +keystone_enable_healthchecks: "{{ enable_container_healthchecks }}" +keystone_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +keystone_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +keystone_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +keystone_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if keystone_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ keystone_public_listen_port }}"] +keystone_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +keystone_healthcheck: + interval: "{{ keystone_healthcheck_interval }}" + retries: "{{ keystone_healthcheck_retries }}" + start_period: "{{ keystone_healthcheck_start_period }}" + test: "{% if keystone_enable_healthchecks | bool %}{{ keystone_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ keystone_healthcheck_timeout }}" + +keystone_ssh_enable_healthchecks: "{{ enable_container_healthchecks }}" +keystone_ssh_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +keystone_ssh_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +keystone_ssh_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +keystone_ssh_healthcheck_test: ["CMD-SHELL", "healthcheck_listen sshd {{ keystone_ssh_port }}"] 
+keystone_ssh_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +keystone_ssh_healthcheck: + interval: "{{ keystone_ssh_healthcheck_interval }}" + retries: "{{ keystone_ssh_healthcheck_retries }}" + start_period: "{{ keystone_ssh_healthcheck_start_period }}" + test: "{% if keystone_ssh_enable_healthchecks | bool %}{{ keystone_ssh_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ keystone_ssh_healthcheck_timeout }}" + keystone_default_volumes: - "{{ node_config_directory }}/keystone/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff --git a/ansible/roles/keystone/handlers/main.yml b/ansible/roles/keystone/handlers/main.yml index b738906..8acb7e5 100644 --- a/ansible/roles/keystone/handlers/main.yml +++ b/ansible/roles/keystone/handlers/main.yml @@ -38,6 +38,7 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -53,6 +54,7 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -68,6 +70,7 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/keystone/tasks/check-containers.yml b/ansible/roles/keystone/tasks/check-containers.yml index 9f9ad73..437495e 100644 --- a/ansible/roles/keystone/tasks/check-containers.yml +++ b/ansible/roles/keystone/tasks/check-containers.yml @@ -8,6 +8,7 @@ image: "{{ item.value.image }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" 
when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/ansible/roles/neutron/defaults/main.yml b/ansible/roles/neutron/defaults/main.yml index af3fd83..c67d985 100644 --- a/ansible/roles/neutron/defaults/main.yml +++ b/ansible/roles/neutron/defaults/main.yml @@ -10,6 +10,7 @@ neutron_services: host_in_groups: "{{ inventory_hostname in groups['neutron-server'] }}" volumes: "{{ neutron_server_default_volumes + neutron_server_extra_volumes }}" dimensions: "{{ neutron_server_dimensions }}" + healthcheck: "{{ neutron_server_healthcheck }}" haproxy: neutron_server: enabled: "{{ enable_neutron }}" @@ -47,6 +48,7 @@ neutron_services: }} volumes: "{{ neutron_openvswitch_agent_default_volumes + neutron_openvswitch_agent_extra_volumes }}" dimensions: "{{ neutron_openvswitch_agent_dimensions }}" + healthcheck: "{{ neutron_openvswitch_agent_healthcheck }}" neutron-linuxbridge-agent: container_name: "neutron_linuxbridge_agent" image: "{{ neutron_linuxbridge_agent_image_full }}" @@ -64,6 +66,7 @@ neutron_services: }} volumes: "{{ neutron_linuxbridge_agent_default_volumes + neutron_linuxbridge_agent_extra_volumes }}" dimensions: "{{ neutron_linuxbridge_agent_dimensions }}" + healthcheck: "{{ neutron_linuxbridge_agent_healthcheck }}" neutron-dhcp-agent: container_name: "neutron_dhcp_agent" image: "{{ neutron_dhcp_agent_image_full }}" @@ -73,6 +76,7 @@ neutron_services: host_in_groups: "{{ inventory_hostname in groups['neutron-dhcp-agent'] }}" volumes: "{{ neutron_dhcp_agent_default_volumes + neutron_dhcp_agent_extra_volumes }}" dimensions: "{{ neutron_dhcp_agent_dimensions }}" + healthcheck: "{{ neutron_dhcp_agent_healthcheck }}" neutron-l3-agent: container_name: "neutron_l3_agent" image: "{{ neutron_l3_agent_image_full }}" @@ -87,6 +91,7 @@ neutron_services: }} volumes: "{{ neutron_l3_agent_default_volumes + neutron_l3_agent_extra_volumes }}" dimensions: "{{ neutron_l3_agent_dimensions }}" + healthcheck: "{{ neutron_l3_agent_healthcheck }}" 
neutron-sriov-agent: container_name: "neutron_sriov_agent" image: "{{ neutron_sriov_agent_image_full }}" @@ -95,6 +100,7 @@ neutron_services: host_in_groups: "{{ inventory_hostname in groups['compute'] }}" volumes: "{{ neutron_sriov_agent_default_volumes + neutron_sriov_agent_extra_volumes }}" dimensions: "{{ neutron_sriov_agent_dimensions }}" + healthcheck: "{{ neutron_sriov_agent_healthcheck }}" neutron-mlnx-agent: container_name: "neutron_mlnx_agent" image: "{{ neutron_mlnx_agent_image_full }}" @@ -122,6 +128,7 @@ neutron_services: }} volumes: "{{ neutron_metadata_agent_default_volumes + neutron_metadata_agent_extra_volumes }}" dimensions: "{{ neutron_metadata_agent_dimensions }}" + healthcheck: "{{ neutron_metadata_agent_healthcheck }}" neutron-ovn-metadata-agent: container_name: "neutron_ovn_metadata_agent" image: "{{ neutron_ovn_metadata_agent_image_full }}" @@ -130,6 +137,7 @@ neutron_services: host_in_groups: "{{ inventory_hostname in groups['neutron-ovn-metadata-agent'] }}" volumes: "{{ neutron_ovn_metadata_agent_default_volumes + neutron_ovn_metadata_agent_extra_volumes }}" dimensions: "{{ neutron_ovn_metadata_agent_dimensions }}" + healthcheck: "{{ neutron_ovn_metadata_agent_healthcheck }}" neutron-bgp-dragent: container_name: "neutron_bgp_dragent" image: "{{ neutron_bgp_dragent_image_full }}" @@ -254,6 +262,109 @@ neutron_infoblox_ipam_agent_dimensions: "{{ default_container_dimensions }}" neutron_metering_agent_dimensions: "{{ neutron_agent_dimensions }}" ironic_neutron_agent_dimensions: "{{ default_container_dimensions }}" +neutron_dhcp_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_dhcp_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_dhcp_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_dhcp_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_dhcp_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port 
neutron-dhcp-agent {{ om_rpc_port }}"] +neutron_dhcp_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_dhcp_agent_healthcheck: + interval: "{{ neutron_dhcp_agent_healthcheck_interval }}" + retries: "{{ neutron_dhcp_agent_healthcheck_retries }}" + start_period: "{{ neutron_dhcp_agent_healthcheck_start_period }}" + test: "{% if neutron_dhcp_agent_enable_healthchecks | bool %}{{ neutron_dhcp_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_dhcp_agent_healthcheck_timeout }}" + +neutron_l3_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_l3_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_l3_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_l3_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_l3_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port 'neutron-l3-agent ' {{ om_rpc_port }}"] +neutron_l3_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_l3_agent_healthcheck: + interval: "{{ neutron_l3_agent_healthcheck_interval }}" + retries: "{{ neutron_l3_agent_healthcheck_retries }}" + start_period: "{{ neutron_l3_agent_healthcheck_start_period }}" + test: "{% if neutron_l3_agent_enable_healthchecks | bool %}{{ neutron_l3_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_l3_agent_healthcheck_timeout }}" + +neutron_linuxbridge_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_linuxbridge_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_linuxbridge_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_linuxbridge_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_linuxbridge_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port neutron-linuxbridge-agent {{ om_rpc_port }}"] 
+neutron_linuxbridge_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_linuxbridge_agent_healthcheck: + interval: "{{ neutron_linuxbridge_agent_healthcheck_interval }}" + retries: "{{ neutron_linuxbridge_agent_healthcheck_retries }}" + start_period: "{{ neutron_linuxbridge_agent_healthcheck_start_period }}" + test: "{% if neutron_linuxbridge_agent_enable_healthchecks | bool %}{{ neutron_linuxbridge_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_linuxbridge_agent_healthcheck_timeout }}" + +neutron_metadata_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_metadata_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_metadata_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_metadata_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_metadata_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port neutron-metadata-agent {{ om_rpc_port }}"] +neutron_metadata_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_metadata_agent_healthcheck: + interval: "{{ neutron_metadata_agent_healthcheck_interval }}" + retries: "{{ neutron_metadata_agent_healthcheck_retries }}" + start_period: "{{ neutron_metadata_agent_healthcheck_start_period }}" + test: "{% if neutron_metadata_agent_enable_healthchecks | bool %}{{ neutron_metadata_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_metadata_agent_healthcheck_timeout }}" + +neutron_openvswitch_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_openvswitch_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_openvswitch_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_openvswitch_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_openvswitch_agent_healthcheck_test: 
["CMD-SHELL", "healthcheck_port neutron-openvswitch-agent {{ om_rpc_port }}"] +neutron_openvswitch_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_openvswitch_agent_healthcheck: + interval: "{{ neutron_openvswitch_agent_healthcheck_interval }}" + retries: "{{ neutron_openvswitch_agent_healthcheck_retries }}" + start_period: "{{ neutron_openvswitch_agent_healthcheck_start_period }}" + test: "{% if neutron_openvswitch_agent_enable_healthchecks | bool %}{{ neutron_openvswitch_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_openvswitch_agent_healthcheck_timeout }}" + +neutron_ovn_metadata_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_ovn_metadata_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_ovn_metadata_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_ovn_metadata_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_ovn_metadata_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port python {{ ovn_sb_db_port }}"] +neutron_ovn_metadata_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_ovn_metadata_agent_healthcheck: + interval: "{{ neutron_ovn_metadata_agent_healthcheck_interval }}" + retries: "{{ neutron_ovn_metadata_agent_healthcheck_retries }}" + start_period: "{{ neutron_ovn_metadata_agent_healthcheck_start_period }}" + test: "{% if neutron_ovn_metadata_agent_enable_healthchecks | bool %}{{ neutron_ovn_metadata_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_ovn_metadata_agent_healthcheck_timeout }}" + +neutron_server_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_server_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_server_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_server_healthcheck_start_period: "{{ 
default_container_healthcheck_start_period }}" +neutron_server_healthcheck_test: ["CMD-SHELL", "healthcheck_curl http://{{ api_interface_address | put_address_in_context('url') }}:{{ neutron_server_listen_port }}"] +neutron_server_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_server_healthcheck: + interval: "{{ neutron_server_healthcheck_interval }}" + retries: "{{ neutron_server_healthcheck_retries }}" + start_period: "{{ neutron_server_healthcheck_start_period }}" + test: "{% if neutron_server_enable_healthchecks | bool %}{{ neutron_server_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_server_healthcheck_timeout }}" + +neutron_sriov_agent_enable_healthchecks: "{{ enable_container_healthchecks }}" +neutron_sriov_agent_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +neutron_sriov_agent_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +neutron_sriov_agent_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +neutron_sriov_agent_healthcheck_test: ["CMD-SHELL", "healthcheck_port neutron-sriov-nic-agent {{ om_rpc_port }}"] +neutron_sriov_agent_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +neutron_sriov_agent_healthcheck: + interval: "{{ neutron_sriov_agent_healthcheck_interval }}" + retries: "{{ neutron_sriov_agent_healthcheck_retries }}" + start_period: "{{ neutron_sriov_agent_healthcheck_start_period }}" + test: "{% if neutron_sriov_agent_enable_healthchecks | bool %}{{ neutron_sriov_agent_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ neutron_sriov_agent_healthcheck_timeout }}" neutron_dhcp_agent_default_volumes: - "{{ node_config_directory }}/neutron-dhcp-agent/:{{ container_config_directory }}/:ro" diff --git a/ansible/roles/neutron/handlers/main.yml b/ansible/roles/neutron/handlers/main.yml index e42351f..0016f33 100644 --- a/ansible/roles/neutron/handlers/main.yml +++ b/ansible/roles/neutron/handlers/main.yml @@ 
-12,6 +12,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -29,6 +30,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -45,6 +47,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" with_sequence: "start=1 end={{ num_nova_fake_per_node }}" when: - kolla_action != "config" @@ -63,6 +66,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -79,6 +83,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -96,6 +101,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -112,6 +118,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -160,6 +167,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" 
dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -176,6 +184,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -192,6 +201,7 @@ volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -208,6 +218,7 @@ volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -224,6 +235,7 @@ volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -240,5 +252,6 @@ volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" privileged: "{{ service.privileged | default(False) }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/neutron/tasks/check-containers.yml b/ansible/roles/neutron/tasks/check-containers.yml index 12365b7..1991722 100644 --- a/ansible/roles/neutron/tasks/check-containers.yml +++ b/ansible/roles/neutron/tasks/check-containers.yml @@ -10,6 +10,7 @@ volumes: "{{ item.value.volumes }}" dimensions: "{{ item.value.dimensions }}" environment: "{{ item.value.environment | default(omit) }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" when: - item.value.enabled | bool - item.value.host_in_groups | bool diff --git 
a/ansible/roles/nova-cell/defaults/main.yml b/ansible/roles/nova-cell/defaults/main.yml index 387bf9c..a8d1888 100644 --- a/ansible/roles/nova-cell/defaults/main.yml +++ b/ansible/roles/nova-cell/defaults/main.yml @@ -11,6 +11,7 @@ nova_cell_services: privileged: True volumes: "{{ nova_libvirt_default_volumes + nova_libvirt_extra_volumes }}" dimensions: "{{ nova_libvirt_dimensions }}" + healthcheck: "{{ nova_libvirt_healthcheck }}" nova-ssh: container_name: "nova_ssh" group: "{{ nova_cell_compute_group }}" @@ -18,6 +19,7 @@ nova_cell_services: enabled: "{{ enable_nova_ssh | bool }}" volumes: "{{ nova_ssh_default_volumes + nova_ssh_extra_volumes }}" dimensions: "{{ nova_ssh_dimensions }}" + healthcheck: "{{ nova_ssh_healthcheck }}" nova-novncproxy: container_name: "nova_novncproxy" group: "{{ nova_cell_novncproxy_group }}" @@ -25,6 +27,7 @@ nova_cell_services: enabled: "{{ nova_console == 'novnc' }}" volumes: "{{ nova_novncproxy_default_volumes + nova_novncproxy_extra_volumes }}" dimensions: "{{ nova_novncproxy_dimensions }}" + healthcheck: "{{ nova_novncproxy_healthcheck }}" nova-spicehtml5proxy: container_name: "nova_spicehtml5proxy" group: "{{ nova_cell_spicehtml5proxy_group }}" @@ -46,6 +49,7 @@ nova_cell_services: image: "{{ nova_conductor_image_full }}" volumes: "{{ nova_conductor_default_volumes + nova_conductor_extra_volumes }}" dimensions: "{{ nova_conductor_dimensions }}" + healthcheck: "{{ nova_conductor_healthcheck }}" nova-compute: container_name: "nova_compute" group: "{{ nova_cell_compute_group }}" @@ -57,6 +61,7 @@ nova_cell_services: ipc_mode: "host" volumes: "{{ nova_compute_default_volumes + nova_compute_extra_volumes }}" dimensions: "{{ nova_compute_dimensions }}" + healthcheck: "{{ nova_compute_healthcheck }}" nova-compute-ironic: container_name: "nova_compute_ironic" group: "{{ nova_cell_compute_ironic_group }}" @@ -64,6 +69,7 @@ nova_cell_services: enabled: "{{ enable_ironic | bool and nova_cell_name == nova_cell_ironic_cell_name }}" volumes: 
"{{ nova_compute_ironic_default_volumes + nova_compute_ironic_extra_volumes }}" dimensions: "{{ nova_compute_ironic_dimensions }}" + healthcheck: "{{ nova_compute_ironic_healthcheck }}" #################### # Ceph options @@ -236,6 +242,84 @@ nova_conductor_dimensions: "{{ default_container_dimensions }}" nova_compute_dimensions: "{{ default_container_dimensions }}" nova_compute_ironic_dimensions: "{{ default_container_dimensions }}" +nova_libvirt_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_libvirt_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_libvirt_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_libvirt_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_libvirt_healthcheck_test: ["CMD-SHELL", "virsh version --daemon"] +nova_libvirt_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_libvirt_healthcheck: + interval: "{{ nova_libvirt_healthcheck_interval }}" + retries: "{{ nova_libvirt_healthcheck_retries }}" + start_period: "{{ nova_libvirt_healthcheck_start_period }}" + test: "{% if nova_libvirt_enable_healthchecks | bool %}{{ nova_libvirt_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_libvirt_healthcheck_timeout }}" + +nova_ssh_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_ssh_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_ssh_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_ssh_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_ssh_healthcheck_test: ["CMD-SHELL", "healthcheck_listen sshd {{ nova_ssh_port }}"] +nova_ssh_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_ssh_healthcheck: + interval: "{{ nova_ssh_healthcheck_interval }}" + retries: "{{ nova_ssh_healthcheck_retries }}" + start_period: "{{ nova_ssh_healthcheck_start_period }}" + test: "{% if nova_ssh_enable_healthchecks | bool %}{{ 
nova_ssh_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_ssh_healthcheck_timeout }}" + +nova_novncproxy_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_novncproxy_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_novncproxy_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_novncproxy_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_novncproxy_healthcheck_test: ["CMD-SHELL", "healthcheck_curl http://{{ api_interface_address | put_address_in_context('url') }}:{{ nova_novncproxy_listen_port }}/vnc_auto.html"] +nova_novncproxy_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_novncproxy_healthcheck: + interval: "{{ nova_novncproxy_healthcheck_interval }}" + retries: "{{ nova_novncproxy_healthcheck_retries }}" + start_period: "{{ nova_novncproxy_healthcheck_start_period }}" + test: "{% if nova_novncproxy_enable_healthchecks | bool %}{{ nova_novncproxy_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_novncproxy_healthcheck_timeout }}" + +nova_conductor_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_conductor_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_conductor_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_conductor_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_conductor_healthcheck_test: ["CMD-SHELL", "healthcheck_port nova-conductor {{ om_rpc_port }}"] +nova_conductor_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_conductor_healthcheck: + interval: "{{ nova_conductor_healthcheck_interval }}" + retries: "{{ nova_conductor_healthcheck_retries }}" + start_period: "{{ nova_conductor_healthcheck_start_period }}" + test: "{% if nova_conductor_enable_healthchecks | bool %}{{ nova_conductor_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_conductor_healthcheck_timeout 
}}" + +nova_compute_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_compute_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_compute_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_compute_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_compute_healthcheck_test: ["CMD-SHELL", "healthcheck_port nova-compute {{ om_rpc_port }}"] +nova_compute_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_compute_healthcheck: + interval: "{{ nova_compute_healthcheck_interval }}" + retries: "{{ nova_compute_healthcheck_retries }}" + start_period: "{{ nova_compute_healthcheck_start_period }}" + test: "{% if nova_compute_enable_healthchecks | bool %}{{ nova_compute_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_compute_healthcheck_timeout }}" + +nova_compute_ironic_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_compute_ironic_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_compute_ironic_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_compute_ironic_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_compute_ironic_healthcheck_test: ["CMD-SHELL", "healthcheck_port nova-compute {{ om_rpc_port }}"] +nova_compute_ironic_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_compute_ironic_healthcheck: + interval: "{{ nova_compute_ironic_healthcheck_interval }}" + retries: "{{ nova_compute_ironic_healthcheck_retries }}" + start_period: "{{ nova_compute_ironic_healthcheck_start_period }}" + test: "{% if nova_compute_ironic_enable_healthchecks | bool %}{{ nova_compute_ironic_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_compute_ironic_healthcheck_timeout }}" + nova_libvirt_default_volumes: - "{{ node_config_directory }}/nova-libvirt/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff 
--git a/ansible/roles/nova-cell/handlers/main.yml b/ansible/roles/nova-cell/handlers/main.yml index d5c624e..c7dd63d 100644 --- a/ansible/roles/nova-cell/handlers/main.yml +++ b/ansible/roles/nova-cell/handlers/main.yml @@ -12,6 +12,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -29,6 +30,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -46,6 +48,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -63,6 +66,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -81,6 +85,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -98,6 +103,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ 
service.healthcheck | default(omit) }}" register: restart_nova_libvirt # NOTE(Jeffrey4l): retry 5 to remove nova_libvirt container because when # guests running, nova_libvirt will raise error even though it is removed. @@ -115,12 +121,13 @@ action: "recreate_or_restart_container" common_options: "{{ docker_common_options }}" name: "{{ service.container_name }}" - environment: "{{ service.environment | default(omit) }}" + environment: "{{ service.environment | default(omit) }}" image: "{{ service.image }}" privileged: "{{ service.privileged | default(False) }}" - ipc_mode: "{{ service.ipc_mode | default(omit) }}" + ipc_mode: "{{ service.ipc_mode | default(omit) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" @@ -137,6 +144,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/nova-cell/tasks/check-containers.yml b/ansible/roles/nova-cell/tasks/check-containers.yml index 356cc01..4746919 100644 --- a/ansible/roles/nova-cell/tasks/check-containers.yml +++ b/ansible/roles/nova-cell/tasks/check-containers.yml @@ -12,6 +12,7 @@ privileged: "{{ item.value.privileged|default(False) }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/ansible/roles/nova/defaults/main.yml b/ansible/roles/nova/defaults/main.yml index 6c80dfc..55fc167 100644 --- a/ansible/roles/nova/defaults/main.yml +++ b/ansible/roles/nova/defaults/main.yml @@ -10,6 +10,7 @@ nova_services: privileged: True volumes: "{{ nova_api_default_volumes + 
nova_api_extra_volumes }}" dimensions: "{{ nova_api_dimensions }}" + healthcheck: "{{ nova_api_healthcheck }}" haproxy: nova_api: enabled: "{{ enable_nova }}" @@ -46,6 +47,7 @@ nova_services: enabled: True volumes: "{{ nova_scheduler_default_volumes + nova_scheduler_extra_volumes }}" dimensions: "{{ nova_scheduler_dimensions }}" + healthcheck: "{{ nova_scheduler_healthcheck }}" nova-super-conductor: container_name: "nova_super_conductor" group: "nova-super-conductor" @@ -53,6 +55,7 @@ nova_services: image: "{{ nova_super_conductor_image_full }}" volumes: "{{ nova_super_conductor_default_volumes + nova_super_conductor_extra_volumes }}" dimensions: "{{ nova_super_conductor_dimensions }}" + healthcheck: "{{ nova_super_conductor_healthcheck }}" #################### # Database @@ -93,6 +96,45 @@ nova_api_dimensions: "{{ default_container_dimensions }}" nova_scheduler_dimensions: "{{ default_container_dimensions }}" nova_super_conductor_dimensions: "{{ default_container_dimensions }}" +nova_api_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_api_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_api_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_api_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_api_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if nova_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ nova_api_listen_port }}"] +nova_api_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_api_healthcheck: + interval: "{{ nova_api_healthcheck_interval }}" + retries: "{{ nova_api_healthcheck_retries }}" + start_period: "{{ nova_api_healthcheck_start_period }}" + test: "{% if nova_api_enable_healthchecks | bool %}{{ nova_api_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_api_healthcheck_timeout }}" + +nova_scheduler_enable_healthchecks: "{{ 
enable_container_healthchecks }}" +nova_scheduler_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_scheduler_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_scheduler_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_scheduler_healthcheck_test: ["CMD-SHELL", "healthcheck_port nova-scheduler {{ om_rpc_port }}"] +nova_scheduler_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_scheduler_healthcheck: + interval: "{{ nova_scheduler_healthcheck_interval }}" + retries: "{{ nova_scheduler_healthcheck_retries }}" + start_period: "{{ nova_scheduler_healthcheck_start_period }}" + test: "{% if nova_scheduler_enable_healthchecks | bool %}{{ nova_scheduler_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_scheduler_healthcheck_timeout }}" + +nova_super_conductor_enable_healthchecks: "{{ enable_container_healthchecks }}" +nova_super_conductor_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +nova_super_conductor_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +nova_super_conductor_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +nova_super_conductor_healthcheck_test: ["CMD-SHELL", "healthcheck_port nova-conductor {{ om_rpc_port }}"] +nova_super_conductor_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +nova_super_conductor_healthcheck: + interval: "{{ nova_super_conductor_healthcheck_interval }}" + retries: "{{ nova_super_conductor_healthcheck_retries }}" + start_period: "{{ nova_super_conductor_healthcheck_start_period }}" + test: "{% if nova_super_conductor_enable_healthchecks | bool %}{{ nova_super_conductor_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ nova_super_conductor_healthcheck_timeout }}" + nova_api_default_volumes: - "{{ node_config_directory }}/nova-api/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff --git 
a/ansible/roles/nova/handlers/main.yml b/ansible/roles/nova/handlers/main.yml index 8f89a52..6913324 100644 --- a/ansible/roles/nova/handlers/main.yml +++ b/ansible/roles/nova/handlers/main.yml @@ -12,6 +12,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -29,6 +30,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool @@ -46,6 +48,7 @@ privileged: "{{ service.privileged | default(False) }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck | default(omit) }}" when: - kolla_action != "config" - kolla_action != "upgrade" or not nova_safety_upgrade | bool diff --git a/ansible/roles/nova/tasks/check-containers.yml b/ansible/roles/nova/tasks/check-containers.yml index 8457a7c..1c278f1 100644 --- a/ansible/roles/nova/tasks/check-containers.yml +++ b/ansible/roles/nova/tasks/check-containers.yml @@ -12,6 +12,7 @@ privileged: "{{ item.value.privileged|default(False) }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/ansible/roles/placement/defaults/main.yml b/ansible/roles/placement/defaults/main.yml index e14bf9c..1bcfabd 100644 --- a/ansible/roles/placement/defaults/main.yml +++ b/ansible/roles/placement/defaults/main.yml @@ -9,6 +9,7 @@ 
placement_services: enabled: True volumes: "{{ placement_api_default_volumes + placement_api_extra_volumes }}" dimensions: "{{ placement_api_dimensions }}" + healthcheck: "{{ placement_api_healthcheck }}" haproxy: placement_api: enabled: "{{ enable_placement }}" @@ -44,6 +45,19 @@ placement_api_image_full: "{{ placement_api_image }}:{{ placement_api_tag }}" placement_api_dimensions: "{{ default_container_dimensions }}" +placement_api_enable_healthchecks: "{{ enable_container_healthchecks }}" +placement_api_healthcheck_interval: "{{ default_container_healthcheck_interval }}" +placement_api_healthcheck_retries: "{{ default_container_healthcheck_retries }}" +placement_api_healthcheck_start_period: "{{ default_container_healthcheck_start_period }}" +placement_api_healthcheck_test: ["CMD-SHELL", "healthcheck_curl {{ 'https' if placement_enable_tls_backend | bool else 'http' }}://{{ api_interface_address | put_address_in_context('url') }}:{{ placement_api_listen_port }}"] +placement_api_healthcheck_timeout: "{{ default_container_healthcheck_timeout }}" +placement_api_healthcheck: + interval: "{{ placement_api_healthcheck_interval }}" + retries: "{{ placement_api_healthcheck_retries }}" + start_period: "{{ placement_api_healthcheck_start_period }}" + test: "{% if placement_api_enable_healthchecks | bool %}{{ placement_api_healthcheck_test }}{% else %}NONE{% endif %}" + timeout: "{{ placement_api_healthcheck_timeout }}" + placement_api_default_volumes: - "{{ node_config_directory }}/placement-api/:{{ container_config_directory }}/:ro" - "/etc/localtime:/etc/localtime:ro" diff --git a/ansible/roles/placement/handlers/main.yml b/ansible/roles/placement/handlers/main.yml index ffeb23b..153dc81 100644 --- a/ansible/roles/placement/handlers/main.yml +++ b/ansible/roles/placement/handlers/main.yml @@ -11,5 +11,6 @@ image: "{{ service.image }}" volumes: "{{ service.volumes|reject('equalto', '')|list }}" dimensions: "{{ service.dimensions }}" + healthcheck: "{{ service.healthcheck 
| default(omit) }}" when: - kolla_action != "config" diff --git a/ansible/roles/placement/tasks/check-containers.yml b/ansible/roles/placement/tasks/check-containers.yml index 797a206..1203bea 100644 --- a/ansible/roles/placement/tasks/check-containers.yml +++ b/ansible/roles/placement/tasks/check-containers.yml @@ -12,6 +12,7 @@ privileged: "{{ item.value.privileged|default(False) }}" volumes: "{{ item.value.volumes|reject('equalto', '')|list }}" dimensions: "{{ item.value.dimensions }}" + healthcheck: "{{ item.value.healthcheck | default(omit) }}" when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml index d1cb1ff..3c64dff 100644 --- a/etc/kolla/globals.yml +++ b/etc/kolla/globals.yml @@ -179,6 +179,17 @@ # kernel_memory: # ulimits: +##################### +# Healthcheck options +##################### +#enable_container_healthchecks: "yes" +# Healthcheck options for Docker containers +# interval/timeout/start_period are in seconds +#default_container_healthcheck_interval: 30 +#default_container_healthcheck_timeout: 30 +#default_container_healthcheck_retries: 3 +#default_container_healthcheck_start_period: 5 + ############# # TLS options diff --git a/releasenotes/notes/implement-docker-healthchecks-feb3ce7fdbf8c2b6.yaml b/releasenotes/notes/implement-docker-healthchecks-feb3ce7fdbf8c2b6.yaml new file mode 100644 index 0000000..e3fc5f7 --- /dev/null +++ b/releasenotes/notes/implement-docker-healthchecks-feb3ce7fdbf8c2b6.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Implements container healthchecks for core OpenStack services. Docker + healthchecks are periodically called scripts that check the health of a + running service, expose health information in ``docker ps`` output + and trigger a ``health_status`` event. Healthchecks are now enabled + by default and can be disabled by setting + ``enable_container_healthchecks`` to ``no`` in ``globals.yml``. 
diff --git a/tests/check-failure.sh b/tests/check-failure.sh index 6689d86..fe03c98 100755 --- a/tests/check-failure.sh +++ b/tests/check-failure.sh @@ -11,6 +11,8 @@ check_failure() { # All docker container's status are created, restarting, running, removing, # paused, exited and dead. Containers without running status are treated as # failure. removing is added in docker 1.13, just ignore it now. + # In addition to that, containers in unhealthy state (from healthchecks) + # are treated as failure. failed_containers=$(sudo docker ps -a --format "{{.Names}}" \ --filter status=created \ --filter status=restarting \ @@ -18,10 +20,16 @@ check_failure() { --filter status=exited \ --filter status=dead) + unhealthy_containers=$(sudo docker ps -a --format "{{.Names}}" \ + --filter health=unhealthy) + if [[ -n "$failed_containers" ]]; then exit 1; fi -} + if [[ -n "$unhealthy_containers" ]]; then + exit 1; + fi +} check_failure