commit 30c8018731e8c5099757e5ea46af9f635c0747ba Author: James Parker Date: Fri Sep 4 10:56:35 2020 -0400 Test soft reboot after live-migration Commit looks to add regression coverage for [1]. It introduces two tests that follow the same procedure with the only differing feature being the parameter utilized to define the available pinned CPUs on the compute host. The test updates two compute hosts to use different cpu ranges for cpu_dedicated_set, e.g. host0 uses [0-1] and host1 [2-3]. An instance is created and its associated pinned CPUs are recorded. It is then migrated to the other host and the updated pinned CPUs are compared against the CPUs prior to migration, asserting they are no longer the same. Finally the test soft reboots the instance and asserts that its pinned CPUs remain the same. Test introduces four classes to test_live_migration.py. First is LiveMigrationAndReboot which handles all of the test logic. Its two children, VCPUPinSetMigrateAndReboot and CPUDedicatedMigrateAndReboot, provide test parameters necessary to execute the test logic with either vcpu_pin_set or cpu_dedicated_set. It also creates a new base class for all tests, LiveMigrationBase, that both LiveMigrationTest and LiveMigrationAndReboot inherit from. The tests need to leverage a lot of the helper functions found in test_cpu_pinning.NUMALiveMigrationBase. To prevent duplication and since these tests do not work with anything NUMA specific, the necessary helper functions were moved to base.BaseWhiteboxComputeTest. This includes get_all_cpus, get_pinning_as_set, and _get_cpu_spec. Lastly it moves parse_cpu_spec from compute.test_cpu_pinning to the utils module. 
[1] https://bugs.launchpad.net/nova/+bug/1890501 Change-Id: I0271894acd0689b947974c86910b3d8c41aa9d72 diff --git a/whitebox_tempest_plugin/api/compute/base.py b/whitebox_tempest_plugin/api/compute/base.py index ced4a59..f82f0f7 100644 --- a/whitebox_tempest_plugin/api/compute/base.py +++ b/whitebox_tempest_plugin/api/compute/base.py @@ -134,3 +134,26 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest): msg += "]" self.assertEqual(target_host, self.get_host_for_server(server_id), msg) + + def get_all_cpus(self): + """Aggregate the dictionary values of [whitebox]/cpu_topology from + tempest.conf into a list of pCPU ids. + """ + topology_dict = CONF.whitebox_hardware.cpu_topology + cpus = [] + [cpus.extend(c) for c in topology_dict.values()] + return cpus + + def get_pinning_as_set(self, server_id): + pinset = set() + root = self.get_server_xml(server_id) + vcpupins = root.findall('./cputune/vcpupin') + for pin in vcpupins: + pinset |= whitebox_utils.parse_cpu_spec(pin.get('cpuset')) + return pinset + + def _get_cpu_spec(self, cpu_list): + """Returns a libvirt-style CPU spec from the provided list of integers. For + example, given [0, 2, 3], returns "0,2,3". + """ + return ','.join(map(str, cpu_list)) diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py index bb3f949..adc57e6 100644 --- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py +++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py @@ -36,7 +36,6 @@ from tempest import config from tempest.lib import decorators from whitebox_tempest_plugin.api.compute import base -from whitebox_tempest_plugin import exceptions from whitebox_tempest_plugin.services import clients from whitebox_tempest_plugin import utils as whitebox_utils @@ -47,67 +46,6 @@ CONF = config.CONF LOG = logging.getLogger(__name__) -def parse_cpu_spec(spec): - """Parse a CPU set specification. 
- - NOTE(artom): This has been lifted from Nova with minor exceptions-related - adjustments. - - Each element in the list is either a single CPU number, a range of - CPU numbers, or a caret followed by a CPU number to be excluded - from a previous range. - - :param spec: cpu set string eg "1-4,^3,6" - - :returns: a set of CPU indexes - """ - cpuset_ids = set() - cpuset_reject_ids = set() - for rule in spec.split(','): - rule = rule.strip() - # Handle multi ',' - if len(rule) < 1: - continue - # Note the count limit in the .split() call - range_parts = rule.split('-', 1) - if len(range_parts) > 1: - reject = False - if range_parts[0] and range_parts[0][0] == '^': - reject = True - range_parts[0] = str(range_parts[0][1:]) - - # So, this was a range; start by converting the parts to ints - try: - start, end = [int(p.strip()) for p in range_parts] - except ValueError: - raise exceptions.InvalidCPUSpec(spec=spec) - # Make sure it's a valid range - if start > end: - raise exceptions.InvalidCPUSpec(spec=spec) - # Add available CPU ids to set - if not reject: - cpuset_ids |= set(range(start, end + 1)) - else: - cpuset_reject_ids |= set(range(start, end + 1)) - elif rule[0] == '^': - # Not a range, the rule is an exclusion rule; convert to int - try: - cpuset_reject_ids.add(int(rule[1:].strip())) - except ValueError: - raise exceptions.InvalidCPUSpec(spec=spec) - else: - # OK, a single CPU to include; convert to int - try: - cpuset_ids.add(int(rule)) - except ValueError: - raise exceptions.InvalidCPUSpec(spec=spec) - - # Use sets to handle the exclusion rules for us - cpuset_ids -= cpuset_reject_ids - - return cpuset_ids - - class BasePinningTest(base.BaseWhiteboxComputeTest): shared_cpu_policy = {'hw:cpu_policy': 'shared'} @@ -126,8 +64,8 @@ class BasePinningTest(base.BaseWhiteboxComputeTest): memnodes = root.findall('./numatune/memnode') cell_pins = {} for memnode in memnodes: - cell_pins[int(memnode.get('cellid'))] = parse_cpu_spec( - memnode.get('nodeset')) + 
cell_pins[int(memnode.get('cellid'))] = \ + whitebox_utils.parse_cpu_spec(memnode.get('nodeset')) return cell_pins @@ -143,7 +81,8 @@ class BasePinningTest(base.BaseWhiteboxComputeTest): emulatorpins = root.findall('./cputune/emulatorpin') emulator_threads = set() for pin in emulatorpins: - emulator_threads |= parse_cpu_spec(pin.get('cpuset')) + emulator_threads |= \ + whitebox_utils.parse_cpu_spec(pin.get('cpuset')) return emulator_threads @@ -437,20 +376,6 @@ class NUMALiveMigrationBase(BasePinningTest): CONF.whitebox.max_compute_nodes > 2): raise cls.skipException('Exactly 2 compute nodes required.') - def get_pinning_as_set(self, server_id): - pinset = set() - root = self.get_server_xml(server_id) - vcpupins = root.findall('./cputune/vcpupin') - for pin in vcpupins: - pinset |= parse_cpu_spec(pin.get('cpuset')) - return pinset - - def _get_cpu_spec(self, cpu_list): - """Returns a libvirt-style CPU spec from the provided list of integers. For - example, given [0, 2, 3], returns "0,2,3". - """ - return ','.join(map(str, cpu_list)) - def _get_cpu_pins_from_db_topology(self, db_topology): """Given a JSON object representing a instance's database NUMA topology, returns a dict of dicts indicating CPU pinning, for example: @@ -488,16 +413,7 @@ class NUMALiveMigrationBase(BasePinningTest): """ root = self.get_server_xml(server_id) cpuset = root.find('./vcpu').attrib.get('cpuset', None) - return parse_cpu_spec(cpuset) - - def get_all_cpus(self): - """Aggregate the dictionary values of [whitebox]/cpu_topology from - tempest.conf into a list of pCPU ids. 
- """ - topology_dict = CONF.whitebox_hardware.cpu_topology - cpus = [] - [cpus.extend(c) for c in topology_dict.values()] - return cpus + return whitebox_utils.parse_cpu_spec(cpuset) class NUMALiveMigrationTest(NUMALiveMigrationBase): diff --git a/whitebox_tempest_plugin/api/compute/test_live_migration.py b/whitebox_tempest_plugin/api/compute/test_live_migration.py index 9f01ee4..c2a4972 100644 --- a/whitebox_tempest_plugin/api/compute/test_live_migration.py +++ b/whitebox_tempest_plugin/api/compute/test_live_migration.py @@ -22,6 +22,8 @@ from tempest import config from tempest.lib import decorators from whitebox_tempest_plugin.api.compute import base +from whitebox_tempest_plugin.services import clients +from whitebox_tempest_plugin import utils as whitebox_utils CONF = config.CONF LOG = logging.getLogger(__name__) @@ -30,13 +32,13 @@ LOG = logging.getLogger(__name__) # tempest.api.compute.admin.test_live_migration -class LiveMigrationTest(base.BaseWhiteboxComputeTest): +class LiveMigrationBase(base.BaseWhiteboxComputeTest): # First support for block_migration='auto': since Mitaka (OSP9) min_microversion = '2.25' @classmethod def skip_checks(cls): - super(LiveMigrationTest, cls).skip_checks() + super(LiveMigrationBase, cls).skip_checks() if not CONF.compute_feature_enabled.live_migration: skip_msg = ("%s skipped as live-migration is " @@ -56,7 +58,12 @@ class LiveMigrationTest(base.BaseWhiteboxComputeTest): # TODO(mriedem): SSH validation before and after the instance is # live migrated would be a nice test wrinkle addition. cls.set_network_resources(network=True, subnet=True) - super(LiveMigrationTest, cls).setup_credentials() + super(LiveMigrationBase, cls).setup_credentials() + + +class LiveMigrationTest(LiveMigrationBase): + # First support for block_migration='auto': since Mitaka (OSP9) + min_microversion = '2.25' @testtools.skipUnless(CONF.compute_feature_enabled. 
volume_backed_live_migration, @@ -85,3 +92,115 @@ class LiveMigrationTest(base.BaseWhiteboxComputeTest): # Assert cache-mode has not changed during live migration self.assertEqual(cache_type, root_disk_cache()) + + +class LiveMigrationAndReboot(LiveMigrationBase): + + dedicated_cpu_policy = {'hw:cpu_policy': 'dedicated'} + + @classmethod + def skip_checks(cls): + super(LiveMigrationAndReboot, cls).skip_checks() + if getattr(CONF.whitebox_hardware, 'cpu_topology', None) is None: + msg = "cpu_topology in whitebox-hardware is not present" + raise cls.skipException(msg) + + def _migrate_and_reboot_instance(self, section, cpu_set_parameter): + flavor_vcpu_size = 2 + cpu_list = self.get_all_cpus() + if len(cpu_list) < 4: + raise self.skipException('Requires 4 or more pCPUs to execute ' + 'the test') + + host1, host2 = self.list_compute_hosts() + + # Create two different cpu dedicated ranges for each host in order + # to force different domain XML after instance migration + host1_dedicated_set = cpu_list[:2] + host2_dedicated_set = cpu_list[2:4] + + dedicated_flavor = self.create_flavor( + vcpus=flavor_vcpu_size, + extra_specs=self.dedicated_cpu_policy + ) + + host1_sm = clients.NovaServiceManager(host1, 'nova-compute', + self.os_admin.services_client) + host2_sm = clients.NovaServiceManager(host2, 'nova-compute', + self.os_admin.services_client) + + with whitebox_utils.multicontext( + host1_sm.config_options((section, cpu_set_parameter, + self._get_cpu_spec(host1_dedicated_set))), + host2_sm.config_options((section, cpu_set_parameter, + self._get_cpu_spec(host2_dedicated_set))) + ): + # Create a server with a dedicated cpu policy + server = self.create_test_server( + flavor=dedicated_flavor['id'] + ) + + # Gather the pinned CPUs for the instance prior to migration + pinned_cpus_pre_migration = self.get_pinning_as_set(server['id']) + + # Determine the destination migration host and migrate the server + # to that host + compute_dest = self.get_host_other_than(server['id']) 
+ self.live_migrate(server['id'], compute_dest, 'ACTIVE') + + # After successful migration determine the instances pinned CPUs + pinned_cpus_post_migration = self.get_pinning_as_set(server['id']) + + # Confirm the pCPUs are no longer the same as they were when + # on the source compute host + self.assertTrue( + pinned_cpus_post_migration.isdisjoint( + pinned_cpus_pre_migration), + "After migration the the server %s's current pinned CPU's " + "%s should no longer match the pinned CPU's it had pre " + " migration %s" % (server['id'], pinned_cpus_post_migration, + pinned_cpus_pre_migration) + ) + + # Soft reboot the server + # TODO(artom) If the soft reboot fails, the libvirt driver will do + # a hard reboot. This is only detectable through log parsing, so to + # be 100% sure we got the soft reboot we wanted, we should probably + # do that. + self.servers_client.reboot_server(server['id'], type='SOFT') + + # Gather the server's pinned CPUs after the soft reboot + pinned_cpus_post_reboot = self.get_pinning_as_set(server['id']) + + # Validate the server's pinned CPUs remain the same after the + # reboot + self.assertTrue( + pinned_cpus_post_migration == pinned_cpus_post_reboot, + 'After soft rebooting server %s its pinned CPUs should have ' + 'remained the same as %s, but are instead now %s' % ( + server['id'], pinned_cpus_post_migration, + pinned_cpus_post_reboot) + ) + + self.delete_server(server['id']) + + +class VCPUPinSetMigrateAndReboot(LiveMigrationAndReboot): + + max_microversion = '2.79' + pin_set_mode = 'vcpu_pin_set' + pin_section = 'DEFAULT' + + def test_vcpu_pin_migrate_and_reboot(self): + self._migrate_and_reboot_instance(self.pin_section, self.pin_set_mode) + + +class CPUDedicatedMigrateAndReboot(LiveMigrationAndReboot): + + min_microversion = '2.79' + max_microversion = 'latest' + pin_set_mode = 'cpu_dedicated_set' + pin_section = 'compute' + + def test_cpu_dedicated_migrate_and_reboot(self): + self._migrate_and_reboot_instance(self.pin_section, 
self.pin_set_mode) diff --git a/whitebox_tempest_plugin/utils.py b/whitebox_tempest_plugin/utils.py index d90d8f4..19c3cb4 100644 --- a/whitebox_tempest_plugin/utils.py +++ b/whitebox_tempest_plugin/utils.py @@ -70,3 +70,64 @@ def get_ctlplane_address(compute_hostname): return CONF.whitebox.ctlplane_addresses[compute_hostname] raise exceptions.CtrlplaneAddressResolutionError(host=compute_hostname) + + +def parse_cpu_spec(spec): + """Parse a CPU set specification. + + NOTE(artom): This has been lifted from Nova with minor + exceptions-related adjustments. + + Each element in the list is either a single CPU number, a range of + CPU numbers, or a caret followed by a CPU number to be excluded + from a previous range. + + :param spec: cpu set string eg "1-4,^3,6" + + :returns: a set of CPU indexes + """ + cpuset_ids = set() + cpuset_reject_ids = set() + for rule in spec.split(','): + rule = rule.strip() + # Handle multi ',' + if len(rule) < 1: + continue + # Note the count limit in the .split() call + range_parts = rule.split('-', 1) + if len(range_parts) > 1: + reject = False + if range_parts[0] and range_parts[0][0] == '^': + reject = True + range_parts[0] = str(range_parts[0][1:]) + + # So, this was a range; start by converting the parts to ints + try: + start, end = [int(p.strip()) for p in range_parts] + except ValueError: + raise exceptions.InvalidCPUSpec(spec=spec) + # Make sure it's a valid range + if start > end: + raise exceptions.InvalidCPUSpec(spec=spec) + # Add available CPU ids to set + if not reject: + cpuset_ids |= set(range(start, end + 1)) + else: + cpuset_reject_ids |= set(range(start, end + 1)) + elif rule[0] == '^': + # Not a range, the rule is an exclusion rule; convert to int + try: + cpuset_reject_ids.add(int(rule[1:].strip())) + except ValueError: + raise exceptions.InvalidCPUSpec(spec=spec) + else: + # OK, a single CPU to include; convert to int + try: + cpuset_ids.add(int(rule)) + except ValueError: + raise exceptions.InvalidCPUSpec(spec=spec) 
+ + # Use sets to handle the exclusion rules for us + cpuset_ids -= cpuset_reject_ids + + return cpuset_ids