# Source code for ceilometer.compute.discovery

#
# Copyright 2014 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import hashlib
from lxml import etree
import operator

import cachetools
from novaclient import exceptions
from oslo_config import cfg
from oslo_log import log
from oslo_utils import timeutils


try:
    import libvirt
except ImportError:
    libvirt = None

from ceilometer.agent import plugin_base
from ceilometer.compute.virt.libvirt import utils as libvirt_utils
from ceilometer import nova_client

# Configuration options controlling how compute instances are discovered.
OPTS = [
    cfg.BoolOpt('workload_partitioning',
                default=False,
                deprecated_for_removal=True,
                help='Enable work-load partitioning, allowing multiple '
                     'compute agents to be run simultaneously. '
                     '(replaced by instance_discovery_method)'),
    cfg.StrOpt('instance_discovery_method',
               default='libvirt_metadata',
               choices=['naive', 'workload_partitioning', 'libvirt_metadata'],
               help="Ceilometer offers many methods to discover the instance "
                    "running on a compute node: \n"
                    "* naive: poll nova to get all instances\n"
                    "* workload_partitioning: poll nova to get instances of "
                    "the compute\n"
                    "* libvirt_metadata: get instances from libvirt metadata "
                    # NOTE: closing parenthesis was missing in the original
                    # help text.
                    "  but without instance metadata (recommended for Gnocchi "
                    "  backend)"),
    cfg.IntOpt('resource_update_interval',
               default=0,
               min=0,
               help="New instances will be discovered periodically based"
                    " on this option (in seconds). By default, "
                    "the agent discovers instances according to pipeline "
                    "polling interval. If option is greater than 0, "
                    "the instance list to poll will be updated based "
                    "on this option's interval. Measurements relating "
                    "to the instances will match intervals "
                    "defined in pipeline. "),
    cfg.IntOpt('resource_cache_expiry',
               default=3600,
               min=0,
               help="The expiry to totally refresh the instances resource "
                    "cache, since the instance may be migrated to another "
                    "host, we need to clean the legacy instances info in "
                    "local cache by totally refreshing the local cache. "
                    "The minimum should be the value of the config option "
                    "of resource_update_interval. This option is only used "
                    "for agent polling to Nova API, so it will work only "
                    "when 'instance_discovery_method' was set to 'naive'.")
]

LOG = log.getLogger(__name__)


class NovaLikeServer(object):
    """Minimal stand-in for a novaclient ``Server`` object.

    Built from libvirt metadata so pollsters can treat locally discovered
    domains the same way as instances returned by the Nova API.
    """

    def __init__(self, **kwargs):
        # 'id' is mandatory: equality and hashing are based on it.
        self.id = kwargs.pop('id')
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        return '<NovaLikeServer: %s>' % getattr(self, 'name', 'unknown-name')

    def __eq__(self, other):
        # Only compare against objects that expose an instance id;
        # the original implementation raised AttributeError otherwise.
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        # Defining __eq__ sets __hash__ to None in Python 3; restore
        # hashability so instances can be used in sets and as dict keys.
        return hash(self.id)
class InstanceDiscovery(plugin_base.DiscoveryBase):
    """Discover the instances running on this compute node."""

    # Subclasses may pin a discovery method; otherwise it is taken from
    # configuration in __init__.
    method = None

    def __init__(self, conf):
        super(InstanceDiscovery, self).__init__(conf)
        compute_conf = conf.compute
        if not self.method:
            self.method = compute_conf.instance_discovery_method
            # For backward compatibility: the deprecated
            # workload_partitioning flag overrides "naive".
            if (self.method == "naive"
                    and compute_conf.workload_partitioning):
                self.method = "workload_partitioning"

        self.nova_cli = nova_client.Client(conf)
        self.expiration_time = compute_conf.resource_update_interval
        self.cache_expiry = compute_conf.resource_cache_expiry

        if self.method == "libvirt_metadata":
            # Libvirt connection is opened lazily (see the property below).
            self._connection = None
            # 4096 instances on a compute should be enough :)
            self._flavor_cache = cachetools.LRUCache(4096)
        else:
            # State used by the Nova-polling discovery path.
            self.instances = {}
            self.last_run = None
            self.last_cache_expire = None

    @property
    def connection(self):
        # Open the libvirt connection on first use and keep it around.
        if not self._connection:
            self._connection = libvirt_utils.get_libvirt_connection(self.conf)
        return self._connection

    @connection.setter
    def connection(self, value):
        self._connection = value
[docs] def discover(self, manager, param=None): """Discover resources to monitor.""" if self.method != "libvirt_metadata": return self.discover_nova_polling(manager, param=None) else: return self.discover_libvirt_polling(manager, param=None)
@staticmethod def _safe_find_int(xml, path): elem = xml.find("./%s" % path) if elem is not None: return int(elem.text) return 0 @cachetools.cachedmethod(operator.attrgetter('_flavor_cache'))
[docs] def get_flavor_id(self, name): try: return self.nova_cli.nova_client.flavors.find(name=name).id except exceptions.NotFound: return None
@libvirt_utils.retry_on_disconnect
[docs] def discover_libvirt_polling(self, manager, param=None): instances = [] for domain in self.connection.listAllDomains(): full_xml = etree.fromstring(domain.XMLDesc()) os_type_xml = full_xml.find("./os/type") xml_string = domain.metadata( libvirt.VIR_DOMAIN_METADATA_ELEMENT, "http://openstack.org/xmlns/libvirt/nova/1.0") metadata_xml = etree.fromstring(xml_string) # TODO(sileht): We don't have the flavor ID here So the Gnocchi # resource update will fail for compute sample (or put None ?) # We currently poll nova to get the flavor ID, but storing the # flavor_id doesn't have any sense because the flavor description # can change over the time, we should store the detail of the # flavor. this is why nova doesn't put the id in the libvirt # metadata # This implements flavor_xml = metadata_xml.find("./flavor") flavor = { "id": self.get_flavor_id(flavor_xml.attrib["name"]), "name": flavor_xml.attrib["name"], "vcpus": self._safe_find_int(flavor_xml, "vcpus"), "ram": self._safe_find_int(flavor_xml, "memory"), "disk": self._safe_find_int(flavor_xml, "disk"), "ephemeral": self._safe_find_int(flavor_xml, "ephemeral"), "swap": self._safe_find_int(flavor_xml, "swap"), } dom_state = domain.state()[0] vm_state = libvirt_utils.LIBVIRT_POWER_STATE.get(dom_state) status = libvirt_utils.LIBVIRT_STATUS.get(dom_state) user_id = metadata_xml.find("./owner/user").attrib["uuid"] project_id = metadata_xml.find("./owner/project").attrib["uuid"] # From: # https://github.com/openstack/nova/blob/852f40fd0c6e9d8878212ff3120556668023f1c4/nova/api/openstack/compute/views/servers.py#L214-L220 host_id = hashlib.sha224( (project_id + self.conf.host).encode('utf-8')).hexdigest() # The image description is partial, but Gnocchi only care about the # id, so we are fine image_xml = metadata_xml.find("./root[@type='image']") image = ({'id': image_xml.attrib['uuid']} if image_xml is not None else None) instance_data = { "id": domain.UUIDString(), "name": metadata_xml.find("./name").text, "flavor": 
flavor, "image": image, "os_type": os_type_xml.text, "architecture": os_type_xml.attrib["arch"], "OS-EXT-SRV-ATTR:instance_name": domain.name(), "OS-EXT-SRV-ATTR:host": self.conf.host, "OS-EXT-STS:vm_state": vm_state, "tenant_id": project_id, "user_id": user_id, "hostId": host_id, "status": status, # NOTE(sileht): Other fields that Ceilometer tracks # where we can't get the value here, but their are # retreived by notification "metadata": {}, # "OS-EXT-STS:task_state" # 'reservation_id', # 'OS-EXT-AZ:availability_zone', # 'kernel_id', # 'ramdisk_id', # some image detail } LOG.debug("instance data: %s", instance_data) instances.append(NovaLikeServer(**instance_data)) return instances
[docs] def discover_nova_polling(self, manager, param=None): secs_from_last_update = 0 utc_now = timeutils.utcnow(True) secs_from_last_expire = 0 if self.last_run: secs_from_last_update = timeutils.delta_seconds( self.last_run, utc_now) if self.last_cache_expire: secs_from_last_expire = timeutils.delta_seconds( self.last_cache_expire, utc_now) instances = [] # NOTE(ityaptin) we update make a nova request only if # it's a first discovery or resources expired if not self.last_run or secs_from_last_update >= self.expiration_time: try: if secs_from_last_expire < self.cache_expiry and self.last_run: since = self.last_run.isoformat() else: since = None self.instances.clear() self.last_cache_expire = utc_now instances = self.nova_cli.instance_get_all_by_host( self.conf.host, since) self.last_run = utc_now except Exception: # NOTE(zqfan): instance_get_all_by_host is wrapped and will log # exception when there is any error. It is no need to raise it # again and print one more time. return [] for instance in instances: if getattr(instance, 'OS-EXT-STS:vm_state', None) in ['deleted', 'error']: self.instances.pop(instance.id, None) else: self.instances[instance.id] = instance return self.instances.values()
@property def group_id(self): if self.method == "workload_partitioning": return self.conf.host else: return None

# Project Source