commit bb27badf7650ffeb54a6e63efef44f9bba5b6258 Author: Julia Kreger Date: Mon Aug 10 15:26:29 2020 -0700 Add basic retries for inspection A transitory connection issue, such as one caused by a port being held down for traffic forwarding, can cause intermittent connectivity failures which result in failed introspections. Now the agent retries. Change-Id: I72c5e3aca000d3854a17f8a461b1a2935e5c0d9b diff --git a/ironic_python_agent/inspector.py b/ironic_python_agent/inspector.py index 7cbddb8..f4076c5 100644 --- a/ironic_python_agent/inspector.py +++ b/ironic_python_agent/inspector.py @@ -24,6 +24,7 @@ from oslo_serialization import jsonutils from oslo_utils import excutils import requests import stevedore +import tenacity from ironic_python_agent import config from ironic_python_agent import encoding @@ -115,6 +116,18 @@ def inspect(): return resp.get('uuid') +@tenacity.retry( + retry=tenacity.retry_if_exception_type( + requests.exceptions.ConnectionError), + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_fixed(5), + reraise=True) +def _post_to_inspector(url, data, verify, cert): + """Post data to inspector, retrying on connection errors.""" + return requests.post(url, data=data, + verify=verify, cert=cert) + + def call_inspector(data, failures): """Post data to inspector.""" data['error'] = failures.get_error() @@ -127,8 +140,8 @@ def call_inspector(data, failures): data = encoder.encode(data) verify, cert = utils.get_ssl_client_options(CONF) - resp = requests.post(CONF.inspection_callback_url, data=data, - verify=verify, cert=cert) + resp = _post_to_inspector(CONF.inspection_callback_url, data=data, + verify=verify, cert=cert) if resp.status_code >= 400: LOG.error('inspector %s error %d: %s, proceeding with lookup', CONF.inspection_callback_url, diff --git a/ironic_python_agent/tests/unit/test_inspector.py b/ironic_python_agent/tests/unit/test_inspector.py index 900a048..f3f6f13 100644 --- a/ironic_python_agent/tests/unit/test_inspector.py +++ 
b/ironic_python_agent/tests/unit/test_inspector.py @@ -191,6 +191,15 @@ class TestCallInspector(base.IronicAgentTest): data='{"data": 42, "error": null}') self.assertIsNone(res) + def test_inspector_retries(self, mock_post): + mock_post.side_effect = requests.exceptions.ConnectionError + failures = utils.AccumulatedFailures() + data = collections.OrderedDict(data=42) + self.assertRaises(requests.exceptions.ConnectionError, + inspector.call_inspector, + data, failures) + self.assertEqual(5, mock_post.call_count) + class BaseDiscoverTest(base.IronicAgentTest): def setUp(self): diff --git a/releasenotes/notes/add-inspection-retry-1d385f69607c1452.yaml b/releasenotes/notes/add-inspection-retry-1d385f69607c1452.yaml new file mode 100644 index 0000000..100253e --- /dev/null +++ b/releasenotes/notes/add-inspection-retry-1d385f69607c1452.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes an issue where intermittent or transitory connection issues can cause + inspection to fail. The ramdisk now makes up to five attempts to report to + inspector.