commit 9a0bac4ceb15ba0e6b24d0cd021972341e97e60b Author: Tim Burke Date: Mon Sep 28 09:34:58 2020 -0700 Fail short reads in SegmentedIterable ...and check for it *before* doing the MD5 check. We see this happen ocassionally, but as best I can tell, it's always due to a ChunkReadTimeout popping in the proxy that it can't recover from. Change-Id: If238725bbec4fc3f6c8d000599c735a7c4972f7d diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py index 000752e..f79b47f 100644 --- a/swift/common/request_helpers.py +++ b/swift/common/request_helpers.py @@ -597,6 +597,7 @@ class SegmentedIterable(object): else: self.current_resp = seg_resp + resp_len = 0 seg_hash = None if seg_resp.etag and not seg_req.headers.get('Range'): # Only calculate the MD5 if it we can use it to validate @@ -609,15 +610,26 @@ class SegmentedIterable(object): for chunk in itertools.chain.from_iterable(document_iters): if seg_hash: seg_hash.update(chunk) + resp_len += len(chunk) yield (seg_req.path, chunk) close_if_possible(seg_resp.app_iter) - if seg_hash and seg_hash.hexdigest() != seg_resp.etag: - raise SegmentError( - "Bad MD5 checksum in %(name)s for %(seg)s: headers had" - " %(etag)s, but object MD5 was actually %(actual)s" % - {'seg': seg_req.path, 'etag': seg_resp.etag, - 'name': self.name, 'actual': seg_hash.hexdigest()}) + if seg_hash: + if resp_len != seg_resp.content_length: + raise SegmentError( + "Bad response length for %(seg)s as part of %(name)s: " + "headers had %(from_headers)s, but response length " + "was actually %(actual)s" % + {'seg': seg_req.path, + 'from_headers': seg_resp.content_length, + 'name': self.name, 'actual': resp_len}) + if seg_hash.hexdigest() != seg_resp.etag: + raise SegmentError( + "Bad MD5 checksum for %(seg)s as part of %(name)s: " + "headers had %(etag)s, but object MD5 was actually " + "%(actual)s" % + {'seg': seg_req.path, 'etag': seg_resp.etag, + 'name': self.name, 'actual': seg_hash.hexdigest()}) def _byte_counting_iter(self): # Checks that we give the client the right number of bytes. Raises diff --git a/test/unit/common/middleware/test_dlo.py b/test/unit/common/middleware/test_dlo.py index d8f6d6a..24bdd72 100644 --- a/test/unit/common/middleware/test_dlo.py +++ b/test/unit/common/middleware/test_dlo.py @@ -663,7 +663,7 @@ class TestDloGetManifest(DloTestCase): self.app.register( 'GET', '/v1/AUTH_test/c/seg_02', swob.HTTPOk, {'Content-Length': '5', 'Etag': md5hex("bbbbb")}, - 'bbWRONGbb') + 'WRONG') req = swob.Request.blank('/v1/AUTH_test/mancon/manifest', environ={'REQUEST_METHOD': 'GET'}) @@ -671,8 +671,35 @@ class TestDloGetManifest(DloTestCase): headers = HeaderKeyDict(headers) self.assertEqual(status, "200 OK") + self.assertEqual(headers['Content-Length'], "25") # stop after error - self.assertEqual(body, b"aaaaabbWRONGbb") + self.assertEqual(body, b"aaaaaWRONG") + log_lines = self.dlo.logger.get_lines_for_level('error') + self.assertEqual(len(log_lines), 1, + 'Expected one log line, got %r' % log_lines) + self.assertEqual(log_lines[0][:21], 'Bad MD5 checksum for ') + + def test_mismatched_length_fetching_second_segment(self): + self.app.register( + 'GET', '/v1/AUTH_test/c/seg_02', + swob.HTTPOk, {'Content-Length': '5', 'Etag': md5hex("bbbb")}, + # Use a list so we can get a discrepency between content-length and + # number of bytes in the app_iter + [b'b' * 4]) + + req = swob.Request.blank('/v1/AUTH_test/mancon/manifest', + environ={'REQUEST_METHOD': 'GET'}) + status, headers, body = self.call_dlo(req) + headers = HeaderKeyDict(headers) + + self.assertEqual(status, "200 OK") + self.assertEqual(headers['Content-Length'], "25") + # stop after error + self.assertEqual(body, b"aaaaabbbb") + log_lines = self.dlo.logger.get_lines_for_level('error') + self.assertEqual(len(log_lines), 1, + 'Expected one log line, got %r' % log_lines) + self.assertEqual(log_lines[0][:24], 'Bad response length for ') def test_etag_comparison_ignores_quotes(self): # a little future-proofing here in case we ever fix this in swob diff --git a/test/unit/common/middleware/test_slo.py b/test/unit/common/middleware/test_slo.py index a9343cf..d6b6d1f 100644 --- a/test/unit/common/middleware/test_slo.py +++ b/test/unit/common/middleware/test_slo.py @@ -3398,6 +3398,67 @@ class TestSloGetManifest(SloTestCase): '82136b4240d6ce4ea7d03e51469a393b or 10 != 999999.' ]) + def test_mismatched_checksum(self): + self.app.register( + 'GET', '/v1/AUTH_test/gettest/a_5', + swob.HTTPOk, {'Content-Length': '5', + 'Etag': md5hex('a' * 5)}, + # this segment has invalid content + 'x' * 5) + + self.app.register( + 'GET', '/v1/AUTH_test/gettest/manifest', + swob.HTTPOk, {'Content-Type': 'application/json', + 'X-Static-Large-Object': 'true'}, + json.dumps([{'name': '/gettest/b_10', 'hash': md5hex('b' * 10), + 'content_type': 'text/plain', 'bytes': '10'}, + {'name': '/gettest/a_5', 'hash': md5hex('a' * 5), + 'content_type': 'text/plain', 'bytes': '5'}, + {'name': '/gettest/c_15', 'hash': md5hex('c' * 15), + 'content_type': 'text/plain', 'bytes': '15'}])) + + req = Request.blank('/v1/AUTH_test/gettest/manifest') + status, headers, body = self.call_slo(req) + + self.assertEqual('200 OK', status) + self.assertEqual(body, (b'b' * 10 + b'x' * 5)) + self.assertEqual(self.slo.logger.get_lines_for_level('error'), [ + 'Bad MD5 checksum for /v1/AUTH_test/gettest/a_5 as part of ' + '/v1/AUTH_test/gettest/manifest: headers had ' + '594f803b380a41396ed63dca39503542, but object MD5 was ' + 'actually fb0e22c79ac75679e9881e6ba183b354', + ]) + + def test_mismatched_length(self): + self.app.register( + 'GET', '/v1/AUTH_test/gettest/a_5', + swob.HTTPOk, {'Content-Length': '5', + 'Etag': md5hex('a' * 5)}, + # this segment comes up short + [b'a' * 4]) + + self.app.register( + 'GET', '/v1/AUTH_test/gettest/manifest', + swob.HTTPOk, {'Content-Type': 'application/json', + 'X-Static-Large-Object': 'true'}, + json.dumps([{'name': '/gettest/b_10', 'hash': md5hex('b' * 10), + 'content_type': 'text/plain', 'bytes': '10'}, + {'name': '/gettest/a_5', 'hash': md5hex('a' * 5), + 'content_type': 'text/plain', 'bytes': '5'}, + {'name': '/gettest/c_15', 'hash': md5hex('c' * 15), + 'content_type': 'text/plain', 'bytes': '15'}])) + + req = Request.blank('/v1/AUTH_test/gettest/manifest') + status, headers, body = self.call_slo(req) + + self.assertEqual('200 OK', status) + self.assertEqual(body, (b'b' * 10 + b'a' * 4)) + self.assertEqual(self.slo.logger.get_lines_for_level('error'), [ + 'Bad response length for /v1/AUTH_test/gettest/a_5 as part of ' + '/v1/AUTH_test/gettest/manifest: headers had 5, but ' + 'response length was actually 4', + ]) + def test_first_segment_mismatched_etag(self): self.app.register('GET', '/v1/AUTH_test/gettest/manifest-badetag', swob.HTTPOk, {'Content-Type': 'application/json',