From c0340f2918c7906e31370407a57a87f207e1c730 Mon Sep 17 00:00:00 2001 From: Mathieu Leplatre Date: Fri, 19 Apr 2024 10:55:41 +0200 Subject: [PATCH] Add attachment integrity check (#1418) * Add attachment integrity check * Update baselines --- .bandit.baseline | 49 ++------ .secrets.baseline | 22 +++- .../remotesettings/attachments_integrity.py | 68 +++++++++++ .../test_attachments_integrity.py | 115 ++++++++++++++++++ 4 files changed, 213 insertions(+), 41 deletions(-) create mode 100644 checks/remotesettings/attachments_integrity.py create mode 100644 tests/checks/remotesettings/test_attachments_integrity.py diff --git a/.bandit.baseline b/.bandit.baseline index ec97d74a..6fea09fe 100644 --- a/.bandit.baseline +++ b/.bandit.baseline @@ -1,14 +1,14 @@ { "errors": [], - "generated_at": "2023-05-25T13:28:03Z", + "generated_at": "2024-04-18T11:21:08Z", "metrics": { "_totals": { - "CONFIDENCE.HIGH": 3, + "CONFIDENCE.HIGH": 2, "CONFIDENCE.LOW": 0, "CONFIDENCE.MEDIUM": 0, "CONFIDENCE.UNDEFINED": 0, "SEVERITY.HIGH": 0, - "SEVERITY.LOW": 3, + "SEVERITY.LOW": 2, "SEVERITY.MEDIUM": 0, "SEVERITY.UNDEFINED": 0, "loc": 883, @@ -42,15 +42,15 @@ "skipped_tests": 0 }, "telescope/app.py": { - "CONFIDENCE.HIGH": 3, + "CONFIDENCE.HIGH": 2, "CONFIDENCE.LOW": 0, "CONFIDENCE.MEDIUM": 0, "CONFIDENCE.UNDEFINED": 0, "SEVERITY.HIGH": 0, - "SEVERITY.LOW": 3, + "SEVERITY.LOW": 2, "SEVERITY.MEDIUM": 0, "SEVERITY.UNDEFINED": 0, - "loc": 380, + "loc": 382, "nosec": 0, "skipped_tests": 0 }, @@ -63,7 +63,7 @@ "SEVERITY.LOW": 0, "SEVERITY.MEDIUM": 0, "SEVERITY.UNDEFINED": 0, - "loc": 89, + "loc": 87, "nosec": 1, "skipped_tests": 0 }, @@ -124,35 +124,12 @@ "line_range": [ 6 ], - "more_info": "https://bandit.readthedocs.io/en/1.7.5/blacklists/blacklist_imports.html#b404-import-subprocess", + "more_info": "https://bandit.readthedocs.io/en/1.7.8/blacklists/blacklist_imports.html#b404-import-subprocess", "test_id": "B404", "test_name": "blacklist" }, { - "code": "218 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n219 curl_cmd = subprocess.run(\n220 [\"curl\", \"--version\"],\n221 capture_output=True,\n222 )\n223 output = curl_cmd.stdout.strip().decode()\n", - "col_offset": 15, - "end_col_offset": 5, - "filename": "telescope/app.py", - "issue_confidence": "HIGH", - "issue_cwe": { - "id": 78, - "link": "https://cwe.mitre.org/data/definitions/78.html" - }, - "issue_severity": "LOW", - "issue_text": "Starting a process with a partial executable path", - "line_number": 219, - "line_range": [ - 219, - 220, - 221, - 222 - ], - "more_info": "https://bandit.readthedocs.io/en/1.7.5/plugins/b607_start_process_with_partial_path.html", - "test_id": "B607", - "test_name": "start_process_with_partial_path" - }, - { - "code": "218 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n219 curl_cmd = subprocess.run(\n220 [\"curl\", \"--version\"],\n221 capture_output=True,\n222 )\n223 output = curl_cmd.stdout.strip().decode()\n", + "code": "219 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n220 curl_cmd = subprocess.run(\n221 [config.CURL_BINARY_PATH, \"--version\"],\n222 capture_output=True,\n223 )\n224 output = curl_cmd.stdout.strip().decode()\n", "col_offset": 15, "end_col_offset": 5, "filename": "telescope/app.py", @@ -163,14 +140,14 @@ }, "issue_severity": "LOW", "issue_text": "subprocess call - check for execution of untrusted input.", - "line_number": 219, + "line_number": 220, "line_range": [ - 219, 220, 221, - 222 + 222, + 223 ], - "more_info": "https://bandit.readthedocs.io/en/1.7.5/plugins/b603_subprocess_without_shell_equals_true.html", + "more_info": "https://bandit.readthedocs.io/en/1.7.8/plugins/b603_subprocess_without_shell_equals_true.html", "test_id": "B603", "test_name": "subprocess_without_shell_equals_true" } diff --git a/.secrets.baseline b/.secrets.baseline index 46f17034..5e9b514c 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -75,10 +75,6 @@ { "path": "detect_secrets.filters.allowlist.is_line_allowlisted" }, - { - "path": "detect_secrets.filters.common.is_baseline_file", - "filename": ".secrets.baseline" - }, { "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", "min_level": 2 @@ -137,6 +133,22 @@ "line_number": 23 } ], + "tests/checks/remotesettings/test_attachments_integrity.py": [ + { + "type": "Hex High Entropy String", + "filename": "tests/checks/remotesettings/test_attachments_integrity.py", + "hashed_secret": "263e1869fb57deeb75844cf85d55f0d03a6019fc", + "is_verified": false, + "line_number": 31 + }, + { + "type": "Hex High Entropy String", + "filename": "tests/checks/remotesettings/test_attachments_integrity.py", + "hashed_secret": "fdf34abe05071170ce2ec082a18aa762ec454ae3", + "is_verified": false, + "line_number": 39 + } + ], "tests/checks/remotesettings/test_public_suffix_list.py": [ { "type": "Hex High Entropy String", @@ -147,5 +159,5 @@ } ] }, - "generated_at": "2023-09-19T13:14:09Z" + "generated_at": "2024-04-18T11:22:16Z" } diff --git a/checks/remotesettings/attachments_integrity.py b/checks/remotesettings/attachments_integrity.py new file mode 100644 index 00000000..a8ec2352 --- /dev/null +++ b/checks/remotesettings/attachments_integrity.py @@ -0,0 +1,68 @@ +""" +Every attachment in every collection has the right size and hash. + +The URLs of invalid attachments is returned along with the number of checked records. +""" + +import hashlib + +import aiohttp + +from telescope.typings import CheckResult +from telescope.utils import ClientSession, run_parallel + +from .utils import KintoClient + + +async def test_attachment(session, attachment): + url = attachment["location"] + try: + async with session.get(url) as response: + binary = await response.read() + except aiohttp.client_exceptions.ClientError as exc: + return {"url": url, "error": str(exc)}, False + + if (bz := len(binary)) != (az := attachment["size"]): + return {"url": url, "error": f"size differ ({bz}!={az})"}, False + + h = hashlib.sha256(binary) + if (bh := h.hexdigest()) != (ah := attachment["hash"]): + return {"url": url, "error": f"hash differ ({bh}!={ah})"}, False + + return {}, True + + +async def run(server: str) -> CheckResult: + client = KintoClient(server_url=server) + + info = await client.server_info() + base_url = info["capabilities"]["attachments"]["base_url"] + + # Fetch collections records in parallel. + entries = await client.get_monitor_changes() + futures = [ + client.get_records( + bucket=entry["bucket"], + collection=entry["collection"], + _expected=entry["last_modified"], + ) + for entry in entries + if "preview" not in entry["bucket"] + ] + results = await run_parallel(*futures) + + # For each record that has an attachment, check the attachment content. + attachments = [] + for entry, records in zip(entries, results): + for record in records: + if "attachment" not in record: + continue + attachment = record["attachment"] + attachment["location"] = base_url + attachment["location"] + attachments.append(attachment) + + async with ClientSession() as session: + futures = [test_attachment(session, attachment) for attachment in attachments] + results = await run_parallel(*futures) + bad = [result for result, success in results if not success] + return len(bad) == 0, {"bad": bad, "checked": len(attachments)} diff --git a/tests/checks/remotesettings/test_attachments_integrity.py b/tests/checks/remotesettings/test_attachments_integrity.py new file mode 100644 index 00000000..f629324f --- /dev/null +++ b/tests/checks/remotesettings/test_attachments_integrity.py @@ -0,0 +1,115 @@ +from checks.remotesettings.attachments_integrity import run + + +RECORDS_URL = "/buckets/{}/collections/{}/records" + + +async def test_positive(mock_responses, mock_aioresponses): + server_url = "http://fake.local/v1" + mock_responses.get( + server_url + "/", + payload={"capabilities": {"attachments": {"base_url": "http://cdn/"}}}, + ) + changes_url = server_url + RECORDS_URL.format("monitor", "changes") + mock_responses.get( + changes_url, + payload={ + "data": [ + {"id": "abc", "bucket": "bid", "collection": "cid", "last_modified": 42} + ] + }, + ) + records_url = server_url + RECORDS_URL.format("bid", "cid") + "?_expected=42" + mock_responses.get( + records_url, + payload={ + "data": [ + { + "id": "abc", + "attachment": { + "size": 5, + "hash": "ed968e840d10d2d313a870bc131a4e2c311d7ad09bdf32b3418147221f51a6e2", + "location": "file1.jpg", + }, + }, + { + "id": "efg", + "attachment": { + "size": 10, + "hash": "bf2cb58a68f684d95a3b78ef8f661c9a4e5b09e82cc8f9cc88cce90528caeb27", + "location": "file2.jpg", + }, + }, + {"id": "ijk"}, + ] + }, + ) + mock_aioresponses.get("http://cdn/file1.jpg", body=b"a" * 5) + mock_aioresponses.get("http://cdn/file2.jpg", body=b"a" * 10) + + status, data = await run(server_url) + + # assert status is True + assert data == {"bad": [], "checked": 2} + + +async def test_negative(mock_responses, mock_aioresponses): + server_url = "http://fake.local/v1" + mock_responses.get( + server_url + "/", + payload={"capabilities": {"attachments": {"base_url": "http://cdn/"}}}, + ) + changes_url = server_url + RECORDS_URL.format("monitor", "changes") + mock_responses.get( + changes_url, + payload={ + "data": [ + {"id": "abc", "bucket": "bid", "collection": "cid", "last_modified": 42} + ] + }, + ) + records_url = server_url + RECORDS_URL.format("bid", "cid") + "?_expected=42" + mock_responses.get( + records_url, + payload={ + "data": [ + { + "id": "abc", + "attachment": { + "size": 7, + "hash": "ed968e840d10d2d313a870bc131a4e2c311d7ad09bdf32b3418147221f51a6e2", + "location": "file1.jpg", + }, + }, + {"id": "efg", "attachment": {"location": "missing.jpg"}}, + { + "id": "ijk", + "attachment": {"size": 10, "hash": "foo", "location": "file2.jpg"}, + }, + {"id": "lmn"}, + ] + }, + ) + mock_aioresponses.get("http://cdn/file1.jpg", body=b"a" * 5) + mock_aioresponses.get("http://cdn/file2.jpg", body=b"a" * 10) + + status, data = await run(server_url) + + assert status is False + assert data == { + "bad": [ + { + "error": "size differ (5!=7)", + "url": "http://cdn/file1.jpg", + }, + { + "error": "Connection refused: GET http://cdn/missing.jpg", + "url": "http://cdn/missing.jpg", + }, + { + "error": "hash differ (bf2cb58a68f684d95a3b78ef8f661c9a4e5b09e82cc8f9cc88cce90528caeb27!=foo)", + "url": "http://cdn/file2.jpg", + }, + ], + "checked": 3, + }