Skip to content

Commit

Permalink
Add attachment integrity check (#1418)
Browse files Browse the repository at this point in the history
* Add attachment integrity check

* Update baselines
  • Loading branch information
leplatrem authored Apr 19, 2024
1 parent 7660485 commit c0340f2
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 41 deletions.
49 changes: 13 additions & 36 deletions .bandit.baseline
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
"errors": [],
"generated_at": "2023-05-25T13:28:03Z",
"generated_at": "2024-04-18T11:21:08Z",
"metrics": {
"_totals": {
"CONFIDENCE.HIGH": 3,
"CONFIDENCE.HIGH": 2,
"CONFIDENCE.LOW": 0,
"CONFIDENCE.MEDIUM": 0,
"CONFIDENCE.UNDEFINED": 0,
"SEVERITY.HIGH": 0,
"SEVERITY.LOW": 3,
"SEVERITY.LOW": 2,
"SEVERITY.MEDIUM": 0,
"SEVERITY.UNDEFINED": 0,
"loc": 883,
Expand Down Expand Up @@ -42,15 +42,15 @@
"skipped_tests": 0
},
"telescope/app.py": {
"CONFIDENCE.HIGH": 3,
"CONFIDENCE.HIGH": 2,
"CONFIDENCE.LOW": 0,
"CONFIDENCE.MEDIUM": 0,
"CONFIDENCE.UNDEFINED": 0,
"SEVERITY.HIGH": 0,
"SEVERITY.LOW": 3,
"SEVERITY.LOW": 2,
"SEVERITY.MEDIUM": 0,
"SEVERITY.UNDEFINED": 0,
"loc": 380,
"loc": 382,
"nosec": 0,
"skipped_tests": 0
},
Expand All @@ -63,7 +63,7 @@
"SEVERITY.LOW": 0,
"SEVERITY.MEDIUM": 0,
"SEVERITY.UNDEFINED": 0,
"loc": 89,
"loc": 87,
"nosec": 1,
"skipped_tests": 0
},
Expand Down Expand Up @@ -124,35 +124,12 @@
"line_range": [
6
],
"more_info": "https://bandit.readthedocs.io/en/1.7.5/blacklists/blacklist_imports.html#b404-import-subprocess",
"more_info": "https://bandit.readthedocs.io/en/1.7.8/blacklists/blacklist_imports.html#b404-import-subprocess",
"test_id": "B404",
"test_name": "blacklist"
},
{
"code": "218 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n219 curl_cmd = subprocess.run(\n220 [\"curl\", \"--version\"],\n221 capture_output=True,\n222 )\n223 output = curl_cmd.stdout.strip().decode()\n",
"col_offset": 15,
"end_col_offset": 5,
"filename": "telescope/app.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 78,
"link": "https://cwe.mitre.org/data/definitions/78.html"
},
"issue_severity": "LOW",
"issue_text": "Starting a process with a partial executable path",
"line_number": 219,
"line_range": [
219,
220,
221,
222
],
"more_info": "https://bandit.readthedocs.io/en/1.7.5/plugins/b607_start_process_with_partial_path.html",
"test_id": "B607",
"test_name": "start_process_with_partial_path"
},
{
"code": "218 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n219 curl_cmd = subprocess.run(\n220 [\"curl\", \"--version\"],\n221 capture_output=True,\n222 )\n223 output = curl_cmd.stdout.strip().decode()\n",
"code": "219 # Check that `curl` has HTTP2 and HTTP3 for `checks.core.http_versions`\n220 curl_cmd = subprocess.run(\n221 [config.CURL_BINARY_PATH, \"--version\"],\n222 capture_output=True,\n223 )\n224 output = curl_cmd.stdout.strip().decode()\n",
"col_offset": 15,
"end_col_offset": 5,
"filename": "telescope/app.py",
Expand All @@ -163,14 +140,14 @@
},
"issue_severity": "LOW",
"issue_text": "subprocess call - check for execution of untrusted input.",
"line_number": 219,
"line_number": 220,
"line_range": [
219,
220,
221,
222
222,
223
],
"more_info": "https://bandit.readthedocs.io/en/1.7.5/plugins/b603_subprocess_without_shell_equals_true.html",
"more_info": "https://bandit.readthedocs.io/en/1.7.8/plugins/b603_subprocess_without_shell_equals_true.html",
"test_id": "B603",
"test_name": "subprocess_without_shell_equals_true"
}
Expand Down
22 changes: 17 additions & 5 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -137,6 +133,22 @@
"line_number": 23
}
],
"tests/checks/remotesettings/test_attachments_integrity.py": [
{
"type": "Hex High Entropy String",
"filename": "tests/checks/remotesettings/test_attachments_integrity.py",
"hashed_secret": "263e1869fb57deeb75844cf85d55f0d03a6019fc",
"is_verified": false,
"line_number": 31
},
{
"type": "Hex High Entropy String",
"filename": "tests/checks/remotesettings/test_attachments_integrity.py",
"hashed_secret": "fdf34abe05071170ce2ec082a18aa762ec454ae3",
"is_verified": false,
"line_number": 39
}
],
"tests/checks/remotesettings/test_public_suffix_list.py": [
{
"type": "Hex High Entropy String",
Expand All @@ -147,5 +159,5 @@
}
]
},
"generated_at": "2023-09-19T13:14:09Z"
"generated_at": "2024-04-18T11:22:16Z"
}
68 changes: 68 additions & 0 deletions checks/remotesettings/attachments_integrity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Every attachment in every collection has the right size and hash.
The URLs of invalid attachments is returned along with the number of checked records.
"""

import hashlib

import aiohttp

from telescope.typings import CheckResult
from telescope.utils import ClientSession, run_parallel

from .utils import KintoClient


async def test_attachment(session, attachment):
url = attachment["location"]
try:
async with session.get(url) as response:
binary = await response.read()
except aiohttp.client_exceptions.ClientError as exc:
return {"url": url, "error": str(exc)}, False

if (bz := len(binary)) != (az := attachment["size"]):
return {"url": url, "error": f"size differ ({bz}!={az})"}, False

h = hashlib.sha256(binary)
if (bh := h.hexdigest()) != (ah := attachment["hash"]):
return {"url": url, "error": f"hash differ ({bh}!={ah})"}, False

return {}, True


async def run(server: str) -> CheckResult:
client = KintoClient(server_url=server)

info = await client.server_info()
base_url = info["capabilities"]["attachments"]["base_url"]

# Fetch collections records in parallel.
entries = await client.get_monitor_changes()
futures = [
client.get_records(
bucket=entry["bucket"],
collection=entry["collection"],
_expected=entry["last_modified"],
)
for entry in entries
if "preview" not in entry["bucket"]
]
results = await run_parallel(*futures)

# For each record that has an attachment, check the attachment content.
attachments = []
for entry, records in zip(entries, results):
for record in records:
if "attachment" not in record:
continue
attachment = record["attachment"]
attachment["location"] = base_url + attachment["location"]
attachments.append(attachment)

async with ClientSession() as session:
futures = [test_attachment(session, attachment) for attachment in attachments]
results = await run_parallel(*futures)
bad = [result for result, success in results if not success]
return len(bad) == 0, {"bad": bad, "checked": len(attachments)}
115 changes: 115 additions & 0 deletions tests/checks/remotesettings/test_attachments_integrity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from checks.remotesettings.attachments_integrity import run


RECORDS_URL = "/buckets/{}/collections/{}/records"


async def test_positive(mock_responses, mock_aioresponses):
server_url = "http://fake.local/v1"
mock_responses.get(
server_url + "/",
payload={"capabilities": {"attachments": {"base_url": "http://cdn/"}}},
)
changes_url = server_url + RECORDS_URL.format("monitor", "changes")
mock_responses.get(
changes_url,
payload={
"data": [
{"id": "abc", "bucket": "bid", "collection": "cid", "last_modified": 42}
]
},
)
records_url = server_url + RECORDS_URL.format("bid", "cid") + "?_expected=42"
mock_responses.get(
records_url,
payload={
"data": [
{
"id": "abc",
"attachment": {
"size": 5,
"hash": "ed968e840d10d2d313a870bc131a4e2c311d7ad09bdf32b3418147221f51a6e2",
"location": "file1.jpg",
},
},
{
"id": "efg",
"attachment": {
"size": 10,
"hash": "bf2cb58a68f684d95a3b78ef8f661c9a4e5b09e82cc8f9cc88cce90528caeb27",
"location": "file2.jpg",
},
},
{"id": "ijk"},
]
},
)
mock_aioresponses.get("http://cdn/file1.jpg", body=b"a" * 5)
mock_aioresponses.get("http://cdn/file2.jpg", body=b"a" * 10)

status, data = await run(server_url)

# assert status is True
assert data == {"bad": [], "checked": 2}


async def test_negative(mock_responses, mock_aioresponses):
server_url = "http://fake.local/v1"
mock_responses.get(
server_url + "/",
payload={"capabilities": {"attachments": {"base_url": "http://cdn/"}}},
)
changes_url = server_url + RECORDS_URL.format("monitor", "changes")
mock_responses.get(
changes_url,
payload={
"data": [
{"id": "abc", "bucket": "bid", "collection": "cid", "last_modified": 42}
]
},
)
records_url = server_url + RECORDS_URL.format("bid", "cid") + "?_expected=42"
mock_responses.get(
records_url,
payload={
"data": [
{
"id": "abc",
"attachment": {
"size": 7,
"hash": "ed968e840d10d2d313a870bc131a4e2c311d7ad09bdf32b3418147221f51a6e2",
"location": "file1.jpg",
},
},
{"id": "efg", "attachment": {"location": "missing.jpg"}},
{
"id": "ijk",
"attachment": {"size": 10, "hash": "foo", "location": "file2.jpg"},
},
{"id": "lmn"},
]
},
)
mock_aioresponses.get("http://cdn/file1.jpg", body=b"a" * 5)
mock_aioresponses.get("http://cdn/file2.jpg", body=b"a" * 10)

status, data = await run(server_url)

assert status is False
assert data == {
"bad": [
{
"error": "size differ (5!=7)",
"url": "http://cdn/file1.jpg",
},
{
"error": "Connection refused: GET http://cdn/missing.jpg",
"url": "http://cdn/missing.jpg",
},
{
"error": "hash differ (bf2cb58a68f684d95a3b78ef8f661c9a4e5b09e82cc8f9cc88cce90528caeb27!=foo)",
"url": "http://cdn/file2.jpg",
},
],
"checked": 3,
}

0 comments on commit c0340f2

Please sign in to comment.