diff --git a/checks/remotesettings/attachments_bundles.py b/checks/remotesettings/attachments_bundles.py
new file mode 100644
index 00000000..cf1ee816
--- /dev/null
+++ b/checks/remotesettings/attachments_bundles.py
@@ -0,0 +1,136 @@
+"""
+Verify freshness and validity of attachment bundles.
+
+For each collection where the attachments bundle is enabled, return the modification timestamp and number of attachments bundled.
+"""
+
+import io
+import logging
+import urllib.parse
+import zipfile
+from typing import Any
+
+from telescope.typings import CheckResult
+from telescope.utils import (
+    ClientSession,
+    retry_decorator,
+    run_parallel,
+    utcfromhttpdate,
+    utcfromtimestamp,
+)
+
+from .utils import KintoClient, fetch_signed_resources
+
+
+EXPOSED_PARAMETERS = ["server"]
+
+logger = logging.getLogger(__name__)
+
+
+@retry_decorator
+async def fetch_binary(url: str, **kwargs) -> tuple[int, str, bytes]:
+    """Fetch `url` and return its HTTP status, ``Last-Modified`` header and body.
+
+    Extra keyword arguments are passed to ``session.get()``. When the response
+    has no ``Last-Modified`` header, the Unix epoch is returned instead.
+    """
+    human_url = urllib.parse.unquote(url)
+    logger.debug("Fetch binary from '%s'", human_url)
+    async with ClientSession() as session:
+        async with session.get(url, **kwargs) as response:
+            return (
+                response.status,
+                response.headers.get("Last-Modified", "Thu, 01 Jan 1970 00:00:00 GMT"),
+                await response.read(),
+            )
+
+
+async def run(
+    server: str, auth: str, margin_publication_hours: int = 12
+) -> CheckResult:
+    """Check the attachments bundle of each collection that has bundling enabled.
+
+    A bundle is reported as ``missing`` when its download returns an HTTP error,
+    as ``bad zip`` when it is not a valid Zip archive, and as ``outdated`` when
+    the collection timestamp is more than `margin_publication_hours` hours
+    ahead of the bundle ``Last-Modified`` date. The check fails if any bundle
+    is not ``ok``.
+    """
+    client = KintoClient(server_url=server, auth=auth)
+    resources = await fetch_signed_resources(server, auth)
+
+    logger.debug("Fetch metadata of %s collections", len(resources))
+    futures = [
+        client.get_collection(
+            bucket=resource["source"]["bucket"],
+            id=resource["source"]["collection"],
+        )
+        for resource in resources
+    ]
+    sources_metadata = await run_parallel(*futures)
+    resources_sources_metadata = zip(resources, sources_metadata)
+
+    metadata_for_bundled = [
+        (r, m)
+        for r, m in resources_sources_metadata
+        if m["data"].get("attachment", {}).get("bundle", False)
+    ]
+    logger.info("%s collections with attachments bundle", len(metadata_for_bundled))
+    assert metadata_for_bundled, "No collection with attachments bundle enabled"
+    records_timestamps = [
+        resource["last_modified"] for resource, _ in metadata_for_bundled
+    ]
+
+    info = await client.server_info()
+    base_url = info["capabilities"]["attachments"]["base_url"]
+
+    futures_bundles = []
+    for resource, metadata in metadata_for_bundled:
+        bid = resource["destination"]["bucket"]
+        cid = metadata["data"]["id"]
+        url = f"{base_url}bundles/{bid}--{cid}.zip"
+        futures_bundles.append(fetch_binary(url))
+    bundles = await run_parallel(*futures_bundles)
+
+    timestamps_metadata_bundles = zip(records_timestamps, metadata_for_bundled, bundles)
+
+    result: dict[str, dict[str, Any]] = {}
+    success = True
+    for timestamp, (resource, metadata), bundle in timestamps_metadata_bundles:
+        http_status, modified, binary = bundle
+        bid = resource["destination"]["bucket"]
+        cid = metadata["data"]["id"]
+        if http_status >= 400:
+            result[f"{bid}/{cid}"] = {"status": "missing"}
+            success = False
+            continue
+
+        try:
+            # Close the archive as soon as its entries are counted.
+            with zipfile.ZipFile(io.BytesIO(binary)) as z:
+                nfiles = len(z.namelist())
+        except zipfile.BadZipFile:
+            result[f"{bid}/{cid}"] = {"status": "bad zip"}
+            success = False
+            continue
+
+        bundle_ts = utcfromhttpdate(modified)
+        records_ts = utcfromtimestamp(timestamp)
+        status = (
+            "outdated"
+            if ((records_ts - bundle_ts).total_seconds() / 3600)
+            > margin_publication_hours
+            else "ok"
+        )
+        if status != "ok":
+            # A stale bundle must fail the check, like missing or invalid ones.
+            success = False
+        result[f"{bid}/{cid}"] = {
+            "status": status,
+            "size": len(binary),
+            "attachments": nfiles,
+            "publication_timestamp": bundle_ts.isoformat(),
+            "collection_timestamp": records_ts.isoformat(),
+        }
+
+    return success, result
diff --git a/telescope/utils.py b/telescope/utils.py
index 0e1b1dc1..b170abcc 100644
--- a/telescope/utils.py
+++ b/telescope/utils.py
@@ -1,4 +1,5 @@
 import asyncio
+import email.utils
 import json
 import logging
 import textwrap
@@ -164,6 +165,17 @@ def utcfromisoformat(iso8601):
     return datetime.fromisoformat(iso8601_tz).replace(tzinfo=timezone.utc)
 
 
+def utcfromhttpdate(httpdate):
+    """Return the timezone-aware UTC datetime of an HTTP date string."""
+    parsed = email.utils.parsedate_to_datetime(httpdate)
+    if parsed.tzinfo is None:
+        # A ``-0000`` offset is parsed as a naive datetime: consider it UTC.
+        return parsed.replace(tzinfo=timezone.utc)
+    # Convert instead of overwriting the offset, so that a date expressed in
+    # another timezone still designates the same instant.
+    return parsed.astimezone(timezone.utc)
+
+
 def render_checks(func):
     async def wrapper(request):
         # First, check that client requests supported output format.
diff --git a/tests/checks/remotesettings/test_attachments_bundles.py b/tests/checks/remotesettings/test_attachments_bundles.py
new file mode 100644
index 00000000..6241f177
--- /dev/null
+++ b/tests/checks/remotesettings/test_attachments_bundles.py
@@ -0,0 +1,152 @@
+import io
+import zipfile
+
+from checks.remotesettings.attachments_bundles import run
+
+
+COLLECTION_URL = "/buckets/{}/collections/{}"
+RECORDS_URL = "/buckets/{}/collections/{}/records"
+CHANGESET_URL = "/buckets/{}/collections/{}/changeset"
+
+
+def build_zip(num_files=3):
+    """Build an in-memory Zip archive of `num_files` text files of 1024 chars."""
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
+        for i in range(num_files):
+            file_name = f"fake_file_{i}.txt"
+            zip_file.writestr(file_name, 1024 * "x")
+    return zip_buffer.getvalue()
+
+
+async def test_negative(mock_responses, mock_aioresponses):
+    """Missing, invalid and outdated bundles are reported and fail the check."""
+    server_url = "http://fake.local/v1"
+    mock_responses.get(
+        server_url + "/",
+        payload={
+            "capabilities": {
+                "attachments": {"base_url": "http://cdn/"},
+                "signer": {
+                    "resources": [
+                        {
+                            "source": {"bucket": "main-workspace", "collection": None},
+                            "preview": {"bucket": "main-preview", "collection": None},
+                            "destination": {"bucket": "main", "collection": None},
+                        }
+                    ]
+                },
+            }
+        },
+    )
+    may8_ts = 389664061000
+    may8_http = "Sat, 08 May 1982 00:01:01 GMT"
+    may8_iso = "1982-05-08T00:01:01+00:00"
+
+    changes_url = server_url + RECORDS_URL.format("monitor", "changes")
+    mock_responses.get(
+        changes_url,
+        payload={
+            "data": [
+                {
+                    "id": "abc",
+                    "bucket": "main",
+                    "collection": "missing",
+                    "last_modified": may8_ts,
+                },
+                {
+                    "id": "efg",
+                    "bucket": "main",
+                    "collection": "ok",
+                    "last_modified": may8_ts,
+                },
+                {
+                    "id": "hij",
+                    "bucket": "main",
+                    "collection": "badzip",
+                    "last_modified": may8_ts,
+                },
+                {
+                    "id": "klm",
+                    "bucket": "main",
+                    "collection": "outdated",
+                    "last_modified": may8_ts + 24 * 3600 * 1000 + 60 * 1000,
+                },
+                {
+                    "id": "nop",
+                    "bucket": "main",
+                    "collection": "late",
+                    "last_modified": may8_ts + 600 * 1000,
+                },
+                {
+                    "id": "qrs",
+                    "bucket": "main",
+                    "collection": "no-bundle",
+                    "last_modified": may8_ts,
+                },
+            ]
+        },
+    )
+
+    for cid in ("missing", "ok", "badzip", "outdated", "late", "no-bundle"):
+        mock_responses.get(
+            server_url + COLLECTION_URL.format("main-workspace", cid),
+            payload={
+                "data": {
+                    "id": cid,
+                    "bucket": "main-workspace",
+                    "attachment": {"bundle": cid != "no-bundle"},
+                }
+            },
+        )
+
+    mock_aioresponses.get("http://cdn/bundles/main--missing.zip", status=404)
+    mock_aioresponses.get(
+        "http://cdn/bundles/main--ok.zip",
+        body=build_zip(),
+        headers={"Last-Modified": may8_http},
+    )
+    mock_aioresponses.get(
+        "http://cdn/bundles/main--outdated.zip",
+        body=build_zip(num_files=6),
+        headers={"Last-Modified": may8_http},
+    )
+    mock_aioresponses.get(
+        "http://cdn/bundles/main--late.zip",
+        body=build_zip(num_files=6),
+        headers={"Last-Modified": may8_http},
+    )
+    mock_aioresponses.get(
+        "http://cdn/bundles/main--badzip.zip",
+        body=b"boom",
+        headers={"Last-Modified": may8_http},
+    )
+
+    status, data = await run(server_url, auth="")
+
+    assert status is False
+    assert data == {
+        "main/badzip": {"status": "bad zip"},
+        "main/missing": {"status": "missing"},
+        "main/ok": {
+            "status": "ok",
+            "attachments": 3,
+            "collection_timestamp": "1982-05-08T00:01:01+00:00",
+            "publication_timestamp": may8_iso,
+            "size": 373,
+        },
+        "main/late": {
+            "status": "ok",
+            "attachments": 6,
+            "collection_timestamp": "1982-05-08T00:11:01+00:00",
+            "publication_timestamp": may8_iso,
+            "size": 724,
+        },
+        "main/outdated": {
+            "attachments": 6,
+            "collection_timestamp": "1982-05-09T00:02:01+00:00",
+            "publication_timestamp": may8_iso,
+            "size": 724,
+            "status": "outdated",
+        },
+    }