From 856076eeb75f4032dbaac8b17cd29fb92f501fb0 Mon Sep 17 00:00:00 2001 From: Jonny Scott Date: Wed, 29 Jan 2025 11:02:37 +0100 Subject: [PATCH 1/8] pld joint accountant --- pfl/privacy/__init__.py | 1 + pfl/privacy/joint_privacy_accountant.py | 329 ++++++++++++++++++ .../privacy/test_joint_privacy_accountant.py | 150 ++++++++ 3 files changed, 480 insertions(+) create mode 100644 pfl/privacy/joint_privacy_accountant.py create mode 100644 tests/privacy/test_joint_privacy_accountant.py diff --git a/pfl/privacy/__init__.py b/pfl/privacy/__init__.py index 1735c14..86d2d7b 100644 --- a/pfl/privacy/__init__.py +++ b/pfl/privacy/__init__.py @@ -1,6 +1,7 @@ # Copyright © 2023-2024 Apple Inc. from .gaussian_mechanism import GaussianMechanism +from .joint_privacy_accountant import JointPLDPrivacyAccountant from .laplace_mechanism import LaplaceMechanism from .privacy_accountant import PLDPrivacyAccountant, PrivacyAccountant, PRVPrivacyAccountant, RDPPrivacyAccountant from .privacy_mechanism import ( diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py new file mode 100644 index 0000000..7ca3a30 --- /dev/null +++ b/pfl/privacy/joint_privacy_accountant.py @@ -0,0 +1,329 @@ +# Copyright © 2023-2024 Apple Inc. +''' +Privacy accountants for differential privacy. 
+''' + +import math +import time +from dataclasses import dataclass +from typing import Callable, List, Optional, Tuple, TypeVar + +from dp_accounting import dp_event +from dp_accounting.pld import privacy_loss_distribution +from dp_accounting.rdp import rdp_privacy_accountant +from prv_accountant import LaplaceMechanism, PoissonSubsampledGaussianMechanism, PRVAccountant + +from .privacy_accountant import binary_search_function + +MIN_BOUND_NOISE_PARAMETER = 0 +MAX_BOUND_NOISE_PARAMETER = 100 +RTOL_NOISE_PARAMETER = 0.001 +CONFIDENCE_THRESHOLD_NOISE_PARAMETER = 1e-8 + +MIN_BOUND_EPSILON = 0 +MAX_BOUND_EPSILON = 30 +RTOL_EPSILON = 0.001 +CONFIDENCE_THRESHOLD_EPSILON = 1e-8 + + +@dataclass +class JointPrivacyAccountant: + """ + Tracks the privacy loss over multiple composition steps with multiple + mechanisms simultaneously. Either two or three of the variables epsilon, + delta and noise_parameters must be defined. + + If all three are defined a check will be performed to make sure a valid set of + variable values has been provided. + + If two are defined, the remaining variable can be computed. In the case that + epsilon and delta are defined then the budget_proportions parameter + must be provided, specifying what fraction of the total budget each + mechanism is allocated. For budget_proportions = [p_1, p_2, ...] + the noise parameters are then computed as follows. We find large_epsilon + and noise parameters [sigma_1, sigma_2, ...] such that the following two + constraints hold: + 1. For each i, mechanism_i with noise parameter sigma_i is (large_epsilon * p_i, delta) + DP after all composition steps + 2. The composition of all mechanisms over all steps is (epsilon, delta) DP + + :param num_compositions: + Maximum number of compositions to be performed with each mechanism. + :param sampling_probability: + Maximum probability of sampling each entity being privatized. E.g. if + the unit of privacy is one device, this is the probability of each + device participating. 
+ :param mechanisms: + The list of noise mechanisms to be used, each can be either Gaussian or Laplace. + :param epsilon: + The privacy loss random variable. It controls how much the output of + the mechanism can vary between two neighboring databases. + :param delta: + The probability that all privacy will be lost. + :param budget_proportions: + List specifying the proportion of the total (epsilon, delta) privacy budget + each mechanism is allocated. + :param noise_parameters: + The parameters for DP noise for each mechanism. For the Gaussian + mechanism, the noise parameter is the standard deviation of the noise. + For the Laplace mechanism, the noise parameter is the scale of the noise. + :param noise_scale: + A value \\in [0, 1] multiplied with the standard deviation of the noise + to be added for privatization. Typically used to experiment with lower + sampling probabilities when it is not possible or desirable to increase + the population size of the units being privatized, e.g. user devices. + """ + num_compositions: int + sampling_probability: float + mechanisms: List[str] + epsilon: Optional[float] = None + delta: Optional[float] = None + budget_proportions: Optional[List[float]] = None + noise_parameters: Optional[List[float]] = None + noise_scale: float = 1.0 + + def __post_init__(self): + assert [ + self.epsilon, self.delta, self.noise_parameters + ].count(None) <= 1, ( + f'At least two of epsilon ({self.epsilon}),' + f'delta ({self.delta}) and noise parameters ({self.noise_parameters})' + 'must be defined for a joint privacy accountant') + if self.noise_scale <= 0 or self.noise_scale > 1.0: + raise ValueError("noise_scale must be in range (0,1]") + assert ( + self.sampling_probability >= 0 + and self.sampling_probability <= 1.0), ( + f'Sampling probability {self.sampling_probability} is invalid.' 
+ 'Must be in range [0, 1]') + assert self.num_compositions > 0 and isinstance( + self.num_compositions, + int), ('Number of compositions must be a positive integer') + if self.noise_parameters is not None: + for noise_parameter in self.noise_parameters: + assert noise_parameter > 0, ( + 'All noise parameters must be positive real values.') + if self.epsilon is not None: + assert self.epsilon >= 0, ( + 'Epsilon must be a non-negative real value') + if self.delta is not None: + assert self.delta > 0 and self.delta < 1, ( + 'Delta should be a positive real value in range (0, 1)') + + self.mechanisms = [mechanism.lower() for mechanism in self.mechanisms] + + @property + def cohort_noise_parameters(self): + """ + Noise parameters to be used on a cohort of users. + Noise scale is considered. + """ + return [noise_parameter * self.noise_scale for noise_parameter in self.noise_parameters] + + +@dataclass +class JointPLDPrivacyAccountant(JointPrivacyAccountant): + """ + Uses Privacy Loss Distribution (PLD) privacy accountant, from dp-accounting + package for each mechanism. + + :param value_discretization_interval: + The length of the dicretization interval for the privacy loss + distribution. Rounding will occur to integer multiples of + value_discretization_interval. Smaller values yield more accurate + estimates of the privacy loss, while incurring higher compute and + memory. Hence, larger values decrease computation time. Note that the + accountant algorithm maintains similar error bounds as the value of + value_discretization_interval is changed. + :param use_connect_dots: + boolean indicating whether or not to use Connect-the-Dots algorithm by + Doroshenko et al., which gives tighter discrete approximations of PLDs. + :param pessimistic_estimate: + boolean indicating whether rounding used in PLD algorithm results in + epsilon-hockey stick divergence computation yielding upper estimate to + real value. 
+ :param log_mass_truncation_bound: + The natural log of probability mass that may be discarded from noise + distribution. + Larger values will increase the error. + """ + value_discretization_interval: float = 1e-4 + use_connect_dots: bool = True + pessimistic_estimate: bool = True + log_mass_truncation_bound: float = -50 + + def __post_init__(self): + super().__post_init__() + self.min_bounds = [MIN_BOUND_NOISE_PARAMETER] * len(self.mechanisms) + self.max_bounds = [MAX_BOUND_NOISE_PARAMETER] * len(self.mechanisms) + + for mechanism in self.mechanisms: + assert mechanism in [ + 'gaussian', 'laplace' + ], ('Only gaussian and laplace mechanisms are supported.') + + if self.budget_proportions: + assert len(self.mechanisms) == len(self.budget_proportions), ( + 'Mechansim names and budget proportions must have the same length' + ) + + assert math.isclose(sum(self.budget_proportions), 1, rel_tol=1e-3), ( + 'Privacy budget proportions must sum to 1.' + ) + + # Epsilon, delta, noise parameter all defined. Nothing to do. + if [self.epsilon, self.delta, self.noise_parameters].count(None) == 0: + assert math.isclose( + self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.pessimistic_estimate, self.sampling_probability, + self.use_connect_dots, self.value_discretization_interval, + self.num_compositions).get_delta_for_epsilon(self.epsilon), + self.delta, + rel_tol=1e-3), ( + 'Invalid settings of epsilon, delta, noise_parameters for ' + 'JointPLDPrivacyAccountant') + + else: + # Only two of epsilon, delta, noise parameters defined. 
+ # Compute remaining variable + if self.noise_parameters: + composed_pld = self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.pessimistic_estimate, self.sampling_probability, + self.use_connect_dots, self.value_discretization_interval, + self.num_compositions) + + if self.epsilon: + self.delta = composed_pld.get_delta_for_epsilon( + self.epsilon) + else: + self.epsilon = composed_pld.get_epsilon_for_delta( + self.delta) + + else: + # Do binary search over large_epsilon. Within each iteration of the binary search + # we run an inner binary search to compute the noise parameter for each mechanism + # that enforce condition 1 from above. + def compute_delta(large_epsilon): + delta = self.get_composed_accountant( + self.mechanisms, self.compute_noise_paramters(large_epsilon), self.pessimistic_estimate, + self.sampling_probability, self.use_connect_dots, self. + value_discretization_interval, self.num_compositions, + ).get_delta_for_epsilon(self.epsilon) + + if delta < self.delta: + # large_epsilon was too small, i.e. noise was too large. + # We can decrease our starting max bound for the next noise parameter search + self.max_bounds = self.noise_parameters + else: + # large_epsilon was too large, i.e. noise was too small. 
+ # We can increase our starting min bound for the next noise parameter search + self.min_bounds = self.noise_parameters + + return delta + + try: + start = time.time() + self.large_epsilon = binary_search_function( + func=compute_delta, + func_monotonically_increasing=True, + target_value=self.delta, + min_bound=max(MIN_BOUND_EPSILON, self.epsilon), + max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + rtol=RTOL_EPSILON, + confidence_threshold= + CONFIDENCE_THRESHOLD_EPSILON) + print(f'Ran in {time.time() - start}') + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'large_epsilon using PLD privacy accountant: ' + f'{e}') from e + + @staticmethod + def get_composed_accountant(mechanisms, noise_parameters, + pessimistic_estimate, sampling_probability, + use_connect_dots, + value_discretization_interval, + num_compositions): + composed_pld = None + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation=noise_parameter, + sensitivity=1, + pessimistic_estimate=pessimistic_estimate, + sampling_prob=sampling_probability, + use_connect_dots=use_connect_dots, + value_discretization_interval=value_discretization_interval) + elif mechanism == 'laplace': + pld = privacy_loss_distribution.from_laplace_mechanism( + parameter=noise_parameter, + sensitivity=1, + pessimistic_estimate=pessimistic_estimate, + sampling_prob=sampling_probability, + use_connect_dots=use_connect_dots, + value_discretization_interval=value_discretization_interval) + + else: + raise ValueError(f'mechanism {mechanism} is not supported.') + + if composed_pld is None: + composed_pld = pld.self_compose(num_compositions) + else: + composed_pld = composed_pld.compose(pld.self_compose(num_compositions)) + + return composed_pld + + def compute_noise_paramters(self, large_epsilon): + start = time.time() + noise_parameters 
= [] + + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): + mechanism_epsilon = large_epsilon * p + func = lambda noise_param: self.get_composed_accountant( + [mechanism], [noise_param], self.pessimistic_estimate, + self.sampling_probability, self.use_connect_dots, self. + value_discretization_interval, self.num_compositions, + ).get_delta_for_epsilon(mechanism_epsilon) + try: + noise_parameter = binary_search_function( + func=func, + func_monotonically_increasing=False, + target_value=self.delta, + min_bound=min_bound, + max_bound=max_bound, + rtol=RTOL_NOISE_PARAMETER, + confidence_threshold= + CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'noise_parameter using PLD privacy accountant: ' + f'{e}') from e + + noise_parameters.append(noise_parameter) + + self.noise_parameters = noise_parameters + print(f"Computing noise parameters took {time.time() - start:.2f}s") + return noise_parameters + + +def main(): + epsilon = 2 + delta = 1e-8 + num_compositions = 100 + sample_prob = 0.1 + mechanisms = ['gaussian', 'gaussian'] + budget_proportions = [0.25, 0.75] + + accountant = JointPLDPrivacyAccountant(num_compositions, sample_prob, mechanisms, + epsilon=epsilon, delta=delta, budget_proportions=budget_proportions) + + print(accountant.noise_parameters) + print(accountant.large_epsilon) + + +if __name__ == '__main__': + main() diff --git a/tests/privacy/test_joint_privacy_accountant.py b/tests/privacy/test_joint_privacy_accountant.py new file mode 100644 index 0000000..dd19107 --- /dev/null +++ b/tests/privacy/test_joint_privacy_accountant.py @@ -0,0 +1,150 @@ +# Copyright © 2023-2024 Apple Inc. +''' +Test privacy accountants for DP in privacy_accountant.py. 
+''' + +from unittest.mock import patch + +import numpy as np +import pytest +from dp_accounting import dp_event +from dp_accounting.pld import privacy_loss_distribution +from dp_accounting.rdp import rdp_privacy_accountant +from prv_accountant import LaplaceMechanism, PoissonSubsampledGaussianMechanism, PRVAccountant + +from pfl.privacy import JointPLDPrivacyAccountant + + +@pytest.fixture() +def num_compositions(): + return [2, 10, 1e2, 1e4] + + +@pytest.fixture() +def sampling_probability(): + return [1e-8, 1e-5, 1e-2, 1e-1, 0.5] + + +@pytest.fixture() +def epsilon(): + return [0.5, 2, 5, 20] + + +@pytest.fixture() +def delta(): + return [1e-12, 1e-6, 1e-2] + + +@pytest.fixture() +def sigma(): + return [] + + +def get_expected_delta_pld(noise_parameters, sampling_probability, + num_compositions, mechanisms, epsilon): + + plds = [] + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + pld = privacy_loss_distribution.from_gaussian_mechanism( + standard_deviation=noise_parameter, + sensitivity=1, + sampling_prob=sampling_probability, + ).self_compose(num_compositions) + elif mechanism == 'laplace': + pld = privacy_loss_distribution.from_laplace_mechanism( + parameter=noise_parameter, + sensitivity=1, + sampling_prob=sampling_probability, + ).self_compose(num_compositions) + else: + raise ValueError(f'Mechanism {mechanism} is not a valid value.') + + plds.append(pld) + + composed_pld = None + for pld in plds: + if composed_pld: + composed_pld = composed_pld.compose(pld) + else: + composed_pld = pld + + expected_delta = composed_pld.get_delta_for_epsilon(epsilon) + return expected_delta, plds + + +class TestPrivacyAccountants: + + @pytest.mark.parametrize( + 'accountant_class, fn_expected_delta, max_bound', + [(JointPLDPrivacyAccountant, get_expected_delta_pld, 15)]) + @pytest.mark.parametrize( + 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # 
pylint: disable=line-too-long + [(1000, 0.01, 2, None, [0.76, 1], 1.0, ['gaussian', 'gaussian'], None), + (100, 0.1, None, 1e-8, [0.5, 0.5], 0.5, ['gaussian', 'laplace'], None), + (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.25, 0.75])]) + def test(self, num_compositions, sampling_probability, epsilon, delta, + noise_parameters, noise_scale, mechanisms, budget_proportions, + accountant_class, fn_expected_delta, max_bound): + # these are patches for hyperparameters for the binary search for the + # noise parameter - these settings speed up the binary search for tests + with patch( + 'pfl.privacy.joint_privacy_accountant.MIN_BOUND_NOISE_PARAMETER', + new=2), patch( + 'pfl.privacy.joint_privacy_accountant.MAX_BOUND_NOISE_PARAMETER', + new=max_bound), patch( + 'pfl.privacy.joint_privacy_accountant.MIN_BOUND_EPSILON', + new=2.5), patch( + 'pfl.privacy.joint_privacy_accountant.MAX_BOUND_EPSILON', + new=2.6): + with patch('pfl.privacy.joint_privacy_accountant.RTOL_NOISE_PARAMETER', + new=0.1), patch( + 'pfl.privacy.joint_privacy_accountant.RTOL_EPSILON', + new=0.1): + accountant = accountant_class( + num_compositions=num_compositions, + sampling_probability=sampling_probability, + mechanisms=mechanisms, + epsilon=epsilon, + delta=delta, + budget_proportions=budget_proportions, + noise_parameters=noise_parameters, + noise_scale=noise_scale) + noise_parameters = ([cohort_noise_parameter / noise_scale + for cohort_noise_parameter in accountant.cohort_noise_parameters]) + + expected_delta, plds = fn_expected_delta(noise_parameters, + sampling_probability, + num_compositions, mechanisms, + accountant.epsilon) + + np.testing.assert_almost_equal(accountant.delta, + expected_delta) + + if budget_proportions: + for pld, p in zip(plds, budget_proportions): + np.testing.assert_almost_equal(pld.get_epsilon_for_delta(delta), accountant.large_epsilon * p, decimal=2) + + # @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) + # 
@pytest.mark.parametrize('accountant_class', [(PLDPrivacyAccountant), + # (PRVPrivacyAccountant), + # (RDPPrivacyAccountant)]) + # @pytest.mark.parametrize( + # 'num_compositions, sampling_probability, epsilon, delta, noise_parameter, noise_scale, mechanism', # pylint: disable=line-too-long + # [(100, 0.1, 2, None, None, 1.0, 'gaussian'), + # (100, 0.1, None, None, None, 1.0, 'laplace'), + # (100, 0.1, 2, 1e-8, None, 1.2, 'gaussian'), + # (100, 0.1, 2, 1e-8, None, 1.0, 'bernoulli'), + # (100, 0.1, 2, 1e-8, 10, 1.0, 'gaussian')]) + # def test_fail(self, num_compositions, sampling_probability, epsilon, delta, + # noise_parameter, noise_scale, mechanism, accountant_class): + # accountant_class( + # num_compositions=num_compositions, + # sampling_probability=sampling_probability, + # mechanism=mechanism, + # epsilon=epsilon, + # delta=delta, + # noise_parameter=noise_parameter, + # noise_scale=noise_scale, + # ) + From 749f23a19732a4496f3594c3c92f9c5e592f48c8 Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Wed, 29 Jan 2025 16:06:04 +0100 Subject: [PATCH 2/8] pld tests --- pfl/privacy/joint_privacy_accountant.py | 5 -- .../privacy/test_joint_privacy_accountant.py | 46 +++++++++---------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index 7ca3a30..a4c6af7 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ b/pfl/privacy/joint_privacy_accountant.py @@ -4,7 +4,6 @@ ''' import math -import time from dataclasses import dataclass from typing import Callable, List, Optional, Tuple, TypeVar @@ -224,7 +223,6 @@ def compute_delta(large_epsilon): return delta try: - start = time.time() self.large_epsilon = binary_search_function( func=compute_delta, func_monotonically_increasing=True, @@ -234,7 +232,6 @@ def compute_delta(large_epsilon): rtol=RTOL_EPSILON, confidence_threshold= CONFIDENCE_THRESHOLD_EPSILON) - print(f'Ran in {time.time() - start}') except Exception as e: 
raise ValueError( 'Error occurred during binary search for ' @@ -277,7 +274,6 @@ def get_composed_accountant(mechanisms, noise_parameters, return composed_pld def compute_noise_paramters(self, large_epsilon): - start = time.time() noise_parameters = [] for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): @@ -306,7 +302,6 @@ def compute_noise_paramters(self, large_epsilon): noise_parameters.append(noise_parameter) self.noise_parameters = noise_parameters - print(f"Computing noise parameters took {time.time() - start:.2f}s") return noise_parameters diff --git a/tests/privacy/test_joint_privacy_accountant.py b/tests/privacy/test_joint_privacy_accountant.py index dd19107..c9b87c0 100644 --- a/tests/privacy/test_joint_privacy_accountant.py +++ b/tests/privacy/test_joint_privacy_accountant.py @@ -77,7 +77,7 @@ class TestPrivacyAccountants: @pytest.mark.parametrize( 'accountant_class, fn_expected_delta, max_bound', - [(JointPLDPrivacyAccountant, get_expected_delta_pld, 15)]) + [(JointPLDPrivacyAccountant, get_expected_delta_pld, 20)]) @pytest.mark.parametrize( 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(1000, 0.01, 2, None, [0.76, 1], 1.0, ['gaussian', 'gaussian'], None), @@ -125,26 +125,26 @@ def test(self, num_compositions, sampling_probability, epsilon, delta, for pld, p in zip(plds, budget_proportions): np.testing.assert_almost_equal(pld.get_epsilon_for_delta(delta), accountant.large_epsilon * p, decimal=2) - # @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) - # @pytest.mark.parametrize('accountant_class', [(PLDPrivacyAccountant), - # (PRVPrivacyAccountant), - # (RDPPrivacyAccountant)]) - # @pytest.mark.parametrize( - # 'num_compositions, sampling_probability, epsilon, delta, noise_parameter, noise_scale, mechanism', # pylint: disable=line-too-long - # [(100, 0.1, 2, 
None, None, 1.0, 'gaussian'), - # (100, 0.1, None, None, None, 1.0, 'laplace'), - # (100, 0.1, 2, 1e-8, None, 1.2, 'gaussian'), - # (100, 0.1, 2, 1e-8, None, 1.0, 'bernoulli'), - # (100, 0.1, 2, 1e-8, 10, 1.0, 'gaussian')]) - # def test_fail(self, num_compositions, sampling_probability, epsilon, delta, - # noise_parameter, noise_scale, mechanism, accountant_class): - # accountant_class( - # num_compositions=num_compositions, - # sampling_probability=sampling_probability, - # mechanism=mechanism, - # epsilon=epsilon, - # delta=delta, - # noise_parameter=noise_parameter, - # noise_scale=noise_scale, - # ) + @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) + @pytest.mark.parametrize('accountant_class', [JointPLDPrivacyAccountant]) + @pytest.mark.parametrize( + 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long + [(100, 0.1, 2, None, None, 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), + (100, 0.1, None, None, None, 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), + (100, 0.1, 2, 1e-8, None, 1.2, ['gaussian', 'gaussian'], [0.5, 0.5]), + (100, 0.1, 2, 1e-8, None, 1.0, ['bernoulli', 'gaussian'], [0.5, 0.5]), + (100, 0.1, 2, 1e-8, [10, 10], 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), + (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.1, 0.75])]) + def test_fail(self, num_compositions, sampling_probability, epsilon, delta, + noise_parameters, noise_scale, mechanisms, budget_proportions, accountant_class): + accountant_class( + num_compositions=num_compositions, + sampling_probability=sampling_probability, + mechanisms=mechanisms, + epsilon=epsilon, + delta=delta, + budget_proportions=budget_proportions, + noise_parameters=noise_parameters, + noise_scale=noise_scale, + ) From c134fa758e790e079fadb398763fc521125c53de Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Wed, 29 Jan 2025 17:06:01 +0100 Subject: [PATCH 3/8] prv joint accountant --- 
pfl/privacy/__init__.py | 2 +- pfl/privacy/joint_privacy_accountant.py | 164 +++++++++++++++++- .../privacy/test_joint_privacy_accountant.py | 40 ++++- 3 files changed, 197 insertions(+), 9 deletions(-) diff --git a/pfl/privacy/__init__.py b/pfl/privacy/__init__.py index 86d2d7b..c739b2b 100644 --- a/pfl/privacy/__init__.py +++ b/pfl/privacy/__init__.py @@ -1,7 +1,7 @@ # Copyright © 2023-2024 Apple Inc. from .gaussian_mechanism import GaussianMechanism -from .joint_privacy_accountant import JointPLDPrivacyAccountant +from .joint_privacy_accountant import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant from .laplace_mechanism import LaplaceMechanism from .privacy_accountant import PLDPrivacyAccountant, PrivacyAccountant, PRVPrivacyAccountant, RDPPrivacyAccountant from .privacy_mechanism import ( diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index a4c6af7..b596eed 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ b/pfl/privacy/joint_privacy_accountant.py @@ -110,6 +110,8 @@ def __post_init__(self): 'Delta should be a positive real value in range (0, 1)') self.mechanisms = [mechanism.lower() for mechanism in self.mechanisms] + self.min_bounds = [MIN_BOUND_NOISE_PARAMETER] * len(self.mechanisms) + self.max_bounds = [MAX_BOUND_NOISE_PARAMETER] * len(self.mechanisms) @property def cohort_noise_parameters(self): @@ -153,8 +155,6 @@ class JointPLDPrivacyAccountant(JointPrivacyAccountant): def __post_init__(self): super().__post_init__() - self.min_bounds = [MIN_BOUND_NOISE_PARAMETER] * len(self.mechanisms) - self.max_bounds = [MAX_BOUND_NOISE_PARAMETER] * len(self.mechanisms) for mechanism in self.mechanisms: assert mechanism in [ @@ -305,6 +305,166 @@ def compute_noise_paramters(self, large_epsilon): return noise_parameters +@dataclass +class JointPRVPrivacyAccountant(JointPrivacyAccountant): + """ + Privacy Random Variable (PRV) accountant, for heterogeneous composition, + using prv-accountant package. 
+ prv-accountant package: https://pypi.org/project/prv-accountant/ + Based on: “Numerical Composition of Differential Privacy”, Gopi et al., + 2021, https://arxiv.org/pdf/2106.02848.pdf + The PRV accountant methods compute_delta() and compute_epsilon() return + a lower bound, an estimated value, and an upper bound for the delta and + epsilon respectively. The estimated value is used for all further + computations. + + :param eps_error: + Maximum permitted error in epsilon. Typically around 0.1. + :param delta_error: + Maximum error allowed in delta. Typically around delta * 1e-3 + """ + eps_error: Optional[float] = 0.07 + delta_error: Optional[float] = 1e-10 + + def __post_init__(self): + super().__post_init__() + + # epsilon, delta, noise_parameter all defined + if [self.epsilon, self.delta, self.noise_parameters].count(None) == 0: + assert math.isclose( + self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.sampling_probability, self.num_compositions, + self.eps_error, + self.delta_error).compute_delta(self.epsilon, + [self.num_compositions] * len(self.mechanisms))[1], + self.delta, + rel_tol=1e-3), ( + 'Invalid settings of epsilon, delta, noise_parameter' + 'for PRVPrivacyAccountant') + + else: + if self.noise_parameters: + prv_acc = self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.sampling_probability, self.num_compositions, + self.eps_error, self.delta_error) + + if self.epsilon: + # prv_acc.compute_delta() returns lower bound on delta, + # estimate of delta, and upper bound on delta. + # Estimate of delta is used. + (_, delta_estim, + _) = prv_acc.compute_delta(self.epsilon, + [self.num_compositions] * len(self.mechanisms)) + self.delta = delta_estim + else: + # prv_acc.compute_epsilon() returns lower bound on epsilon, + # estimate of epsilon, and upper bound on epsion. + # Estimate of epsilon is used. 
+ (_, epsilon_estim, + _) = prv_acc.compute_epsilon(self.delta, + [self.num_compositions] * len(self.mechanisms)) + self.epsilon = epsilon_estim + + else: + # Do binary search over large_epsilon. Within each iteration of the binary search + # we run an inner binary search to compute the noise parameter for each mechanism + # that enforce condition 1 from above. + def compute_delta(large_epsilon): + delta = self.get_composed_accountant( + self.mechanisms, self.compute_noise_paramters(large_epsilon), + self.sampling_probability, self.num_compositions, + self.eps_error, self.delta_error, + ).compute_delta(self.epsilon, [self.num_compositions] * len(self.mechanisms))[1] + + if delta < self.delta: + # large_epsilon was too small, i.e. noise was too large. + # We can decrease our starting max bound for the next noise parameter search + self.max_bounds = self.noise_parameters + else: + # large_epsilon was too large, i.e. noise was too small. + # We can increase our starting min bound for the next noise parameter search + self.min_bounds = self.noise_parameters + + return delta + + try: + self.large_epsilon = binary_search_function( + func=compute_delta, + func_monotonically_increasing=True, + target_value=self.delta, + min_bound=max(MIN_BOUND_EPSILON, self.epsilon), + max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + rtol=RTOL_EPSILON, + confidence_threshold= + CONFIDENCE_THRESHOLD_EPSILON) + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'large_epsilon using PRV privacy accountant: ' + f'{e}') from e + + @staticmethod + def get_composed_accountant(mechanisms, noise_parameters, + sampling_probability, num_compositions, + eps_error, delta_error): + + prvs = [] + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + prv = PoissonSubsampledGaussianMechanism( + sampling_probability=sampling_probability, + noise_multiplier=noise_parameter) + + elif 
mechanism == 'laplace': + prv = LaplaceMechanism(mu=noise_parameter) + + else: + raise ValueError( + f'Mechanism {mechanism} is not supported for PRV accountant') + + prvs.append(prv) + + acc_prv = PRVAccountant(prvs=prvs, + max_self_compositions=[int(num_compositions)] * len(prvs), + eps_error=eps_error, + delta_error=delta_error) + + return acc_prv + + def compute_noise_paramters(self, large_epsilon): + noise_parameters = [] + + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): + mechanism_epsilon = large_epsilon * p + func = lambda noise_param: self.get_composed_accountant( + [mechanism], [noise_param], + self.sampling_probability, self.num_compositions, + self.eps_error, self.delta_error, + ).compute_delta(mechanism_epsilon, [self.num_compositions])[1] + try: + noise_parameter = binary_search_function( + func=func, + func_monotonically_increasing=False, + target_value=self.delta, + min_bound=min_bound, + max_bound=max_bound, + rtol=RTOL_NOISE_PARAMETER, + confidence_threshold= + CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'noise_parameter using PRV privacy accountant: ' + f'{e}') from e + + noise_parameters.append(noise_parameter) + + self.noise_parameters = noise_parameters + return noise_parameters + + def main(): epsilon = 2 delta = 1e-8 diff --git a/tests/privacy/test_joint_privacy_accountant.py b/tests/privacy/test_joint_privacy_accountant.py index c9b87c0..8fe2726 100644 --- a/tests/privacy/test_joint_privacy_accountant.py +++ b/tests/privacy/test_joint_privacy_accountant.py @@ -12,7 +12,7 @@ from dp_accounting.rdp import rdp_privacy_accountant from prv_accountant import LaplaceMechanism, PoissonSubsampledGaussianMechanism, PRVAccountant -from pfl.privacy import JointPLDPrivacyAccountant +from pfl.privacy import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant @pytest.fixture() @@ -72,16 +72,44 
@@ def get_expected_delta_pld(noise_parameters, sampling_probability, expected_delta = composed_pld.get_delta_for_epsilon(epsilon) return expected_delta, plds +def get_expected_delta_prv(noise_parameters, sampling_probability, + num_compositions, mechanisms, epsilon): + prvs = [] + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + prv = PoissonSubsampledGaussianMechanism( + sampling_probability=sampling_probability, + noise_multiplier=noise_parameter) + + elif mechanism == 'laplace': + prv = LaplaceMechanism(mu=noise_parameter) + + else: + raise ValueError( + f'Mechanism {mechanism} is not supported for PRV accountant') + + prvs.append(prv) + + acc_prv = PRVAccountant(prvs=prvs, + max_self_compositions=[int(num_compositions)] * len(prvs), + eps_error=0.07, + delta_error=1e-10) + + _, expected_delta, _ = acc_prv.compute_delta(epsilon, [int(num_compositions)] * len(prvs)) + + return expected_delta, prvs + class TestPrivacyAccountants: @pytest.mark.parametrize( 'accountant_class, fn_expected_delta, max_bound', - [(JointPLDPrivacyAccountant, get_expected_delta_pld, 20)]) + [(JointPLDPrivacyAccountant, get_expected_delta_pld, 20), + (JointPRVPrivacyAccountant, get_expected_delta_prv, 20)]) @pytest.mark.parametrize( 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(1000, 0.01, 2, None, [0.76, 1], 1.0, ['gaussian', 'gaussian'], None), - (100, 0.1, None, 1e-8, [0.5, 0.5], 0.5, ['gaussian', 'laplace'], None), + (100, 0.1, None, 1e-8, [1, 1.5], 0.5, ['laplace', 'gaussian'], None), (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.25, 0.75])]) def test(self, num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions, @@ -121,9 +149,9 @@ def test(self, num_compositions, sampling_probability, epsilon, delta, np.testing.assert_almost_equal(accountant.delta, 
expected_delta) - if budget_proportions: - for pld, p in zip(plds, budget_proportions): - np.testing.assert_almost_equal(pld.get_epsilon_for_delta(delta), accountant.large_epsilon * p, decimal=2) + # if budget_proportions: + # for pld, p in zip(plds, budget_proportions): + # np.testing.assert_almost_equal(pld.get_epsilon_for_delta(delta), accountant.large_epsilon * p, decimal=2) @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) @pytest.mark.parametrize('accountant_class', [JointPLDPrivacyAccountant]) From 09b31a4ddf0153f4791edc88e71ce067f9aee758 Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Wed, 29 Jan 2025 18:09:36 +0100 Subject: [PATCH 4/8] rdp accountant --- pfl/privacy/__init__.py | 2 +- pfl/privacy/joint_privacy_accountant.py | 160 ++++++++++++++++-- .../privacy/test_joint_privacy_accountant.py | 61 ++++++- 3 files changed, 200 insertions(+), 23 deletions(-) diff --git a/pfl/privacy/__init__.py b/pfl/privacy/__init__.py index c739b2b..d89c3d9 100644 --- a/pfl/privacy/__init__.py +++ b/pfl/privacy/__init__.py @@ -1,7 +1,7 @@ # Copyright © 2023-2024 Apple Inc. 
from .gaussian_mechanism import GaussianMechanism -from .joint_privacy_accountant import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant +from .joint_privacy_accountant import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant, JointRDPPrivacyAccountant from .laplace_mechanism import LaplaceMechanism from .privacy_accountant import PLDPrivacyAccountant, PrivacyAccountant, PRVPrivacyAccountant, RDPPrivacyAccountant from .privacy_mechanism import ( diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index b596eed..d337068 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ b/pfl/privacy/joint_privacy_accountant.py @@ -110,6 +110,21 @@ def __post_init__(self): 'Delta should be a positive real value in range (0, 1)') self.mechanisms = [mechanism.lower() for mechanism in self.mechanisms] + + for mechanism in self.mechanisms: + assert mechanism in [ + 'gaussian', 'laplace' + ], ('Only gaussian and laplace mechanisms are supported.') + + if self.budget_proportions: + assert len(self.mechanisms) == len(self.budget_proportions), ( + 'Mechansim names and budget proportions must have the same length' + ) + + assert math.isclose(sum(self.budget_proportions), 1, rel_tol=1e-3), ( + 'Privacy budget proportions must sum to 1.' + ) + self.min_bounds = [MIN_BOUND_NOISE_PARAMETER] * len(self.mechanisms) self.max_bounds = [MAX_BOUND_NOISE_PARAMETER] * len(self.mechanisms) @@ -156,20 +171,6 @@ class JointPLDPrivacyAccountant(JointPrivacyAccountant): def __post_init__(self): super().__post_init__() - for mechanism in self.mechanisms: - assert mechanism in [ - 'gaussian', 'laplace' - ], ('Only gaussian and laplace mechanisms are supported.') - - if self.budget_proportions: - assert len(self.mechanisms) == len(self.budget_proportions), ( - 'Mechansim names and budget proportions must have the same length' - ) - - assert math.isclose(sum(self.budget_proportions), 1, rel_tol=1e-3), ( - 'Privacy budget proportions must sum to 1.' 
- ) - # Epsilon, delta, noise parameter all defined. Nothing to do. if [self.epsilon, self.delta, self.noise_parameters].count(None) == 0: assert math.isclose( @@ -464,6 +465,137 @@ def compute_noise_paramters(self, large_epsilon): self.noise_parameters = noise_parameters return noise_parameters +@dataclass +class JointRDPPrivacyAccountant(JointPrivacyAccountant): + """ + Privacy accountant using Renyi differential privacy (RDP) from + dp-accounting package. + Implementation in dp-accounting: https://github.com/google/differential-privacy/blob/main/python/dp_accounting/rdp/rdp_privacy_accountant.py # pylint: disable=line-too-long + The default neighbouring relation for the RDP account is "add or remove + one". The default RDP orders used are: + ([1 + x / 10. for x in range(1, 100)] + list(range(11, 64)) + + [128, 256, 512, 1024]). + """ + + def __post_init__(self): + super().__post_init__() + + # epsilon, delta, noise_parameters all defined + if [self.epsilon, self.delta, self.noise_parameters].count(None) == 0: + assert math.isclose( + self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.sampling_probability, + self.num_compositions).get_delta(self.epsilon), + self.delta, + rel_tol=1e-3), ( + 'Invalid settings of epsilon, delta, noise_parameter ' + 'for RDPPrivacyAccountant') + + else: + if self.noise_parameters: + + rdp_accountant = self.get_composed_accountant( + self.mechanisms, self.noise_parameters, + self.sampling_probability, self.num_compositions) + + if self.epsilon: + self.delta = rdp_accountant.get_delta(self.epsilon) + + else: + self.epsilon = rdp_accountant.get_epsilon(self.delta) + + else: + # Do binary search over large_epsilon. Within each iteration of the binary search + # we run an inner binary search to compute the noise parameter for each mechanism + # that enforce condition 1 from above. 
+ def compute_delta(large_epsilon): + delta = self.get_composed_accountant( + self.mechanisms, self.compute_noise_paramters(large_epsilon), + self.sampling_probability, self.num_compositions + ).get_delta(self.epsilon) + + if delta < self.delta: + # large_epsilon was too small, i.e. noise was too large. + # We can decrease our starting max bound for the next noise parameter search + self.max_bounds = self.noise_parameters + else: + # large_epsilon was too large, i.e. noise was too small. + # We can increase our starting min bound for the next noise parameter search + self.min_bounds = self.noise_parameters + + return delta + + try: + self.large_epsilon = binary_search_function( + func=compute_delta, + func_monotonically_increasing=True, + target_value=self.delta, + min_bound=max(MIN_BOUND_EPSILON, self.epsilon), + max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + rtol=RTOL_EPSILON, + confidence_threshold= + CONFIDENCE_THRESHOLD_EPSILON) + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'large_epsilon using RDP privacy accountant: ' + f'{e}') from e + + @staticmethod + def get_composed_accountant(mechanisms, noise_parameters, + sampling_probability, num_compositions): + + rdp_accountant = rdp_privacy_accountant.RdpAccountant() + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + event = dp_event.PoissonSampledDpEvent( + sampling_probability, + dp_event.GaussianDpEvent(noise_parameter)) + + elif mechanism == 'laplace': + event = dp_event.LaplaceDpEvent(noise_parameter) + pass + + else: + raise ValueError( + f'Mechanism {mechanism} is not supported for Renyi accountant') + + rdp_accountant = rdp_accountant.compose( + event, int(num_compositions)) + + return rdp_accountant + + def compute_noise_paramters(self, large_epsilon): + noise_parameters = [] + + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, 
self.min_bounds, self.max_bounds): + mechanism_epsilon = large_epsilon * p + func = lambda noise_param: self.get_composed_accountant( + [mechanism], [noise_param], + self.sampling_probability, self.num_compositions + ).get_delta(mechanism_epsilon) + try: + noise_parameter = binary_search_function( + func=func, + func_monotonically_increasing=False, + target_value=self.delta, + min_bound=min_bound, + max_bound=max_bound, + rtol=RTOL_NOISE_PARAMETER, + confidence_threshold= + CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + except Exception as e: + raise ValueError( + 'Error occurred during binary search for ' + 'noise_parameter using RDP privacy accountant: ' + f'{e}') from e + + noise_parameters.append(noise_parameter) + + self.noise_parameters = noise_parameters + return noise_parameters + def main(): epsilon = 2 diff --git a/tests/privacy/test_joint_privacy_accountant.py b/tests/privacy/test_joint_privacy_accountant.py index 8fe2726..ab6ece0 100644 --- a/tests/privacy/test_joint_privacy_accountant.py +++ b/tests/privacy/test_joint_privacy_accountant.py @@ -5,6 +5,7 @@ from unittest.mock import patch +import dp_accounting.pld import numpy as np import pytest from dp_accounting import dp_event @@ -12,7 +13,7 @@ from dp_accounting.rdp import rdp_privacy_accountant from prv_accountant import LaplaceMechanism, PoissonSubsampledGaussianMechanism, PRVAccountant -from pfl.privacy import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant +from pfl.privacy import JointPLDPrivacyAccountant, JointRDPPrivacyAccountant, JointPRVPrivacyAccountant @pytest.fixture() @@ -97,7 +98,39 @@ def get_expected_delta_prv(noise_parameters, sampling_probability, _, expected_delta, _ = acc_prv.compute_delta(epsilon, [int(num_compositions)] * len(prvs)) - return expected_delta, prvs + individual_prvs = [PRVAccountant(prvs=prv, + max_self_compositions=int(num_compositions), + eps_error=0.07, + delta_error=1e-10) for prv in prvs] + return expected_delta, individual_prvs + +def 
get_expected_delta_rdp(noise_parameters, sampling_probability, + num_compositions, mechanisms, epsilon): + + rdps = [] + full_rdp_accountant = rdp_privacy_accountant.RdpAccountant() + for mechanism, noise_parameter in zip(mechanisms, noise_parameters): + if mechanism == 'gaussian': + event = dp_event.PoissonSampledDpEvent( + sampling_probability, dp_event.GaussianDpEvent(noise_parameter)) + + elif mechanism == 'laplace': + event = dp_event.LaplaceDpEvent(noise_parameter) + + else: + raise ValueError( + f'Mechanism {mechanism} is not supported for PRV accountant') + + rdp_accountant = rdp_privacy_accountant.RdpAccountant().compose( + event, int(num_compositions)) + + rdps.append(rdp_accountant) + + full_rdp_accountant = full_rdp_accountant.compose( + event, int(num_compositions)) + + expected_delta = full_rdp_accountant.get_delta(epsilon) + return expected_delta, rdps class TestPrivacyAccountants: @@ -105,7 +138,8 @@ class TestPrivacyAccountants: @pytest.mark.parametrize( 'accountant_class, fn_expected_delta, max_bound', [(JointPLDPrivacyAccountant, get_expected_delta_pld, 20), - (JointPRVPrivacyAccountant, get_expected_delta_prv, 20)]) + (JointPRVPrivacyAccountant, get_expected_delta_prv, 20), + (JointRDPPrivacyAccountant, get_expected_delta_rdp, 20)]) @pytest.mark.parametrize( 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(1000, 0.01, 2, None, [0.76, 1], 1.0, ['gaussian', 'gaussian'], None), @@ -141,7 +175,7 @@ def test(self, num_compositions, sampling_probability, epsilon, delta, noise_parameters = ([cohort_noise_parameter / noise_scale for cohort_noise_parameter in accountant.cohort_noise_parameters]) - expected_delta, plds = fn_expected_delta(noise_parameters, + expected_delta, mechanism_accountants = fn_expected_delta(noise_parameters, sampling_probability, num_compositions, mechanisms, accountant.epsilon) @@ -149,12 +183,23 @@ def test(self, 
num_compositions, sampling_probability, epsilon, delta, np.testing.assert_almost_equal(accountant.delta, expected_delta) - # if budget_proportions: - # for pld, p in zip(plds, budget_proportions): - # np.testing.assert_almost_equal(pld.get_epsilon_for_delta(delta), accountant.large_epsilon * p, decimal=2) + if budget_proportions: + if accountant_class is JointPLDPrivacyAccountant: + for acc, p in zip(mechanism_accountants, budget_proportions): + np.testing.assert_almost_equal(acc.get_epsilon_for_delta(delta), + accountant.large_epsilon * p, decimal=2) + elif accountant_class is JointPRVPrivacyAccountant: + for acc, p in zip(mechanism_accountants, budget_proportions): + np.testing.assert_almost_equal(acc.compute_epsilon(delta, [num_compositions])[1], + accountant.large_epsilon * p, decimal=2) + elif accountant_class is JointRDPPrivacyAccountant: + for acc, p in zip(mechanism_accountants, budget_proportions): + np.testing.assert_almost_equal(acc.get_epsilon(delta), + accountant.large_epsilon * p, decimal=2) + @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) - @pytest.mark.parametrize('accountant_class', [JointPLDPrivacyAccountant]) + @pytest.mark.parametrize('accountant_class', [JointPLDPrivacyAccountant, JointPRVPrivacyAccountant, JointRDPPrivacyAccountant]) @pytest.mark.parametrize( 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(100, 0.1, 2, None, None, 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), From 3b1585fefec062c2fa355918ac2f22da8f6e6838 Mon Sep 17 00:00:00 2001 From: Jonny Scott Date: Sun, 2 Feb 2025 13:47:29 +0100 Subject: [PATCH 5/8] comments update --- pfl/privacy/joint_privacy_accountant.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index d337068..cc83b9b 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ 
b/pfl/privacy/joint_privacy_accountant.py @@ -5,7 +5,7 @@ import math from dataclasses import dataclass -from typing import Callable, List, Optional, Tuple, TypeVar +from typing import List, Optional from dp_accounting import dp_event from dp_accounting.pld import privacy_loss_distribution @@ -55,10 +55,11 @@ class JointPrivacyAccountant: :param mechanisms: The list of noise mechanisms to be used, each can be either Gaussian or Laplace. :param epsilon: - The privacy loss random variable. It controls how much the output of - the mechanism can vary between two neighboring databases. + The privacy loss random variable. The total epsilon allowed for + the composition of all the mechanisms. :param delta: - The probability that all privacy will be lost. + The probability that all privacy will be lost. The total delta allowed for + the composition of all the mechanisms. :param budget_proportions: List specifying the proportion of the total (epsilon, delta) privacy budget each mechanism is allocated. @@ -465,6 +466,7 @@ def compute_noise_paramters(self, large_epsilon): self.noise_parameters = noise_parameters return noise_parameters + @dataclass class JointRDPPrivacyAccountant(JointPrivacyAccountant): """ From 3a758f31c657c648b58f77cd22eed624314a7786 Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Mon, 3 Feb 2025 11:33:31 +0100 Subject: [PATCH 6/8] code cleanup and formatting --- pfl/privacy/joint_privacy_accountant.py | 207 ++++++++++-------- .../privacy/test_joint_privacy_accountant.py | 120 ++++++---- 2 files changed, 186 insertions(+), 141 deletions(-) diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index cc83b9b..a0cc571 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ b/pfl/privacy/joint_privacy_accountant.py @@ -1,6 +1,6 @@ # Copyright © 2023-2024 Apple Inc. ''' -Privacy accountants for differential privacy. +Joint privacy accountants for differential privacy with multiple mechanisms. 
''' import math @@ -31,10 +31,8 @@ class JointPrivacyAccountant: Tracks the privacy loss over multiple composition steps with multiple mechanisms simultaneously. Either two or three of the variables epsilon, delta and noise_parameters must be defined. - If all three are defined a check will be performed to make sure a valid set of variable values has been provided. - If two are defined, the remaining variable can be computed. In the case that epsilon and delta are defined then the budget_proportions parameter must be provided, specifying what fraction of the total budget each @@ -43,8 +41,8 @@ class JointPrivacyAccountant: and noise parameters [sigma_1, sigma_2, ...] such that the following two constraints hold: 1. For each i, mechanism_i with noise parameter sigma_i is (large_epsilon * p_i, delta) - DP after all composition steps - 2. The composition of all mechanisms over all steps is (epsilon, delta) DP + DP after all composition steps, + 2. The composition of all mechanisms over all steps is (epsilon, delta) DP. :param num_compositions: Maximum number of compositions to be performed with each mechanism. 
@@ -102,7 +100,7 @@ def __post_init__(self): if self.noise_parameters is not None: for noise_parameter in self.noise_parameters: assert noise_parameter > 0, ( - 'All noise parameters must be positive real values.') + 'All noise parameters must be positive real values') if self.epsilon is not None: assert self.epsilon >= 0, ( 'Epsilon must be a non-negative real value') @@ -115,16 +113,20 @@ def __post_init__(self): for mechanism in self.mechanisms: assert mechanism in [ 'gaussian', 'laplace' - ], ('Only gaussian and laplace mechanisms are supported.') + ], ('Only gaussian and laplace mechanisms are supported') if self.budget_proportions: assert len(self.mechanisms) == len(self.budget_proportions), ( 'Mechansim names and budget proportions must have the same length' ) - assert math.isclose(sum(self.budget_proportions), 1, rel_tol=1e-3), ( - 'Privacy budget proportions must sum to 1.' - ) + assert math.isclose( + sum(self.budget_proportions), 1, + rel_tol=1e-3), ('Privacy budget proportions must sum to 1') + + for p in self.budget_proportions: + assert (p > 0) and (p < 1), ( + 'Privacy budget proportions must be in range (0, 1)') self.min_bounds = [MIN_BOUND_NOISE_PARAMETER] * len(self.mechanisms) self.max_bounds = [MAX_BOUND_NOISE_PARAMETER] * len(self.mechanisms) @@ -135,7 +137,10 @@ def cohort_noise_parameters(self): Noise parameters to be used on a cohort of users. Noise scale is considered. 
""" - return [noise_parameter * self.noise_scale for noise_parameter in self.noise_parameters] + return [ + noise_parameter * self.noise_scale + for noise_parameter in self.noise_parameters + ] @dataclass @@ -190,10 +195,10 @@ def __post_init__(self): # Compute remaining variable if self.noise_parameters: composed_pld = self.get_composed_accountant( - self.mechanisms, self.noise_parameters, - self.pessimistic_estimate, self.sampling_probability, - self.use_connect_dots, self.value_discretization_interval, - self.num_compositions) + self.mechanisms, self.noise_parameters, + self.pessimistic_estimate, self.sampling_probability, + self.use_connect_dots, self.value_discretization_interval, + self.num_compositions) if self.epsilon: self.delta = composed_pld.get_delta_for_epsilon( @@ -205,12 +210,16 @@ def __post_init__(self): else: # Do binary search over large_epsilon. Within each iteration of the binary search # we run an inner binary search to compute the noise parameter for each mechanism - # that enforce condition 1 from above. + # that enforce condition 1 above. def compute_delta(large_epsilon): delta = self.get_composed_accountant( - self.mechanisms, self.compute_noise_paramters(large_epsilon), self.pessimistic_estimate, - self.sampling_probability, self.use_connect_dots, self. 
- value_discretization_interval, self.num_compositions, + self.mechanisms, + self.compute_noise_paramters(large_epsilon), + self.pessimistic_estimate, + self.sampling_probability, + self.use_connect_dots, + self.value_discretization_interval, + self.num_compositions, ).get_delta_for_epsilon(self.epsilon) if delta < self.delta: @@ -230,10 +239,11 @@ def compute_delta(large_epsilon): func_monotonically_increasing=True, target_value=self.delta, min_bound=max(MIN_BOUND_EPSILON, self.epsilon), - max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + max_bound=min( + MAX_BOUND_EPSILON, + self.epsilon / min(*self.budget_proportions)), rtol=RTOL_EPSILON, - confidence_threshold= - CONFIDENCE_THRESHOLD_EPSILON) + confidence_threshold=CONFIDENCE_THRESHOLD_EPSILON) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -255,7 +265,8 @@ def get_composed_accountant(mechanisms, noise_parameters, pessimistic_estimate=pessimistic_estimate, sampling_prob=sampling_probability, use_connect_dots=use_connect_dots, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval + ) elif mechanism == 'laplace': pld = privacy_loss_distribution.from_laplace_mechanism( parameter=noise_parameter, @@ -263,7 +274,8 @@ def get_composed_accountant(mechanisms, noise_parameters, pessimistic_estimate=pessimistic_estimate, sampling_prob=sampling_probability, use_connect_dots=use_connect_dots, - value_discretization_interval=value_discretization_interval) + value_discretization_interval=value_discretization_interval + ) else: raise ValueError(f'mechanism {mechanism} is not supported.') @@ -271,19 +283,32 @@ def get_composed_accountant(mechanisms, noise_parameters, if composed_pld is None: composed_pld = pld.self_compose(num_compositions) else: - composed_pld = composed_pld.compose(pld.self_compose(num_compositions)) + composed_pld = composed_pld.compose( + 
pld.self_compose(num_compositions)) return composed_pld def compute_noise_paramters(self, large_epsilon): + """ + Compute noise parameter for each mechanism such that when self composed + it is (large_epsilon * p, delta) DP, where p is the budget proportion + of the mechanism + """ noise_parameters = [] - for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, + self.budget_proportions, + self.min_bounds, + self.max_bounds): mechanism_epsilon = large_epsilon * p func = lambda noise_param: self.get_composed_accountant( - [mechanism], [noise_param], self.pessimistic_estimate, - self.sampling_probability, self.use_connect_dots, self. - value_discretization_interval, self.num_compositions, + [mechanism], + [noise_param], + self.pessimistic_estimate, + self.sampling_probability, + self.use_connect_dots, + self.value_discretization_interval, + self.num_compositions, ).get_delta_for_epsilon(mechanism_epsilon) try: noise_parameter = binary_search_function( @@ -293,8 +318,7 @@ def compute_noise_paramters(self, large_epsilon): min_bound=min_bound, max_bound=max_bound, rtol=RTOL_NOISE_PARAMETER, - confidence_threshold= - CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + confidence_threshold=CONFIDENCE_THRESHOLD_NOISE_PARAMETER) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -310,8 +334,8 @@ def compute_noise_paramters(self, large_epsilon): @dataclass class JointPRVPrivacyAccountant(JointPrivacyAccountant): """ - Privacy Random Variable (PRV) accountant, for heterogeneous composition, - using prv-accountant package. + For each mechanism uses the Privacy Random Variable (PRV) accountant, + for heterogeneous composition, using prv-accountant package. 
prv-accountant package: https://pypi.org/project/prv-accountant/ Based on: “Numerical Composition of Differential Privacy”, Gopi et al., 2021, https://arxiv.org/pdf/2106.02848.pdf @@ -337,9 +361,9 @@ def __post_init__(self): self.get_composed_accountant( self.mechanisms, self.noise_parameters, self.sampling_probability, self.num_compositions, - self.eps_error, - self.delta_error).compute_delta(self.epsilon, - [self.num_compositions] * len(self.mechanisms))[1], + self.eps_error, self.delta_error).compute_delta( + self.epsilon, + [self.num_compositions] * len(self.mechanisms))[1], self.delta, rel_tol=1e-3), ( 'Invalid settings of epsilon, delta, noise_parameter' @@ -356,17 +380,17 @@ def __post_init__(self): # prv_acc.compute_delta() returns lower bound on delta, # estimate of delta, and upper bound on delta. # Estimate of delta is used. - (_, delta_estim, - _) = prv_acc.compute_delta(self.epsilon, - [self.num_compositions] * len(self.mechanisms)) + (_, delta_estim, _) = prv_acc.compute_delta( + self.epsilon, + [self.num_compositions] * len(self.mechanisms)) self.delta = delta_estim else: # prv_acc.compute_epsilon() returns lower bound on epsilon, # estimate of epsilon, and upper bound on epsion. # Estimate of epsilon is used. - (_, epsilon_estim, - _) = prv_acc.compute_epsilon(self.delta, - [self.num_compositions] * len(self.mechanisms)) + (_, epsilon_estim, _) = prv_acc.compute_epsilon( + self.delta, + [self.num_compositions] * len(self.mechanisms)) self.epsilon = epsilon_estim else: @@ -375,10 +399,14 @@ def __post_init__(self): # that enforce condition 1 from above. 
def compute_delta(large_epsilon): delta = self.get_composed_accountant( - self.mechanisms, self.compute_noise_paramters(large_epsilon), - self.sampling_probability, self.num_compositions, - self.eps_error, self.delta_error, - ).compute_delta(self.epsilon, [self.num_compositions] * len(self.mechanisms))[1] + self.mechanisms, + self.compute_noise_paramters(large_epsilon), + self.sampling_probability, + self.num_compositions, + self.eps_error, + self.delta_error, + ).compute_delta(self.epsilon, [self.num_compositions] * + len(self.mechanisms))[1] if delta < self.delta: # large_epsilon was too small, i.e. noise was too large. @@ -397,10 +425,11 @@ def compute_delta(large_epsilon): func_monotonically_increasing=True, target_value=self.delta, min_bound=max(MIN_BOUND_EPSILON, self.epsilon), - max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + max_bound=min( + MAX_BOUND_EPSILON, + self.epsilon / min(*self.budget_proportions)), rtol=RTOL_EPSILON, - confidence_threshold= - CONFIDENCE_THRESHOLD_EPSILON) + confidence_threshold=CONFIDENCE_THRESHOLD_EPSILON) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -424,12 +453,14 @@ def get_composed_accountant(mechanisms, noise_parameters, else: raise ValueError( - f'Mechanism {mechanism} is not supported for PRV accountant') + f'Mechanism {mechanism} is not supported for PRV accountant' + ) prvs.append(prv) acc_prv = PRVAccountant(prvs=prvs, - max_self_compositions=[int(num_compositions)] * len(prvs), + max_self_compositions=[int(num_compositions)] * + len(prvs), eps_error=eps_error, delta_error=delta_error) @@ -438,12 +469,18 @@ def get_composed_accountant(mechanisms, noise_parameters, def compute_noise_paramters(self, large_epsilon): noise_parameters = [] - for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, + 
self.budget_proportions, + self.min_bounds, + self.max_bounds): mechanism_epsilon = large_epsilon * p func = lambda noise_param: self.get_composed_accountant( - [mechanism], [noise_param], - self.sampling_probability, self.num_compositions, - self.eps_error, self.delta_error, + [mechanism], + [noise_param], + self.sampling_probability, + self.num_compositions, + self.eps_error, + self.delta_error, ).compute_delta(mechanism_epsilon, [self.num_compositions])[1] try: noise_parameter = binary_search_function( @@ -453,8 +490,7 @@ def compute_noise_paramters(self, large_epsilon): min_bound=min_bound, max_bound=max_bound, rtol=RTOL_NOISE_PARAMETER, - confidence_threshold= - CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + confidence_threshold=CONFIDENCE_THRESHOLD_NOISE_PARAMETER) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -470,8 +506,8 @@ def compute_noise_paramters(self, large_epsilon): @dataclass class JointRDPPrivacyAccountant(JointPrivacyAccountant): """ - Privacy accountant using Renyi differential privacy (RDP) from - dp-accounting package. + For each mechanism uses the Privacy accountant using Renyi differential + privacy (RDP) from dp-accounting package. Implementation in dp-accounting: https://github.com/google/differential-privacy/blob/main/python/dp_accounting/rdp/rdp_privacy_accountant.py # pylint: disable=line-too-long The default neighbouring relation for the RDP account is "add or remove one". The default RDP orders used are: @@ -513,9 +549,10 @@ def __post_init__(self): # that enforce condition 1 from above. def compute_delta(large_epsilon): delta = self.get_composed_accountant( - self.mechanisms, self.compute_noise_paramters(large_epsilon), - self.sampling_probability, self.num_compositions - ).get_delta(self.epsilon) + self.mechanisms, + self.compute_noise_paramters(large_epsilon), + self.sampling_probability, + self.num_compositions).get_delta(self.epsilon) if delta < self.delta: # large_epsilon was too small, i.e. 
noise was too large. @@ -534,10 +571,11 @@ def compute_delta(large_epsilon): func_monotonically_increasing=True, target_value=self.delta, min_bound=max(MIN_BOUND_EPSILON, self.epsilon), - max_bound=min(MAX_BOUND_EPSILON, self.epsilon / min(*self.budget_proportions)), + max_bound=min( + MAX_BOUND_EPSILON, + self.epsilon / min(*self.budget_proportions)), rtol=RTOL_EPSILON, - confidence_threshold= - CONFIDENCE_THRESHOLD_EPSILON) + confidence_threshold=CONFIDENCE_THRESHOLD_EPSILON) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -561,22 +599,25 @@ def get_composed_accountant(mechanisms, noise_parameters, else: raise ValueError( - f'Mechanism {mechanism} is not supported for Renyi accountant') + f'Mechanism {mechanism} is not supported for Renyi accountant' + ) - rdp_accountant = rdp_accountant.compose( - event, int(num_compositions)) + rdp_accountant = rdp_accountant.compose(event, + int(num_compositions)) return rdp_accountant def compute_noise_paramters(self, large_epsilon): noise_parameters = [] - for mechanism, p, min_bound, max_bound in zip(self.mechanisms, self.budget_proportions, self.min_bounds, self.max_bounds): + for mechanism, p, min_bound, max_bound in zip(self.mechanisms, + self.budget_proportions, + self.min_bounds, + self.max_bounds): mechanism_epsilon = large_epsilon * p func = lambda noise_param: self.get_composed_accountant( - [mechanism], [noise_param], - self.sampling_probability, self.num_compositions - ).get_delta(mechanism_epsilon) + [mechanism], [noise_param], self.sampling_probability, self. 
+ num_compositions).get_delta(mechanism_epsilon) try: noise_parameter = binary_search_function( func=func, @@ -585,8 +626,7 @@ def compute_noise_paramters(self, large_epsilon): min_bound=min_bound, max_bound=max_bound, rtol=RTOL_NOISE_PARAMETER, - confidence_threshold= - CONFIDENCE_THRESHOLD_NOISE_PARAMETER) + confidence_threshold=CONFIDENCE_THRESHOLD_NOISE_PARAMETER) except Exception as e: raise ValueError( 'Error occurred during binary search for ' @@ -597,22 +637,3 @@ def compute_noise_paramters(self, large_epsilon): self.noise_parameters = noise_parameters return noise_parameters - - -def main(): - epsilon = 2 - delta = 1e-8 - num_compositions = 100 - sample_prob = 0.1 - mechanisms = ['gaussian', 'gaussian'] - budget_proportions = [0.25, 0.75] - - accountant = JointPLDPrivacyAccountant(num_compositions, sample_prob, mechanisms, - epsilon=epsilon, delta=delta, budget_proportions=budget_proportions) - - print(accountant.noise_parameters) - print(accountant.large_epsilon) - - -if __name__ == '__main__': - main() diff --git a/tests/privacy/test_joint_privacy_accountant.py b/tests/privacy/test_joint_privacy_accountant.py index ab6ece0..ee3b7d8 100644 --- a/tests/privacy/test_joint_privacy_accountant.py +++ b/tests/privacy/test_joint_privacy_accountant.py @@ -1,11 +1,10 @@ # Copyright © 2023-2024 Apple Inc. ''' -Test privacy accountants for DP in privacy_accountant.py. +Test joint privacy accountants for DP in joint_privacy_accountant.py. 
''' from unittest.mock import patch -import dp_accounting.pld import numpy as np import pytest from dp_accounting import dp_event @@ -13,7 +12,7 @@ from dp_accounting.rdp import rdp_privacy_accountant from prv_accountant import LaplaceMechanism, PoissonSubsampledGaussianMechanism, PRVAccountant -from pfl.privacy import JointPLDPrivacyAccountant, JointRDPPrivacyAccountant, JointPRVPrivacyAccountant +from pfl.privacy import JointPLDPrivacyAccountant, JointPRVPrivacyAccountant, JointRDPPrivacyAccountant @pytest.fixture() @@ -65,14 +64,12 @@ def get_expected_delta_pld(noise_parameters, sampling_probability, composed_pld = None for pld in plds: - if composed_pld: - composed_pld = composed_pld.compose(pld) - else: - composed_pld = pld + composed_pld = composed_pld.compose(pld) if composed_pld else pld expected_delta = composed_pld.get_delta_for_epsilon(epsilon) return expected_delta, plds + def get_expected_delta_prv(noise_parameters, sampling_probability, num_compositions, mechanisms, epsilon): prvs = [] @@ -92,18 +89,23 @@ def get_expected_delta_prv(noise_parameters, sampling_probability, prvs.append(prv) acc_prv = PRVAccountant(prvs=prvs, - max_self_compositions=[int(num_compositions)] * len(prvs), + max_self_compositions=[int(num_compositions)] * + len(prvs), eps_error=0.07, delta_error=1e-10) - _, expected_delta, _ = acc_prv.compute_delta(epsilon, [int(num_compositions)] * len(prvs)) + _, expected_delta, _ = acc_prv.compute_delta( + epsilon, [int(num_compositions)] * len(prvs)) - individual_prvs = [PRVAccountant(prvs=prv, - max_self_compositions=int(num_compositions), - eps_error=0.07, - delta_error=1e-10) for prv in prvs] + individual_prvs = [ + PRVAccountant(prvs=prv, + max_self_compositions=int(num_compositions), + eps_error=0.07, + delta_error=1e-10) for prv in prvs + ] return expected_delta, individual_prvs + def get_expected_delta_rdp(noise_parameters, sampling_probability, num_compositions, mechanisms, epsilon): @@ -112,7 +114,8 @@ def 
get_expected_delta_rdp(noise_parameters, sampling_probability, for mechanism, noise_parameter in zip(mechanisms, noise_parameters): if mechanism == 'gaussian': event = dp_event.PoissonSampledDpEvent( - sampling_probability, dp_event.GaussianDpEvent(noise_parameter)) + sampling_probability, + dp_event.GaussianDpEvent(noise_parameter)) elif mechanism == 'laplace': event = dp_event.LaplaceDpEvent(noise_parameter) @@ -144,7 +147,8 @@ class TestPrivacyAccountants: 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(1000, 0.01, 2, None, [0.76, 1], 1.0, ['gaussian', 'gaussian'], None), (100, 0.1, None, 1e-8, [1, 1.5], 0.5, ['laplace', 'gaussian'], None), - (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.25, 0.75])]) + (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.25, 0.75]) + ]) def test(self, num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions, accountant_class, fn_expected_delta, max_bound): @@ -152,17 +156,19 @@ def test(self, num_compositions, sampling_probability, epsilon, delta, # noise parameter - these settings speed up the binary search for tests with patch( 'pfl.privacy.joint_privacy_accountant.MIN_BOUND_NOISE_PARAMETER', - new=2), patch( - 'pfl.privacy.joint_privacy_accountant.MAX_BOUND_NOISE_PARAMETER', - new=max_bound), patch( - 'pfl.privacy.joint_privacy_accountant.MIN_BOUND_EPSILON', - new=2.5), patch( - 'pfl.privacy.joint_privacy_accountant.MAX_BOUND_EPSILON', - new=2.6): - with patch('pfl.privacy.joint_privacy_accountant.RTOL_NOISE_PARAMETER', - new=0.1), patch( - 'pfl.privacy.joint_privacy_accountant.RTOL_EPSILON', - new=0.1): + new=2 + ), patch( + 'pfl.privacy.joint_privacy_accountant.MAX_BOUND_NOISE_PARAMETER', + new=max_bound + ), patch('pfl.privacy.joint_privacy_accountant.MIN_BOUND_EPSILON', + new=2.5), patch( + 
'pfl.privacy.joint_privacy_accountant.MAX_BOUND_EPSILON', + new=2.6): + with patch( + 'pfl.privacy.joint_privacy_accountant.RTOL_NOISE_PARAMETER', + new=0.1), patch( + 'pfl.privacy.joint_privacy_accountant.RTOL_EPSILON', + new=0.1): accountant = accountant_class( num_compositions=num_compositions, sampling_probability=sampling_probability, @@ -172,44 +178,63 @@ def test(self, num_compositions, sampling_probability, epsilon, delta, budget_proportions=budget_proportions, noise_parameters=noise_parameters, noise_scale=noise_scale) - noise_parameters = ([cohort_noise_parameter / noise_scale - for cohort_noise_parameter in accountant.cohort_noise_parameters]) + noise_parameters = ([ + cohort_noise_parameter / noise_scale + for cohort_noise_parameter in + accountant.cohort_noise_parameters + ]) - expected_delta, mechanism_accountants = fn_expected_delta(noise_parameters, - sampling_probability, - num_compositions, mechanisms, - accountant.epsilon) + expected_delta, mechanism_accountants = fn_expected_delta( + noise_parameters, sampling_probability, num_compositions, + mechanisms, accountant.epsilon) np.testing.assert_almost_equal(accountant.delta, expected_delta) if budget_proportions: if accountant_class is JointPLDPrivacyAccountant: - for acc, p in zip(mechanism_accountants, budget_proportions): - np.testing.assert_almost_equal(acc.get_epsilon_for_delta(delta), - accountant.large_epsilon * p, decimal=2) + for acc, p in zip(mechanism_accountants, + budget_proportions): + np.testing.assert_almost_equal( + acc.get_epsilon_for_delta(delta), + accountant.large_epsilon * p, + decimal=2) elif accountant_class is JointPRVPrivacyAccountant: - for acc, p in zip(mechanism_accountants, budget_proportions): - np.testing.assert_almost_equal(acc.compute_epsilon(delta, [num_compositions])[1], - accountant.large_epsilon * p, decimal=2) + for acc, p in zip(mechanism_accountants, + budget_proportions): + np.testing.assert_almost_equal( + acc.compute_epsilon(delta, + 
[num_compositions])[1], + accountant.large_epsilon * p, + decimal=2) elif accountant_class is JointRDPPrivacyAccountant: - for acc, p in zip(mechanism_accountants, budget_proportions): - np.testing.assert_almost_equal(acc.get_epsilon(delta), - accountant.large_epsilon * p, decimal=2) - + for acc, p in zip(mechanism_accountants, + budget_proportions): + np.testing.assert_almost_equal( + acc.get_epsilon(delta), + accountant.large_epsilon * p, + decimal=2) @pytest.mark.xfail(raises=(ValueError, AssertionError), strict=True) - @pytest.mark.parametrize('accountant_class', [JointPLDPrivacyAccountant, JointPRVPrivacyAccountant, JointRDPPrivacyAccountant]) + @pytest.mark.parametrize('accountant_class', [ + JointPLDPrivacyAccountant, JointPRVPrivacyAccountant, + JointRDPPrivacyAccountant + ]) @pytest.mark.parametrize( 'num_compositions, sampling_probability, epsilon, delta, noise_parameters, noise_scale, mechanisms, budget_proportions', # pylint: disable=line-too-long [(100, 0.1, 2, None, None, 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), - (100, 0.1, None, None, None, 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), + (100, 0.1, None, None, None, 1.0, ['gaussian', 'gaussian' + ], [0.5, 0.5]), (100, 0.1, 2, 1e-8, None, 1.2, ['gaussian', 'gaussian'], [0.5, 0.5]), (100, 0.1, 2, 1e-8, None, 1.0, ['bernoulli', 'gaussian'], [0.5, 0.5]), - (100, 0.1, 2, 1e-8, [10, 10], 1.0, ['gaussian', 'gaussian'], [0.5, 0.5]), - (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.1, 0.75])]) + (100, 0.1, 2, 1e-8, [10, 10], 1.0, ['gaussian', 'gaussian' + ], [0.5, 0.5]), + (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [0.1, 0.75]), + (100, 0.1, 2, 1e-8, None, 0.8, ['gaussian', 'gaussian'], [1.1, 0.75]) + ]) def test_fail(self, num_compositions, sampling_probability, epsilon, delta, - noise_parameters, noise_scale, mechanisms, budget_proportions, accountant_class): + noise_parameters, noise_scale, mechanisms, + budget_proportions, accountant_class): accountant_class( 
num_compositions=num_compositions, sampling_probability=sampling_probability, @@ -220,4 +245,3 @@ def test_fail(self, num_compositions, sampling_probability, epsilon, delta, noise_parameters=noise_parameters, noise_scale=noise_scale, ) - From 52a7d34da56105c96e67395f1b1f1cbc1ec207c5 Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Mon, 3 Feb 2025 11:36:32 +0100 Subject: [PATCH 7/8] update docs --- docs/source/reference/privacy.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/source/reference/privacy.rst b/docs/source/reference/privacy.rst index d8b37ea..c0716ae 100644 --- a/docs/source/reference/privacy.rst +++ b/docs/source/reference/privacy.rst @@ -46,6 +46,21 @@ Privacy accountants .. autoclass:: pfl.privacy.RDPPrivacyAccountant :members: +Joint Privacy accountants +------------------------- + +.. autoclass:: pfl.privacy.JointPrivacyAccountant + :members: + +.. autoclass:: pfl.privacy.JointPLDPrivacyAccountant + :members: + +.. autoclass:: pfl.privacy.JointPRVPrivacyAccountant + :members: + +..
autoclass:: pfl.privacy.JointRDPPrivacyAccountant + :members: + DP with adaptive clipping ------------------------- From 38908d07858f5f06df57288fef638bd743097630 Mon Sep 17 00:00:00 2001 From: Jonathan SCOTT Date: Wed, 5 Feb 2025 16:44:39 +0100 Subject: [PATCH 8/8] fixed ruff errors --- pfl/privacy/joint_privacy_accountant.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pfl/privacy/joint_privacy_accountant.py b/pfl/privacy/joint_privacy_accountant.py index a0cc571..7bb12f9 100644 --- a/pfl/privacy/joint_privacy_accountant.py +++ b/pfl/privacy/joint_privacy_accountant.py @@ -301,15 +301,15 @@ def compute_noise_paramters(self, large_epsilon): self.min_bounds, self.max_bounds): mechanism_epsilon = large_epsilon * p - func = lambda noise_param: self.get_composed_accountant( - [mechanism], + func = lambda noise_param, mech=mechanism, mech_epsilon=mechanism_epsilon: self.get_composed_accountant( + [mech], [noise_param], self.pessimistic_estimate, self.sampling_probability, self.use_connect_dots, self.value_discretization_interval, self.num_compositions, - ).get_delta_for_epsilon(mechanism_epsilon) + ).get_delta_for_epsilon(mech_epsilon) try: noise_parameter = binary_search_function( func=func, @@ -474,14 +474,14 @@ def compute_noise_paramters(self, large_epsilon): self.min_bounds, self.max_bounds): mechanism_epsilon = large_epsilon * p - func = lambda noise_param: self.get_composed_accountant( - [mechanism], + func = lambda noise_param, mech=mechanism, mech_epsilon=mechanism_epsilon: self.get_composed_accountant( + [mech], [noise_param], self.sampling_probability, self.num_compositions, self.eps_error, self.delta_error, - ).compute_delta(mechanism_epsilon, [self.num_compositions])[1] + ).compute_delta(mech_epsilon, [self.num_compositions])[1] try: noise_parameter = binary_search_function( func=func, @@ -615,9 +615,9 @@ def compute_noise_paramters(self, large_epsilon): self.min_bounds, self.max_bounds): mechanism_epsilon = 
large_epsilon * p - func = lambda noise_param: self.get_composed_accountant( - [mechanism], [noise_param], self.sampling_probability, self. - num_compositions).get_delta(mechanism_epsilon) + func = lambda noise_param, mech=mechanism, mech_epsilon=mechanism_epsilon: self.get_composed_accountant( + [mech], [noise_param], self.sampling_probability, self. + num_compositions).get_delta(mech_epsilon) try: noise_parameter = binary_search_function( func=func,