diff --git a/Wrappers/Python/cil/optimisation/algorithms/FISTA.py b/Wrappers/Python/cil/optimisation/algorithms/FISTA.py
index b2f1c510da..8f0142bf23 100644
--- a/Wrappers/Python/cil/optimisation/algorithms/FISTA.py
+++ b/Wrappers/Python/cil/optimisation/algorithms/FISTA.py
@@ -18,7 +18,7 @@
 from cil.optimisation.algorithms import Algorithm
 from cil.optimisation.functions import ZeroFunction
-from cil.optimisation.utilities import ConstantStepSize, StepSizeRule
+from cil.optimisation.utilities import ConstantStepSize, StepSizeRule, NesterovMomentum, MomentumCoefficient, ConstantMomentum
 import numpy
 import logging
 from numbers import Real, Number
@@ -118,7 +118,7 @@ def step_size(self):
             return self.step_size_rule.step_size
         else:
             warnings.warn(
-                "Note the step-size is set by a step-size rule and could change wit each iteration")
+                "Note the step-size is set by a step-size rule and could change with each iteration")
             return self.step_size_rule.get_step_size()
 
     # Set default step size
@@ -170,6 +170,9 @@ def set_up(self, initial, f, g, step_size, preconditioner, **kwargs):
             self.step_size_rule = ConstantStepSize(step_size)
         elif isinstance(step_size, StepSizeRule):
             self.step_size_rule = step_size
+        else:
+            raise TypeError(
+                "step_size must be a real number or a child class of :meth:`cil.optimisation.utilities.StepSizeRule`")
 
         self.preconditioner = preconditioner
 
@@ -229,8 +232,127 @@ def calculate_objective_function_at_point(self, x):
         """
         return self.f(x) + self.g(x)
 
+
+class APGD(ISTA):
+
+    r"""Accelerated Proximal Gradient Descent (APGD) is used to solve:
+
+    .. math:: \min_{x} f(x) + g(x)
+
+    where :math:`f` is differentiable and :math:`g` has a *simple* proximal operator.
+
+
+    In each update the algorithm completes the following steps:
+
+    .. math::
+
+        \begin{cases}
+            x_{k} = \mathrm{prox}_{\alpha g}(y_{k} - \alpha\nabla f(y_{k}))\\
+            y_{k+1} = x_{k} + M(x_{k} - x_{k-1})
+        \end{cases}
+
+    where :math:`\alpha` is the :code:`step_size` and :math:`M` is the momentum coefficient.
 
-class FISTA(ISTA):
+    Note that the above applies for :math:`k\geq 1`. For :math:`k=0`, :math:`x_{0}` and :math:`y_{0}` are initialised to `initial`.
+
+    Parameters
+    ----------
+    initial : DataContainer
+        Starting point of the algorithm
+    f : Function
+        Differentiable function. If `None` is passed, the algorithm will use the ZeroFunction.
+    g : Function or `None`
+        Convex function with *simple* proximal operator. If `None` is passed, the algorithm will use the ZeroFunction.
+    step_size : positive :obj:`float` or child class of :meth:`cil.optimisation.utilities.StepSizeRule`, default = None
+        Step size for the gradient step of APGD. If a float is passed, this is used as a constant step size. If a child class of :meth:`cil.optimisation.utilities.StepSizeRule` is passed then its method :meth:`get_step_size` is called for each update.
+        The default :code:`step_size` is a constant :math:`\frac{1}{L}` or 1 if `f=None`.
+    preconditioner : class with an `apply` method or a function that takes an initialised CIL function as an argument and modifies a provided `gradient`.
+        This could be a custom `preconditioner` or one provided in :meth:`~cil.optimisation.utilities.preconditioner`. If None is passed then `self.gradient_update` will remain unmodified.
+    momentum : float or child class of :meth:`cil.optimisation.utilities.MomentumCoefficient`, default = None
+        Momentum coefficient. If a float is passed, this is used as a constant momentum coefficient. If a child class of :meth:`cil.optimisation.utilities.MomentumCoefficient` is passed then its method :meth:`__call__` is called for each update. The default momentum coefficient is the Nesterov momentum coefficient.
+
+    Note
+    -----
+    Running this algorithm with the default step size and the default momentum coefficient is equivalent to running the FISTA algorithm.
+
+    """
+
+
+    def __init__(self, initial, f, g, step_size=None, preconditioner=None, momentum=None, **kwargs):
+
+        self.y = initial.copy()
+
+        self.set_momentum(momentum)
+
+        super(APGD, self).__init__(initial=initial, f=f, g=g,
+                                   step_size=step_size, preconditioner=preconditioner, **kwargs)
+
+    def _calculate_default_step_size(self):
+        """Calculate the default step size if a step size rule or step size is not provided
+        """
+        return 1./self.f.L
+
+    def _provable_convergence_condition(self):
+        if self.preconditioner is not None:
+            raise NotImplementedError(
+                "Can't check convergence criterion if a preconditioner is used")
+
+        if isinstance(self.step_size_rule, ConstantStepSize) and isinstance(self.momentum, NesterovMomentum):
+            return self.step_size_rule.step_size <= 1./self.f.L
+        else:
+            raise TypeError(
+                "Can't check convergence criterion for non-constant step size or non-Nesterov momentum coefficient")
+
+    @property
+    def momentum(self):
+        return self._momentum
+
+    def set_momentum(self, momentum):
+
+        if momentum is None:
+            self._momentum = NesterovMomentum()
+        else:
+            if isinstance(momentum, Number):
+                self._momentum = ConstantMomentum(momentum)
+            elif isinstance(momentum, MomentumCoefficient):
+                self._momentum = momentum
+            else:
+                raise TypeError("Momentum must be a number or a child class of MomentumCoefficient")
+
+    def update(self):
+        r"""Performs a single iteration of APGD. For :math:`k\geq 1`:
+
+        .. math::
+
+            \begin{cases}
+                x_{k} = \mathrm{prox}_{\alpha g}(y_{k} - \alpha\nabla f(y_{k}))\\
+                y_{k+1} = x_{k} + M(x_{k} - x_{k-1})
+            \end{cases}
+
+        """
+
+        self.f.gradient(self.y, out=self.gradient_update)
+
+        if self.preconditioner is not None:
+            self.preconditioner.apply(
+                self, self.gradient_update, out=self.gradient_update)
+
+        step_size = self.step_size_rule.get_step_size(self)
+
+        self.y.sapyb(1., self.gradient_update, -step_size, out=self.y)
+
+        self.g.proximal(self.y, step_size, out=self.x)
+
+        self.x.subtract(self.x_old, out=self.y)
+
+        momentum = self.momentum(self)
+        self.y.sapyb(momentum, self.x, 1.0, out=self.y)
+
+
+class FISTA(APGD):
 
     r"""Fast Iterative Shrinkage-Thresholding Algorithm (FISTA), see :cite:`BeckTeboulle_b`, :cite:`BeckTeboulle_a`, is used to solve:
@@ -302,6 +424,7 @@ class FISTA(ISTA):
 
     """
+
     def _calculate_default_step_size(self):
         """Calculate the default step size if a step size rule or step size is not provided
         """
@@ -320,42 +443,12 @@ def _provable_convergence_condition(self):
             raise TypeError(
                 "Can't check convergence criterion for non-constant step size")
 
-    def __init__(self, initial, f, g, step_size=None, preconditioner=None, **kwargs):
+    def __init__(self, initial, f, g, step_size=None, preconditioner=None, momentum=None, **kwargs):
+
         self.y = initial.copy()
-        self.t = 1
 
         super(FISTA, self).__init__(initial=initial, f=f, g=g,
-                                    step_size=step_size, preconditioner=preconditioner, **kwargs)
-
-    def update(self):
-        r"""Performs a single iteration of FISTA. For :math:`k\geq 1`:
-
-        .. math::
-
-        \begin{cases}
-        x_{k} = \mathrm{prox}_{\alpha g}(y_{k} - \alpha\nabla f(y_{k}))\\
-        t_{k+1} = \frac{1+\sqrt{1+ 4t_{k}^{2}}}{2}\\
-        y_{k+1} = x_{k} + \frac{t_{k}-1}{t_{k+1}}(x_{k} - x_{k-1})
-        \end{cases}
-
-        """
+                                    step_size=step_size, preconditioner=preconditioner, momentum=None, **kwargs)
 
-        self.t_old = self.t
-        self.f.gradient(self.y, out=self.gradient_update)
-
-        if self.preconditioner is not None:
-            self.preconditioner.apply(
-                self, self.gradient_update, out=self.gradient_update)
-
-        step_size = self.step_size_rule.get_step_size(self)
-
-        self.y.sapyb(1., self.gradient_update, -step_size, out=self.y)
-
-        self.g.proximal(self.y, step_size, out=self.x)
-
-        self.t = 0.5*(1 + numpy.sqrt(1 + 4*(self.t_old**2)))
-
-        self.x.subtract(self.x_old, out=self.y)
-        self.y.sapyb(((self.t_old-1)/self.t), self.x, 1.0, out=self.y)
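For orientation, a minimal usage sketch of the new class. It is not part of the diff and assumes only the `APGD` signature above plus standard CIL pieces (`MatrixOperator`, `VectorData`, `LeastSquares`, `L1Norm`):

```python
# Minimal sketch (not part of this PR): a LASSO-type problem solved with APGD.
import numpy as np
from cil.framework import VectorData
from cil.optimisation.algorithms import APGD
from cil.optimisation.functions import LeastSquares, L1Norm
from cil.optimisation.operators import MatrixOperator

np.random.seed(0)
A = MatrixOperator(np.random.normal(0, 1, (50, 100)).astype('float32'))
b = VectorData(np.random.normal(0, 1, 50).astype('float32'))
f = LeastSquares(A, b=b)
g = L1Norm()

# Defaults: Nesterov momentum and constant step size 1/L, i.e. plain FISTA
apgd = APGD(initial=A.domain.allocate(0), f=f, g=g)
apgd.run(100, verbose=0)

# A float is wrapped into a ConstantMomentum rule by set_momentum
apgd_half = APGD(initial=A.domain.allocate(0), f=f, g=g, momentum=0.5)
apgd_half.run(100, verbose=0)
```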
diff --git a/Wrappers/Python/cil/optimisation/algorithms/__init__.py b/Wrappers/Python/cil/optimisation/algorithms/__init__.py
index 21372fdcb4..67686e2b83 100644
--- a/Wrappers/Python/cil/optimisation/algorithms/__init__.py
+++ b/Wrappers/Python/cil/optimisation/algorithms/__init__.py
@@ -23,7 +23,7 @@
 from .FISTA import FISTA
 from .FISTA import ISTA
 from .FISTA import ISTA as PGD
-from .FISTA import FISTA as APGD
+from .FISTA import APGD
 from .PDHG import PDHG
 from .ADMM import LADMM
 from .SPDHG import SPDHG
diff --git a/Wrappers/Python/cil/optimisation/utilities/__init__.py b/Wrappers/Python/cil/optimisation/utilities/__init__.py
index daa32667a0..ccd9800130 100644
--- a/Wrappers/Python/cil/optimisation/utilities/__init__.py
+++ b/Wrappers/Python/cil/optimisation/utilities/__init__.py
@@ -21,3 +21,4 @@
 from .sampler import SamplerRandom
 from .StepSizeMethods import ConstantStepSize, ArmijoStepSizeRule, StepSizeRule, BarzilaiBorweinStepSizeRule
 from .preconditioner import Preconditioner, AdaptiveSensitivity, Sensitivity
+from .momentum import MomentumCoefficient, ConstantMomentum, NesterovMomentum
diff --git a/Wrappers/Python/cil/optimisation/utilities/callbacks.py b/Wrappers/Python/cil/optimisation/utilities/callbacks.py
index a5c136695a..df86985c78 100644
--- a/Wrappers/Python/cil/optimisation/utilities/callbacks.py
+++ b/Wrappers/Python/cil/optimisation/utilities/callbacks.py
@@ -1,3 +1,22 @@
+# Copyright 2024 United Kingdom Research and Innovation
+# Copyright 2024 The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors:
+# - CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt
+
+
 from abc import ABC, abstractmethod
 from functools import partialmethod
diff --git a/Wrappers/Python/cil/optimisation/utilities/momentum.py b/Wrappers/Python/cil/optimisation/utilities/momentum.py
new file mode 100644
index 0000000000..bff1232089
--- /dev/null
+++ b/Wrappers/Python/cil/optimisation/utilities/momentum.py
@@ -0,0 +1,76 @@
+# Copyright 2024 United Kingdom Research and Innovation
+# Copyright 2024 The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors:
+# - CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt
+
+
+from abc import ABC, abstractmethod
+import numpy
+
+class MomentumCoefficient(ABC):
+    '''Abstract base class for MomentumCoefficient objects. The `__call__` method of this class returns the momentum coefficient for the given iteration.
+    '''
+    def __init__(self):
+        '''Initialises the momentum coefficient object.
+        '''
+        pass
+
+    @abstractmethod
+    def __call__(self, algorithm):
+        '''Returns the momentum coefficient for the given iteration.
+
+        Parameters
+        ----------
+        algorithm: CIL Algorithm
+            The algorithm object.
+        '''
+
+        pass
+
+class ConstantMomentum(MomentumCoefficient):
+
+    '''MomentumCoefficient object that returns a constant momentum coefficient.
+
+    Parameters
+    ----------
+    momentum: float
+        The momentum coefficient.
+    '''
+
+    def __init__(self, momentum):
+        self.momentum = momentum
+
+    def __call__(self, algorithm):
+        return self.momentum
+
+class NesterovMomentum(MomentumCoefficient):
+
+    '''MomentumCoefficient object that returns the Nesterov momentum coefficient.
+
+    Parameters
+    ----------
+    t: float
+        The initial value of the sequence :math:`t_k` from which the momentum coefficient is computed.
+    '''
+
+    def __init__(self, t=1):
+        self.t = t
+
+    def __call__(self, algorithm):
+        self.t_old = self.t
+        self.t = 0.5*(1 + numpy.sqrt(1 + 4*(self.t_old**2)))
+        return (self.t_old-1)/self.t
\ No newline at end of file
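Because `APGD` resolves the coefficient through `MomentumCoefficient.__call__(algorithm)`, iteration-dependent schedules require no changes to the algorithm itself. A sketch of a custom rule; the class name and schedule are illustrative, not part of this PR:

```python
# Illustrative only: a custom rule written against the MomentumCoefficient API above.
from cil.optimisation.utilities import MomentumCoefficient

class DelayedMomentum(MomentumCoefficient):
    '''Hypothetical schedule: no momentum for the first `delay` iterations,
    then the k/(k+3) approximation of the Nesterov coefficient.'''
    def __init__(self, delay=10):
        self.delay = delay

    def __call__(self, algorithm):
        k = algorithm.iteration  # CIL algorithms carry an iteration counter
        if k < self.delay:
            return 0.0
        return k/(k + 3)
```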
diff --git a/Wrappers/Python/test/test_algorithm_convergence.py b/Wrappers/Python/test/test_algorithm_convergence.py
index 61bdb14569..efdddba544 100644
--- a/Wrappers/Python/test/test_algorithm_convergence.py
+++ b/Wrappers/Python/test/test_algorithm_convergence.py
@@ -1,10 +1,11 @@
-from cil.optimisation.algorithms import SPDHG, PDHG
-from cil.optimisation.functions import L2NormSquared, IndicatorBox, BlockFunction, ZeroFunction
-from cil.optimisation.operators import BlockOperator, IdentityOperator, MatrixOperator
+from cil.optimisation.algorithms import SPDHG, PDHG, FISTA, APGD
+from cil.optimisation.functions import L2NormSquared, IndicatorBox, BlockFunction, ZeroFunction, KullbackLeibler, OperatorCompositionFunction, LeastSquares
+from cil.optimisation.operators import BlockOperator, IdentityOperator, MatrixOperator, GradientOperator
 from cil.optimisation.utilities import Sampler
-from cil.framework import AcquisitionGeometry, BlockDataContainer, BlockGeometry, VectorData
+from cil.framework import AcquisitionGeometry, BlockDataContainer, BlockGeometry, VectorData, ImageGeometry
 from cil.utilities import dataexample
+from cil.utilities import noise as applynoise
 import numpy as np
 import unittest
@@ -103,4 +104,81 @@ def test_SPDHG_toy_example(self):
         self.assertNumpyArrayAlmostEqual(
             alg_stochastic.x.as_array(), u_cvxpy.value)
         self.assertNumpyArrayAlmostEqual(
-            alg_stochastic.x.as_array(), b.as_array(), decimal=6)
\ No newline at end of file
+            alg_stochastic.x.as_array(), b.as_array(), decimal=6)
+
+    def test_FISTA_Denoising(self):
+        # adapted from demo FISTA_Tikhonov_Poisson_Denoising.py in CIL-Demos repository
+        data = dataexample.SHAPES.get()
+        ig = data.geometry
+        ag = ig
+        # Create Noisy data with Poisson noise
+        scale = 5
+        noisy_data = applynoise.poisson(data/scale, seed=10) * scale
+
+        # Regularisation Parameter
+        alpha = 10
+
+        # Setup and run the FISTA algorithm
+        operator = GradientOperator(ig)
+        fid = KullbackLeibler(b=noisy_data)
+        reg = OperatorCompositionFunction(alpha * L2NormSquared(), operator)
+
+        initial = ig.allocate()
+        fista = FISTA(initial=initial, f=reg, g=fid)
+        fista.update_objective_interval = 500
+        fista.run(3000, verbose=0)
+        rmse = (fista.get_output() - data).norm() / data.as_array().size
+        self.assertLess(rmse, 4.2e-4)
+
+    def test_APGD(self):
+        ig = ImageGeometry(41, 43, 47)
+        initial = ig.allocate(0)
+        b = ig.allocate("random")**2
+        identity = IdentityOperator(ig)
+
+        f = OperatorCompositionFunction(L2NormSquared(b=b), identity)
+        g = IndicatorBox(lower=0)
+
+        apgd = APGD(f=f, g=g, initial=initial, update_objective_interval=100, momentum=0.5)
+        apgd.run(500, verbose=0)
+        self.assertNumpyArrayAlmostEqual(apgd.solution.as_array(), b.as_array(), decimal=3)
+
+    def test_FISTA_momentum(self):
+
+        np.random.seed(10)
+        n = 100
+        m = 50
+        A = np.random.normal(0, 1, (m, n)).astype('float32')
+        # A /= np.linalg.norm(A, axis=1, keepdims=True)
+        b = np.random.normal(0, 1, m).astype('float32')
+        reg = 0.5
+
+        Aop = MatrixOperator(A)
+        bop = VectorData(b)
+        ig = Aop.domain
+
+        # cvxpy solutions
+        u_cvxpy = cvxpy.Variable(ig.shape[0])
+        objective = cvxpy.Minimize(0.5 * cvxpy.sum_squares(Aop.A @ u_cvxpy - bop.array) + reg/2 * cvxpy.sum_squares(u_cvxpy))
+        p = cvxpy.Problem(objective)
+        p.solve(verbose=False, solver=cvxpy.SCS, eps=1e-4)
+
+        # default fista
+        f = LeastSquares(A=Aop, b=bop, c=0.5)
+        g = reg/2*L2NormSquared()
+        fista = FISTA(initial=ig.allocate(), f=f, g=g, update_objective_interval=1)
+        fista.run(500)
+        np.testing.assert_allclose(fista.objective[-1], p.value, atol=1e-3)
+        np.testing.assert_allclose(fista.solution.array, u_cvxpy.value, atol=1e-3)
+
+        # FISTA with the Dossal-Chambolle momentum from "On the convergence of the iterates of FISTA"
+        from cil.optimisation.algorithms.FISTA import MomentumCoefficient
+        class DossalChambolle(MomentumCoefficient):
+            def __call__(self, algo=None):
+                return (algo.iteration-1)/(algo.iteration+50)
+        momentum = DossalChambolle()
+        fista_dc = APGD(initial=ig.allocate(), f=f, g=g, update_objective_interval=1, momentum=momentum)
+        fista_dc.run(500)
+        np.testing.assert_allclose(fista_dc.solution.array, u_cvxpy.value, atol=1e-3)
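The docstring `Note` above claims that `APGD` with its default step size and momentum reproduces FISTA. A quick standalone check in the spirit of these tests, assuming this PR's classes and toy data:

```python
# Hedged sanity check: with defaults, APGD should trace the same iterates as FISTA.
import numpy as np
from cil.framework import VectorData
from cil.optimisation.algorithms import APGD, FISTA
from cil.optimisation.functions import LeastSquares, ZeroFunction
from cil.optimisation.operators import MatrixOperator

np.random.seed(1)
A = MatrixOperator(np.random.normal(0, 1, (30, 60)).astype('float32'))
b = VectorData(np.random.normal(0, 1, 30).astype('float32'))
f = LeastSquares(A, b=b)

fista = FISTA(initial=A.domain.allocate(0), f=f, g=ZeroFunction())
apgd = APGD(initial=A.domain.allocate(0), f=f, g=ZeroFunction())
fista.run(20, verbose=0)
apgd.run(20, verbose=0)
np.testing.assert_allclose(fista.solution.array, apgd.solution.array, rtol=1e-5, atol=1e-6)
```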
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index 2efe206e23..34c9dc3cad 100644
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -30,7 +30,7 @@
 
 from cil.framework.labels import FillType
 
-from cil.optimisation.utilities import ArmijoStepSizeRule, ConstantStepSize, Sampler, callbacks
+from cil.optimisation.utilities import ArmijoStepSizeRule, ConstantStepSize, Sampler, callbacks, NesterovMomentum, MomentumCoefficient, ConstantMomentum
 from cil.optimisation.operators import IdentityOperator
 from cil.optimisation.operators import GradientOperator, BlockOperator, MatrixOperator
@@ -263,11 +263,64 @@ def test_gd_deprecate_atol_rtol(self):
         gd.run(10)
         self.assertEqual(gd.iteration, 0)
 
+class Test_APGD(CCPiTestClass):
+    def setUp(self):
+        self.ig = ImageGeometry(127, 139, 149)
+        self.initial = self.ig.allocate(0)
+        self.b = self.ig.allocate("random")**2
+        self.identity = IdentityOperator(self.ig)
+
+        self.f = OperatorCompositionFunction(L2NormSquared(b=self.b), self.identity)
+        self.g = IndicatorBox(lower=0)
+
+    def test_init_default(self):
+        alg = APGD(initial=self.initial, f=self.f, g=self.g)
+        assert isinstance(alg.momentum, NesterovMomentum)
+        assert isinstance(alg.step_size_rule, ConstantStepSize)
+        self.assertEqual(alg.step_size_rule.step_size, 1/self.f.L)
+        self.assertEqual(alg.update_objective_interval, 1)
+        self.assertNumpyArrayAlmostEqual(alg.y.as_array(), self.initial.as_array())
+
+    def test_init_momentum(self):
+        alg = APGD(initial=self.initial, f=self.f, g=self.g, momentum=1)
+        assert isinstance(alg.momentum, ConstantMomentum)
+        self.assertEqual(alg.momentum(alg), 1)
+        self.assertEqual(alg.momentum.momentum, 1)
+
+        with self.assertRaises(TypeError):
+            alg = APGD(initial=self.initial, f=self.f, g=self.g, momentum='banana')
+
+    def test_init_step_size(self):
+        alg = APGD(initial=self.initial, f=self.f, g=self.g, step_size=1.0)
+        assert isinstance(alg.step_size_rule, ConstantStepSize)
+        self.assertEqual(alg.step_size_rule.get_step_size(alg), 1.0)
+        self.assertEqual(alg.step_size_rule.step_size, 1.0)
+
+        with self.assertRaises(TypeError):
+            alg = APGD(initial=self.initial, f=self.f, g=self.g, step_size='banana')
+
+    def test_update_step(self):
+        alg = APGD(initial=self.initial, f=self.f, g=self.g, step_size=0.3, momentum=0.5)
+        y_0 = self.initial.copy()
+        x_0 = self.g.proximal(y_0 - 0.3*self.f.gradient(y_0), 0.3)
+        y_1 = x_0 + 0.5*(x_0-y_0)
+        alg.run(1)
+        self.assertNumpyArrayAlmostEqual(alg.y.as_array(), y_1.as_array())
+
+        x_1 = self.g.proximal(y_1 - 0.3*self.f.gradient(y_1), 0.3)
+        y_2 = x_1 + 0.5*(x_1-x_0)
+        alg.run(1)
+        self.assertNumpyArrayAlmostEqual(alg.y.as_array(), y_2.as_array())
+
+
 class TestFISTA(CCPiTestClass):
     def test_FISTA(self):
         ig = ImageGeometry(127, 139, 149)
-        initial = ig.allocate()
+        initial = ig.allocate(0)
         b = initial.copy()
         # fill with random numbers
         b.fill(np.random.random(initial.shape))
@@ -412,51 +465,14 @@ def test_FISTA_catch_Lipschitz(self):
         with self.assertRaises(TypeError):
             alg = FISTA(initial=initial, f=L1Norm(), g=ZeroFunction())
 
-    def test_FISTA_Denoising(self):
-        # adapted from demo FISTA_Tikhonov_Poisson_Denoising.py in CIL-Demos repository
-        data = dataexample.SHAPES.get()
-        ig = data.geometry
-        ag = ig
-        N = 300
-        # Create Noisy data with Poisson noise
-        scale = 5
-        noisy_data = applynoise.poisson(data/scale, seed=10) * scale
-        # Regularisation Parameter
-        alpha = 10
-        # Setup and run the FISTA algorithm
-        operator = GradientOperator(ig)
-        fid = KullbackLeibler(b=noisy_data)
-        reg = OperatorCompositionFunction(alpha * L2NormSquared(), operator)
-        initial = ig.allocate()
-        fista = FISTA(initial=initial, f=reg, g=fid)
-        fista.update_objective_interval = 500
-        fista.run(3000, verbose=0)
-        rmse = (fista.get_output() - data).norm() / data.as_array().size
-        log.info("RMSE %f", rmse)
-        self.assertLess(rmse, 4.2e-4)
-
-    def test_FISTA_APGD_alias(self):
-        n = 50
-        m = 500
-        A = np.random.uniform(0, 1, (m, n)).astype('float32')
-        b = A.dot(np.random.randn(n))
-        Aop = MatrixOperator(A)
-        bop = VectorData(b)
-        f = LeastSquares(Aop, b=bop, c=0.5)
-        g = ZeroFunction()
-        ig = Aop.domain
-        initial = ig.allocate()
-
-        with patch('cil.optimisation.algorithms.ISTA.set_up', MagicMock(return_value=None)) as mock_method:
-            alg = APGD(initial=initial, f=f, g=g, step_size=4)
-            self.assertEqual(alg.t, 1)
-            self.assertNumpyArrayEqual(alg.y.array, initial.array)
-            mock_method.assert_called_once_with(initial=initial, f=f, g=g, step_size=4, preconditioner=None)
+
+
+
 
 class testISTA(CCPiTestClass):
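The arithmetic behind `test_update_step` is easy to follow with scalars: the proximal map of `IndicatorBox(lower=0)` is projection onto the non-negative orthant, i.e. clipping at zero. A toy walk-through with illustrative numbers:

```python
# Scalar walk-through of the update verified in test_update_step, assuming
# step_size=0.3, momentum=0.5, f(x) = ||x - b||^2 and g = IndicatorBox(lower=0),
# whose proximal map is the projection onto [0, inf), i.e. clipping at zero.
import numpy as np

b = 2.0
y0 = x_prev = 0.0                    # initial = 0, so x_{-1} = y_0 = 0
grad = 2*(y0 - b)                    # gradient of ||y - b||^2 at y_0
x0 = np.maximum(y0 - 0.3*grad, 0.0)  # proximal/projection step -> 1.2
y1 = x0 + 0.5*(x0 - x_prev)          # momentum extrapolation   -> 1.8
print(x0, y1)
```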
diff --git a/Wrappers/Python/test/test_momentum.py b/Wrappers/Python/test/test_momentum.py
new file mode 100644
index 0000000000..5296b6b979
--- /dev/null
+++ b/Wrappers/Python/test/test_momentum.py
@@ -0,0 +1,62 @@
+# Copyright 2025 United Kingdom Research and Innovation
+# Copyright 2025 The University of Manchester
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors:
+# CIL Developers, listed at: https://github.com/TomographicImaging/CIL/blob/master/NOTICE.txt
+
+
+import unittest
+import numpy as np
+from cil.optimisation.utilities import ConstantMomentum, NesterovMomentum, MomentumCoefficient
+
+class MockAlgorithm:
+    pass  # Placeholder for any required algorithm attributes
+
+class TestMomentumCoefficients(unittest.TestCase):
+
+    def test_constant_momentum(self):
+        momentum_value = 0.9
+        momentum = ConstantMomentum(momentum_value)
+        algorithm = MockAlgorithm()
+
+        self.assertEqual(momentum(algorithm), momentum_value, msg="ConstantMomentum should return the set momentum value")
+
+    def test_nesterov_momentum(self):
+        momentum = NesterovMomentum()
+        algorithm = MockAlgorithm()
+
+        initial_value = momentum(algorithm)
+        second_value = momentum(algorithm)
+
+        # Check initial momentum value
+        expected_initial_t = 1
+        expected_next_t = 0.5 * (1 + np.sqrt(1 + 4 * (expected_initial_t ** 2)))
+        expected_initial_momentum = (expected_initial_t - 1) / expected_next_t
+
+        self.assertAlmostEqual(initial_value, expected_initial_momentum, places=6, msg="Incorrect initial momentum value")
+
+        # Check second iteration momentum value
+        expected_next_t_old = expected_next_t
+        expected_next_t = 0.5 * (1 + np.sqrt(1 + 4 * (expected_next_t_old ** 2)))
+        expected_next_momentum = (expected_next_t_old - 1) / expected_next_t
+
+        self.assertAlmostEqual(second_value, expected_next_momentum, places=6, msg="Incorrect second iteration momentum value")
+
+    def test_momentum_coefficient_abc(self):
+        class InvalidMomentum(MomentumCoefficient):
+            pass
+
+        with self.assertRaises(TypeError):
+            InvalidMomentum()
\ No newline at end of file
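For reference, the recurrence exercised above produces the familiar Nesterov coefficient sequence. A plain-numpy sketch of its first values:

```python
# First few Nesterov momentum coefficients from t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2,
# coefficient = (t_k - 1) / t_{k+1}, starting from t_0 = 1.
import numpy as np

t = 1.0
for k in range(4):
    t_old = t
    t = 0.5 * (1 + np.sqrt(1 + 4 * t_old**2))
    print(k, (t_old - 1) / t)
# approx 0.0, 0.28, 0.43, 0.53: the coefficient climbs towards 1
```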
diff --git a/docs/source/optimisation.rst b/docs/source/optimisation.rst
index b087405503..1d4fc58518 100644
--- a/docs/source/optimisation.rst
+++ b/docs/source/optimisation.rst
@@ -45,7 +45,7 @@ An algorithm is designed for a particular generic optimisation problem
 accepts a instances of :code:`Function` derived classes and/or :code:`Operator` derived classes as input to
 define a specific instance of the generic optimisation problem to be solved.
 They are iterable objects which can be run in a for loop.
-The user can provide a stopping criterion different than the default max_iteration.
+The user can provide a stopping criterion different from the default.
 
 New algorithms can be easily created by extending the :code:`Algorithm` class.
 The user is required to implement only 4 methods: set_up, __init__, update and update_objective.
@@ -78,19 +78,19 @@ GD
 --
 .. autoclass:: cil.optimisation.algorithms.GD
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 CGLS
 ----
 .. autoclass:: cil.optimisation.algorithms.CGLS
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 SIRT
 ----
 .. autoclass:: cil.optimisation.algorithms.SIRT
    :members: update, update_objective
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 ISTA/PGD
 --------
@@ -99,36 +99,44 @@ The Iterative Soft Thresholding Algorithm (ISTA) is also known as Proximal Gradi
 
 .. _ISTA:
 .. autoclass:: cil.optimisation.algorithms.ISTA
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
-FISTA/APGD
+FISTA
 -----
-The Fast Iterative Soft Thresholding Algorithm (FISTA) is also known as Accelerated Proximal Gradient Descent (APGD). Note that in CIL, :ref:`APGD` is an alias of `FISTA`.
+The Fast Iterative Soft Thresholding Algorithm (FISTA).
 
 .. _FISTA:
 .. autoclass:: cil.optimisation.algorithms.FISTA
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
+
+APGD
+----
+The Accelerated Proximal Gradient Descent Algorithm (APGD). This is an extension of the PGD/ISTA algorithm that allows either a constant momentum coefficient or a momentum coefficient that is updated at each iteration. Note that in CIL, running this algorithm with the default step size and the default momentum coefficient is equivalent to running the FISTA algorithm.
+
+.. autoclass:: cil.optimisation.algorithms.APGD
+   :members:
+   :inherited-members: run, update_objective_interval
 
 PDHG
 ----
 .. autoclass:: cil.optimisation.algorithms.PDHG
    :members: update, set_step_sizes, update_step_sizes, update_objective
    :member-order: bysource
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 LADMM
 -----
 .. autoclass:: cil.optimisation.algorithms.LADMM
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 PD3O
 ----
 .. autoclass:: cil.optimisation.algorithms.PD3O
    :members:
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 
 Algorithms (Stochastic)
@@ -158,7 +166,7 @@ Each iteration considers just one index of the sum, potentially reducing computa
 
 .. autoclass:: cil.optimisation.algorithms.SPDHG
    :members: update, set_step_sizes, set_step_sizes_from_ratio, update_objective
-   :inherited-members: run, update_objective_interval, max_iteration
+   :inherited-members: run, update_objective_interval
 
 
 Approximate gradient methods
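Step-size rules and momentum rules plug into `APGD` independently, so they can be combined. A closing sketch, assuming this PR together with the existing `ArmijoStepSizeRule` (the parameters shown are illustrative); note that `_provable_convergence_condition` intentionally refuses to certify such non-default combinations:

```python
# Hedged sketch: APGD combining an Armijo backtracking step size with constant momentum.
import numpy as np
from cil.framework import VectorData
from cil.optimisation.algorithms import APGD
from cil.optimisation.functions import LeastSquares, L1Norm
from cil.optimisation.operators import MatrixOperator
from cil.optimisation.utilities import ArmijoStepSizeRule, ConstantMomentum

np.random.seed(2)
A = MatrixOperator(np.random.normal(0, 1, (40, 80)).astype('float32'))
b = VectorData(np.random.normal(0, 1, 40).astype('float32'))

algo = APGD(initial=A.domain.allocate(0),
            f=LeastSquares(A, b=b),
            g=L1Norm(),
            step_size=ArmijoStepSizeRule(alpha=1.0),  # backtracking from alpha
            momentum=ConstantMomentum(0.3))           # fixed momentum coefficient
algo.run(200, verbose=0)
```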