Source code for pyrast.activity_coefficients.activity_coefficient

"""Base model for activity coefficient models."""

from typing import cast

import numpy as np
from scipy.optimize import brentq, least_squares

from pyrast.isotherms.interpolator_isotherm import CubicIsotherm, InterpolatorIsotherm



[docs]
class ActivityCoefficient:
    # Model list built at import time
    _MODELS = {}

    # Class variables
    name: str = ''
    param_names: tuple
    param_default_bounds: tuple
    param_ideal_values: tuple

    # Instance variables
    partial_fug: np.ndarray
    loadings: np.ndarray
    isotherms: list
    model_parameters: dict
    model: str

    def __new__(cls, *args, **kwargs):
        """Creates an instance of the user-specified model.

        This factory design pattern is identical to the model isotherms and allows
        users to easily switch between different activity coefficient models. Users
        should never interact with this method directly.
        """
        if cls is ActivityCoefficient:
            model = kwargs.pop('model', None)
            if model is None:
                if args and isinstance(args[0], str):
                    model = args[0]
                elif len(args) >= 4:
                    model = args[3]
            if model not in cls._MODELS:
                raise ValueError(f'{model} is not a valid model. Choose from'
                                 f' {list(cls._MODELS.keys())}')
            subclass = cls._MODELS[model]
            return super().__new__(subclass)
        return super().__new__(cls)

    def __init_subclass__(cls, *, model_name: str = '', **kwargs):
        """Registers subclasses of ModelIsotherm at import time.

        Users should never interact with this method directly. To modify the list of
        available models, edit the import statements in __init__.py.
        """
        super().__init_subclass__(**kwargs)
        if model_name:
            ActivityCoefficient._MODELS[model_name] = cls


[docs]
    def __init__(self, partial_fug: np.ndarray | list, loadings: np.ndarray | list,
                 isotherms: list, model: str, *, total_loading: bool = False,
                 verbose: bool = False, model_parameters: dict | None = None,
                 c: float = 1, param_guess: dict | None = None, param_tol: float = 1e-6,
                 param_bounds: dict | None = None,
                 optimization_options: dict | None = None,
                 root_bracketing_options: dict | None = None):
        """Initializes an activity coefficient model.

        Activity coefficient models are used in real adsorbed solution theory (RAST) to
        account for non-ideal interactions between adsorbed species. This base class
        provides the framework for fitting activity coefficient models from binary
        mixture data. There are two ways to fit the model parameters:

        1) Component loadings: If the user provides component loadings for different
        partial fugacities, the model parameters will be fit by minimizing the residual
        between the predicted and observed activity coefficients for each component.
        At each iteration, a 1D solver is used to find the spreading pressure that gives
        the observed loading for each component. This approach is more reliable for
        fitting activity coefficient models. If only one data point is provided, the
        models can still be fit assuming a default value for the C parameter, which
        can be adjusted by passing the 'c' argument. With multiple data points, the C
        parameter will be used as an initial guess and fit along with the other model
        parameters.

        2) Total loading: If the user provides total loadings for different partial
        fugacities, the model parameters will be fit by minimizing the residual
        between the predicted total loading from a RAST calculation and the provided
        total loading. This approach is more computationally expensive, but is
        useful for experimental data where component loadings are difficult to
        measure. The user must provide at least as many total loading data points as
        model parameters.

        For a full discussion of the fitting procedure and the underlying equations,
        see the documentation or paper discussion. Fitting activity coefficient
        models can be challenging, and convergence depends on the quality of the data
        and the choice of fitting options. If you have trouble fitting a model, try
        adjusting the initial guess, tolerances, or optimization options.

        Args:
            partial_fug (array-like): Partial fugacities of each component in
                the gas phase.
            loadings (array-like): Adsorbed phase loadings of each component (or total
                loading if total_loading=True).
            isotherms (list): List of pure component isotherm objects for each
                component.
            model (str): Name of the activity coefficient model to use. Must be one of
                the registered models in pyRAST.
            total_loading (bool, optional): If True, the input loadings are treated as
                total loadings and RAST calculations are used to fit the model
                parameters. Default is False.
            verbose (bool, optional): If True, prints convergence information during
                fitting. Default is False.
            model_parameters (dict, optional): If provided, these parameters will be
                used instead of fitting from data. Keys must match the parameter names
                for the specified model.
            c (float, optional): Sets the C parameter for fitting to single point data,
                or used as the initial guess for C when fitting to multiple data points.
                Default is 1.
            param_guess (dict, optional): Initial guess for model parameters when
                fitting from data. Keys must match the parameter names for the specified
                model. If not provided, the guess will be set to the parameter values
                that produce ideal behavior (gamma=1).
            param_tol (float, optional): Tolerance in parameter values for convergence
                when fitting to data. This can often be adjusted lower without
                significantly affecting the fit quality. This is passed as the 'xtol'
                parameter to scipy.optimize.least_squares. If specified again in
                optimization_options, it will override this value. Default is 1e-6.
            param_bounds (dict, optional): Bounds for model parameters when fitting from
                data. Keys must match the parameter names for the specified model. If
                not provided, the bounds will be set to the default specified in the
                model class.
            optimization_options (dict, optional): Options for the least-squares
                optimization when fitting to data. This is passed directly to
                scipy.optimize.least_squares, so you can specify any options available
                there. Default is None.
            root_bracketing_options (dict, optional): Options for the root bracketing
                when fitting from component loading data. This is passed as keyword
                arguments to the _bracket_phi function that is used to find the
                spreading pressure at each iteration. The default values for these
                parameters are: phi_low=1e-12, phi_high=1.0, max_expand=60,
                phi_cap=np.inf. Adjusting these parameters can help with finding a root
                for phi, although if root finding continuously fails it is likely that
                the model is not suitable for the system you are trying to fit.

        Returns:
            None: Model parameters are stored in self.model_parameters.

        Raises:
            ValueError: If input arrays have inconsistent shapes or invalid values
                or if input dictionary keys do not match model parameter names.
        """
        # Validate inputs
        partial_fug = np.asarray(partial_fug)
        loadings = np.asarray(loadings)
        if np.any(loadings < 0):
            raise ValueError('Component loadings must be non-negative.')
        if np.any(partial_fug < 0):
            raise ValueError('Partial fugacities must be non-negative.')
        if not total_loading and partial_fug.shape != loadings.shape:
            raise ValueError('Arrays for partial fugacities and loadings must have'
                            'the same dimensions.')
        if total_loading and partial_fug.shape[0] != loadings.shape[0]:
            raise ValueError('Arrays for partial fugacities and total loadings must'
                             'have the same number of data points.')
        if partial_fug.ndim == 2 and partial_fug.shape[1] != 2:
            raise ValueError('This activity coefficient model is currently only '
                            'implemented for binary mixtures (2 components). '
                            'Your arrays must have shape (n, 2) where n is the '
                            'number of data points.')
        if len(isotherms) != 2:
            raise ValueError('Exactly 2 isotherm objects must be provided in a list, '
                             'one for each component.')


        # Store data
        self.partial_fug = partial_fug
        self.loadings = loadings
        self.isotherms = isotherms
        self.c = c

        # Store model info
        self.model = model

        # Store calculation info
        self.param_tol = param_tol

        # Handle parameter guess and bounds
        self.param_bounds = dict(zip(self.param_names, self.param_default_bounds))
        if param_bounds is not None:
            if set(param_bounds.keys()) != set(self.param_names):
                raise ValueError(f'param_bounds keys must be {self.param_names}.')
            self.param_bounds = param_bounds

        self.param_guess = dict(zip(self.param_names, self.param_ideal_values +
                                                      (self.c,)))
        if param_guess is not None:
            if set(param_guess.keys()) != set(self.param_names):
                raise ValueError(f'param_guess keys must be {self.param_names}.')
            self.param_guess = param_guess
        self.param_guess = self._enforce_parameter_bounds(self.param_guess)

        # Fit model to data
        # If user provided parameters, check that keys are correct
        self.model_parameters = dict.fromkeys(self.param_names, np.nan)
        if model_parameters is not None:
            if set(model_parameters.keys()) != set(self.param_names):
                raise ValueError(f'model_parameters keys must be {self.param_names}.')
            self.model_parameters = model_parameters
        elif not total_loading:
            self._fit_component_loadings(verbose, optimization_options,
                                         root_bracketing_options)
        else:
            self._fit_total_loading(verbose, optimization_options)


    def __repr__(self):
        """String representation of activity coefficient model."""
        return (f'{self.name} activity coefficient model with parameters: '
                f'{self.model_parameters}')


[docs]
    def _enforce_parameter_bounds(self, guess):
        """Enforces parameter bounds on the initial guess.

        Args:
            guess (dict): Initial guess for model parameters.

        Returns:
            dict: Guess with parameters enforced within bounds.
        """
        for param, value in guess.items():
            bounds = self.param_bounds[param]
            if value < bounds[0]:
                guess[param] = bounds[0]
            elif value > bounds[1]:
                guess[param] = bounds[1]
        return guess



[docs]
    def ln_gamma(self, x, phi):
        """Calculates natural log of activity coefficients.

        This method is implemented in every subclass for the specific model.
        """
        raise NotImplementedError('ln_gamma method must be implemented in subclass.')



[docs]
    def gamma(self, x, phi):
        """Calculates activity coefficients (gamma) from ln_gamma.

        Args:
            x (array-like): Mole fractions of components in the adsorbed phase.
            phi (float): Spreading pressure.

        Returns:
            np.ndarray: Activity coefficients for each component.
        """
        return np.exp(self.ln_gamma(x, phi))



[docs]
    def inverse_excess_loading(self, x, phi):
        """Calculates inverse excess loading.

        This method is implemented in every subclass for the specific model.
        """
        raise NotImplementedError('inverse_excess_loading method must be implemented '
                                  'in subclass.')



[docs]
    def _fit_total_loading(self, verbose, optimization_options):
        """Fits model parameters to total loading data.

        This method fits the model parameters to the total loading data by minimizing
        the difference between the predicted and observed total loadings. It uses a
        least-squares approach to find the optimal parameters. At each step, the
        model parameters are updated and used in a RAST calculation to predict the total
        loading.

        Note: Using total loading data can be less reliable for fitting activity
        coefficient models as multiple combinations of parameters can give similar total
        loading predictions. Only use this method if you do not have component loading
        data available.

        Args:
            verbose (bool): If True, prints model parameters at each iteration and
                            convergence information.
            optimization_options (dict): Options for the least-squares optimization when
                fitting from data. This is passed directly to
                scipy.optimize.least_squares, so you can specify any options available
                there.

        Returns:
            None: Model parameters are stored in self.model_parameters.

        Raises:
            RuntimeError: If the fit to total loading fails to converge.
        """
        # Import RAST here to avoid circular import issues
        from pyrast.calculations.rast import rast

        partial_fug = np.asarray(self.partial_fug)
        points = len(partial_fug)

        def residuals(params):
            # Assign parameters to model
            num_params = len(self.param_names)
            self.model_parameters = {self.param_names[i]: params[i]
                                     for i in range(num_params)}
            res = np.zeros(points)
            for i in range(points):
                q_total_pred = np.sum(rast(partial_fug[i], self.isotherms,
                                           self))
                res[i] = q_total_pred - self.loadings[i]
            return res

        # Assign initial guess for parameters
        initial_guess = list(self.param_guess.values())

        # Enforce parameter bounds
        bounds = [[self.param_bounds[param][0] for param in self.param_names],
                  [self.param_bounds[param][1] for param in self.param_names]]

        fitting_inputs = {
            'fun': residuals,
            'x0': initial_guess,
            'bounds': bounds,
            'xtol': self.param_tol,
        }
        if optimization_options is not None:
            fitting_inputs.update(optimization_options)
        res = least_squares(**fitting_inputs)

        if not res.success:
            raise RuntimeError(f'Total loading fit failed: {res.message}. Try a '
                               'different initial guess or check data quality.')

        # Print residuals if verbose
        if verbose:
            print(f'Fitted parameters: {dict(zip(self.param_names, res.x))}')
            print(f'Residual Sum of Squares: {res.cost}')

        # Assign final parameters to model
        num_params = len(self.param_names)
        self.model_parameters = {self.param_names[i]: res.x[i]
                                 for i in range(num_params)}



[docs]
    def _fit_component_loadings(self, verbose, optimization_options,
                                root_bracketing_options):
        """Fits model parameters to component loading data.

        This method fits the model parameters to component loading data by minimizing
        the difference between the predicted and observed ln(activity coefficients).
        It uses a least-squares approach to find the optimal parameters. At each step,
        the model parameters are updated and used in a 1D root-finding calculation to
        find the spreading pressure that gives the observed total loading for each data
        point. All model parameters can be fit simultaneously using 2+ data points, or
        the C parameter can be fixed and the other parameters fit using a single data
        point.

        Args:
            verbose (bool): If True, prints model parameters and residual information
                after fitting.
            optimization_options (dict): Options for the least-squares optimization when
                fitting model parameters. This is passed directly to
                scipy.optimize.least_squares, so you can specify any options available
                there.
            root_bracketing_options (dict): Options for the root bracketing when fitting
                from component loading data. This is passed as keyword arguments to the
                _bracket_phi function that is used to find the spreading pressure at
                each iteration.

        Returns:
            None: Model parameters are stored in self.model_parameters.

        Raises:
            RuntimeError: If the fit to component loadings fails to converge or if a
                root cannot be found for the spreading pressure at any iteration.
            ValueError: If residuals are not finite at the bracketing limits for root
                finding.
        """
        # Reshape data if single point is provided
        single_point = False
        if self.loadings.ndim == 1:
            single_point = True
            self.loadings = self.loadings.reshape(1, -1)
            self.partial_fug = self.partial_fug.reshape(1, -1)

        # Calculate important variables for least squares fitting
        points = len(self.partial_fug)
        q_total = np.sum(self.loadings, axis=1)
        xs = self.loadings / q_total[:, None]

        def solve_phi(x, q_total):
            p0 = np.zeros(len(self.isotherms))
            # Calculate residual for determining phi by total loading at given phi
            def residuals(phi, x):
                for i in range(len(self.isotherms)):
                    p0[i] = self.isotherms[i].p0(phi)
                q0 = np.array([self.isotherms[i].loading(p0[i])
                                for i in range(len(self.isotherms))])
                q_excess = self.inverse_excess_loading(x, phi)
                q_total_pred = 1.0 / (np.sum(x / q0) + q_excess)
                return q_total_pred - q_total

            # Use a bracketing strategy to ensure brentq can find a root. If a root is
            # bracketed, then brentq is guaranteed to find it.
            def _bracket_phi(x, residuals, *, phi_low, phi_high, max_expand, phi_cap):
                # Determine the maximum phi based on isotherm extrapolation limits
                def _phi_cap_for_isotherm(iso):
                    if isinstance(iso, InterpolatorIsotherm):
                        if iso.extrap_method is not None or iso.fill_value is not None:
                            return iso.extrap_p
                        p_max = iso.df[iso.pressure_key].max()
                        return iso.spreading_pressure(p_max)
                    if isinstance(iso, CubicIsotherm):
                        if iso.extrap_method is not None:
                            return iso.extrap_p
                        p_max = iso.df[iso.pressure_key].max()
                        return iso.spreading_pressure(p_max)
                    return np.inf

                # Compute the cap if there is one
                caps = [_phi_cap_for_isotherm(iso) for iso in self.isotherms]
                phi_cap = min(caps) if any(np.isfinite(caps)) else np.inf

                # Check lower limit to ensure we don't start with an invalid point
                f_low = residuals(phi_low, x)
                if not np.isfinite(f_low):
                    raise ValueError('Could not find finite residual at initial '
                                     'phi_low. Try adjusting the bracketing '
                                     'parameters, though this may indicate the model '
                                     'is not suitable for this system.')

                # Lower upper limit if necessary to find a finite residual
                f_high = residuals(phi_high, x)
                while not np.isfinite(f_high) and phi_high > phi_low:
                    phi_high *= 0.9
                    f_high = residuals(phi_high, x)

                # Throw error if we can't find a finite residual at the upper limit
                if not np.isfinite(f_high):
                    raise ValueError('Could not find finite residual at initial '
                                     'phi_high. Try adjusting the bracketing '
                                     'parameters, though this may indicate the model'
                                     'is not suitable for this system.')

                # Expand upper limit until we bracket a root or hit the cap
                for _ in range(max_expand):
                    if np.isfinite(f_high) and np.sign(f_low) != np.sign(f_high):
                        return phi_low, phi_high

                    if np.isfinite(phi_cap) and phi_high >= phi_cap:
                        break

                    phi_high = min(phi_high * 2.0,
                                phi_cap) if np.isfinite(phi_cap) else phi_high * 2.0
                    f_high = residuals(phi_high, x)

                    # if we just hit a NaN/inf, back off once
                    if not np.isfinite(f_high):
                        phi_high *= 0.5
                        f_high = residuals(phi_high, x)
                        if not np.isfinite(f_high):
                            break

                # If we exit the loop without finding a valid bracket, raise an error
                raise RuntimeError('Could not bracket a root for phi. Try adjusting the'
                                 ' bracketing parameters, though this may indicate the '
                                 'model is not suitable for this system.')
            # Prepare inputs for bracketing
            bracketing_inputs = {'phi_low': 1e-12, 'phi_high': 1.0, 'max_expand': 60,
                                 'phi_cap': np.inf}
            if root_bracketing_options is not None:
                bracketing_inputs.update(root_bracketing_options)

            # Solve for phi with bracketed root finding
            phi_low, phi_high = _bracket_phi(x, residuals, **bracketing_inputs)
            return cast('float', brentq(residuals, phi_low, phi_high, args=(x,)))

        def residuals(params):
            # Update model parameters that we are fitting
            num_params = len(params)
            self.model_parameters.update({self.param_names[i]: params[i]
                                     for i in range(num_params)})

            # Calculate residual of predicted vs observed ln(gamma)
            res = np.zeros(points * 2)
            for i in range(points):
                phi = solve_phi(xs[i], q_total[i])
                p0 = np.asarray([iso.p0(phi) for iso in self.isotherms])

                ln_gamma_exp = np.log(self.partial_fug[i] / (p0 * xs[i]))
                ln_gamma_pred = self.ln_gamma(xs[i], phi)
                res[2*i:2*i+2] = ln_gamma_pred - ln_gamma_exp
            return res

        # Assign initial guess for parameters
        initial_guess = list(self.param_guess.values())

        # Enforce parameter bounds
        bounds = [[self.param_bounds[param][0] for param in self.param_names],
                  [self.param_bounds[param][1] for param in self.param_names]]

        # Enable single point fitting if only one data point is provided
        if single_point:
            initial_guess = initial_guess[:-1]
            bounds = [bounds[0][:-1], bounds[1][:-1]]
            self.model_parameters['C'] = self.c

        fitting_inputs = {
            'fun': residuals,
            'x0': initial_guess,
            'bounds': bounds,
            'xtol': self.param_tol,
        }
        # Update fitting inputs with any user-provided optimization options
        if optimization_options is not None:
            fitting_inputs.update(optimization_options)
        res = least_squares(**fitting_inputs)

        if not res.success:
            raise RuntimeError(f'Component loading fit failed: {res.message}. Try a '
                               'different initial guess or check data quality.')
        # Print residuals if verbose
        if verbose:
            print(f'Fitted parameters: {dict(zip(self.param_names, res.x))}')
            print(f'Residual Sum of Squares: {res.cost}')

        # Assign final parameters to model
        num_params = len(res.x)
        self.model_parameters.update({self.param_names[i]: res.x[i]
                                 for i in range(num_params)})