Source code for colourlab.statistics

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
statistics: Colour metric statistics, part of the colourlab package

Copyright (C) 2013-2016 Ivar Farup

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import numpy as np
import scipy.integrate
import scipy.optimize


# =============================================================================
# Statistics of metrics
# =============================================================================


[docs]def stress(diff1, diff2, weights=None, confidence=.95): """ Compute the STRESS for the two sets of differences. The STRESS (standardised residual sum of squares) is returned as a percentage. If weights are given, WSTRESS is calculated. Parameters ---------- diff1 : ndarray 1D array of colour differences. diff2 : ndarray 1D array of colour differences. weights : ndarray 1D array of individual weights for the colour differences. If None, the standard STRESS is calculated, if given, WSTRESS is calculated. confidence : float The size of the confidence interval (e.g., .95 for a 95% confidence interval) Returns ------- stress : float Standard residual sum of squares. interval : tuple The confidence interval for STRESS_a / STRESS_b """ from scipy.stats import f if weights is None: weights = np.ones(np.shape(diff1)) F = (diff1**2).sum() / (diff1 * diff2).sum() stress = np.sqrt((weights * (diff1 - F * diff2)**2).sum() / (weights * (F * diff2)**2).sum()) N = np.shape(weights)[0] return stress, f.interval(confidence, N - 1, N - 1)
def _ellipse_union(th, ell1, ell2): """ For the Pant R values. Integrand the union of two ellipses. """ r1 = ell1[0] * ell1[1] / (np.sqrt(ell1[1]**2 * np.cos(th-ell1[2])**2 + ell1[0]**2 * np.sin(th-ell1[2])**2)) r2 = ell2[0] * ell2[1] / (np.sqrt(ell2[1]**2 * np.cos(th-ell2[2])**2 + ell2[0]**2 * np.sin(th-ell2[2])**2)) u = max(r1, r2) return .5 * u**2 def _ellipse_intersection(th, ell1, ell2): """ For the Pant R values. Integrand for the intersection of two ellipses. """ r1 = ell1[0] * ell1[1] / (np.sqrt(ell1[1]**2 * np.cos(th-ell1[2])**2 + ell1[0]**2 * np.sin(th-ell1[2])**2)) r2 = ell2[0] * ell2[1] / (np.sqrt(ell2[1]**2 * np.cos(th-ell2[2])**2 + ell2[0]**2 * np.sin(th-ell2[2])**2)) u = min(r1, r2) return .5 * u**2 def _pant_R_value(ell1, ell2): """ Compute single R value for the two given ellipses. """ area_intersection = scipy.integrate.quad(_ellipse_intersection, 0, 2 * np.pi, (ell1, ell2)) area_union = scipy.integrate.quad(_ellipse_union, 0, 2 * np.pi, (ell1, ell2)) return area_intersection[0] / area_union[0] def _pant_R_values(ells1, ells2, scale=1): """ Compute set of R values for the two given sets of ellipses. """ ells1 = ells1.copy() ells1[:, 0:2] = ells1[:, 0:2] * scale N = np.shape(ells1)[0] r_values = np.zeros(N) for i in range(N): r_values[i] = _pant_R_value(ells1[i], ells2[i]) return r_values def _cost_function_pant(scale, ells1, ells2): """ Cost function for the optimisation of the scale for the R values. """ r_values = _pant_R_values(ells1, ells2, scale) return 1 - r_values.mean()
[docs]def pant_R_values(space, tdata1, tdata2, optimise=True, plane=None): """ Compute the list of R values for the given metric tensors in tdataN. The R values are computed in the given colour space. If optimise=True, the maximum overall R values are found by scaling one of the data sets. The ellipses are computed in the given plane. If plane=None, all three principal planes are used, and the resulting array of R values will be three times the length ot tdataN. Parameters ---------- space : Space The colour space in which to compute the R values. tdata1 : Tensors The first set of colour metric tensors. tdata2 : Tensors The second set of colour metric tensors. optimise : bool Whether or not to optimise the scaling of the ellipse set. plane : slice The principal plan for the ellipsoid cross sections. Returns ------- r_values : ndarray Pant R values """ if plane is None: ell1a = tdata1.get_ellipse_parameters(space, tdata1.plane_01) ell1b = tdata1.get_ellipse_parameters(space, tdata1.plane_12) ell1c = tdata1.get_ellipse_parameters(space, tdata1.plane_20) ell1 = np.concatenate((ell1a, ell1b, ell1c)) ell2a = tdata2.get_ellipse_parameters(space, tdata1.plane_01) ell2b = tdata2.get_ellipse_parameters(space, tdata1.plane_12) ell2c = tdata2.get_ellipse_parameters(space, tdata1.plane_20) ell2 = np.concatenate((ell2a, ell2b, ell2c)) else: ell1 = tdata1.get_ellipse_parameters(space, plane) ell2 = tdata2.get_ellipse_parameters(space, plane) if optimise: res = scipy.optimize.fmin(_cost_function_pant, 1, (ell1, ell2)) return _pant_R_values(ell1, ell2, res[0]), res[0] else: return _pant_R_values(ell1, ell2), 0
[docs]def dataset_distance(data1, data2): """ Euclidean distances between data in the two data sets. Parameters ---------- data1 : ndarray The first dataset, Nx3. data2 : ndarray The second data set, Nx3. Returns ------- diff : ndarray Array of Euclidean distances, N. """ return np.sqrt(((data1 - data2)**2).sum(axis=1))
def _scale_rot_dataset(params, dataset): """ Scale and rotate dataset for optimisation. Parameters ---------- params : ndarray Optimising parameters, scale and angle. dataset : ndarray The data set to optimise. Returns ------- new_set : ndarray The scaled and rotated dataset. """ scale_mat = params[1] * np.eye(3) scale_mat[0, 0] = params[0] th = params[2] rot_mat = np.array([[1, 0, 0], [0, np.cos(th), -np.sin(th)], [0, np.sin(th), np.cos(th)]]) sys_mat = np.dot(scale_mat, rot_mat) return np.dot(sys_mat, dataset.T).T def _cost_function_dataset(params, dataset, ground_truth): """ Cost function for the optimisation of the scale for the R values. Parameters ---------- params : ndarray Optimising parameters, L-scale, C-scale and angle. dataset : ndarray The data set to optimise. ground_truth : ndarray The ground truth dataset. Returns ------- cost : float The cost for the current values of scale and angle. """ return dataset_distance( _scale_rot_dataset(params, dataset), ground_truth).sum()
[docs]def minimal_dataset_distance(dataset, ground_truth): """ Return the minimal dataset distance between a dataset and a ground truth. The dataset is assumed to be on the Lab form (as an Nx3 ndarray) and is changed by scaling and rotation about the L axis. Parameters ---------- dataset : ndarray Nx3 ndarray with the colour data. ground_truth : ndarray Nx3 ndarray with the ground truth colour data. Returns ------- diff : ndarray Array of minimal Euclidean distances. opt_data : ndarray The optimised data set by scaling and rotation. L-scale : float The optimal scale. C-scale : float The optimal scale. angle : float The optimal angle. """ params = scipy.optimize.fmin(_cost_function_dataset, np.array([1, 1, 0]), (dataset, ground_truth)) opt_data = _scale_rot_dataset(params, dataset) return dataset_distance(opt_data, ground_truth), \ opt_data, params[0], params[1], params[2]