8.2. Descriptive Statistics
8.2.1. Mean
Compute the arithmetic mean along the specified axis.
The arithmetic mean is the sum of the elements along the axis divided by the number of elements.
The average is taken over the flattened array by default, otherwise over the specified axis.
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.mean(a)
np.float64(2.0)
>>>
>>> np.mean(a, axis=0)
np.float64(2.0)
>>>
>>> np.mean(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.mean(a)
np.float64(3.5)
>>>
>>> np.mean(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.mean(a, axis=1)
array([2., 5.])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.mean(a)
np.float64(5.0)
>>>
>>> np.mean(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.mean(a, axis=1)
array([2., 5., 8.])
8.2.2. Average
Compute the weighted average along the specified axis.
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.average(a)
np.float64(2.0)
>>>
>>> np.average(a, axis=0)
np.float64(2.0)
>>>
>>> np.average(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis: axis 1 is out of bounds for array of dimension 1
>>>
>>> np.average(a, weights=[1, 1, 2])
np.float64(2.25)
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.average(a)
np.float64(3.5)
>>>
>>> np.average(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.average(a, axis=1)
array([2., 5.])
>>>
>>> np.average(a, weights=[[1, 0, 2],
... [2, 0, 1]])
np.float64(3.5)
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.average(a)
np.float64(5.0)
>>>
>>> np.average(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.average(a, axis=1)
array([2., 5., 8.])
>>>
>>> np.average(a, weights=[[1, 0, 2],
... [2, 0, 1],
... [1./4, 1./2, 1./3]])
np.float64(4.2)
8.2.3. Median
Compute the median along the specified axis.
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.median(a)
np.float64(2.0)
>>>
>>> np.median(a, axis=0)
np.float64(2.0)
>>>
>>> np.median(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.median(a)
np.float64(3.5)
>>>
>>> np.median(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.median(a, axis=1)
array([2., 5.])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.median(a)
np.float64(5.0)
>>>
>>> np.median(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.median(a, axis=1)
array([2., 5., 8.])
>>> a = np.array([1, 2, 3, 4])
>>>
>>> np.median(a)
np.float64(2.5)
8.2.4. Variance
Compute the variance along the specified axis.
Variance of the array elements is a measure of the spread of a distribution.
The variance is the average of the squared deviations from the mean, i.e.,
var = mean(abs(x - x.mean())**2)
The variance is computed for the flattened array by default, otherwise over the specified axis.
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.var(a)
np.float64(0.6666666666666666)
>>>
>>> np.var(a, axis=0)
np.float64(0.6666666666666666)
>>>
>>> np.var(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.var(a)
np.float64(2.9166666666666665)
>>>
>>> np.var(a, axis=0)
array([2.25, 2.25, 2.25])
>>>
>>> np.var(a, axis=1)
array([0.66666667, 0.66666667])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.var(a)
np.float64(6.666666666666667)
>>>
>>> np.var(a, axis=0)
array([6., 6., 6.])
>>>
>>> np.var(a, axis=1)
array([0.66666667, 0.66666667, 0.66666667])
8.2.5. Standard Deviation
Compute the standard deviation along the specified axis.
Standard deviation of the array elements is a measure of the spread of a distribution.
The standard deviation is the square root of the average of the squared deviations from the mean, i.e.,
std = sqrt(mean(abs(x - x.mean())**2))
The standard deviation is computed for the flattened array by default, otherwise over the specified axis.
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.std(a)
np.float64(0.816496580927726)
>>>
>>> np.std(a, axis=0)
np.float64(0.816496580927726)
>>>
>>> np.std(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.std(a)
np.float64(1.707825127659933)
>>>
>>> np.std(a, axis=0)
array([1.5, 1.5, 1.5])
>>>
>>> np.std(a, axis=1)
array([0.81649658, 0.81649658])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.std(a)
np.float64(2.581988897471611)
>>>
>>> np.std(a, axis=0)
array([2.44948974, 2.44948974, 2.44948974])
>>>
>>> np.std(a, axis=1)
array([0.81649658, 0.81649658, 0.81649658])
8.2.6. Covariance
Estimate a covariance matrix, given data and weights.
Covariance indicates the level to which two variables vary together.
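ddof
- Delta Degrees of Freedom: the covariance is normalized by N - ddof, where N is the number of observations (ddof=1, the default, gives the unbiased estimate; ddof=0 gives the simple average)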
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.cov(a)
array(1.)
>>>
>>> np.cov(a, ddof=0)
array(0.66666667)
>>>
>>> np.cov(a, ddof=1)
array(1.)
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.cov(a)
array([[1., 1.],
[1., 1.]])
>>>
>>> np.cov(a, ddof=0)
array([[0.66666667, 0.66666667],
[0.66666667, 0.66666667]])
>>>
>>> np.cov(a, ddof=1)
array([[1., 1.],
[1., 1.]])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.cov(a)
array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
>>>
>>> np.cov(a, ddof=0)
array([[0.66666667, 0.66666667, 0.66666667],
[0.66666667, 0.66666667, 0.66666667],
[0.66666667, 0.66666667, 0.66666667]])
>>>
>>> np.cov(a, ddof=1)
array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
8.2.7. Correlation coefficient
The correlation coefficient is a measure of the linear correlation between two variables X and Y. It is also known as:
Pearson correlation coefficient (PCC)
Pearson product-moment correlation coefficient (PPMCC)
bivariate correlation
>>> import numpy as np
>>> a = np.array([1, 2, 3])
>>>
>>> np.corrcoef(a)
np.float64(1.0)
>>> a = np.array([[1, 2, 3],
... [4, 5, 6]])
>>>
>>> np.corrcoef(a)
array([[1., 1.],
[1., 1.]])
>>> a = np.array([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9]])
>>>
>>> np.corrcoef(a)
array([[1., 1., 1.],
[1., 1., 1.],
[1., 1., 1.]])
>>> a = np.array([[1, 2, 1],
... [5, 4, 3]])
>>>
>>> np.corrcoef(a)
array([[1., 0.],
[0., 1.]])
>>> a = np.array([[3, 1, 3],
... [5, 5, 3]])
>>>
>>> np.corrcoef(a)
array([[ 1. , -0.5],
[-0.5, 1. ]])
>>> a = np.array([[5, 2, 1],
... [2, 4, 5]])
>>>
>>> np.corrcoef(a)
array([[ 1. , -0.99587059],
[-0.99587059, 1. ]])