8.2. Statistics Descriptive

8.2.1. Mean

Compute the arithmetic mean along the specified axis.
The arithmetic mean is the sum of the elements along the axis divided by the number of elements.
The average is taken over the flattened array by default, otherwise over the specified axis.

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.mean(a)
np.float64(2.0)
>>>
>>> np.mean(a, axis=0)
np.float64(2.0)
>>>
>>> np.mean(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.mean(a)
np.float64(3.5)
>>>
>>> np.mean(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.mean(a, axis=1)
array([2., 5.])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.mean(a)
np.float64(5.0)
>>>
>>> np.mean(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.mean(a, axis=1)
array([2., 5., 8.])

8.2.2. Average

Compute the weighted average along the specified axis.

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.average(a)
np.float64(2.0)
>>>
>>> np.average(a, axis=0)
np.float64(2.0)
>>>
>>> np.average(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis: axis 1 is out of bounds for array of dimension 1
>>>
>>> np.average(a, weights=[1, 1, 2])
np.float64(2.25)

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.average(a)
np.float64(3.5)
>>>
>>> np.average(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.average(a, axis=1)
array([2., 5.])
>>>
>>> np.average(a, weights=[[1, 0, 2],
...                        [2, 0, 1]])
np.float64(3.5)

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.average(a)
np.float64(5.0)
>>>
>>> np.average(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.average(a, axis=1)
array([2., 5., 8.])
>>>
>>> np.average(a, weights=[[1, 0, 2],
...                        [2, 0, 1],
...                        [1./4, 1./2, 1./3]])
np.float64(4.2)

8.2.3. Median

Compute the median along the specified axis.

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.median(a)
np.float64(2.0)
>>>
>>> np.median(a, axis=0)
np.float64(2.0)
>>>
>>> np.median(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.median(a)
np.float64(3.5)
>>>
>>> np.median(a, axis=0)
array([2.5, 3.5, 4.5])
>>>
>>> np.median(a, axis=1)
array([2., 5.])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.median(a)
np.float64(5.0)
>>>
>>> np.median(a, axis=0)
array([4., 5., 6.])
>>>
>>> np.median(a, axis=1)
array([2., 5., 8.])

>>> a = np.array([1, 2, 3, 4])
>>>
>>> np.median(a)
np.float64(2.5)

8.2.4. Variance

Compute the variance along the specified axis.
Variance of the array elements is a measure of the spread of a distribution.
The variance is the average of the squared deviations from the mean, i.e., var = mean(abs(x - x.mean())**2).
The variance is computed for the flattened array by default, otherwise over the specified axis.

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.var(a)
np.float64(0.6666666666666666)
>>>
>>> np.var(a, axis=0)
np.float64(0.6666666666666666)
>>>
>>> np.var(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.var(a)
np.float64(2.9166666666666665)
>>>
>>> np.var(a, axis=0)
array([2.25, 2.25, 2.25])
>>>
>>> np.var(a, axis=1)
array([0.66666667, 0.66666667])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.var(a)
np.float64(6.666666666666667)
>>>
>>> np.var(a, axis=0)
array([6., 6., 6.])
>>>
>>> np.var(a, axis=1)
array([0.66666667, 0.66666667, 0.66666667])

8.2.5. Standard Deviation

Compute the standard deviation along the specified axis.
Standard deviation is a measure of the spread of the distribution of the array elements.
The standard deviation is the square root of the average of the squared deviations from the mean, i.e., std = sqrt(mean(abs(x - x.mean())**2)).
The standard deviation is computed for the flattened array by default, otherwise over the specified axis.

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.std(a)
np.float64(0.816496580927726)
>>>
>>> np.std(a, axis=0)
np.float64(0.816496580927726)
>>>
>>> np.std(a, axis=1)
Traceback (most recent call last):
numpy.exceptions.AxisError: axis 1 is out of bounds for array of dimension 1

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.std(a)
np.float64(1.707825127659933)
>>>
>>> np.std(a, axis=0)
array([1.5, 1.5, 1.5])
>>>
>>> np.std(a, axis=1)
array([0.81649658, 0.81649658])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.std(a)
np.float64(2.581988897471611)
>>>
>>> np.std(a, axis=0)
array([2.44948974, 2.44948974, 2.44948974])
>>>
>>> np.std(a, axis=1)
array([0.81649658, 0.81649658, 0.81649658])

8.2.6. Covariance

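Estimate a covariance matrix, given data and weights.
Covariance indicates the degree to which two variables vary together.
ddof (Delta Degrees of Freedom) is the value subtracted from the number of observations N in the divisor: by default np.cov divides by N - 1 (the same as ddof=1, the unbiased estimate), while ddof=0 divides by N.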

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.cov(a)
array(1.)
>>>
>>> np.cov(a, ddof=0)
array(0.66666667)
>>>
>>> np.cov(a, ddof=1)
array(1.)

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.cov(a)
array([[1., 1.],
       [1., 1.]])
>>>
>>> np.cov(a, ddof=0)
array([[0.66666667, 0.66666667],
       [0.66666667, 0.66666667]])
>>>
>>> np.cov(a, ddof=1)
array([[1., 1.],
       [1., 1.]])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.cov(a)
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])
>>>
>>> np.cov(a, ddof=0)
array([[0.66666667, 0.66666667, 0.66666667],
       [0.66666667, 0.66666667, 0.66666667],
       [0.66666667, 0.66666667, 0.66666667]])
>>>
>>> np.cov(a, ddof=1)
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

8.2.7. Correlation coefficient

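The correlation coefficient is a measure of the linear correlation between two variables X and Y.
It is also known as the Pearson correlation coefficient (PCC), the Pearson product-moment correlation coefficient (PPMCC), or the bivariate correlation.
Its value ranges from -1 (perfect negative linear correlation), through 0 (no linear correlation), to 1 (perfect positive linear correlation).
np.corrcoef treats each row of the input as one variable and returns the matrix of pairwise coefficients.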

../../_images/statistics-correlation-coefficient.png — Figure 8.13. Examples of scatter diagrams with different values of correlation coefficient (ρ)

>>> import numpy as np

>>> a = np.array([1, 2, 3])
>>>
>>> np.corrcoef(a)
np.float64(1.0)

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6]])
>>>
>>> np.corrcoef(a)
array([[1., 1.],
       [1., 1.]])

>>> a = np.array([[1, 2, 3],
...               [4, 5, 6],
...               [7, 8, 9]])
>>>
>>> np.corrcoef(a)
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

>>> a = np.array([[1, 2, 1],
...               [5, 4, 3]])
>>>
>>> np.corrcoef(a)
array([[1., 0.],
       [0., 1.]])

>>> a = np.array([[3, 1, 3],
...               [5, 5, 3]])
>>>
>>> np.corrcoef(a)
array([[ 1. , -0.5],
       [-0.5,  1. ]])

>>> a = np.array([[5, 2, 1],
...               [2, 4, 5]])
>>>
>>> np.corrcoef(a)
array([[ 1.        , -0.99587059],
       [-0.99587059,  1.        ]])

8.2. Statistics Descriptive

8.2.1. Mean

8.2.2. Average

8.2.3. Median

8.2.4. Variance

8.2.5. Standard Deviation

8.2.6. Covariance

8.2.7. Correlation coefficient

8.2.8. References