# 3.3. Math Statistics¶

• statistics module

## 3.3.1. Mean¶

Function

Description

statistics.mean()

Arithmetic mean ('average') of data

statistics.fmean()

faster, floating point variant of statistics.mean(), since Python 3.8

statistics.harmonic_mean()

Harmonic mean of data

statistics.geometric_mean()

Geometric mean of data, since Python 3.8

Arithmetic mean ('average') of data:

from statistics import mean

mean([1, 2, 3, 4, 4])
# 2.8
mean([-1.0, 2.5, 3.25, 5.75])
# 2.625


Harmonic mean of data:

from statistics import harmonic_mean

harmonic_mean([2.5, 3, 10])
# 3.6


## 3.3.2. Median¶

Function

Description

statistics.median()

Median (middle value) of data

statistics.median_low()

Low median of data

statistics.median_high()

High median of data

statistics.median_grouped()

Median, or 50th percentile, of grouped data

Median (middle value) of data:

from statistics import median

median([1, 3, 5])
# 3
median([1, 3, 5, 7])
# 4.0

• The low median is always a member of the data set.

• When the number of data points is odd, the middle value is returned.

• When it is even, the smaller of the two middle values is returned.

Low median of data:

from statistics import median_low

median_low([1, 3, 5])
# 3
median_low([1, 3, 5, 7])
# 3

• The high median is always a member of the data set.

• When the number of data points is odd, the middle value is returned.

• When it is even, the larger of the two middle values is returned.

High median of data:

from statistics import median_high

median_high([1, 3, 5])
# 3
median_high([1, 3, 5, 7])
# 5

• Median of grouped continuous data.

• Calculated using interpolation as the 50th percentile.

Median, or 50th percentile, of grouped data:

from statistics import median_grouped

median_grouped([52, 52, 53, 54])
# 52.5
median_grouped([1, 3, 3, 5, 7], interval=1)
# 3.25
median_grouped([1, 3, 3, 5, 7], interval=2)
# 3.5


## 3.3.3. Mode¶

Function

Description

statistics.mode()

Mode (most common value) of discrete data

statistics.multimode()

returns a list of the most common values, since Python 3.8

statistics.quantiles()

divides data or a distribution into equiprobable intervals (e.g. quartiles, deciles, or percentiles), since Python 3.8

Mode (most common value) of discrete data:

from statistics import mode

mode([1, 1, 2, 3, 3, 3, 3, 4])
# 3
mode(["red", "blue", "blue", "red", "green", "red", "red"])
# 'red'


## 3.3.4. Distribution¶

Function

Description

statistics.NormalDist

tool for creating and manipulating normal distributions of a random variable

## 3.3.5. Standard Deviation¶

Function

Description

statistics.pstdev()

Population standard deviation of data

statistics.stdev()

Sample standard deviation of data

Sample standard deviation of data:

from statistics import stdev

stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
# 1.0810874155219827

• Population standard deviation

• Is the square root of the population variance

Population standard deviation:

from statistics import pstdev

pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
# 0.986893273527251


## 3.3.6. Variance¶

Function

Description

statistics.pvariance()

Population variance of data

statistics.variance()

Sample variance of data

Sample variance of data:

from statistics import variance

variance([2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5])
# 1.3720238095238095


Population variance of data:

from statistics import pvariance

pvariance([0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25])
# 1.25


## 3.3.7. Examples¶

# NormalDist has to be imported explicitly; without this line the example
# stops with a NameError on its first statement.
from statistics import NormalDist

# Estimate a normal distribution from observed February temperatures
# (degrees Celsius, see the Fahrenheit conversion below)
temperature_feb = NormalDist.from_samples([4, 12, -3, 2, 7, 14])

temperature_feb.mean
# 6.0
temperature_feb.stdev
# 6.356099432828281

# Chance of being under 3 degrees
temperature_feb.cdf(3)
# 0.3184678262814532

# Relative chance of being 7 degrees versus 10 degrees
temperature_feb.pdf(7) / temperature_feb.pdf(10)
# 1.2039930378537762

el_niño = NormalDist(4, 2.5)

# Add in a climate effect
temperature_feb += el_niño

temperature_feb
# NormalDist(mu=10.0, sigma=6.830080526611674)

# Convert to Fahrenheit
temperature_feb * (9/5) + 32
# NormalDist(mu=50.0, sigma=12.294144947901014)

# Generate random samples (values differ on every run unless seed= is passed)
temperature_feb.samples(3)
# e.g. [7.672102882379219, 12.000027119750287, 4.647488369766392]


## 3.3.8. Assignments¶

"""
* Assignment: Math Statistics Stats
* Complexity: easy
* Lines of code: 11 lines
* Time: 13 min

English:
1. For columns:
a. sepal_length,
b. sepal_width,
c. petal_length,
d. petal_width.
2. Print calculated values:
a. mean,
b. median,
c. standard deviation,
d. variance.
3. Use statistics module from Python standard library
4. Run doctests - all must succeed

Polish:
1. Dla kolumn:
a. sepal_length,
b. sepal_width,
c. petal_length,
d. petal_width.
2. Wypisz wyliczone wartości:
a. średnią,
b. medianę,
c. odchylenie standardowe,
d. wariancję.
3. Użyj modułu statistics z biblioteki standardowej Python
4. Uruchom doctesty - wszystkie muszą się powieść

Hint:
* Note that the stdev of petal_length differs between Python versions:
a. Python 3.10: 1.8602739173624534
b. Python 3.11: 1.8602739173624532

Tests:
>>> import sys; sys.tracebacklimit = 0

>>> stats(sepal_length)
{'mean': 5.833333333333333, 'stdev': 0.9084785816591018, 'median': 5.7, 'variance': 0.8253333333333333}
>>> stats(sepal_width)
{'mean': 3.0619047619047617, 'stdev': 0.36670995415476587, 'median': 3.0, 'variance': 0.1344761904761905}
>>> stats(petal_length)
{'mean': 3.8523809523809525, 'stdev': 1.8602739173624532, 'median': 4.5, 'variance': 3.4606190476190477}
>>> stats(petal_width)
{'mean': 1.2333333333333334, 'stdev': 0.7741662181555931, 'median': 1.4, 'variance': 0.5993333333333334}
"""

from statistics import mean, stdev, variance, median

# Iris measurements used as the input for this assignment.
# The first tuple is the header row (column names); every following tuple
# holds four float measurements and the species name as a string.
# NOTE(review): the doctests call `stats(...)` on names such as `sepal_length`,
# which are not defined in this scaffold - presumably the student derives the
# per-column values from DATA and writes `stats`; confirm against the solution.
DATA = [
('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
(5.8, 2.7, 5.1, 1.9, 'virginica'),
(5.1, 3.5, 1.4, 0.2, 'setosa'),
(5.7, 2.8, 4.1, 1.3, 'versicolor'),
(6.3, 2.9, 5.6, 1.8, 'virginica'),
(6.4, 3.2, 4.5, 1.5, 'versicolor'),
(4.7, 3.2, 1.3, 0.2, 'setosa'),
(7.0, 3.2, 4.7, 1.4, 'versicolor'),
(7.6, 3.0, 6.6, 2.1, 'virginica'),
(4.9, 3.0, 1.4, 0.2, 'setosa'),
(4.9, 2.5, 4.5, 1.7, 'virginica'),
(7.1, 3.0, 5.9, 2.1, 'virginica'),
(4.6, 3.4, 1.4, 0.3, 'setosa'),
(5.4, 3.9, 1.7, 0.4, 'setosa'),
(5.7, 2.8, 4.5, 1.3, 'versicolor'),
(5.0, 3.6, 1.4, 0.3, 'setosa'),
(5.5, 2.3, 4.0, 1.3, 'versicolor'),
(6.5, 3.0, 5.8, 2.2, 'virginica'),
(6.5, 2.8, 4.6, 1.5, 'versicolor'),
(6.3, 3.3, 6.0, 2.5, 'virginica'),
(6.9, 3.1, 4.9, 1.5, 'versicolor'),
(4.6, 3.1, 1.5, 0.2, 'setosa'),
]


"""
* Assignment: Math Statistics Iris
* Complexity: easy
* Lines of code: 30 lines
* Time: 21 min

English:
1. Create dict result: dict[str, dict]
2. For each species calculate for numerical values:
a. mean,
b. median,
c. standard deviation,
d. variance.
3. Save data to result dict
4. Non-functional requirements:
a. Use statistics module from Python standard library
5. Run doctests - all must succeed

Polish:
1. Stwórz słownik result: dict[str, dict]
2. Dla każdego gatunku wylicz dla wartości numerycznych:
a. średnią,
b. medianę,
c. odchylenie standardowe,
d. wariancję.
3. Dane zapisz w słowniku result
4. Wymagania niefunkcjonalne:
a. Użyj modułu statistics z biblioteki standardowej Python
5. Uruchom doctesty - wszystkie muszą się powieść

Tests:
>>> import sys; sys.tracebacklimit = 0

>>> result  # doctest: +NORMALIZE_WHITESPACE
{'virginica': {'sepal_length': {'values': [5.8, 6.3, 7.6, 4.9, 7.1, 6.5, 6.3],
'mean': 6.357142857142857,
'median': 6.3,
'stdev': 0.871506631944823,
'variance': 0.7595238095238092},
'sepal_width': {'values': [2.7, 2.9, 3.0, 2.5, 3.0, 3.0, 3.3],
'mean': 2.914285714285714,
'median': 3.0,
'stdev': 0.25448360411214066,
'variance': 0.06476190476190473},
'petal_length': {'values': [5.1, 5.6, 6.6, 4.5, 5.9, 5.8, 6.0],
'mean': 5.642857142857142,
'median': 5.8,
'stdev': 0.6754187413675136,
'variance': 0.45619047619047615},
'petal_width': {'values': [1.9, 1.8, 2.1, 1.7, 2.1, 2.2, 2.5],
'mean': 2.0428571428571427,
'median': 2.1,
'stdev': 0.26992062325273125,
'variance': 0.07285714285714287}},
'setosa': {'sepal_length': {'values': [5.1, 4.7, 4.9, 4.6, 5.4, 5.0, 4.6],
'mean': 4.9,
'median': 4.9,
'stdev': 0.2943920288775951,
'variance': 0.08666666666666677},
'sepal_width': {'values': [3.5, 3.2, 3.0, 3.4, 3.9, 3.6, 3.1],
'mean': 3.3857142857142857,
'median': 3.4,
'stdev': 0.31320159337914943,
'variance': 0.09809523809523807},
'petal_length': {'values': [1.4, 1.3, 1.4, 1.4, 1.7, 1.4, 1.5],
'mean': 1.4428571428571428,
'median': 1.4,
'stdev': 0.12724180205607036,
'variance': 0.01619047619047619},
'petal_width': {'values': [0.2, 0.2, 0.2, 0.3, 0.4, 0.3, 0.2],
'mean': 0.2571428571428572,
'median': 0.2,
'stdev': 0.07867957924694431,
'variance': 0.006190476190476191}},
'versicolor': {'sepal_length': {'values': [5.7, 6.4, 7.0, 5.7, 5.5, 6.5, 6.9],
'mean': 6.242857142857143,
'median': 6.4,
'stdev': 0.6106202935189289,
'variance': 0.3728571428571429},
'sepal_width': {'values': [2.8, 3.2, 3.2, 2.8, 2.3, 2.8, 3.1],
'mean': 2.8857142857142857,
'median': 2.8,
'stdev': 0.31847852585154235,
'variance': 0.10142857142857152},
'petal_length': {'values': [4.1, 4.5, 4.7, 4.5, 4.0, 4.6, 4.9],
'mean': 4.4714285714285715,
'median': 4.5,
'stdev': 0.31997023671109237,
'variance': 0.10238095238095248},
'petal_width': {'values': [1.3, 1.5, 1.4, 1.3, 1.3, 1.5, 1.5],
'mean': 1.4,
'median': 1.4,
'stdev': 0.09999999999999998,
'variance': 0.009999999999999995}}}
"""

from statistics import mean, stdev, median, variance

# Iris measurements used as the input for this assignment.
# The first tuple is the header row (column names); every following tuple
# holds four float measurements and the species name as a string.
DATA = [
('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
(5.8, 2.7, 5.1, 1.9, 'virginica'),
(5.1, 3.5, 1.4, 0.2, 'setosa'),
(5.7, 2.8, 4.1, 1.3, 'versicolor'),
(6.3, 2.9, 5.6, 1.8, 'virginica'),
(6.4, 3.2, 4.5, 1.5, 'versicolor'),
(4.7, 3.2, 1.3, 0.2, 'setosa'),
(7.0, 3.2, 4.7, 1.4, 'versicolor'),
(7.6, 3.0, 6.6, 2.1, 'virginica'),
(4.9, 3.0, 1.4, 0.2, 'setosa'),
(4.9, 2.5, 4.5, 1.7, 'virginica'),
(7.1, 3.0, 5.9, 2.1, 'virginica'),
(4.6, 3.4, 1.4, 0.3, 'setosa'),
(5.4, 3.9, 1.7, 0.4, 'setosa'),
(5.7, 2.8, 4.5, 1.3, 'versicolor'),
(5.0, 3.6, 1.4, 0.3, 'setosa'),
(5.5, 2.3, 4.0, 1.3, 'versicolor'),
(6.5, 3.0, 5.8, 2.2, 'virginica'),
(6.5, 2.8, 4.6, 1.5, 'versicolor'),
(6.3, 3.3, 6.0, 2.5, 'virginica'),
(6.9, 3.1, 4.9, 1.5, 'versicolor'),
(4.6, 3.1, 1.5, 0.2, 'setosa'),
]

# Placeholder to be filled in by the solution. The doctest in the module
# docstring expects a mapping species -> column name -> dict with the keys
# 'values', 'mean', 'median', 'stdev' and 'variance' (in that order, with
# species and values in order of first appearance in DATA, since the doctest
# compares the printed repr).
result = {}