forked from AllenDowney/ThinkStats2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdensity.py
More file actions
114 lines (86 loc) · 2.87 KB
/
density.py
File metadata and controls
114 lines (86 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function
import math
import random
import brfss
import first
import thinkstats2
import thinkplot
def Summarize(data):
    """Prints summary statistics and returns the central values.

    Prints, in order: mean, std, median, skewness and Pearson's median
    skewness, one labelled value per line.

    data: pandas Series

    returns: tuple of (mean, median)
    """
    center = data.mean()
    spread = data.std()
    middle = thinkstats2.Median(data)

    # the three values computed above, printed as "label value" pairs
    for label, value in (('mean', center),
                         ('std', spread),
                         ('median', middle)):
        print(label, value)

    # the skewness measures are computed only at the point they are printed
    print('skewness', thinkstats2.Skewness(data))
    print('pearson skewness', thinkstats2.PearsonMedianSkewness(data))

    return center, middle
def ComputeSkewnesses():
    """Plots estimated PDFs of birth weight and adult weight.

    For each variable: prints its summary statistics, draws labelled
    vertical markers at the mean and the median, plots the estimated
    PDF, and saves the figure with thinkplot.Save (roots
    'density_totalwgt_kde' and 'density_wtkg2_kde').
    """
    def MarkStat(x, text_x, height, text, align):
        """Draws a gray vertical line at x and a text label near its base.

        x: horizontal position of the line
        text_x: horizontal position of the label
        height: top of the line; the label sits at one tenth of it
        text: label string
        align: horizontalalignment passed to thinkplot.Text
        """
        thinkplot.Plot([x, x], [0, height], color='0.6', linewidth=1)
        thinkplot.Text(text_x, 0.1*height, text, horizontalalignment=align)

    # --- birth weight (plotted with an x axis labelled 'lbs') ---
    # only the first of the three frames is used here
    live, _firsts, _others = first.MakeFrames()
    birth_wgt = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_wgt)

    height = 0.35
    # the labels are pushed to opposite sides of their markers
    MarkStat(mean, mean-0.15, height, 'mean', 'right')
    MarkStat(median, median+0.1, height, 'median', 'left')

    birth_pdf = thinkstats2.EstimatedPdf(birth_wgt)
    thinkplot.Pdf(birth_pdf, label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde', xlabel='lbs', ylabel='PDF')

    # --- adult weight (plotted with an x axis labelled 'kg') ---
    brfss_df = brfss.ReadBrfss(nrows=None)
    adult_wgt = brfss_df.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_wgt)

    height = 0.02499
    # label sides are swapped relative to the birth weight figure
    MarkStat(mean, mean+1, height, 'mean', 'left')
    MarkStat(median, median-1.5, height, 'median', 'right')

    adult_pdf = thinkstats2.EstimatedPdf(adult_wgt)
    thinkplot.Pdf(adult_pdf, label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
def MakePdfExample(n=500):
    """Plots a normal PDF next to a PDF estimated from a random sample.

    Also prints the normal density evaluated one standard deviation
    above the mean, and saves the figure with root 'pdf_example'.

    n: sample size
    """
    # mean and var of women's heights in cm, from the BRFSS
    mu = 163
    variance = 52.8
    sigma = math.sqrt(variance)

    # build the normal PDF and print one density value, FWIW
    normal_pdf = thinkstats2.NormalPdf(mu, sigma)
    print(normal_pdf.Density(mu + sigma))

    # plot the normal PDF; two Pdf plots go on this figure
    thinkplot.PrePlot(2)
    thinkplot.Pdf(normal_pdf, label='normal')

    # draw n Gaussian variates, estimate a PDF from them, and plot it
    sample = []
    for _ in range(n):
        sample.append(random.gauss(mu, sigma))
    estimated_pdf = thinkstats2.EstimatedPdf(sample)
    thinkplot.Pdf(estimated_pdf, label='sample KDE')

    thinkplot.Save(root='pdf_example',
                   xlabel='Height (cm)',
                   ylabel='Density')
def main():
    """Generates every figure produced by this script.

    Calls thinkstats2.RandomSeed(17) first, presumably so the random
    sample drawn in MakePdfExample is reproducible between runs.
    """
    thinkstats2.RandomSeed(17)

    # figure generators, run in this order
    for make_figure in (MakePdfExample, ComputeSkewnesses):
        make_figure()


# run only when executed as a script, not when imported
if __name__ == '__main__':
    main()