Skip to content

Commit 03975cc

Browse files
committed
Updated plotting functions and descriptions
1 parent ba4d20f commit 03975cc

File tree

2 files changed

+28
-14
lines changed

2 files changed

+28
-14
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ If _correlation coefficient is –1_, the variables are perfectly negatively cor
7878
- **Pie chart:** `df.plot.pie([y])`
7979

8080

81+
### Matplotlib Plotting
82+
- **Scatter plot:** `scatter(x_data, y_data, s = 30, color = '#539caf', alpha = 0.75)`
83+
- **Line plot:** `plot(x_data, y_data, lw = 2, color = '#539caf', alpha = 1)`
84+
- **Histogram:** `hist(data, n_bins, color = '#539caf')`
85+
- **Probability Density Function:** `plot(x_data, density_est(x_data), color = '#539caf', lw = 2)`, where `density_est(x_data)` computes the probability density at each data point
86+
- **Bar plot:** `bar(x_data, y_data, color = '#539caf', align = 'center')`
87+
- **Box plot:** `boxplot(y_data)`. The `x_data` values are applied as the x-axis tick labels with `set_xticklabels(x_data)`
88+
89+
90+
8191

8292
### Examples
8393
![alt text](https://github.com/GeorgeSeif/Data-Science-Python/blob/master/Images/explore_wine_scattermatrix.png)

plt_helpers.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
22

3-
def scatterplot(x_data, y_data, x_label, y_label, title):
3+
def scatterplot(x_data, y_data, x_label="", y_label="", title=""):
44

55
# Create the plot object
66
_, ax = plt.subplots()
@@ -15,7 +15,7 @@ def scatterplot(x_data, y_data, x_label, y_label, title):
1515
ax.set_ylabel(y_label)
1616

1717

18-
def lineplot(x_data, y_data, x_label, y_label, title):
18+
def lineplot(x_data, y_data, x_label="", y_label="", title=""):
1919
# Create the plot object
2020
_, ax = plt.subplots()
2121

@@ -30,7 +30,7 @@ def lineplot(x_data, y_data, x_label, y_label, title):
3030

3131

3232
# Line plot with 2 different y values
33-
def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color, y2_label, title):
33+
def lineplot2y(x_data, y1_data, y2_data, x_label="", y1_color="#539caf", y1_label="", y2_color="#7663b0", y2_label="", title=""):
3434
# Each variable will actually have its own plot object but they
3535
# will be displayed in just one plot
3636
# Create the first plot object and draw the line
@@ -50,23 +50,27 @@ def lineplot2y(x_data, x_label, y1_data, y1_color, y1_label, y2_data, y2_color,
5050
ax2.spines['right'].set_visible(True)
5151

5252

53-
def histogram(data, x_label, y_label, title):
53+
def histogram(data, n_bins, cumulative=False, x_label = "", y_label = "", title = ""):
    """Draw a histogram of ``data`` on a newly created figure.

    Args:
        data: Values to bin; passed unchanged to ``Axes.hist``.
        n_bins: Bin specification, forwarded as the ``bins`` argument of
            ``Axes.hist``.
        cumulative: Forwarded as the ``cumulative`` argument of ``Axes.hist``.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the plot title.
    """
    _, ax = plt.subplots()
    # Axes.hist names its bin parameter `bins`; an `n_bins` keyword is not
    # part of its signature and is rejected when forwarded to the patches.
    ax.hist(data, bins = n_bins, cumulative = cumulative, color = '#539caf')
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
5959

6060

6161

6262
# Overlay 2 histograms to compare them
63-
def overlaid_histogram(data1, data1_name, data1_color, data2, data2_name, data2_color, x_label, y_label, title):
64-
# Set the bounds for the bins so that the two distributions are
65-
# fairly compared
63+
def overlaid_histogram(data1, data2, n_bins = 0, data1_name="", data1_color="#539caf", data2_name="", data2_color="#7663b0", x_label="", y_label="", title=""):
64+
# Set the bounds for the bins so that the two distributions are fairly compared
6665
max_nbins = 10
6766
data_range = [min(min(data1), min(data2)), max(max(data1), max(data2))]
6867
binwidth = (data_range[1] - data_range[0]) / max_nbins
69-
bins = np.arange(data_range[0], data_range[1] + binwidth, binwidth)
68+
69+
70+
if n_bins == 0:
71+
bins = np.arange(data_range[0], data_range[1] + binwidth, binwidth)
72+
else:
73+
bins = n_bins
7074

7175
# Create the plot
7276
_, ax = plt.subplots()
@@ -79,7 +83,7 @@ def overlaid_histogram(data1, data1_name, data1_color, data2, data2_name, data2_
7983

8084

8185
# Probability Density Function
82-
def densityplot(x_data, density_est, x_label, y_label, title):
86+
def densityplot(x_data, density_est, x_label="", y_label="", title=""):
8387
_, ax = plt.subplots()
8488
ax.plot(x_data, density_est(x_data), color = '#539caf', lw = 2)
8589
ax.set_ylabel(y_label)
@@ -88,7 +92,7 @@ def densityplot(x_data, density_est, x_label, y_label, title):
8892

8993

9094

91-
def barplot(x_data, y_data, error_data, x_label, y_label, title):
95+
def barplot(x_data, y_data, error_data, x_label="", y_label="", title=""):
9296
_, ax = plt.subplots()
9397
# Draw bars, position them in the center of the tick mark on the x-axis
9498
ax.bar(x_data, y_data, color = '#539caf', align = 'center')
@@ -101,7 +105,7 @@ def barplot(x_data, y_data, error_data, x_label, y_label, title):
101105

102106

103107

104-
def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
108+
def stackedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
105109
_, ax = plt.subplots()
106110
# Draw bars, one category at a time
107111
for i in range(0, len(y_data_list)):
@@ -118,7 +122,7 @@ def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label,
118122

119123

120124

121-
def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
125+
def groupedbarplot(x_data, y_data_list, colors, y_data_names="", x_label="", y_label="", title=""):
122126
_, ax = plt.subplots()
123127
# Total width for all bars at one x location
124128
total_width = 0.8
@@ -140,7 +144,7 @@ def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label,
140144

141145

142146

143-
def boxplot(x_data, y_data, base_color, median_color, x_label, y_label, title):
147+
def boxplot(x_data, y_data, base_color="#539caf", median_color="#297083", x_label="", y_label="", title=""):
144148
_, ax = plt.subplots()
145149

146150
# Draw boxplots, specifying desired style

0 commit comments

Comments
 (0)