Questions and Answers: August 2019

Saturday, August 31, 2019

Codecademy Learn Statistics With Python Quantiles Exercise 2/5

# Sample Solution
# Finds the value in `songs` below which 23% of the data falls.
from song_data import songs
import numpy as np

# Define twenty_third_percentile here:
# np.quantile expects the quantile as a fraction, so 23% is written 0.23.
twenty_third_percentile = np.quantile(songs, 0.23)

# Ignore the code below here:
# The NameError guard reports a missing variable instead of crashing.
try:
    print("The value that splits 23% of the data is " + str(twenty_third_percentile) + "\n")
except NameError:
    print("You haven't defined twenty_third_percentile.")

Codecademy Learn Statistics With Python Quartiles Exercise 5/6

# Sample Solution
# Computes the three quartiles of `songs`, then picks one entry at random and
# reports which quarter of the data it falls into.
from song_data import songs
import numpy as np
import random

# Create the variables songs_q1, songs_q2, and songs_q3 here:
songs_q1 = np.quantile(songs, 0.25)
songs_q2 = np.quantile(songs, 0.50)
songs_q3 = np.quantile(songs, 0.75)

number_of_songs = len(songs)
print("There are " + str(number_of_songs) + " songs in this list.")

# Generate a random number to pick a favorite song.
# random.randint includes both endpoints, hence the "- 1" on the upper bound.
random_num = random.randint(0, number_of_songs - 1)
favorite_song = songs[random_num]
print("Song length in seconds is " + str(favorite_song))

# Test the quartile boundaries in ascending order: the first boundary the
# value is strictly below decides its quarter; anything else is in the fourth.
if favorite_song < songs_q1:
    quarter = 1
elif favorite_song < songs_q2:
    quarter = 2
elif favorite_song < songs_q3:
    quarter = 3
else:
    quarter = 4
print("Favorite song is in quartile " + str(quarter))

# Ignore the code below here:
# Each guard reports a missing quartile variable instead of crashing.
try:
    print("The first quartile of dataset one is " + str(songs_q1) + " seconds")
except NameError:
    print("You haven't defined songs_q1")
try:
    print("The second quartile of dataset one is " + str(songs_q2) + " seconds")
except NameError:
    print("You haven't defined songs_q2")
try:
    print("The third quartile of dataset one is " + str(songs_q3) + " seconds")
except NameError:
    print("You haven't defined songs_q3\n")

Codecademy Learn Statistics With Python Quartiles Exercise 4/6

# Sample Solution
# Hand-computed quartiles for two small datasets. Here the median is kept in
# both halves when the dataset has an odd number of values.
dataset_one = [50, 10, 4, -3, 4, -20, 2]
# Sorted dataset_one: [-20, -3, 2, 4, 4, 10, 50]

dataset_two = [24, 20, 1, 45, -15, 40]
# Sorted dataset_two: [-15, 1, 20, 24, 40, 45]

dataset_one_q2 = 4
dataset_two_q2 = 22

# Define the first and third quartile of both datasets here:
# dataset_one halves (median included): [-20, -3, 2, 4] and [4, 4, 10, 50],
# whose medians are (-3 + 2) / 2 = -0.5 and (4 + 10) / 2 = 7.
dataset_one_q1 = -0.5
dataset_one_q3 = 7
# dataset_two has an even length, so it splits cleanly into
# [-15, 1, 20] and [24, 40, 45].
dataset_two_q1 = 1
dataset_two_q3 = 40

# Ignore the code below here:
# Each guard reports a missing variable instead of crashing.
try:
    print("The first quartile of dataset one is " + str(dataset_one_q1))
except NameError:
    print("You haven't defined dataset_one_q1")
try:
    print("The second quartile of dataset one is " + str(dataset_one_q2))
except NameError:
    print("You haven't defined dataset_one_q2")
try:
    print("The third quartile of dataset one is " + str(dataset_one_q3) + "\n")
except NameError:
    print("You haven't defined dataset_one_q3\n")
try:
    print("The first quartile of dataset two is " + str(dataset_two_q1))
except NameError:
    print("You haven't defined dataset_two_q1")
try:
    print("The second quartile of dataset two is " + str(dataset_two_q2))
except NameError:
    print("You haven't defined dataset_two_q2")
try:
    print("The third quartile of dataset two is " + str(dataset_two_q3))
except NameError:
    print("You haven't defined dataset_two_q3")

Codecademy Learn Statistics With Python Quartiles Exercise 3/6

# Sample Solution
# Hand-computed quartiles for two small datasets. Here the median is left out
# of both halves when the dataset has an odd number of values.
dataset_one = [50, 10, 4, -3, 4, -20, 2]
# Sorted dataset_one: [-20, -3, 2, 4, 4, 10, 50]

dataset_two = [24, 20, 1, 45, -15, 40]
# Sorted dataset_two: [-15, 1, 20, 24, 40, 45]

dataset_one_q2 = 4
dataset_two_q2 = 22

# Define the first and third quartile of both datasets here:
# dataset_one halves (median excluded): [-20, -3, 2] and [4, 10, 50].
dataset_one_q1 = -3
dataset_one_q3 = 10
# dataset_two has an even length, so it splits cleanly into
# [-15, 1, 20] and [24, 40, 45].
dataset_two_q1 = 1
dataset_two_q3 = 40

# Ignore the code below here:
# Each guard reports a missing variable instead of crashing.
try:
    print("The first quartile of dataset one is " + str(dataset_one_q1))
except NameError:
    print("You haven't defined dataset_one_q1")
try:
    print("The second quartile of dataset one is " + str(dataset_one_q2))
except NameError:
    print("You haven't defined dataset_one_q2")
try:
    print("The third quartile of dataset one is " + str(dataset_one_q3) + "\n")
except NameError:
    print("You haven't defined dataset_one_q3\n")
try:
    print("The first quartile of dataset two is " + str(dataset_two_q1))
except NameError:
    print("You haven't defined dataset_two_q1")
try:
    print("The second quartile of dataset two is " + str(dataset_two_q2))
except NameError:
    print("You haven't defined dataset_two_q2")
try:
    print("The third quartile of dataset two is " + str(dataset_two_q3))
except NameError:
    print("You haven't defined dataset_two_q3")

Codecademy Learn Statistics With Python Quartiles Exercise 2/6

# Sample Solution
# Hand-computed second quartile (median) of two small datasets.
dataset_one = [50, 10, 4, -3, 4, -20, 2]
# Sorted dataset_one: [-20, -3, 2, 4, 4, 10, 50]

dataset_two = [24, 20, 1, 45, -15, 40]
# Sorted dataset_two: [-15, 1, 20, 24, 40, 45]

# Define the second quartile of both datasets here:
# dataset_one has seven values, so the median is the middle one (4).
dataset_one_q2 = 4
# dataset_two has six values, so the median is (20 + 24) / 2 = 22.
dataset_two_q2 = 22

# Ignore the code below here:
# Each guard reports a missing variable instead of crashing.
try:
    print("The second quartile of dataset one is " + str(dataset_one_q2))
except NameError:
    print("You haven't defined dataset_one_q2")
try:
    print("The second quartile of dataset two is " + str(dataset_two_q2))
except NameError:
    print("You haven't defined dataset_two_q2")

Codecademy Learn Statistics With Python Histograms Exercise 9/9

# Sample Solution
# Draws a histogram of the "Cost" column of transactions.csv.
import codecademylib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the transactions table
transactions = pd.read_csv("transactions.csv")

# Keep the two columns of interest as plain NumPy arrays
cost = transactions["Cost"].values
times = transactions["Transaction Time"].values

# 165 bins over the interval 0 to 165 makes each bin one unit wide
plt.hist(cost, bins=165, range=(0, 165), edgecolor="black")

# Label the figure before displaying it
plt.xlabel("Cost (1 dollar increments)")
plt.ylabel("Count")
plt.title("Cost Frequency")

plt.show()

Codecademy Learn Statistics With Python Histograms Exercise 8/9

# Sample Solution
# Draws a histogram of the "Transaction Time" column of transactions.csv.
import codecademylib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the transactions table
transactions = pd.read_csv("transactions.csv")

# Keep the transaction times as a plain NumPy array
times = transactions["Transaction Time"].values

# 24 bins over the interval 0 to 24 makes each bin one unit wide
plt.hist(times, bins=24, range=(0, 24), edgecolor="black")

# Label the figure before displaying it
plt.xlabel("Hours (1 hour increments)")
plt.ylabel("Count")
plt.title("Weekday Frequency of Customers")

plt.show()

Codecademy Learn Statistics With Python Histograms Exercise 7/9

# Sample Solution
# Draws a four-bin histogram of the "Transaction Time" column of
# transactions.csv.
import codecademylib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the transactions table
transactions = pd.read_csv("transactions.csv")

# Keep the transaction times as a plain NumPy array
times = transactions["Transaction Time"].values

# Four equal-width bins over the interval 0 to 24
plt.hist(times, bins=4, range=(0, 24), edgecolor="black")

# Label the figure before displaying it
plt.xlabel("Time Ranges")
plt.ylabel("Count")
plt.title("Transaction Time Histogram")
plt.show()

Codecademy Learn Statistics With Python Histograms Exercise 6/9

# Sample Solution
# Bins the "Transaction Time" column of transactions.csv with NumPy and
# prints the result.
import numpy as np
import pandas as pd

# Load the transactions table
transactions = pd.read_csv("transactions.csv")

# Keep the transaction times as a plain NumPy array
times = transactions["Transaction Time"].values

# Four equal-width bins over the interval 0 to 24
times_hist = np.histogram(times, bins=4, range=(0, 24))

print(times_hist)

Codecademy Learn Statistics With Python Histograms Exercise 5/9

# Sample Solution
# Hand-counts how many values of days_old_bread fall into three bins.
import numpy as np
import pandas as pd

# Age, in days, of each loaf
days_old_bread = np.array([0, 8, 7, 8, 0, 2, 3, 5, 6, 2])

# Manual tallies per bin: 0-2 -> {0, 0, 2, 2}, 3-5 -> {3, 5}, 6-8 -> {8, 7, 8, 6}
days_old_012 = 4
days_old_345 = 2
days_old_678 = 4

# Report each bin's count
print(f"Between 0 and 2 days: {days_old_012}")
print(f"Between 3 and 5 days: {days_old_345}")
print(f"Between 6 and 8 days: {days_old_678}")

Codecademy Learn Statistics With Python Histograms Exercise 4/9

# Sample Solution
# Works out the width of each bin when days_old_bread is split into 3 bins.
import numpy as np
import pandas as pd

# Array of days old bread
days_old_bread = np.array([0, 8, 7, 8, 0, 2, 3, 5, 6, 2])

# Set the minimum and maximums of the array below
min_days_old = np.amin(days_old_bread)
max_days_old = np.amax(days_old_bread)

# Set the number of bins to 3
bins = 3

# Calculate the bin range
try:
    # "+ 1" because both the minimum and the maximum value are counted.
    bin_range = (max_days_old - min_days_old + 1) / bins
    # Printing the values
    print("Bins: " + str(bins))
    print("Bin Width: " + str(bin_range))
except (NameError, TypeError):
    # NameError: a value was never defined; TypeError: it is still a
    # non-numeric placeholder. Anything else is a real bug and should surface.
    print("You have not set the min, max, or bins values yet.")

Codecademy Learn Statistics With Python Histograms Exercise 3/9

# Sample Solution
# Reports the smallest and largest transaction time and the gap between them.
import numpy as np
import pandas as pd

# Load the transactions table
transactions = pd.read_csv("transactions.csv")

# Keep the two columns of interest as plain NumPy arrays
cost = transactions["Cost"].values
times = transactions["Transaction Time"].values

# Extremes of the time column and the distance between them
min_time = np.amin(times)
max_time = np.amax(times)
range_time = max_time - min_time

# Report the three values
print("Earliest Time: ", min_time, sep="")
print("Latest Time: ", max_time, sep="")
print("Time Range: ", range_time, sep="")

Codecademy Learn Statistics With Python Histograms Exercise 2/9

# Sample Solution
# Prints the transactions table and the average of its "Transaction Time"
# column.
import numpy as np
import pandas as pd

# Load the table, then discard the "Unnamed: 0" column
transactions = pd.read_csv("transactions.csv")
transactions = transactions.drop(["Unnamed: 0"], axis=1)

# Keep the two columns of interest as plain NumPy arrays
cost = transactions["Cost"].values
times = transactions["Transaction Time"].values

# Show the whole table
print(transactions)

# Show the mean transaction time
print(np.average(times))

Codecademy Learn Statistics With Python Standard Deviation Exercise 5/5

# Sample Solution
# Plots the NBA and OkCupid height data as two stacked histograms. Each panel
# marks the dataset's mean, one to three standard deviations either side of
# it, and your own height.
import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
from data import nba_data, okcupid_data

nba_mean = np.mean(nba_data)
okcupid_mean = np.mean(okcupid_data)

#Change this variable to your height (in inches)!
# NOTE: the x-axis is limited to 55-90, so the "You" line is off-screen until
# this is set to a value inside that range.
your_height = 0

nba_standard_deviation = np.std(nba_data)
okcupid_standard_deviation = np.std(okcupid_data)

# Line style shared by every standard-deviation marker.
_STD_LINE_STYLE = {"color": "#FFB908", "linestyle": "solid", "linewidth": 2}


def _plot_height_panel(position, title, heights, mean, standard_deviation, marker_height):
    """Draw one histogram panel with mean, standard-deviation and "You" lines.

    position is the subplot code passed to plt.subplot, heights is the data
    to histogram, and marker_height is where the "You" line is drawn.
    """
    plt.subplot(position)
    plt.title(title)
    plt.xlabel("Height (inches)")

    plt.hist(heights)

    plt.axvline(mean, color='#FD4E40', linestyle='solid', linewidth=2, label="Mean")

    for multiple in (1, 2, 3):
        offset = standard_deviation * multiple
        upper_style = dict(_STD_LINE_STYLE)
        if multiple == 1:
            # Label only one of the six lines so the legend has one entry.
            upper_style["label"] = "Standard Deviations"
        plt.axvline(mean + offset, **upper_style)
        plt.axvline(mean - offset, **_STD_LINE_STYLE)

    plt.axvline(marker_height, color='#62EDBF', linestyle='solid', linewidth=2, label="You")

    plt.xlim(55, 90)
    plt.legend()


# Both panels mark the same height; the NBA panel previously used a
# hard-coded 67 and ignored your_height.
_plot_height_panel(211, "NBA Player Heights", nba_data,
                   nba_mean, nba_standard_deviation, your_height)
_plot_height_panel(212, "OkCupid Profile Heights", okcupid_data,
                   okcupid_mean, okcupid_standard_deviation, your_height)

plt.tight_layout()
plt.show()

Codecademy Learn Statistics With Python Standard Deviation Exercise 4/5

# Sample Solution when height = 80 inches
# Expresses a height of 80 inches as a number of standard deviations from the
# mean of each dataset.
import numpy as np
from data import nba_data, okcupid_data

# Height being compared against both datasets
player_height = 80

# Centre and spread of each dataset
nba_mean, nba_standard_deviation = np.mean(nba_data), np.std(nba_data)
okcupid_mean, okcupid_standard_deviation = np.mean(okcupid_data), np.std(okcupid_data)

# Step 1: Calculate the difference between the player's height and the means
nba_difference = player_height - nba_mean
okcupid_difference = player_height - okcupid_mean

# Step 2: Divide each difference by the dataset's standard deviation to get
# the distance from the mean in units of standard deviations.
num_nba_deviations = nba_difference / nba_standard_deviation
num_okcupid_deviations = okcupid_difference / okcupid_standard_deviation

# IGNORE CODE BELOW HERE
print(f"Your basketball player is {num_nba_deviations!s} standard deviations away from the mean of NBA player heights\n")
print(f"Your basketball player is {num_okcupid_deviations!s} standard deviations away from the mean of OkCupid profile heights")
#======================================================================
# Sample Solution when height = 65 inches
# Expresses a height of 65 inches as a number of standard deviations from the
# mean of each dataset.
import numpy as np
from data import nba_data, okcupid_data

# Height being compared against both datasets
player_height = 65

# Centre and spread of each dataset
nba_mean, nba_standard_deviation = np.mean(nba_data), np.std(nba_data)
okcupid_mean, okcupid_standard_deviation = np.mean(okcupid_data), np.std(okcupid_data)

# Step 1: Calculate the difference between the player's height and the means
nba_difference = player_height - nba_mean
okcupid_difference = player_height - okcupid_mean

# Step 2: Divide each difference by the dataset's standard deviation to get
# the distance from the mean in units of standard deviations.
num_nba_deviations = nba_difference / nba_standard_deviation
num_okcupid_deviations = okcupid_difference / okcupid_standard_deviation

# IGNORE CODE BELOW HERE
print(f"Your basketball player is {num_nba_deviations!s} standard deviations away from the mean of NBA player heights\n")
print(f"Your basketball player is {num_okcupid_deviations!s} standard deviations away from the mean of OkCupid profile heights")

Codecademy Learn Statistics With Python Standard Deviation Exercise 3/5

# Sample Solution
# Computes the standard deviation of both datasets with NumPy.
import numpy as np
from data import nba_data, okcupid_data

# np.std does the whole calculation in one call
nba_standard_deviation = np.std(nba_data)
okcupid_standard_deviation = np.std(okcupid_data)

# IGNORE CODE BELOW HERE
print("The standard deviation of the NBA dataset is ", nba_standard_deviation, sep="")
print("The standard deviation of the OkCupid dataset is ", okcupid_standard_deviation, sep="")

Codecademy Learn Statistics With Python Standard Deviation Exercise 2/5

# Sample Solution
# Derives each dataset's standard deviation from its variance.
import numpy as np
from data import nba_data, okcupid_data

# Variance of each dataset
nba_variance, okcupid_variance = np.var(nba_data), np.var(okcupid_data)

# The standard deviation is the square root of the variance
nba_standard_deviation = nba_variance ** 0.5
okcupid_standard_deviation = okcupid_variance ** 0.5

# IGNORE CODE BELOW HERE
print("The standard deviation of the NBA dataset is ", nba_standard_deviation, sep="")
print("The standard deviation of the OkCupid dataset is ", okcupid_standard_deviation, sep="")

Codecademy Learn Statistics With Python Variance Exercise 5/6

# Sample Solution
# Compares the spread of test grades in two classes: overlays their
# histograms, then prints each class's mean and variance.
import numpy as np
import matplotlib.pyplot as plt
import codecademylib3_seaborn

teacher_one_grades = [80.24, 81.15, 81.29, 82.12, 82.52, 82.54, 82.76, 83.37, 83.42, 83.45, 83.47, 83.79, 83.91, 83.98, 84.03, 84.69, 84.74, 84.89, 84.95, 84.95, 85.02, 85.18, 85.53, 86.29, 86.83, 87.29, 87.47, 87.62, 88.04, 88.5]
teacher_two_grades = [65.82, 70.77, 71.46, 73.63, 74.62, 76.53, 76.86, 77.06, 78.46, 79.81, 80.64, 81.61, 81.84, 83.67, 84.44, 84.73, 84.74, 85.15, 86.55, 88.06, 88.53, 90.12, 91.27, 91.62, 92.86, 94.37, 95.64, 95.99, 97.69, 104.4]

# Variance of each class, computed with NumPy
teacher_one_variance = np.var(teacher_one_grades)
teacher_two_variance = np.var(teacher_two_grades)

# IGNORE THE CODE BELOW HERE
# Semi-transparent bars keep both distributions visible where they overlap.
plt.hist(teacher_one_grades, bins=7, alpha=0.75, label="Teacher 1 Scores")
plt.hist(teacher_two_grades, bins=30, alpha=0.5, label="Teacher 2 Scores")
plt.title("Student test grades in two different classes")
plt.xlabel("Grades")
plt.legend()
plt.show()

print("The mean of the test scores in teacher one's class is ", np.mean(teacher_one_grades), sep="")
print("The mean of the test scores in teacher two's class is ", np.mean(teacher_two_grades), sep="")

print("The variance of the test scores in teacher one's class is ", teacher_one_variance, sep="")
print("The variance of the test scores in teacher two's class is ", teacher_two_variance, sep="")

Codecademy Learn Statistics With Python Variance Exercise 4/6

# Sample Solution
# Computes the variance of five grades step by step: squared distance of each
# grade from the mean, their sum, then the average of those squares.
import numpy as np

grades = [88, 82, 85, 84, 90]
mean = np.mean(grades)

# Squared difference between each grade and the mean, in list order
(
    difference_one,
    difference_two,
    difference_three,
    difference_four,
    difference_five,
) = [(grade - mean) ** 2 for grade in grades]

# Add the squared differences together, first to last
difference_sum = (
    difference_one
    + difference_two
    + difference_three
    + difference_four
    + difference_five
)

# Average over the number of grades
variance = difference_sum / len(grades)

print(f"The sum of the squared differences is {difference_sum!s}")
print(f"The variance is {variance!s}")

Codecademy Learn Statistics With Python Variance Exercise 3/6

# Sample Solution
# Shows that the plain (unsquared) differences from the mean add up to
# roughly zero, so their average is not a useful measure of spread.
import numpy as np

grades = [88, 82, 85, 84, 90]
mean = np.mean(grades)

# Signed distance of each grade from the mean, in list order
(
    difference_one,
    difference_two,
    difference_three,
    difference_four,
    difference_five,
) = [grade - mean for grade in grades]

# Print each difference in fixed-point notation for a manual check
for deviation in (
    difference_one,
    difference_two,
    difference_three,
    difference_four,
    difference_five,
):
    print(f"{deviation:f}")

# Part 1: Sum the differences, first to last
difference_sum = (
    difference_one
    + difference_two
    + difference_three
    + difference_four
    + difference_five
)

# Part 2: Average the differences
average_difference = difference_sum / 5

# IGNORE CODE BELOW HERE
print(f"The sum of the differences is {difference_sum:f}")
print(f"The average difference is {average_difference:f}")

Codecademy Learn Statistics With Python Variance Exercise 2/6

# Sample Solution
# Reports how far each of five grades lies from their mean.
import numpy as np

grades = [88, 82, 85, 84, 90]
mean = np.mean(grades)

# Signed distance of each grade from the mean, in list order
(
    difference_one,
    difference_two,
    difference_three,
    difference_four,
    difference_five,
) = [grade - mean for grade in grades]

# IGNORE CODE BELOW HERE
print(f"The mean of the data set is {mean!s}\n")
# One line per student, each difference rounded to two decimals
for ordinal, deviation in zip(
    ("first", "second", "third", "fourth", "fifth"),
    (difference_one, difference_two, difference_three, difference_four, difference_five),
):
    print(f"The {ordinal} student is {round(deviation, 2)!s} percentage points away from the mean.")

Codecademy Learn Statistics With Python Mode Exercise 3/4

# Sample Solution
# Finds the most common value in the 'Ages' column of top-hundred-books.csv
# and how often it occurs.
# Import packages
import numpy as np
import pandas as pd
from scipy import stats

# Read in author data
greatest_books = pd.read_csv("top-hundred-books.csv")

# Save author ages to author_ages
author_ages = greatest_books['Ages']

# Use SciPy to calculate the mode of the author ages. The result holds the
# mode first and its count second.
mode_age = stats.mode(author_ages)

# Older SciPy releases return length-1 arrays here and newer ones return
# scalars, so flatten both fields before taking the first element.
most_common_age = np.ravel(mode_age[0])[0]
most_common_count = np.ravel(mode_age[1])[0]

print("The mode age and its frequency of authors from Le Monde's 100 greatest books is: " + str(most_common_age) + " and " + str(most_common_count))

Codecademy Learn Statistics With Python Mode Exercise 2/4

# Sample Solution
# Hand-counts the mode of the first ten author ages.
import numpy as np

# Ages of the first ten authors
first_ten_authors = np.array([29, 49, 42, 43, 32, 38, 37, 41, 27, 27])

# Tally (value: occurrences):
# 29:1, 49:1, 42:1, 43:1, 32:1, 38:1, 37:1, 41:1, 27:2
# The most frequent value and the number of times it appears
mode_age, mode_count = 27, 2

# Print the ages, the mode, and how often the mode occurs
print(f"The ages of the first ten authors is: {first_ten_authors!s}")
print(f"The mode of the first ten authors is: {mode_age}")
print(f"The number of authors who were {mode_age} when their book was published is {mode_count}")

Friday, August 30, 2019

Codecademy Learn Statistics With Python Median Exercise 3/4

# Sample Solution
# Prints the median of the 'Ages' column of top-hundred-books.csv.
import numpy as np
import pandas as pd

# Load the book data
greatest_books = pd.read_csv("top-hundred-books.csv")

# Column holding the author ages
author_ages = greatest_books["Ages"]

# Median of the ages, computed with NumPy
median_age = np.median(author_ages)

print("The median age of the 100 greatest authors, according to a survey by Le Monde is: ", median_age, sep="")

Codecademy Learn Statistics With Python Median Exercise 2/4

# Sample Solution
# Finds the median of five author ages by sorting them by hand.
import numpy as np

# Ages of the first five authors, in their original order
five_author_ages = np.array([29, 49, 42, 43, 32])

# The same ages written out in ascending order
sorted_author_ages = np.array([29, 32, 42, 43, 49])

# With five values, the median is the third one in the sorted array
median_age = 42

# Print the sorted array and the median
print(f"The sorted array is: {sorted_author_ages!s}")
print(f"The median of the array is: {median_age}")

Codecademy Learn Statistics With Python Mean Exercise 3/4

# Sample Solution
# Prints the average of the 'Ages' column of top-hundred-books.csv.
import numpy as np
import pandas as pd

# Load the book data
greatest_books = pd.read_csv("top-hundred-books.csv")

# Column holding the author ages
author_ages = greatest_books["Ages"]

# Average of the ages, computed with NumPy
average_age = np.average(author_ages)

print("The average age of the 100 greatest authors, according to a survey by Le Monde, is: ", average_age, sep="")

Codecademy Learn Statistics With Python Mean Exercise 2/4

# Sample Solution
# Computes the mean of four values by hand: add them up, divide by the count.
# Sum of the four values
total = 29 + 49 + 42 + 43

# Mean = sum divided by the number of values
mean_value = total / 4

# Report both results
print(f"The sum total is equal to: {total}")
print(f"The mean value is equal to: {mean_value}")

Thursday, August 29, 2019

DataCamp Combining data types Sample Solution

# Convert the non-string values to strings so they can be joined
# (year_1, revenue_1 and company_1 are expected to exist already)
year_1_str = str(year_1)
revenue_1_str = str(revenue_1)

# Build the sentence from string pieces only
sentence = "".join([
    'The revenue of ',
    company_1,
    ' in ',
    year_1_str,
    ' was $',
    revenue_1_str,
    ' billion.',
])

# Show the finished sentence
print(sentence)

Questions and Answers

Quotes help make search much faster. Example: "Practice Makes Perfect"