Skip to content

Data Science Boot Camp Pandas Homework (4)

l1uc3ky/pandas-challenge

Repository files navigation

#import dependencies 
import pandas as pd
import numpy as np
import json 
#import data files 
data_file = 'purchase_data.json'
purchase_data = pd.read_json(data_file, orient="records")
purchase_data.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Age Gender Item ID Item Name Price SN
0 38 Male 165 Bone Crushing Silver Skewer 3.37 Aelalis34
1 21 Male 119 Stormbringer, Dark Blade of Ending Misery 2.32 Eolo46
2 34 Male 174 Primitive Blade 2.46 Assastnya25
3 21 Male 92 Final Critic 1.36 Pheusrical25
4 23 Male 63 Stormfury Mace 1.27 Aela59

Player Counts & Demographics

# obtain player demographics 
player_demos = purchase_data.loc[:, ["Gender", "SN", "Age"]]
player_demos.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Gender SN Age
0 Male Aelalis34 38
1 Male Eolo46 21
2 Male Assastnya25 34
3 Male Pheusrical25 21
4 Male Aela59 23
# obtain unique values of player demographics to obtain total players
player_demos = player_demos.drop_duplicates()
total_players = player_demos.count()[0]
total_players
573
total_playersdf = pd.DataFrame({"Total Players": [total_players]})
# Counts and percentages of Male and Female Players
# counts by gender
gender_count = player_demos["Gender"].value_counts()
gender_count

# Percentage of male and female players
gender_percent = (gender_count / total_players)*100
gender_percent

# Put into one table 
gender_demos = pd.DataFrame({"Gender Count": gender_count,
                            "Gender Percentage": gender_percent})

# Round to 2 decimal places 
gender_demos = gender_demos.round(2)


# print data
gender_demos
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Gender Count Gender Percentage
Male 465 81.15
Female 100 17.45
Other / Non-Disclosed 8 1.40

Purchase Analysis (Total)

#Calculations for analysis
#average purchase price 
average_item_price = purchase_data["Price"].mean()

#total number of purchases 
purchase_count = purchase_data["Price"].count()

#total revenue 
total_purchase_value = purchase_data["Price"].sum()

#total items purchased
item_count = len(purchase_data["Item ID"].unique())

# Create a data frame for purchase data analysis 
purchase_totals = pd.DataFrame({"Number of Unique Items": [item_count], 
                               "Total Revenue": [total_purchase_value],
                               "Number of purchases": [purchase_count], 
                               "Average Purchase Price": [average_item_price]})

#summary of purchase analysis 
purchase_totals = purchase_totals.round(2)
purchase_totals["Average Purchase Price"] = purchase_totals["Average Purchase Price"].map("${:,.2f}".format)
purchase_totals["Total Revenue"] = purchase_totals["Total Revenue"].map("${:,.2f}".format)
purchase_totals
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Average Purchase Price Number of Unique Items Number of purchases Total Revenue
0 $2.93 183 780 $2,286.33

Purchase Analysis (Gender)

#Calculations for purchase analysis by gender 
gender_purchases = purchase_data.groupby(["Gender"]).sum()["Price"].rename("Total Purchase Value")
gender_avgprice = purchase_data.groupby(["Gender"]).mean()["Price"].rename("Average Purchase Value")
gender_counts = purchase_data.groupby(["Gender"]).count()["Price"].rename("Purchase Count")

#Normalize data 
normalized_total = gender_purchases / gender_count

# Create data frame to house results 
gender_data = pd.DataFrame({"Normalized Total": normalized_total, 
                            "Purchase Count": gender_counts, 
                            "Total Purchase Value": gender_purchases, 
                            "Average Purchase Value": gender_avgprice})

#format results 
gender_data = gender_data.round(2)
gender_data["Average Purchase Value"] = gender_data["Average Purchase Value"].map("${:,.2f}".format)
gender_data["Total Purchase Value"] = gender_data["Total Purchase Value"].map("${:,.2f}".format)
gender_data["Normalized Total"] = gender_data["Normalized Total"].map("${:,.2f}".format)


#Print results of purchase analysis 
gender_data
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Average Purchase Value Normalized Total Purchase Count Total Purchase Value
Gender
Female $2.82 $3.83 136 $382.91
Male $2.95 $4.02 633 $1,867.68
Other / Non-Disclosed $3.25 $4.47 11 $35.74

Age Demographics

age_bins = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 9999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]

#Cut data to put players into age bins 
#create new column to add the series in 
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], age_bins, labels=group_names)
purchase_data.head()
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Age Gender Item ID Item Name Price SN Age Ranges
0 38 Male 165 Bone Crushing Silver Skewer 3.37 Aelalis34 35-39
1 21 Male 119 Stormbringer, Dark Blade of Ending Misery 2.32 Eolo46 20-24
2 34 Male 174 Primitive Blade 2.46 Assastnya25 30-34
3 21 Male 92 Final Critic 1.36 Pheusrical25 20-24
4 23 Male 63 Stormfury Mace 1.27 Aela59 20-24
# total players and percentages by age 
age_demos_total = purchase_data["Age Ranges"].value_counts()
age_demo_percents = (age_demos_total / total_players) * 100

#create data frame to hold the results 
age_demos = pd.DataFrame({"Total Count": age_demos_total, "Percent of Players": age_demo_percents})
age_demos = age_demos.sort_index()
age_demos = age_demos.round(2)
age_demos
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Percent of Players Total Count
<10 4.89 28
10-14 6.11 35
15-19 23.21 133
20-24 58.64 336
25-29 21.82 125
30-34 11.17 64
35-39 7.33 42
>40 2.97 17

Purchase Analysis (Age)

#Calculations for purchase analysis by age bins  
age_purchases = purchase_data.groupby(["Age Ranges"]).sum()["Price"].rename("Total Purchase Value")
age_avgprice = purchase_data.groupby(["Age Ranges"]).mean()["Price"].rename("Average Purchase Value")
age_counts = purchase_data.groupby(["Age Ranges"]).count()["Price"].rename("Purchase Count")

#Normalize data 
normalized_total = age_purchases / age_demos_total

# Create data frame to house results 
age_data = pd.DataFrame({"Normalized Total": normalized_total, 
                            "Purchase Count": age_counts, 
                            "Total Purchase Value": age_purchases, 
                            "Average Purchase Value": age_avgprice})

#format results 
age_data = age_data.round(2)
age_data["Average Purchase Value"] = age_data["Average Purchase Value"].map("${:,.2f}".format)
age_data["Total Purchase Value"] = age_data["Total Purchase Value"].map("${:,.2f}".format)
age_data["Normalized Total"] = age_data["Normalized Total"].map("${:,.2f}".format)


#Print results of purchase analysis 
age_data
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Average Purchase Value Normalized Total Purchase Count Total Purchase Value
10-14 $2.77 $2.77 35 $96.95
15-19 $2.91 $2.91 133 $386.42
20-24 $2.91 $2.91 336 $978.77
25-29 $2.96 $2.96 125 $370.33
30-34 $3.08 $3.08 64 $197.25
35-39 $2.84 $2.84 42 $119.40
<10 $2.98 $2.98 28 $83.46
>40 $3.16 $3.16 17 $53.75

Top 5 Spenders

# Identify the top 5 spenders in the game by total purchase value, then list in a table: 
# group totals, averages, and counts by SN 
user_total = purchase_data.groupby(["SN"]).sum()["Price"].rename("Total Purchase Amount")
user_average = purchase_data.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
user_count = purchase_data.groupby(["SN"]).count()["Price"].rename("Purchase Count")

#Create data frame to hold results 
user_spend = pd.DataFrame({"Total Purchase Amount": user_total,
                          "Average Purchase Price": user_average,
                           "Purchase Count": user_count})

#format results 
user_spend = user_spend.round(2)
user_spend["Average Purchase Price"] = user_spend["Average Purchase Price"].map("${:,.2f}".format)
user_spend["Total Purchase Amount"] = user_spend["Total Purchase Amount"].map("${:,.2f}".format)

#Print results of purchase analysis 
#sort values to obtain top 5 
user_spend.sort_values("Total Purchase Amount", ascending=False).head(5)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Average Purchase Price Purchase Count Total Purchase Amount
SN
Qarwen67 $2.49 4 $9.97
Sondim43 $3.13 3 $9.38
Tillyrin30 $3.06 3 $9.19
Lisistaya47 $3.06 3 $9.19
Tyisriphos58 $4.59 2 $9.18

Most Popular Items

# Identify the top 5 popular items in the game by purchase count, then list in a table: 
# group totals, averages, and counts by SN 
pop_total = purchase_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")
popular_count = purchase_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")

#Create data frame to hold results 
popular_items = pd.DataFrame({"Total Purchase Value": pop_total, 
                              "Purchase Count": popular_count})

#format results 
popular_items = popular_items.round(2)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:,.2f}".format)

#Print results of purchase analysis 
#sort values to obtain top 5 
popular_items.sort_values("Purchase Count", ascending=False).head(5)
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
</style>
Purchase Count Total Purchase Value
Item ID Item Name
39 Betrayal, Whisper of Grieving Widows 11 $25.85
84 Arcane Gem 11 $24.53
31 Trickster 9 $18.63
175 Woeful Adamantite Claymore 9 $11.16
13 Serenity 9 $13.41
#convert notebook to markdown 
jupyter nbconvert --to markdown notebook.ipynb
  File "<ipython-input-32-99259d0986d2>", line 2
    jupyter nbconvert --to markdown notebook.ipynb
                    ^
SyntaxError: invalid syntax

About

Data Science Boot Camp Pandas Homework (4)

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published