#import dependencies
import pandas as pd
import numpy as np
import json #import data files
data_file = 'purchase_data.json'
purchase_data = pd.read_json(data_file, orient="records")
purchase_data.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Age | Gender | Item ID | Item Name | Price | SN | |
|---|---|---|---|---|---|---|
| 0 | 38 | Male | 165 | Bone Crushing Silver Skewer | 3.37 | Aelalis34 |
| 1 | 21 | Male | 119 | Stormbringer, Dark Blade of Ending Misery | 2.32 | Eolo46 |
| 2 | 34 | Male | 174 | Primitive Blade | 2.46 | Assastnya25 |
| 3 | 21 | Male | 92 | Final Critic | 1.36 | Pheusrical25 |
| 4 | 23 | Male | 63 | Stormfury Mace | 1.27 | Aela59 |
# obtain player demographics
player_demos = purchase_data.loc[:, ["Gender", "SN", "Age"]]
player_demos.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Gender | SN | Age | |
|---|---|---|---|
| 0 | Male | Aelalis34 | 38 |
| 1 | Male | Eolo46 | 21 |
| 2 | Male | Assastnya25 | 34 |
| 3 | Male | Pheusrical25 | 21 |
| 4 | Male | Aela59 | 23 |
# obtain unique values of player demographics to obtain total players
player_demos = player_demos.drop_duplicates()
total_players = player_demos.count()[0]
total_players573
total_playersdf = pd.DataFrame({"Total Players": [total_players]})# Counts and percentages of Male and Female Players
# counts by gender
gender_count = player_demos["Gender"].value_counts()
gender_count
# Percentage of male and female players
gender_percent = (gender_count / total_players)*100
gender_percent
# Put into one table
gender_demos = pd.DataFrame({"Gender Count": gender_count,
"Gender Percentage": gender_percent})
# Round to 2 decimal places
gender_demos = gender_demos.round(2)
# print data
gender_demos
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Gender Count | Gender Percentage | |
|---|---|---|
| Male | 465 | 81.15 |
| Female | 100 | 17.45 |
| Other / Non-Disclosed | 8 | 1.40 |
#Calculations for analysis
#average purchase price
average_item_price = purchase_data["Price"].mean()
#total number of purchases
purchase_count = purchase_data["Price"].count()
#total revenue
total_purchase_value = purchase_data["Price"].sum()
#total items purchased
item_count = len(purchase_data["Item ID"].unique())
# Create a data frame for purchase data analysis
purchase_totals = pd.DataFrame({"Number of Unique Items": [item_count],
"Total Revenue": [total_purchase_value],
"Number of purchases": [purchase_count],
"Average Purchase Price": [average_item_price]})
#summary of purchase analysis
purchase_totals = purchase_totals.round(2)
purchase_totals["Average Purchase Price"] = purchase_totals["Average Purchase Price"].map("${:,.2f}".format)
purchase_totals["Total Revenue"] = purchase_totals["Total Revenue"].map("${:,.2f}".format)
purchase_totals
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Average Purchase Price | Number of Unique Items | Number of purchases | Total Revenue | |
|---|---|---|---|---|
| 0 | $2.93 | 183 | 780 | $2,286.33 |
#Calculations for purchase analysis by gender
gender_purchases = purchase_data.groupby(["Gender"]).sum()["Price"].rename("Total Purchase Value")
gender_avgprice = purchase_data.groupby(["Gender"]).mean()["Price"].rename("Average Purchase Value")
gender_counts = purchase_data.groupby(["Gender"]).count()["Price"].rename("Purchase Count")
#Normalize data
normalized_total = gender_purchases / gender_count
# Create data frame to house results
gender_data = pd.DataFrame({"Normalized Total": normalized_total,
"Purchase Count": gender_counts,
"Total Purchase Value": gender_purchases,
"Average Purchase Value": gender_avgprice})
#format results
gender_data = gender_data.round(2)
gender_data["Average Purchase Value"] = gender_data["Average Purchase Value"].map("${:,.2f}".format)
gender_data["Total Purchase Value"] = gender_data["Total Purchase Value"].map("${:,.2f}".format)
gender_data["Normalized Total"] = gender_data["Normalized Total"].map("${:,.2f}".format)
#Print results of purchase analysis
gender_data
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Average Purchase Value | Normalized Total | Purchase Count | Total Purchase Value | |
|---|---|---|---|---|
| Gender | ||||
| Female | $2.82 | $3.83 | 136 | $382.91 |
| Male | $2.95 | $4.02 | 633 | $1,867.68 |
| Other / Non-Disclosed | $3.25 | $4.47 | 11 | $35.74 |
age_bins = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 9999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]
#Cut data to put players into age bins
#create new column to add the series in
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], age_bins, labels=group_names)
purchase_data.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Age | Gender | Item ID | Item Name | Price | SN | Age Ranges | |
|---|---|---|---|---|---|---|---|
| 0 | 38 | Male | 165 | Bone Crushing Silver Skewer | 3.37 | Aelalis34 | 35-39 |
| 1 | 21 | Male | 119 | Stormbringer, Dark Blade of Ending Misery | 2.32 | Eolo46 | 20-24 |
| 2 | 34 | Male | 174 | Primitive Blade | 2.46 | Assastnya25 | 30-34 |
| 3 | 21 | Male | 92 | Final Critic | 1.36 | Pheusrical25 | 20-24 |
| 4 | 23 | Male | 63 | Stormfury Mace | 1.27 | Aela59 | 20-24 |
# total players and percentages by age
age_demos_total = purchase_data["Age Ranges"].value_counts()
age_demo_percents = (age_demos_total / total_players) * 100
#create data frame to hold the results
age_demos = pd.DataFrame({"Total Count": age_demos_total, "Percent of Players": age_demo_percents})
age_demos = age_demos.sort_index()
age_demos = age_demos.round(2)
age_demos
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Percent of Players | Total Count | |
|---|---|---|
| <10 | 4.89 | 28 |
| 10-14 | 6.11 | 35 |
| 15-19 | 23.21 | 133 |
| 20-24 | 58.64 | 336 |
| 25-29 | 21.82 | 125 |
| 30-34 | 11.17 | 64 |
| 35-39 | 7.33 | 42 |
| >40 | 2.97 | 17 |
#Calculations for purchase analysis by age bins
age_purchases = purchase_data.groupby(["Age Ranges"]).sum()["Price"].rename("Total Purchase Value")
age_avgprice = purchase_data.groupby(["Age Ranges"]).mean()["Price"].rename("Average Purchase Value")
age_counts = purchase_data.groupby(["Age Ranges"]).count()["Price"].rename("Purchase Count")
#Normalize data
normalized_total = age_purchases / age_demos_total
# Create data frame to house results
age_data = pd.DataFrame({"Normalized Total": normalized_total,
"Purchase Count": age_counts,
"Total Purchase Value": age_purchases,
"Average Purchase Value": age_avgprice})
#format results
age_data = age_data.round(2)
age_data["Average Purchase Value"] = age_data["Average Purchase Value"].map("${:,.2f}".format)
age_data["Total Purchase Value"] = age_data["Total Purchase Value"].map("${:,.2f}".format)
age_data["Normalized Total"] = age_data["Normalized Total"].map("${:,.2f}".format)
#Print results of purchase analysis
age_data
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Average Purchase Value | Normalized Total | Purchase Count | Total Purchase Value | |
|---|---|---|---|---|
| 10-14 | $2.77 | $2.77 | 35 | $96.95 |
| 15-19 | $2.91 | $2.91 | 133 | $386.42 |
| 20-24 | $2.91 | $2.91 | 336 | $978.77 |
| 25-29 | $2.96 | $2.96 | 125 | $370.33 |
| 30-34 | $3.08 | $3.08 | 64 | $197.25 |
| 35-39 | $2.84 | $2.84 | 42 | $119.40 |
| <10 | $2.98 | $2.98 | 28 | $83.46 |
| >40 | $3.16 | $3.16 | 17 | $53.75 |
# Identify the top 5 spenders in the game by total purchase value, then list in a table:
# group totals, averages, and counts by SN
user_total = purchase_data.groupby(["SN"]).sum()["Price"].rename("Total Purchase Amount")
user_average = purchase_data.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
user_count = purchase_data.groupby(["SN"]).count()["Price"].rename("Purchase Count")
#Create data frame to hold results
user_spend = pd.DataFrame({"Total Purchase Amount": user_total,
"Average Purchase Price": user_average,
"Purchase Count": user_count})
#format results
user_spend = user_spend.round(2)
user_spend["Average Purchase Price"] = user_spend["Average Purchase Price"].map("${:,.2f}".format)
user_spend["Total Purchase Amount"] = user_spend["Total Purchase Amount"].map("${:,.2f}".format)
#Print results of purchase analysis
#sort values to obtain top 5
user_spend.sort_values("Total Purchase Amount", ascending=False).head(5)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Average Purchase Price | Purchase Count | Total Purchase Amount | |
|---|---|---|---|
| SN | |||
| Qarwen67 | $2.49 | 4 | $9.97 |
| Sondim43 | $3.13 | 3 | $9.38 |
| Tillyrin30 | $3.06 | 3 | $9.19 |
| Lisistaya47 | $3.06 | 3 | $9.19 |
| Tyisriphos58 | $4.59 | 2 | $9.18 |
# Identify the top 5 popular items in the game by purchase count, then list in a table:
# group totals, averages, and counts by SN
pop_total = purchase_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")
popular_count = purchase_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")
#Create data frame to hold results
popular_items = pd.DataFrame({"Total Purchase Value": pop_total,
"Purchase Count": popular_count})
#format results
popular_items = popular_items.round(2)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:,.2f}".format)
#Print results of purchase analysis
#sort values to obtain top 5
popular_items.sort_values("Purchase Count", ascending=False).head(5)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| Purchase Count | Total Purchase Value | ||
|---|---|---|---|
| Item ID | Item Name | ||
| 39 | Betrayal, Whisper of Grieving Widows | 11 | $25.85 |
| 84 | Arcane Gem | 11 | $24.53 |
| 31 | Trickster | 9 | $18.63 |
| 175 | Woeful Adamantite Claymore | 9 | $11.16 |
| 13 | Serenity | 9 | $13.41 |
#convert notebook to markdown
jupyter nbconvert --to markdown notebook.ipynb File "<ipython-input-32-99259d0986d2>", line 2
jupyter nbconvert --to markdown notebook.ipynb
^
SyntaxError: invalid syntax