Player Counts & Demographics

#import dependencies 
import pandas as pd
import numpy as np
import json

#import data files 
data_file = 'purchase_data.json'
purchase_data = pd.read_json(data_file, orient="records")
purchase_data.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Age	Gender	Item ID	Item Name	Price	SN
0	38	Male	165	Bone Crushing Silver Skewer	3.37	Aelalis34
1	21	Male	119	Stormbringer, Dark Blade of Ending Misery	2.32	Eolo46
2	34	Male	174	Primitive Blade	2.46	Assastnya25
3	21	Male	92	Final Critic	1.36	Pheusrical25
4	23	Male	63	Stormfury Mace	1.27	Aela59

Player Counts & Demographics

# obtain player demographics 
player_demos = purchase_data.loc[:, ["Gender", "SN", "Age"]]
player_demos.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Gender	SN	Age
0	Male	Aelalis34	38
1	Male	Eolo46	21
2	Male	Assastnya25	34
3	Male	Pheusrical25	21
4	Male	Aela59	23

# obtain unique values of player demographics to obtain total players
player_demos = player_demos.drop_duplicates()
total_players = player_demos.count()[0]
total_players

total_playersdf = pd.DataFrame({"Total Players": [total_players]})

# Counts and percentages of Male and Female Players
# counts by gender
gender_count = player_demos["Gender"].value_counts()
gender_count

# Percentage of male and female players
gender_percent = (gender_count / total_players)*100
gender_percent

# Put into one table 
gender_demos = pd.DataFrame({"Gender Count": gender_count,
                            "Gender Percentage": gender_percent})

# Round to 2 decimal places 
gender_demos = gender_demos.round(2)


# print data
gender_demos

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Gender Count	Gender Percentage
Male	465	81.15
Female	100	17.45
Other / Non-Disclosed	8	1.40

Purchase Analysis (Total)

#Calculations for analysis
#average purchase price 
average_item_price = purchase_data["Price"].mean()

#total number of purchases 
purchase_count = purchase_data["Price"].count()

#total revenue 
total_purchase_value = purchase_data["Price"].sum()

#total items purchased
item_count = len(purchase_data["Item ID"].unique())

# Create a data frame for purchase data analysis 
purchase_totals = pd.DataFrame({"Number of Unique Items": [item_count], 
                               "Total Revenue": [total_purchase_value],
                               "Number of purchases": [purchase_count], 
                               "Average Purchase Price": [average_item_price]})

#summary of purchase analysis 
purchase_totals = purchase_totals.round(2)
purchase_totals["Average Purchase Price"] = purchase_totals["Average Purchase Price"].map("${:,.2f}".format)
purchase_totals["Total Revenue"] = purchase_totals["Total Revenue"].map("${:,.2f}".format)
purchase_totals

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Average Purchase Price	Number of Unique Items	Number of purchases	Total Revenue
0	$2.93	183	780	$2,286.33

Purchase Analysis (Gender)

#Calculations for purchase analysis by gender 
gender_purchases = purchase_data.groupby(["Gender"]).sum()["Price"].rename("Total Purchase Value")
gender_avgprice = purchase_data.groupby(["Gender"]).mean()["Price"].rename("Average Purchase Value")
gender_counts = purchase_data.groupby(["Gender"]).count()["Price"].rename("Purchase Count")

#Normalize data 
normalized_total = gender_purchases / gender_count

# Create data frame to house results 
gender_data = pd.DataFrame({"Normalized Total": normalized_total, 
                            "Purchase Count": gender_counts, 
                            "Total Purchase Value": gender_purchases, 
                            "Average Purchase Value": gender_avgprice})

#format results 
gender_data = gender_data.round(2)
gender_data["Average Purchase Value"] = gender_data["Average Purchase Value"].map("${:,.2f}".format)
gender_data["Total Purchase Value"] = gender_data["Total Purchase Value"].map("${:,.2f}".format)
gender_data["Normalized Total"] = gender_data["Normalized Total"].map("${:,.2f}".format)


#Print results of purchase analysis 
gender_data

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Average Purchase Value	Normalized Total	Purchase Count	Total Purchase Value
Gender
Female	$2.82	$3.83	136	$382.91
Male	$2.95	$4.02	633	$1,867.68
Other / Non-Disclosed	$3.25	$4.47	11	$35.74

Age Demographics

age_bins = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 9999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]

#Cut data to put players into age bins 
#create new column to add the series in 
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], age_bins, labels=group_names)
purchase_data.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Age	Gender	Item ID	Item Name	Price	SN	Age Ranges
0	38	Male	165	Bone Crushing Silver Skewer	3.37	Aelalis34	35-39
1	21	Male	119	Stormbringer, Dark Blade of Ending Misery	2.32	Eolo46	20-24
2	34	Male	174	Primitive Blade	2.46	Assastnya25	30-34
3	21	Male	92	Final Critic	1.36	Pheusrical25	20-24
4	23	Male	63	Stormfury Mace	1.27	Aela59	20-24

# total players and percentages by age 
age_demos_total = purchase_data["Age Ranges"].value_counts()
age_demo_percents = (age_demos_total / total_players) * 100

#create data frame to hold the results 
age_demos = pd.DataFrame({"Total Count": age_demos_total, "Percent of Players": age_demo_percents})
age_demos = age_demos.sort_index()
age_demos = age_demos.round(2)
age_demos

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Percent of Players	Total Count
<10	4.89	28
10-14	6.11	35
15-19	23.21	133
20-24	58.64	336
25-29	21.82	125
30-34	11.17	64
35-39	7.33	42
>40	2.97	17

Purchase Analysis (Age)

#Calculations for purchase analysis by age bins  
age_purchases = purchase_data.groupby(["Age Ranges"]).sum()["Price"].rename("Total Purchase Value")
age_avgprice = purchase_data.groupby(["Age Ranges"]).mean()["Price"].rename("Average Purchase Value")
age_counts = purchase_data.groupby(["Age Ranges"]).count()["Price"].rename("Purchase Count")

#Normalize data 
normalized_total = age_purchases / age_demos_total

# Create data frame to house results 
age_data = pd.DataFrame({"Normalized Total": normalized_total, 
                            "Purchase Count": age_counts, 
                            "Total Purchase Value": age_purchases, 
                            "Average Purchase Value": age_avgprice})

#format results 
age_data = age_data.round(2)
age_data["Average Purchase Value"] = age_data["Average Purchase Value"].map("${:,.2f}".format)
age_data["Total Purchase Value"] = age_data["Total Purchase Value"].map("${:,.2f}".format)
age_data["Normalized Total"] = age_data["Normalized Total"].map("${:,.2f}".format)


#Print results of purchase analysis 
age_data

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Average Purchase Value	Normalized Total	Purchase Count	Total Purchase Value
10-14	$2.77	$2.77	35	$96.95
15-19	$2.91	$2.91	133	$386.42
20-24	$2.91	$2.91	336	$978.77
25-29	$2.96	$2.96	125	$370.33
30-34	$3.08	$3.08	64	$197.25
35-39	$2.84	$2.84	42	$119.40
<10	$2.98	$2.98	28	$83.46
>40	$3.16	$3.16	17	$53.75

Top 5 Spenders

# Identify the top 5 spenders in the game by total purchase value, then list in a table: 
# group totals, averages, and counts by SN 
user_total = purchase_data.groupby(["SN"]).sum()["Price"].rename("Total Purchase Amount")
user_average = purchase_data.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
user_count = purchase_data.groupby(["SN"]).count()["Price"].rename("Purchase Count")

#Create data frame to hold results 
user_spend = pd.DataFrame({"Total Purchase Amount": user_total,
                          "Average Purchase Price": user_average,
                           "Purchase Count": user_count})

#format results 
user_spend = user_spend.round(2)
user_spend["Average Purchase Price"] = user_spend["Average Purchase Price"].map("${:,.2f}".format)
user_spend["Total Purchase Amount"] = user_spend["Total Purchase Amount"].map("${:,.2f}".format)

#Print results of purchase analysis 
#sort values to obtain top 5 
user_spend.sort_values("Total Purchase Amount", ascending=False).head(5)

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	Average Purchase Price	Purchase Count	Total Purchase Amount
SN
Qarwen67	$2.49	4	$9.97
Sondim43	$3.13	3	$9.38
Tillyrin30	$3.06	3	$9.19
Lisistaya47	$3.06	3	$9.19
Tyisriphos58	$4.59	2	$9.18

Most Popular Items

# Identify the top 5 popular items in the game by purchase count, then list in a table: 
# group totals, averages, and counts by SN 
pop_total = purchase_data.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Purchase Value")
popular_count = purchase_data.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Purchase Count")

#Create data frame to hold results 
popular_items = pd.DataFrame({"Total Purchase Value": pop_total, 
                              "Purchase Count": popular_count})

#format results 
popular_items = popular_items.round(2)
popular_items["Total Purchase Value"] = popular_items["Total Purchase Value"].map("${:,.2f}".format)

#Print results of purchase analysis 
#sort values to obtain top 5 
popular_items.sort_values("Purchase Count", ascending=False).head(5)

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

		Purchase Count	Total Purchase Value
Item ID	Item Name
39	Betrayal, Whisper of Grieving Widows	11	$25.85
84	Arcane Gem	11	$24.53
31	Trickster	9	$18.63
175	Woeful Adamantite Claymore	9	$11.16
13	Serenity	9	$13.41

#convert notebook to markdown 
jupyter nbconvert --to markdown notebook.ipynb

  File "<ipython-input-32-99259d0986d2>", line 2
    jupyter nbconvert --to markdown notebook.ipynb
                    ^
SyntaxError: invalid syntax

Name		Name	Last commit message	Last commit date
Latest commit History 16 Commits
.ipynb_checkpoints		.ipynb_checkpoints
.gitattributes		.gitattributes
HW 4_HeroesOfPymoli.ipynb		HW 4_HeroesOfPymoli.ipynb
README.md		README.md
Trends from Heroes of Pylomi.docx		Trends from Heroes of Pylomi.docx
purchase_data.json		purchase_data.json

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Uh oh!

Repository files navigation

Player Counts & Demographics

Purchase Analysis (Total)

Purchase Analysis (Gender)

Age Demographics

Purchase Analysis (Age)

Top 5 Spenders

Most Popular Items

About

Uh oh!

Releases

Packages

Languages

Uh oh!

Uh oh!

l1uc3ky/pandas-challenge

Folders and files

Latest commit

History

Repository files navigation

Player Counts & Demographics

Purchase Analysis (Total)

Purchase Analysis (Gender)

Age Demographics

Purchase Analysis (Age)

Top 5 Spenders

Most Popular Items

About

Resources

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages