# Instructions to users to navigate to the folder where they have uploaded the zip file from our GitHub repository
from google.colab import drive
drive.mount('/content/drive')

print("Navigate to the directory in Google Drive where you have uploaded the project ZIP file using the command below.")
%cd /content/drive/'path to your directory here'


# Mounting my google drive for access to the datasets
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks/Intro to Data Science/Data Science Final Portfolio

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/Intro to Data Science/Data Science Final Portfolio


# Importing different libraries for use in analysis
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns


# Formatting the output
pd.set_option('display.max_columns', None)  # Shows all columns
pd.set_option('display.max_rows', 100)  # Shows 100 rows
pd.set_option('display.width', None)  # Utilizes the maximum width of your terminal


# Specifying the file paths used
ppg_file_path = 'Data Sets/NBA Stats (1947-Present)/Player Per Game.csv'
ps_file_path = 'Data Sets/NBA Stats (1947-Present)/Player Shooting.csv'

# Reading the CSVs into DataFrames
player_per_game_df = pd.read_csv(ppg_file_path)
player_shooting_df = pd.read_csv(ps_file_path)


# Displaying the first 5 rows of each DataFrame
print('Player Per Game Stats:\n', player_per_game_df.head(),'\n\n' 'Player Shooting Stats:\n', player_shooting_df.head())

Player Per Game Stats:
    seas_id  season  player_id         player  birth_year pos   age  experience   lg   tm   g  \
0    31136    2024       5025     A.J. Green         NaN  SG  24.0           2  NBA  MIL  39   
1    31137    2024       5026    A.J. Lawson         NaN  SG  23.0           2  NBA  DAL  28   
2    31138    2024       5027     AJ Griffin         NaN  SF  20.0           2  NBA  ATL  18   
3    31139    2024       4219   Aaron Gordon         NaN  PF  28.0          10  NBA  DEN  54   
4    31140    2024       4582  Aaron Holiday         NaN  PG  27.0           6  NBA  HOU  56   

     gs  mp_per_game  fg_per_game  fga_per_game  fg_percent  x3p_per_game  x3pa_per_game  \
0   0.0          9.2          1.4           3.3       0.438           1.2            2.8   
1   0.0          8.3          1.4           3.0       0.471           0.5            1.4   
2   0.0          7.3          0.7           2.5       0.289           0.5            1.8   
3  54.0         31.5          5.5           9.8       0.557           0.5            1.8   
4   1.0         17.3          2.5           5.5       0.455           1.2            3.0   

   x3p_percent  x2p_per_game  x2pa_per_game  x2p_percent  e_fg_percent  ft_per_game  fta_per_game  \
0        0.423           0.2            0.4        0.529         0.621          0.2           0.2   
1        0.325           1.0            1.6        0.600         0.547          0.4           0.7   
2        0.273           0.2            0.7        0.333         0.389          0.1           0.1   
3        0.293           4.9            8.0        0.618         0.585          2.4           3.7   
4        0.410           1.3            2.6        0.507         0.565          0.7           0.8   

   ft_percent  orb_per_game  drb_per_game  trb_per_game  ast_per_game  stl_per_game  blk_per_game  \
0       1.000           0.2           0.9           1.0           0.5           0.1           0.1   
1       0.632           0.4           0.8           1.2           0.5           0.3           0.1   
2       1.000           0.1           0.7           0.8           0.2           0.1           0.1   
3       0.652           2.4           4.1           6.5           3.2           0.9           0.7   
4       0.889           0.3           1.4           1.7           1.8           0.5           0.1   

   tov_per_game  pf_per_game  pts_per_game  
0           0.1          0.9           4.3  
1           0.4          0.7           3.8  
2           0.3          0.3           2.1  
3           1.5          1.9          13.9  
4           0.8          1.6           7.0   

Player Shooting Stats:
    seas_id  season  player_id         player  birth_year pos  age  experience   lg   tm   g    mp  \
0    31136    2024       5025     A.J. Green         NaN  SG   24           2  NBA  MIL  39   357   
1    31137    2024       5026    A.J. Lawson         NaN  SG   23           2  NBA  DAL  28   231   
2    31138    2024       5027     AJ Griffin         NaN  SF   20           2  NBA  ATL  18   132   
3    31139    2024       4219   Aaron Gordon         NaN  PF   28          10  NBA  DEN  54  1699   
4    31140    2024       4582  Aaron Holiday         NaN  PG   27           6  NBA  HOU  56   967   

   fg_percent  avg_dist_fga  percent_fga_from_x2p_range  percent_fga_from_x0_3_range  \
0       0.438          24.0                       0.133                        0.023   
1       0.471          12.7                       0.529                        0.329   
2       0.289          21.3                       0.267                        0.044   
3       0.557           7.4                       0.814                        0.537   
4       0.455          17.4                       0.465                        0.132   

   percent_fga_from_x3_10_range  percent_fga_from_x10_16_range  percent_fga_from_x16_3p_range  \
0                         0.023                          0.023                          0.063   
1                         0.188                          0.012                          0.000   
2                         0.111                          0.044                          0.067   
3                         0.203                          0.053                          0.021   
4                         0.168                          0.119                          0.045   

   percent_fga_from_x3p_range  fg_percent_from_x2p_range  fg_percent_from_x0_3_range  \
0                       0.867                      0.529                       1.000   
1                       0.471                      0.600                       0.857   
2                       0.733                      0.333                       1.000   
3                       0.186                      0.618                       0.758   
4                       0.535                      0.507                       0.634   

   fg_percent_from_x3_10_range  fg_percent_from_x10_16_range  fg_percent_from_x16_3p_range  \
0                        0.333                         0.333                         0.500   
1                        0.188                         0.000                           NaN   
2                        0.200                         0.000                         0.333   
3                        0.380                         0.214                         0.364   
4                        0.462                         0.514                         0.286   

   fg_percent_from_x3p_range  percent_assisted_x2p_fg  percent_assisted_x3p_fg  \
0                      0.423                    1.000                    0.915   
1                      0.325                    0.519                    1.000   
2                      0.273                    0.750                    0.889   
3                      0.293                    0.644                    0.793   
4                      0.410                    0.274                    0.838   

   percent_dunks_of_fga  num_of_dunks  percent_corner_3s_of_3pa  corner_3_point_percent  \
0                 0.000             0                     0.216                   0.542   
1                 0.129            10                     0.650                   0.308   
2                 0.022             1                     0.242                   0.250   
3                 0.262           128                     0.364                   0.389   
4                 0.006             2                     0.229                   0.447   

   num_heaves_attempted  num_heaves_made  
0                     0                0  
1                     0                0  
2                     0                0  
3                     1                0  
4                     1                0


#See unique column names
player_shooting_df.columns.unique()

Index(['seas_id', 'season', 'player_id', 'player', 'birth_year', 'pos', 'age', 'experience', 'lg',
       'tm', 'g', 'mp', 'fg_percent', 'avg_dist_fga', 'percent_fga_from_x2p_range',
       'percent_fga_from_x0_3_range', 'percent_fga_from_x3_10_range',
       'percent_fga_from_x10_16_range', 'percent_fga_from_x16_3p_range',
       'percent_fga_from_x3p_range', 'fg_percent_from_x2p_range', 'fg_percent_from_x0_3_range',
       'fg_percent_from_x3_10_range', 'fg_percent_from_x10_16_range',
       'fg_percent_from_x16_3p_range', 'fg_percent_from_x3p_range', 'percent_assisted_x2p_fg',
       'percent_assisted_x3p_fg', 'percent_dunks_of_fga', 'num_of_dunks',
       'percent_corner_3s_of_3pa', 'corner_3_point_percent', 'num_heaves_attempted',
       'num_heaves_made'],
      dtype='object')


# Checking the dtypes of the different columns
player_shooting_df.dtypes

seas_id                            int64
season                             int64
player_id                          int64
player                            object
birth_year                       float64
pos                               object
age                                int64
experience                         int64
lg                                object
tm                                object
g                                  int64
mp                                 int64
fg_percent                       float64
avg_dist_fga                     float64
percent_fga_from_x2p_range       float64
percent_fga_from_x0_3_range      float64
percent_fga_from_x3_10_range     float64
percent_fga_from_x10_16_range    float64
percent_fga_from_x16_3p_range    float64
percent_fga_from_x3p_range       float64
fg_percent_from_x2p_range        float64
fg_percent_from_x0_3_range       float64
fg_percent_from_x3_10_range      float64
fg_percent_from_x10_16_range     float64
fg_percent_from_x16_3p_range     float64
fg_percent_from_x3p_range        float64
percent_assisted_x2p_fg          float64
percent_assisted_x3p_fg          float64
percent_dunks_of_fga             float64
num_of_dunks                       int64
percent_corner_3s_of_3pa         float64
corner_3_point_percent           float64
num_heaves_attempted               int64
num_heaves_made                    int64
dtype: object


# Checking for NaNs
player_shooting_df.isnull().sum()

seas_id                              0
season                               0
player_id                            0
player                               0
birth_year                       16531
pos                                  0
age                                  0
experience                           0
lg                                   0
tm                                   0
g                                    0
mp                                   0
fg_percent                         109
avg_dist_fga                       109
percent_fga_from_x2p_range         109
percent_fga_from_x0_3_range        109
percent_fga_from_x3_10_range       109
percent_fga_from_x10_16_range      109
percent_fga_from_x16_3p_range      109
percent_fga_from_x3p_range         109
fg_percent_from_x2p_range          190
fg_percent_from_x0_3_range         578
fg_percent_from_x3_10_range        920
fg_percent_from_x10_16_range      1345
fg_percent_from_x16_3p_range      1481
fg_percent_from_x3p_range         2429
percent_assisted_x2p_fg            482
percent_assisted_x3p_fg           4590
percent_dunks_of_fga               109
num_of_dunks                         0
percent_corner_3s_of_3pa          2429
corner_3_point_percent            4522
num_heaves_attempted                 0
num_heaves_made                      0
dtype: int64


# Using the .drop() function and specifying inplace = True to make the changes permanent within the dataframe.
player_shooting_df.drop(columns = 'birth_year', inplace = True)
player_shooting_df.head()


# Here we will attempt to minimize the discrepancy of NaN and 0 percentages for two-point shot attempts

# Define the columns which indicate percentage of field goals attempted from two-point ranges and three-point range
attempt_columns = [
    'percent_fga_from_x2p_range',
    'percent_fga_from_x0_3_range',
    'percent_fga_from_x3_10_range',
    'percent_fga_from_x10_16_range',
    'percent_fga_from_x16_3p_range',
    'percent_fga_from_x3p_range'
]

# Define the corresponding shooting percentage columns
percent_columns = [
    'fg_percent_from_x2p_range',
    'fg_percent_from_x0_3_range',
    'fg_percent_from_x3_10_range',
    'fg_percent_from_x10_16_range',
    'fg_percent_from_x16_3p_range',
    'fg_percent_from_x3p_range'
]

# Create a copy of the dataframe for the relevant columns before replacement
before_replacement = player_shooting_df[attempt_columns + percent_columns].copy()

# Apply conditional logic to fill NaNs where attempt percentage >= 0
for attempt_col, percent_col in zip(attempt_columns, percent_columns):

    # Replace NaNs with 0 in the shooting percentage column where attempt percentage is >= 0
    player_shooting_df.loc[player_shooting_df[attempt_col] >= 0, percent_col] = player_shooting_df.loc[player_shooting_df[attempt_col] >= 0, percent_col].fillna(0)

# After replacements - check for differences
after_replacement = player_shooting_df[attempt_columns + percent_columns]

# Check the before_replacement DataFrame for NaN values
for attempt_col, percent_col in zip(attempt_columns, percent_columns):
    condition = (before_replacement[attempt_col] > 0) & (before_replacement[percent_col].isnull())
    if condition.any():
        print(f"Rows with non-negative attempts in {attempt_col} but NaN in {percent_col} before replacement:")
        print(before_replacement[condition][[attempt_col, percent_col]])
    else:
        print(f"No discrepancies in {attempt_col} and {percent_col} before replacement.")

# Check if there are rows with attempt percentage > 0 but corresponding shooting percentage is NaN after the operation
for attempt_col, percent_col in zip(attempt_columns, percent_columns):
    condition = (after_replacement[attempt_col] > 0) & (after_replacement[percent_col].isnull())
    if condition.any():
        print(f"Rows with non-negative attempts in {attempt_col} but NaN in {percent_col} after replacement:")
        print(after_replacement[condition][[attempt_col, percent_col]])
    else:
        print(f"No discrepancies found in {attempt_col} and {percent_col} after replacement.")

No discrepancies in percent_fga_from_x2p_range and fg_percent_from_x2p_range before replacement.
No discrepancies in percent_fga_from_x0_3_range and fg_percent_from_x0_3_range before replacement.
No discrepancies in percent_fga_from_x3_10_range and fg_percent_from_x3_10_range before replacement.
No discrepancies in percent_fga_from_x10_16_range and fg_percent_from_x10_16_range before replacement.
No discrepancies in percent_fga_from_x16_3p_range and fg_percent_from_x16_3p_range before replacement.
No discrepancies in percent_fga_from_x3p_range and fg_percent_from_x3p_range before replacement.
No discrepancies found in percent_fga_from_x2p_range and fg_percent_from_x2p_range after replacement.
No discrepancies found in percent_fga_from_x0_3_range and fg_percent_from_x0_3_range after replacement.
No discrepancies found in percent_fga_from_x3_10_range and fg_percent_from_x3_10_range after replacement.
No discrepancies found in percent_fga_from_x10_16_range and fg_percent_from_x10_16_range after replacement.
No discrepancies found in percent_fga_from_x16_3p_range and fg_percent_from_x16_3p_range after replacement.
No discrepancies found in percent_fga_from_x3p_range and fg_percent_from_x3p_range after replacement.


#See unique column names
player_per_game_df.columns.unique()

Index(['seas_id', 'season', 'player_id', 'player', 'birth_year', 'pos', 'age', 'experience', 'lg',
       'tm', 'g', 'gs', 'mp_per_game', 'fg_per_game', 'fga_per_game', 'fg_percent', 'x3p_per_game',
       'x3pa_per_game', 'x3p_percent', 'x2p_per_game', 'x2pa_per_game', 'x2p_percent',
       'e_fg_percent', 'ft_per_game', 'fta_per_game', 'ft_percent', 'orb_per_game', 'drb_per_game',
       'trb_per_game', 'ast_per_game', 'stl_per_game', 'blk_per_game', 'tov_per_game',
       'pf_per_game', 'pts_per_game'],
      dtype='object')


# Checking the dtypes of the different columns
player_per_game_df.dtypes

seas_id            int64
season             int64
player_id          int64
player            object
birth_year       float64
pos               object
age              float64
experience         int64
lg                object
tm                object
g                  int64
gs               float64
mp_per_game      float64
fg_per_game      float64
fga_per_game     float64
fg_percent       float64
x3p_per_game     float64
x3pa_per_game    float64
x3p_percent      float64
x2p_per_game     float64
x2pa_per_game    float64
x2p_percent      float64
e_fg_percent     float64
ft_per_game      float64
fta_per_game     float64
ft_percent       float64
orb_per_game     float64
drb_per_game     float64
trb_per_game     float64
ast_per_game     float64
stl_per_game     float64
blk_per_game     float64
tov_per_game     float64
pf_per_game      float64
pts_per_game     float64
dtype: object


# Checking for NaNs
player_per_game_df.isnull().sum()

seas_id              0
season               0
player_id            0
player               0
birth_year       28944
pos                  0
age                 22
experience           0
lg                   0
tm                   0
g                    0
gs                8637
mp_per_game       1083
fg_per_game          0
fga_per_game         0
fg_percent         163
x3p_per_game      6352
x3pa_per_game     6352
x3p_percent      10545
x2p_per_game         0
x2pa_per_game        0
x2p_percent        250
e_fg_percent       163
ft_per_game          0
fta_per_game         0
ft_percent        1303
orb_per_game      4657
drb_per_game      4657
trb_per_game       894
ast_per_game         0
stl_per_game      5626
blk_per_game      5625
tov_per_game      5635
pf_per_game          0
pts_per_game         0
dtype: int64


# Using the .drop() function and specifying inplace = True to make the changes permanent within the dataframe.
player_per_game_df.drop(columns = 'birth_year', inplace = True)
player_per_game_df.head()


# Filtering the dataframe for rows where the 'age' column is NaN
ppg_nan_age_df = player_per_game_df[player_per_game_df['age'].isnull()]
ppg_nan_age_df.head(30)


# Filter the dataframe to keep only rows where 'lg' is 'NBA', overwriting the original DataFrame
player_per_game_df = player_per_game_df[player_per_game_df['lg'] == 'NBA']

# Now player_per_game_df contains only entries from the NBA
(player_per_game_df['lg']!='NBA').sum()

0


# Filter the dataframe to keep only rows where 'lg' is 'NBA', overwriting the original DataFrame
player_shooting_df = player_shooting_df[player_shooting_df['lg'] == 'NBA']

# Now player_per_game_df contains only entries from the NBA
(player_shooting_df['lg']!='NBA').sum()

0


player_per_game_df.isnull().sum()

seas_id             0
season              0
player_id           0
player              0
pos                 0
age                 0
experience          0
lg                  0
tm                  0
g                   0
gs               6417
mp_per_game       501
fg_per_game         0
fga_per_game        0
fg_percent        146
x3p_per_game     5770
x3pa_per_game    5770
x3p_percent      9621
x2p_per_game        0
x2pa_per_game       0
x2p_percent       233
e_fg_percent      146
ft_per_game         0
fta_per_game        0
ft_percent       1228
orb_per_game     3900
drb_per_game     3900
trb_per_game      312
ast_per_game        0
stl_per_game     3900
blk_per_game     3900
tov_per_game     5052
pf_per_game         0
pts_per_game        0
dtype: int64


# Filter the dataframe for rows where 'gs' is NaN but 'g' is greater than 0
missing_gs_played_games = player_per_game_df[(player_per_game_df['gs'].isnull()) & (player_per_game_df['g'] > 0)]

# Display the first few rows of this filtered dataframe to investigate
print(missing_gs_played_games[['player', 'g', 'gs']].head(20))

                player   g  gs
23104    Abdul Jeelani  66 NaN
23105   Adrian Dantley  80 NaN
23106       Alan Hardy  22 NaN
23107     Alex English  81 NaN
23108    Allan Bristow  82 NaN
23109    Allen Leavell  79 NaN
23110      Alvan Adams  75 NaN
23111      Alvin Scott  82 NaN
23112   Andre McCarter  43 NaN
23114  Anthony Roberts  26 NaN
23115      Armond Hill  75 NaN
23116      Armond Hill  24 NaN
23117      Armond Hill  51 NaN
23118      Art Collins  29 NaN
23119    Artis Gilmore  82 NaN
23120      Austin Carr  47 NaN
23121      Austin Carr   8 NaN
23122      Austin Carr  39 NaN
23123     Ben Poquette  82 NaN
23124     Bernard King  81 NaN


missing_gs_played_games.loc[23104]

seas_id                   8348
season                    1981
player_id                 1845
player           Abdul Jeelani
pos                         SF
age                       26.0
experience                   2
lg                         NBA
tm                         DAL
g                           66
gs                         NaN
mp_per_game               16.8
fg_per_game                2.8
fga_per_game               6.7
fg_percent               0.425
x3p_per_game               0.0
x3pa_per_game              0.0
x3p_percent                0.0
x2p_per_game               2.8
x2pa_per_game              6.7
x2p_percent              0.426
e_fg_percent             0.425
ft_per_game                2.7
fta_per_game               3.3
ft_percent               0.814
orb_per_game               1.3
drb_per_game               2.2
trb_per_game               3.5
ast_per_game               1.0
stl_per_game               0.7
blk_per_game               0.5
tov_per_game               1.3
pf_per_game                1.9
pts_per_game               8.4
Name: 23104, dtype: object


# Let's look at the average percentage of field goal attempts from 3-point range
player_shooting_df['percent_fga_from_x3p_range'].mean()

0.26173929001203367


# Let's see Steph Curry's shooting from the 2015-2016 season (his unanimous MVP season).
player_shooting_df[(player_shooting_df['player'] == 'Stephen Curry') & (player_shooting_df['season'] == 2016)][['percent_fga_from_x3p_range', 'fg_percent_from_x3p_range']]


# Let's filter for each player and their MVP seasons
curry_stats = player_shooting_df[
    (player_shooting_df['player'] == 'Stephen Curry') &
    ((player_shooting_df['season'] == 2015) | (player_shooting_df['season'] == 2016))][['player', 'season', 'percent_fga_from_x3p_range', 'fg_percent_from_x3p_range']]

giannis_stats = player_shooting_df[
    (player_shooting_df['player'] == 'Giannis Antetokounmpo') &
    ((player_shooting_df['season'] == 2019) | (player_shooting_df['season'] == 2020))][['player', 'season', 'percent_fga_from_x3p_range', 'fg_percent_from_x3p_range']]

jokic_stats = player_shooting_df[
    (player_shooting_df['player'] == 'Nikola Jokić') &
    ((player_shooting_df['season'] == 2021) | (player_shooting_df['season'] == 2022))][['player', 'season', 'percent_fga_from_x3p_range', 'fg_percent_from_x3p_range']]

# Concatenating the dataframes for easy viewing
mvp_comparison = pd.concat([curry_stats, giannis_stats, jokic_stats])

# Resetting the index for better readability
mvp_comparison.reset_index(drop=True, inplace=True)

mvp_comparison


# Looking at effective field goal percent, defined by the NBA as follows: "eFG% measures field goal percentage adjusting for the fact that a 3-point field goal is worth one more point than a 2-point field goal. The formula is eFG% = ((FGM + (0.5 * 3PM)) / FGA"
player_per_game_df['e_fg_percent'].mean()

0.45996369872317305


# Filter for Stephen Curry's back-to-back MVP seasons (2014-2015, 2015-2016)
curry_efg = player_per_game_df[
    (player_per_game_df['player'] == 'Stephen Curry') &
    ((player_per_game_df['season'] == 2015) | (player_per_game_df['season'] == 2016))
][['player', 'season', 'e_fg_percent']]

# Filter for Giannis Antetokounmpo's back-to-back MVP seasons (2018-2019, 2019-2020)
giannis_efg = player_per_game_df[
    (player_per_game_df['player'] == 'Giannis Antetokounmpo') &
    ((player_per_game_df['season'] == 2019) | (player_per_game_df['season'] == 2020))
][['player', 'season', 'e_fg_percent']]

# Filter for Nikola Jokić's back-to-back MVP seasons (2020-2021, 2021-2022)
jokic_efg = player_per_game_df[
    (player_per_game_df['player'] == 'Nikola Jokić') &
    ((player_per_game_df['season'] == 2021) | (player_per_game_df['season'] == 2022))
][['player', 'season', 'e_fg_percent']]

# Concatenate the filtered DataFrames
mvp_efg_comparison = pd.concat([curry_efg, giannis_efg, jokic_efg])

# Reset the index for better readability
mvp_efg_comparison.reset_index(drop=True, inplace=True)

mvp_efg_comparison


# Filter for Stephen Curry's back-to-back MVP seasons (2014-2015, 2015-2016)
curry_fga = player_per_game_df[
    (player_per_game_df['player'] == 'Stephen Curry') &
    ((player_per_game_df['season'] == 2015) | (player_per_game_df['season'] == 2016))
][['player', 'season', 'fga_per_game']]

# Filter for Giannis Antetokounmpo's back-to-back MVP seasons (2018-2019, 2019-2020)
giannis_fga = player_per_game_df[
    (player_per_game_df['player'] == 'Giannis Antetokounmpo') &
    ((player_per_game_df['season'] == 2019) | (player_per_game_df['season'] == 2020))
][['player', 'season', 'fga_per_game']]

# Filter for Nikola Jokić's back-to-back MVP seasons (2020-2021, 2021-2022)
jokic_fga = player_per_game_df[
    (player_per_game_df['player'] == 'Nikola Jokić') &
    ((player_per_game_df['season'] == 2021) | (player_per_game_df['season'] == 2022))
][['player', 'season', 'fga_per_game']]

# Concatenate the filtered DataFrames
mvp_fga_comparison = pd.concat([curry_fga, giannis_fga, jokic_fga])

# Reset the index for better readability
mvp_fga_comparison.reset_index(drop=True, inplace=True)

mvp_fga_comparison


# Combine the two DataFrames on 'player' and 'season'
mvp_stats_comparison = pd.merge(mvp_efg_comparison, mvp_fga_comparison, on=['player', 'season'])

# Set up the matplotlib figure
plt.figure(figsize=(14, 7))

# Plot 1: FGA per game
plt.subplot(1, 2, 1)  # 1 row, 2 columns, first plot
sns.barplot(data=mvp_stats_comparison, x='player', y='fga_per_game', hue='player', palette='Blues_d', legend=False)
plt.title('Field Goal Attempts per Game (FGA per Game)')
plt.xlabel('Player')
plt.ylabel('FGA per Game')

# Plot 2: eFG%
plt.subplot(1, 2, 2)  # 1 row, 2 columns, second plot
sns.barplot(data=mvp_stats_comparison, x='player', y='e_fg_percent', hue='player', palette='Greens_d', legend=False)
plt.title('Effective Field Goal Percentage (eFG%)')
plt.xlabel('Player')
plt.ylabel('eFG%')

# Adjust layout to prevent overlap
plt.tight_layout()

# Display the plots
plt.show()


# Filter for rows where the fg_percent_from_x2p_range is NaN
missing_fg_percent_from_x2p = player_shooting_df[player_shooting_df['fg_percent_from_x2p_range'].isnull()]

# Now, display a sample of the rows with missing fg_percent_from_x2p values
print(missing_fg_percent_from_x2p.head())

     seas_id  season  player_id           player pos  age  experience   lg   tm  g  mp  \
44     31180    2024       3984  Bismack Biyombo   C   31          13  NBA  OKC  2   9   
201    31337    2024       5140   Filip Petrušev   C   23           1  NBA  PHI  1   3   
292    31428    2024       5060    Jason Preston  PG   24           2  NBA  UTA  1   1   
294    31430    2024       4955    Javonte Smart  PG   24           2  NBA  PHI  1   1   
354    31490    2024       4528   Justin Jackson  SF   28           7  NBA  MIN  2   1   

     fg_percent  avg_dist_fga  percent_fga_from_x2p_range  percent_fga_from_x0_3_range  \
44          NaN           NaN                         NaN                          NaN   
201         NaN           NaN                         NaN                          NaN   
292         NaN           NaN                         NaN                          NaN   
294         NaN           NaN                         NaN                          NaN   
354         NaN           NaN                         NaN                          NaN   

     percent_fga_from_x3_10_range  percent_fga_from_x10_16_range  percent_fga_from_x16_3p_range  \
44                            NaN                            NaN                            NaN   
201                           NaN                            NaN                            NaN   
292                           NaN                            NaN                            NaN   
294                           NaN                            NaN                            NaN   
354                           NaN                            NaN                            NaN   

     percent_fga_from_x3p_range  fg_percent_from_x2p_range  fg_percent_from_x0_3_range  \
44                          NaN                        NaN                         NaN   
201                         NaN                        NaN                         NaN   
292                         NaN                        NaN                         NaN   
294                         NaN                        NaN                         NaN   
354                         NaN                        NaN                         NaN   

     fg_percent_from_x3_10_range  fg_percent_from_x10_16_range  fg_percent_from_x16_3p_range  \
44                           NaN                           NaN                           NaN   
201                          NaN                           NaN                           NaN   
292                          NaN                           NaN                           NaN   
294                          NaN                           NaN                           NaN   
354                          NaN                           NaN                           NaN   

     fg_percent_from_x3p_range  percent_assisted_x2p_fg  percent_assisted_x3p_fg  \
44                         NaN                      NaN                      NaN   
201                        NaN                      NaN                      NaN   
292                        NaN                      NaN                      NaN   
294                        NaN                      NaN                      NaN   
354                        NaN                      NaN                      NaN   

     percent_dunks_of_fga  num_of_dunks  percent_corner_3s_of_3pa  corner_3_point_percent  \
44                    NaN             0                       NaN                     NaN   
201                   NaN             0                       NaN                     NaN   
292                   NaN             0                       NaN                     NaN   
294                   NaN             0                       NaN                     NaN   
354                   NaN             0                       NaN                     NaN   

     num_heaves_attempted  num_heaves_made  
44                      0                0  
201                     0                0  
292                     0                0  
294                     0                0  
354                     0                0


# Filter for rows with NaN in the fg_percent_from_x2p_range column and check games played
missing_data_with_g = player_shooting_df[player_shooting_df['fg_percent_from_x2p_range'].isnull()][['player', 'season', 'tm', 'g', 'percent_fga_from_x2p_range', 'fg_percent_from_x2p_range']]

# Display the players who have zero games played
no_games_played = missing_data_with_g[missing_data_with_g['g'] == 0]
print(no_games_played)

Empty DataFrame
Columns: [player, season, tm, g, percent_fga_from_x2p_range, fg_percent_from_x2p_range]
Index: []


# In the same format as before, let's import the 'Player Totals' CSV
pt_file_path = 'Data Sets/NBA Stats (1947-Present)/Player Totals.csv'
player_totals_df = pd.read_csv(pt_file_path)


# Checking for NaNs
player_totals_df.isnull().sum()

seas_id             0
season              0
player_id           0
player              0
birth_year      28944
pos                 0
age                22
experience          0
lg                  0
tm                  0
g                   0
gs               8637
mp               1083
fg                  0
fga                 0
fg_percent        163
x3p              6352
x3pa             6352
x3p_percent     10545
x2p                 0
x2pa                0
x2p_percent       250
e_fg_percent      163
ft                  0
fta                 0
ft_percent       1303
orb              4657
drb              4657
trb               894
ast                 0
stl              5626
blk              5625
tov              5635
pf                  0
pts                 0
dtype: int64


# Using the .drop() function and specifying inplace = True to make the changes permanent within the dataframe.
player_totals_df.drop(columns = ['birth_year'], inplace = True)

# Filter the dataframe to keep only rows where 'lg' is 'NBA', overwriting the original DataFrame
player_totals_df = player_totals_df[player_totals_df['lg'] == 'NBA']

# Now player_totals_df contains only entries from the NBA
(player_per_game_df['lg']!='NBA').sum()

0


player_totals_df.head()


# Rechecking for NaNs
player_totals_df.isnull().sum()

seas_id            0
season             0
player_id          0
player             0
pos                0
age                0
experience         0
lg                 0
tm                 0
g                  0
gs              6417
mp               501
fg                 0
fga                0
fg_percent       146
x3p             5770
x3pa            5770
x3p_percent     9621
x2p                0
x2pa               0
x2p_percent      233
e_fg_percent     146
ft                 0
fta                0
ft_percent      1228
orb             3900
drb             3900
trb              312
ast                0
stl             3900
blk             3900
tov             5052
pf                 0
pts                0
dtype: int64


# Updating our DataFrames to only include data from 1981 on

# For player_shooting_df
player_shooting_df = player_shooting_df[player_shooting_df['season'] > 1980]

# For player_per_game_df
player_per_game_df = player_per_game_df[player_per_game_df['season'] > 1980]

# For player_totals_df
player_totals_df = player_totals_df[player_totals_df['season'] > 1980]


# Rechecking for NaNs
player_shooting_df.isnull().sum()

seas_id                             0
season                              0
player_id                           0
player                              0
pos                                 0
age                                 0
experience                          0
lg                                  0
tm                                  0
g                                   0
mp                                  0
fg_percent                        109
avg_dist_fga                      109
percent_fga_from_x2p_range        109
percent_fga_from_x0_3_range       109
percent_fga_from_x3_10_range      109
percent_fga_from_x10_16_range     109
percent_fga_from_x16_3p_range     109
percent_fga_from_x3p_range        109
fg_percent_from_x2p_range         109
fg_percent_from_x0_3_range        109
fg_percent_from_x3_10_range       109
fg_percent_from_x10_16_range      109
fg_percent_from_x16_3p_range      109
fg_percent_from_x3p_range         109
percent_assisted_x2p_fg           482
percent_assisted_x3p_fg          4590
percent_dunks_of_fga              109
num_of_dunks                        0
percent_corner_3s_of_3pa         2429
corner_3_point_percent           4522
num_heaves_attempted                0
num_heaves_made                     0
dtype: int64


player_per_game_df.isnull().sum()

seas_id             0
season              0
player_id           0
player              0
pos                 0
age                 0
experience          0
lg                  0
tm                  0
g                   0
gs                340
mp_per_game         0
fg_per_game         0
fga_per_game        0
fg_percent        133
x3p_per_game        0
x3pa_per_game       0
x3p_percent      3767
x2p_per_game        0
x2pa_per_game       0
x2p_percent       220
e_fg_percent      133
ft_per_game         0
fta_per_game        0
ft_percent       1110
orb_per_game        0
drb_per_game        0
trb_per_game        0
ast_per_game        0
stl_per_game        0
blk_per_game        0
tov_per_game        0
pf_per_game         0
pts_per_game        0
dtype: int64


player_totals_df.isnull().sum()

seas_id            0
season             0
player_id          0
player             0
pos                0
age                0
experience         0
lg                 0
tm                 0
g                  0
gs               340
mp                 0
fg                 0
fga                0
fg_percent       133
x3p                0
x3pa               0
x3p_percent     3767
x2p                0
x2pa               0
x2p_percent      220
e_fg_percent     133
ft                 0
fta                0
ft_percent      1110
orb                0
drb                0
trb                0
ast                0
stl                0
blk                0
tov                0
pf                 0
pts                0
dtype: int64


# Merge Player Per Game and Player Shooting with appropriate suffixes
combined_df = pd.merge(
    player_per_game_df, player_shooting_df,
    on=['seas_id', 'player_id', 'season', 'player', 'pos', 'age', 'experience', 'lg', 'tm', 'g'],
    how='outer',
    suffixes=('_per_game', '')  # Suffix per game
)

# Merge the result with Player Totals and apply clear suffixes
ppg_ps_combined_df = pd.merge(
    combined_df, player_totals_df,
    on=['seas_id', 'player_id', 'season', 'player', 'pos', 'age', 'experience', 'lg', 'tm', 'g', 'gs', 'mp'],
    how='outer',
    suffixes=('', '_totals')  # Suffix totals to distinguish these stats
)


# Check the updated column names to confirm changes
print(ppg_ps_combined_df.columns.unique())

Index(['seas_id', 'season', 'player_id', 'player', 'pos', 'age', 'experience', 'lg', 'tm', 'g',
       'gs', 'mp_per_game', 'fg_per_game', 'fga_per_game', 'fg_percent_per_game', 'x3p_per_game',
       'x3pa_per_game', 'x3p_percent', 'x2p_per_game', 'x2pa_per_game', 'x2p_percent',
       'e_fg_percent', 'ft_per_game', 'fta_per_game', 'ft_percent', 'orb_per_game', 'drb_per_game',
       'trb_per_game', 'ast_per_game', 'stl_per_game', 'blk_per_game', 'tov_per_game',
       'pf_per_game', 'pts_per_game', 'mp', 'fg_percent', 'avg_dist_fga',
       'percent_fga_from_x2p_range', 'percent_fga_from_x0_3_range', 'percent_fga_from_x3_10_range',
       'percent_fga_from_x10_16_range', 'percent_fga_from_x16_3p_range',
       'percent_fga_from_x3p_range', 'fg_percent_from_x2p_range', 'fg_percent_from_x0_3_range',
       'fg_percent_from_x3_10_range', 'fg_percent_from_x10_16_range',
       'fg_percent_from_x16_3p_range', 'fg_percent_from_x3p_range', 'percent_assisted_x2p_fg',
       'percent_assisted_x3p_fg', 'percent_dunks_of_fga', 'num_of_dunks',
       'percent_corner_3s_of_3pa', 'corner_3_point_percent', 'num_heaves_attempted',
       'num_heaves_made', 'fg', 'fga', 'fg_percent_totals', 'x3p', 'x3pa', 'x3p_percent_totals',
       'x2p', 'x2pa', 'x2p_percent_totals', 'e_fg_percent_totals', 'ft', 'fta',
       'ft_percent_totals', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts'],
      dtype='object')


# Now print the null counts
print(ppg_ps_combined_df.isnull().sum())

seas_id                              0
season                               0
player_id                            0
player                               0
pos                                  0
age                                  0
experience                           0
lg                                   0
tm                                   0
g                                    0
gs                                 680
mp_per_game                       6738
fg_per_game                       6738
fga_per_game                      6738
fg_percent_per_game               6871
x3p_per_game                      6738
x3pa_per_game                     6738
x3p_percent                      10505
x2p_per_game                      6738
x2pa_per_game                     6738
x2p_percent                       6958
e_fg_percent                      6871
ft_per_game                       6738
fta_per_game                      6738
ft_percent                        7848
orb_per_game                      6738
drb_per_game                      6738
trb_per_game                      6738
ast_per_game                      6738
stl_per_game                      6738
blk_per_game                      6738
tov_per_game                      6738
pf_per_game                       6738
pts_per_game                      6738
mp                                6738
fg_percent                       13585
avg_dist_fga                     13585
percent_fga_from_x2p_range       13585
percent_fga_from_x0_3_range      13585
percent_fga_from_x3_10_range     13585
percent_fga_from_x10_16_range    13585
percent_fga_from_x16_3p_range    13585
percent_fga_from_x3p_range       13585
fg_percent_from_x2p_range        13585
fg_percent_from_x0_3_range       13585
fg_percent_from_x3_10_range      13585
fg_percent_from_x10_16_range     13585
fg_percent_from_x16_3p_range     13585
fg_percent_from_x3p_range        13585
percent_assisted_x2p_fg          13958
percent_assisted_x3p_fg          18066
percent_dunks_of_fga             13585
num_of_dunks                     13476
percent_corner_3s_of_3pa         15905
corner_3_point_percent           17998
num_heaves_attempted             13476
num_heaves_made                  13476
fg                                6738
fga                               6738
fg_percent_totals                 6871
x3p                               6738
x3pa                              6738
x3p_percent_totals               10505
x2p                               6738
x2pa                              6738
x2p_percent_totals                6958
e_fg_percent_totals               6871
ft                                6738
fta                               6738
ft_percent_totals                 7848
orb                               6738
drb                               6738
trb                               6738
ast                               6738
stl                               6738
blk                               6738
tov                               6738
pf                                6738
pts                               6738
dtype: int64


ppg_ps_combined_df[ppg_ps_combined_df['mp_per_game'].isnull()]


#Average rebounds per game of NBA players
ppg_ps_combined_df['trb_per_game'].mean()

3.449737929858951


# Let's filter for each player and their MVP seasons
curry_stats = ppg_ps_combined_df[
    (ppg_ps_combined_df['player'] == 'Stephen Curry') &
    ((ppg_ps_combined_df['season'] == 2015) | (player_per_game_df['season'] == 2016))]

giannis_stats = ppg_ps_combined_df[
    (ppg_ps_combined_df['player'] == 'Giannis Antetokounmpo') &
    ((ppg_ps_combined_df['season'] == 2019) | (player_per_game_df['season'] == 2020))]

jokic_stats = ppg_ps_combined_df[
    (ppg_ps_combined_df['player'] == 'Nikola Jokić') &
    ((ppg_ps_combined_df['season'] == 2021) | (player_per_game_df['season'] == 2022))]

# Concatenating the dataframes for easy viewing
mvp_comparison = pd.concat([curry_stats, giannis_stats, jokic_stats])

# Resetting the index for better readability
mvp_comparison.reset_index(drop=True, inplace=True)

mvp_comparison[['player', 'season', 'trb_per_game']]


# Define colors for each player
color_map = {
    'Stephen Curry': 'blue',
    'Giannis Antetokounmpo': 'green',
    'Nikola Jokić': 'orange'
}

# Assign colors based on player name
colors = [color_map[name.split(' (')[0]] for name in mvp_comparison['player']]


# Create bar chart
plt.figure(figsize=(10, 6))
plt.bar(mvp_comparison.index, mvp_comparison['trb_per_game'], color=colors)

plt.xlabel('Player and Season')
plt.xticks(mvp_comparison.index,
 ['Stephen Curry (2015)', 'Stephen Curry (2016)',
  'Giannis Antetokounmpo (2019)', 'Giannis Antetokounmpo (2020)',
  'Nikola Jokić (2021)', 'Nikola Jokić (2022)'], rotation=45, ha='right')

plt.ylabel('Rebounds per Game')
plt.title('Rebounds per Game for MVP Seasons')

plt.tight_layout()
plt.show()


#Average assists per game of NBA players
ppg_ps_combined_df['ast_per_game'].mean()

1.8478800017045212


mvp_comparison[['player', 'season', 'ast_per_game']]


# Define colors for each player
color_map = {
    'Stephen Curry': 'blue',
    'Giannis Antetokounmpo': 'green',
    'Nikola Jokić': 'orange'
}

# Assign colors based on player name
colors = [color_map[name.split(' (')[0]] for name in mvp_comparison['player']]


# Create bar chart
plt.figure(figsize=(10, 6))
plt.bar(mvp_comparison.index, mvp_comparison['ast_per_game'], color=colors)

plt.xlabel('Player and Season')
plt.xticks(mvp_comparison.index,
 ['Stephen Curry (2015)', 'Stephen Curry (2016)',
  'Giannis Antetokounmpo (2019)', 'Giannis Antetokounmpo (2020)',
  'Nikola Jokić (2021)', 'Nikola Jokić (2022)'], rotation=45, ha='right')

plt.ylabel('Assists per Game')
plt.title('Assists per Game for MVP Seasons')

plt.tight_layout()
plt.show()


ts_file_path = 'Data Sets/NBA Stats (1947-Present)/Team Summaries.csv'

team_summary_df = pd.read_csv(ts_file_path)

team_summary_df.head()


team_summary_df.columns.unique()

Index(['season', 'lg', 'team', 'abbreviation', 'playoffs', 'age', 'w', 'l', 'pw', 'pl', 'mov',
       'sos', 'srs', 'o_rtg', 'd_rtg', 'n_rtg', 'pace', 'f_tr', 'x3p_ar', 'ts_percent',
       'e_fg_percent', 'tov_percent', 'orb_percent', 'ft_fga', 'opp_e_fg_percent',
       'opp_tov_percent', 'opp_drb_percent', 'opp_ft_fga', 'arena', 'attend', 'attend_g'],
      dtype='object')


team_summary_df.dtypes

season                int64
lg                   object
team                 object
abbreviation         object
playoffs               bool
age                 float64
w                   float64
l                   float64
pw                  float64
pl                  float64
mov                 float64
sos                 float64
srs                 float64
o_rtg               float64
d_rtg               float64
n_rtg               float64
pace                float64
f_tr                float64
x3p_ar              float64
ts_percent          float64
e_fg_percent        float64
tov_percent         float64
orb_percent         float64
ft_fga              float64
opp_e_fg_percent    float64
opp_tov_percent     float64
opp_drb_percent     float64
opp_ft_fga          float64
arena                object
attend              float64
attend_g            float64
dtype: object


team_summary_df.isnull().sum()

season                0
lg                    0
team                  0
abbreviation         87
playoffs              0
age                  64
w                    88
l                    88
pw                    1
pl                    1
mov                   1
sos                   1
srs                   1
o_rtg                53
d_rtg                53
n_rtg               136
pace                 53
f_tr                  1
x3p_ar              443
ts_percent            1
e_fg_percent          1
tov_percent         318
orb_percent         366
ft_fga                1
opp_e_fg_percent    264
opp_tov_percent     318
opp_drb_percent     366
opp_ft_fga          264
arena                88
attend              485
attend_g            878
dtype: int64


team_summary_df[team_summary_df['w'].isnull()]


# Filter the DataFrame for the seasons of interest
SGJ_MVP_seasons = [2015, 2016, 2019, 2020, 2021, 2022]

# Calculate the mean wins and losses for each season
mean_wins_losses = team_summary_df[team_summary_df['season'].isin(SGJ_MVP_seasons)].groupby('season')[['w', 'l']].mean()

# Print the mean wins and losses for each season
print("Mean Wins and Losses for NBA Teams: ")
print(mean_wins_losses)

Mean Wins and Losses for NBA Teams: 
           w     l
season            
2015    41.0  41.0
2016    41.0  41.0
2019    41.0  41.0
2020    35.3  35.3
2021    36.0  36.0
2022    41.0  41.0


# Filter data for the specified teams and seasons
warriors_data = team_summary_df[
    ((team_summary_df['team'] == 'Golden State Warriors') &
     ((team_summary_df['season'] == 2015) | (team_summary_df['season'] == 2016)))
]

bucks_data = team_summary_df[
    ((team_summary_df['team'] == 'Milwaukee Bucks') &
     ((team_summary_df['season'] == 2019) | (team_summary_df['season'] == 2020)))
]

nuggets_data = team_summary_df[
    ((team_summary_df['team'] == 'Denver Nuggets') &
     ((team_summary_df['season'] == 2021) | (team_summary_df['season'] == 2022)))
]

# Calculate mean wins and losses for each team and season
warriors_mean_wins_losses = warriors_data.groupby('season')[['w', 'l']].mean()
bucks_mean_wins_losses = bucks_data.groupby('season')[['w', 'l']].mean()
nuggets_mean_wins_losses = nuggets_data.groupby('season')[['w', 'l']].mean()

# Display the results
print("Mean Wins and Losses for Golden State Warriors in 2015 and 2016:")
print(warriors_mean_wins_losses)

print("\nMean Wins and Losses for Milwaukee Bucks in 2019 and 2020:")
print(bucks_mean_wins_losses)

print("\nMean Wins and Losses for Denver Nuggets in 2021 and 2022:")
print(nuggets_mean_wins_losses)

Mean Wins and Losses for Golden State Warriors in 2015 and 2016:
           w     l
season            
2015    67.0  15.0
2016    73.0   9.0

Mean Wins and Losses for Milwaukee Bucks in 2019 and 2020:
           w     l
season            
2019    60.0  22.0
2020    56.0  17.0

Mean Wins and Losses for Denver Nuggets in 2021 and 2022:
           w     l
season            
2021    47.0  25.0
2022    48.0  34.0


# Combining the mean wins and losses dataframes for the MVPs
wbn_mean_wins_losses = pd.merge(warriors_mean_wins_losses, bucks_mean_wins_losses, how='outer', left_index=True, right_index=True)
wbn_mean_wins_losses = pd.merge(wbn_mean_wins_losses, nuggets_mean_wins_losses, how='outer', left_index=True, right_index=True)

# Summing up the wins across different columns ('w_x', 'w_y', 'w')
wbn_mean_wins_losses['wbn_wins'] = wbn_mean_wins_losses[['w_x', 'w_y', 'w']].sum(axis=1)

team_labels = ['Warriors 2015', 'Warriors 2016', 'Bucks 2019', 'Bucks 2020', 'Nuggets 2021', 'Nuggets 2022']

# Calculate league average wins
league_avg_wins = team_summary_df[team_summary_df['season'].isin(SGJ_MVP_seasons)].groupby('season')['w'].mean()

# Plotting
plt.figure(figsize=(10, 6))
plt.bar(league_avg_wins.index, league_avg_wins, width=0.4, label='League Average Wins', color='gray', align='edge')
plt.bar(wbn_mean_wins_losses.index, wbn_mean_wins_losses['wbn_wins'], width=-0.4, label=team_labels, tick_label=team_labels, color=['blue', 'blue', 'green', 'green', 'orange', 'orange'], align='edge')
plt.xlabel('Season')
plt.xticks(list(wbn_mean_wins_losses.index))
plt.ylabel('Wins')
plt.title('Team Wins vs. League Average Wins by Season')
plt.legend()
plt.tight_layout()
plt.show()


# Let's import some more data frames and begin exploring the NBA season awards

# We already have combined player per game and player shooting stats, and team summary stats available to us.

# File paths for new data we will use
advanced_file_path = 'Data Sets/NBA Stats (1947-Present)/Advanced.csv'
award_file_path = 'Data Sets/NBA Stats (1947-Present)/Player Award Shares.csv'

# Creating dataframes for the new data
advanced_df = pd.read_csv(advanced_file_path)
awards_df = pd.read_csv(award_file_path)

# Let's look at awards
awards_df.head()


# Convert 'winner' column to boolean type and create a winners dataframe
award_winner_df = awards_df[awards_df['winner'].astype(bool) == True]
award_winner_df.head()


past_MVPs = award_winner_df[award_winner_df['award'] == 'nba mvp']
past_MVPs.head()


# Using our newly imported awards data and adding it to our combined player per game and player shooting dataframe
ppg_ps_combined_df = pd.merge(ppg_ps_combined_df, award_winner_df[['player', 'season', 'award', 'pts_won', 'first', 'pts_max', 'share', 'winner']], on=['player', 'season'], how='left')


# Looking at the winners
ppg_ps_combined_df[ppg_ps_combined_df['winner'] == True].head()


ppg_ps_combined_df.columns.unique()

Index(['seas_id', 'season', 'player_id', 'player', 'pos', 'age', 'experience', 'lg', 'tm', 'g',
       'gs', 'mp_per_game', 'fg_per_game', 'fga_per_game', 'fg_percent_per_game', 'x3p_per_game',
       'x3pa_per_game', 'x3p_percent', 'x2p_per_game', 'x2pa_per_game', 'x2p_percent',
       'e_fg_percent', 'ft_per_game', 'fta_per_game', 'ft_percent', 'orb_per_game', 'drb_per_game',
       'trb_per_game', 'ast_per_game', 'stl_per_game', 'blk_per_game', 'tov_per_game',
       'pf_per_game', 'pts_per_game', 'mp', 'fg_percent', 'avg_dist_fga',
       'percent_fga_from_x2p_range', 'percent_fga_from_x0_3_range', 'percent_fga_from_x3_10_range',
       'percent_fga_from_x10_16_range', 'percent_fga_from_x16_3p_range',
       'percent_fga_from_x3p_range', 'fg_percent_from_x2p_range', 'fg_percent_from_x0_3_range',
       'fg_percent_from_x3_10_range', 'fg_percent_from_x10_16_range',
       'fg_percent_from_x16_3p_range', 'fg_percent_from_x3p_range', 'percent_assisted_x2p_fg',
       'percent_assisted_x3p_fg', 'percent_dunks_of_fga', 'num_of_dunks',
       'percent_corner_3s_of_3pa', 'corner_3_point_percent', 'num_heaves_attempted',
       'num_heaves_made', 'fg', 'fga', 'fg_percent_totals', 'x3p', 'x3pa', 'x3p_percent_totals',
       'x2p', 'x2pa', 'x2p_percent_totals', 'e_fg_percent_totals', 'ft', 'fta',
       'ft_percent_totals', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'award',
       'pts_won', 'first', 'pts_max', 'share', 'winner'],
      dtype='object')


# Adding win percentage
team_summary_df['win_percentage'] = team_summary_df['w'] / (team_summary_df['w'] + team_summary_df['l'])


# Renaming stats that are reflected in both dataframes
ppg_ps_combined_df.rename(columns={
    'orb_per_game': 'player_orb_per_game',
    'drb_per_game': 'player_drb_per_game',
    'e_fg_percent': 'player_e_fg_percent',
    'tov_per_game': 'player_tov_per_game',
    'x3p_per_game': 'player_x3p_per_game',
    'ft_per_game': 'player_ft_per_game'
}, inplace=True)

team_summary_df.rename(columns={
    'orb_percent': 'team_orb_percent',
    'drb_percent': 'team_drb_percent',
    'e_fg_percent': 'team_e_fg_percent',
    'tov_percent': 'team_tov_percent',
    'x3p_ar': 'team_x3p_ar',
    'ft_fga': 'team_free_throws_per_field_goal_attempt',
    'opp_drb_percent': 'team_opp_drb_percent',
    'win_perc': 'team_win_percentage',
    'w': 'team_wins',
    'l': 'team_losses',
    'pw': 'team_pythagorean_wins',
    'pl': 'team_pythagorean_losses',
    'mov': 'team_margin_of_victory',
    'sos': 'team_strength_of_schedule',
    'srs': 'team_simple_rating_system',
    'o_rtg': 'team_offensive_rating',
    'd_rtg': 'team_defensive_rating',
    'n_rtg': 'team_net_rating',
    'pace': 'team_pace',
    'f_tr': 'team_free_throw_rate',
    'ts_percent': 'team_true_shooting_percentage',
    'opp_e_fg_percent': 'team_opponent_efg_percent',
    'opp_tov_percent': 'team_opponent_tov_percent'
}, inplace=True)

# Combining the dataframes into team_player_stats
team_player_stats_df = pd.merge(ppg_ps_combined_df, team_summary_df, how='left', left_on=['season', 'tm'], right_on=['season', 'abbreviation'], suffixes=('_playerStat', '_teamStat'))


# Selecting relevant columns from advanced stats and renaming them to then merge into team_player_stats
columns_to_keep = [
    'season', 'player', 'per', 'ts_percent', 'x3p_ar', 'f_tr', 'orb_percent',
    'drb_percent', 'trb_percent', 'ast_percent', 'stl_percent', 'blk_percent',
    'tov_percent', 'usg_percent', 'ows', 'dws', 'ws', 'ws_48', 'obpm', 'dbpm',
    'bpm', 'vorp'
]

advanced_df = advanced_df[columns_to_keep].copy()

advanced_df.rename(columns={
                   'per': 'player_per',
                   'ts_percent': 'player_ts_percent',
                   'x3p_ar': 'player_x3p_ar',
                   'f_tr': 'player_f_tr',
                   'orb_percent': 'player_orb_percent',
                   'drb_percent': 'player_drb_percent',
                   'trb_percent': 'player_trb_percent',
                   'ast_percent': 'player_ast_percent',
                   'stl_percent': 'player_stl_percent',
                   'blk_percent': 'player_blk_percent',
                   'tov_percent': 'player_tov_percent',
                   'usg_percent': 'player_usg_percent',
                   'ows': 'player_ows',
                   'dws': 'player_dws',
                   'ws': 'player_ws',
                   'ws_48': 'player_ws_48',
                   'obpm': 'player_obpm',
                   'dbpm': 'player_dbpm',
                   'bpm': 'player_bpm',
                   'vorp': 'player_vorp'
    }, inplace=True)

team_player_stats_df = pd.merge(team_player_stats_df, advanced_df, on=['player', 'season'], how='left')

# Making sure all of our data is consistent in being after 1980 and dropping anything not in the NBA
modern_data = team_player_stats_df[(team_player_stats_df['season'] >= 1980) & (team_player_stats_df['lg_teamStat'] == 'NBA')].copy()

# Taking a look at our mega dataframe!
modern_data.head()


# Checking existing column names and their count before drop
print("Columns before drop:", modern_data.columns)
print("Number of columns before drop:", len(modern_data.columns))

Columns before drop: Index(['seas_id', 'season', 'player_id', 'player', 'pos', 'age_playerStat', 'experience',
       'lg_playerStat', 'tm', 'g',
       ...
       'player_tov_percent', 'player_usg_percent', 'player_ows', 'player_dws', 'player_ws',
       'player_ws_48', 'player_obpm', 'player_dbpm', 'player_bpm', 'player_vorp'],
      dtype='object', length=136)
Number of columns before drop: 136


# Dropping some data to clean it up a bit

# Drop columns
modern_data.drop(columns=['lg_playerStat', 'tm', 'lg_teamStat', 'arena', 'attend', 'attend_g', 'e_fg_percent_totals'], inplace=True, errors='ignore')
modern_data.dropna(subset=['win_percentage', 'abbreviation'], inplace=True)

# Check remaining column names and their count after drop
print("Columns after drop:", modern_data.columns)
print("Number of columns after drop:", len(modern_data.columns))

Columns after drop: Index(['seas_id', 'season', 'player_id', 'player', 'pos', 'age_playerStat', 'experience', 'g',
       'gs', 'mp_per_game',
       ...
       'player_tov_percent', 'player_usg_percent', 'player_ows', 'player_dws', 'player_ws',
       'player_ws_48', 'player_obpm', 'player_dbpm', 'player_bpm', 'player_vorp'],
      dtype='object', length=129)
Number of columns after drop: 129


# Updating past_MVPs data frame
past_MVPs = modern_data[modern_data['award'] == 'nba mvp']

# Add a new column 'Is_MVP' to modern_data dataframe
modern_data['Is_MVP'] = modern_data['player'].isin(past_MVPs['player'])

# Visualization 1: Team Win Percentage vs. Player Points Per Game
plt.figure(figsize=(14, 8))
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == False], x="win_percentage", y="pts_per_game", color='lightblue', marker='o', s=100, alpha=0.5, zorder = 1, label='Non-MVP')
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == True], x="win_percentage", y="pts_per_game", color='darkviolet', marker='X', s=100, alpha=.8, zorder = 2, label='MVP')
plt.axhline(modern_data[modern_data['Is_MVP'] == True]['pts_per_game'].mean(), color='darkviolet', linestyle='--', linewidth=1, label='MVP Average')
plt.axhline(modern_data[modern_data['Is_MVP'] == False]['pts_per_game'].mean(), color='skyblue', linestyle='--', linewidth=1, label='Non-MVP Average')
plt.title('Team Win Percentage vs. Player Points Per Game')
plt.xlabel('Team Win Percentage')
plt.ylabel('Player Points Per Game')
handles, labels = plt.gca().get_legend_handles_labels()
order = [1, 0, 2, 3]
plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], title='Legend', loc='upper right', framealpha=1)
plt.show()

# Visualization 2: Team EFG Percent vs. Player EFG Percent
plt.figure(figsize=(14, 8))
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == False], x="team_e_fg_percent", y="player_e_fg_percent", color='lightblue', marker='o', s=100, alpha=0.5, zorder = 1, label='Non-MVP')
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == True], x="team_e_fg_percent", y="player_e_fg_percent", color='darkviolet', marker='X', s=100, alpha=.8, zorder = 2, label='MVP')
plt.axhline(modern_data[modern_data['Is_MVP'] == True]['player_e_fg_percent'].mean(), color='darkviolet', linestyle='--', linewidth=1, label='MVP Average EFG%')
plt.axhline(modern_data[modern_data['Is_MVP'] == False]['player_e_fg_percent'].mean(), color='skyblue', linestyle='--', linewidth=1, label='Non-MVP Average EFG%')
plt.title('Team EFG Percent vs. Player EFG Percent')
plt.xlabel('Team EFG Percent')
plt.ylabel('Player Effective Field Goal Percentage')
handles, labels = plt.gca().get_legend_handles_labels()
order = [1, 0, 2, 3]
plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], title='Legend', loc='upper right', framealpha=1)
plt.show()

# Visualization 3: Player Assists Per Game vs. Team Wins
plt.figure(figsize=(14, 8))
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == False], x="team_wins", y="ast_per_game", color='lightblue', marker='o', s=100, alpha=0.5, zorder = 1, label='Non-MVP')
sns.scatterplot(data=modern_data[modern_data['Is_MVP'] == True], x="team_wins", y="ast_per_game", color='darkviolet', marker='X', s=100, alpha=.8, zorder = 2, label='MVP')
plt.axhline(modern_data[modern_data['Is_MVP'] == True]['ast_per_game'].mean(), color='darkviolet', linestyle='--', linewidth=1, label='MVP Average Assists')
plt.axhline(modern_data[modern_data['Is_MVP'] == False]['ast_per_game'].mean(), color='skyblue', linestyle='--', linewidth=1, label='Non-MVP Average Assists')
plt.title('Team Wins vs. Player Assists Per Game')
plt.xlabel('Team Wins')
plt.ylabel('Player Assists Per Game')
handles, labels = plt.gca().get_legend_handles_labels()
order = [1, 0, 2, 3]
plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], title='Legend', loc='upper right', framealpha=1)
plt.show()


# Now let's show a few correlation matrices for different categories of stats

# Subsets of performance-related statistics, advanced metrics, efficiency-related statistics, and team impact-related statistics
performance_stats = modern_data[['pts_per_game', 'ast_per_game', 'trb_per_game', 'stl_per_game', 'blk_per_game', 'Is_MVP']]
advanced_stats = modern_data[['player_per', 'player_vorp', 'player_usg_percent', 'player_ws', 'player_bpm', 'Is_MVP']]
efficiency_stats = modern_data[['fg_percent', 'player_e_fg_percent', 'ft_percent', 'x3p_percent', 'Is_MVP']]
team_impact_stats = modern_data[['win_percentage', 'team_wins', 'team_net_rating', 'team_pace', 'team_e_fg_percent', 'Is_MVP']]

# Computing the correlation matrices for these stats
corr_performance = performance_stats.corr()
corr_advanced = advanced_stats.corr()
corr_efficiency = efficiency_stats.corr()
corr_team_impact = team_impact_stats.corr()

# Plotting the correlation matrices
plt.figure(figsize=(8, 6))
sns.heatmap(corr_performance, annot=True, fmt=".2f", cmap='coolwarm', cbar=True, square=True, linewidths=.5)
plt.title('Correlation Matrix for Player Performance Metrics')
plt.show()

plt.figure(figsize=(8, 6))
sns.heatmap(corr_efficiency, annot=True, fmt=".2f", cmap='coolwarm', cbar=True, square=True, linewidths=.5)
plt.title('Correlation Matrix for Player Efficiency Metrics')
plt.show()

plt.figure(figsize=(8, 6))
sns.heatmap(corr_advanced, annot=True, fmt=".2f", cmap='coolwarm', cbar=True, square=True, linewidths=.5)
plt.title('Correlation Matrix for Player Advanced Metrics')
plt.show()

plt.figure(figsize=(8, 6))
sns.heatmap(corr_team_impact, annot=True, fmt=".2f", cmap='coolwarm', cbar=True, square=True, linewidths=.5)
plt.title('Correlation Matrix for Team Impact Metrics')
plt.show()


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, roc_auc_score, precision_recall_curve, auc
import matplotlib.pyplot as plt
import pandas as pd

modern_data['Is_MVP'] = modern_data['Is_MVP'].astype(int)

# Define features and target for the base model
base_features = [
    'pts_per_game', 'trb_per_game', 'ast_per_game', 'stl_per_game', 'blk_per_game',
    'fg_percent_per_game', 'player_e_fg_percent', 'ft_percent', 'x3p_percent',
    'player_per', 'player_vorp', 'player_ws', 'player_bpm',
    'team_wins', 'win_percentage', 'team_net_rating',
    'player_ts_percent', 'player_x3p_ar', 'player_usg_percent',
    'player_ows', 'player_dws', 'player_dbpm'
]
X_base = modern_data[base_features]
y_base = modern_data['Is_MVP']

# Create preprocessing and modeling pipeline
model_base = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Splitting data
X_train_base, X_test_base, y_train_base, y_test_base = train_test_split(X_base, y_base, test_size=0.2, random_state=42)

# Fitting model
model_base.fit(X_train_base, y_train_base)

# Making predictions
predictions_base = model_base.predict(X_test_base)

# Evaluating model
print("Base Model Accuracy:", accuracy_score(y_test_base, predictions_base))
print(classification_report(y_test_base, predictions_base))

Base Model Accuracy: 0.9887554306158958
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      7712
           1       0.73      0.36      0.48       114

    accuracy                           0.99      7826
   macro avg       0.86      0.68      0.74      7826
weighted avg       0.99      0.99      0.99      7826


# Base Model 2023 MVP Prediction
data_2023_base = modern_data[modern_data['season'] == 2023].copy()
X_2023_base = data_2023_base[base_features]
predictions_2023_base = model_base.predict(X_2023_base)
probs_2023_base = model_base.predict_proba(X_2023_base)[:, 1]
data_2023_base['predicted_MVP_base'] = predictions_2023_base
data_2023_base['MVP_probability_base'] = probs_2023_base
top_candidates_base = data_2023_base[data_2023_base['predicted_MVP_base'] == 1].sort_values('MVP_probability_base', ascending=False)
print(top_candidates_base[['player', 'MVP_probability_base']])

                     player  MVP_probability_base
1911           Nikola Jokić              0.981343
1414  Giannis Antetokounmpo              0.972390
1556            Joel Embiid              0.932235
1749            Luka Dončić              0.831246
1741           LeBron James              0.601394
1457              Ja Morant              0.599394
1542           Jayson Tatum              0.567255
1313       Domantas Sabonis              0.547751
1688           Kevin Durant              0.502502


# Dictionary to store metrics for each model version
model_metrics = {}

# DataFrame to summarize model evaluation metrics
columns = ['Model', 'Accuracy', 'ROC AUC', 'Precision_Non_MVP', 'Recall_Non_MVP', 'F1_Non_MVP', 'Precision_MVP', 'Recall_MVP', 'F1_MVP']
model_summary = pd.DataFrame(columns=columns)

# Model evaluation
y_true = data_2023_base['Is_MVP']
y_scores = probs_2023_base

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, predictions_2023_base)
print("Confusion Matrix:\n", conf_matrix)

# Classification Report
class_report = classification_report(y_true, predictions_2023_base)
print("Classification Report:\n", class_report)

# ROC Curve and AUC
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.grid(True)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# Precision-Recall Curve
precision, recall, _ = precision_recall_curve(y_true, y_scores)
pr_auc = auc(recall, precision)
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, color='blue', lw=2, label='Precision-Recall curve (area = %0.2f)' % pr_auc)
plt.grid(True)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.show()

# Calculate accuracy
accuracy_base = accuracy_score(y_true, predictions_2023_base)

# Store results
model_metrics['Base_Model'] = {
    'Confusion_Matrix': conf_matrix,
    'Classification_Report': class_report,
    'ROC_AUC': roc_auc,
    'Precision_Recall_AUC': pr_auc
}


base_metrics = pd.DataFrame({
    'Model': ['Base_Model'],
    'Accuracy': [accuracy_base],
    'ROC AUC': [roc_auc]
})

# Use concat to add the new row to model_summary
model_summary = pd.concat([model_summary, base_metrics], ignore_index=True)

Confusion Matrix:
 [[865   4]
 [ 14   5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       869
           1       0.56      0.26      0.36        19

    accuracy                           0.98       888
   macro avg       0.77      0.63      0.67       888
weighted avg       0.97      0.98      0.98       888


modern_data['points_per_minute'] = modern_data['pts_per_game'] / modern_data['mp_per_game']
modern_data['efficiency_team_success'] = modern_data['player_per'] * modern_data['win_percentage']
modern_data['player_per_squared'] = modern_data['player_per'] ** 2
modern_data['player_vorp_cubed'] = modern_data['player_vorp'] ** 3
modern_data['player_team_points_share'] = modern_data['pts_per_game'] / modern_data.groupby('team')['pts_per_game'].transform('mean')


# Updated features list with new engineered features
features_fe = base_features + ['points_per_minute', 'efficiency_team_success', 'player_per_squared']

X_fe = modern_data[features_fe]
y_fe = modern_data['Is_MVP']

# Create preprocessing and modeling pipeline again
model_fe = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Splitting the data again
X_train_fe, X_test_fe, y_train_fe, y_test_fe = train_test_split(X_fe, y_fe, test_size=0.2, random_state=42)

# Fitting the model again
model_fe.fit(X_train_fe, y_train_fe)

# Making predictions
predictions_fe = model_fe.predict(X_test_fe)

# Evaluating the model
print("Feature Engineering Model Accuracy:", accuracy_score(y_test_fe, predictions_fe))
print(classification_report(y_test_fe, predictions_fe))

Feature Engineering Model Accuracy: 0.989010989010989
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      7712
           1       0.74      0.38      0.50       114

    accuracy                           0.99      7826
   macro avg       0.87      0.69      0.75      7826
weighted avg       0.99      0.99      0.99      7826


# Feature Engineering for the 2023 data
data_2023_fe = modern_data[modern_data['season'] == 2023].copy()
data_2023_fe['points_per_minute'] = data_2023_fe['pts_per_game'] / data_2023_fe['mp_per_game']
data_2023_fe['efficiency_team_success'] = data_2023_fe['player_per'] * data_2023_fe['win_percentage']
data_2023_fe['player_per_squared'] = data_2023_fe['player_per'] ** 2
data_2023_fe['player_vorp_cubed'] = data_2023_fe['player_vorp'] ** 3
data_2023_fe['player_team_points_share'] = data_2023_fe['pts_per_game'] / data_2023_fe.groupby('team')['pts_per_game'].transform('mean')
X_2023_fe = data_2023_fe[features_fe]
predictions_2023_fe = model_fe.predict(X_2023_fe)
probs_2023_fe = model_fe.predict_proba(X_2023_fe)[:, 1]
data_2023_fe['predicted_MVP_fe'] = predictions_2023_fe
data_2023_fe['MVP_probability_fe'] = probs_2023_fe
top_candidates_fe = data_2023_fe[data_2023_fe['predicted_MVP_fe'] == 1].sort_values('MVP_probability_fe', ascending=False)
print(top_candidates_fe[['player', 'MVP_probability_fe']])

                     player  MVP_probability_fe
1414  Giannis Antetokounmpo            0.975176
1911           Nikola Jokić            0.967024
1556            Joel Embiid            0.918652
1749            Luka Dončić            0.694112
1457              Ja Morant            0.632326
1741           LeBron James            0.603599
1542           Jayson Tatum            0.556099
1688           Kevin Durant            0.509479


# Model Evaluation
y_true_fe = data_2023_fe['Is_MVP']
y_scores_fe = probs_2023_fe

conf_matrix_fe = confusion_matrix(y_true_fe, predictions_2023_fe)
print("Confusion Matrix:\n", conf_matrix_fe)
class_report_fe = classification_report(y_true_fe, predictions_2023_fe)
print("Classification Report:\n", class_report_fe)

fpr_fe, tpr_fe, thresholds_fe = roc_curve(y_true_fe, y_scores_fe)
roc_auc_fe = auc(fpr_fe, tpr_fe)
plt.figure(figsize=(8, 6))
plt.plot(fpr_fe, tpr_fe, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc_fe)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.grid(True)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

precision_fe, recall_fe, _ = precision_recall_curve(y_true_fe, y_scores_fe)
pr_auc_fe = auc(recall_fe, precision_fe)
plt.figure(figsize=(8, 6))
plt.plot(recall_fe, precision_fe, color='blue', lw=2, label='Precision-Recall curve (area = %0.2f)' % pr_auc_fe)
plt.grid(True)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.show()

accuracy_fe = accuracy_score(y_true_fe, predictions_2023_fe)
model_metrics['Feature_Engineered_Model'] = {
    'Confusion_Matrix': conf_matrix_fe,
    'Classification_Report': class_report_fe,
    'ROC_AUC': roc_auc_fe,
    'Precision_Recall_AUC': pr_auc_fe
}

# Update model_summary DataFrame
fe_metrics = pd.DataFrame({
    'Model': ['Feature_Engineered_Model'],
    'Accuracy': [accuracy_fe],
    'ROC AUC': [roc_auc_fe]
})

model_summary = pd.concat([model_summary, fe_metrics], ignore_index=True)

Confusion Matrix:
 [[866   3]
 [ 14   5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       869
           1       0.62      0.26      0.37        19

    accuracy                           0.98       888
   macro avg       0.80      0.63      0.68       888
weighted avg       0.98      0.98      0.98       888


from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, confusion_matrix, roc_curve, auc, precision_recall_curve
from sklearn.model_selection import GridSearchCV
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import pandas as pd

# Prepare the data with all features
features_all = [col for col in modern_data.columns if col not in ['Is_MVP', 'player', 'season', 'award', 'pos', 'seas_id', 'season, player_id', 'team', 'abbreviation', 'award', 'pts_won', 'first', 'pts_max', 'share', 'winner']]
X_all = modern_data[features_all]
y_all = modern_data['Is_MVP']

# Split the data into training and testing sets
X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

# Impute missing values
imputer = SimpleImputer(strategy='median')
X_train_imputed = imputer.fit_transform(X_train_all)
X_test_imputed = imputer.transform(X_test_all)

# Handle class imbalance with SMOTE applied only on the training data
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_imputed, y_train_all)

# Feature selection with Random Forest applied on resampled training data
selector = SelectFromModel(RandomForestClassifier(n_estimators=100, random_state=42), threshold='median')
X_train_selected = selector.fit_transform(X_train_resampled, y_train_resampled)
selected_features = [features_all[i] for i in selector.get_support(indices=True)]
X_test_selected = selector.transform(X_test_imputed)

print("Selected features:", selected_features)

# Define the pipeline for hyperparameter tuning
pipeline_tuned = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Define the parameter grid
param_grid = {
    'classifier__C': [0.01, 0.1, 1, 10, 100],
    'classifier__penalty': ['l1', 'l2'],
    'classifier__solver': ['liblinear']
}

# Setup the grid search on the selected features
grid_search = GridSearchCV(pipeline_tuned, param_grid, cv=5, scoring='roc_auc', verbose=1, n_jobs=-1)
grid_search.fit(X_train_selected, y_train_resampled)

# Evaluate the tuned model using the selected features of the test set
best_model = grid_search.best_estimator_
predictions_tuned = best_model.predict(X_test_selected)
probs_tuned = best_model.predict_proba(X_test_selected)[:, 1]
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))
print(classification_report(y_test_all, predictions_tuned))
print("ROC AUC score:", roc_auc_score(y_test_all, probs_tuned))

# Feature importance assessment
importance = permutation_importance(best_model, X_test_selected, y_test_all, n_repeats=10, random_state=42)
feature_importances = pd.DataFrame({
    'Feature': selected_features,
    'Importance': importance.importances_mean
})
feature_importances = feature_importances.sort_values(by='Importance', ascending=False)
print("\nFeature Importances:\n", feature_importances)

Selected features: ['player_id', 'age_playerStat', 'experience', 'gs', 'mp_per_game', 'fg_per_game', 'fga_per_game', 'x2p_per_game', 'x2pa_per_game', 'player_ft_per_game', 'fta_per_game', 'player_drb_per_game', 'trb_per_game', 'ast_per_game', 'stl_per_game', 'player_tov_per_game', 'pf_per_game', 'pts_per_game', 'mp', 'percent_fga_from_x2p_range', 'percent_fga_from_x10_16_range', 'percent_assisted_x2p_fg', 'percent_assisted_x3p_fg', 'percent_corner_3s_of_3pa', 'fg', 'x2p', 'ft', 'ft_percent_totals', 'orb', 'drb', 'trb', 'ast', 'tov', 'pf', 'pts', 'playoffs', 'age_teamStat', 'team_pace', 'team_x3p_ar', 'team_opp_drb_percent', 'opp_ft_fga', 'player_per', 'player_drb_percent', 'player_trb_percent', 'player_ast_percent', 'player_blk_percent', 'player_tov_percent', 'player_usg_percent', 'player_ows', 'player_dws', 'player_ws', 'player_ws_48', 'player_obpm', 'player_dbpm', 'player_bpm', 'player_vorp', 'points_per_minute', 'efficiency_team_success', 'player_per_squared', 'player_vorp_cubed', 'player_team_points_share']
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best parameters: {'classifier__C': 100, 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear'}
Best cross-validation score: 0.99
              precision    recall  f1-score   support

           0       1.00      0.95      0.97      7712
           1       0.20      0.89      0.32       114

    accuracy                           0.95      7826
   macro avg       0.60      0.92      0.65      7826
weighted avg       0.99      0.95      0.96      7826

ROC AUC score: 0.9739276224794351

Feature Importances:
                           Feature    Importance
30                            trb  3.117301e-01
29                            drb  2.768336e-01
6                    fga_per_game  1.499617e-01
28                            orb  1.429849e-01
54                     player_bpm  1.422566e-01
5                     fg_per_game  1.358037e-01
2                      experience  1.131357e-01
7                    x2p_per_game  1.065040e-01
17                   pts_per_game  1.026706e-01
1                  age_playerStat  1.022745e-01
52                    player_obpm  7.737030e-02
48                     player_ows  5.894454e-02
8                   x2pa_per_game  4.883721e-02
9              player_ft_per_game  3.373371e-02
39           team_opp_drb_percent  3.087145e-02
58             player_per_squared  2.407360e-02
34                            pts  2.406082e-02
51                   player_ws_48  2.397138e-02
50                      player_ws  2.384360e-02
53                    player_dbpm  1.896243e-02
49                     player_dws  1.842576e-02
24                             fg  1.772297e-02
0                       player_id  1.745464e-02
31                            ast  1.558906e-02
16                    pf_per_game  1.290570e-02
18                             mp  7.794531e-03
12                   trb_per_game  5.852287e-03
26                             ft  5.801176e-03
43             player_trb_percent  4.574495e-03
42             player_drb_percent  4.459494e-03
38                    team_x3p_ar  2.913366e-03
14                   stl_per_game  2.657807e-03
10                   fta_per_game  2.619474e-03
11            player_drb_per_game  1.431127e-03
3                              gs  1.380015e-03
35                       playoffs  8.305648e-04
19     percent_fga_from_x2p_range  7.794531e-04
13                   ast_per_game  5.877843e-04
46             player_tov_percent  3.577818e-04
27              ft_percent_totals  1.916688e-04
56              points_per_minute  1.788909e-04
45             player_blk_percent  1.150013e-04
36                   age_teamStat  0.000000e+00
20  percent_fga_from_x10_16_range -3.330669e-17
37                      team_pace -1.277792e-05
32                            tov -7.538973e-04
44             player_ast_percent -9.200102e-04
40                     opp_ft_fga -9.327881e-04
59              player_vorp_cubed -9.966777e-04
21        percent_assisted_x2p_fg -1.558906e-03
23       percent_corner_3s_of_3pa -1.840020e-03
22        percent_assisted_x3p_fg -1.878354e-03
33                             pf -3.258370e-03
25                            x2p -5.162280e-03
57        efficiency_team_success -8.101201e-03
4                     mp_per_game -8.177869e-03
47             player_usg_percent -8.765653e-03
55                    player_vorp -9.506772e-03
60       player_team_points_share -1.015845e-02
15            player_tov_per_game -1.078456e-02
41                     player_per -1.115512e-02


# Create a copy of the 2023 data
data_2023_tuned = modern_data[modern_data['season'] == 2023].copy()

# Apply the same feature engineering steps as done during model training
data_2023_tuned['points_per_minute'] = data_2023_tuned['pts_per_game'] / data_2023_tuned['mp_per_game']
data_2023_tuned['efficiency_team_success'] = data_2023_tuned['player_per'] * data_2023_tuned['win_percentage']
data_2023_tuned['player_per_squared'] = data_2023_tuned['player_per'] ** 2
data_2023_tuned['player_vorp_cubed'] = data_2023_tuned['player_vorp'] ** 3
data_2023_tuned['player_team_points_share'] = data_2023_tuned['pts_per_game'] / data_2023_tuned.groupby('team')['pts_per_game'].transform('mean')

# Create a list of common features present in both features_all and data_2023_tuned.columns
common_features = [col for col in features_all if col in data_2023_tuned.columns]

# Prepare the features for prediction using common_features
X_2023_tuned = data_2023_tuned[common_features]

# Impute missing values using the same imputer fitted on the training data
X_2023_imputed = imputer.transform(X_2023_tuned)

# Transform the data using the same feature selector used during training
X_2023_selected = selector.transform(X_2023_imputed)

# Use the best_model from the grid search to predict the 2023 MVP
predictions_2023_tuned = best_model.predict(X_2023_selected)
probs_2023_tuned = best_model.predict_proba(X_2023_selected)[:, 1]

# Add predictions and probabilities to the 2023 data for analysis
data_2023_tuned['predicted_MVP_tuned'] = predictions_2023_tuned
data_2023_tuned['MVP_probability_tuned'] = probs_2023_tuned

# Display the results, possibly sorting them to show top MVP candidates
top_candidates_2023_tuned = data_2023_tuned.sort_values(by='MVP_probability_tuned', ascending=False)
print(top_candidates_2023_tuned[['player', 'MVP_probability_tuned']])

                     player  MVP_probability_tuned
1414  Giannis Antetokounmpo           9.999935e-01
1911           Nikola Jokić           9.999884e-01
1741           LeBron James           9.997334e-01
1691           Kevin Durant           9.996930e-01
1688           Kevin Durant           9.996097e-01
...                     ...                    ...
1503         Jamaree Bouyea           4.458844e-08
1878            Moses Brown           7.247953e-09
1103       Alondes Williams           3.698974e-09
1285          Deonte Burton           1.101422e-09
1186           Chima Moneke           3.340905e-10

[888 rows x 2 columns]


# Model Evaluation
y_true_tuned = data_2023_tuned['Is_MVP']
y_scores_tuned = probs_2023_tuned

conf_matrix_tuned = confusion_matrix(y_true_tuned, predictions_2023_tuned)
print("Confusion Matrix:\n", conf_matrix_tuned)

class_report_tuned = classification_report(y_true_tuned, predictions_2023_tuned)
print("Classification Report:\n", class_report_tuned)

fpr_tuned, tpr_tuned, thresholds_tuned = roc_curve(y_true_tuned, y_scores_tuned)
roc_auc_tuned = auc(fpr_tuned, tpr_tuned)

plt.figure(figsize=(8, 6))
plt.plot(fpr_tuned, tpr_tuned, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc_tuned)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.grid(True)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

precision_tuned, recall_tuned, _ = precision_recall_curve(y_true_tuned, y_scores_tuned)
pr_auc_tuned = auc(recall_tuned, precision_tuned)

plt.figure(figsize=(8, 6))
plt.plot(recall_tuned, precision_tuned, color='blue', lw=2, label='Precision-Recall curve (area = %0.2f)' % pr_auc_tuned)
plt.grid(True)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower left")
plt.show()

accuracy_tuned = accuracy_score(y_true_tuned, predictions_2023_tuned)

model_metrics['Tuned_Model'] = {
    'Confusion_Matrix': conf_matrix_tuned,
    'Classification_Report': class_report_tuned,
    'ROC_AUC': roc_auc_tuned,
    'Precision_Recall_AUC': pr_auc_tuned
}

# Update model_summary DataFrame
tuned_metrics = pd.DataFrame({
    'Model': ['Tuned_Model'],
    'Accuracy': [accuracy_tuned],
    'ROC AUC': [roc_auc_tuned]
})

model_summary = pd.concat([model_summary, tuned_metrics], ignore_index=True)

Confusion Matrix:
 [[817  52]
 [  1  18]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.94      0.97       869
           1       0.26      0.95      0.40        19

    accuracy                           0.94       888
   macro avg       0.63      0.94      0.69       888
weighted avg       0.98      0.94      0.96       888


X_new = modern_data[features_all]
y_new = modern_data['Is_MVP']


# Create preprocessing and modeling pipeline again
pipeline_new = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Splitting the data again
X_train_new, X_test_new, y_train_new, y_test_new = train_test_split(X_new, y_new, test_size=0.2, random_state=42)

# Fitting the model again
pipeline_new.fit(X_train_new, y_train_new)

# Making predictions
predictions_new = pipeline_new.predict(X_test_new)

# Evaluating the model
print(features_all)
print("Basic Model Accuracy (All Features):", accuracy_score(y_test_new, predictions_new))
print(classification_report(y_test_new, predictions_new))


# Create a copy of the 2023 data
data_2023_new = modern_data[modern_data['season'] == 2023].copy()

# Apply the same feature engineering steps as done during model training
data_2023_new['points_per_minute'] = data_2023_new['pts_per_game'] / data_2023_new['mp_per_game']
data_2023_new['efficiency_team_success'] = data_2023_new['player_per'] * data_2023_new['win_percentage']
data_2023_new['player_per_squared'] = data_2023_new['player_per'] ** 2
data_2023_new['player_vorp_cubed'] = data_2023_new['player_vorp'] ** 3
data_2023_new['player_team_points_share'] = data_2023_new['pts_per_game'] / data_2023_new.groupby('team')['pts_per_game'].transform('mean')



X_2023_new = data_2023_new[features_all]



# Predict the 2023 season MVP using the feature-engineered model
predictions_2023_new = pipeline_new.predict(X_2023_new)
probs_2023_new = pipeline_new.predict_proba(X_2023_new)[:, 1]

# Adding predictions and probabilities to the data for 2023
data_2023_new['predicted_MVP_new'] = predictions_2023_new
data_2023_new['MVP_probability_new'] = probs_2023_new

# Filtering for players with non-zero MVP probability and sort by the highest probabilities but for this model version
top_candidates_new = data_2023_new[data_2023_new['predicted_MVP_new'] == 1].sort_values('MVP_probability_new', ascending=False)

print(top_candidates_new[['player', 'MVP_probability_new']])

['player_id', 'age_playerStat', 'experience', 'g', 'gs', 'mp_per_game', 'fg_per_game', 'fga_per_game', 'fg_percent_per_game', 'player_x3p_per_game', 'x3pa_per_game', 'x3p_percent', 'x2p_per_game', 'x2pa_per_game', 'x2p_percent', 'player_e_fg_percent', 'player_ft_per_game', 'fta_per_game', 'ft_percent', 'player_orb_per_game', 'player_drb_per_game', 'trb_per_game', 'ast_per_game', 'stl_per_game', 'blk_per_game', 'player_tov_per_game', 'pf_per_game', 'pts_per_game', 'mp', 'fg_percent', 'avg_dist_fga', 'percent_fga_from_x2p_range', 'percent_fga_from_x0_3_range', 'percent_fga_from_x3_10_range', 'percent_fga_from_x10_16_range', 'percent_fga_from_x16_3p_range', 'percent_fga_from_x3p_range', 'fg_percent_from_x2p_range', 'fg_percent_from_x0_3_range', 'fg_percent_from_x3_10_range', 'fg_percent_from_x10_16_range', 'fg_percent_from_x16_3p_range', 'fg_percent_from_x3p_range', 'percent_assisted_x2p_fg', 'percent_assisted_x3p_fg', 'percent_dunks_of_fga', 'num_of_dunks', 'percent_corner_3s_of_3pa', 'corner_3_point_percent', 'num_heaves_attempted', 'num_heaves_made', 'fg', 'fga', 'fg_percent_totals', 'x3p', 'x3pa', 'x3p_percent_totals', 'x2p', 'x2pa', 'x2p_percent_totals', 'ft', 'fta', 'ft_percent_totals', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'playoffs', 'age_teamStat', 'team_wins', 'team_losses', 'team_pythagorean_wins', 'team_pythagorean_losses', 'team_margin_of_victory', 'team_strength_of_schedule', 'team_simple_rating_system', 'team_offensive_rating', 'team_defensive_rating', 'team_net_rating', 'team_pace', 'team_free_throw_rate', 'team_x3p_ar', 'team_true_shooting_percentage', 'team_e_fg_percent', 'team_tov_percent', 'team_orb_percent', 'team_free_throws_per_field_goal_attempt', 'team_opponent_efg_percent', 'team_opponent_tov_percent', 'team_opp_drb_percent', 'opp_ft_fga', 'win_percentage', 'player_per', 'player_ts_percent', 'player_x3p_ar', 'player_f_tr', 'player_orb_percent', 'player_drb_percent', 'player_trb_percent', 'player_ast_percent', 'player_stl_percent', 'player_blk_percent', 'player_tov_percent', 'player_usg_percent', 'player_ows', 'player_dws', 'player_ws', 'player_ws_48', 'player_obpm', 'player_dbpm', 'player_bpm', 'player_vorp', 'points_per_minute', 'efficiency_team_success', 'player_per_squared', 'player_vorp_cubed', 'player_team_points_share']
Basic Model Accuracy (All Features): 0.9905443393815487
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      7712
           1       0.72      0.57      0.64       114

    accuracy                           0.99      7826
   macro avg       0.86      0.78      0.82      7826
weighted avg       0.99      0.99      0.99      7826

                     player  MVP_probability_new
1911           Nikola Jokić             0.991590
1414  Giannis Antetokounmpo             0.991424
1556            Joel Embiid             0.961598
1741           LeBron James             0.952313
1691           Kevin Durant             0.947272
1692           Kevin Durant             0.939353
1688           Kevin Durant             0.911138
1689           Kevin Durant             0.898374
1693           Kevin Durant             0.777994
1690           Kevin Durant             0.666675
1508           James Harden             0.599046
1606           Jrue Holiday             0.590072
1749            Luka Dončić             0.585704
1996      Russell Westbrook             0.530652


# Model Evaluation
y_true_new = data_2023_new['Is_MVP']
y_scores_new = probs_2023_new

conf_matrix_new = confusion_matrix(y_true_new, predictions_2023_new)
class_report_new = classification_report(y_true_new, predictions_2023_new)
fpr_new, tpr_new, thresholds_new = roc_curve(y_true_new, y_scores_new)
roc_auc_new = auc(fpr_new, tpr_new)
precision_new, recall_new, _ = precision_recall_curve(y_true_new, y_scores_new)
pr_auc_new = auc(recall_new, precision_new)
accuracy_new = accuracy_score(y_true_new, predictions_2023_new)

model_metrics['New_Model'] = {
    'Confusion_Matrix': conf_matrix_new,
    'Classification_Report': class_report_new,
    'ROC_AUC': roc_auc_new,
    'Precision_Recall_AUC': pr_auc_new
}


print("Model Metrics:")
for model, metrics in model_metrics.items():
    print(f"\n{model}:")
    print("Confusion Matrix:\n", metrics['Confusion_Matrix'])
    print("Classification Report:\n", metrics['Classification_Report'])
    print("ROC AUC:", metrics['ROC_AUC'])
    print("Precision-Recall AUC:", metrics['Precision_Recall_AUC'])

print("\nModel Summary:")
print(model_summary)

# Function to extract precision, recall, and F1-score from the classification report
def extract_metrics(report):
    lines = report.split('\n')
    precision_non_mvp = float(lines[2].split()[1])
    recall_non_mvp = float(lines[2].split()[2])
    f1_non_mvp = float(lines[2].split()[3])
    precision_mvp = float(lines[3].split()[1])
    recall_mvp = float(lines[3].split()[2])
    f1_mvp = float(lines[3].split()[3])
    return precision_non_mvp, recall_non_mvp, f1_non_mvp, precision_mvp, recall_mvp, f1_mvp

# Clear the model_summary DataFrame
model_summary = model_summary[:0]

# Update model_summary with metrics for each model
for model, metrics in model_metrics.items():
    accuracy = metrics['Confusion_Matrix'].diagonal().sum() / metrics['Confusion_Matrix'].sum()
    roc_auc = metrics['ROC_AUC']
    precision_non_mvp, recall_non_mvp, f1_non_mvp, precision_mvp, recall_mvp, f1_mvp = extract_metrics(metrics['Classification_Report'])

    model_summary = pd.concat([model_summary, pd.DataFrame({
        'Model': [model],
        'Accuracy': [accuracy],
        'ROC AUC': [roc_auc],
        'Precision_Non_MVP': [precision_non_mvp],
        'Recall_Non_MVP': [recall_non_mvp],
        'F1_Non_MVP': [f1_non_mvp],
        'Precision_MVP': [precision_mvp],
        'Recall_MVP': [recall_mvp],
        'F1_MVP': [f1_mvp]
    })], ignore_index=True)

Model Metrics:

Base_Model:
Confusion Matrix:
 [[865   4]
 [ 14   5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       869
           1       0.56      0.26      0.36        19

    accuracy                           0.98       888
   macro avg       0.77      0.63      0.67       888
weighted avg       0.97      0.98      0.98       888

ROC AUC: 0.9648113378959481
Precision-Recall AUC: 0.5065986756334937

Feature_Engineered_Model:
Confusion Matrix:
 [[866   3]
 [ 14   5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       869
           1       0.62      0.26      0.37        19

    accuracy                           0.98       888
   macro avg       0.80      0.63      0.68       888
weighted avg       0.98      0.98      0.98       888

ROC AUC: 0.9562715765247412
Precision-Recall AUC: 0.5333442736886027

Tuned_Model:
Confusion Matrix:
 [[817  52]
 [  1  18]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.94      0.97       869
           1       0.26      0.95      0.40        19

    accuracy                           0.94       888
   macro avg       0.63      0.94      0.69       888
weighted avg       0.98      0.94      0.96       888

ROC AUC: 0.9944885228029798
Precision-Recall AUC: 0.8766124399130635

New_Model:
Confusion Matrix:
 [[867   2]
 [  7  12]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       869
           1       0.86      0.63      0.73        19

    accuracy                           0.99       888
   macro avg       0.92      0.81      0.86       888
weighted avg       0.99      0.99      0.99       888

ROC AUC: 0.99351947186724
Precision-Recall AUC: 0.8952621925603874

Model Summary:
                      Model  Accuracy   ROC AUC  Precision_Non_MVP  Recall_Non_MVP  F1_Non_MVP  \
0                Base_Model  0.979730  0.964811               0.98            1.00        0.99   
1  Feature_Engineered_Model  0.980856  0.956272               0.98            1.00        0.99   
2               Tuned_Model  0.940315  0.994489               1.00            0.94        0.97   
3                 New_Model  0.989865  0.993519               0.99            1.00        0.99   

   Precision_MVP  Recall_MVP  F1_MVP  
0           0.56        0.26    0.36  
1           0.62        0.26    0.37  
2           0.26        0.95    0.40  
3           0.86        0.63    0.73

	season	lg	team	abbreviation	playoffs	age	w	l	pw	pl	mov	sos	srs	o_rtg	d_rtg	n_rtg	pace	f_tr	x3p_ar	ts_percent	e_fg_percent	tov_percent	orb_percent	ft_fga	opp_e_fg_percent	opp_tov_percent	opp_drb_percent	opp_ft_fga	arena	attend	attend_g
0	2024	NBA	Atlanta Hawks	ATL	False	26.1	26.0	33.0	26.0	33.0	-2.14	-0.09	-2.22	118.4	120.5	-2.1	101.3	0.266	0.403	0.578	0.538	11.3	27.9	0.216	0.575	12.4	74.2	0.190	State Farm Arena	524969.0	16934.0
1	2024	NBA	Boston Celtics	BOS	False	28.4	46.0	12.0	45.0	13.0	10.43	0.05	10.48	122.2	111.6	10.6	97.9	0.242	0.471	0.608	0.574	11.2	24.6	0.196	0.519	10.5	76.4	0.155	TD Garden	574680.0	19156.0
2	2024	NBA	Brooklyn Nets	BRK	False	26.4	23.0	36.0	25.0	34.0	-2.39	0.43	-1.96	114.2	116.6	-2.4	97.7	0.226	0.415	0.563	0.533	11.3	25.1	0.171	0.544	11.3	76.4	0.206	Barclays Center	544567.0	17567.0
3	2024	NBA	Chicago Bulls	CHI	False	28.0	28.0	31.0	27.0	32.0	-1.24	-0.30	-1.54	114.2	115.4	-1.2	96.3	0.236	0.364	0.564	0.529	11.1	25.3	0.188	0.555	13.0	76.7	0.198	United Center	610496.0	20350.0
4	2024	NBA	Charlotte Hornets	CHO	False	25.2	15.0	44.0	13.0	46.0	-10.54	-0.03	-10.57	109.3	120.0	-10.7	98.0	0.214	0.378	0.557	0.525	12.6	22.2	0.168	0.572	12.2	75.3	0.202	Spectrum Center	486236.0	16208.0

	season	award	player	age	tm	first	pts_won	pts_max	share	winner	seas_id	player_id
0	2023	dpoy	Jaren Jackson Jr.	23	MEM	56.0	391.0	500.0	0.782	True	30733	4632
1	2023	dpoy	Brook Lopez	34	MIL	31.0	309.0	500.0	0.618	False	30508	3801
2	2023	dpoy	Evan Mobley	21	CLE	8.0	101.0	500.0	0.202	False	30640	4931
3	2023	dpoy	Draymond Green	32	GSW	3.0	34.0	500.0	0.068	False	30622	4085
4	2023	dpoy	Bam Adebayo	25	MIA	1.0	18.0	500.0	0.036	False	30489	4472

	seas_id	season	player_id	player	pos	age	experience	lg	tm	g	gs	mp_per_game	fg_per_game	fga_per_game	fg_percent_per_game	x3p_per_game	x3pa_per_game	x3p_percent	x2p_per_game	x2pa_per_game	x2p_percent	e_fg_percent	ft_per_game	fta_per_game	ft_percent	orb_per_game	drb_per_game	trb_per_game	ast_per_game	stl_per_game	blk_per_game	tov_per_game	pf_per_game	pts_per_game	mp	fg_percent	avg_dist_fga	percent_fga_from_x2p_range	percent_fga_from_x0_3_range	percent_fga_from_x3_10_range	percent_fga_from_x10_16_range	percent_fga_from_x16_3p_range	percent_fga_from_x3p_range	fg_percent_from_x2p_range	fg_percent_from_x0_3_range	fg_percent_from_x3_10_range	fg_percent_from_x10_16_range	fg_percent_from_x16_3p_range	fg_percent_from_x3p_range	percent_assisted_x2p_fg	percent_assisted_x3p_fg	percent_dunks_of_fga	num_of_dunks	percent_corner_3s_of_3pa	corner_3_point_percent	num_heaves_attempted	fg	fga	fg_percent_totals	x3p	x3pa	x3p_percent_totals	x2p	x2pa	x2p_percent_totals	e_fg_percent_totals	ft	fta	ft_percent_totals	orb	drb	trb	ast	stl	blk	tov	pf	pts	award	pts_won	first	pts_max	share	winner
957	30733	2023	4632	Jaren Jackson Jr.	C	23.0	5	NBA	MEM	63	63.0	28.4	6.6	13.0	0.506	1.6	4.5	0.355	5.0	8.6	0.585	0.567	3.8	4.9	0.788	1.7	5.0	6.8	1.0	1.0	3.0	1.7	3.6	18.6	1787.0	0.506	11.4	0.657	0.304	0.319	0.028	0.006	0.343	0.585	0.704	0.496	0.348	0.400	0.355	0.595	0.960	0.112	83.0	0.209	0.322	3.0	416.0	822.0	0.506	100.0	282.0	0.355	316.0	540.0	0.585	0.567	241.0	306.0	0.788	108.0	318.0	426.0	60.0	65.0	189.0	107.0	227.0	1173.0	dpoy	391.0	56.0	500.0	0.782	True
986	30764	2023	4417	Joel Embiid	C	28.0	7	NBA	PHI	66	66.0	34.6	11.0	20.1	0.548	1.0	3.0	0.330	10.0	17.1	0.587	0.573	10.0	11.7	0.857	1.7	8.4	10.2	4.2	1.0	1.7	3.4	3.1	33.1	2284.0	0.548	11.3	0.849	0.273	0.196	0.245	0.135	0.151	0.587	0.810	0.435	0.509	0.497	0.330	0.600	0.894	0.066	75.0	0.040	0.250	4.0	728.0	1328.0	0.548	66.0	200.0	0.330	662.0	1128.0	0.587	0.573	661.0	771.0	0.857	113.0	557.0	670.0	274.0	66.0	112.0	226.0	205.0	2183.0	nba mvp	915.0	73.0	1000.0	0.915	True
1092	30871	2023	4535	Lauri Markkanen	SF	25.0	6	NBA	UTA	66	66.0	34.4	8.7	17.3	0.499	3.0	7.7	0.391	5.6	9.6	0.585	0.586	5.3	6.0	0.875	2.0	6.7	8.6	1.9	0.6	0.6	1.9	2.1	25.6	2273.0	0.499	14.6	0.554	0.213	0.251	0.053	0.037	0.446	0.585	0.717	0.505	0.525	0.452	0.391	0.650	0.935	0.112	111.0	0.184	0.489	0.0	571.0	1145.0	0.499	200.0	511.0	0.391	371.0	634.0	0.585	0.586	349.0	399.0	0.875	130.0	440.0	570.0	123.0	42.0	38.0	127.0	137.0	1691.0	mip	430.0	69.0	500.0	0.860	True
1111	30890	2023	4424	Malcolm Brogdon	PG	30.0	7	NBA	BOS	67	0.0	26.0	5.3	10.9	0.484	2.0	4.4	0.444	3.3	6.5	0.510	0.574	2.4	2.7	0.870	0.6	3.6	4.2	3.7	0.7	0.3	1.5	1.6	14.9	1744.0	0.484	14.6	0.594	0.195	0.240	0.083	0.075	0.406	0.510	0.622	0.432	0.475	0.509	0.444	0.342	0.659	0.007	5.0	0.158	0.447	0.0	354.0	732.0	0.484	132.0	297.0	0.444	222.0	435.0	0.510	0.574	160.0	184.0	0.870	42.0	238.0	280.0	248.0	45.0	18.0	98.0	109.0	1000.0	smoy	408.0	60.0	500.0	0.816	True
1203	30981	2023	5089	Paolo Banchero	PF	20.0	1	NBA	ORL	72	72.0	33.8	6.7	15.6	0.427	1.2	4.0	0.298	5.5	11.6	0.471	0.465	5.5	7.4	0.738	1.2	5.7	6.9	3.7	0.8	0.5	2.8	2.2	20.0	2430.0	0.427	12.4	0.746	0.242	0.253	0.124	0.127	0.254	0.471	0.658	0.380	0.410	0.352	0.298	0.404	0.706	0.054	52.0	0.123	0.343	1.0	479.0	1122.0	0.427	85.0	285.0	0.298	394.0	837.0	0.471	0.465	394.0	534.0	0.738	84.0	413.0	497.0	269.0	60.0	39.0	200.0	160.0	1437.0	nba roy	494.0	98.0	500.0	0.988	True

	seas_id	season	player_id	player	pos	age	experience	lg	tm	g	mp	fg_percent	avg_dist_fga	percent_fga_from_x2p_range	percent_fga_from_x0_3_range	percent_fga_from_x3_10_range	percent_fga_from_x10_16_range	percent_fga_from_x16_3p_range	percent_fga_from_x3p_range	fg_percent_from_x2p_range	fg_percent_from_x0_3_range	fg_percent_from_x3_10_range	fg_percent_from_x10_16_range	fg_percent_from_x16_3p_range	fg_percent_from_x3p_range	percent_assisted_x2p_fg	percent_assisted_x3p_fg	percent_dunks_of_fga	num_of_dunks	percent_corner_3s_of_3pa	corner_3_point_percent	num_heaves_attempted
0	31136	2024	5025	A.J. Green	SG	24	2	NBA	MIL	39	357	0.438	24.0	0.133	0.023	0.023	0.023	0.063	0.867	0.529	1.000	0.333	0.333	0.500	0.423	1.000	0.915	0.000	0	0.216	0.542	0
1	31137	2024	5026	A.J. Lawson	SG	23	2	NBA	DAL	28	231	0.471	12.7	0.529	0.329	0.188	0.012	0.000	0.471	0.600	0.857	0.188	0.000	NaN	0.325	0.519	1.000	0.129	10	0.650	0.308	0
2	31138	2024	5027	AJ Griffin	SF	20	2	NBA	ATL	18	132	0.289	21.3	0.267	0.044	0.111	0.044	0.067	0.733	0.333	1.000	0.200	0.000	0.333	0.273	0.750	0.889	0.022	1	0.242	0.250	0
3	31139	2024	4219	Aaron Gordon	PF	28	10	NBA	DEN	54	1699	0.557	7.4	0.814	0.537	0.203	0.053	0.021	0.186	0.618	0.758	0.380	0.214	0.364	0.293	0.644	0.793	0.262	128	0.364	0.389	1
4	31140	2024	4582	Aaron Holiday	PG	27	6	NBA	HOU	56	967	0.455	17.4	0.465	0.132	0.168	0.119	0.045	0.535	0.507	0.634	0.462	0.514	0.286	0.410	0.274	0.838	0.006	2	0.229	0.447	1

	seas_id	season	player_id	player	pos	age	experience	lg	tm	g	gs	mp_per_game	fg_per_game	fga_per_game	fg_percent	x3p_per_game	x3pa_per_game	x3p_percent	x2p_per_game	x2pa_per_game	x2p_percent	e_fg_percent	ft_per_game	fta_per_game	ft_percent	orb_per_game	drb_per_game	trb_per_game	ast_per_game	stl_per_game	blk_per_game	tov_per_game	pf_per_game	pts_per_game
26500	5554	1973	1470	Pete Smith	PF	NaN	1	ABA	SDA	5	NaN	6.4	0.4	2.4	0.167	0.0	0.4	0.0	0.4	2.0	0.200	0.167	0.0	0.0	NaN	0.6	1.0	1.6	0.2	NaN	NaN	1.0	1.0	0.8
27132	4391	1971	1253	Clarence Brookins	F	NaN	1	ABA	FLO	8	NaN	7.4	1.0	3.3	0.308	0.0	0.1	0.0	1.0	3.1	0.320	0.308	0.6	1.5	0.417	1.0	0.5	1.5	0.1	NaN	NaN	0.0	0.6	2.6
27286	4545	1971	1291	Jim Wilson	G	NaN	1	ABA	PTC	6	NaN	7.3	0.2	1.3	0.125	0.0	0.0	NaN	0.2	1.3	0.125	0.125	0.7	1.0	0.667	0.2	0.8	1.0	1.3	NaN	NaN	0.8	0.5	1.0
27888	4293	1970	1231	Walter Byrd	PF	NaN	1	ABA	MMF	22	NaN	5.0	0.6	2.0	0.326	0.0	0.0	0.0	0.6	1.9	0.333	0.326	0.2	0.8	0.294	0.4	0.8	1.1	0.3	NaN	NaN	0.4	1.0	1.5
27899	4304	1970	1233	Wilbur Kirkland	F	NaN	1	ABA	PTP	2	NaN	13.5	1.5	3.5	0.429	0.0	0.0	NaN	1.5	3.5	0.429	0.429	0.0	0.0	NaN	0.5	5.0	5.5	0.5	NaN	NaN	1.0	2.5	3.0
27972	3550	1969	1081	Charles Parks	F	NaN	1	ABA	DNR	2	NaN	2.5	0.0	0.5	0.000	0.0	0.0	NaN	0.0	0.5	0.000	0.000	0.0	0.0	NaN	0.0	0.0	0.0	0.0	NaN	NaN	0.0	0.5	0.0
28349	3141	1968	904	Bill Allen	C	NaN	1	ABA	ANA	38	NaN	22.6	3.2	7.4	0.429	0.1	0.1	1.0	3.1	7.3	0.424	0.432	1.5	2.6	0.586	NaN	NaN	7.1	0.6	NaN	NaN	1.0	3.2	7.9
28378	3170	1968	922	Bobby Wilson	PF	NaN	1	ABA	DLC	69	NaN	22.6	3.3	8.4	0.389	0.0	0.0	0.5	3.3	8.4	0.389	0.390	2.4	3.8	0.615	NaN	NaN	6.5	0.8	NaN	NaN	1.8	3.0	8.9
28404	3196	1968	939	Darrell Hardy	F	NaN	1	ABA	HSM	17	NaN	10.1	1.9	4.4	0.432	0.0	0.1	0.0	1.9	4.3	0.438	0.432	1.5	2.1	0.714	NaN	NaN	3.3	0.5	NaN	NaN	0.7	1.4	5.2
28414	3206	1968	945	Dexter Westbrook	F	NaN	1	ABA	TOT	12	NaN	10.6	1.6	3.3	0.487	0.0	0.0	NaN	1.6	3.3	0.487	0.487	0.8	1.2	0.714	NaN	NaN	1.9	0.4	NaN	NaN	1.1	2.5	4.0
28415	3207	1968	945	Dexter Westbrook	F	NaN	1	ABA	NJA	7	NaN	8.4	1.7	2.7	0.632	0.0	0.0	NaN	1.7	2.7	0.632	0.632	1.0	1.3	0.778	NaN	NaN	1.3	0.3	NaN	NaN	1.0	2.3	4.4
28416	3208	1968	945	Dexter Westbrook	F	NaN	1	ABA	PTP	5	NaN	13.6	1.4	4.0	0.350	0.0	0.0	NaN	1.4	4.0	0.350	0.350	0.6	1.0	0.600	NaN	NaN	2.8	0.6	NaN	NaN	1.2	2.8	3.4
28418	3210	1968	946	Dick Lee	F	NaN	1	ABA	ANA	2	NaN	1.0	0.0	0.0	NaN	0.0	0.0	NaN	0.0	0.0	NaN	NaN	0.0	0.0	NaN	NaN	NaN	0.5	0.5	NaN	NaN	0.0	0.0	0.0
28435	3227	1968	952	Errol Palmer	SF	NaN	1	ABA	MNM	63	NaN	18.9	2.6	7.2	0.364	0.0	0.0	NaN	2.6	7.2	0.364	0.364	2.7	4.0	0.672	NaN	NaN	7.5	1.4	NaN	NaN	1.2	2.7	7.9
28452	3244	1968	956	Gary Turner	F	NaN	1	ABA	HSM	2	NaN	10.5	1.0	1.0	1.000	0.0	0.0	NaN	1.0	1.0	1.000	1.000	1.0	1.5	0.667	NaN	NaN	1.5	0.0	NaN	NaN	1.0	1.0	3.0
28607	3399	1968	1023	R.B. Lynam	G	NaN	1	ABA	DNR	7	NaN	5.6	0.7	2.4	0.294	0.0	0.1	0.0	0.7	2.3	0.313	0.294	1.0	1.1	0.875	NaN	NaN	0.7	0.0	NaN	NaN	0.7	1.4	2.4
28609	3401	1968	1025	Randy Stoll	PF	NaN	1	ABA	ANA	25	NaN	16.1	2.6	5.5	0.478	0.0	0.0	NaN	2.6	5.5	0.478	0.478	0.4	1.0	0.400	NaN	NaN	3.6	0.5	NaN	NaN	1.1	1.7	5.7
28618	3410	1968	1031	Richie Moore	SG	NaN	1	ABA	DNR	18	NaN	11.7	1.3	3.9	0.338	0.0	0.1	0.0	1.3	3.8	0.348	0.338	1.2	1.6	0.750	NaN	NaN	1.1	0.4	NaN	NaN	1.3	0.9	3.8
28690	3482	1968	1067	Willis Thomas	SG	NaN	1	ABA	TOT	62	NaN	17.2	3.9	8.9	0.442	0.0	0.0	0.0	3.9	8.8	0.444	0.442	1.1	1.5	0.742	NaN	NaN	1.8	0.9	NaN	NaN	1.5	1.7	9.0
28691	3483	1968	1067	Willis Thomas	SG	NaN	1	ABA	DNR	24	NaN	22.6	5.7	12.7	0.446	0.0	0.1	0.0	5.7	12.6	0.449	0.446	0.9	1.3	0.688	NaN	NaN	2.4	1.0	NaN	NaN	2.1	1.9	12.3
28692	3484	1968	1067	Willis Thomas	SG	NaN	1	ABA	ANA	38	NaN	13.8	2.8	6.4	0.437	0.0	0.0	0.0	2.8	6.4	0.439	0.437	1.2	1.6	0.770	NaN	NaN	1.5	0.8	NaN	NaN	1.1	1.6	6.9
31716	107	1947	89	Howie McCarty	F-G	NaN	1	BAA	DTF	19	NaN	NaN	0.5	4.3	0.122	NaN	NaN	NaN	0.5	4.3	0.122	0.122	0.1	0.5	0.100	NaN	NaN	NaN	0.1	NaN	NaN	NaN	1.2	1.1

	player	season	percent_fga_from_x3p_range	fg_percent_from_x3p_range
0	Stephen Curry	2016	0.554	0.454
1	Stephen Curry	2015	0.482	0.443
2	Giannis Antetokounmpo	2020	0.237	0.304
3	Giannis Antetokounmpo	2019	0.163	0.256
4	Nikola Jokić	2022	0.220	0.337
5	Nikola Jokić	2021	0.183	0.388

	player	season	e_fg_percent
0	Stephen Curry	2016	0.630
1	Stephen Curry	2015	0.594
2	Giannis Antetokounmpo	2020	0.589
3	Giannis Antetokounmpo	2019	0.599
4	Nikola Jokić	2022	0.620
5	Nikola Jokić	2021	0.602

	player	season	fga_per_game
0	Stephen Curry	2016	20.2
1	Stephen Curry	2015	16.8
2	Giannis Antetokounmpo	2020	19.7
3	Giannis Antetokounmpo	2019	17.3
4	Nikola Jokić	2022	17.7
5	Nikola Jokić	2021	18.0

	seas_id	season	player_id	player	pos	age	experience	lg	tm	g	gs	mp_per_game	fg_per_game	fga_per_game	fg_percent_per_game	x3p_per_game	x3pa_per_game	x3p_percent	x2p_per_game	x2pa_per_game	x2p_percent	e_fg_percent	ft_per_game	fta_per_game	ft_percent	orb_per_game	drb_per_game	trb_per_game	ast_per_game	stl_per_game	blk_per_game	tov_per_game	pf_per_game	pts_per_game	mp	fg_percent	avg_dist_fga	percent_fga_from_x2p_range	percent_fga_from_x0_3_range	percent_fga_from_x3_10_range	percent_fga_from_x10_16_range	percent_fga_from_x16_3p_range	percent_fga_from_x3p_range	fg_percent_from_x2p_range	fg_percent_from_x0_3_range	fg_percent_from_x3_10_range	fg_percent_from_x10_16_range	fg_percent_from_x16_3p_range	fg_percent_from_x3p_range	percent_assisted_x2p_fg	percent_assisted_x3p_fg	percent_dunks_of_fga	num_of_dunks	percent_corner_3s_of_3pa	corner_3_point_percent	num_heaves_attempted	num_heaves_made	fg	fga	fg_percent_totals	x3p	x3pa	x3p_percent_totals	x2p	x2pa	x2p_percent_totals	e_fg_percent_totals	ft	fta	ft_percent_totals	orb	drb	trb	ast	stl	blk	tov	pf	pts
23467	14541	1996	2218	A.C. Green	SF	32.0	11	NBA	PHO	82	36.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2113.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	215.0	444.0	0.484	14.0	52.0	0.269	201.0	392.0	0.513	0.500	168.0	237.0	0.709	166.0	388.0	554.0	72.0	45.0	23.0	79.0	141.0	612.0
23468	14542	1996	2832	Aaron McKie	SG	23.0	2	NBA	POR	81	73.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2259.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	337.0	722.0	0.467	38.0	117.0	0.325	299.0	605.0	0.494	0.493	152.0	199.0	0.764	86.0	218.0	304.0	205.0	92.0	21.0	135.0	205.0	864.0
23469	14543	1996	2761	Acie Earl	C	25.0	3	NBA	TOR	42	7.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	655.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	117.0	276.0	0.424	0.0	3.0	0.000	117.0	273.0	0.429	0.424	82.0	114.0	0.719	51.0	78.0	129.0	27.0	18.0	37.0	49.0	73.0	316.0
23470	14544	1996	2694	Adam Keefe	PF	25.0	4	NBA	UTA	82	0.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1708.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	180.0	346.0	0.520	0.0	4.0	0.000	180.0	342.0	0.526	0.520	139.0	201.0	0.692	176.0	279.0	455.0	64.0	51.0	41.0	88.0	174.0	499.0
23471	14545	1996	2479	Adrian Caldwell	PF	29.0	4	NBA	IND	51	1.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	327.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	46.0	83.0	0.554	0.0	0.0	NaN	46.0	83.0	0.554	0.554	18.0	36.0	0.500	42.0	68.0	110.0	6.0	9.0	5.0	35.0	73.0	110.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
30200	8706	1981	1974	Wes Matthews	PG	21.0	1	NBA	WSB	45	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1161.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	224.0	449.0	0.499	5.0	15.0	0.333	219.0	434.0	0.505	0.504	99.0	129.0	0.767	30.0	37.0	67.0	199.0	46.0	10.0	149.0	120.0	552.0
30201	8707	1981	1974	Wes Matthews	PG	21.0	1	NBA	ATL	34	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1105.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	161.0	330.0	0.488	0.0	6.0	0.000	161.0	324.0	0.497	0.488	103.0	123.0	0.837	16.0	56.0	72.0	212.0	61.0	7.0	112.0	122.0	425.0
30202	8708	1981	1155	Wes Unseld	C	34.0	13	NBA	WSB	63	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2032.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	225.0	429.0	0.524	2.0	4.0	0.500	223.0	425.0	0.525	0.527	55.0	86.0	0.640	207.0	466.0	673.0	170.0	52.0	36.0	97.0	171.0	507.0
30203	8709	1981	1844	Winford Boynes	SG	23.0	3	NBA	DAL	44	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	757.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	121.0	313.0	0.387	0.0	0.0	NaN	121.0	313.0	0.387	0.387	45.0	55.0	0.818	24.0	51.0	75.0	37.0	23.0	16.0	69.0	79.0	287.0
30204	8710	1981	1691	World B. Free	SG	27.0	6	NBA	GSW	65	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	2370.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	516.0	1157.0	0.446	5.0	31.0	0.161	511.0	1126.0	0.454	0.448	528.0	649.0	0.814	48.0	111.0	159.0	361.0	85.0	11.0	195.0	183.0	1565.0

	season	lg	team	abbreviation	playoffs	age	w	l	pw	pl	mov	sos	srs	o_rtg	d_rtg	n_rtg	pace	f_tr	x3p_ar	ts_percent	e_fg_percent	tov_percent	orb_percent	ft_fga	opp_e_fg_percent	opp_tov_percent	opp_drb_percent	opp_ft_fga	arena	attend	attend_g
30	2024	NBA	League Average	NaN	False	26.6	NaN	NaN	30.0	29.0	0.00	0.00	0.00	115.7	115.7	NaN	99.1	0.252	0.392	0.581	0.547	12.1	24.4	0.197	0.547	12.1	75.6	0.197	NaN	537936.0	18238.0
61	2023	NBA	League Average	NaN	False	26.3	NaN	NaN	41.0	41.0	0.00	0.00	0.00	114.8	114.8	NaN	99.1	0.266	0.387	0.581	0.545	12.5	24.0	0.208	0.545	12.5	76.0	0.208	NaN	739257.0	17993.0
92	2022	NBA	League Average	NaN	False	26.3	NaN	NaN	41.0	41.0	0.00	0.00	0.00	112.0	112.0	NaN	98.2	0.248	0.399	0.566	0.532	12.3	23.2	0.192	0.532	12.3	76.8	0.192	NaN	693702.0	16920.0
123	2021	NBA	League Average	NaN	False	26.3	NaN	NaN	36.0	36.0	0.00	0.00	0.00	112.3	112.3	NaN	99.2	0.247	0.392	0.572	0.538	12.4	22.2	0.192	0.538	12.4	77.8	0.192	NaN	49476.0	1374.0
154	2020	NBA	League Average	NaN	False	26.2	NaN	NaN	35.0	35.0	0.00	0.00	0.00	110.6	110.6	NaN	100.3	0.260	0.384	0.565	0.529	12.8	22.5	0.201	0.529	12.8	77.5	0.201	NaN	575820.0	17788.0
185	2019	NBA	League Average	NaN	False	26.4	NaN	NaN	41.0	41.0	0.00	0.00	0.00	110.4	110.4	NaN	100.0	0.259	0.359	0.560	0.524	12.4	22.9	0.198	0.524	12.4	77.1	0.198	NaN	732148.0	17853.0
216	2018	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.6	108.6	NaN	97.3	0.252	0.337	0.556	0.521	13.0	22.3	0.193	0.521	13.0	77.7	0.193	NaN	737485.0	17989.0
247	2017	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.8	108.8	NaN	96.4	0.271	0.316	0.552	0.514	12.7	23.3	0.209	0.514	12.7	76.7	0.209	NaN	733247.0	17880.0
278	2016	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.4	106.4	NaN	95.8	0.276	0.285	0.541	0.502	13.2	23.8	0.209	0.502	13.2	76.2	0.209	NaN	732555.0	17866.0
309	2015	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	105.6	105.6	NaN	93.9	0.273	0.268	0.534	0.496	13.3	25.1	0.205	0.496	13.3	74.9	0.205	NaN	729877.0	17814.0
340	2014	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.7	106.7	NaN	93.9	0.284	0.259	0.541	0.501	13.6	25.5	0.215	0.501	13.6	74.5	0.215	NaN	713714.0	17407.0
371	2013	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	0.00	0.00	105.9	105.9	NaN	92.0	0.270	0.243	0.535	0.496	13.7	26.5	0.204	0.496	13.7	73.5	0.204	NaN	710653.0	17346.0
402	2012	NBA	League Average	NaN	False	26.7	NaN	NaN	33.0	33.0	0.00	0.00	0.00	104.6	104.6	NaN	91.3	0.276	0.226	0.527	0.487	13.8	27.0	0.208	0.487	13.8	73.0	0.208	NaN	570035.0	17274.0
433	2011	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.3	107.3	NaN	92.1	0.300	0.222	0.541	0.498	13.4	26.4	0.229	0.498	13.4	73.6	0.229	NaN	710241.0	17321.0
464	2010	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	-0.01	-0.01	107.6	107.6	NaN	92.7	0.300	0.222	0.543	0.501	13.3	26.3	0.228	0.501	13.3	73.7	0.228	NaN	703627.0	17162.0
495	2009	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.3	108.3	NaN	91.7	0.306	0.224	0.544	0.500	13.3	26.7	0.236	0.500	13.3	73.3	0.236	NaN	718309.0	17520.0
526	2008	NBA	League Average	NaN	False	27.0	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.5	107.5	NaN	92.4	0.306	0.222	0.540	0.497	13.2	26.7	0.231	0.497	13.2	73.3	0.231	NaN	713186.0	17395.0
557	2007	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.5	106.5	NaN	91.9	0.327	0.213	0.541	0.496	14.2	27.1	0.246	0.496	14.2	72.9	0.246	NaN	728036.0	17760.0
588	2006	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.2	106.2	NaN	90.5	0.333	0.202	0.536	0.490	13.7	27.3	0.248	0.490	13.7	72.7	0.248	NaN	719853.0	17571.0
619	2005	NBA	League Average	NaN	False	27.1	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.1	106.1	NaN	90.9	0.324	0.196	0.529	0.482	13.6	28.7	0.245	0.482	13.6	71.3	0.245	NaN	709883.0	17314.0
649	2004	NBA	League Average	NaN	False	27.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	102.9	102.9	NaN	90.1	0.303	0.187	0.516	0.471	14.2	28.6	0.228	0.471	14.2	71.4	0.228	NaN	699041.0	17046.0
679	2003	NBA	League Average	NaN	False	27.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	103.6	103.6	NaN	91.0	0.302	0.182	0.519	0.474	14.0	28.5	0.229	0.474	14.0	71.5	0.229	NaN	692220.0	16883.0
709	2002	NBA	League Average	NaN	False	27.4	NaN	NaN	41.0	41.0	0.00	0.00	0.00	104.5	104.5	NaN	90.7	0.293	0.181	0.520	0.477	13.6	28.9	0.221	0.477	13.6	71.1	0.221	NaN	695899.0	16973.0
739	2001	NBA	League Average	NaN	False	27.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	103.0	103.0	NaN	91.3	0.309	0.170	0.518	0.473	14.1	28.2	0.231	0.473	14.1	71.8	0.231	NaN	687864.0	16777.0
769	2000	NBA	League Average	NaN	False	27.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	104.1	104.1	NaN	93.1	0.308	0.167	0.523	0.478	14.2	28.9	0.231	0.478	14.2	71.1	0.231	NaN	691469.0	NaN
799	1999	NBA	League Average	NaN	False	27.8	NaN	NaN	25.0	25.0	0.00	0.00	0.00	102.2	102.2	NaN	88.9	0.330	0.168	0.511	0.466	14.6	30.2	0.240	0.466	14.6	69.8	0.240	NaN	417929.0	NaN
829	1998	NBA	League Average	NaN	False	27.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	105.0	105.0	NaN	90.3	0.330	0.159	0.524	0.478	14.5	31.4	0.243	0.478	14.5	68.6	0.243	NaN	685130.0	NaN
859	1997	NBA	League Average	NaN	False	27.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.7	106.7	NaN	90.1	0.320	0.212	0.536	0.493	14.8	30.8	0.236	0.493	14.8	69.2	0.236	NaN	685905.0	18624.0
889	1996	NBA	League Average	NaN	False	27.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.6	107.6	NaN	91.8	0.329	0.200	0.542	0.499	14.7	30.6	0.243	0.499	14.7	69.4	0.243	NaN	703139.0	18173.0
917	1995	NBA	League Average	NaN	False	27.2	NaN	NaN	41.0	41.0	0.00	0.01	0.01	108.3	108.3	NaN	92.9	0.332	0.188	0.543	0.500	14.6	31.4	0.245	0.500	14.6	68.6	0.245	NaN	685305.0	17864.0
945	1994	NBA	League Average	NaN	False	27.3	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.3	106.3	NaN	95.1	0.315	0.117	0.528	0.485	14.3	32.2	0.232	0.485	14.3	67.8	0.232	NaN	664663.0	19181.0
973	1993	NBA	League Average	NaN	False	27.1	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.0	108.0	NaN	96.8	0.323	0.104	0.536	0.491	14.0	32.0	0.243	0.491	14.0	68.0	0.243	NaN	658192.0	12888.0
1001	1992	NBA	League Average	NaN	False	27.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.2	108.2	NaN	96.6	0.305	0.087	0.531	0.487	13.6	32.9	0.232	0.487	13.6	67.1	0.232	NaN	642339.0	17555.0
1029	1991	NBA	League Average	NaN	False	27.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.9	107.9	NaN	97.8	0.320	0.082	0.534	0.487	13.9	32.3	0.245	0.487	13.9	67.7	0.245	NaN	621283.0	15195.0
1057	1990	NBA	League Average	NaN	False	27.1	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.1	108.1	NaN	98.3	0.327	0.076	0.537	0.489	13.9	32.1	0.250	0.489	13.9	67.9	0.250	NaN	638071.0	17582.0
1083	1989	NBA	League Average	NaN	False	27.0	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.8	107.8	NaN	100.6	0.324	0.074	0.537	0.489	14.5	33.0	0.249	0.489	14.5	67.0	0.249	NaN	581579.0	NaN
1107	1988	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	108.0	108.0	NaN	99.6	0.332	0.057	0.538	0.489	14.3	32.8	0.254	0.489	14.3	67.2	0.254	NaN	545315.0	NaN
1131	1987	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	-0.01	-0.01	108.3	108.3	NaN	100.8	0.343	0.053	0.538	0.488	14.3	33.4	0.262	0.488	14.3	66.6	0.262	NaN	524600.0	11375.0
1155	1986	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.2	107.2	NaN	102.1	0.341	0.038	0.541	0.493	14.9	32.4	0.258	0.493	14.9	67.6	0.258	NaN	487508.0	12858.0
1179	1985	NBA	League Average	NaN	False	26.4	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.9	107.9	NaN	102.1	0.330	0.035	0.543	0.496	14.9	32.9	0.252	0.496	14.9	67.1	0.252	NaN	454445.0	14856.0
1203	1984	NBA	League Average	NaN	False	26.4	NaN	NaN	41.0	41.0	0.00	0.00	0.00	107.6	107.6	NaN	101.4	0.336	0.027	0.543	0.495	15.0	33.0	0.255	0.495	15.0	67.0	0.255	NaN	427247.0	14176.0
1227	1983	NBA	League Average	NaN	False	26.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	104.7	104.7	NaN	103.1	0.315	0.025	0.531	0.488	15.8	33.4	0.233	0.488	15.8	66.6	0.233	NaN	406843.0	12587.0
1251	1982	NBA	League Average	NaN	False	26.1	NaN	NaN	41.0	41.0	0.00	0.00	0.00	106.9	106.9	NaN	100.9	0.324	0.026	0.539	0.495	15.0	33.0	0.241	0.495	15.0	67.0	0.241	NaN	429808.0	13526.0
1275	1981	NBA	League Average	NaN	False	26.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	105.5	105.5	NaN	101.8	0.327	0.023	0.534	0.489	15.6	33.5	0.245	0.489	15.6	66.5	0.245	NaN	411228.0	11827.0
1298	1980	NBA	League Average	NaN	False	26.5	NaN	NaN	41.0	41.0	0.00	0.00	0.00	105.3	105.3	NaN	103.1	0.307	0.031	0.531	0.486	15.5	33.5	0.235	0.486	15.5	66.5	0.235	NaN	589616.0	13800.0
1321	1979	NBA	League Average	NaN	False	26.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	103.8	103.8	NaN	105.8	0.309	NaN	0.530	0.485	16.0	32.8	0.232	0.485	16.0	67.2	0.232	NaN	450269.0	NaN
1344	1978	NBA	League Average	NaN	False	26.2	NaN	NaN	41.0	41.0	0.00	0.00	0.00	100.9	100.9	NaN	106.7	0.306	NaN	0.515	0.469	16.0	31.8	0.230	0.469	16.0	68.2	0.230	NaN	485977.0	NaN
1367	1977	NBA	League Average	NaN	False	26.1	NaN	NaN	41.0	41.0	0.00	0.00	0.00	99.5	99.5	NaN	106.5	0.301	NaN	0.511	0.465	16.5	31.8	0.226	0.465	16.5	68.2	0.226	NaN	509413.0	5517.0
1395	1976	NBA	League Average	NaN	False	26.5	NaN	NaN	41.0	41.0	0.00	0.01	0.01	98.3	98.3	NaN	105.5	0.294	NaN	0.504	0.458	16.0	30.3	0.221	0.458	16.0	69.7	0.221	NaN	532283.0	7516.0
1396	1976	ABA	League Average	NaN	False	25.3	NaN	NaN	34.0	34.0	0.00	-0.01	-0.01	104.1	104.1	NaN	106.9	0.290	0.040	0.517	0.472	15.2	34.4	0.224	0.472	15.2	65.6	0.224	NaN	NaN	7516.0
1425	1975	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	-0.01	-0.01	97.7	97.7	NaN	104.5	0.276	NaN	0.502	0.457	16.3	30.2	0.211	0.457	16.3	69.8	0.211	NaN	480279.0	6542.0
1426	1975	ABA	League Average	NaN	False	25.7	NaN	NaN	42.0	42.0	0.00	0.01	0.01	104.7	104.7	NaN	103.1	0.269	0.040	0.520	0.479	14.9	34.1	0.206	0.479	14.9	65.9	0.206	NaN	NaN	6542.0
1454	1974	NBA	League Average	NaN	False	27.0	NaN	NaN	41.0	41.0	0.00	0.00	0.00	97.7	97.7	NaN	107.8	0.270	NaN	0.503	0.459	16.5	30.5	0.209	0.459	16.5	69.5	0.209	NaN	479203.0	5676.0
1455	1974	ABA	League Average	NaN	False	25.7	NaN	NaN	42.0	42.0	0.00	0.00	0.00	103.0	103.0	NaN	102.6	0.273	0.045	0.509	0.466	14.6	33.6	0.207	0.466	14.6	66.4	0.207	NaN	NaN	5676.0
1483	1973	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	0.01	0.01	96.8	96.8	NaN	110.7	0.261	NaN	0.498	0.456	NaN	NaN	0.198	0.456	NaN	NaN	0.198	NaN	544053.0	5941.0
1484	1973	ABA	League Average	NaN	False	26.0	NaN	NaN	42.0	42.0	0.00	0.00	0.01	101.8	101.8	NaN	109.3	0.360	0.041	0.527	0.476	15.3	34.3	0.269	0.476	15.3	65.7	0.269	NaN	NaN	5941.0
1513	1972	NBA	League Average	NaN	False	26.7	NaN	NaN	41.0	41.0	0.00	0.00	0.00	97.9	97.9	NaN	112.0	0.326	NaN	0.504	0.455	NaN	NaN	0.244	0.455	NaN	NaN	0.244	NaN	507521.0	5762.0
1514	1972	ABA	League Average	NaN	False	25.7	NaN	NaN	42.0	42.0	0.00	0.00	0.00	100.6	100.6	NaN	111.9	0.334	0.055	0.519	0.469	14.3	34.3	0.253	0.469	14.3	65.7	0.253	NaN	NaN	5762.0
1543	1971	NBA	League Average	NaN	False	26.6	NaN	NaN	41.0	41.0	0.00	0.00	0.00	97.2	97.2	NaN	115.1	0.333	NaN	0.500	0.449	NaN	NaN	0.248	0.449	NaN	NaN	0.248	NaN	439938.0	5152.0
1544	1971	ABA	League Average	NaN	False	25.7	NaN	NaN	42.0	42.0	0.00	0.00	0.00	105.0	105.0	NaN	111.1	0.327	0.061	0.513	0.464	14.3	NaN	0.246	0.464	14.3	NaN	0.246	NaN	NaN	5152.0
1570	1970	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	99.0	99.0	NaN	117.1	0.339	NaN	0.511	0.460	NaN	NaN	0.255	NaN	NaN	NaN	NaN	NaN	407073.0	4496.0
1571	1970	ABA	League Average	NaN	False	25.2	NaN	NaN	42.0	42.0	0.00	0.00	0.00	102.7	102.7	NaN	109.2	0.348	0.065	0.506	0.454	14.9	NaN	0.259	0.454	14.9	NaN	0.259	NaN	NaN	4496.0
1597	1969	NBA	League Average	NaN	False	26.9	NaN	NaN	41.0	41.0	0.00	0.00	0.00	95.5	95.5	NaN	116.9	0.353	NaN	0.491	0.441	NaN	NaN	0.252	NaN	NaN	NaN	NaN	NaN	402696.0	3043.0
1598	1969	ABA	League Average	NaN	False	24.8	NaN	NaN	39.0	39.0	0.00	0.00	0.00	104.0	104.0	NaN	109.4	0.392	0.060	0.502	0.445	14.2	NaN	0.286	0.445	14.2	NaN	0.286	NaN	NaN	3043.0
1622	1968	NBA	League Average	NaN	False	26.8	NaN	NaN	41.0	41.0	0.00	0.00	0.00	96.8	96.8	NaN	119.8	0.368	NaN	0.498	0.446	NaN	NaN	0.265	NaN	NaN	NaN	NaN	NaN	371057.0	2330.0
1623	1968	ABA	League Average	NaN	False	24.0	NaN	NaN	39.0	39.0	0.00	0.01	0.01	101.3	101.3	NaN	107.0	0.375	0.052	0.483	0.428	13.2	NaN	0.269	0.428	13.2	NaN	0.269	NaN	NaN	2330.0
1634	1967	NBA	League Average	NaN	False	26.6	NaN	NaN	40.0	41.0	0.00	0.00	0.00	96.1	96.1	NaN	121.6	0.352	NaN	0.493	0.441	NaN	NaN	0.257	NaN	NaN	NaN	NaN	NaN	378849.0	NaN
1644	1966	NBA	League Average	NaN	False	26.6	NaN	NaN	40.0	40.0	0.00	0.00	0.00	94.9	94.9	NaN	121.4	0.361	NaN	0.487	0.433	NaN	NaN	0.262	NaN	NaN	NaN	NaN	NaN	336328.0	2436.0
1654	1965	NBA	League Average	NaN	False	26.4	NaN	NaN	40.0	40.0	0.00	0.00	0.00	93.6	93.6	NaN	117.3	0.356	NaN	0.479	0.426	NaN	NaN	0.257	NaN	NaN	NaN	NaN	NaN	319267.0	NaN
1664	1964	NBA	League Average	NaN	False	26.3	NaN	NaN	40.0	40.0	0.00	0.00	0.00	94.6	94.6	NaN	116.8	0.353	NaN	0.485	0.433	NaN	NaN	0.255	NaN	NaN	NaN	NaN	NaN	272839.0	NaN
1674	1963	NBA	League Average	NaN	False	26.4	NaN	NaN	40.0	40.0	0.00	0.00	0.00	95.9	95.9	NaN	119.6	0.354	NaN	0.493	0.441	NaN	NaN	0.258	NaN	NaN	NaN	NaN	NaN	274022.0	NaN
1684	1962	NBA	League Average	NaN	False	26.3	NaN	NaN	40.0	40.0	0.00	0.00	0.00	93.6	93.6	NaN	126.2	0.344	NaN	0.479	0.426	NaN	NaN	0.250	NaN	NaN	NaN	NaN	NaN	191088.0	NaN
1693	1961	NBA	League Average	NaN	False	26.4	NaN	NaN	40.0	39.0	0.00	0.01	0.01	92.1	92.1	NaN	127.7	0.342	NaN	0.469	0.415	NaN	NaN	0.250	NaN	NaN	NaN	NaN	NaN	176457.0	NaN
1702	1960	NBA	League Average	NaN	False	26.5	NaN	NaN	37.0	38.0	0.00	0.00	0.00	91.1	91.1	NaN	126.1	0.330	NaN	0.463	0.410	NaN	NaN	0.242	NaN	NaN	NaN	NaN	NaN	209374.0	NaN
1711	1959	NBA	League Average	NaN	False	26.4	NaN	NaN	36.0	36.0	0.00	0.00	0.00	90.2	90.2	NaN	118.9	0.355	NaN	0.457	0.395	NaN	NaN	0.268	NaN	NaN	NaN	NaN	NaN	244642.0	NaN
1720	1958	NBA	League Average	NaN	False	26.7	NaN	NaN	36.0	36.0	0.00	0.00	0.00	88.8	88.8	NaN	119.7	0.376	NaN	0.449	0.383	NaN	NaN	0.281	NaN	NaN	NaN	NaN	NaN	240943.0	NaN
1729	1957	NBA	League Average	NaN	False	26.3	NaN	NaN	36.0	36.0	0.00	-0.01	-0.01	88.9	88.9	NaN	111.1	0.391	NaN	0.449	0.380	NaN	NaN	0.293	NaN	NaN	NaN	NaN	NaN	262918.0	NaN
1738	1956	NBA	League Average	NaN	False	26.3	NaN	NaN	36.0	36.0	0.00	0.00	0.00	90.3	90.3	NaN	108.8	0.416	NaN	0.458	0.387	NaN	NaN	0.310	NaN	NaN	NaN	NaN	NaN	209645.0	NaN
1739	1955	NBA	Baltimore Bullets	BLB	False	24.9	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1748	1955	NBA	League Average	NaN	False	26.5	NaN	NaN	36.0	36.0	0.01	0.00	0.00	89.8	89.8	NaN	102.9	0.416	NaN	0.455	0.385	NaN	NaN	0.307	NaN	NaN	NaN	NaN	NaN	175675.0	NaN
1758	1954	NBA	League Average	NaN	False	26.3	NaN	NaN	36.0	36.0	0.00	0.01	0.01	87.5	87.5	NaN	90.0	0.438	NaN	0.442	0.372	NaN	NaN	0.311	NaN	NaN	NaN	NaN	NaN	156912.0	NaN
1769	1953	NBA	League Average	NaN	False	26.5	NaN	NaN	36.0	34.0	0.29	0.01	0.01	88.0	87.7	NaN	92.4	0.465	NaN	0.445	0.370	NaN	NaN	0.333	NaN	NaN	NaN	NaN	NaN	161808.0	NaN
1780	1952	NBA	League Average	NaN	False	26.3	NaN	NaN	33.0	33.0	0.00	0.00	-0.01	86.9	86.9	NaN	95.1	0.411	NaN	0.438	0.367	NaN	NaN	0.302	NaN	NaN	NaN	NaN	NaN	160167.0	NaN
1792	1951	NBA	League Average	NaN	False	NaN	NaN	NaN	32.0	32.0	0.00	0.00	0.00	85.1	85.1	NaN	97.4	0.399	NaN	0.428	0.357	NaN	NaN	0.293	NaN	NaN	NaN	NaN	NaN	197888.0	NaN
1810	1950	NBA	League Average	NaN	False	NaN	NaN	NaN	33.0	33.0	0.00	0.00	0.00	NaN	NaN	NaN	NaN	0.397	NaN	0.410	0.340	NaN	NaN	0.284	NaN	NaN	NaN	NaN	NaN	110552.0	NaN
1823	1949	BAA	League Average	NaN	False	NaN	NaN	NaN	30.0	30.0	0.00	-0.01	-0.01	NaN	NaN	NaN	NaN	0.353	NaN	0.390	0.327	NaN	NaN	0.248	NaN	NaN	NaN	NaN	NaN	144275.0	NaN
1832	1948	BAA	League Average	NaN	False	NaN	NaN	NaN	25.0	23.0	0.00	-0.01	-0.01	NaN	NaN	NaN	NaN	0.281	NaN	0.337	0.284	NaN	NaN	0.190	NaN	NaN	NaN	NaN	NaN	90264.0	52.0
1844	1947	BAA	League Average	NaN	False	NaN	NaN	NaN	30.0	31.0	0.00	0.00	0.00	NaN	NaN	NaN	NaN	0.267	NaN	0.326	0.279	NaN	NaN	0.171	NaN	NaN	NaN	NaN	NaN	108240.0	46.0

NBA Most Improved Player/Most Valuable Player Predictor¶

Data Science Final Portfolio¶

Project Background, Logistics, Goals, and Data¶

Extraction, Transformation, and Loading!¶

Data Cleanup¶

Cleaning player_shooting_df¶

Cleaning player_per_game_df¶

Exploratory Data Analysis¶

This will be the last section we work on for Milestone 1!¶

Summary Statistics¶

Visualizations¶

Further ETL and EDA (Milestone 2 Starts Here)¶

Clean Up, Continued¶

Importing Player Totals¶

Exploratory Data Analysis, Extended¶

Rebounds per game¶

Assists per game¶

Recap and Next Steps¶

New Dataframe: Team Summaries¶

Team vs League Wins¶

Model Questions¶

Model 1: Predicitng MVP Winners Based on Player Performance Metrics¶

Model 2: Predicting Team Success Based on Player and Team Performance Metrics¶

Generating Models (Milestone 3 Starts Here)¶

More Data, More Preprocessing¶

Data Importing¶

Data Merging¶

Exploring Model Data¶

Model Data Breakdown¶

Model Data Visual Exploration¶

Building a Base Model!¶

Base Model 2023 MVP Prediction¶

Base Model Evaluation¶

Feature Engineering Our Model¶

Feature Engineered Model 2023 MVP Prediction¶

Feature Engineered Model Evaluation¶

Model Tuning¶

Tuned Model 2023 MVP Prediction¶

Tuned Model Evaluation¶

Base Model, More Features¶

Overall Model Evaluations¶