This notebook reads the output of the cleanup stage, massages the data, and writes two pickled files for analysis:
- analysis-predictors.bz2: each row is a loan and the columns are the features available to investors
- analysis-target.bz2: each row is a loan and the value is the annualized (APY) return of the loan
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
# Read in the raw data provided by Lending Club
loan = pd.read_csv("LoanStats.csv")
# Overview of all the predictors provided by the Lending Club
# Unfortunately many of these are NaN, particularly those indicators that might help us
# with an analysis of any potential discrimination
# Descriptions related to these terms are provided in a separate Excel file
for column in loan.columns:
    # print("%-40s" % column, loan[column].dtype)
    print(column)
loan.isna().sum()
import re
def subgrade_score(s):
    # Map sub-grades 'A1'..'G5' to a numeric score, e.g. 'A1' -> 0.0, 'B3' -> 1.4
    return (ord(s[0]) - ord('A')) + (int(s[1]) - 1)/5
def extract_number(x):
    # Return the first run of digits in x as an int, or NaN if none is found
    try:
        return int(re.search(r"\d+", str(x)).group(0))
    except AttributeError:
        return np.nan
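As a quick sanity check, the two helpers behave as follows on a few hypothetical inputs (the values below are illustrative, not from the dataset):

```python
import re
import numpy as np

def subgrade_score(s):
    # 'A1' -> 0.0, 'B3' -> 1 + 2/5 = 1.4
    return (ord(s[0]) - ord('A')) + (int(s[1]) - 1)/5

def extract_number(x):
    # First run of digits as an int, NaN if none
    try:
        return int(re.search(r"\d+", str(x)).group(0))
    except AttributeError:
        return np.nan

assert subgrade_score("A1") == 0
assert abs(subgrade_score("B3") - 1.4) < 1e-9
assert extract_number("10+ years") == 10
assert np.isnan(extract_number("n/a"))
```

This is why `extract_number` works on columns like `emp_length` ("10+ years") and `term` (" 36 months") alike.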
# Clean the data related to the borrower's ZipCode
loan["zip_code_prefix"] = [extract_number(s) for s in loan.zip_code.astype(str)]
# Fix the "as of" date (the dataset snapshot) in order to calculate loan lengths
now = pd.to_datetime("Aug-2018")
loan["earliest_cr_line_date"] = pd.to_datetime(loan.earliest_cr_line,
                                               format="%b-%Y")
loan.earliest_cr_line_date.head()
# Overall credit history length is then today's date less the date of the
# first available information on the person's credit history
loan["credit_history"] = now - loan.earliest_cr_line_date
loan["credit_history_years"] = loan.credit_history.dt.days.fillna(0) / 365.25
# Check that there are no 'NAs' in the credit history data column
loan.credit_history_years.isna().sum()
# Clean the length of employment predictor/column and check for 'NAs'
loan["emp_length_years"] = loan.emp_length.apply(extract_number)
loan["emp_length_years"].isna().sum()
# Cleanse the data related to the loan inquiries made within the last 6 months
# This is a typical credit worthiness metric
# The more frequently someone has asked for a loan, the less creditworthy they are
loan.inq_last_6mths = loan.inq_last_6mths.fillna(0)
# Clean up the column/predictor for the months since the last delinquent payment
# This is part of the credit history of the borrower
loan.mths_since_last_delinq = loan.mths_since_last_delinq.fillna(0)
loan.mths_since_last_record = loan.mths_since_last_record.fillna(0)
# Clean the data related to the employment history of the borrower
m = np.mean(loan.emp_length_years)
loan.emp_length_years = loan.emp_length_years.fillna(m)
loan.emp_length_years.isna().sum()
# Impute the missing data for the annual income of the borrower using the average
loan.annual_inc = loan.annual_inc.fillna(np.mean(loan.annual_inc))
loan.dti = loan.dti.fillna(np.mean(loan.dti))
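Mean imputation, as used above for `annual_inc` and `dti`, can be illustrated on a toy Series (the numbers are hypothetical):

```python
import pandas as pd
import numpy as np

# Toy incomes with one missing value
inc = pd.Series([40000.0, np.nan, 60000.0])

# Series.mean() skips NaN, so the missing value is
# replaced by the mean of the observed values
filled = inc.fillna(inc.mean())

assert filled.isna().sum() == 0
assert filled[1] == 50000.0
```

A caveat worth keeping in mind: mean imputation preserves the column mean but shrinks its variance, which can matter for downstream models.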
for g in sorted(loan.sub_grade.unique()):
print(g, subgrade_score(g))
loan.int_rate = loan.int_rate.str[:-1].astype(float)
loan["credit_score"] = loan.sub_grade.apply(subgrade_score)
loan.issue_d.min()
# Convert the loan issue date to a datetime to enable date arithmetic
loan["issue_date"] = pd.to_datetime(loan.issue_d, format="%b-%Y")
loan.earliest_cr_line_date = pd.to_datetime(loan.earliest_cr_line, format="%b-%Y").fillna(loan.issue_date)
pd.to_datetime(['Apr-2008'], format="%b-%Y")
# Create a predictor/column with the year the loan was issued
loan["issue_date_year"] = loan.issue_date.dt.year
# Create a predictor/column with the date of the last payment
loan["last_pymnt_date"] = pd.to_datetime(loan.last_pymnt_d, format="%b-%Y")
# Create a predictor/column with the actual loan length
# This is not the same as the planned loan length
loan["loan_length"] = loan["last_pymnt_date"] - loan["issue_date"]
# Keep the loan length as a timedelta; remaining NaNs are handled below
loan["loan_length_term"] = loan.loan_length
# Clean the data for the predictor/column loan length in days
loan["loan_length_days"] = loan.loan_length_term.dt.days.fillna(1)
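The date arithmetic above (subtracting two datetime columns and extracting days) can be sketched on toy dates; the values here are hypothetical, not from the dataset:

```python
import pandas as pd

# A loan issued in Jan-2015 with its last payment in Jan-2016
issue = pd.to_datetime(pd.Series(["Jan-2015"]), format="%b-%Y")
last = pd.to_datetime(pd.Series(["Jan-2016"]), format="%b-%Y")

# Subtracting datetime Series yields a timedelta Series;
# .dt.days converts it to integer days
length = last - issue
days = length.dt.days

assert days.iloc[0] == 365
```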
# Check that the dates have been properly changed
loan[["last_pymnt_date", "issue_date", "loan_length","grade","term","loan_status"]].head(-100)
loan.issue_date.describe()
# New column/predictor percentage of the revolving credit utilized
loan["revol_util_perc"] = loan.revol_util.astype(str).apply(extract_number)
loan["revol_util_perc"] = loan["revol_util_perc"].fillna(np.mean(loan.revol_util_perc))
# Calculate the length of the loan
loan["term_months"] = [extract_number(s) for s in loan.term]
loan["term_days"] = pd.to_timedelta(loan.term_months*30, unit='D')
# Calculate the annualized return (APY) of each loan
loan["apy"] = (loan.total_pymnt / loan.loan_amnt) ** (365.25/(loan.loan_length_days)) - 1
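To see why this formula gives an annualized return, consider a hypothetical loan of $10,000 that returns $11,000 in total over exactly one average year (365.25 days); the annualized return should be 10%:

```python
# Hypothetical loan, not from the dataset
loan_amnt = 10_000.0
total_pymnt = 11_000.0
loan_length_days = 365.25

# (gross return) ** (years ** -1) - 1, i.e. compound the
# per-loan return up (or down) to a one-year horizon
apy = (total_pymnt / loan_amnt) ** (365.25 / loan_length_days) - 1

assert abs(apy - 0.10) < 1e-12
```

The same loan repaid over two years would give an exponent of 0.5 and an APY of sqrt(1.1) - 1, roughly 4.9%, which is why very short loan lengths blow the exponent up and make APY unreliable.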
# APY is unreliable for very short loans, so before filtering we inspect the
# remaining extreme values (annualized return above 100%) among longer loans
loan.query("loan_length_days > 60 and apy > 1")[["funded_amnt", "total_pymnt", "loan_length_days", "apy", "loan_status"]]
loan.credit_history.head()
# Completed loans are those which are either charged off or fully paid
complete = loan.query("(loan_status=='Charged Off' or loan_status=='Fully Paid') and loan_length_days > 60 and apy < 1").copy()
complete["good_loan"] = complete.loan_status == 'Fully Paid'
# Overview of the loans that have been completed: charged off or fully repaid
complete.describe()
loan.head(30)
loan.isna().sum()
We are only concerned with loans that are complete: either fully paid or charged off. Loans that are still ongoing are also interesting to look at, but they require their own separate analysis. Because our time is limited, we can only do one set, and completed loans seem the most promising.
In addition, a number of loans have a very short length, meaning they were taken out and repaid quickly. I'm not sure why that is, but such loans make little difference: if an investor funds a loan and it is repaid within a few days, they neither make nor lose money and must simply make a new loan. So for now, we ignore them.
public_columns = [
'loan_amnt','term_months','int_rate','credit_score',
# 'emp_title',
'emp_length_years',
'home_ownership',
'annual_inc',
'issue_date',
'verification_status',
# 'loan_status',
# 'pymnt_plan',
# 'desc',
# 'purpose',
# 'title',
# 'zip_code',
# 'addr_state',
'dti',
'delinq_2yrs',
# 'earliest_cr_line',
'credit_history_years',
'inq_last_6mths',
'mths_since_last_delinq',
'mths_since_last_record',
'open_acc',
'pub_rec',
'revol_bal',
'revol_util_perc',
'total_acc']
public_data = complete[public_columns]
public_data.isna().sum()
public_data.head()
public_data_y = complete.apy
public_data_y.describe()
Now, deal with categorical data using one-hot encoding.
categorical = ['verification_status','home_ownership']
public_data_dummy = pd.get_dummies(public_data[categorical], dummy_na=False)
public_data_cat = pd.concat([public_data, public_data_dummy], axis=1)
public_data_cat = public_data_cat.drop(categorical, axis=1)
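On a toy column, the one-hot encoding step looks like this (the category values are hypothetical stand-ins for `home_ownership`):

```python
import pandas as pd

# Toy categorical column
df = pd.DataFrame({"home_ownership": ["RENT", "OWN", "RENT"]})

# One indicator column per category; dummy_na=False means
# NaN rows would get all-zero indicators rather than their own column
dummies = pd.get_dummies(df["home_ownership"], dummy_na=False)

assert list(dummies.columns) == ["OWN", "RENT"]
assert int(dummies["RENT"].sum()) == 2
```

The same concat-then-drop pattern used above then swaps the original string column for its indicator columns.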
public_data_cat.to_pickle("analysis-predictors.bz2")
public_data_y.to_pickle("analysis-target.bz2")
complete.to_pickle("complete.bz2")