Evaluate the Ad Budget Dataset of XYZ Firm

Evaluate the Ad Budget Dataset of XYZ Firm

Happy coding!

1: Import the dataset

#Import the required libraries
import pandas as pd

#Import the advertising dataset into a DataFrame.
#index_col=0 makes the first CSV column the row index instead of a data column.
#NOTE(review): the path is an absolute, machine-specific Windows path; the cell
#only runs where that exact file location exists.
dv_adv_data = pd.read_csv('C:\\Users\\kevin.zhang\\Lesson 8\\Advertising Budget and Sales.csv', index_col=0)

2: Analyze the dataset

#View the initial few records of the dataset
#(head() with no argument returns the first five rows)
dv_adv_data.head()

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	TV Ad Budget ($)	Radio Ad Budget ($)	Newspaper Ad Budget ($)	Sales ($)
1	230.1	37.8	69.2	22.1
2	44.5	39.3	45.1	10.4
3	17.2	45.9	69.3	9.3
4	151.5	41.3	58.5	18.5
5	180.8	10.8	58.4	12.9

#Check the total number of elements in the dataset
#(size is the number of rows multiplied by the number of columns)
dv_adv_data.size

3: Find the features or media channels used by the firm

#Check the number of observations (rows) and attributes (columns) in the dataset
#(shape is a (rows, columns) tuple)
dv_adv_data.shape

(200, 4)

#View the names of each of the attributes:
#the three ad-budget channels plus the sales column
dv_adv_data.columns

Index(['TV Ad Budget ($)', 'Radio Ad Budget ($)', 'Newspaper Ad Budget ($)',       'Sales ($)'],
      dtype='object')

4: Create objects to train and test the model; find the sales figures for each channel

#Build the feature object from the three ad-budget columns, selected by name
feature_columns = ['Newspaper Ad Budget ($)', 'Radio Ad Budget ($)', 'TV Ad Budget ($)']
X_feature = dv_adv_data[feature_columns]
#Positional alternative: dv_adv_data.iloc[:, 0:3] picks the same three columns,
#but in file order (TV, Radio, Newspaper) rather than the order listed above

#Preview the first rows of the feature object
X_feature.head()

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	Newspaper Ad Budget ($)	Radio Ad Budget ($)	TV Ad Budget ($)
1	69.2	37.8	230.1
2	45.1	39.3	44.5
3	69.3	45.9	17.2
4	58.5	41.3	151.5
5	58.4	10.8	180.8

#Create a target object (Hint: use the sales column as it is the response of the dataset)
#The double brackets select a one-column DataFrame rather than a Series,
#so the target keeps two dimensions.
Y_target = dv_adv_data[['Sales ($)']]

#View the target object
Y_target.head()

.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }

	Sales ($)
1	22.1
2	10.4
3	9.3
4	18.5
5	12.9

#Verify if all the observations have been captured in the feature object
#(the row count should match the full dataset; the column count is the three features)
X_feature.shape

(200, 3)

#Verify if all the observations have been captured in the target object
#(the row count should match the full dataset; the single column is the sales figure)
Y_target.shape

(200, 1)

5: Split the original dataset into training and testing datasets for the model

#Split features and target into training and testing sets
#(by default 75% goes to training and 25% to testing; random_state=1 makes the split repeatable)
from sklearn.model_selection import train_test_split
splits = train_test_split(X_feature, Y_target, random_state=1)
x_train, x_test, y_train, y_test = splits

#Verify the split sizes (Hint: shape is an attribute, not a method)
for subset in (x_train, y_train, x_test, y_test):
    print(subset.shape)

(150, 3)
(150, 1)
(50, 3)
(50, 1)

6: Create a model to predict the sales outcome

#Create a linear regression model and fit it on the TRAINING split only.
#Fitting on the full dataset (X_feature, Y_target) would let the model see the
#test rows, so the error measured on x_test/y_test would be optimistically biased.
#NOTE: any recorded outputs further down (intercept, coefficients, predictions,
#error) that were produced by a fit on the full dataset must be regenerated by
#re-running the notebook.
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(x_train, y_train)

LinearRegression()

#Show the fitted intercept, then the coefficients, one per line
#(coefficients follow the column order of the feature object)
print(linreg.intercept_, linreg.coef_, sep='\n')

[2.93888937]
[[-0.00103749  0.18853002  0.04576465]]

#Predict the outcome for the testing dataset
#(one predicted sales value per row of x_test)
y_pred = linreg.predict(x_test)
y_pred

array([[21.89805198],
       [16.37766467],
       [ 7.57483051],
       [17.64409385],
       [18.53852096],
       [23.76732149],
       [16.31492112],
       [13.18867186],
       [ 9.14402389],
       [17.1861428 ],
       [14.35827373],
       [ 9.95168206],
       [17.28512918],
       [16.92225511],
       [14.7598741 ],
       [15.56609348],
       [12.39914823],
       [17.00682618],
       [11.45348627],
       [18.13348698],
       [ 9.33981296],
       [12.82365674],
       [ 8.88787996],
       [10.48212385],
       [11.3460929 ],
       [14.94678206],
       [ 9.90868103],
       [19.41053803],
       [18.48695797],
       [17.05167344],
       [21.7226299 ],
       [14.22372138],
       [16.49530044],
       [12.1620464 ],
       [19.9793727 ],
       [15.33707782],
       [13.74435742],
       [ 9.90298206],
       [21.10891244],
       [ 7.60769238],
       [ 3.58725841],
       [ 7.10850249],
       [ 6.05162411],
       [18.43436638],
       [ 8.52771254],
       [14.16607293],
       [15.30509593],
       [20.44761039],
       [20.80301059],
       [19.45441306]])

7: Calculate the Root Mean Square Error (RMSE)

#Import required libraries for calculating MSE (mean square error)
from sklearn import metrics
import numpy as np

#Calculate the RMSE: take the mean squared error between the true and
#predicted test values, then its square root (same units as the sales column)
mse = metrics.mean_squared_error(y_test, y_pred)
print(np.sqrt(mse))

1.3435804306352026

#Show the first ten true test values and the first ten predictions,
#separated by a blank line, for a quick visual comparison
preview_rows = 10
print('True', y_test.values[:preview_rows])
print()
print('Pred', y_pred[:preview_rows])

True [[23.8]
 [16.6]
 [ 9.5]
 [14.8]
 [17.6]
 [25.5]
 [16.9]
 [12.9]
 [10.5]
 [17.1]]

Pred [[21.89805198]
 [16.37766467]
 [ 7.57483051]
 [17.64409385]
 [18.53852096]
 [23.76732149]
 [16.31492112]
 [13.18867186]
 [ 9.14402389]
 [17.1861428 ]]