Basics of deep learning and neural networks
Optimizing a neural network with backward propagation
Building deep learning models with keras
Fine-tuning keras models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
input_data = np.array([3, 5])
weights = {'node_0': np.array([2, 4]), 'node_1': np.array([ 4, -5]), 'output': np.array([2, 7])}
# Calculate node 0 value: node_0_value
node_0_value = (input_data * weights['node_0']).sum()
# Calculate node 1 value: node_1_value
node_1_value = (input_data * weights['node_1']).sum()
# Put node values into array: hidden_layer_outputs
hidden_layer_outputs = np.array([node_0_value, node_1_value])
# Calculate output: output
output = (hidden_layer_outputs * weights['output']).sum()
# Print output
print(output)
Historically, tanh() was used as the standard activation function. Today the rectified linear unit (ReLU), max(0, input), is the standard choice.
def relu(input):
    '''Define your relu activation function here'''
    # Calculate the value for the output of the relu function: output
    output = max(0, input)
    # Return the value just calculated
    return(output)
# Calculate node 0 value: node_0_output
node_0_input = (input_data * weights['node_0']).sum()
node_0_output = relu(node_0_input)
# Calculate node 1 value: node_1_output
node_1_input = (input_data * weights['node_1']).sum()
node_1_output = relu(node_1_input)
# Put node values into array: hidden_layer_outputs
hidden_layer_outputs = np.array([node_0_output, node_1_output])
# Calculate model output (do not apply relu)
model_output = (hidden_layer_outputs * weights['output']).sum()
# Print model output
print(model_output)
input_data = [np.array([3, 5]), np.array([ 1, -1]), np.array([0, 0]), np.array([8, 4])]
weights = {
'node_0': np.array([2, 4]),
'node_1': np.array([ 4, -5]),
'output': np.array([2, 7])}
# Define predict_with_network()
def predict_with_network(input_data_row, weights):
    # Calculate node 0 value
    node_0_input = (input_data_row * weights['node_0']).sum()
    node_0_output = relu(node_0_input)
    # Calculate node 1 value
    node_1_input = (input_data_row * weights['node_1']).sum()
    node_1_output = relu(node_1_input)
    # Put node values into array: hidden_layer_outputs
    hidden_layer_outputs = np.array([node_0_output, node_1_output])
    # Calculate model output
    input_to_final_layer = (hidden_layer_outputs * weights['output']).sum()
    model_output = relu(input_to_final_layer)
    # Return model output
    return(model_output)
# Create empty list to store prediction results
results = []
for input_data_row in input_data:
    # Append prediction to results
    results.append(predict_with_network(input_data_row, weights))
# Print results
print(results)
input_data = np.array([3, 5])
weights = {
'node_0_0': np.array([2, 4]),
'node_0_1': np.array([ 4, -5]),
'node_1_0': np.array([-1, 2]),
'node_1_1': np.array([1, 2]),
'output': np.array([2, 7])}
def predict_with_network(input_data):
    # Calculate node 0 in the first hidden layer
    node_0_0_input = (input_data * weights['node_0_0']).sum()
    node_0_0_output = relu(node_0_0_input)
    # Calculate node 1 in the first hidden layer
    node_0_1_input = (input_data * weights['node_0_1']).sum()
    node_0_1_output = relu(node_0_1_input)
    # Put node values into array: hidden_0_outputs
    hidden_0_outputs = np.array([node_0_0_output, node_0_1_output])
    # Calculate node 0 in the second hidden layer
    node_1_0_input = (hidden_0_outputs * weights['node_1_0']).sum()
    node_1_0_output = relu(node_1_0_input)
    # Calculate node 1 in the second hidden layer
    node_1_1_input = (hidden_0_outputs * weights['node_1_1']).sum()
    node_1_1_output = relu(node_1_1_input)
    # Put node values into array: hidden_1_outputs
    hidden_1_outputs = np.array([node_1_0_output, node_1_1_output])
    # Calculate model output: model_output
    model_output = (hidden_1_outputs * weights['output']).sum()
    # Return model_output
    return(model_output)
output = predict_with_network(input_data)
print(output)
How are the weights that determine the features/interactions in neural networks created? They are not set by hand: they are learned during training by measuring the prediction error and adjusting the weights with gradient descent, which the next sections work through.
def predict_with_network(input_data, weights):
    # Calculate node 0 in the first hidden layer
    node_0_input = (input_data * weights['node_0']).sum()
    node_0_output = relu(node_0_input)
    # Calculate node 1 in the first hidden layer
    node_1_input = (input_data * weights['node_1']).sum()
    node_1_output = relu(node_1_input)
    # Put node values into array: hidden_outputs
    hidden_outputs = np.array([node_0_output, node_1_output])
    # Calculate model output: model_output
    model_output = (hidden_outputs * weights['output']).sum()
    # Return model_output
    return(model_output)
# The data point you will make a prediction for
input_data = np.array([0, 3])
# Sample weights
weights_0 = {'node_0': [2, 1],
'node_1': [1, 2],
'output': [1, 1]
}
# The actual target value, used to calculate the error
target_actual = 3
# Make prediction using original weights
model_output_0 = predict_with_network(input_data, weights_0)
# Calculate error: error_0
error_0 = model_output_0 - target_actual
# Create weights that cause the network to make perfect prediction (3): weights_1
weights_1 = {'node_0': [2, 1],
'node_1': [1, 2],
'output': [1, 0]
}
# Make prediction using new weights: model_output_1
model_output_1 = predict_with_network(input_data, weights_1)
# Calculate error: error_1
error_1 = model_output_1 - target_actual
# Print error_0 and error_1
print(error_0)
print(error_1)
input_data = [np.array([0, 3]), np.array([1, 2]), np.array([-1, -2]), np.array([4, 0])]
target_actuals = [1, 3, 5, 7]
weights_0 = {'node_0': [2, 1],
'node_1': [1, 2],
'output': [1, 1]}
weights_1 = {'node_0': [2, 1],
'node_1': [ 1. , 1.5],
'output': [ 1. , 1.5]}
from sklearn.metrics import mean_squared_error
# Create model_output_0
model_output_0 = []
# Create model_output_1
model_output_1 = []
# Loop over input_data
for row in input_data:
    # Append prediction to model_output_0
    model_output_0.append(predict_with_network(row, weights_0))
    # Append prediction to model_output_1
    model_output_1.append(predict_with_network(row, weights_1))
# Calculate the mean squared error for model_output_0: mse_0
mse_0 = mean_squared_error(target_actuals, model_output_0)
# Calculate the mean squared error for model_output_1: mse_1
mse_1 = mean_squared_error(target_actuals, model_output_1)
print(model_output_0)
print(model_output_1)
print('target actuals:')
print(target_actuals)
# Print mse_0 and mse_1
print("Mean squared error with weights_0: %f" %mse_0)
print("Mean squared error with weights_1: %f" %mse_1)
weights = np.array([0, 2, 1])
input_data = np.array([1, 2, 3])
target = 0
# Calculate the predictions: preds
preds = (weights * input_data).sum()
print("pred:", preds)
print("target:", target)
# Calculate the error: error
error = preds - target
# Calculate the slope: slope
slope = 2 * error * input_data
# Print the slope
print(slope)
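Why this slope formula works: with a single linear node, preds = (weights * input_data).sum(), so the squared error (preds - target)**2 has gradient 2 * (preds - target) * input_data with respect to the weights. As a quick sanity check (a minimal sketch reusing the weights, input_data, target, and error defined above; eps, w_plus, and numeric_slope are illustrative names), a finite-difference approximation gives the same values:
eps = 1e-6
numeric_slope = np.zeros(len(weights))
for i in range(len(weights)):
    # Nudge one weight by eps and measure the change in squared error
    w_plus = weights.astype(float)
    w_plus[i] += eps
    numeric_slope[i] = (((w_plus * input_data).sum() - target)**2 - error**2) / eps
print(numeric_slope)  # approximately [14. 28. 42.], matching 2 * error * input_data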
# Set the learning rate: learning_rate
learning_rate = 0.01
# Update the weights: weights_updated
weights_updated = weights - (slope * learning_rate)
# Get updated predictions: preds_updated
preds_updated = (weights_updated * input_data).sum()
# Calculate updated error: error_updated
error_updated = preds_updated - target
# Print the original error
print(error)
# Print the updated error
print(error_updated)
def get_error(input_data, target, weights):
    preds = (weights * input_data).sum()
    error = preds - target
    return(error)
def get_slope(input_data, target, weights):
    error = get_error(input_data, target, weights)
    slope = 2 * input_data * error
    return(slope)
def get_mse(input_data, target, weights):
    errors = get_error(input_data, target, weights)
    mse = np.mean(errors**2)
    return(mse)
n_updates = 20
mse_hist = []
# Iterate over the number of updates
for i in range(n_updates):
    # Calculate the slope: slope
    slope = get_slope(input_data, target, weights)
    # Update the weights: weights
    weights = weights - slope * 0.01
    # Calculate mse with new weights: mse
    mse = get_mse(input_data, target, weights)
    # Append the mse to mse_hist
    mse_hist.append(mse)
# Plot the mse history
plt.plot(mse_hist)
plt.xlabel('Iterations')
plt.ylabel('Mean Squared Error')
plt.show()
file = 'https://assets.datacamp.com/production/course_1975/datasets/hourly_wages.csv'
wages = pd.read_csv(file)
wages.head()
target = wages['wage_per_hour'].values
print(target.shape)
target[0:6]
predictors = wages.drop('wage_per_hour', axis = 1).values
print(predictors.shape)
predictors
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]
# Set up the model: model
model = Sequential()
# Add the first layer
model.add(Dense(50, activation = 'relu', input_shape = (n_cols,)))
# Add the second layer
model.add(Dense(32, activation = 'relu'))
# Add the output layer
model.add(Dense(1))
# Compile the model
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Verify that model contains information from compiling
print("Loss function: " + model.loss)
# Fit the model
model.fit(predictors, target)
For classification, the output layer has a separate node for each possible outcome and uses the 'softmax' activation, so there is one output node per class. We split the target into a boolean (one-hot) column for each outcome with the to_categorical function from keras.utils.
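As a quick illustration of what to_categorical does (a minimal sketch on a made-up label array, not the Titanic data; labels is just an illustrative name):
from keras.utils import to_categorical
import numpy as np
# Three samples belonging to classes 0, 1, 1
labels = np.array([0, 1, 1])
# One indicator column per class; row i has a 1 in the column of its class
print(to_categorical(labels))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]]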
file = 'https://assets.datacamp.com/production/course_1975/datasets/titanic_all_numeric.csv'
titanic = pd.read_csv(file)
titanic.head()
titanic.describe()
predictors = titanic.drop('survived', axis = 1).values
predictors
from keras.utils import to_categorical
# Convert the target to categorical: target
target = to_categorical(titanic.survived)
target
titanic.survived.head()
n_cols = predictors.shape[1]
# Import necessary modules
import keras
from keras.layers import Dense
from keras.models import Sequential
# Set up the model
model = Sequential()
# Add the first layer
model.add(Dense(32, activation = 'relu', input_shape = (n_cols,)))
# Add the output layer
model.add(Dense(2, activation = 'softmax'))
# Compile the model
model.compile(
optimizer = 'sgd',
loss = 'categorical_crossentropy',
metrics = ['accuracy']
)
# Fit the model
model.fit(predictors, target)
The 'adam' optimizer is usually a good choice; below we try wider networks with both 'sgd' and 'adam'.
model = Sequential()
model.add(Dense(100, activation = 'relu', input_shape = (n_cols,)))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(2, activation = 'softmax'))
model.compile(
optimizer = 'sgd',
loss = 'categorical_crossentropy',
metrics = ['accuracy']
)
model.fit(predictors, target)
model = Sequential()
model.add(Dense(1000, activation = 'relu', input_shape = (n_cols,)))
model.add(Dense(1000, activation = 'relu'))
model.add(Dense(1000, activation = 'relu'))
model.add(Dense(2, activation = 'softmax'))
model.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy']
)
model.fit(predictors, target)
from keras.models import load_model
model.save('model_file.h5')
my_model = load_model('model_file.h5')
predictions = my_model.predict(predictors)
probability_true = predictions[:,1]
probability_true[0:10]
my_model.summary()
# Specify, compile, and fit the model
model = Sequential()
model.add(Dense(32, activation='relu', input_shape = (n_cols,)))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='sgd',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.fit(predictors, target)
# Calculate predictions: predictions
predictions = model.predict(predictors)
# Calculate predicted probability of survival: predicted_prob_true
predicted_prob_true = predictions[:,1]
# print predicted_prob_true
print(predicted_prob_true[:20])
Smart optimizers such as 'adam' help, but problems can still occur, so it is worth testing several learning rates with SGD.
def get_new_model(input_shape):
    model = Sequential()
    model.add(Dense(100, activation='relu', input_shape = input_shape))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    return(model)
# Import the SGD optimizer
from keras.optimizers import SGD
# Create list of learning rates: lr_to_test
lr_to_test = [.000001, 0.01, 1]
input_shape = (predictors.shape[1],)
# Loop over learning rates
for lr in lr_to_test:
    print('\n\nTesting model with learning rate: %f\n'%lr )
    # Build new model to test, unaffected by previous models
    model = get_new_model(input_shape)
    # Create SGD optimizer with specified learning rate: my_optimizer
    my_optimizer = SGD(lr=lr)
    # Compile the model
    model.compile(
        optimizer = my_optimizer,
        loss = 'categorical_crossentropy')
    # Fit the model
    model.fit(predictors, target)
Hold out data for validation with the validation_split = 0.3 argument in the model.fit function. An EarlyStopping monitor is passed to the model.fit function as a callback, and the number of training epochs is set with the epochs = 20 argument.
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]
input_shape = (n_cols,)
# Specify the model
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = input_shape))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
# Compile the model
model.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Fit the model
hist = model.fit(
predictors, target,
validation_split = 0.3)
# Import EarlyStopping
from keras.callbacks import EarlyStopping
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]
input_shape = (n_cols,)
# Specify the model
model = Sequential()
model.add(Dense(100, activation='relu', input_shape = input_shape))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
# Compile the model
model.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Define early_stopping_monitor
early_stopping_monitor = EarlyStopping(patience = 2)
# Fit the model
model.fit(
predictors, target,
epochs = 30,
validation_split = 0.3,
callbacks = [early_stopping_monitor])
The val_loss score hits .5130. The next 2 epochs are not as good, but training does not stop right at that best epoch: with patience = 2, it continues until 2 consecutive epochs show no improvement.
# Define early_stopping_monitor
early_stopping_monitor = EarlyStopping(patience=2)
# Create model 1
model_1 = Sequential()
model_1.add(Dense(10, activation='relu', input_shape = input_shape))
model_1.add(Dense(10, activation='relu'))
model_1.add(Dense(2, activation='softmax'))
# Compile model_1
model_1.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Create the new model: model_2
model_2 = Sequential()
model_2.add(Dense(100, activation='relu', input_shape = input_shape))
model_2.add(Dense(100, activation='relu'))
model_2.add(Dense(2, activation='softmax'))
# Compile model_2
model_2.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Fit model_1
model_1_training = model_1.fit(
predictors, target,
epochs=15,
validation_split=0.2,
callbacks=[early_stopping_monitor],
verbose=False)
# Fit model_2
model_2_training = model_2.fit(
predictors, target,
epochs=15,
validation_split=0.2,
callbacks=[early_stopping_monitor],
verbose=False)
# Create the plot
plt.plot(
model_1_training.history['val_loss'], 'r',
model_2_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.show()
# The input shape to use in the first hidden layer
input_shape = (n_cols,)
# Create model 1: model_1
model_1 = Sequential()
model_1.add(Dense(50, activation='relu', input_shape = input_shape))
model_1.add(Dense(50, activation='relu'))
model_1.add(Dense(50, activation='relu'))
model_1.add(Dense(2, activation='softmax'))
# Compile model_1
model_1.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Create the new model: model_2
model_2 = Sequential()
model_2.add(Dense(50, activation='relu', input_shape = input_shape))
model_2.add(Dense(50, activation='relu'))
model_2.add(Dense(50, activation='relu'))
model_2.add(Dense(2, activation='softmax'))
# Compile model_2
model_2.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Fit model 1
model_1_training = model_1.fit(
predictors, target,
epochs=20,
validation_split=0.4,
callbacks=[early_stopping_monitor],
verbose=False)
# Fit model 2
model_2_training = model_2.fit(
predictors, target,
epochs=20,
validation_split=0.4,
callbacks=[early_stopping_monitor],
verbose=False)
# Create the plot
plt.plot(
model_1_training.history['val_loss'], 'r',
model_2_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.show()
Deep learning models perform better with more data; however, they also take longer to train, especially as they become more complex.
If you have a computer with a CUDA-compatible GPU, you can use it to speed up training considerably.
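As a quick check (a sketch assuming a TensorFlow backend; which call is available depends on your TensorFlow version), you can ask TensorFlow whether it sees a GPU:
import tensorflow as tf
# TensorFlow 1.x: True if a CUDA-capable GPU is visible
print(tf.test.is_gpu_available())
# TensorFlow 2.x alternative:
# print(tf.config.list_physical_devices('GPU'))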
file = 'https://assets.datacamp.com/production/course_1975/datasets/mnist.csv'
digits = pd.read_csv(file)
print(digits.shape)
digits.head()
# The first column, named '5', holds the digit labels; drop it to keep only the pixel values
X = digits.drop('5', axis = 1).values
X
# One-hot encode the digit labels (the first column)
y = to_categorical(digits.iloc[:,0])
y
from keras.callbacks import EarlyStopping
n_cols = X.shape[1]
# Create the model: model
model = Sequential()
model.add(Dense(50, activation = 'relu', input_shape = (n_cols,)))
model.add(Dense(50, activation = 'relu'))
model.add(Dense(10, activation = 'softmax'))
# Compile the model
model.compile(
optimizer = 'adam',
loss = 'categorical_crossentropy',
metrics = ['accuracy'])
# Fit the model
model_training = model.fit(
X, y,
epochs = 20,
validation_split = 0.3,
callbacks = [EarlyStopping(patience = 3)],
verbose = False)
# Create the plot
plt.plot(model_training.history['val_loss'], 'b')
plt.xlabel('Epochs')
plt.ylabel('Validation loss')
plt.show()
plt.plot(model_training.history['val_acc'], 'r')
plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy')
plt.show()