# In [27]:
import numpy as np
from collections import Counter, defaultdict

# A function to calculate the probability of the occurrence of the value in the array
# Input: input_array - An array of values
# Output: A dictionary with pairs: Key (array value): Value (probability of that value = # occurrences / array length)
# Example:
# --------
# input_array = [ 3, 2, 1, 1, 5, 4, 6 ]
# Output: {3: 1/7=0.14285714285714285, 2: 1/7=0.14285714285714285, 1: 2/7=0.2857142857142857, 
#          5: 1/7=0.14285714285714285, 4: 1/7=0.14285714285714285, 6: 1/7=0.14285714285714285}

def occurrences(input_array):
    """Return the relative frequency of every distinct value in input_array.

    Input:  input_array - a sized iterable of hashable values
    Output: a dictionary mapping each distinct value to
            (number of times it appears) / (total number of values).
            An empty input yields an empty dictionary.

    Example:
        occurrences([3, 2, 1, 1, 5, 4, 6])
        -> {3: 1/7, 2: 1/7, 1: 2/7, 5: 1/7, 4: 1/7, 6: 1/7}
    """
    total = float(len(input_array))  # float() keeps the division a true division
    tally = Counter(input_array)     # distinct value -> number of appearances
    # Turn every raw count into a probability in a single pass.
    return {value: count / total for value, count in tally.items()}

# Perform Naive Bayes calculation
# Input: training   - Input values in training data
#        outcome    - The corresponding outcomes
#        new_sample - The new data to check
# Output: A dictionary with pairs: Key (class): Value (unnormalised score P(class) * product of P(feature value|class))
# Example: 
# --------
# training = [ [0, 1, 2], [1, 2, 3] ]
# outcome = [0, 1]
# new_sample = [1, 2, 3]
# Output: {0: 0.0, 1: 0.5}

def naive_bayes(training, outcome, new_sample):
    """Score a new sample against every class with categorical Naive Bayes.

    For each class c found in outcome, the score is

        P(c) * P(feature_0 = new_sample[0] | c) * P(feature_1 = new_sample[1] | c) * ...

    where every probability is a relative frequency measured on the training
    data. The scores are proportional to the posterior P(c | new_sample) but
    are NOT normalised to sum to 1. No smoothing is applied, so a feature
    value that never occurs together with a class gives that class a score
    of 0.

    Input:  training   - 2-D array-like (samples x features) of categorical
                         feature values; plain nested lists are accepted
            outcome    - 1-D array-like with the class label of each
                         training row
            new_sample - sequence of feature values to classify, in the same
                         column order as training; if it is shorter than the
                         number of columns, only the leading features are used
    Output: a dictionary with one entry per distinct class label (as produced
            by np.unique): key is the class, value is its score.
    Raises: ValueError if training is not 2-D, if outcome does not hold
            exactly one label per training row, or if new_sample has more
            values than training has features.

    Example:
        training = [[0, 1, 2], [1, 2, 3]], outcome = [0, 1],
        new_sample = [1, 2, 3]  ->  class 0 scores 0.0, class 1 scores 0.5
    """
    # Convert up front so plain Python lists work: list == scalar is a single
    # bool (not a mask) and lists cannot be indexed with a boolean/array index.
    training = np.asarray(training)
    outcome = np.asarray(outcome)

    if training.ndim != 2:
        raise ValueError(
            "training must be 2-D (samples x features), got shape {}".format(training.shape))
    rows, cols = training.shape
    if outcome.ndim != 1 or outcome.shape[0] != rows:
        raise ValueError(
            "outcome must hold one label per training row: expected {} labels, "
            "got shape {}".format(rows, outcome.shape))
    if len(new_sample) > cols:
        raise ValueError(
            "new_sample has {} values but training has only {} features".format(
                len(new_sample), cols))

    class_probabilities = occurrences(outcome)  # Prior P(c) of every class

    results = {}
    for c in np.unique(outcome):
        class_rows = training[outcome == c]     # Training rows labelled c
        score = class_probabilities[c]          # Start from the prior P(c)
        for j, value in enumerate(new_sample):
            # P(feature j = v | c) for every value v seen in column j of class c
            feature_probabilities = occurrences(class_rows[:, j])
            # A value never seen with this class has probability 0
            score *= feature_probabilities.get(value, 0)
        # Stored once per class, outside the feature loop, so a class still
        # receives its prior as score when new_sample is empty.
        results[c] = score
    return results


# Main script

# Training set: 14 samples (rows), each described by 4 categorical features (columns).
training = np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 0],
    [2, 0, 0, 1],
    [1, 1, 0, 1],
    [1, 2, 1, 1],
    [1, 2, 1, 0],
    [2, 2, 1, 0],
    [0, 1, 0, 1],
    [0, 2, 1, 1],
    [1, 1, 1, 1],
    [0, 1, 1, 0],
    [2, 1, 0, 0],
    [2, 0, 1, 1],
    [1, 1, 0, 0],
])
# Class label (0 or 1) of every training row, in the same order as the rows above.
outcome = np.array([1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1])

# Score one unseen sample against both classes and print the per-class scores.
new_sample = np.array([0, 2, 0, 0])
results = naive_bayes(training, outcome, new_sample)
print(results)

# Output: {0: 0.005291005291005291, 1: 0.02057142857142857}
