# Suppress warnings to keep the notebook output clean and moderate error messages
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Loading historical price data
# The Date column had to be cleaned because the entries were formatted incorrectly in Microsoft Excel
df = pd.read_csv(r'C:\Users\PC\Desktop\KIbe\sem 2\Unstructured data analytics & apps\jupkibe\data\cpi.csv')
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
data = df['Kenya CPI'].values.reshape(-1, 1)
#Data exploration
df.head()
# results with weighted averages
| Date | Lower Income | Middle Income | Upper Income | Nairobi Combined | CPI of Urban Areas | Kenya CPI |
|---|---|---|---|---|---|---|
| 2018-01-01 | 191.45 | 159.79 | 156.07 | 182.48 | 187.57 | 185.47 |
| 2018-02-01 | 194.37 | 160.71 | 157.31 | 184.85 | 190.20 | 188.00 |
| 2018-03-01 | 198.16 | 161.43 | 157.56 | 187.76 | 192.62 | 190.62 |
| 2018-04-01 | 200.32 | 161.79 | 157.57 | 189.41 | 195.82 | 193.18 |
| 2018-05-01 | 202.10 | 163.00 | 157.92 | 191.00 | 197.89 | 195.05 |
#Data exploration
# Visualize the data
#plt.figure(figsize=(12, 6))
plt.plot(df)
plt.title('Historical Price Index')
plt.xlabel('Year')
plt.ylabel('Kenya Consumer Price Index')
plt.show()
correlation=df.corr()
correlation['Kenya CPI'].sort_values(ascending=True)
Upper Income          0.671732
Lower Income          0.780329
Middle Income         0.811479
Nairobi Combined      0.917577
CPI of Urban Areas    0.974351
Kenya CPI             1.000000
Name: Kenya CPI, dtype: float64
#Data exploration
# Extract relevant columns of interest
# Kenya CPI is the series we model
df = df[['Kenya CPI']]
# The Date column is already the DataFrame index, so it does not need to be selected
#df = df[['Date']]
df.head()
| Date | Kenya CPI |
|---|---|
| 2018-01-01 | 185.47 |
| 2018-02-01 | 188.00 |
| 2018-03-01 | 190.62 |
| 2018-04-01 | 193.18 |
| 2018-05-01 | 195.05 |
# Look for missing values in our selected column of interest
df.isna().sum()
# On the first run this found NaN values, but after cleaning the data in Excel it reports zero NaN values
Kenya CPI    0
dtype: int64
plt.plot(data)
plt.title('Historical Price Index')
plt.xlabel('Year')
plt.ylabel('Kenya Consumer Price Index')
plt.show()
# Normalize the data using Min-Max scaling
# Min-Max scaling is a technique for normalizing the data,
# ........which means transforming it so that the values fall within a specific range
# Scaling ensures the data has a similar scale across features and falls within a range that is
# .......... suitable for the neural network to learn effectively.
scaler = MinMaxScaler()
data = scaler.fit_transform(data) # data defined above
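As a quick illustration (a hypothetical toy array, not part of the CPI dataset), Min-Max scaling maps each value x to (x - min) / (max - min), and inverse_transform undoes the mapping:
# Illustrative sketch only: Min-Max scaling on a toy array
toy = np.array([[100.0], [150.0], [200.0]])
toy_scaler = MinMaxScaler()
toy_scaled = toy_scaler.fit_transform(toy)                 # [[0.0], [0.5], [1.0]]
toy_restored = toy_scaler.inverse_transform(toy_scaled)    # back to [[100.], [150.], [200.]]
print(toy_scaled.ravel(), toy_restored.ravel())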
# Standard basic RNN data-preparation procedures
# e.g. create sequences for training
# Sequences are essential for training RNNs because they can capture temporal dependencies in the data
sequence_length = 10 # You can adjust this value
X, y = [], []
# The loop range ensures we don't go out of bounds when creating sequences.
for i in range(len(data) - sequence_length):
    X.append(data[i:i+sequence_length])
    y.append(data[i+sequence_length])
X = np.array(X)
y = np.array(y)
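For intuition, here is a hypothetical mini example (not from the CPI data): with a window of 3 over the series [1, 2, 3, 4, 5], the inputs are [1, 2, 3] and [2, 3, 4] and the targets are 4 and 5.
# Illustrative sketch: windowing a tiny series with a window of 3
toy_series = np.arange(1, 6).reshape(-1, 1)   # [[1], [2], [3], [4], [5]]
toy_X, toy_y = [], []
for i in range(len(toy_series) - 3):
    toy_X.append(toy_series[i:i+3])
    toy_y.append(toy_series[i+3])
print(np.array(toy_X).shape, np.array(toy_y).shape)   # (2, 3, 1) (2, 1)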
# Split the data into training and testing sets
# Note that we use 80% for training and 20% for testing; shuffle=False preserves the chronological order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
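Because shuffle=False simply assigns the earliest windows to training and the latest to testing, the same split can be written by hand (an equivalent sketch, up to rounding of the split point):
# Sketch: equivalent manual chronological split
split_idx = int(len(X) * 0.8)
X_train_manual, X_test_manual = X[:split_idx], X[split_idx:]
y_train_manual, y_test_manual = y[:split_idx], y[split_idx:]
print(X_train_manual.shape, X_test_manual.shape)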
# Build the Long Short-Term Memory (LSTM) model
# example of a simple RNN model using the Long Short-Term Memory (LSTM) architecture
model = keras.Sequential([
    keras.layers.LSTM(50, activation='relu', input_shape=(sequence_length, 1)),
    keras.layers.Dense(1)
])
# We can also try SGD or RMSprop in place of adam, or vice versa.
# These optimizers adapt the learning rate during training to improve convergence.
model.compile(optimizer='adam', loss='mean_squared_error')
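If you want to try the alternatives mentioned above, the optimizer can be swapped at compile time; the learning rates below are assumed values for illustration, not settings from this notebook:
# Sketch: compiling the same model with alternative optimizers (learning rates are assumptions)
# model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.01), loss='mean_squared_error')
# model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=0.001), loss='mean_squared_error')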
# Train the model
# epoch def: This parameter specifies the number of times the model will iterate through your entire dataset.
#therefore an epoch represents one complete pass through the entire training dataset
model.fit(X_train, y_train, epochs=50, batch_size=30)
# If adam was used, loss started at 0.0042 - 0.3880 - 0.450
# If SGD was used instead of adam, loss started at 0.3907 - 0.4032
# If RMSprop was used, loss started at 0.4250
Epoch 1/50  - loss: 0.5639
Epoch 10/50 - loss: 0.3624
Epoch 20/50 - loss: 0.2082
Epoch 30/50 - loss: 0.0852
Epoch 40/50 - loss: 0.0040
Epoch 50/50 - loss: 0.0094
(full per-epoch log truncated; the loss fell steadily from 0.5639 to about 0.0040 around epoch 40, then fluctuated slightly, ending at 0.0094)
<keras.src.callbacks.History at 0x192024b6310>
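Since the loss starts to fluctuate near the end of training, one optional refinement (not used in the original run, shown only as a sketch) is to hold out a validation split and stop early once it stops improving:
# Optional sketch: early stopping on a held-out validation split
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# model.fit(X_train, y_train, epochs=50, batch_size=30, validation_split=0.1, callbacks=[early_stop])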
print("Actual values in the array:")
print(X_test)
#print("Actual value for the first sample:", X_test[0])
Actual values in the array:
[[[0.71279762]
  [0.58333333]
  [0.45535714]
  [0.49925595]
  [0.64508929]
  [0.5297619 ]
  [0.50446429]
  [0.59821429]
  [0.64806548]
  [0.7671131 ]]]
# Make predictions on the test data
predictions = model.predict(X_test)
# We now need to inverse transform the predictions back to the original scale,
# ..... because we applied the scaler as a standard preprocessing step for RNNs
# ..... and we want to interpret the predictions in their original context.
predictions = scaler.inverse_transform(predictions) # Inverse scaling
predictions
pred_y = model.predict(X_test)
pred_y # model.predict returns the same prediction both times (still on the scaled [0, 1] range here)
1/1 [==============================] - 0s 264ms/step 1/1 [==============================] - 0s 32ms/step
array([[0.752218]], dtype=float32)
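To see what inverse_transform does here, a sketch using the scaler's fitted attributes (MinMaxScaler exposes the fitted minimum and maximum as data_min_ and data_max_) maps the scaled prediction back to the CPI scale by hand:
# Sketch: manually undoing Min-Max scaling for the test prediction
scaled_pred = pred_y[0, 0]
manual_pred = scaled_pred * (scaler.data_max_[0] - scaler.data_min_[0]) + scaler.data_min_[0]
print(manual_pred)   # should match scaler.inverse_transform(pred_y)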
#I took an approximate average of the actual target values for the test sample
#actual_value = 1.0
actual_value = 0.6
# The model's prediction as above
prediction = 0.71092254
# Calculate metrics
mse = mean_squared_error([actual_value], [prediction])
rmse = np.sqrt(mse)
mae = mean_absolute_error([actual_value], [prediction])
r2 = r2_score([actual_value], [prediction])
print(f'Mean Squared Error (MSE): {mse}') #Lower values indicate better performance.
print(f'Root Mean Squared Error (RMSE): {rmse}') #average magnitude of the error, the lower the better
print(f'Mean Absolute Error (MAE): {mae}')
print(f'R-squared (R²): {r2}')
Mean Squared Error (MSE): 0.012303809880051603
Root Mean Squared Error (RMSE): 0.11092254000000001
Mean Absolute Error (MAE): 0.11092254000000001
R-squared (R²): nan
Our values for MSE, RMSE, and MAE suggest that the prediction is close to the assumed true target value. The R² value is "nan" because it was computed from a single observation: with only one true value there is no variation in the target, so R² is undefined.
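A fuller check would score the model against every test window rather than a single hand-picked value; a sketch of that (using the X_test and y_test already defined, both brought back to the original CPI scale) could look like this, keeping in mind that this particular split contains very few test windows:
# Sketch: metrics over the whole test set, on the original CPI scale
y_pred = scaler.inverse_transform(model.predict(X_test))
y_true = scaler.inverse_transform(y_test)
print('MSE :', mean_squared_error(y_true, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_true, y_pred)))
print('MAE :', mean_absolute_error(y_true, y_pred))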
# Evaluate the model
loss = model.evaluate(X_test, y_test)
#loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')
1/1 [==============================] - 0s 322ms/step - loss: 0.0614
Test Loss: 0.06139591708779335
In summary
#import matplotlib.pyplot as plt
# Visualize predictions
#plt.figure(figsize=(12, 6))
#plt.plot(df.index[-len(y_test):], y_test, label='Original Values')
#plt.plot(df.index[-len(y_test):], predictions, label='Predictions', linestyle='dashed')
#plt.xlabel('YEAR')
#plt.ylabel('Kenya CPI')
#plt.legend()
#plt.title('Price Index Prediction with LSTM')
#plt.show()
# number of future time periods to forecast defined here
future_periods = 30 # we can always adjust this as needed
# Create a date range for the future time periods
# (note: newer pandas versions use inclusive='right' instead of closed='right')
future_dates = pd.date_range(start=df.index[-1], periods=future_periods + 1, closed='right')
# Initialize an array for forecasted values
forecasted_values = []
# Make predictions for future time periods
for i in range(future_periods):
    # Select the most recent sequence of data (data holds the scaled Kenya CPI series defined above)
    recent_sequence = data[-sequence_length:]
    # Reshape the sequence to match the model's input shape
    recent_sequence = recent_sequence.reshape(1, sequence_length, 1)
    # Predict the next value
    next_value = model.predict(recent_sequence)
    # Append the predicted value to the dataset so the next iteration can use it
    data = np.append(data, next_value, axis=0)
    # Append the predicted value (as a scalar) to the list of forecasted values
    forecasted_values.append(next_value[0, 0])
# Inverse transform the forecasted values to the original scale
forecasted_values = scaler.inverse_transform(np.array(forecasted_values).reshape(-1, 1)).flatten()
# Create a DataFrame for the forecasted values
forecasted_df = pd.DataFrame({'Date': future_dates[0:], 'Forecasted_CPI': forecasted_values})
# Concatenate the original data (with Date restored as a column) and the forecasted values
complete_df = pd.concat([df.reset_index(), forecasted_df], axis=0)
# Visualize the original data and the forecasted values
plt.figure(figsize=(12, 6))
plt.plot(complete_df['Date'], complete_df['Kenya CPI'], label='Original Data')
plt.plot(forecasted_df['Date'], forecasted_df['Forecasted_CPI'], label='Forecasted Values', linestyle='dashed')
plt.title('Forecasting Future Kenya CPI with LSTM')
plt.xlabel('Year')
plt.ylabel('Kenya Consumer Price Index')
plt.legend()
plt.show()