AI Stock Predictor in Python
About the project: Building an AI Stock Predictor typically involves time series analysis using specialized deep learning models like Long Short-Term Memory (LSTM) networks, which are excellent at capturing temporal dependencies in sequential data like stock prices.
Since we cannot connect to live stock APIs (like Yahoo Finance) here, I'll provide a single, complete Python file. This script will:
- Simulate Stock Data: It generates realistic, trended time-series data so the code runs immediately.
- Preprocess: It scales the data and structures it into sequences (the critical step for LSTMs).
- Build Model: It defines, trains, and tests an LSTM neural network using TensorFlow/Keras.
You will need the following libraries installed:
pip install pandas numpy tensorflow scikit-learn
Here is the complete stock_predictor.py file:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import math
import os
# --- Configuration ---
# Hide every CUDA device so TensorFlow falls back to the CPU; this avoids
# start-up problems on machines where the GPU is not configured.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Look-back window for the LSTM: number of previous days fed to the model
# in order to predict the following day.
LOOK_BACK = 60
# Fraction of the data used for training (0.8 == 80%); the rest is for testing.
TRAIN_SIZE_PERCENT = 0.8
def load_or_simulate_data(data_path="simulated_stock_data.csv", *, num_days=1000, seed=42):
    """
    Returns closing prices as a column vector, loaded from a CSV or simulated.

    If ``data_path`` exists, it is read with its 'Date' column as the index and
    the 'Close' column is returned (``num_days`` and ``seed`` are ignored).
    Otherwise a synthetic series (linear trend + two sine cycles + Gaussian
    noise, offset by 100) is generated, saved to ``data_path`` for future runs,
    and returned.
    In a real project, you would replace this with an API call (e.g., yfinance).

    Args:
        data_path: CSV file to load from, or to write the simulated data to.
        num_days: Number of daily prices to simulate when no file exists.
        seed: Seed for the simulation's random noise.

    Returns:
        A NumPy array of shape (n, 1) holding the 'Close' prices.

    Raises:
        KeyError: If an existing CSV has no 'Close' column.
    """
    if os.path.exists(data_path):
        print(f"Loading data from {data_path}...")
        df = pd.read_csv(data_path, index_col='Date', parse_dates=True)
        # Only the 'Close' column is needed by the model
        return df['Close'].values.reshape(-1, 1)

    print(f"Simulating synthetic stock data ({num_days} days)...")
    # A private generator yields the same stream np.random.seed(seed) would,
    # but without resetting the process-wide NumPy random state.
    rng = np.random.RandomState(seed)
    day_index = np.arange(num_days)
    # 1. Base linear trend
    base_trend = 0.5 * day_index
    # 2. Seasonality (a slow and a fast cycle)
    seasonality = 10 * np.sin(day_index / 30) + 5 * np.sin(day_index / 7)
    # 3. Random noise (market volatility)
    noise = rng.randn(num_days) * 15
    # 4. Combine and shift into a typical stock price range
    raw_prices = (base_trend + seasonality + noise) + 100
    df = pd.DataFrame(
        {'Close': raw_prices},
        index=pd.date_range('2020-01-01', periods=num_days),
    )
    df.index.name = 'Date'
    df.to_csv(data_path)  # Save for future runs
    return df['Close'].values.reshape(-1, 1)
def create_sequences(data, look_back=LOOK_BACK):
    """
    Builds sliding-window inputs (X) and next-step targets (Y) for the LSTM.

    Each window holds 'look_back' consecutive values taken from column 0 of
    'data'; its target is the value that immediately follows the window.
    X = [Day t-LOOK_BACK, ..., Day t-1]
    Y = [Day t]
    """
    window_count = len(data) - look_back
    # One window per start position: the 'look_back' days beginning there
    windows = [data[start:start + look_back, 0] for start in range(window_count)]
    # The matching target is the day right after each window
    targets = [data[start + look_back, 0] for start in range(window_count)]
    return np.array(windows), np.array(targets)
def build_lstm_model(input_shape):
    """
    Defines and compiles the architecture of the LSTM neural network.

    Args:
        input_shape: Shape of a single sample, passed to the model's input
            layer (the caller supplies (time steps, 1)).

    Returns:
        A compiled Sequential model: two 50-unit LSTM layers, each followed by
        20% dropout, and a single-unit Dense output, trained with the Adam
        optimizer on mean squared error.
    """
    model = Sequential()
    # Declare the input with an explicit Input object instead of passing
    # `input_shape` to the first layer, which Keras 3 deprecates for
    # Sequential models and reports with a warning.
    model.add(tf.keras.Input(shape=input_shape))
    # Layer 1: LSTM with 50 units, returns sequences for the next LSTM layer
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))  # Prevents overfitting
    # Layer 2: Another LSTM layer, returning only its final output
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dropout(0.2))
    # Output Layer: Dense layer for a single price prediction
    model.add(Dense(units=1))
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
def run_stock_predictor():
    """
    Main function to execute the prediction workflow.

    Loads (or simulates) the price series, scales it, splits it into training
    and test windows, trains the LSTM, reports the test RMSE in price units,
    and prints a prediction for the day after the last available one.

    Raises:
        ValueError: If the data set is too short to yield at least one
            training window and one test sample.
    """
    # 1. Load Data
    raw_data = load_or_simulate_data()

    # 2. Decide the train/test boundary before any scaling
    train_size = int(len(raw_data) * TRAIN_SIZE_PERCENT)
    if train_size <= LOOK_BACK or train_size >= len(raw_data):
        raise ValueError(
            f"Need more data: {len(raw_data)} rows give {train_size} training rows, "
            f"but the look-back window is {LOOK_BACK} and at least one test row is required."
        )

    # 3. Preprocessing and Scaling
    # LSTMs perform better when data is normalized. The scaler is fitted on the
    # training rows only, so the test period's min/max do not leak into
    # training; scaled test values may therefore fall outside [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(raw_data[:train_size])
    scaled_data = scaler.transform(raw_data)

    # 4. Split Data into Training and Testing Sets
    train_data = scaled_data[0:train_size, :]
    # Retain look_back days before the boundary so the first test target has a full window
    test_data = scaled_data[train_size - LOOK_BACK:, :]

    # Create sequences (X) and targets (Y)
    X_train, y_train = create_sequences(train_data, LOOK_BACK)
    X_test, y_test = create_sequences(test_data, LOOK_BACK)

    # Reshape input data for LSTM: [samples, time steps, features]
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    print(f"\nTraining Samples (X_train): {X_train.shape}")
    print(f"Testing Samples (X_test): {X_test.shape}")

    # 5. Build and Train Model
    input_shape = (X_train.shape[1], 1)
    model = build_lstm_model(input_shape)
    print("\n--- Training LSTM Model (This may take a minute) ---")
    model.fit(
        X_train,
        y_train,
        epochs=10,      # Number of training passes
        batch_size=32,  # Number of samples per update
        verbose=1,      # Show progress
    )
    print("--- Training Complete ---")

    # 6. Make Predictions
    test_predict = model.predict(X_test)
    # Invert scaling for predictions to get actual dollar values
    test_predict_unscaled = scaler.inverse_transform(test_predict)
    y_test_unscaled = scaler.inverse_transform(y_test.reshape(-1, 1))

    # 7. Evaluate Performance
    # RMSE measures the average magnitude of the errors.
    rmse = math.sqrt(mean_squared_error(y_test_unscaled, test_predict_unscaled))

    # 8. Predict Next Day
    # Use the last LOOK_BACK days of the *full* data set for the next day's prediction
    last_sequence = scaled_data[-LOOK_BACK:]
    last_sequence = np.reshape(last_sequence, (1, LOOK_BACK, 1))
    next_day_prediction_scaled = model.predict(last_sequence)
    next_day_prediction = scaler.inverse_transform(next_day_prediction_scaled)

    # 9. Output Results
    print("\n" + "="*50)
    print(" AI STOCK PREDICTION RESULTS")
    print("="*50)
    print(f"Model Look-Back Window: {LOOK_BACK} days")
    print(f"Test Data Root Mean Squared Error (RMSE): ${rmse:.2f}")
    print("\n--- Sample Prediction Summary ---")
    print(f"Actual Price of last test day: ${y_test_unscaled[-1][0]:.2f}")
    print(f"Predicted Price of last test day: ${test_predict_unscaled[-1][0]:.2f}")
    print(f"\nPREDICTION FOR THE NEXT TRADING DAY: ${next_day_prediction[0][0]:.2f}")
    print("="*50)
if __name__ == "__main__":
    # Show which TensorFlow build is in use before starting the workflow
    tf_version = tf.__version__
    print(f"TensorFlow Version: {tf_version}")
    run_stock_predictor()
How to Use and Next Steps
- Save the Code: Save the code above as stock_predictor.py.
- Run: Execute the script: python stock_predictor.py.
On its first run, the script generates a file named simulated_stock_data.csv and trains the LSTM model on it; later runs load the existing file instead of regenerating the data.
Moving to Real-World Data
To use this with real stock prices:
- Install yfinance: pip install yfinance
- Modify load_or_simulate_data: Replace the content of the load_or_simulate_data function with code that fetches real data.
Example modification for real data (for reference only; do not paste it into the script as-is):
import yfinance as yf  # Requires separate installation

# ... (inside load_or_simulate_data function)
ticker = "AAPL"  # Apple Stock
df = yf.download(ticker, start="2018-01-01", end="2023-01-01")
data = df['Close'].values.reshape(-1, 1)
return data

By structuring the code this way, you can easily plug in real data without changing the core AI logic!
← Back to Projects
