AI Stock Predictor Project code in Python

AI Stock Predictor in Python

Visit pythonforbiginners.com to discover Python tutorials

About the project: Building an AI Stock Predictor typically involves time series analysis using specialized deep learning models like Long Short-Term Memory (LSTM) networks, which are excellent at capturing temporal dependencies in sequential data like stock prices.

Since we cannot connect to live stock APIs (like Yahoo Finance) here, I'll provide a single, complete Python file. This script will:

Simulate Stock Data: It generates realistic, trended time-series data so the code runs immediately.
Preprocess: It scales the data and structures it into sequences (the critical step for LSTMs).
Build Model: It defines, trains, and tests an LSTM neural network using TensorFlow/Keras.

You will need the following libraries installed:


  pip install pandas numpy tensorflow scikit-learn

Project Level: Advanced

Here is the complete stock_predictor.py file:


import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import math
import os

# --- Configuration ---
# Look-back window for the LSTM: how many previous days are used to predict the next day.
LOOK_BACK = 60 
# Fraction of the data used for training (0.8 = 80%); the remaining rows are used for testing.
TRAIN_SIZE_PERCENT = 0.8 
# Hide all CUDA devices so the script runs on the CPU, avoiding issues if a GPU is not configured.
# NOTE(review): this is set after `import tensorflow` above; presumably it still takes effect
# because devices are initialized lazily -- confirm, or move it before the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 

def load_or_simulate_data(data_path="simulated_stock_data.csv", num_days=1000):
    """
    Loads closing prices from a CSV, or simulates them if the file does not exist.
    In a real project, you would replace this with an API call (e.g., yfinance).

    Args:
        data_path: CSV file to read. It must contain a 'Date' column (used as
            the index) and a 'Close' column. If the file does not exist,
            synthetic data is generated and saved to this path so that later
            runs load the same series.
        num_days: Number of daily prices to simulate when the file is missing.
            Ignored when an existing file is loaded.

    Returns:
        NumPy array of shape (n, 1) holding the closing prices in row order.
        Rows whose 'Close' value is missing in a loaded CSV are dropped.

    Raises:
        KeyError: If the CSV exists but has no 'Close' column.
        ValueError: If data has to be simulated and num_days is less than 1.
    """
    if os.path.exists(data_path):
        print(f"Loading data from {data_path}...")
        df = pd.read_csv(data_path, index_col='Date', parse_dates=True)
        if 'Close' not in df.columns:
            raise KeyError(
                f"'Close' column not found in {data_path}; "
                f"available columns: {list(df.columns)}"
            )
        # Missing prices would turn into NaN after scaling and poison training,
        # so they are removed here rather than passed downstream.
        return df['Close'].dropna().to_numpy().reshape(-1, 1)

    if num_days < 1:
        raise ValueError(f"num_days must be at least 1, got {num_days}")

    print(f"Simulating synthetic stock data ({num_days} days)...")
    # A private generator with a fixed seed yields the same values as seeding
    # the global NumPy RNG with 42, without reseeding it as a side effect.
    rng = np.random.RandomState(42)

    # 1. Create a base linear trend
    time = np.arange(num_days)
    base_trend = 0.5 * time

    # 2. Add seasonality (cycles)
    seasonality = 10 * np.sin(time / 30) + 5 * np.sin(time / 7)

    # 3. Add random noise (market volatility)
    noise = rng.randn(num_days) * 15

    # 4. Combine and shift to a typical stock price range
    raw_prices = (base_trend + seasonality + noise) + 100

    # Save with a daily 'Date' index so future runs take the loading branch
    dates = pd.date_range('2020-01-01', periods=num_days, name='Date')
    pd.DataFrame({'Close': raw_prices}, index=dates).to_csv(data_path)

    return raw_prices.reshape(-1, 1)

def create_sequences(data, look_back=LOOK_BACK):
    """
    Creates input sequences (X) and target labels (Y) for the LSTM model.
    X = [Day t-look_back, ..., Day t-1]
    Y = [Day t]

    Only column 0 of data is used.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns), oldest row first.
        look_back: Number of consecutive past values in each input window.

    Returns:
        Tuple (X, Y) of new arrays. X has shape (n_rows - look_back, look_back)
        and Y has shape (n_rows - look_back,). If data has look_back rows or
        fewer, X has shape (0, look_back) and Y has shape (0,), so callers can
        still rely on X.shape[1].

    Raises:
        ValueError: If look_back is less than 1.
    """
    if look_back < 1:
        raise ValueError(f"look_back must be at least 1, got {look_back}")

    series = np.asarray(data)[:, 0]
    n_windows = max(len(series) - look_back, 0)

    # Row i of the index grid is [i, i+1, ..., i+look_back-1]; indexing the
    # series with it builds every window at once instead of a Python loop.
    window_idx = np.arange(n_windows)[:, None] + np.arange(look_back)
    X = series[window_idx]

    # The target for window i is the value right after it; copy so the result
    # does not alias the caller's array.
    Y = series[look_back:look_back + n_windows].copy()
    return X, Y

def build_lstm_model(input_shape):
    """
    Defines and compiles the LSTM neural network.

    The network is two stacked 50-unit LSTM layers, each followed by 20%
    dropout, and a single-unit Dense output. It is compiled with the Adam
    optimizer and mean squared error loss.

    Args:
        input_shape: Shape of one input sample without the batch dimension,
            i.e. (time steps, features).

    Returns:
        The compiled, untrained Sequential model.
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing
        # input_shape= to the first layer is deprecated in newer Keras.
        tf.keras.Input(shape=input_shape),
        # Layer 1: returns the full sequence so the next LSTM layer can consume it
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),  # Prevents overfitting
        # Layer 2: returns only the final state
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        # Output Layer: a single price prediction
        Dense(units=1),
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def run_stock_predictor():
    """Main function to execute the prediction workflow.

    Loads (or simulates) the price series, splits it chronologically into
    training and test parts, scales it, trains the LSTM on the training
    windows, reports the RMSE on the test windows, and prints a prediction
    for the day after the last row of data.

    Raises:
        ValueError: If the series is too short to build at least one training
            window and one test window with the configured LOOK_BACK and
            TRAIN_SIZE_PERCENT.
    """

    # 1. Load Data
    raw_data = load_or_simulate_data()

    # 2. Choose the chronological split point
    train_size = int(len(raw_data) * TRAIN_SIZE_PERCENT)
    if train_size <= LOOK_BACK or train_size >= len(raw_data):
        raise ValueError(
            f"Not enough data: {len(raw_data)} rows with LOOK_BACK={LOOK_BACK} and "
            f"TRAIN_SIZE_PERCENT={TRAIN_SIZE_PERCENT} leaves no complete "
            f"training or test window."
        )

    # 3. Preprocessing and Scaling
    # LSTMs perform better when data is normalized. The scaler is fitted on the
    # training rows only, so the min/max of the test period does not leak into
    # training or evaluation; test values may therefore fall outside [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(raw_data[:train_size])
    scaled_data = scaler.transform(raw_data)

    # 4. Split Data into Training and Testing Sets
    train_data = scaled_data[0:train_size, :]
    test_data = scaled_data[train_size - LOOK_BACK:, :] # Retain look_back days for sequence creation

    # Create sequences (X) and targets (Y)
    X_train, y_train = create_sequences(train_data, LOOK_BACK)
    X_test, y_test = create_sequences(test_data, LOOK_BACK)

    # Reshape input data for LSTM: [samples, time steps, features]
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    print(f"\nTraining Samples (X_train): {X_train.shape}")
    print(f"Testing Samples (X_test): {X_test.shape}")

    # 5. Build and Train Model
    input_shape = (X_train.shape[1], 1)
    model = build_lstm_model(input_shape)

    print("\n--- Training LSTM Model (This may take a minute) ---")
    model.fit(
        X_train,
        y_train,
        epochs=10,        # Number of training passes
        batch_size=32,    # Number of samples per update
        verbose=1         # Show progress
    )
    print("--- Training Complete ---")

    # 6. Make Predictions
    test_predict = model.predict(X_test)

    # Invert scaling for predictions to get actual dollar values
    test_predict_unscaled = scaler.inverse_transform(test_predict)
    y_test_unscaled = scaler.inverse_transform(y_test.reshape(-1, 1))

    # 7. Evaluate Performance
    # RMSE measures the average magnitude of the errors.
    rmse = math.sqrt(mean_squared_error(y_test_unscaled, test_predict_unscaled))

    # 8. Predict Next Day
    # Use the last LOOK_BACK days of the *full* data set for the next day's prediction
    last_sequence = scaled_data[-LOOK_BACK:]
    last_sequence = np.reshape(last_sequence, (1, LOOK_BACK, 1))

    next_day_prediction_scaled = model.predict(last_sequence)
    next_day_prediction = scaler.inverse_transform(next_day_prediction_scaled)

    # 9. Output Results
    print("\n" + "="*50)
    print("      AI STOCK PREDICTION RESULTS")
    print("="*50)
    print(f"Model Look-Back Window: {LOOK_BACK} days")
    print(f"Test Data Root Mean Squared Error (RMSE): ${rmse:.2f}")
    print("\n--- Sample Prediction Summary ---")
    print(f"Actual Price of last test day: ${y_test_unscaled[-1][0]:.2f}")
    print(f"Predicted Price of last test day: ${test_predict_unscaled[-1][0]:.2f}")
    print(f"\nPREDICTION FOR THE NEXT TRADING DAY: ${next_day_prediction[0][0]:.2f}")
    print("="*50)

# Script entry point: runs only when the file is executed directly, not when it is imported.
if __name__ == '__main__':
    # Print the installed TensorFlow version as a quick check that the import worked
    print(f"TensorFlow Version: {tf.__version__}")
    run_stock_predictor()

How to Use and Next Steps

Save the Code: Save the code above as stock_predictor.py.
Run: Execute the script: python stock_predictor.py.

On the first run, the script generates a file named simulated_stock_data.csv and trains the LSTM model on it; later runs load the existing file instead of simulating the data again.

Moving to Real-World Data

Install yfinance: pip install yfinance
Modify load_or_simulate_data: Replace the content of the load_or_simulate_data function with code that fetches real data.

Example modification for real data (for reference only; do not paste it into the script as-is):


import yfinance as yf # Requires separate installation
# ... (inside load_or_simulate_data function)
ticker = "AAPL" # Apple Stock
df = yf.download(ticker, start="2018-01-01", end="2023-01-01")
data = df['Close'].values.reshape(-1, 1)
return data

By structuring the code this way, you can easily plug in real data without changing the core AI logic!

← Back to Projects