import os
import sys
import logging
import warnings
from datetime import datetime, timedelta
from typing import Optional, Tuple, Dict, List

import joblib
import numpy as np
import pandas as pd
import torch
import gradio as gr
import matplotlib.pyplot as plt
import shap
from sklearn.base import clone
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import TimeSeriesSplit
from tqdm import tqdm
from transformers import pipeline
from wordcloud import WordCloud

warnings.filterwarnings("ignore", category=UserWarning)


class Config:
    DATA_PATH = "data/ibm_cleaned.parquet"
    MODEL_PATH = "models/stock_prediction_model.joblib"
    CACHE_DIR = "./cache"
    LOGS_DIR = "./logs"
    PLOTS_DIR = "./plots"
    SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
    SENTIMENT_BATCH_SIZE = 8
    SENTIMENT_MAX_LEN = 256
    CONFIDENCE_THRESHOLD = 0.3
    DATA_REFRESH_DAYS = 7
    MAX_HISTORY_ENTRIES = 50

    @classmethod
    def setup(cls):
        os.makedirs(cls.CACHE_DIR, exist_ok=True)
        os.makedirs(cls.LOGS_DIR, exist_ok=True)
        os.makedirs(cls.PLOTS_DIR, exist_ok=True)
        os.makedirs(os.path.dirname(cls.MODEL_PATH), exist_ok=True)


Config.setup()


logging.basicConfig(
    filename=os.path.join(Config.LOGS_DIR, 'app.log'),
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)


def load_data(test_mode: bool = False, force_refresh: bool = False) -> pd.DataFrame:
    """Load and preprocess the stock data with caching and validation."""
    try:
        if not os.path.exists(Config.DATA_PATH):
            raise FileNotFoundError(f"Data file not found: {Config.DATA_PATH}")

        # No remote source is wired up, so "refresh" can only mean re-reading
        # the parquet file; staleness is logged for visibility.
        file_age = datetime.now() - datetime.fromtimestamp(os.path.getmtime(Config.DATA_PATH))
        if not force_refresh and file_age.days < Config.DATA_REFRESH_DAYS:
            logger.info("Using cached data")
        else:
            logger.warning(f"Data file is {file_age.days} days old; re-reading from disk")

        df = pd.read_parquet(Config.DATA_PATH)

        required_cols = {'Open', 'High', 'Low', 'Close', 'Volume', 'Date', 'News'}
        missing_cols = required_cols - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing columns in data: {missing_cols}")

        num_cols = ['Open', 'High', 'Low', 'Close']
        df[num_cols] = df[num_cols].astype('float32')

        if (df['Volume'] < 0).any():
            logger.warning("Negative values found in 'Volume'; downcasting as signed int.")
            df['Volume'] = pd.to_numeric(df['Volume'], downcast='integer')
        else:
            df['Volume'] = pd.to_numeric(df['Volume'], downcast='unsigned')

        if not pd.api.types.is_datetime64_any_dtype(df['Date']):
            df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
            if df['Date'].isna().any():
                raise ValueError("Date column contains invalid dates after parsing.")

        if df.isnull().sum().sum() > 0:
            logger.warning("Data contains null values - filling with forward fill")
            df = df.ffill()

        if test_mode:
            # Random subsample re-sorted by date; rolling features will span
            # the resulting gaps, which is acceptable for a smoke-test run.
            sample_size = min(5000, len(df))
            if len(df) < 5000:
                logger.warning(f"Data has only {len(df)} rows - using full dataset in test mode")
            return df.sample(sample_size, random_state=42).sort_values('Date').reset_index(drop=True)

        return df.sort_values('Date').reset_index(drop=True)

    except Exception as e:
        logger.error(f"Data loading failed: {e}")
        raise
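

# A minimal sketch of the parquet schema load_data() expects; the column set
# mirrors required_cols above, and the example values are invented.
#
#   example = pd.DataFrame({
#       'Date': pd.to_datetime(['2024-01-02', '2024-01-03']),
#       'Open': [187.2, 188.0], 'High': [189.0, 189.5],
#       'Low': [186.5, 187.1], 'Close': [188.4, 187.9],
#       'Volume': [3_120_000, 2_980_000],
#       'News': ["IBM announces ...", "Analysts expect ..."],
#   })
#   example.to_parquet(Config.DATA_PATH)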


class SentimentAnalyzer:
    """Handles sentiment analysis with caching and batching."""

    def __init__(self):
        self._initialize_pipeline()
        self.sentiment_cache = {}

    def _initialize_pipeline(self):
        """Initialize the sentiment analysis pipeline."""
        try:
            self.pipeline = pipeline(
                "text-classification",
                model=Config.SENTIMENT_MODEL,
                device=0 if torch.cuda.is_available() else -1,
                batch_size=Config.SENTIMENT_BATCH_SIZE,
                truncation=True,
                max_length=Config.SENTIMENT_MAX_LEN,
            )
        except Exception as e:
            logger.error(f"Sentiment model load failed: {e}")
            self.pipeline = None
            raise

    def analyze_texts(self, texts: List[str]) -> np.ndarray:
        """Analyze sentiment for a batch of texts with caching.

        Returns one score per text in [-1, 1]: the positive label maps to
        +score, the negative label to -score.
        """
        if self.pipeline is None or not texts:
            return np.zeros(len(texts), dtype='float32')

        uncached_texts = []
        uncached_indices = []
        cached_scores = np.zeros(len(texts), dtype='float32')

        for i, text in enumerate(texts):
            # Character-level pre-truncation keeps cache keys bounded; the
            # pipeline still truncates to SENTIMENT_MAX_LEN tokens.
            text = str(text)[:Config.SENTIMENT_MAX_LEN] if text else ""
            if text in self.sentiment_cache:
                cached_scores[i] = self.sentiment_cache[text]
            else:
                uncached_texts.append(text)
                uncached_indices.append(i)

        if uncached_texts:
            try:
                results = self.pipeline(
                    uncached_texts,
                    truncation=True,
                    max_length=Config.SENTIMENT_MAX_LEN,
                    batch_size=Config.SENTIMENT_BATCH_SIZE
                )

                for pos, (idx, res) in enumerate(zip(uncached_indices, results)):
                    # The pipeline yields one dict per input text (or a
                    # one-element list per input in some transformers versions).
                    if isinstance(res, list):
                        res = res[0]
                    label = res['label']
                    score = res['score']
                    sentiment_score = score if label == "POSITIVE" else -score
                    cached_scores[idx] = sentiment_score
                    # Key the cache by the text that was actually scored.
                    self.sentiment_cache[uncached_texts[pos]] = sentiment_score

            except Exception as e:
                logger.error(f"Sentiment analysis error: {e}")

        return cached_scores
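

# Illustrative usage of the analyzer; scores land in [-1, 1], with positive
# headlines near +1 and negative ones near -1 (exact values depend on the
# model). The first call downloads the DistilBERT checkpoint.
#
#   analyzer = SentimentAnalyzer()
#   analyzer.analyze_texts(["Record profits", "Shares tumble"])
#   # -> array of two scores, roughly [+0.99, -0.99]; repeated texts are
#   #    served from the in-memory cache.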


def calculate_rsi(series: pd.Series, window: int = 14) -> pd.Series:
    """Calculate the Relative Strength Index."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        delta = series.diff()
        gain = delta.clip(lower=0)
        loss = -delta.clip(upper=0)

        avg_gain = gain.rolling(window, min_periods=1).mean()
        avg_loss = loss.rolling(window, min_periods=1).mean().replace(0, 1e-10)

        rs = avg_gain / avg_loss
        return (100 - (100 / (1 + rs))).astype('float32')
    except Exception as e:
        logger.error(f"RSI calculation failed: {e}")
        return pd.Series(np.nan, index=series.index)


def calculate_macd(
    series: pd.Series,
    fast: int = 12,
    slow: int = 26,
    signal: int = 9
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """Calculate the MACD line, signal line, and histogram."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        ema_fast = series.ewm(span=fast, adjust=False).mean()
        ema_slow = series.ewm(span=slow, adjust=False).mean()
        macd_line = ema_fast - ema_slow
        signal_line = macd_line.ewm(span=signal, adjust=False).mean()
        return (
            macd_line.astype('float32'),
            signal_line.astype('float32'),
            (macd_line - signal_line).astype('float32')
        )
    except Exception as e:
        logger.error(f"MACD calculation failed: {e}")
        return (
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index)
        )


def calculate_bollinger_bands(
    series: pd.Series,
    window: int = 20,
    no_of_std: int = 2
) -> Tuple[pd.Series, pd.Series]:
    """Calculate Bollinger Bands (upper, lower)."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        rolling_mean = series.rolling(window, min_periods=1).mean()
        rolling_std = series.rolling(window, min_periods=1).std()
        return (
            (rolling_mean + (no_of_std * rolling_std)).astype('float32'),
            (rolling_mean - (no_of_std * rolling_std)).astype('float32')
        )
    except Exception as e:
        logger.error(f"Bollinger Bands calculation failed: {e}")
        return (
            pd.Series(np.nan, index=series.index),
            pd.Series(np.nan, index=series.index)
        )


def calculate_volatility(series: pd.Series, window: int = 10) -> pd.Series:
    """Calculate rolling volatility of daily returns."""
    try:
        series = pd.to_numeric(series, errors='coerce').ffill()
        return series.pct_change().rolling(window, min_periods=1).std().astype('float32')
    except Exception as e:
        logger.error(f"Volatility calculation failed: {e}")
        return pd.Series(np.nan, index=series.index)
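

# A quick, self-contained sanity check for the indicator helpers above; the
# synthetic price series is illustrative only and not part of the app flow.
def _demo_indicators() -> None:
    rng = np.random.default_rng(0)
    prices = pd.Series(100 + np.cumsum(rng.normal(0, 1, 60)))
    rsi = calculate_rsi(prices)
    macd_line, signal_line, hist = calculate_macd(prices)
    upper, lower = calculate_bollinger_bands(prices)
    # RSI is bounded once defined, the MACD histogram is the line minus its
    # signal, and the upper band never dips below the lower band.
    assert rsi.dropna().between(0, 100).all()
    assert np.allclose(hist, macd_line - signal_line, atol=1e-5)
    assert ((upper - lower).dropna() >= 0).all()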


def create_features(df: pd.DataFrame, analyzer: SentimentAnalyzer) -> pd.DataFrame:
    """Create all features for the prediction model."""
    try:
        df = df.copy()

        required_cols = {'Close', 'News'}
        missing_cols = required_cols - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing columns for feature creation: {missing_cols}")

        # Price-based features
        df['Price_Change'] = df['Close'].pct_change().astype('float32')
        df['Log_Return'] = np.log1p(df['Price_Change'].clip(lower=-0.9999)).astype('float32')
        df['MA_5'] = df['Close'].rolling(5, min_periods=1).mean().astype('float32')
        df['MA_20'] = df['Close'].rolling(20, min_periods=1).mean().astype('float32')
        df['RSI'] = calculate_rsi(df['Close'])
        df['Volatility_10'] = calculate_volatility(df['Close'], 10)

        macd_line, signal_line, hist = calculate_macd(df['Close'])
        df['MACD'] = macd_line
        df['MACD_Signal'] = signal_line
        df['MACD_Hist'] = hist

        bb_upper, bb_lower = calculate_bollinger_bands(df['Close'])
        df['BB_Upper'] = bb_upper
        df['BB_Lower'] = bb_lower

        # Sentiment features
        if 'News' not in df or df['News'].isnull().all():
            logger.warning("No news data - setting Sentiment features to zero")
            df['Sentiment'] = 0.0
        else:
            df['Sentiment'] = analyzer.analyze_texts(df['News'].fillna('').tolist())

        df['Sentiment_MA'] = df['Sentiment'].rolling(5, min_periods=1).mean().astype('float32')
        df['Sentiment_Lag1'] = df['Sentiment'].shift(1).fillna(0).astype('float32')

        # Next-day direction label. The final row has no next-day close, so
        # its label defaults to 0; training code should drop that row.
        df['Target'] = (df['Close'].shift(-1) > df['Close']).astype('int8')

        return df.dropna().reset_index(drop=True)

    except Exception as e:
        logger.error(f"Feature engineering error: {e}")
        raise
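

# Shape of the frame create_features() returns, sketched on toy inputs; the
# prices are invented, and the earliest rows fall out of the result because
# pct_change and the rolling std are NaN at the start of a series.
#
#   toy = pd.DataFrame({
#       'Date': pd.date_range('2024-01-01', periods=5),
#       'Close': [100.0, 101.0, 100.5, 102.0, 101.5],
#       'News': ["up", "down", "flat", "up", "down"],
#   })
#   feats = create_features(toy, analyzer)
#   # columns: Date, Close, News, Price_Change, Log_Return, MA_5, MA_20, RSI,
#   #          Volatility_10, MACD, MACD_Signal, MACD_Hist, BB_Upper, BB_Lower,
#   #          Sentiment, Sentiment_MA, Sentiment_Lag1, Target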


def train_model() -> HistGradientBoostingClassifier:
    """Train and evaluate the prediction model."""
    try:
        logger.info("Starting model training...")
        df = load_data(test_mode=True)

        # Chronological 80/20 split (no shuffling for time series)
        split_idx = int(0.8 * len(df))
        train_df = df.iloc[:split_idx].copy()
        test_df = df.iloc[split_idx:].copy()

        analyzer = SentimentAnalyzer()

        # Build features over the full training frame in one pass so rolling
        # windows stay continuous; chunked processing would reset them at
        # every chunk boundary.
        train_processed = create_features(train_df, analyzer)

        expected_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower', 'Target'
        ]

        missing_cols = [c for c in expected_features if c not in train_processed.columns]
        if missing_cols:
            raise ValueError(f"Missing features in training data: {missing_cols}")

        # Drop the final row: its Target has no next-day close behind it.
        train_processed = train_processed[expected_features + ['Date']].iloc[:-1]

        X_train = train_processed.drop(columns=['Date', 'Target'])
        y_train = train_processed['Target']

        base_model = HistGradientBoostingClassifier(
            max_iter=100,
            max_depth=5,
            learning_rate=0.05,
            random_state=42,
            verbose=1,
            early_stopping=True,
            validation_fraction=0.1
        )

        # Walk-forward cross-validation
        tscv = TimeSeriesSplit(n_splits=3)
        f1_scores = []

        for train_idx, val_idx in tscv.split(X_train):
            model = clone(base_model)
            model.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
            preds = model.predict(X_train.iloc[val_idx])
            f1_scores.append(f1_score(y_train.iloc[val_idx], preds))
            logger.info(f"Fold F1: {f1_scores[-1]:.3f}")

        logger.info(f"Validation F1: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")

        # Refit on the full training set
        model = clone(base_model)
        model.fit(X_train, y_train)

        # Held-out evaluation, again excluding the unlabeled final row
        test_processed = create_features(test_df, analyzer).iloc[:-1]
        X_test = test_processed[expected_features[:-1]]
        y_test = test_processed['Target']

        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        logger.info(f"Test Accuracy: {acc:.3f} | F1: {f1:.3f}")

        joblib.dump(model, Config.MODEL_PATH)
        logger.info(f"Model saved to {Config.MODEL_PATH}")

        return model

    except Exception as e:
        logger.error(f"Model training failed: {e}")
        raise
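

# How TimeSeriesSplit partitions n samples, sketched for n=10 and 3 splits;
# every fold trains strictly on the past and validates on the block after it.
#
#   list(TimeSeriesSplit(n_splits=3).split(range(10)))
#   # fold 1: train [0..3], val [4, 5]
#   # fold 2: train [0..5], val [6, 7]
#   # fold 3: train [0..7], val [8, 9]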


def predict(df: pd.DataFrame, threshold: float = Config.CONFIDENCE_THRESHOLD) -> pd.DataFrame:
    """Make predictions with confidence scores."""
    try:
        if model is None:
            raise ValueError("Model is not loaded")

        df_features = create_features(df, analyzer)

        required_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower'
        ]

        X = df_features[required_features]
        proba = model.predict_proba(X)[:, 1]

        # Map P(up) to a signal: Buy above 0.5 + threshold, Sell below
        # 0.5 - threshold, Hold in the dead zone between them.
        preds = np.where(
            proba > (0.5 + threshold), "Buy",
            np.where(proba < (0.5 - threshold), "Sell", "Hold")
        )

        df_features['Prediction'] = preds
        df_features['Confidence'] = np.abs(proba - 0.5) * 2

        return df_features[['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment']]

    except Exception as e:
        logger.error(f"Prediction error: {e}")
        return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])
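

# Worked example of the signal mapping with the default threshold of 0.3
# (numbers are illustrative): P(up) = 0.85 -> Buy (0.85 > 0.8),
# P(up) = 0.60 -> Hold (inside [0.2, 0.8]), P(up) = 0.10 -> Sell (0.10 < 0.2).
# Confidence is |P(up) - 0.5| * 2, giving 0.70, 0.20, and 0.80 respectively.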


def generate_shap_plot(df: pd.DataFrame) -> Optional[str]:
    """Generate SHAP explanation plot."""
    try:
        df = df.tail(50)

        required_features = [
            'Sentiment', 'Sentiment_MA', 'Sentiment_Lag1', 'Price_Change',
            'Log_Return', 'MA_5', 'MA_20', 'RSI', 'Volatility_10',
            'MACD', 'MACD_Signal', 'MACD_Hist', 'BB_Upper', 'BB_Lower'
        ]

        X = df[required_features]
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
        # Some SHAP versions return one array per class for classifiers;
        # plot the positive class in that case.
        if isinstance(shap_values, list):
            shap_values = shap_values[1]

        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X, show=False, plot_size=(10, 6))

        plot_path = os.path.join(Config.PLOTS_DIR, "shap_summary.png")
        plt.savefig(plot_path, bbox_inches='tight')
        plt.close()

        return plot_path

    except Exception as e:
        logger.error(f"SHAP plot generation failed: {e}")
        return None


def plot_sentiment(df: pd.DataFrame) -> Optional[str]:
    """Generate sentiment trend plot."""
    try:
        plt.figure(figsize=(10, 4))
        plt.plot(df['Date'], df['Sentiment'], label="Daily Sentiment")
        plt.plot(df['Date'], df['Sentiment_MA'], label="5-Day MA", alpha=0.7)
        plt.title("News Sentiment Over Time")
        plt.xlabel("Date")
        plt.ylabel("Sentiment Score")
        plt.legend()
        plt.grid(True)

        plot_path = os.path.join(Config.PLOTS_DIR, "sentiment_plot.png")
        plt.savefig(plot_path, bbox_inches='tight')
        plt.close()

        return plot_path

    except Exception as e:
        logger.error(f"Sentiment plot failed: {e}")
        return None


def generate_wordcloud(texts: List[str]) -> Optional[str]:
    """Generate a word cloud from news texts."""
    try:
        if not texts or all(pd.isna(t) for t in texts):
            return None

        text = " ".join(str(t) for t in texts if t and not pd.isna(t))
        wc = WordCloud(width=600, height=400, background_color='white').generate(text)

        plot_path = os.path.join(Config.PLOTS_DIR, "wordcloud.png")
        wc.to_file(plot_path)

        return plot_path

    except Exception as e:
        logger.error(f"Word cloud generation failed: {e}")
        return None


def predict_single(text_input: str, threshold: float, history_state: list) -> Tuple[pd.DataFrame, list, Optional[dict]]:
    """Make a single prediction based on news input."""
    try:
        today = datetime.today().strftime('%Y-%m-%d')
        # Placeholder OHLCV row for today; Close is overwritten with the
        # latest historical close when history is available.
        df = pd.DataFrame({
            'Date': [pd.to_datetime(today)],
            'Open': [100.0],
            'High': [105.0],
            'Low': [95.0],
            'Close': [100.0],
            'Volume': [1000000],
            'News': [text_input]
        })

        # Prepend recent history so rolling indicators and pct_change have
        # context; a lone row would be dropped by create_features' dropna().
        try:
            hist_data = load_data()
            if not hist_data.empty:
                df['Close'] = hist_data['Close'].iloc[-1]
                df = pd.concat([hist_data.tail(30), df], ignore_index=True)
        except Exception as e:
            logger.warning(f"Couldn't load historical data: {e}")

        preds = predict(df, threshold)

        if preds.empty:
            error_df = pd.DataFrame({
                'Date': [today],
                'Close': [0],
                'Prediction': ["Error"],
                'Confidence': [0],
                'Sentiment': [0]
            })
            return error_df, history_state, None

        # Keep only the row for today's input.
        preds = preds.tail(1).reset_index(drop=True)

        new_history = history_state.copy() if history_state else []
        new_history.append(preds.iloc[0].to_dict())

        if len(new_history) > Config.MAX_HISTORY_ENTRIES:
            new_history.pop(0)

        return preds, new_history, preds.iloc[0].to_dict()

    except Exception as e:
        logger.error(f"Single prediction failed: {e}")
        today = datetime.today().strftime('%Y-%m-%d')
        error_df = pd.DataFrame({
            'Date': [today],
            'Close': [0],
            'Prediction': ["Error"],
            'Confidence': [0],
            'Sentiment': [0]
        })
        return error_df, history_state, None


def batch_predict(csv_file: str, threshold: float) -> pd.DataFrame:
    """Process batch prediction from an uploaded CSV file."""
    try:
        if not csv_file:
            return pd.DataFrame()

        df = pd.read_csv(csv_file)
        required_cols = {'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'News'}
        missing_cols = required_cols - set(df.columns)

        if missing_cols:
            logger.error(f"Missing columns in CSV: {missing_cols}")
            return pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"])

        # Rolling features assume chronological order.
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.sort_values('Date').reset_index(drop=True)

        return predict(df, threshold)

    except Exception as e:
        logger.error(f"Batch prediction failed: {e}")
        return pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"])
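

# A minimal CSV that batch_predict() accepts; header names must match
# required_cols exactly, and these rows are invented:
#
#   Date,Open,High,Low,Close,Volume,News
#   2024-01-02,187.2,189.0,186.5,188.4,3120000,"IBM announces ..."
#   2024-01-03,188.0,189.5,187.1,187.9,2980000,"Analysts expect ..."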


def explain_current() -> Optional[str]:
    """Generate SHAP explanation plot using current data."""
    try:
        df = load_data().tail(100)
        df = create_features(df, analyzer)
        return generate_shap_plot(df)
    except Exception as e:
        logger.error(f"SHAP explanation failed: {e}")
        return None


def show_sentiment_plot() -> Optional[str]:
    """Generate sentiment visualization plot."""
    try:
        df = load_data().tail(100)
        df = create_features(df, analyzer)
        return plot_sentiment(df)
    except Exception as e:
        logger.error(f"Sentiment plot failed: {e}")
        return None


def show_history_table(history_state: list) -> pd.DataFrame:
    """Display prediction history as a DataFrame."""
    try:
        if not history_state:
            return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])
        return pd.DataFrame(history_state)
    except Exception as e:
        logger.error(f"History display failed: {e}")
        return pd.DataFrame(columns=['Date', 'Close', 'Prediction', 'Confidence', 'Sentiment'])


def summarize_session(df: pd.DataFrame) -> str:
    """Generate summary statistics for the session."""
    if df.empty:
        return "No predictions to summarize"

    return f"""
✅ **Total Predictions:** {len(df)}
📊 **Avg Confidence:** {df['Confidence'].mean():.2f}
🎯 **Predictions:** {df['Prediction'].value_counts().to_dict()}
😊 **Avg Sentiment:** {df['Sentiment'].mean():.2f}
📅 **Date Range:** {df['Date'].min()} → {df['Date'].max()}"""


def generate_downloadable(df: pd.DataFrame) -> str:
    """Write predictions to a downloadable CSV file and return its path."""
    path = os.path.join(Config.CACHE_DIR, "predictions.csv")
    df.to_csv(path, index=False)
    return path


def prediction_distribution(df: pd.DataFrame) -> Optional[str]:
    """Generate prediction distribution plot."""
    try:
        if df.empty:
            return None

        counts = df['Prediction'].value_counts()
        fig, ax = plt.subplots(figsize=(8, 4))
        # Color by label in the order value_counts() returns the bars.
        color_map = {'Buy': 'green', 'Sell': 'red'}
        counts.plot(
            kind='bar',
            ax=ax,
            color=[color_map.get(label, 'gray') for label in counts.index]
        )
        plt.title("Prediction Distribution")
        plt.ylabel("Count")

        plot_path = os.path.join(Config.PLOTS_DIR, "pred_dist.png")
        fig.savefig(plot_path, bbox_inches='tight')
        plt.close(fig)

        return plot_path

    except Exception as e:
        logger.error(f"Prediction distribution plot failed: {e}")
        return None


model = None
try:
    model = joblib.load(Config.MODEL_PATH)
    logger.info("Model loaded successfully")
except Exception as e:
    logger.warning(f"Model loading failed: {e}")
    try:
        model = train_model()
        logger.info("New model trained successfully")
    except Exception as e:
        logger.error(f"Model training fallback failed: {e}")
        raise

analyzer = SentimentAnalyzer()


with gr.Blocks(title="Stock Prediction", theme=gr.themes.Soft()) as demo:
    history_state = gr.State(value=[])
    last_prediction_state = gr.State(value=None)

    gr.Markdown("""
    # 📈 Stock Price Prediction with News Sentiment Analysis
    *Predict next-day stock movements using technical indicators and news sentiment*
    """)

    with gr.Tab("🔍 Single Prediction"):
        with gr.Row():
            text_input = gr.Textbox(
                label="Latest News Headline/Text",
                placeholder="Enter news text about the company...",
                lines=3,
                max_lines=5
            )

        with gr.Row():
            threshold_slider = gr.Slider(
                minimum=0,
                maximum=0.5,
                value=Config.CONFIDENCE_THRESHOLD,
                step=0.01,
                label="Confidence Threshold",
                info="Higher values require more confidence for Buy/Sell decisions"
            )

        with gr.Row():
            predict_btn = gr.Button("Predict", variant="primary")
            last_pred_btn = gr.Button("Show Last Prediction")
            refresh_data_btn = gr.Button("Refresh Data")

        with gr.Row():
            with gr.Column():
                prediction_output = gr.Dataframe(
                    headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                    label="Prediction Results",
                    interactive=False
                )

            with gr.Column():
                last_prediction_output = gr.Dataframe(
                    headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                    label="Last Prediction",
                    interactive=False,
                )

    with gr.Tab("📂 Batch Prediction"):
        with gr.Row():
            batch_file = gr.File(
                label="Upload CSV File (columns: Date, Open, High, Low, Close, Volume, News)",
                file_types=[".csv"],
                type="filepath",
            )

        with gr.Row():
            batch_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0,
                maximum=0.5,
                value=Config.CONFIDENCE_THRESHOLD,
                step=0.01
            )

        with gr.Row():
            batch_predict_btn = gr.Button("Run Batch Prediction", variant="primary")

        with gr.Row():
            batch_output = gr.Dataframe(
                headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                label="Batch Predictions",
                interactive=False
            )

        with gr.Row():
            batch_summary = gr.Markdown()

        with gr.Row():
            download_btn = gr.Button("Download Predictions")
            download_file = gr.File(label="Download", visible=False)

    with gr.Tab("📊 Explanations"):
        with gr.Row():
            explain_btn = gr.Button("Generate SHAP Explanation")
            sentiment_plot_btn = gr.Button("Show Sentiment Trend")

        with gr.Row():
            shap_image = gr.Image(label="SHAP Feature Importance", interactive=False)
            sentiment_image = gr.Image(label="Sentiment Over Time", interactive=False)

    with gr.Tab("📜 History"):
        with gr.Row():
            history_btn = gr.Button("Show Prediction History")
            clear_history_btn = gr.Button("Clear History", variant="stop")

        with gr.Row():
            history_output = gr.Dataframe(
                headers=["Date", "Close", "Prediction", "Confidence", "Sentiment"],
                label="Prediction History",
                interactive=False
            )

        with gr.Row():
            history_summary = gr.Markdown()
            dist_image = gr.Image(label="Prediction Distribution", interactive=False)

    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## Stock Prediction App

        This application predicts next-day stock price movements using:

        - **Technical Indicators**: RSI, MACD, Bollinger Bands, Moving Averages
        - **News Sentiment Analysis**: Using transformer models
        - **Machine Learning**: Gradient-boosted decision trees

        ### Features:
        - Single prediction with news input
        - Batch prediction from CSV files
        - Model explainability with SHAP values
        - Sentiment analysis visualization
        - Prediction history tracking

        ### Technical Details:
        - Built with Python, scikit-learn, and Gradio
        - Uses DistilBERT for sentiment analysis
        - Implements time-series validation
        """)
    predict_btn.click(
        fn=predict_single,
        inputs=[text_input, threshold_slider, history_state],
        outputs=[prediction_output, history_state, last_prediction_state],
        api_name="predict"
    )

    last_pred_btn.click(
        fn=lambda x: pd.DataFrame([x]) if x else pd.DataFrame(),
        inputs=[last_prediction_state],
        outputs=[last_prediction_output]
    )

    def _refresh_data() -> None:
        # Reload the dataset; return nothing since no component is updated.
        load_data(force_refresh=True)

    refresh_data_btn.click(
        fn=_refresh_data,
        outputs=[]
    )

    batch_predict_btn.click(
        fn=batch_predict,
        inputs=[batch_file, batch_threshold],
        outputs=[batch_output]
    ).then(
        fn=summarize_session,
        inputs=[batch_output],
        outputs=[batch_summary]
    ).then(
        fn=prediction_distribution,
        inputs=[batch_output],
        outputs=[dist_image]
    )

    download_btn.click(
        fn=generate_downloadable,
        inputs=[batch_output],
        outputs=[download_file]
    ).then(
        fn=lambda: gr.File(visible=True),
        outputs=[download_file]
    )

    explain_btn.click(
        fn=explain_current,
        outputs=[shap_image]
    )

    sentiment_plot_btn.click(
        fn=show_sentiment_plot,
        outputs=[sentiment_image]
    )

    history_btn.click(
        fn=show_history_table,
        inputs=[history_state],
        outputs=[history_output]
    ).then(
        fn=summarize_session,
        inputs=[history_output],
        outputs=[history_summary]
    ).then(
        fn=prediction_distribution,
        inputs=[history_output],
        outputs=[dist_image]
    )

    clear_history_btn.click(
        fn=lambda: [],
        outputs=[history_state]
    ).then(
        fn=lambda: pd.DataFrame(columns=["Date", "Close", "Prediction", "Confidence", "Sentiment"]),
        outputs=[history_output]
    ).then(
        fn=lambda: "History cleared",
        outputs=[history_summary]
    )


if __name__ == "__main__":
    try:
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
            share=False
        )
    except Exception as e:
        logger.error(f"Application failed: {e}")
        raise