# FeatureExtraction.py
# Purpose: Script to extract additional features from chat transcripts for
# psychological assessments.
#
# Reads the transcript CSV at `input_datapath`, adds one sentiment-polarity and
# one sentiment-subjectivity column computed from the 'chathistory' column, and
# writes the result to `output_datapath`.

# Imports
import pandas as pd
import numpy as np
from textblob import TextBlob


def _is_missing(text):
    """Return True when *text* is a scalar missing value (None, NaN, pd.NA)."""
    # is_scalar() guards pd.isna(), which returns an array for list-like input
    # and would make the truth test ambiguous.
    return pd.api.types.is_scalar(text) and pd.isna(text)


def _sentiment_scores(text):
    """Return ``(polarity, subjectivity)`` for *text* from a single TextBlob pass.

    Missing values yield ``(nan, nan)`` so that an empty CSV cell (which pandas
    loads as a float NaN) does not abort the run and stays distinguishable from
    a transcript that was actually scored. Any other non-string input is handed
    to TextBlob unchanged, so its own TypeError still surfaces.
    """
    if _is_missing(text):
        return (np.nan, np.nan)
    sentiment = TextBlob(text).sentiment
    return (sentiment.polarity, sentiment.subjectivity)


# Function to calculate sentiment polarity
def get_sentiment_polarity(text) -> float:
    """Return TextBlob's sentiment polarity for *text*.

    TextBlob documents polarity as a float in [-1.0, 1.0].
    Returns NaN when *text* is a missing value.
    """
    return _sentiment_scores(text)[0]


# Function to calculate sentiment subjectivity
def get_sentiment_subjectivity(text) -> float:
    """Return TextBlob's sentiment subjectivity for *text*.

    TextBlob documents subjectivity as a float in [0.0, 1.0], where 0.0 is
    very objective and 1.0 is very subjective.
    Returns NaN when *text* is a missing value.
    """
    return _sentiment_scores(text)[1]


# Load data with embeddings
input_datapath = "data/chat_transcripts_with_embeddings.csv"
output_datapath = "data/chat_transcripts_with_features.csv"
df = pd.read_csv(input_datapath)

# Feature Extraction
# Example: Extracting sentiment polarity and subjectivity
# Each transcript is analysed once and both columns are filled from that single
# result, instead of building a separate TextBlob per feature.
_scores = [_sentiment_scores(text) for text in df['chathistory']]
df['sentiment_polarity'] = pd.Series(
    [polarity for polarity, _ in _scores], index=df.index, dtype=float
)
df['sentiment_subjectivity'] = pd.Series(
    [subjectivity for _, subjectivity in _scores], index=df.index, dtype=float
)

# TODO: Add any additional feature extraction relevant to your study here.
# Example: df['feature_name'] = df['column'].apply(your_custom_function)

# Save the data with additional features
df.to_csv(output_datapath, index=False)
print(f"Data with additional features saved to {output_datapath}")

# Note to Users:
# - Ensure that 'input_datapath' points to your data file with embeddings.
# - This script uses TextBlob for sentiment analysis. Install it using
#   'pip install textblob' if not already installed.
# - You can add more feature extraction functions as needed for your specific
#   research requirements.
# - Rows whose 'chathistory' cell is empty receive NaN for both sentiment
#   features.