auto-bot-971 / models.py
HugScriptKitty's picture
Deploy Gradio app with multiple files
0fe9663 verified
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
from config import MODEL_NAME
import torch
# Load model and tokenizer globally for efficiency
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
@spaces.GPU(duration=60)
def generate_response(message, history, system_prompt, temperature, max_tokens, top_p):
# Format conversation history
conversation = [{"role": "system", "content": system_prompt}]
for user_msg, assistant_msg in history:
conversation.append({"role": "user", "content": user_msg})
if assistant_msg:
conversation.append({"role": "assistant", "content": assistant_msg})
conversation.append({"role": "user", "content": message})
# Format for chat model
input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
do_sample=temperature > 0,
pad_token_id=tokenizer.eos_token_id
)
response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
return response