Spaces:
Running
Running
Carlo Moro
Implement app.py for reranking and deduplication, and add requirements for strive-ranker library
3d745f2
| from strive.reranker import Reranker, EmbeddingType, deduplicate_results | |
| import gradio as gr | |
def rerank_and_deduplicate(query: str, corpus_text: str, top_k: int = 50) -> str:
    """Rank corpus lines against a query and return a printable report.

    The corpus is split on newlines; each line is stripped and blank lines
    are dropped. The remaining documents are ranked twice, once by a
    ``Reranker`` built with ``EmbeddingType.textual`` and once by one built
    with ``EmbeddingType.semantic``. The two result lists are concatenated
    and passed to ``deduplicate_results``.

    Args:
        query: Text the documents are ranked against.
        corpus_text: Candidate documents, one per line. ``None`` or a value
            containing only whitespace is treated as an empty corpus.
        top_k: Limit forwarded to ``deduplicate_results`` (defaults to 50,
            the value that was previously hard-coded).

    Returns:
        One line per result formatted as ``"<text> (Score: <score>)"`` with
        the score shown to four decimal places, or an empty string when the
        corpus holds no documents.
    """
    stripped_lines = (line.strip() for line in (corpus_text or "").split("\n"))
    corpus = [line for line in stripped_lines if line]

    # Nothing to rank: skip building the rerankers and avoid asking them
    # for top_k=0 results.
    if not corpus:
        return ""

    # NOTE(review): both rerankers are rebuilt on every call; if construction
    # is expensive in strive, consider creating them once and reusing them.
    textual_reranker = Reranker(embedding_type=EmbeddingType.textual)
    semantic_reranker = Reranker(embedding_type=EmbeddingType.semantic)

    # Request every document from each reranker so that the only cut-off
    # applied is the one in deduplicate_results.
    textual_results = textual_reranker.rerank_documents(query, corpus, top_k=len(corpus))
    semantic_results = semantic_reranker.rerank_documents(query, corpus, top_k=len(corpus))

    merged_results = textual_results + semantic_results
    # Assumes deduplicate_results yields (text, score) pairs, which is how
    # they are unpacked below -- TODO confirm against the strive API.
    deduplicated_results = deduplicate_results(merged_results, top_k=top_k)

    return "\n".join(
        f"{text} (Score: {score:.4f})" for text, score in deduplicated_results
    )
# Gradio UI: two text inputs (the query and a newline-separated corpus) are
# passed to rerank_and_deduplicate, and its string result is shown in a
# single output textbox.
app = gr.Interface(
    fn=rerank_and_deduplicate,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query here"),
        gr.Textbox(label="Corpus", placeholder="Enter one sentence per line", lines=10),
    ],
    outputs=gr.Textbox(label="Top Ranked Results"),
    title="STRIVE: Semantic Tokenized Ranking via Vectorization & Embeddings",
    description="Enter a query and multiple sentences to test the reranking algorithm.",
)

# Start the server only when this file is executed as a script, so that
# importing the module (e.g. to reuse `app` or the ranking function) does
# not launch a web server as a side effect.
if __name__ == "__main__":
    app.launch()