Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders.wikipedia import WikipediaLoader | |
| from langchain_community.document_loaders import YoutubeLoader | |
| from langchain_community.document_loaders import ArxivLoader | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from langchain_core.tools import tool | |
def youtube_video_loader(url: str) -> str:
    """Load a YouTube video's transcript and return it as text.

    Args:
        url (str): The YouTube video URL.

    Returns:
        str: The repr of each loaded transcript document, joined by
        blank lines, or "No transcript found." when nothing loads.
    """
    # url is already a str per the signature; add_video_info=False skips
    # the extra metadata fetch and returns only the transcript documents.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    docs = loader.load()
    if not docs:
        # Consistent with the other search tools' "no results" style
        # instead of silently returning an empty string.
        return "No transcript found."
    return "\n\n".join(map(repr, docs))
def multiply(x: int, y: int) -> int:
    """Return the product of ``x`` and ``y``."""
    product = x * y
    return product
def add(x: int, y: int) -> int:
    """Return the sum of ``x`` and ``y``."""
    total = x + y
    return total
def subtract(x: int, y: int) -> int:
    """Return ``x`` minus ``y``."""
    difference = x - y
    return difference
def divide(x: int, y: int) -> float:
    """Return ``x`` divided by ``y``.

    Raises:
        ZeroDivisionError: If ``y`` is zero.
    """
    quotient = x / y
    return quotient
def modulus(x: int, y: int) -> int:
    """Return the remainder of ``x`` divided by ``y``."""
    remainder = x % y
    return remainder
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return up to the first 3 results.

    Args:
        query (str): The search query.

    Returns:
        str: A string rendering of a list of {source, page, content}
        dicts, or "No results found." when nothing matched.
    """
    loader = WikipediaLoader(query=query, load_max_docs=3)
    docs = loader.load()
    results = []
    for doc in docs:
        # Handle missing metadata fields gracefully.
        results.append({
            "source": doc.metadata.get('source', 'Unknown source'),
            "page": doc.metadata.get('page', 'Unknown page'),
            "content": doc.page_content,
        })
    # Always return a string: previously an empty `docs` fell through
    # and the function returned None instead of a usable message.
    return str(results) if results else "No results found."
def arxiv_search(query: str) -> str:
    """Search arXiv for a query and return up to the first 3 results.

    Args:
        query (str): The search query.

    Returns:
        str: A string rendering of a list of {source, page, content}
        dicts, or "No results found." when nothing matched.
    """
    loader = ArxivLoader(query=query, load_max_docs=3)
    docs = loader.load()
    results = []
    for doc in docs:
        # Use .get like wiki_search does: direct subscripting raised
        # KeyError when a metadata field was absent.
        results.append({
            "source": doc.metadata.get('source', 'Unknown source'),
            "page": doc.metadata.get('page', 'Unknown page'),
            # Limit content to 1000 characters to keep tool output small.
            "content": doc.page_content[:1000],
        })
    # Always return a string: previously an empty `docs` fell through
    # and the function returned None instead of a usable message.
    return str(results) if results else "No results found."
def tavily_search(query: str) -> str:
    """Search the web with Tavily and return up to 2 results.

    Args:
        query (str): The search query.

    Returns:
        str: A string rendering of a list of {source, title, content}
        dicts, or "No results found." when nothing matched.
    """
    docs = TavilySearchResults(max_results=2).invoke(query)
    results = []
    for doc in docs or []:
        # .get guards against a result missing a field; also fixes the
        # "titel" key typo in the emitted dicts.
        results.append({
            "source": doc.get('url', 'Unknown source'),
            "title": doc.get('title', 'Unknown title'),
            "content": doc.get('content', ''),
        })
    # Always return a string: previously an empty `docs` fell through
    # and the function returned None instead of a usable message.
    return str(results) if results else "No results found."
def wiki_loader(query: str, lang: str = 'en', load_max_docs: int = 3):
    """
    Fetch content from Wikipedia based on a given query.

    Parameters:
    - query (str): The search query for Wikipedia.
    - lang (str): The language edition of Wikipedia to search. Default is 'en'.
    - load_max_docs (int): The maximum number of documents to load. Default is 3.

    Returns:
    - list: A list of {source, page, content} dicts with the fetched
      Wikipedia content, or an empty list on failure.
    """
    try:
        loader = WikipediaLoader(query=query, lang=lang, load_max_docs=load_max_docs)
        documents = loader.load()
        result = []
        for doc in documents:
            # Handle missing metadata fields gracefully.
            result.append({
                "source": doc.metadata.get('source', 'Unknown source'),
                "page": doc.metadata.get('page', 'Unknown page'),
                "content": doc.page_content,
            })
        return result
    except Exception as e:
        # Deliberately best-effort: a network/API failure reports the
        # error and yields no documents rather than crashing the caller.
        print(f"An error occurred: {e}")
        return []