# Source code for banterbot.managers.memory_chain

import logging
import shutil
from typing import Optional

from typing_extensions import Self

import banterbot.paths
from banterbot import config
from banterbot.models.memory import Memory
from banterbot.protos import memory_pb2
from banterbot.utils.nlp import NLP


class MemoryChain:
    """
    MemoryChain manages arrays of memories persisted with Protocol Buffers.

    It can create a fresh, empty memory store on disk, save individual
    memories to binary files, load an existing store back from disk, and
    retrieve memories by keyword — optionally with fuzzy (similarity-based)
    matching — allowing quick access to relevant information.
    """
[docs] @classmethod def create(cls) -> Self: """ Generate a new empty set of memories and associated UUID. Returns: MemoryChain: A new instance of MemoryChain with an empty set of memories and a unique UUID. """ uuid = config.generate_uuid().hex # Create new directory for the new UUID and memory files directory = banterbot.paths.personae / uuid / banterbot.paths.memories directory.mkdir(exist_ok=True, parents=True) # Create new memory index file memory_index = memory_pb2.MemoryIndex() with open(banterbot.paths.personae / uuid / banterbot.paths.memory_index, "wb+") as fs: fs.write(memory_index.SerializeToString()) logging.debug(f"MemoryChain created new UUID: `{uuid}`") return cls(uuid=uuid, memory_index={})
[docs] @classmethod def load(cls, uuid: str) -> Self: """ Load the memories from a binary file using Protocol Buffers deserialization and creates a MemoryChain instance. This method is used to load an existing set of memories from a file, allowing for the continuation of a previous session or the sharing of memories between different instances. Args: uuid (str): The UUID of the memory files to load. Returns: MemoryChain: A new instance of MemoryChain with loaded memories. """ logging.debug(f"MemoryChain loading UUID: `{uuid}`") # Read memory index file memory_index_object = memory_pb2.MemoryIndex() with open(banterbot.paths.personae / uuid / banterbot.paths.memory_index, "rb") as fs: memory_index_object.ParseFromString(fs.read()) # Parse the memory index file into a dictionary memory_index = {entry.keyword: list(entry.memory_uuids) for entry in memory_index_object.entries} return cls(uuid, memory_index)
[docs] @classmethod def delete(cls, uuid: str) -> None: """ Delete the directory associated with a MemoryChain instance. This method is used to clean up the file system by removing the directory and all its contents, including memory files and the memory index file. Args: uuid (str): The UUID associated with this set of memories. """ shutil.rmtree(banterbot.paths.personae / uuid)
def __init__(self, uuid: str, memory_index: dict[str, list[str]]) -> None: """ Initialize a new instance of MemoryChain. Args: uuid (str): The UUID associated with this set of memories. memory_index (dict[str, list[str]]): The dictionary mapping from keyword to list of memory UUIDs. This index is used to efficiently look up memories based on keywords. """ logging.debug(f"MemoryChain initialized with UUID: `{uuid}`") self.uuid = uuid self._directory = banterbot.paths.personae / self.uuid / banterbot.paths.memories self._index_cache = memory_index self._memories = {} self._similarity_cache = {} self._token_cache = {} self._update_token_cache(self._index_cache.keys()) self._find_memories()
[docs] def append(self, memory: Memory) -> None: """ Append a memory to the current set of memories. This method is used to add a single memory to the MemoryChain, allowing for the storage of new information. All changes are saved to file as soon as they are made. Args: memory (Memory): The memory to append. """ self._memories[memory.uuid] = memory self._save_memory(memory=memory) self._update_index(memory=memory) self._save_index()
[docs] def extend(self, memories: list[Memory]) -> None: """ Extend the current set of memories with a list of memories. This method is used to add multiple memories to the MemoryChain at once, allowing for the storage of new information in bulk. All changes are saved to file as soon as they are made. Args: memories (list[Memory]): The list of memories to append. """ for memory in memories: self._memories[memory.uuid] = memory self._save_memory(memory=memory) self._update_index(memory=memory) self._save_index()
[docs] def search(self, keywords: list[str], fuzzy_threshold: Optional[float] = None) -> list[Memory]: """ Look up memories based on keywords. This method is used to retrieve memories that are relevant to the specified keywords. It can also perform fuzzy matching, allowing for the retrieval of memories that are similar to the given keywords based on a similarity threshold. Args: keywords (list[str]): The list of keywords to look up. fuzzy_threshold (Optional[float]): The threshold for fuzzy matching. If None, only returns exact matches. If a value is provided, memories with keywords that have a similarity score greater than or equal to the threshold will also be returned. Returns: list[Memory]: The list of matching memories. """ memory_uuids = set() memories = [] if fuzzy_threshold is not None: self._update_similarity_cache(keywords=keywords) # Find additional keywords that are similar to the specified keywords keywords_extension = [] cache_filtered = [i for i in self._index_cache.keys() if i not in keywords] for keyword in keywords: for keyword_indexed in cache_filtered: if self._similarity_cache[(keyword, keyword_indexed)] >= fuzzy_threshold: keywords_extension.append(keyword_indexed) keywords.extend(keywords_extension) # Add memory UUIDs to the result set for keyword in keywords: if keyword in self._index_cache.keys(): for memory_uuid in self._index_cache[keyword]: if self._memories[memory_uuid] is None: self._load_memory(memory_uuid=memory_uuid) memory_uuids.add(memory_uuid) # Write all the Memory objects into a list for memory_uuid in memory_uuids: memories.append(self._memories[memory_uuid]) return memories
def _save_memory(self, memory: Memory) -> None: """ Save an instance of class Memory to file using protocol buffers. """ filename = memory.uuid + banterbot.paths.protobuf_extension with open(self._directory / filename, "wb+") as fs: fs.write(memory.serialize()) def _save_index(self) -> None: """ Save the current state of the memory index to file. """ memory_index = memory_pb2.MemoryIndex() for keyword, memory_uuids in self._index_cache.items(): memory_index_entry = memory_pb2.MemoryIndexEntry() memory_index_entry.keyword = keyword memory_index_entry.memory_uuids.extend(memory_uuids) memory_index.entries.append(memory_index_entry) with open(banterbot.paths.personae / self.uuid / banterbot.paths.memory_index, "wb+") as fs: fs.write(memory_index.SerializeToString()) def _find_memories(self) -> None: """ Find all memory files associated with this UUID and store them in _memories dictionary. This method is used to locate all memory files that belong to the current MemoryChain instance, allowing for the efficient loading and retrieval of memories when needed. """ directory = self._directory self._memories = {path.stem: None for path in directory.glob("*" + banterbot.paths.protobuf_extension)} def _update_index(self, memory: Memory) -> None: """ Update the memory index with a new memory. This method is used to keep the memory index up-to-date when new memories are added to the MemoryChain. The index allows for efficient look-up of memories based on keywords. Args: memory (Memory): The memory to update the index with. """ for keyword in memory.keywords: if keyword not in self._index_cache.keys(): self._index_cache[keyword] = set() self._index_cache[keyword].add(memory.uuid) self._update_token_cache(memory.keywords) def _load_memory(self, memory_uuid: str) -> None: """ Load a memory from a memory file. This method is used to load a specific memory from a file when it is needed, allowing for efficient memory usage by only loading memories when they are required. 
Args: memory_uuid (str): The UUID of the memory to load. """ filename = memory_uuid + banterbot.paths.protobuf_extension with open(self._directory / filename, "rb") as fs: self._memories[memory_uuid] = Memory.deserialize(fs.read()) def _update_token_cache(self, keywords: list[str]) -> None: """ Update the token cache with new keywords. This method is used to keep the token cache up-to-date when new keywords are added to the MemoryChain. The token cache allows for efficient computation of similarity scores between keywords. Args: keywords (list[str]): The new keywords to update the cache with. """ new_keywords = [keyword for keyword in keywords if keyword not in self._token_cache.keys()] for keyword, token in zip(new_keywords, NLP.tokenize(strings=new_keywords)): self._token_cache[keyword] = token def _update_similarity_cache(self, keywords: list[str]) -> None: """ Update the similarity cache with new keywords. This method is used to keep the similarity cache up-to-date when new keywords are added to the MemoryChain. The similarity cache allows for efficient computation of similarity scores between keywords, enabling fuzzy matching in the search method. Args: keywords (list[str]): The new keywords to update the cache with. """ self._update_token_cache(keywords=keywords) for keyword_indexed in self._index_cache.keys(): for keyword in keywords: pair = (keyword, keyword_indexed) if pair not in self._similarity_cache.keys(): similarity = self._token_cache[keyword].similarity(self._token_cache[keyword_indexed]) self._similarity_cache[pair] = similarity