Source code for maeser.chat.chat_logs

"""
Module for managing chat logs, including logging and retrieving chat history,
feedback, and training data.

© 2024 Blaine Freestone, Carson Bush

This file is part of Maeser.

Maeser is free software: you can redistribute it and/or modify it under the terms of
the GNU Lesser General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.

Maeser is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with
Maeser. If not, see <https://www.gnu.org/licenses/>.
"""

from maeser.user_manager import UserManager, User
from maeser.render import get_response_html
from abc import ABC, abstractmethod
from datetime import datetime
import time
import yaml
from os import path, stat, walk, mkdir, makedirs
import subprocess
from flask import abort, render_template
import platform


class BaseChatLogsManager(ABC):
    """
    Abstract interface for chat log storage.

    Concrete managers decide how conversations, feedback and training data
    are persisted; this base class only stores the configuration and makes
    sure the log directory exists.
    """

    def __init__(self, chat_log_path: str, user_manager: UserManager | None = None) -> None:
        """
        Initializes the BaseChatLogsManager.

        Args:
            chat_log_path (str): Path to the chat log directory.
            user_manager (UserManager | None): Optional user manager instance.
        """
        self.chat_log_path: str = chat_log_path
        self.user_manager: UserManager | None = user_manager

        # exist_ok=True replaces the former "check, then create" sequence, which
        # could raise FileExistsError when two processes started simultaneously.
        makedirs(self.chat_log_path, exist_ok=True)

    @abstractmethod
    def log(self, branch_name: str, session_id: str, log_data: dict) -> None:
        """
        Abstract method to log chat data.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged.

        Returns:
            None
        """
        pass

    @abstractmethod
    def log_feedback(self, branch_name: str, session_id: str, message_index: int, feedback: str) -> None:
        """
        Abstract method to log feedback for a message.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            message_index (int): The index of the message to add feedback to.
            feedback (str): The feedback to add to the message.

        Returns:
            None
        """
        pass

    @abstractmethod
    def get_chat_history_overview(self, user: User | None) -> list[dict]:
        """
        Abstract method to get an overview of chat history.

        This is used to display a list of overviews of previous chat conversations.

        Args:
            user (User | None): The user to get chat history for.

        Returns:
            list[dict]: A list of dictionaries containing information about
            previous chat conversations. Each should have the following keys:
                - 'branch': The name of the branch.
                - 'session': The session ID for the conversation.
                - 'modified': The timestamp of when the chat conversation was last modified.
                - 'header': The text that will be used as the link text. Usually
                  the first message in the conversation. Could also be a
                  conversation title.
        """
        pass

    @abstractmethod
    def get_chat_logs_overview(self, sort_by: str, order: str, branch_filter: str, feedback_filter: str) -> tuple[list[dict], int, float]:
        """
        Abstract method to get an overview of chat logs.

        Args:
            sort_by (str): The field to sort by.
            order (str): The order to sort by. Either 'asc' or 'desc'.
            branch_filter (str): The branch to filter by.
            feedback_filter (str): The feedback to filter by.

        Returns:
            tuple: A tuple containing:
                - list[dict]: A list of dictionaries containing information about chat logs.
                - int: The total number of tokens used.
                - float: The total cost of the chat logs.
        """
        pass

    @abstractmethod
    def get_chat_history(self, branch_name: str, session_id: str) -> dict:
        """
        Abstract method to get chat history for a session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.

        Returns:
            dict: The chat history for the session.
        """
        pass

    @abstractmethod
    def get_log_file_template(self, filename: str, branch: str) -> str:
        """
        Abstract method to get the jinja template for a log file.

        Args:
            filename (str): The name of the log file.
            branch (str): The branch the log file is in.

        Returns:
            str: The rendered template for the log file.
        """
        pass

    @abstractmethod
    def save_feedback(self, feedback: dict) -> None:
        """
        Abstract method to save feedback input to a file.

        Args:
            feedback (dict): The feedback to save.

        Returns:
            None
        """
        pass

    @abstractmethod
    def save_training_data(self, training_data: dict) -> None:
        """
        Abstract method to save training data to a file.

        Args:
            training_data (dict): The training data to save.

        Returns:
            None
        """
        pass
class ChatLogsManager(BaseChatLogsManager):
    """
    Chat log manager that stores every conversation as a YAML file.

    Layout below ``chat_log_path``:
        - ``chat_history/<branch>/<session_id>.log``: one file per conversation.
        - ``feedback/<timestamp>.log``: free-form feedback submissions.
        - ``training_data/<timestamp>.log``: training data submissions.
    """

    def __init__(self, chat_log_path: str, user_manager: UserManager | None = None) -> None:
        """
        Initializes the ChatLogsManager.

        Args:
            chat_log_path (str): Path to the chat log directory.
            user_manager (UserManager | None): Optional user manager instance,
                forwarded to the base class.
        """
        super().__init__(chat_log_path, user_manager)

    # ------------------------------------------------------------------ #
    # Small shared helpers
    # ------------------------------------------------------------------ #

    def _log_file_path(self, branch_name: str, session_id: str) -> str:
        """Return the path of the log file for a branch/session pair."""
        return f'{self.chat_log_path}/chat_history/{branch_name}/{session_id}.log'

    @staticmethod
    def _load_yaml(file_path: str):
        """Parse the YAML document stored at ``file_path`` and return it."""
        with open(file_path, 'r', encoding='utf-8') as file:
            return yaml.safe_load(file)

    @staticmethod
    def _dump_yaml(file_path: str, data: dict) -> None:
        """Serialize ``data`` as YAML into ``file_path``, replacing its content."""
        with open(file_path, 'w', encoding='utf-8') as file:
            yaml.dump(data, file)

    def _save_timestamped(self, subdirectory: str, payload: dict, label: str) -> None:
        """
        Write ``payload`` to a new timestamped YAML file in ``subdirectory``.

        The timestamp only has one-second resolution, so the file is created
        exclusively and a numeric suffix is appended on collision. Previously a
        second submission within the same second overwrote the first one.

        Args:
            subdirectory (str): Directory below ``chat_log_path`` to write into.
            payload (dict): The data to serialize.
            label (str): Human-readable name used in the confirmation message.
        """
        directory = f'{self.chat_log_path}/{subdirectory}'
        makedirs(directory, exist_ok=True)

        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        filename = f'{directory}/{timestamp}.log'
        attempt = 0
        while True:
            try:
                # Mode 'x' fails instead of truncating an existing file.
                handle = open(filename, 'x', encoding='utf-8')
            except FileExistsError:
                attempt += 1
                filename = f'{directory}/{timestamp}-{attempt}.log'
                continue
            with handle:
                yaml.dump(payload, handle)
            break

        print(f'{label} saved to {filename}')

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #

    def log(self, branch_name: str, session_id: str, log_data: dict) -> None:
        """
        Logs chat data to a YAML file.

        The first call for a session only creates the log file (reading the
        optional 'user' entry); message data is appended by later calls.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged. On creation only 'user' is
                read. On update 'messages' is required, and 'retrieved_context',
                'execution_time', 'tokens' and 'cost' are read when present.

        Returns:
            None
        """
        if not self._does_log_exist(branch_name, session_id):
            self._create_log_file(branch_name, session_id, log_data.get('user', None))
        else:
            self._update_log_file(branch_name, session_id, log_data)

    def log_feedback(self, branch_name: str, session_id: str, message_index: int, feedback: str) -> None:
        """
        Adds feedback to the log for a specific response in a specific session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            message_index (int): The index of the message to add feedback to.
            feedback (str): The feedback to add to the message.

        Returns:
            None
        """
        log_path = self._log_file_path(branch_name, session_id)
        log: dict = self._load_yaml(log_path)
        log['messages'][message_index]['liked'] = feedback
        self._dump_yaml(log_path, log)

    def get_chat_history_overview(self, user: User | None) -> list[dict]:
        """
        Gets an overview of chat history.

        Args:
            user (User | None): The user to get chat history for. ``None``
                selects conversations logged for 'anon'.

        Returns:
            list[dict]: One entry per conversation of the user that has a first
            message, newest first, with the keys 'branch', 'session',
            'modified' and 'header'.
        """
        current_user_name: str = 'anon' if user is None else user.full_id_name

        # .get() keeps a single malformed log entry from breaking the whole list.
        overview = [
            {
                'branch': conversation['branch'],
                'session': conversation['name'].removesuffix('.log'),
                'modified': conversation['modified'],
                'header': conversation.get('first_message'),
            }
            for conversation in self._get_file_list()
            if conversation.get('user') == current_user_name
            and conversation.get('first_message') is not None
        ]

        # Sort conversations by date modified, most recent first
        overview.sort(key=lambda link: link['modified'], reverse=True)
        return overview

    def get_chat_logs_overview(self, sort_by: str, order: str, branch_filter: str, feedback_filter: str) -> tuple[list[dict], int, float]:
        """
        Gets an overview of chat logs.

        Args:
            sort_by (str): The field to sort by.
            order (str): The order to sort by. Either 'asc' or 'desc'.
            branch_filter (str): The branch to filter by (case-insensitive
                substring match). Empty disables the filter.
            feedback_filter (str): 'true' keeps only logs with feedback, any
                other non-empty value keeps only logs without. Empty disables
                the filter.

        Returns:
            tuple: A tuple containing:
                - list[dict]: A list of dictionaries containing information about chat logs.
                - int: The total number of tokens used.
                - float: The total cost of the chat logs.
        """
        log_files = self._get_file_list()

        if branch_filter:
            wanted_branch = branch_filter.lower()
            log_files = [f for f in log_files if wanted_branch in f['branch'].lower()]

        if feedback_filter:
            feedback_filter_bool = feedback_filter.lower() == 'true'
            log_files = [f for f in log_files if f.get('has_feedback', False) == feedback_filter_bool]

        # Entries missing the sort field (or holding None) are grouped together
        # instead of raising KeyError/TypeError during comparison.
        log_files.sort(
            key=lambda f: (f.get(sort_by) is None, f.get(sort_by)),
            reverse=(order == 'desc'),
        )

        # Calculate aggregate number of tokens and cost
        total_tokens = 0
        total_cost = 0.0
        for file in log_files:
            log_path = f'{self.chat_log_path}/chat_history/{file["branch"]}/{file["name"]}'
            try:
                log = self._load_yaml(log_path)
            except (OSError, yaml.YAMLError) as e:
                print(f"Error: Cannot read file {log_path}: {e}")
                continue
            if not isinstance(log, dict):
                continue
            # Log files store their running totals under 'total_tokens' and
            # 'total_cost'; the bare 'tokens'/'cost' keys are only a fallback.
            total_tokens += log.get('total_tokens', log.get('tokens', 0)) or 0
            total_cost += log.get('total_cost', log.get('cost', 0.0)) or 0.0

        return log_files, total_tokens, total_cost

    def get_chat_history(self, branch_name: str, session_id: str) -> dict:
        """
        Retrieves chat history for a specific session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.

        Returns:
            dict: The chat history for the session.
        """
        return self._load_yaml(self._log_file_path(branch_name, session_id))

    def get_log_file_template(self, filename: str, branch: str) -> str:
        """
        Gets the Jinja template for a log file.

        Aborts the request with 404 when the file does not exist and with 500
        when it cannot be parsed.

        Args:
            filename (str): The name of the log file.
            branch (str): The branch the log file is in.

        Returns:
            str: The rendered template for the log file.
        """
        def process_messages(messages: list[dict]) -> list[dict]:
            """
            Convert the content of every message in the conversation to HTML.

            Args:
                messages (list[dict]): The logged messages; modified in place.

            Returns:
                list[dict]: The same messages with HTML content.
            """
            for message in messages:
                message['content'] = get_response_html(message['content'])
            return messages

        # NOTE(review): branch and filename are interpolated into the path
        # unvalidated; confirm callers sanitize them.
        log_path = f'{self.chat_log_path}/chat_history/{branch}/{filename}'
        print(log_path)

        try:
            content = self._load_yaml(log_path)
        except FileNotFoundError:
            abort(404, description='Log file not found')
        except yaml.YAMLError as e:
            abort(500, description=f'Error parsing log file: {e}')

        if not isinstance(content, dict):
            abort(500, description='Error parsing log file: unexpected log format')

        try:
            messages = process_messages(content['messages'])
        except KeyError:
            messages = None

        # The logged branch name (not the URL argument) labels the page.
        logged_branch = content['branch']
        return render_template(
            'display_chat_log.html',
            user_name=content['user'],
            real_name=content['real_name'],
            branch=logged_branch,
            time=content['time'],
            total_cost=round(content['total_cost'], 3),
            total_tokens=content['total_tokens'],
            messages=messages,
            app_name=logged_branch,
        )

    def save_feedback(self, feedback: dict) -> None:
        """
        Saves feedback input to a YAML file.

        Args:
            feedback (dict): The feedback to save.

        Returns:
            None
        """
        self._save_timestamped('feedback', feedback, 'Feedback')

    def save_training_data(self, training_data: dict) -> None:
        """
        Saves training data to a YAML file.

        Args:
            training_data (dict): The training data to save.

        Returns:
            None
        """
        self._save_timestamped('training_data', training_data, 'Training data')

    # ------------------------------------------------------------------ #
    # Internals
    # ------------------------------------------------------------------ #

    @staticmethod
    def _get_creation_time(file_path: str) -> int:
        """
        Return the creation time of a file as seconds since the epoch.

        macOS and Linux are queried through the ``stat`` command. Whenever that
        fails or reports no birth time, ``path.getctime`` is used instead (which
        may reflect the last metadata change rather than creation).

        Args:
            file_path (str): The path to the file.

        Returns:
            int: The creation time in seconds since the epoch.
        """
        system = platform.system()
        if system == 'Darwin':  # macOS
            command = ['stat', '-f', '%B', file_path]
        elif system == 'Linux':
            command = ['stat', '-c', '%W', file_path]
        else:
            command = None

        if command is not None:
            try:
                result = subprocess.run(command, capture_output=True, text=True)
                if result.returncode == 0:
                    created = int(result.stdout.strip())
                    # GNU stat prints 0 when the birth time is unknown.
                    if created > 0:
                        return created
            except (OSError, ValueError):
                # stat binary missing or non-numeric output: use the fallback.
                pass

        return int(path.getctime(file_path))

    @staticmethod
    def _read_log_summary(file_path: str) -> dict:
        """
        Read the summary fields of one chat log.

        Every key is always present; defaults are kept for whatever could not
        be read, so callers can index the result safely.

        Args:
            file_path (str): The path to the log file.

        Returns:
            dict: A dictionary with the keys 'has_feedback', 'first_message',
            'user' and 'real_name'.
        """
        summary = {
            'has_feedback': False,
            'first_message': None,
            'user': 'unknown user',
            'real_name': 'Student',
        }
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                chat_log = yaml.safe_load(file)
            messages = chat_log.get('messages') or []
            summary['has_feedback'] = any('liked' in msg for msg in messages)
            summary['first_message'] = messages[0]["content"] if messages else None
            summary['user'] = chat_log.get('user', 'unknown user')
            summary['real_name'] = chat_log.get('real_name', 'Student')
        except Exception as e:
            # Best effort: a broken log must not hide all the others.
            print(f"Error: Cannot read file {file_path}: {e}")
        return summary

    def _get_file_list(self) -> list[dict]:
        """
        Get the list of chat history files with metadata.

        Returns:
            list[dict]: One dictionary per file below ``chat_history`` with the
            keys 'name', 'created', 'modified', 'branch', 'has_feedback',
            'first_message', 'user' and 'real_name'.
        """
        file_list = []
        for root, _dirs, files in walk(self.chat_log_path + '/chat_history'):
            for file_name in files:
                file_path = path.join(root, file_name)
                if not path.isfile(file_path):
                    continue

                file_info = {
                    'name': file_name,
                    'created': self._get_creation_time(file_path),
                    'modified': stat(file_path).st_mtime,
                    'branch': path.basename(root),  # Get the branch name from the directory
                }
                # Add the details stored inside the log file itself
                file_info.update(self._read_log_summary(file_path))
                file_list.append(file_info)

        return file_list

    def _create_log_file(self, branch_name: str, session_id: str, user: User | None = None) -> None:
        """
        Creates a new log file for a chat session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            user (User | None): Optional User to obtain information from to include in the log.

        Returns:
            None
        """
        # compile log information
        log_info: dict = {
            "session_id": session_id,
            "user": user.full_id_name if user else "anon",
            "real_name": user.realname if user else "anon",
            "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "branch": branch_name,
            "total_cost": 0,
            "total_tokens": 0,
            "messages": []
        }

        # ensure log directory exists (exist_ok avoids a check-then-create race)
        makedirs(f"{self.chat_log_path}/chat_history/{branch_name}", exist_ok=True)

        # create log file
        self._dump_yaml(self._log_file_path(branch_name, session_id), log_info)

    def _update_log_file(self, branch_name: str, session_id: str, log_data: dict) -> None:
        """
        Updates the log file with the new log data.

        The last two entries of ``log_data["messages"]`` are appended as the
        user message and the system response, and the running totals are
        increased.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged. Must contain "messages"
                with at least two entries; "retrieved_context",
                "execution_time", "tokens" and "cost" are optional.

        Returns:
            None
        """
        log_path = self._log_file_path(branch_name, session_id)
        log: dict = self._load_yaml(log_path)

        messages = log.get("messages") or []
        log["messages"] = messages

        messages.append({
            "role": "user",
            "content": log_data["messages"][-2],
        })
        messages.append({
            "role": "system",
            "content": log_data["messages"][-1],
            # Responses without retrieved context log an empty list.
            "context": [context.page_content for context in (log_data.get("retrieved_context") or [])],
            "execution_time": log_data.get("execution_time", 0),
            "tokens_used": log_data.get("tokens", 0),
            "cost": log_data.get("cost", 0)
        })

        # Tolerate log files that are missing the running totals.
        log["total_cost"] = (log.get("total_cost") or 0) + log_data.get("cost", 0)
        log["total_tokens"] = (log.get("total_tokens") or 0) + log_data.get("tokens", 0)

        self._dump_yaml(log_path, log)

    def _does_log_exist(self, branch_name: str, session_id: str) -> bool:
        """
        Checks if a log file exists for the given branch and session ID.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID to check for.

        Returns:
            bool: True if the log file exists, False otherwise.
        """
        return path.exists(self._log_file_path(branch_name, session_id))