"""
Module for managing chat logs, including logging and retrieving chat history,
feedback, and training data.
© 2024 Blaine Freestone, Carson Bush
This file is part of Maeser.
Maeser is free software: you can redistribute it and/or modify it under the terms of
the GNU Lesser General Public License as published by the Free Software Foundation,
either version 3 of the License, or (at your option) any later version.
Maeser is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with
Maeser. If not, see <https://www.gnu.org/licenses/>.
"""
from maeser.user_manager import UserManager, User
from maeser.render import get_response_html
from abc import ABC, abstractmethod
from datetime import datetime
import time
import yaml
from os import path, stat, walk, mkdir, makedirs
import subprocess
from flask import abort, render_template
import platform
class BaseChatLogsManager(ABC):
    """
    Abstract interface for chat log storage.

    Concrete managers implement logging of conversations and feedback, the
    overview/history queries, rendering of a single log, and persistence of
    feedback and training data.
    """

    def __init__(
        self, chat_log_path: str, user_manager: UserManager | None = None
    ) -> None:
        """
        Initializes the BaseChatLogsManager.

        Args:
            chat_log_path (str): Path to the chat log directory.
            user_manager (UserManager | None): Optional user manager instance.

        Raises:
            FileExistsError: If ``chat_log_path`` exists but is not a directory.
        """
        self.chat_log_path: str = chat_log_path
        self.user_manager: UserManager | None = user_manager

        # Create the log directory if it does not exist. ``exist_ok=True``
        # avoids the check-then-create race when several processes start at once.
        makedirs(self.chat_log_path, exist_ok=True)

    @abstractmethod
    def log(self, branch_name: str, session_id: str, log_data: dict) -> None:
        """
        Abstract method to log chat data.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged.

        Returns:
            None
        """

    @abstractmethod
    def log_feedback(
        self, branch_name: str, session_id: str, message_index: int, feedback: str
    ) -> None:
        """
        Abstract method to log feedback for a message.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            message_index (int): The index of the message to add feedback to.
            feedback (str): The feedback to add to the message.

        Returns:
            None
        """

    @abstractmethod
    def get_chat_history_overview(self, user: User | None) -> list[dict]:
        """
        Abstract method to get an overview of chat history.

        This is used to display a list of overviews of previous chat conversations.

        Args:
            user (User | None): The user to get chat history for.

        Returns:
            list[dict]: A list of dictionaries containing information about
            previous chat conversations. Each should have the following keys:

            - 'branch': The name of the branch.
            - 'session': The session ID for the conversation.
            - 'modified': The timestamp of when the chat conversation was last modified.
            - 'header': The text that will be used as the link text. Usually the
              first message in the conversation. Could also be a conversation title.
        """

    @abstractmethod
    def get_chat_logs_overview(
        self,
        sort_by: str,
        order: str,
        branch_filter: str,
        user_filter: str,
        feedback_filter: str,
    ) -> tuple[list[dict], int, float, set[str]]:
        """
        Abstract method to get an overview of chat logs.

        Args:
            sort_by (str): The field to sort by.
            order (str): The order to sort by. Either 'asc' or 'desc'.
            branch_filter (str): The branch to filter by.
            user_filter (str): The user to filter by.
            feedback_filter (str): The feedback to filter by.

        Returns:
            tuple: A tuple containing:

            - list[dict]: A list of dictionaries containing information about chat logs.
            - int: The total number of tokens used.
            - float: The total cost of the chat logs.
            - set[str]: The set of users that appear in the chat logs.
        """

    @abstractmethod
    def get_chat_history(self, branch_name: str, session_id: str) -> dict:
        """
        Abstract method to get chat history for a session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.

        Returns:
            dict: The chat history for the session.
        """

    @abstractmethod
    def get_log_file_template(self, filename: str, branch: str) -> str:
        """
        Abstract method to get the jinja template for a log file.

        Args:
            filename (str): The name of the log file.
            branch (str): The branch the log file is in.

        Returns:
            str: The rendered template for the log file.
        """

    @abstractmethod
    def save_feedback(self, feedback: dict) -> None:
        """
        Abstract method to save feedback input to a file.

        Args:
            feedback (dict): The feedback to save.

        Returns:
            None
        """

    @abstractmethod
    def save_training_data(self, training_data: dict) -> None:
        """
        Abstract method to save training data to a file.

        Args:
            training_data (dict): The training data to save.

        Returns:
            None
        """
class ChatLogsManager(BaseChatLogsManager):
    """
    Chat logs manager that stores each conversation as a YAML file on disk.

    Conversations are written to
    ``<chat_log_path>/chat_history/<branch>/<session_id>.log``. Feedback and
    training data are written to the ``feedback`` and ``training_data``
    subdirectories of ``chat_log_path``.
    """

    def __init__(self, chat_log_path: str) -> None:
        """
        Initializes the ChatLogsManager.

        Args:
            chat_log_path (str): Path to the chat log directory.
        """
        super().__init__(chat_log_path)

    def log(self, branch_name: str, session_id: str, log_data: dict) -> None:
        """
        Logs chat data to a YAML file.

        If no log file exists yet for the session, this call only creates the
        file (using ``log_data["user"]`` if present); the messages in
        ``log_data`` are recorded on subsequent calls.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged. See ``_update_log_file``
                for the keys read when the log already exists.

        Returns:
            None
        """
        if not self._does_log_exist(branch_name, session_id):
            self._create_log_file(branch_name, session_id, log_data.get("user", None))
        else:
            self._update_log_file(branch_name, session_id, log_data)

    def log_feedback(
        self, branch_name: str, session_id: str, message_index: int, feedback: str
    ) -> None:
        """
        Adds feedback to the log for a specific response in a specific session.

        The feedback is stored under the ``"liked"`` key of the message.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            message_index (int): The index of the message to add feedback to.
            feedback (str): The feedback to add to the message.

        Returns:
            None
        """
        log_path = self._log_file_path(branch_name, session_id)
        with open(log_path, "r") as file:
            log: dict = yaml.safe_load(file)

        log["messages"][message_index]["liked"] = feedback

        with open(log_path, "w") as file:
            yaml.dump(log, file)

    def get_chat_history_overview(self, user: User | None) -> list[dict]:
        """
        Gets an overview of chat history.

        Args:
            user (User | None): The user to get chat history for. ``None``
                selects conversations logged under the user name "anon".

        Returns:
            list[dict]: One entry per conversation of the user that has a first
            message, most recently modified first. Each entry has the keys
            'branch', 'session', 'modified' and 'header'.
        """
        current_user_name: str = "anon" if user is None else user.full_id_name

        overview = [
            {
                "branch": conversation["branch"],
                "session": conversation["name"].removesuffix(".log"),
                "modified": conversation["modified"],
                "header": conversation["first_message"],
            }
            for conversation in self._get_file_list()
            # Conversations with no first message are left out of the overview.
            if conversation["user"] == current_user_name
            and conversation["first_message"] is not None
        ]

        # Sort conversations by date modified, newest first
        overview.sort(key=lambda x: x["modified"], reverse=True)
        return overview

    def get_chat_logs_overview(
        self,
        sort_by: str,
        order: str,
        branch_filter: str,
        user_filter: str,
        feedback_filter: str,
    ) -> tuple[list[dict], int, float, set[str]]:
        """
        Gets an overview of chat logs.

        Args:
            sort_by (str): The field to sort by. Must be a key of the file
                dictionaries produced by ``_get_file_list``.
            order (str): The order to sort by. Either 'asc' or 'desc'.
            branch_filter (str): Case-insensitive substring of the branch name
                to filter by. Empty for no filtering.
            user_filter (str): Exact user name to filter by. Empty for no filtering.
            feedback_filter (str): 'true' keeps only logs with feedback, any
                other non-empty value keeps only logs without feedback. Empty
                for no filtering.

        Returns:
            tuple: A tuple containing:

            - list[dict]: A list of dictionaries containing information about chat logs.
            - int: The total number of tokens used by the listed logs.
            - float: The total cost of the listed logs.
            - set[str]: The users of all logs, collected before filtering.
        """
        log_files = self._get_file_list()

        # Collected before any filter is applied, so it covers every log file.
        user_set = {f["user"] for f in log_files}

        if branch_filter:
            log_files = [
                f for f in log_files if branch_filter.lower() in f["branch"].lower()
            ]
        if user_filter:
            log_files = [f for f in log_files if user_filter == f["user"]]
        if feedback_filter:
            feedback_filter_bool = feedback_filter.lower() == "true"
            log_files = [
                f for f in log_files if f["has_feedback"] == feedback_filter_bool
            ]

        log_files.sort(key=lambda x: x[sort_by], reverse=(order == "desc"))

        # Calculate aggregate number of tokens and cost
        total_tokens = 0
        total_cost = 0.0
        for entry in log_files:
            log_path = (
                f'{self.chat_log_path}/chat_history/{entry["branch"]}/{entry["name"]}'
            )
            try:
                with open(log_path, "r") as f:
                    log = yaml.safe_load(f)
            except (OSError, yaml.YAMLError) as e:
                # One unreadable file must not break the whole overview.
                print(f"Error: Cannot read file {log_path}: {e}")
                log = None
            if not isinstance(log, dict):
                # Empty or malformed file: treat it as a log without totals.
                log = {}

            tokens = log.get("total_tokens")
            if tokens is None:
                print(
                    f"\x1b[33mWarning: \"total_tokens\" key is missing from log for file {entry['name']}, defaulting value to 0.\x1b[0m"
                )
            else:
                total_tokens += tokens

            cost = log.get("total_cost")
            if cost is None:
                print(
                    f"\x1b[33mWarning: \"total_cost\" key is missing from log for file {entry['name']}, defaulting value to 0.\x1b[0m"
                )
            else:
                total_cost += cost

        return log_files, total_tokens, total_cost, user_set

    def get_chat_history(self, branch_name: str, session_id: str) -> dict:
        """
        Retrieves chat history for a specific session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.

        Returns:
            dict: The parsed YAML content of the session's log file.
        """
        with open(self._log_file_path(branch_name, session_id), "r") as file:
            return yaml.safe_load(file)

    def get_log_file_template(self, filename: str, branch: str) -> str:
        """
        Gets the Jinja template for a log file.

        Aborts the request with 404 if the log file does not exist or the
        requested path falls outside the chat history directory, and with 500
        if the file is not valid YAML.

        Args:
            filename (str): The name of the log file.
            branch (str): The branch the log file is in.

        Returns:
            str: The rendered template for the log file.
        """

        def process_messages(messages: list) -> list:
            """
            Convert the content of every message in the conversation to HTML.

            Args:
                messages (list): The messages of the conversation. Modified in place.

            Returns:
                list: The same messages with their content rendered as HTML.
            """
            for message in messages:
                message["content"] = get_response_html(message["content"])
            return messages

        history_root = path.abspath(f"{self.chat_log_path}/chat_history")
        log_path = f"{self.chat_log_path}/chat_history/{branch}/{filename}"

        # ``branch`` and ``filename`` are interpolated into a file system path;
        # reject anything (e.g. "..") that would resolve outside chat_history.
        if not path.abspath(log_path).startswith(history_root + path.sep):
            abort(404, description="Log file not found")

        try:
            print(log_path)
            with open(log_path, "r") as file:
                content = yaml.safe_load(file)

            user_name = content["user"]
            real_name = content["real_name"]
            log_branch = content["branch"]
            log_time = content["time"]
            total_cost = round(content["total_cost"], 3)
            total_tokens = content["total_tokens"]
            try:
                messages = process_messages(content["messages"])
            except KeyError:
                # No messages in the log (or a message without content).
                messages = None

            return render_template(
                "display_chat_log.html",
                user_name=user_name,
                real_name=real_name,
                branch=log_branch,
                time=log_time,
                total_cost=total_cost,
                total_tokens=total_tokens,
                messages=messages,
                app_name=log_branch,
            )
        except FileNotFoundError:
            abort(404, description="Log file not found")
        except yaml.YAMLError as e:
            abort(500, description=f"Error parsing log file: {e}")

    def save_feedback(self, feedback: dict) -> None:
        """
        Saves feedback input to a YAML file.

        Args:
            feedback (dict): The feedback to save.

        Returns:
            None
        """
        filename = self._save_timestamped_yaml("feedback", feedback)
        print(f"Feedback saved to {filename}")

    def save_training_data(self, training_data: dict) -> None:
        """
        Saves training data to a YAML file.

        Args:
            training_data (dict): The training data to save.

        Returns:
            None
        """
        filename = self._save_timestamped_yaml("training_data", training_data)
        print(f"Training data saved to {filename}")

    def _save_timestamped_yaml(self, subdirectory: str, data: dict) -> str:
        """
        Writes data as YAML to a new, timestamp-named file.

        The file is named ``<YYYY-mm-dd-HH-MM-SS>.log``. If a file with that
        name already exists (two saves within the same second), a numeric
        suffix (``-1``, ``-2``, ...) is added instead of overwriting it.

        Args:
            subdirectory (str): Directory below ``chat_log_path`` to write to.
                Created if it does not exist.
            data (dict): The data to dump.

        Returns:
            str: The path of the file that was written.
        """
        directory = f"{self.chat_log_path}/{subdirectory}"
        makedirs(directory, exist_ok=True)

        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
        filename = f"{directory}/{timestamp}.log"
        attempt = 0
        while True:
            try:
                # "x" fails instead of truncating when the file already exists.
                f = open(filename, "x")
            except FileExistsError:
                attempt += 1
                filename = f"{directory}/{timestamp}-{attempt}.log"
                continue
            with f:
                yaml.dump(data, f)
            return filename

    def _log_file_path(self, branch_name: str, session_id: str) -> str:
        """
        Builds the path of the log file for a session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.

        Returns:
            str: The path of the session's log file.
        """
        return f"{self.chat_log_path}/chat_history/{branch_name}/{session_id}.log"

    @staticmethod
    def _get_creation_time(file_path: str) -> int:
        """
        Gets the creation time of a file in seconds since the epoch.

        Uses the ``stat`` command on macOS and Linux and ``path.getctime`` on
        other platforms.

        Args:
            file_path (str): The path to the file.

        Returns:
            int: The creation time of the file.

        Raises:
            RuntimeError: If ``stat`` fails or reports no creation time.
            ValueError: If the output of ``stat`` is not a number.
            OSError: If the ``stat`` command cannot be run.
        """
        system = platform.system()
        if system == "Darwin":  # macOS
            command = ["stat", "-f", "%B", file_path]
        elif system == "Linux":
            command = ["stat", "-c", "%W", file_path]
        else:
            # Fallback for other operating systems
            return int(path.getctime(file_path))

        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Error getting creation time: {result.stderr}")
        created = int(result.stdout.strip())
        if created <= 0:
            # GNU stat prints 0 for %W when the birth time is unknown.
            raise RuntimeError("Error getting creation time: not reported")
        return created

    @staticmethod
    def _read_log_metadata(file_path: str) -> dict:
        """
        Reads the metadata shown in overviews from a chat log file.

        Every key is always present in the result: if the file cannot be read
        or parsed, an error is printed and the defaults are kept for whatever
        could not be determined.

        Args:
            file_path (str): The path to the file.

        Returns:
            dict: A dictionary with the keys 'has_feedback', 'first_message',
            'user' and 'real_name'.
        """
        file_info = {
            "has_feedback": False,
            "first_message": None,
            "user": "unknown user",
            "real_name": "Student",
        }
        try:
            with open(file_path, "r") as file:
                chat_log = yaml.safe_load(file)
            messages = chat_log.get("messages", [])
            # A message has feedback once log_feedback stored a "liked" key on it.
            file_info["has_feedback"] = any("liked" in msg for msg in messages)
            file_info["first_message"] = messages[0]["content"] if messages else None
            file_info["user"] = chat_log.get("user", "unknown user")
            file_info["real_name"] = chat_log.get("real_name", "Student")
        except Exception as e:
            # Best effort: a broken log file must not break the listing.
            print(f"Error: Cannot read file {file_path}: {e}")
        return file_info

    def _get_file_list(self) -> list[dict]:
        """
        Get the list of chat history files with metadata.

        Returns:
            list[dict]: One dictionary per file below
            ``<chat_log_path>/chat_history`` with the keys 'name', 'created',
            'modified', 'branch' (name of the containing directory),
            'has_feedback', 'first_message', 'user' and 'real_name'.
        """
        file_list = []
        for root, _dirs, files in walk(self.chat_log_path + "/chat_history"):
            for file_name in files:
                file_path = path.join(root, file_name)
                if not path.isfile(file_path):  # Check if the path is a file
                    continue

                try:
                    created_time = self._get_creation_time(file_path)
                except (RuntimeError, ValueError, OSError):
                    # Fallback if stat doesn't work at all (may show modified time)
                    created_time = int(path.getctime(file_path))

                file_info = {
                    "name": file_name,
                    "created": created_time,
                    "modified": stat(file_path).st_mtime,
                    # Get the branch name from the directory
                    "branch": path.basename(root),
                }
                file_info.update(self._read_log_metadata(file_path))
                file_list.append(file_info)

        return file_list

    def _create_log_file(
        self, branch_name: str, session_id: str, user: User | None = None
    ) -> None:
        """
        Creates a new log file for a chat session.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            user (User | None): Optional User to obtain information from to
                include in the log. Without a user, "anon" is recorded.

        Returns:
            None
        """
        # compile log information
        log_info: dict = {
            "session_id": session_id,
            "user": user.full_id_name if user else "anon",
            "real_name": user.realname if user else "anon",
            "time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "branch": branch_name,
            "total_cost": 0,
            "total_tokens": 0,
            "messages": [],
        }

        # ensure log directory exists (exist_ok avoids a check-then-create race)
        makedirs(f"{self.chat_log_path}/chat_history/{branch_name}", exist_ok=True)

        # create log file
        with open(self._log_file_path(branch_name, session_id), "w") as file:
            yaml.dump(log_info, file)

    def _update_log_file(
        self, branch_name: str, session_id: str, log_data: dict
    ) -> None:
        """
        Updates the log file with the new log data.

        Appends the last two entries of ``log_data["messages"]`` as the user
        message and the chatbot response, and adds the cost and token usage to
        the totals of the log.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID for the conversation.
            log_data (dict): The data to be logged. Must contain "messages";
                "retrieved_context", "execution_time", "tokens_used" and "cost"
                are optional.

        Returns:
            None
        """
        log_path = self._log_file_path(branch_name, session_id)
        with open(log_path, "r") as file:
            log: dict = yaml.safe_load(file)

        messages = log.get("messages") or []
        log["messages"] = messages
        cost = log_data.get("cost", 0)
        tokens_used = log_data.get("tokens_used", 0)

        # Add user message
        messages.append(
            {
                "role": "user",
                "content": log_data["messages"][-2],
            }
        )
        # Add chatbot message and execution stats
        messages.append(
            {
                "role": "system",
                "content": log_data["messages"][-1],
                "context": [
                    context.page_content
                    for context in (log_data.get("retrieved_context") or [])
                ],
                "execution_time": log_data.get("execution_time", 0),
                "tokens_used": tokens_used,
                "cost": cost,
            }
        )

        # Logs without totals are tolerated elsewhere, so start them at 0 here.
        log["total_cost"] = log.get("total_cost", 0) + cost
        log["total_tokens"] = log.get("total_tokens", 0) + tokens_used

        with open(log_path, "w") as file:
            yaml.dump(log, file)

    def _does_log_exist(self, branch_name: str, session_id: str) -> bool:
        """
        Checks if a log file exists for the given branch and session ID.

        Args:
            branch_name (str): The name of the branch.
            session_id (str): The session ID to check for.

        Returns:
            bool: True if the log file exists, False otherwise.
        """
        return path.exists(self._log_file_path(branch_name, session_id))