1
0
Fork 0
mirror of https://github.com/maybe-finance/maybe.git synced 2025-08-10 07:55:21 +02:00

Clean up assistant streaming

This commit is contained in:
Zach Gollwitzer 2025-03-30 20:22:19 -04:00
parent 57023fdc85
commit 34633329e6
5 changed files with 246 additions and 205 deletions

View file

@@ -1,219 +1,71 @@
# Orchestrates LLM interactions for chat conversations by:
# - Streaming generic provider responses
# - Persisting messages and tool calls
# - Broadcasting updates to chat UI
# - Handling provider errors
class Assistant
include Provided
include Provided, Configurable
attr_reader :chat
attr_reader :chat, :instructions
class << self
def for_chat(chat)
new(chat)
config = config_for(chat)
new(chat, instructions: config[:instructions], functions: config[:functions])
end
end
def initialize(chat)
def initialize(chat, instructions: nil, functions: [])
@chat = chat
end
class ToolCaller
def initialize(functions: [])
@functions = functions
end
def call_function(function_request)
name = function_request.function_name
args = JSON.parse(function_request.function_arguments)
fn = get_function(name)
result = fn.call(args)
ToolCall::Function.new(
provider_id: function_request.id,
provider_call_id: function_request.call_id,
function_name: name,
function_arguments: args,
function_result: result
)
rescue => e
fn_execution_details = {
fn_name: name,
fn_args: args
}
message = "Error calling function #{name} with arguments #{args}: #{e.message}"
raise StandardError.new(message)
end
private
attr_reader :functions
def get_function(name)
functions.find { |f| f.name == name }
end
@instructions = instructions
@functions = functions
end
def respond_to(message)
chat.clear_error
pause_to_think
sleep artificial_thinking_delay
provider = get_model_provider(message.ai_model)
tool_caller = ToolCaller.new(functions: functions)
assistant_response = AssistantMessage.new(
chat: chat,
content: "",
ai_model: message.ai_model
)
streamer2 = proc do |chunk|
case chunk.type
when "output_text"
stop_thinking
assistant_response.content += chunk.data
assistant_response.save!
when "response"
stop_thinking
chat.update!(latest_assistant_response_id: chunk.data.id)
end
end
streamer1 = proc do |chunk|
case chunk.type
when "output_text"
stop_thinking
assistant_response.content += chunk.data
assistant_response.save!
when "response"
if chunk.data.function_requests.any?
update_thinking("Analyzing your data to assist you with your question...")
tool_calls = chunk.data.function_requests.map do |fn_request|
tool_caller.call_function(fn_request)
end
assistant_response.tool_calls = tool_calls
assistant_response.save!
provider.chat_response(
message.content,
model: message.ai_model,
instructions: instructions,
functions: functions.map(&:to_h),
function_results: tool_calls.map(&:to_h),
streamer: streamer2
)
else
stop_thinking
end
chat.update!(latest_assistant_response_id: chunk.data.id)
end
end
provider.chat_response(
message.content,
streamer = Assistant::ResponseStreamer.new(
prompt: message.content,
model: message.ai_model,
instructions: instructions,
functions: functions.map(&:to_h),
function_results: [],
streamer: streamer1
assistant: self,
)
streamer.stream_response
rescue => e
chat.add_error(e)
end
def fulfill_function_requests(function_requests)
function_requests.map do |fn_request|
result = function_executor.execute(fn_request)
ToolCall::Function.new(
provider_id: fn_request.id,
provider_call_id: fn_request.call_id,
function_name: fn_request.function_name,
function_arguments: fn_request.function_arguments,
function_result: result
)
end
end
def callable_functions
functions.map do |fn|
fn.new(chat.user)
end
end
def update_thinking(thought)
chat.broadcast_update target: "thinking-indicator", partial: "chats/thinking_indicator", locals: { chat: chat, message: thought }
end
def stop_thinking
chat.broadcast_remove target: "thinking-indicator"
end
private
def update_thinking(thought)
chat.broadcast_update target: "thinking-indicator", partial: "chats/thinking_indicator", locals: { chat: chat, message: thought }
attr_reader :functions
def function_executor
@function_executor ||= FunctionExecutor.new(callable_functions)
end
def stop_thinking
chat.broadcast_remove target: "thinking-indicator"
end
def instructions
<<~PROMPT
## Your identity
You are a friendly financial assistant for an open source personal finance application called "Maybe", which is short for "Maybe Finance".
## Your purpose
You help users understand their financial data by answering questions about their accounts,
transactions, income, expenses, net worth, and more.
## Your rules
Follow all rules below at all times.
### General rules
- Provide ONLY the most important numbers and insights
- Eliminate all unnecessary words and context
- Ask follow-up questions to keep the conversation going. Help educate the user about their own data and entice them to ask more questions.
- Do NOT add introductions or conclusions
- Do NOT apologize or explain limitations
### Formatting rules
- Format all responses in markdown
- Format all monetary values according to the user's preferred currency
- Format dates in the user's preferred format: #{preferred_date_format}
#### User's preferred currency
Maybe is a multi-currency app where each user has a "preferred currency" setting.
When no currency is specified, use the user's preferred currency for formatting and displaying monetary values.
- Symbol: #{preferred_currency.symbol}
- ISO code: #{preferred_currency.iso_code}
- Default precision: #{preferred_currency.default_precision}
- Default format: #{preferred_currency.default_format}
- Separator: #{preferred_currency.separator}
- Delimiter: #{preferred_currency.delimiter}
### Rules about financial advice
You are NOT a licensed financial advisor and therefore, you should not provide any specific investment advice (such as "buy this stock", "sell that bond", "invest in crypto", etc.).
Instead, you should focus on educating the user about personal finance using their own data so they can make informed decisions.
- Do not suggest investments or financial products
- Do not make assumptions about the user's financial situation. Use the functions available to get the data you need.
### Function calling rules
- Use the functions available to you to get user financial data and enhance your responses
- For functions that require dates, use the current date as your reference point: #{Date.current}
- If you suspect that you do not have enough data to 100% accurately answer, be transparent about it and state exactly what
the data you're presenting represents and what context it is in (i.e. date range, account, etc.)
PROMPT
end
def functions
[
Assistant::Function::GetTransactions.new(chat.user),
Assistant::Function::GetAccounts.new(chat.user),
Assistant::Function::GetBalanceSheet.new(chat.user),
Assistant::Function::GetIncomeStatement.new(chat.user)
]
end
def preferred_currency
Money::Currency.new(chat.user.family.currency)
end
def preferred_date_format
chat.user.family.date_format
end
def artificial_thinking_delay
1
def pause_to_think
sleep 1
end
end

View file

@@ -0,0 +1,85 @@
# Supplies the default configuration (system instructions + callable
# functions) used when building an Assistant for a given chat.
module Assistant::Configurable
  extend ActiveSupport::Concern

  class_methods do
    # Builds the per-chat config hash: personalized instructions plus the
    # default set of function classes the assistant may call.
    def config_for(chat)
      family = chat.user.family

      {
        instructions: default_instructions(Money::Currency.new(family.currency), family.date_format),
        functions: default_functions
      }
    end

    private
      # Function classes (not instances) exposed to the LLM by default.
      def default_functions
        [
          Assistant::Function::GetTransactions,
          Assistant::Function::GetAccounts,
          Assistant::Function::GetBalanceSheet,
          Assistant::Function::GetIncomeStatement
        ]
      end

      # System prompt, personalized with the user's currency and date
      # formatting preferences. NOTE: this text is sent verbatim to the
      # LLM provider — treat it as behavior, not documentation.
      def default_instructions(preferred_currency, preferred_date_format)
        <<~PROMPT
          ## Your identity
          You are a friendly financial assistant for an open source personal finance application called "Maybe", which is short for "Maybe Finance".
          ## Your purpose
          You help users understand their financial data by answering questions about their accounts,
          transactions, income, expenses, net worth, and more.
          ## Your rules
          Follow all rules below at all times.
          ### General rules
          - Provide ONLY the most important numbers and insights
          - Eliminate all unnecessary words and context
          - Ask follow-up questions to keep the conversation going. Help educate the user about their own data and entice them to ask more questions.
          - Do NOT add introductions or conclusions
          - Do NOT apologize or explain limitations
          ### Formatting rules
          - Format all responses in markdown
          - Format all monetary values according to the user's preferred currency
          - Format dates in the user's preferred format: #{preferred_date_format}
          #### User's preferred currency
          Maybe is a multi-currency app where each user has a "preferred currency" setting.
          When no currency is specified, use the user's preferred currency for formatting and displaying monetary values.
          - Symbol: #{preferred_currency.symbol}
          - ISO code: #{preferred_currency.iso_code}
          - Default precision: #{preferred_currency.default_precision}
          - Default format: #{preferred_currency.default_format}
          - Separator: #{preferred_currency.separator}
          - Delimiter: #{preferred_currency.delimiter}
          ### Rules about financial advice
          You are NOT a licensed financial advisor and therefore, you should not provide any specific investment advice (such as "buy this stock", "sell that bond", "invest in crypto", etc.).
          Instead, you should focus on educating the user about personal finance using their own data so they can make informed decisions.
          - Do not suggest investments or financial products
          - Do not make assumptions about the user's financial situation. Use the functions available to get the data you need.
          ### Function calling rules
          - Use the functions available to you to get user financial data and enhance your responses
          - For functions that require dates, use the current date as your reference point: #{Date.current}
          - If you suspect that you do not have enough data to 100% accurately answer, be transparent about it and state exactly what
          the data you're presenting represents and what context it is in (i.e. date range, account, etc.)
        PROMPT
      end
  end
end

View file

@@ -0,0 +1,24 @@
# Executes LLM-issued function (tool-call) requests against the set of
# callable function objects registered for the current assistant.
class Assistant::FunctionExecutor
  Error = Class.new(StandardError)

  attr_reader :functions

  # functions - objects responding to #name and #call(args)
  def initialize(functions = [])
    @functions = functions
  end

  # Finds the function matching the request, parses its JSON arguments,
  # and invokes it. Any failure (unknown function, malformed JSON, or an
  # error raised by the function itself) is wrapped in Error.
  #
  # Bug fix vs. original: the rescue previously interpolated `fn.name`,
  # which raised NoMethodError on nil when no function matched (masking
  # the real error); it also referenced `fn_args` which is nil when
  # JSON.parse fails. We now take the name from the request and guard nil.
  def execute(function_request)
    fn_name = function_request.function_name
    fn_args = nil

    fn = find_function(function_request)
    raise Error, "Unknown function: #{fn_name}" if fn.nil?

    fn_args = JSON.parse(function_request.function_args)
    fn.call(fn_args)
  rescue Error
    raise
  rescue => e
    raise Error.new(
      "Error calling function #{fn_name} with arguments #{fn_args}: #{e.message}"
    )
  end

  private
    def find_function(function_request)
      functions.find { |f| f.name == function_request.function_name }
    end
end

View file

@@ -0,0 +1,81 @@
# Streams one LLM response for an Assistant. Acts as the provider's
# streaming callback (#call receives chunks) and, when the provider
# requests tool calls, recursively spawns a follow-up streamer carrying
# the tool results — bounded by MAX_LLM_CALLS.
class Assistant::ResponseStreamer
  # Circuit breaker: hard cap on chained LLM calls for a single user
  # message, so a tool-call loop cannot recurse forever.
  MAX_LLM_CALLS = 5
  MaxCallsError = Class.new(StandardError)

  # assistant_message is nil on the initial call and lazily built; a
  # follow-up streamer receives the same message so tool-call turns
  # append to one AssistantMessage rather than creating a new one.
  def initialize(prompt:, model:, assistant:, assistant_message: nil, llm_call_count: 0)
    @prompt = prompt
    @model = model
    @assistant = assistant
    @llm_call_count = llm_call_count
    @assistant_message = assistant_message
  end

  # Provider streaming callback. Chunk protocol (from the visible code):
  # - "output_text": incremental text; appended to the message and saved.
  # - "response": end-of-response payload; may carry function_requests.
  def call(chunk)
    case chunk.type
    when "output_text"
      assistant.stop_thinking
      assistant_message.content += chunk.data
      assistant_message.save!
    when "response"
      response = chunk.data
      # NOTE(review): stores the AssistantMessage id here, whereas the
      # pre-refactor code stored the provider response id — confirm
      # intentional.
      assistant.chat.update!(latest_assistant_response_id: assistant_message.id)

      if response.function_requests.any?
        assistant.update_thinking("Analyzing your data...")
        # Execute the requested tools and persist them on the message
        # before recursing, so partial progress survives a failure below.
        function_tool_calls = assistant.fulfill_function_requests(response.function_requests)
        assistant_message.tool_calls = function_tool_calls
        assistant_message.save!

        # Circuit breaker
        raise MaxCallsError if llm_call_count >= MAX_LLM_CALLS

        # Follow-up LLM call feeding back the tool results; reuses the
        # same assistant_message and increments the call counter.
        follow_up_streamer = self.class.new(
          prompt: prompt,
          model: model,
          assistant: assistant,
          assistant_message: assistant_message,
          llm_call_count: llm_call_count + 1
        )

        follow_up_streamer.stream_response(
          function_results: function_tool_calls.map(&:to_h)
        )
      else
        assistant.stop_thinking
      end
    end
  end

  # Kicks off the provider call, passing self as the streamer so chunks
  # flow back through #call above.
  def stream_response(function_results: [])
    llm.chat_response(
      prompt: prompt,
      model: model,
      instructions: assistant.instructions,
      functions: assistant.callable_functions,
      function_results: function_results,
      streamer: self
    )
  end

  private
    # NOTE(review): :assistant_message here is shadowed by the explicit
    # reader defined below — the attr_reader entry is redundant.
    attr_reader :prompt, :model, :assistant, :assistant_message, :llm_call_count

    # Lazily builds the message on first access (initial call only).
    def assistant_message
      @assistant_message ||= build_assistant_message
    end

    def llm
      assistant.get_model_provider(model)
    end

    # Empty-content message; content is accumulated chunk by chunk.
    def build_assistant_message
      AssistantMessage.new(
        chat: assistant.chat,
        content: "",
        ai_model: model
      )
    end
end

View file

@@ -13,10 +13,11 @@ class AssistantTest < ActiveSupport::TestCase
)
@assistant = Assistant.for_chat(@chat)
@provider = mock
@assistant.expects(:get_model_provider).with("gpt-4o").returns(@provider)
end
test "responds to basic prompt" do
@assistant.expects(:get_model_provider).with("gpt-4o").returns(@provider)
text_chunk = OpenStruct.new(type: "output_text", data: "Hello from assistant")
response_chunk = OpenStruct.new(
type: "response",
@@ -40,6 +41,10 @@ class AssistantTest < ActiveSupport::TestCase
end
test "responds with tool function calls" do
# We expect 2 total instances of ChatStreamer (initial response + follow up with tool call results)
@assistant.expects(:get_model_provider).with("gpt-4o").returns(@provider).twice
# Only first provider call executes function
Assistant::Function::GetAccounts.any_instance.stubs(:call).returns("test value")
# Call #1: Function requests
@@ -54,7 +59,7 @@ class AssistantTest < ActiveSupport::TestCase
id: "1",
call_id: "1",
function_name: "get_accounts",
function_arguments: "{}",
function_args: "{}",
)
]
)
@@ -79,18 +84,12 @@ class AssistantTest < ActiveSupport::TestCase
previous_response_id: "1"
))
sequence = sequence("provider_chat_response")
@provider.expects(:chat_response).with do |message, **options|
options[:streamer].call(call1_response_chunk)
options[:streamer].call(call2_text_chunk)
options[:streamer].call(call2_response_chunk)
true
end.returns(nil).once.in_sequence(sequence)
@provider.expects(:chat_response).with do |message, **options|
options[:streamer].call(call1_response_chunk)
true
end.returns(nil).once.in_sequence(sequence)
end.returns(nil)
assert_difference "AssistantMessage.count", 1 do
@assistant.respond_to(@message)