Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ public static class MessageTypeName
{
public const string Plain = "plain";
public const string Notification = "notification";
public const string FunctionCall = "function";
public const string Audio = "audio";
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ public class StateConst
public const string AGENT_REDIRECTION_REASON = "agent_redirection_reason";

public const string LANGUAGE = "language";

public const string SUB_CONVERSATION_ID = "sub_conversation_id";
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,25 @@ namespace BotSharp.Abstraction.MLTasks;
public interface IRealTimeCompletion
{
string Provider { get; }
string Model { get; }

void SetModelName(string model);

Task Connect(RealtimeHubConnection conn,
Action onModelReady,
Action<string> onModelAudioDeltaReceived,
Action onModelAudioResponseDone,
Action<string> onAudioTranscriptDone,
Action<string> onModelResponseDone,
Action onUserInterrupted);
Task AppenAudioBuffer(string message);

Task SendEventToModel(object message);
Task Disconnect();

Task<RealtimeSession> CreateSession(Agent agent, List<RoleDialogModel> conversations);
Task<string> UpdateInitialSession(RealtimeHubConnection conn);
Task<string> InsertConversationItem(RoleDialogModel message);
Task TriggerModelInference(string? instructions = null);
Task<List<RoleDialogModel>> OnResponsedDone(RealtimeHubConnection conn, string response);
}
12 changes: 12 additions & 0 deletions src/Infrastructure/BotSharp.Abstraction/Realtime/IRealtimeHub.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using BotSharp.Abstraction.Realtime.Models;
using System.Net.WebSockets;

namespace BotSharp.Abstraction.Realtime;

/// <summary>
/// Realtime hub interface. Manage the WebSocket connection include User, Agent and Model.
/// </summary>
public interface IRealtimeHub
{
Task Listen(WebSocket userWebSocket, Func<string, RealtimeHubConnection> onUserMessageReceived);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace BotSharp.Abstraction.Realtime.Models;

public class RealtimeHubConnection
{
public string Event { get; set; } = null!;
public string StreamId { get; set; } = null!;
public string ConversationId { get; set; } = null!;
public string Data { get; set; } = string.Empty;
public string Model { get; set; } = null!;
public Func<string, object> OnModelMessageReceived { get; set; } = null!;
public Func<object> OnModelAudioResponseDone { get; set; } = null!;
public Func<object> OnModelUserInterrupted { get; set; } = null!;
}
4 changes: 4 additions & 0 deletions src/Infrastructure/BotSharp.Core/BotSharpCoreExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
using BotSharp.Abstraction.Templating;
using BotSharp.Core.Templating;
using BotSharp.Abstraction.Infrastructures.Enums;
using BotSharp.Abstraction.Realtime;
using BotSharp.Core.Realtime;

namespace BotSharp.Core;

Expand Down Expand Up @@ -171,5 +173,7 @@ public static void RegisterPlugins(IServiceCollection services, IConfiguration c
});

services.AddSingleton(loader);

services.AddScoped<IRealtimeHub, RealtimeHub>();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public async Task<Conversation> NewConversation(Conversation sess)
record.Id = sess.Id.IfNullOrEmptyAs(Guid.NewGuid().ToString());
record.UserId = sess.UserId.IfNullOrEmptyAs(foundUserId);
record.Tags = sess.Tags;
record.Title = "New Conversation";
record.Title = string.IsNullOrEmpty(record.Title) ? "New Conversation" : record.Title;

db.CreateNewConversation(record);

Expand Down
155 changes: 155 additions & 0 deletions src/Infrastructure/BotSharp.Core/Realtime/RealtimeHub.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
using BotSharp.Abstraction.Realtime;
using System.Net.WebSockets;
using System;
using BotSharp.Abstraction.Realtime.Models;
using BotSharp.Abstraction.MLTasks;
using BotSharp.Abstraction.Agents.Models;

namespace BotSharp.Core.Realtime;

public class RealtimeHub : IRealtimeHub
{
private readonly IServiceProvider _services;
private readonly ILogger _logger;
public RealtimeHub(IServiceProvider services, ILogger<RealtimeHub> logger)
{
_services = services;
_logger = logger;
}

public async Task Listen(WebSocket userWebSocket,
Func<string, RealtimeHubConnection> onUserMessageReceived)
{
var buffer = new byte[1024 * 4];
WebSocketReceiveResult result;

var llmProviderService = _services.GetRequiredService<ILlmProviderService>();
var model = llmProviderService.GetProviderModel("openai", "gpt-4",
realTime: true).Name;

var completer = _services.GetServices<IRealTimeCompletion>().First(x => x.Provider == "openai");
completer.SetModelName(model);

do
{
result = await userWebSocket.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
string receivedText = Encoding.UTF8.GetString(buffer, 0, result.Count);
_logger.LogDebug($"Received from user: {receivedText}");
if (string.IsNullOrEmpty(receivedText))
{
continue;
}

var conn = onUserMessageReceived(receivedText);
conn.Model = model;

if (conn.Event == "user_connected")
{
await ConnectToModel(completer, userWebSocket, conn);
}
else if (conn.Event == "user_data_received")
{
await completer.AppenAudioBuffer(conn.Data);
}
else if (conn.Event == "user_disconnected")
{
await completer.Disconnect();
}
} while (!result.CloseStatus.HasValue);

await userWebSocket.CloseAsync(result.CloseStatus.Value, result.CloseStatusDescription, CancellationToken.None);
}

private async Task ConnectToModel(IRealTimeCompletion completer, WebSocket userWebSocket, RealtimeHubConnection conn)
{
var hookProvider = _services.GetRequiredService<ConversationHookProvider>();
var storage = _services.GetRequiredService<IConversationStorage>();
var convService = _services.GetRequiredService<IConversationService>();
convService.SetConversationId(conn.ConversationId, []);
var conversation = await convService.GetConversation(conn.ConversationId);
var agentService = _services.GetRequiredService<IAgentService>();
var agent = await agentService.LoadAgent(conversation.AgentId);
var routing = _services.GetRequiredService<IRoutingService>();
var dialogs = convService.GetDialogHistory();
routing.Context.SetDialogs(dialogs);

await completer.Connect(conn,
onModelReady: async () =>
{
// Control initial session
var data = await completer.UpdateInitialSession(conn);
await completer.SendEventToModel(data);

// Add dialog history
foreach (var item in dialogs)
{
var dialogItem = await completer.InsertConversationItem(item);
await completer.SendEventToModel(data);
}

if (dialogs.LastOrDefault()?.Role == AgentRole.Assistant)
{
await completer.TriggerModelInference($"Rephase your last response:\r\n{dialogs.LastOrDefault()?.Content}");
}
else
{
await completer.TriggerModelInference("Reply based on the conversation context.");
}
},
onModelAudioDeltaReceived: async audioDeltaData =>
{
var data = conn.OnModelMessageReceived(audioDeltaData);
await SendEventToUser(userWebSocket, data);
},
onModelAudioResponseDone: async () =>
{
var data = conn.OnModelAudioResponseDone();
await SendEventToUser(userWebSocket, data);
},
onAudioTranscriptDone: async transcript =>
{
var message = new RoleDialogModel(AgentRole.Assistant, transcript);

// append transcript to conversation
storage.Append(conn.ConversationId, message);

foreach (var hook in hookProvider.HooksOrderByPriority)
{
hook.SetAgent(agent)
.SetConversation(conversation);

if (!string.IsNullOrEmpty(transcript))
{
await hook.OnMessageReceived(message);
}
}
},
onModelResponseDone: async response =>
{
var messages = await completer.OnResponsedDone(conn, response);
foreach (var message in messages)
{
// Invoke function
if (message.FunctionName != null)
{
await routing.InvokeFunction(message.FunctionName, message);
var data = await completer.InsertConversationItem(message);
await completer.SendEventToModel(data);
await completer.TriggerModelInference("Reply based on the function's output.");
}
}
},
onUserInterrupted: async () =>
{
var data = conn.OnModelUserInterrupted();
await SendEventToUser(userWebSocket, data);
});
}

private async Task SendEventToUser(WebSocket webSocket, object message)
{
var data = JsonSerializer.Serialize(message);
var buffer = Encoding.UTF8.GetBytes(data);
await webSocket.SendAsync(new ArraySegment<byte>(buffer), WebSocketMessageType.Text, true, CancellationToken.None);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public async Task<bool> InvokeFunction(string name, RoleDialogModel message)
}

// Set result to original message
message.Role = clonedMessage.Role;
message.Role = AgentRole.Function;
message.PostbackFunctionName = clonedMessage.PostbackFunctionName;
message.CurrentAgentId = clonedMessage.CurrentAgentId;
message.Content = clonedMessage.Content;
Expand Down
4 changes: 3 additions & 1 deletion src/Infrastructure/BotSharp.OpenAPI/BotSharp.OpenAPI.csproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>$(TargetFramework)</TargetFramework>
Expand Down Expand Up @@ -47,6 +47,8 @@
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Plugins\BotSharp.Plugin.OpenAI\BotSharp.Plugin.OpenAI.csproj" />
<ProjectReference Include="..\..\Plugins\BotSharp.Plugin.Twilio\BotSharp.Plugin.Twilio.csproj" />
<ProjectReference Include="..\BotSharp.Core\BotSharp.Core.csproj" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
using BotSharp.Abstraction.Functions.Models;
using System.Text.Json.Serialization;

namespace BotSharp.Plugin.OpenAI.Models;
namespace BotSharp.Plugin.OpenAI.Models.Realtime;

public class RealtimeSessionRequest
public class RealtimeSessionBody
{
[JsonPropertyName("id")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string Id { get; set; } = null!;

[JsonPropertyName("object")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string Object { get; set; } = null!;

[JsonPropertyName("model")]
public string Model { get; set; } = "gpt-4o-mini-realtime-preview-2024-12-17";
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string Model { get; set; } = null!;

[JsonPropertyName("temperature")]
public float temperature { get; set; } = 0.8f;
public float Temperature { get; set; } = 0.8f;

[JsonPropertyName("modalities")]
public string[] Modalities { get; set; } = ["audio", "text"];

[JsonPropertyName("input_audio_format")]
public string InputAudioFormat { get; set; } = "pcm16";

[JsonPropertyName("output_audio_format")]
public string OutputAudioFormat { get; set; } = "pcm16";

[JsonPropertyName("instructions")]
public string Instructions { get; set; } = "You are a friendly assistant.";

[JsonPropertyName("voice")]
public string Voice { get; set; } = "sage";

[JsonPropertyName("max_response_output_tokens")]
public int MaxResponseOutputTokens { get; set; } = 512;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
namespace BotSharp.Plugin.OpenAI.Models.Realtime;

public class RealtimeSessionCreationRequest : RealtimeSessionBody
{

}

/// <summary>
/// https://platform.openai.com/docs/api-reference/realtime-client-events/session/update
/// </summary>
public class RealtimeSessionUpdateRequest : RealtimeSessionBody
{

}
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace BotSharp.Abstraction.Realtime.Models;
using BotSharp.Abstraction.Realtime.Models;

namespace BotSharp.Plugin.OpenAI.Models.Realtime;

public class RealtimeSessionUpdate
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
namespace BotSharp.Plugin.OpenAI.Models.Realtime;

public class ResponseAudioDelta : ServerEventResponse
{
[JsonPropertyName("response_id")]
public string ResponseId { get; set; } = null!;

[JsonPropertyName("item_id")]
public string ItemId { get; set; } = null!;

[JsonPropertyName("output_index")]
public int OutputIndex { get; set; }

[JsonPropertyName("content_index")]
public int ContentIndex { get; set; }

[JsonPropertyName("delta")]
public string? Delta { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
namespace BotSharp.Plugin.OpenAI.Models.Realtime;

public class ResponseAudioTranscript : ServerEventResponse
{
[JsonPropertyName("response_id")]
public string ResponseId { get; set; } = null!;

[JsonPropertyName("item_id")]
public string ItemId { get; set; } = null!;

[JsonPropertyName("output_index")]
public int OutputIndex { get; set; }

[JsonPropertyName("content_index")]
public int ContentIndex { get; set; }

[JsonPropertyName("transcript")]
public string? Transcript { get; set; }
}
Loading