Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pjmedia/include/pjmedia.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* @file pjmedia.h
* @brief PJMEDIA main header file.
*/
#include <pjmedia/ai_port.h>
#include <pjmedia/alaw_ulaw.h>
#include <pjmedia/avi.h>
#include <pjmedia/avi_stream.h>
Expand Down
12 changes: 12 additions & 0 deletions pjmedia/include/pjmedia/ai_port.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,18 @@ PJ_DECL(pj_status_t) pjmedia_ai_port_disconnect(pjmedia_ai_port *ai_port);
*/
PJ_DECL(void*) pjmedia_ai_port_get_user_data(pjmedia_ai_port *ai_port);

/**
 * Set the user data associated with the AI port. The stored pointer is
 * the value returned by pjmedia_ai_port_get_user_data().
 *
 * This function does not take any lock itself. If callbacks may
 * be running concurrently, the caller should hold the port's
 * grp_lock (via pjmedia_ai_port_get_port()->grp_lock) when calling
 * this function.
 *
 * @param ai_port The AI port instance. Must not be NULL.
 * @param user_data The user data pointer.
 */
PJ_DECL(void) pjmedia_ai_port_set_user_data(pjmedia_ai_port *ai_port,
void *user_data);

/**
* Create an OpenAI Realtime API backend.
*
Expand Down
7 changes: 7 additions & 0 deletions pjmedia/src/pjmedia/ai_port.c
Original file line number Diff line number Diff line change
Expand Up @@ -801,3 +801,10 @@ PJ_DEF(void*) pjmedia_ai_port_get_user_data(pjmedia_ai_port *ai_port)
PJ_ASSERT_RETURN(ai_port, NULL);
return ai_port->user_data;
}

/*
 * Store an application-supplied pointer in the AI port. This is the value
 * read back by pjmedia_ai_port_get_user_data().
 */
PJ_DEF(void) pjmedia_ai_port_set_user_data(pjmedia_ai_port *ai_port,
void *user_data)
{
/* Reject a NULL port: the macro executes "return" when the check fails. */
PJ_ASSERT_ON_FAIL(ai_port, return);
/* Plain assignment; no lock is taken here, see the header for guidance. */
ai_port->user_data = user_data;
}
1 change: 1 addition & 0 deletions pjsip-apps/src/swig/pjsua2.i
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ using namespace pj;
%feature("director") FindBuddyMatch;
%feature("director") AudioMediaPlayer;
%feature("director") AudioMediaPort;
%feature("director") AudioMediaAiPort;
%feature("director") VideoRecorder;

// PendingJob is only used on Python
Expand Down
91 changes: 91 additions & 0 deletions pjsip-apps/src/swig/python/test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pjsua2 as pj
import sys
import os
import time
from collections import deque
import struct
Expand Down Expand Up @@ -233,6 +234,95 @@ def ua_tonegen_test():

ep.libDestroy()

#
# AI Media Port test
#
class MyAiPort(pj.AudioMediaAiPort):
    """AudioMediaAiPort subclass that records the events it receives.

    Attributes set up by the constructor:
      events      -- type of every event passed to onEvent(), in order
      connected   -- True after a CONNECTED event, False after DISCONNECTED
      transcripts -- text of every TRANSCRIPT event, in order
    """

    def __init__(self):
        super().__init__()
        self.events = []
        self.connected = False
        self.transcripts = []

    def onEvent(self, event):
        """Record the event type, then log the event kinds the test reports."""
        kind = event.type
        self.events.append(kind)

        if kind in (pj.PJMEDIA_AI_EVENT_CONNECTED,
                    pj.PJMEDIA_AI_EVENT_DISCONNECTED):
            # Both connection events update the same flag.
            self.connected = (kind == pj.PJMEDIA_AI_EVENT_CONNECTED)
            if self.connected:
                write(" [AI] Connected\r\n")
            else:
                write(" [AI] Disconnected\r\n")
            return

        if kind == pj.PJMEDIA_AI_EVENT_TRANSCRIPT:
            text = event.text
            self.transcripts.append(text)
            write(" [AI] " + text + "\r\n")
            return

        if kind == pj.PJMEDIA_AI_EVENT_RESPONSE_DONE:
            write(" [AI] Response done\r\n")

def ua_ai_port_test():
    """Interactive smoke test for AudioMediaAiPort.

    Starts the library, creates an AI port with default parameters, routes
    capture device -> AI port -> playback device, and, only when the
    OPENAI_API_KEY environment variable is set, connects to the service and
    waits for ENTER on stdin before disconnecting. Everything is torn down
    again at the end.
    """
    write("AI media port test..\r\n")
    config = pj.EpConfig()

    endpoint = pj.Endpoint()
    endpoint.libCreate()
    endpoint.libInit(config)
    endpoint.libStart()

    # Build the port from default parameters and check those defaults.
    ai_port = MyAiPort()
    param = pj.AiMediaPortParam()
    assert param.vadEnabled == False
    assert param.ptimeMsec == 20

    ai_port.createPort(param)
    write(" AI port created and registered to conf bridge\r\n")

    # Audio path: mic -> AI -> speaker
    ai_port.startTransmit(endpoint.audDevManager().getPlaybackDevMedia())
    endpoint.audDevManager().getCaptureDevMedia().startTransmit(ai_port)
    write(" Audio routing established\r\n")

    # The network part only runs when a key is supplied via the environment.
    token = os.environ.get("OPENAI_API_KEY", "")
    if not token:
        write(" OPENAI_API_KEY not set, skipping AI connection test\r\n")
    else:
        write(" Connecting to OpenAI..\r\n")
        ai_port.connect(
            "wss://api.openai.com/v1/realtime"
            "?model=gpt-4o-mini-realtime-preview",
            token)

        # Pump library events (at most 150 rounds) until the port reports
        # that it is connected.
        for _ in range(150):
            endpoint.libHandleEvents(100)
            if ai_port.connected:
                break

        if not ai_port.connected:
            write(" Connection timeout (non-fatal)\r\n")
        else:
            write(" Connected! Speak into your mic.\r\n")
            write(" Press ENTER to stop.\r\n")
            input()

            write(" Disconnecting..\r\n")
            ai_port.disconnect()
            # Keep pumping events so the close handshake can complete.
            for _ in range(20):
                endpoint.libHandleEvents(100)

    # Undo the audio routing set up above.
    ai_port.stopTransmit(endpoint.audDevManager().getPlaybackDevMedia())
    endpoint.audDevManager().getCaptureDevMedia().stopTransmit(ai_port)
    write(" Audio routing disconnected\r\n")

    # Drop the port before the library itself is destroyed.
    del ai_port
    write(" AI port destroyed\r\n")

    endpoint.libDestroy()
    write(" AI media port test OK\r\n")


class RandomIntVal():
def __init__(self):
self.value = randint(0, 100000)
Expand Down Expand Up @@ -304,6 +394,7 @@ def ua_pending_job_test():
ua_run_test_exception()
ua_run_log_test()
ua_run_ua_test()
ua_ai_port_test()
ua_tonegen_test()
ua_pending_job_test()
sys.exit(0)
Expand Down
11 changes: 11 additions & 0 deletions pjsip-apps/src/swig/symbols.i
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,17 @@ typedef enum pjmedia_vid_stream_rc_method
PJMEDIA_VID_STREAM_RC_SEND_THREAD = 2
} pjmedia_vid_stream_rc_method;

/*
 * AI port event types exported to the SWIG targets. The enumerators are
 * numbered explicitly here; the values are exactly the ones implicit
 * numbering assigns (0, 1, 2, ...).
 */
typedef enum pjmedia_ai_event_type
{
    PJMEDIA_AI_EVENT_CONNECTED      = 0,
    PJMEDIA_AI_EVENT_DISCONNECTED   = 1,
    PJMEDIA_AI_EVENT_TRANSCRIPT     = 2,
    PJMEDIA_AI_EVENT_RESPONSE_START = 3,
    PJMEDIA_AI_EVENT_RESPONSE_DONE  = 4,
    PJMEDIA_AI_EVENT_SPEECH_STARTED = 5,
    PJMEDIA_AI_EVENT_SPEECH_STOPPED = 6
} pjmedia_ai_event_type;

enum pjmedia_file_writer_option
{
PJMEDIA_FILE_WRITE_PCM = 0,
Expand Down
1 change: 1 addition & 0 deletions pjsip-apps/src/swig/symbols.lst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pjmedia/videodev.h pjmedia_vid_dev_hwnd_type
pjmedia/vid_codec.h pjmedia_vid_packing
pjmedia/vid_conf.h pjmedia_vid_conf_op_type
pjmedia/vid_stream.h pjmedia_vid_stream_rc_method
pjmedia/ai_port.h pjmedia_ai_event_type
pjmedia/wav_port.h pjmedia_file_writer_option pjmedia_file_player_option

pjmedia-audiodev/audiodev.h pjmedia_aud_dev_route pjmedia_aud_dev_cap
Expand Down
122 changes: 122 additions & 0 deletions pjsip/include/pjsua2/media.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,128 @@ class AudioMediaPort : public AudioMedia
pjmedia_port *port;
};


/**
 * AI Media Port event data, as passed to AudioMediaAiPort::onEvent().
 *
 * NOTE(review): no constructor is declared here, so in a
 * default-initialized instance (e.g. "AiMediaEvent ev;") the type and
 * status fields hold indeterminate values until they are assigned.
 */
struct AiMediaEvent
{
/** Event type (one of the PJMEDIA_AI_EVENT_* values). */
pjmedia_ai_event_type type;

/** Status code. PJ_SUCCESS for informational events. */
pj_status_t status;

/**
 * Text payload (transcript). Only valid for TRANSCRIPT events.
 */
string text;
};


/**
 * Parameters used when creating an AI media port.
 */
struct AiMediaPortParam
{
    /**
     * Voice activity detection (VAD) switch for the TX path. While it is
     * on, silence frames are not sent to the AI service.
     *
     * Default: false
     */
    bool vadEnabled;

    /**
     * Frame duration (ptime), in milliseconds.
     *
     * Default: 20
     */
    unsigned ptimeMsec;

    /**
     * Default constructor. Sets each field to its documented default.
     */
    AiMediaPortParam()
        : vadEnabled(false),
          ptimeMsec(20)
    {
    }
};


/**
 * AI Media Port.
 *
 * This wraps pjmedia_ai_port as an AudioMedia object that can be
 * connected to the conference bridge. It bridges audio to/from
 * real-time AI services (e.g. OpenAI Realtime API) over WebSocket.
 *
 * Basic usage:
 * 1. Create with createPort() (uses the OpenAI Realtime API backend).
 * 2. Connect to the AI service with connect().
 * 3. Use startTransmit()/stopTransmit() to route audio from/to
 * other conference bridge ports (e.g. AudioMedia from a call).
 * 4. Disconnect with disconnect() when done.
 *
 * Events (connected, transcript, etc.) are delivered via the
 * virtual onEvent() callback; derive from this class and override
 * onEvent() to receive them.
 *
 * NOTE(review): the class stores raw pointers and declares no copy
 * constructor or copy assignment in this header, so copying an instance
 * is presumably unsafe. Confirm against the implementation.
 */
class AudioMediaAiPort : public AudioMedia
{
public:
/**
 * Constructor. The underlying port is not created here; call
 * createPort() for that.
 */
AudioMediaAiPort();

/**
 * Destructor. Disconnects from the AI service (if connected) and
 * unregisters the port from the conference bridge.
 */
virtual ~AudioMediaAiPort();

/**
 * Create an AI media port with the OpenAI Realtime API backend and
 * register it to the conference bridge. The port operates at the
 * backend's native clock rate (e.g. 24kHz for OpenAI); the
 * conference bridge handles resampling.
 *
 * Declared with PJSUA2_THROW(Error): failures are reported as Error.
 *
 * @param prm Creation parameters. When omitted, a
 * default-constructed AiMediaPortParam is used.
 */
void createPort(const AiMediaPortParam &prm = AiMediaPortParam())
PJSUA2_THROW(Error);

/**
 * Connect to the AI service asynchronously. The onEvent() callback
 * will be called with PJMEDIA_AI_EVENT_CONNECTED or
 * PJMEDIA_AI_EVENT_DISCONNECTED when complete.
 *
 * Declared with PJSUA2_THROW(Error): failures are reported as Error.
 *
 * @param url WebSocket URL (e.g.
 * "wss://api.openai.com/v1/realtime?model=...")
 * @param authToken Authentication token (e.g. OpenAI API key).
 */
void connect(const string &url, const string &authToken)
PJSUA2_THROW(Error);

/**
 * Disconnect from the AI service gracefully.
 *
 * Declared with PJSUA2_THROW(Error): failures are reported as Error.
 */
void disconnect() PJSUA2_THROW(Error);

/*
 * Callbacks
 */

/**
 * Called when an AI event occurs (connected, transcript, etc.).
 * This may be called from the ioqueue worker thread, so an override
 * should not assume it runs on the application's own thread.
 *
 * The default implementation ignores the event.
 *
 * @param event The event data.
 */
virtual void onEvent(const AiMediaEvent &event)
{ PJ_UNUSED_ARG(event); }

private:
/* Presumably the memory pool backing this port; confirm in media.cpp. */
pj_pool_t *pool;
/* The wrapped pjmedia AI port instance. */
pjmedia_ai_port *aiPort;
/* Presumably the backend set up by createPort(); confirm in media.cpp. */
pjmedia_ai_backend *backend;
};


/**
* This structure contains additional info about AudioMediaPlayer.
*/
Expand Down
Loading
Loading