feat(matrix): support voice transcription with E2EE media on dev

This commit is contained in:
argenis de la rosa 2026-03-04 05:51:43 -05:00
parent 32a2cf370d
commit 229ceb4142
2 changed files with 80 additions and 7 deletions

View File

@ -3,6 +3,7 @@ use async_trait::async_trait;
use matrix_sdk::{
authentication::matrix::MatrixSession,
config::SyncSettings,
media::{MediaFormat, MediaRequestParameters},
ruma::{
events::room::message::{
MessageType, OriginalSyncRoomMessageEvent, Relation, RoomMessageEventContent,
@ -33,6 +34,7 @@ pub struct MatrixChannel {
resolved_room_id_cache: Arc<RwLock<Option<String>>>,
sdk_client: Arc<OnceCell<MatrixSdkClient>>,
http_client: Client,
transcription: Option<crate::config::TranscriptionConfig>,
}
impl std::fmt::Debug for MatrixChannel {
@ -41,6 +43,7 @@ impl std::fmt::Debug for MatrixChannel {
.field("homeserver", &self.homeserver)
.field("room_id", &self.room_id)
.field("allowed_users", &self.allowed_users)
.field("transcription_enabled", &self.transcription.is_some())
.finish_non_exhaustive()
}
}
@ -172,9 +175,18 @@ impl MatrixChannel {
resolved_room_id_cache: Arc::new(RwLock::new(None)),
sdk_client: Arc::new(OnceCell::new()),
http_client: Client::new(),
transcription: None,
}
}
/// Configure voice transcription.
pub fn with_transcription(mut self, config: crate::config::TranscriptionConfig) -> Self {
if config.enabled {
self.transcription = Some(config);
}
self
}
pub fn with_mention_only(mut self, mention_only: bool) -> Self {
self.mention_only = mention_only;
self
@ -224,7 +236,7 @@ impl MatrixChannel {
}
fn is_supported_message_type(msgtype: &str) -> bool {
matches!(msgtype, "m.text" | "m.notice")
matches!(msgtype, "m.text" | "m.notice" | "m.audio")
}
fn has_non_empty_body(body: &str) -> bool {
@ -745,6 +757,7 @@ impl Channel for MatrixChannel {
let dedupe_for_handler = Arc::clone(&recent_event_cache);
let bot_dedupe_for_handler = Arc::clone(&recent_bot_event_cache);
let mention_only_for_handler = self.mention_only;
let transcription_for_handler = self.transcription.clone();
client.add_event_handler(move |event: OriginalSyncRoomMessageEvent, room: Room| {
let tx = tx_handler.clone();
@ -753,6 +766,7 @@ impl Channel for MatrixChannel {
let allowed_users = allowed_users_for_handler.clone();
let dedupe = Arc::clone(&dedupe_for_handler);
let bot_dedupe = Arc::clone(&bot_dedupe_for_handler);
let transcription = transcription_for_handler.clone();
async move {
if room.room_id().as_str() != target_room.as_str() {
@ -776,6 +790,45 @@ impl Channel for MatrixChannel {
let body = match &event.content.msgtype {
MessageType::Text(content) => content.body.clone(),
MessageType::Notice(content) => content.body.clone(),
MessageType::Audio(content) => {
// Check if transcription is enabled
if let Some(ref tc) = transcription {
if tc.enabled {
// 2. Media Acquisition
// We use the room's client to fetch the actual media bytes.
// This handles decryption automatically if the room is E2EE.
let media_request = MediaRequestParameters {
source: content.source.clone(),
format: MediaFormat::File,
};
match room.client().media().get_media_content(&media_request, true).await {
Ok(audio_data) => {
// 3. Transcription via local service
match crate::channels::transcription::transcribe_audio(
audio_data,
&content.body,
tc,
).await {
Ok(text) => text,
Err(e) => {
tracing::warn!("Matrix voice transcription failed: {e}");
format!("[Audio transcription failed: {}]", content.body)
}
}
}
Err(e) => {
let safe_error = MatrixChannel::sanitize_error_for_log(&e);
tracing::warn!("Failed to download Matrix audio: {safe_error}");
format!("[Audio download failed: {}]", content.body)
}
}
} else {
return;
}
} else {
return;
}
}
_ => return,
};
@ -783,18 +836,18 @@ impl Channel for MatrixChannel {
return;
}
let mut is_direct_room = false;
let mut is_mentioned = false;
let mut is_reply_to_bot = false;
if mention_only_for_handler {
is_direct_room = room.is_direct().await.unwrap_or_else(|error| {
let is_direct_room = room.is_direct().await.unwrap_or_else(|error| {
let safe_error = MatrixChannel::sanitize_error_for_log(&error);
tracing::warn!(
"Matrix is_direct() failed while evaluating mention_only gate: {safe_error}"
);
false
});
let mut is_mentioned = false;
let mut is_reply_to_bot = false;
if !is_direct_room {
is_mentioned =
MatrixChannel::event_mentions_user(&event, &body, my_user_id.as_str());
@ -1018,10 +1071,29 @@ mod tests {
fn supported_message_type_detection() {
assert!(MatrixChannel::is_supported_message_type("m.text"));
assert!(MatrixChannel::is_supported_message_type("m.notice"));
assert!(MatrixChannel::is_supported_message_type("m.audio"));
assert!(!MatrixChannel::is_supported_message_type("m.image"));
assert!(!MatrixChannel::is_supported_message_type("m.file"));
}
#[test]
fn with_transcription_configures_enabled_config() {
let mut tc = crate::config::TranscriptionConfig::default();
tc.enabled = true;
let ch = make_channel().with_transcription(tc);
assert!(ch.transcription.is_some());
}
#[test]
fn with_transcription_ignores_disabled_config() {
let mut tc = crate::config::TranscriptionConfig::default();
tc.enabled = false;
let ch = make_channel().with_transcription(tc);
assert!(ch.transcription.is_none());
}
#[test]
fn body_presence_detection() {
assert!(MatrixChannel::has_non_empty_body("hello"));

View File

@ -4254,7 +4254,8 @@ fn collect_configured_channels(
mx.device_id.clone(),
config.config_path.parent().map(|path| path.to_path_buf()),
)
.with_mention_only(mx.mention_only),
.with_mention_only(mx.mention_only)
.with_transcription(config.transcription.clone()),
),
});
}