Skip to main content

oxide_browser/
media_capture.rs

//! Host-side media capture: camera, microphone, and screen (with permission prompts).
//!
//! Guests call the host imports that [`register_media_capture_functions`] registers under the
//! `oxide` module. Native OS prompts (camera / microphone / screen recording) may appear in
//! addition to Oxide’s in-app confirmation dialogs.
6
7use std::collections::VecDeque;
8use std::sync::{Arc, Mutex};
9
10use anyhow::Result;
11use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
12use cpal::SampleFormat;
13use nokhwa::pixel_format::RgbFormat;
14use nokhwa::utils::{CameraIndex, RequestedFormat, RequestedFormatType};
15use nokhwa::Camera;
16use wasmtime::{Caller, Linker};
17
18use crate::capabilities::{console_log, write_guest_bytes, ConsoleLevel, HostState};
19
/// Maximum number of mono `f32` samples kept in the microphone ring buffer.
///
/// Once the ring holds this many samples the oldest ones are discarded to make room, so a guest
/// that reads too slowly loses the earliest audio rather than growing host memory. For scale,
/// 96 000 samples is two seconds of audio at a 48 kHz device rate.
const MIC_RING_CAP: usize = 96_000;
21
22/// Shared capture state for a tab (camera stream, mic ring buffer, counters for pipeline stats).
23#[derive(Default)]
24pub struct MediaCaptureState {
25    camera: Option<Camera>,
26    last_frame_w: u32,
27    last_frame_h: u32,
28    camera_frames: u64,
29    microphone: Option<MicrophoneInput>,
30    screen_w: u32,
31    screen_h: u32,
32    screen_captures: u64,
33}
34
35struct MicrophoneInput {
36    #[allow(dead_code)]
37    stream: cpal::Stream,
38    buffer: Arc<Mutex<VecDeque<f32>>>,
39    sample_rate: u32,
40}
41
42fn prompt(feature: &str) -> bool {
43    matches!(
44        rfd::MessageDialog::new()
45            .set_title("Oxide")
46            .set_description(format!("Allow this page to access {feature}?"))
47            .set_buttons(rfd::MessageButtons::OkCancel)
48            .show(),
49        rfd::MessageDialogResult::Ok | rfd::MessageDialogResult::Yes
50    )
51}
52
53fn push_mono_f32(data: &[f32], channels: usize, ring: &Arc<Mutex<VecDeque<f32>>>) {
54    let ch = channels.max(1);
55    let frames = data.len() / ch;
56    let mut q = ring.lock().unwrap();
57    for i in 0..frames {
58        let mut sum = 0.0f32;
59        for c in 0..ch {
60            sum += data[i * ch + c];
61        }
62        let m = sum / ch as f32;
63        while q.len() >= MIC_RING_CAP {
64            q.pop_front();
65        }
66        q.push_back(m);
67    }
68}
69
70fn push_mono_i16(data: &[i16], channels: usize, ring: &Arc<Mutex<VecDeque<f32>>>) {
71    let ch = channels.max(1);
72    let frames = data.len() / ch;
73    let mut q = ring.lock().unwrap();
74    for i in 0..frames {
75        let mut sum = 0.0f32;
76        for c in 0..ch {
77            sum += data[i * ch + c] as f32 / 32768.0;
78        }
79        let m = sum / ch as f32;
80        while q.len() >= MIC_RING_CAP {
81            q.pop_front();
82        }
83        q.push_back(m);
84    }
85}
86
87fn push_mono_u16(data: &[u16], channels: usize, ring: &Arc<Mutex<VecDeque<f32>>>) {
88    let ch = channels.max(1);
89    let frames = data.len() / ch;
90    let mut q = ring.lock().unwrap();
91    for i in 0..frames {
92        let mut sum = 0.0f32;
93        for c in 0..ch {
94            sum += (data[i * ch + c] as f32 - 32768.0) / 32768.0;
95        }
96        let m = sum / ch as f32;
97        while q.len() >= MIC_RING_CAP {
98            q.pop_front();
99        }
100        q.push_back(m);
101    }
102}
103
104fn open_microphone(
105    console: &Arc<Mutex<Vec<crate::capabilities::ConsoleEntry>>>,
106) -> Result<MicrophoneInput, i32> {
107    let host = cpal::default_host();
108    let device = host
109        .default_input_device()
110        .ok_or_else(|| log_err(console, -2, "[MIC] No input device".to_string()))?;
111    let supported = match device.default_input_config() {
112        Ok(c) => c,
113        Err(e) => {
114            return Err(log_err(console, -3, format!("[MIC] Config: {e}")));
115        }
116    };
117    let sample_format = supported.sample_format();
118    let config: cpal::StreamConfig = supported.clone().into();
119    let channels = config.channels as usize;
120    let ring = Arc::new(Mutex::new(VecDeque::with_capacity(MIC_RING_CAP)));
121    let ring2 = ring.clone();
122    let console_err = console.clone();
123    let err_fn = move |e| {
124        console_log(
125            &console_err,
126            ConsoleLevel::Warn,
127            format!("[MIC] Stream error: {e}"),
128        );
129    };
130
131    let stream = match sample_format {
132        SampleFormat::F32 => device.build_input_stream(
133            &config,
134            move |data: &[f32], _| push_mono_f32(data, channels, &ring2),
135            err_fn,
136            None,
137        ),
138        SampleFormat::I16 => device.build_input_stream(
139            &config,
140            move |data: &[i16], _| push_mono_i16(data, channels, &ring2),
141            err_fn,
142            None,
143        ),
144        SampleFormat::U16 => device.build_input_stream(
145            &config,
146            move |data: &[u16], _| push_mono_u16(data, channels, &ring2),
147            err_fn,
148            None,
149        ),
150        other => {
151            return Err(log_err(
152                console,
153                -3,
154                format!("[MIC] Unsupported sample format {other:?}"),
155            ));
156        }
157    };
158    let stream = match stream {
159        Ok(s) => s,
160        Err(e) => {
161            return Err(log_err(console, -3, format!("[MIC] Build stream: {e}")));
162        }
163    };
164    if let Err(e) = stream.play() {
165        return Err(log_err(console, -3, format!("[MIC] Play: {e}")));
166    }
167    let sample_rate = supported.sample_rate();
168    Ok(MicrophoneInput {
169        stream,
170        buffer: ring,
171        sample_rate,
172    })
173}
174
175fn log_err(
176    console: &Arc<Mutex<Vec<crate::capabilities::ConsoleEntry>>>,
177    code: i32,
178    msg: String,
179) -> i32 {
180    console_log(console, ConsoleLevel::Warn, msg);
181    code
182}
183
184/// Register `api_camera_*`, `api_microphone_*`, `api_screen_capture`, and `api_media_pipeline_stats`.
185pub fn register_media_capture_functions(linker: &mut Linker<HostState>) -> Result<()> {
186    linker.func_wrap(
187        "oxide",
188        "api_camera_open",
189        |caller: Caller<'_, HostState>| -> i32 {
190            let console = caller.data().console.clone();
191            let st = caller.data().media_capture.clone();
192            if !prompt("the camera") {
193                return -1;
194            }
195            let mut g = st.lock().unwrap();
196            if let Some(mut cam) = g.camera.take() {
197                let _ = cam.stop_stream();
198            }
199            let cams = match nokhwa::query(nokhwa::utils::ApiBackend::Auto) {
200                Ok(c) => c,
201                Err(e) => {
202                    return log_err(&console, -2, format!("[CAMERA] No cameras: {e}"));
203                }
204            };
205            if cams.is_empty() {
206                return log_err(&console, -2, "[CAMERA] No cameras found".to_string());
207            }
208            let req = RequestedFormat::new::<RgbFormat>(RequestedFormatType::HighestResolution(
209                nokhwa::utils::Resolution::new(1280, 720),
210            ));
211            let mut camera = match Camera::new(CameraIndex::Index(0), req) {
212                Ok(c) => c,
213                Err(e) => {
214                    return log_err(&console, -3, format!("[CAMERA] Open failed: {e}"));
215                }
216            };
217            if let Err(e) = camera.open_stream() {
218                return log_err(&console, -3, format!("[CAMERA] Stream: {e}"));
219            }
220            g.camera = Some(camera);
221            0
222        },
223    )?;
224
225    linker.func_wrap(
226        "oxide",
227        "api_camera_close",
228        |caller: Caller<'_, HostState>| {
229            let st = caller.data().media_capture.clone();
230            let mut g = st.lock().unwrap();
231            if let Some(mut cam) = g.camera.take() {
232                let _ = cam.stop_stream();
233            }
234        },
235    )?;
236
237    linker.func_wrap(
238        "oxide",
239        "api_camera_capture_frame",
240        |mut caller: Caller<'_, HostState>, out_ptr: u32, out_cap: u32| -> u32 {
241            let mem = match caller.data().memory {
242                Some(m) => m,
243                None => return 0,
244            };
245            let st = caller.data().media_capture.clone();
246            let mut g = st.lock().unwrap();
247            let cam = match g.camera.as_mut() {
248                Some(c) => c,
249                None => return 0,
250            };
251            let buffer = match cam.frame() {
252                Ok(b) => b,
253                Err(e) => {
254                    console_log(
255                        &caller.data().console,
256                        ConsoleLevel::Warn,
257                        format!("[CAMERA] Frame: {e}"),
258                    );
259                    return 0;
260                }
261            };
262            let img = match buffer.decode_image::<RgbFormat>() {
263                Ok(i) => i,
264                Err(e) => {
265                    console_log(
266                        &caller.data().console,
267                        ConsoleLevel::Warn,
268                        format!("[CAMERA] Decode: {e}"),
269                    );
270                    return 0;
271                }
272            };
273            let w = img.width();
274            let h = img.height();
275            let mut rgba = Vec::with_capacity((w * h * 4) as usize);
276            for px in img.pixels() {
277                let p = px.0;
278                rgba.push(p[0]);
279                rgba.push(p[1]);
280                rgba.push(p[2]);
281                rgba.push(255);
282            }
283            g.last_frame_w = w;
284            g.last_frame_h = h;
285            g.camera_frames = g.camera_frames.saturating_add(1);
286            let write_len = rgba.len().min(out_cap as usize);
287            if write_guest_bytes(&mem, &mut caller, out_ptr, &rgba[..write_len]).is_err() {
288                return 0;
289            }
290            write_len as u32
291        },
292    )?;
293
294    linker.func_wrap(
295        "oxide",
296        "api_camera_frame_dimensions",
297        |caller: Caller<'_, HostState>| -> u64 {
298            let g = caller.data().media_capture.lock().unwrap();
299            ((g.last_frame_w as u64) << 32) | (g.last_frame_h as u64)
300        },
301    )?;
302
303    linker.func_wrap(
304        "oxide",
305        "api_microphone_open",
306        |caller: Caller<'_, HostState>| -> i32 {
307            let console = caller.data().console.clone();
308            let st = caller.data().media_capture.clone();
309            if !prompt("the microphone") {
310                return -1;
311            }
312            let mut g = st.lock().unwrap();
313            g.microphone = None;
314            match open_microphone(&console) {
315                Ok(m) => {
316                    g.microphone = Some(m);
317                    0
318                }
319                Err(code) => code,
320            }
321        },
322    )?;
323
324    linker.func_wrap(
325        "oxide",
326        "api_microphone_close",
327        |caller: Caller<'_, HostState>| {
328            let st = caller.data().media_capture.clone();
329            st.lock().unwrap().microphone = None;
330        },
331    )?;
332
333    linker.func_wrap(
334        "oxide",
335        "api_microphone_sample_rate",
336        |caller: Caller<'_, HostState>| -> u32 {
337            let g = caller.data().media_capture.lock().unwrap();
338            g.microphone.as_ref().map(|m| m.sample_rate).unwrap_or(0)
339        },
340    )?;
341
342    linker.func_wrap(
343        "oxide",
344        "api_microphone_read_samples",
345        |mut caller: Caller<'_, HostState>, out_ptr: u32, max_samples: u32| -> u32 {
346            let mem = match caller.data().memory {
347                Some(m) => m,
348                None => return 0,
349            };
350            let st = caller.data().media_capture.clone();
351            let g = st.lock().unwrap();
352            let mic = match g.microphone.as_ref() {
353                Some(m) => m,
354                None => return 0,
355            };
356            let mut q = mic.buffer.lock().unwrap();
357            let take = (max_samples as usize).min(q.len());
358            let mut chunk = Vec::with_capacity(take * 4);
359            for _ in 0..take {
360                if let Some(s) = q.pop_front() {
361                    chunk.extend_from_slice(&s.to_le_bytes());
362                }
363            }
364            let write_len = chunk.len().min((max_samples as usize).saturating_mul(4));
365            if write_guest_bytes(&mem, &mut caller, out_ptr, &chunk[..write_len]).is_err() {
366                return 0;
367            }
368            (write_len / 4) as u32
369        },
370    )?;
371
372    linker.func_wrap(
373        "oxide",
374        "api_screen_capture",
375        |mut caller: Caller<'_, HostState>, out_ptr: u32, out_cap: u32| -> i32 {
376            let mem = match caller.data().memory {
377                Some(m) => m,
378                None => return -4,
379            };
380            let console = caller.data().console.clone();
381            if !prompt("screen capture (the OS may also ask for screen recording permission)") {
382                return -1;
383            }
384            let screens = match screenshots::Screen::all() {
385                Ok(s) => s,
386                Err(e) => {
387                    console_log(
388                        &console,
389                        ConsoleLevel::Warn,
390                        format!("[SCREEN] Enumerate: {e}"),
391                    );
392                    return -2;
393                }
394            };
395            let screen = match screens.first() {
396                Some(s) => s,
397                None => {
398                    return log_err(&console, -2, "[SCREEN] No displays".to_string());
399                }
400            };
401            let img = match screen.capture() {
402                Ok(i) => i,
403                Err(e) => {
404                    console_log(
405                        &console,
406                        ConsoleLevel::Warn,
407                        format!("[SCREEN] Capture: {e}"),
408                    );
409                    return -3;
410                }
411            };
412            let w = img.width();
413            let h = img.height();
414            let rgba = img.into_raw();
415            let st = caller.data().media_capture.clone();
416            {
417                let mut g = st.lock().unwrap();
418                g.screen_w = w;
419                g.screen_h = h;
420                g.screen_captures = g.screen_captures.saturating_add(1);
421            }
422            let write_len = rgba.len().min(out_cap as usize);
423            if write_guest_bytes(&mem, &mut caller, out_ptr, &rgba[..write_len]).is_err() {
424                return -4;
425            }
426            write_len as i32
427        },
428    )?;
429
430    linker.func_wrap(
431        "oxide",
432        "api_screen_capture_dimensions",
433        |caller: Caller<'_, HostState>| -> u64 {
434            let g = caller.data().media_capture.lock().unwrap();
435            ((g.screen_w as u64) << 32) | (g.screen_h as u64)
436        },
437    )?;
438
439    linker.func_wrap(
440        "oxide",
441        "api_media_pipeline_stats",
442        |caller: Caller<'_, HostState>| -> u64 {
443            let g = caller.data().media_capture.lock().unwrap();
444            let mic_ring = g
445                .microphone
446                .as_ref()
447                .map(|m| m.buffer.lock().unwrap().len() as u32)
448                .unwrap_or(0);
449            (g.camera_frames << 32) | (mic_ring as u64)
450        },
451    )?;
452
453    Ok(())
454}