diff --git a/crates/amalthea/src/error.rs b/crates/amalthea/src/error.rs index 99fdf15fa..119a39949 100644 --- a/crates/amalthea/src/error.rs +++ b/crates/amalthea/src/error.rs @@ -43,7 +43,6 @@ pub enum Error { UnknownCommName(String), UnknownCommId(String), InvalidCommMessage(String, String, String), - InvalidInputRequest(String), InvalidConsoleInput(String), Anyhow(anyhow::Error), ShellErrorReply(Exception), @@ -196,9 +195,6 @@ impl fmt::Display for Error { msg, id, err ) }, - Error::InvalidInputRequest(message) => { - write!(f, "{message}") - }, Error::InvalidConsoleInput(message) => { write!(f, "{message}") }, @@ -228,6 +224,6 @@ impl From> for Error { macro_rules! anyhow { ($($rest: expr),*) => {{ let message = anyhow::anyhow!($($rest, )*); - crate::error::Error::Anyhow(message) + $crate::error::Error::Anyhow(message) }} } diff --git a/crates/amalthea/src/fixtures/dummy_frontend.rs b/crates/amalthea/src/fixtures/dummy_frontend.rs index 137e453a2..09b0bfe18 100644 --- a/crates/amalthea/src/fixtures/dummy_frontend.rs +++ b/crates/amalthea/src/fixtures/dummy_frontend.rs @@ -21,6 +21,8 @@ use crate::wire::jupyter_message::JupyterMessage; use crate::wire::jupyter_message::Message; use crate::wire::jupyter_message::ProtocolMessage; use crate::wire::jupyter_message::Status; +use crate::wire::shutdown_reply::ShutdownReply; +use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::ExecutionState; use crate::wire::stream::Stream; use crate::wire::wire_message::WireMessage; @@ -36,7 +38,7 @@ pub struct DummyConnection { } pub struct DummyFrontend { - pub _control_socket: Socket, + pub control_socket: Socket, pub shell_socket: Socket, pub iopub_socket: Socket, pub stdin_socket: Socket, @@ -132,7 +134,7 @@ impl DummyFrontend { // the Jupyter specification, these must share a ZeroMQ identity. let shell_id = rand::thread_rng().gen::<[u8; 16]>(); - let _control_socket = Socket::new( + let control_socket = Socket::new( connection.session.clone(), connection.ctx.clone(), String::from("Control"), @@ -198,7 +200,7 @@ impl DummyFrontend { }); Self { - _control_socket, + control_socket, shell_socket, iopub_socket, stdin_socket, @@ -207,12 +209,22 @@ impl DummyFrontend { } } + /// Sends a Jupyter message on the Control socket; returns the ID of the newly + /// created message + pub fn send_control(&self, msg: T) -> String { + Self::send(&self.control_socket, &self.session, msg) + } + /// Sends a Jupyter message on the Shell socket; returns the ID of the newly /// created message pub fn send_shell(&self, msg: T) -> String { Self::send(&self.shell_socket, &self.session, msg) } + pub fn send_shutdown_request(&self, restart: bool) -> String { + self.send_control(ShutdownRequest { restart }) + } + pub fn send_execute_request(&self, code: &str, options: ExecuteRequestOptions) -> String { self.send_shell(ExecuteRequest { code: String::from(code), @@ -224,6 +236,77 @@ impl DummyFrontend { }) } + /// Sends an execute request and handles the standard message flow: + /// busy -> execute_input -> idle -> execute_reply. + /// Asserts that the input code matches and returns the execution count. + #[track_caller] + pub fn execute_request_invisibly(&self, code: &str) -> u32 { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request and handles the standard message flow with a result: + /// busy -> execute_input -> execute_result -> idle -> execute_reply. + /// Asserts that the input code matches and passes the result to the callback. + /// Returns the execution count. + #[track_caller] + pub fn execute_request(&self, code: &str, result_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let result = self.recv_iopub_execute_result(); + result_check(result); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + + /// Sends an execute request that produces an error and handles the standard message flow: + /// busy -> execute_input -> execute_error -> idle -> execute_reply_exception. + /// Passes the error message to the callback for custom assertions. + /// Returns the execution count. + #[track_caller] + pub fn execute_request_error(&self, code: &str, error_check: F) -> u32 + where + F: FnOnce(String), + { + self.send_execute_request(code, ExecuteRequestOptions::default()); + self.recv_iopub_busy(); + + let input = self.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + let error_msg = self.recv_iopub_execute_error(); + error_check(error_msg); + + self.recv_iopub_idle(); + + let execution_count = self.recv_shell_execute_reply_exception(); + assert_eq!(execution_count, input.execution_count); + + execution_count + } + /// Sends a Jupyter message on the Stdin socket pub fn send_stdin(&self, msg: T) { Self::send(&self.stdin_socket, &self.session, msg); @@ -236,6 +319,7 @@ impl DummyFrontend { id } + #[track_caller] pub fn recv(socket: &Socket) -> Message { // It's important to wait with a timeout because the kernel thread might have // panicked, preventing it from sending the expected message. The tests would then @@ -246,6 +330,8 @@ impl DummyFrontend { // // Note that the panic hook will still have run to record the panic, so we'll get // expected panic information in the test output. + // + // If you're debugging tests, you'll need to bump this timeout to a large value. if socket.poll_incoming(10000).unwrap() { return Message::read_from_socket(socket).unwrap(); } @@ -253,21 +339,39 @@ impl DummyFrontend { panic!("Timeout while expecting message on socket {}", socket.name); } + /// Receives a Jupyter message from the Control socket + #[track_caller] + pub fn recv_control(&self) -> Message { + Self::recv(&self.control_socket) + } + /// Receives a Jupyter message from the Shell socket + #[track_caller] pub fn recv_shell(&self) -> Message { Self::recv(&self.shell_socket) } /// Receives a Jupyter message from the IOPub socket + #[track_caller] pub fn recv_iopub(&self) -> Message { Self::recv(&self.iopub_socket) } /// Receives a Jupyter message from the Stdin socket + #[track_caller] pub fn recv_stdin(&self) -> Message { Self::recv(&self.stdin_socket) } + /// Receive from Control and assert `ShutdownReply` message. + #[track_caller] + pub fn recv_control_shutdown_reply(&self) -> ShutdownReply { + let message = self.recv_control(); + assert_matches!(message, Message::ShutdownReply(message) => { + message.content + }) + } + /// Receive from Shell and assert `ExecuteReply` message. /// Returns `execution_count`. #[track_caller] diff --git a/crates/amalthea/src/socket/control.rs b/crates/amalthea/src/socket/control.rs index 8e7d05716..829732da8 100644 --- a/crates/amalthea/src/socket/control.rs +++ b/crates/amalthea/src/socket/control.rs @@ -102,6 +102,11 @@ impl Control { H: FnOnce(JupyterMessage) -> Result<(), Error>, { // Enter the kernel-busy state in preparation for handling the message. + // The protocol specification does not mandate status messages for + // Control, but we emit them for compatibility with ipykernel: + // https://github.com/ipython/ipykernel/pull/585. These status messages + // can be discriminated from those on Shell by examining the parent + // header. if let Err(err) = self.send_state(req.clone(), ExecutionState::Busy) { warn!("Failed to change kernel status to busy: {err}"); } diff --git a/crates/amalthea/src/wire/jupyter_message.rs b/crates/amalthea/src/wire/jupyter_message.rs index 76d835605..b60ae002f 100644 --- a/crates/amalthea/src/wire/jupyter_message.rs +++ b/crates/amalthea/src/wire/jupyter_message.rs @@ -46,6 +46,7 @@ use crate::wire::is_complete_reply::IsCompleteReply; use crate::wire::is_complete_request::IsCompleteRequest; use crate::wire::kernel_info_request::KernelInfoRequest; use crate::wire::originator::Originator; +use crate::wire::shutdown_reply::ShutdownReply; use crate::wire::shutdown_request::ShutdownRequest; use crate::wire::status::KernelStatus; use crate::wire::wire_message::WireMessage; @@ -101,6 +102,7 @@ pub enum Message { // Control InterruptReply(JupyterMessage), InterruptRequest(JupyterMessage), + ShutdownReply(JupyterMessage), ShutdownRequest(JupyterMessage), // Registration HandshakeRequest(JupyterMessage), @@ -163,6 +165,7 @@ impl TryFrom<&Message> for WireMessage { Message::IsCompleteRequest(msg) => WireMessage::try_from(msg), Message::KernelInfoReply(msg) => WireMessage::try_from(msg), Message::KernelInfoRequest(msg) => WireMessage::try_from(msg), + Message::ShutdownReply(msg) => WireMessage::try_from(msg), Message::ShutdownRequest(msg) => WireMessage::try_from(msg), Message::Status(msg) => WireMessage::try_from(msg), Message::CommInfoReply(msg) => WireMessage::try_from(msg), @@ -245,6 +248,9 @@ impl TryFrom<&WireMessage> for Message { if kind == UpdateDisplayData::message_type() { return Ok(Message::UpdateDisplayData(JupyterMessage::try_from(msg)?)); } + if kind == ShutdownReply::message_type() { + return Ok(Message::ShutdownReply(JupyterMessage::try_from(msg)?)); + } if kind == ShutdownRequest::message_type() { return Ok(Message::ShutdownRequest(JupyterMessage::try_from(msg)?)); } diff --git a/crates/amalthea/tests/client.rs b/crates/amalthea/tests/client.rs index e20f27fdb..26f904093 100644 --- a/crates/amalthea/tests/client.rs +++ b/crates/amalthea/tests/client.rs @@ -19,6 +19,7 @@ use amalthea::wire::comm_info_request::CommInfoRequest; use amalthea::wire::comm_msg::CommWireMsg; use amalthea::wire::comm_open::CommOpen; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use amalthea::wire::status::ExecutionState; use assert_matches::assert_matches; @@ -63,6 +64,34 @@ fn test_amalthea_execute_request() { frontend.recv_iopub_idle(); } +#[test] +fn test_amalthea_shutdown_request() { + let frontend = DummyAmaltheaFrontend::lock(); + + // Send a shutdown request with restart = false + frontend.send_shutdown_request(false); + + // Shutdown requests generate busy/idle status messages on IOPub + frontend.recv_iopub_busy(); + + // Receive the shutdown reply + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_idle(); + + // Test with restart = true + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); +} + #[test] fn test_amalthea_input_request() { let frontend = DummyAmaltheaFrontend::lock(); diff --git a/crates/ark/src/control.rs b/crates/ark/src/control.rs index 93fcfbc9b..b005e8485 100644 --- a/crates/ark/src/control.rs +++ b/crates/ark/src/control.rs @@ -36,6 +36,12 @@ impl ControlHandler for Control { ) -> Result { log::info!("Received shutdown request: {msg:?}"); + // Interrupt any ongoing computation. We shut down from ReadConsole when + // R has become idle again. Note that Positron will have interrupted us + // beforehand, but another frontend might not have, and it's good to + // have this as a defensive measure in any case. + crate::sys::control::handle_interrupt_request(); + // According to the Jupyter protocol we should block here until the // shutdown is complete. However AFAICS ipykernel doesn't wait // until complete shutdown before replying and instead just signals diff --git a/crates/ark/src/dap/dap_r_main.rs b/crates/ark/src/dap/dap_r_main.rs index 2943bde85..cd436e78e 100644 --- a/crates/ark/src/dap/dap_r_main.rs +++ b/crates/ark/src/dap/dap_r_main.rs @@ -141,7 +141,7 @@ impl RMainDap { self.debugging = false; } - pub fn handle_stdout(&mut self, content: &str) { + pub fn handle_write_console(&mut self, content: &str) { if let DebugCallText::Capturing(ref mut call_text) = self.call_text { // Append to current expression if we are currently capturing stdout call_text.push_str(content); @@ -164,7 +164,11 @@ impl RMainDap { } } - pub fn finalize_call_text(&mut self) { + pub fn handle_read_console(&mut self) { + // Upon entering read-console, finalize any debug call text that we were capturing. + // At this point, the user can either advance the debugger, causing us to capture + // a new expression, or execute arbitrary code, where we will reuse a finalized + // debug call text to maintain the debug state. match &self.call_text { // If not debugging, nothing to do. DebugCallText::None => (), diff --git a/crates/ark/src/errors.rs b/crates/ark/src/errors.rs index ba36a76b2..960d1b296 100644 --- a/crates/ark/src/errors.rs +++ b/crates/ark/src/errors.rs @@ -5,6 +5,9 @@ // // +use amalthea::wire::exception::Exception; +use harp::exec::r_peek_error_buffer; +use harp::exec::RE_STACK_OVERFLOW; use harp::object::RObject; use harp::r_symbol; use harp::session::r_format_traceback; @@ -37,9 +40,16 @@ unsafe extern "C-unwind" fn ps_record_error(evalue: SEXP, traceback: SEXP) -> an Vec::::new() }); - main.error_occurred = true; - main.error_message = evalue; - main.error_traceback = traceback; + main.last_error = Some( + // We don't fill out `ename` with anything meaningful because typically + // R errors don't have names. We could consider using the condition class + // here, which r-lib/tidyverse packages have been using more heavily. + Exception { + ename: String::from(""), + evalue, + traceback, + }, + ); Ok(R_NilValue) } @@ -67,3 +77,12 @@ unsafe extern "C-unwind" fn ps_rust_backtrace() -> anyhow::Result { let trace = format!("{trace}"); Ok(*RObject::from(trace)) } + +pub(crate) fn stack_overflow_occurred() -> bool { + // Error handlers are not called on stack overflow so the error flag + // isn't set. Instead we detect stack overflows by peeking at the error + // buffer. The message is explicitly not translated to save stack space + // so the matching should be reliable. + let err_buf = r_peek_error_buffer(); + RE_STACK_OVERFLOW.is_match(&err_buf) +} diff --git a/crates/ark/src/fixtures/dummy_frontend.rs b/crates/ark/src/fixtures/dummy_frontend.rs index 5823dd16c..74a951b7d 100644 --- a/crates/ark/src/fixtures/dummy_frontend.rs +++ b/crates/ark/src/fixtures/dummy_frontend.rs @@ -4,11 +4,13 @@ use std::sync::Arc; use std::sync::Mutex; use std::sync::MutexGuard; use std::sync::OnceLock; +use std::time::Duration; use amalthea::fixtures::dummy_frontend::DummyConnection; use amalthea::fixtures::dummy_frontend::DummyFrontend; use crate::interface::SessionMode; +use crate::interface::CLEANUP_SIGNAL; use crate::repos::DefaultRepos; // There can be only one frontend per process. Needs to be in a mutex because @@ -62,6 +64,22 @@ impl DummyArkFrontend { } } + /// Wait for R cleanup to start (indicating shutdown has been initiated). + /// Panics if cleanup doesn't start within the timeout. + #[track_caller] + pub fn wait_for_cleanup() { + let (lock, cvar) = &CLEANUP_SIGNAL; + let result = cvar + .wait_timeout_while(lock.lock().unwrap(), Duration::from_secs(3), |started| { + !*started + }) + .unwrap(); + + if !*result.0 { + panic!("Cleanup did not start within timeout"); + } + } + fn get_frontend() -> &'static Arc> { // These are the hard-coded defaults. Call `init()` explicitly to // override. diff --git a/crates/ark/src/interface.rs b/crates/ark/src/interface.rs index 7c6f7e3d8..0fc04d1d8 100644 --- a/crates/ark/src/interface.rs +++ b/crates/ark/src/interface.rs @@ -10,6 +10,7 @@ // The frontend methods called by R are forwarded to the corresponding // `RMain` methods via `R_MAIN`. +use std::cell::Cell; use std::cell::RefCell; use std::cell::UnsafeCell; use std::collections::HashMap; @@ -59,20 +60,23 @@ use harp::command::r_home_setup; use harp::environment::r_ns_env; use harp::environment::Environment; use harp::environment::R_ENVS; +use harp::exec::exec_with_cleanup; use harp::exec::r_check_stack; use harp::exec::r_peek_error_buffer; use harp::exec::r_sandbox; use harp::exec::RFunction; use harp::exec::RFunctionExt; -use harp::exec::RE_STACK_OVERFLOW; use harp::library::RLibraries; use harp::line_ending::convert_line_endings; use harp::line_ending::LineEnding; use harp::object::r_null_or_try_into; use harp::object::RObject; +use harp::r_null; use harp::r_symbol; use harp::routines::r_register_routines; use harp::session::r_traceback; +use harp::srcref::get_block_srcrefs; +use harp::srcref::get_srcref; use harp::utils::r_is_data_frame; use harp::utils::r_typeof; use harp::R_MAIN_THREAD_ID; @@ -97,6 +101,7 @@ use crate::dap::dap_r_main::FrameInfoId; use crate::dap::dap_r_main::RMainDap; use crate::dap::Dap; use crate::errors; +use crate::errors::stack_overflow_occurred; use crate::help::message::HelpEvent; use crate::help::r_help::RHelp; use crate::lsp::events::EVENTS; @@ -127,7 +132,6 @@ use crate::srcref::ark_uri; use crate::srcref::ns_populate_srcref; use crate::srcref::resource_loaded_namespaces; use crate::startup; -use crate::strings::lines; use crate::sys::console::console_to_utf8; use crate::ui::UiCommMessage; use crate::ui::UiCommSender; @@ -214,10 +218,9 @@ pub struct RMain { /// by forwarding them through the UI comm. Optional, and really Positron specific. ui_comm_tx: Option, - /// Represents whether an error occurred during R code execution. - pub error_occurred: bool, - pub error_message: String, // `evalue` in the Jupyter protocol - pub error_traceback: Vec, + /// Error captured by our global condition handler during the last iteration + /// of the REPL. + pub(crate) last_error: Option, /// Channel to communicate with the Help thread help_event_tx: Option>, @@ -235,7 +238,7 @@ pub struct RMain { pub positron_ns: Option, - pending_lines: Vec, + pending_inputs: Option, /// Banner output accumulated during startup, but set to `None` after we complete /// the initialization procedure and forward the banner on @@ -259,14 +262,138 @@ pub struct RMain { /// reliable indication of whether we moved since last time. debug_last_stack: Vec, - /// Current topmost environment on the stack while waiting for input in the - /// debugger. This is `Some()` only when R is idle and in a `browser()` - /// prompt. - debug_env: Option, - /// Ever increasing debug session index. Used to create URIs that are only /// valid for a single session. debug_session_index: u32, + + /// Tracks how many nested `r_read_console()` calls are on the stack. + /// Incremented when entering `r_read_console(),` decremented on exit. + read_console_depth: Cell, + + /// Set to true when `r_read_console()` exits. Reset to false at the start + /// of each `r_read_console()` call. Used to detect if `eval()` returned + /// from a nested REPL (the flag will be true when the evaluation returns). + read_console_nested_return: Cell, + + /// Set to true `r_read_console()` exits via an error longjump. Used to + /// detect if we need to go return from `r_read_console()` with a dummy + /// evaluation to reset things like `R_EvalDepth`. + read_console_threw_error: Cell, + + /// Used to track an input to evaluate upon returning to `r_read_console()`, + /// after having returned a dummy input to reset `R_ConsoleIob` in R's REPL. + read_console_next_input: Cell>, + + /// We've received a Shutdown signal and need to return EOF from all nested + /// consoles to get R to shut down + read_console_shutdown: Cell, + + /// Current topmost environment on the stack while waiting for input in ReadConsole + read_console_frame: RefCell, +} + +/// Stack of pending inputs +struct PendingInputs { + /// EXPRSXP vector of parsed expressions + exprs: RObject, + /// Srcref of each expression. If known, this is a list the same length as `exprs`. + srcrefs: Option, + /// Length of `exprs` and `srcrefs` + len: isize, + /// Index into the stack + index: isize, +} + +enum ParseResult { + Success(Option), + SyntaxError(String), +} + +impl PendingInputs { + pub(crate) fn read(input: &str) -> anyhow::Result> { + let status = match harp::parse_status(&harp::ParseInput::Text(input)) { + Err(err) => { + // Failed to even attempt to parse the input, something is seriously wrong + // FIXME: There are some valid syntax errors going through here, e.g. `identity |> _(1)`. + return Ok(ParseResult::SyntaxError(format!("{err}"))); + }, + Ok(status) => status, + }; + + // - Incomplete inputs put R into a state where it expects more input that will never come, so we + // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. + // - Complete statements are obviously fine. + // - Syntax errors will cause R to throw an error, which is expected. + let exprs = match status { + harp::ParseResult::Complete(exprs) => exprs, + harp::ParseResult::Incomplete => { + return Ok(ParseResult::SyntaxError(format!( + "Can't execute incomplete input:\n{input}" + ))); + }, + harp::ParseResult::SyntaxError { message, .. } => { + return Ok(ParseResult::SyntaxError(format!("Syntax error: {message}"))); + }, + }; + + let srcrefs = get_block_srcrefs(exprs.sexp); + let srcrefs = if srcrefs.is_null() { + None + } else { + Some(srcrefs) + }; + + let len = exprs.length(); + let index = 0; + + if len == 0 { + return Ok(ParseResult::Success(None)); + } + + Ok(ParseResult::Success(Some(Self { + exprs, + srcrefs, + len, + index, + }))) + } + + pub(crate) fn is_empty(&self) -> bool { + self.index >= self.len + } + + pub(crate) fn pop(&mut self) -> Option { + if self.index >= self.len { + return None; + } + + let srcref = self + .srcrefs + .as_ref() + .map(|srcrefs| get_srcref(srcrefs.sexp, self.index)); + + let expr = harp::r_list_get(self.exprs.sexp, self.index); + + self.index += 1; + Some(PendingInput { expr, srcref }) + } +} + +#[derive(Debug)] +pub(crate) struct PendingInput { + expr: RObject, + srcref: Option, +} + +#[derive(Debug, Clone)] +enum ConsoleValue { + Success(serde_json::Map), + Error(Exception), +} + +enum WaitFor { + InputReply, + ExecuteRequest, } /// Represents the currently active execution request from the frontend. It @@ -287,6 +414,19 @@ pub struct KernelInfo { pub continuation_prompt: Option, } +/// The kind of prompt we're handling in the REPL. +#[derive(Clone, Debug, PartialEq)] +pub enum PromptKind { + /// A top-level REPL prompt + TopLevel, + + /// A `browser()` debugging prompt + Browser, + + /// A user input request from code, e.g., via `readline()` + InputRequest, +} + /// This struct represents the data that we wish R would pass to /// `ReadConsole()` methods. We need this information to determine what kind /// of prompt we are dealing with. @@ -298,7 +438,7 @@ pub struct PromptInfo { input_prompt: String, /// The continuation prompt string when user supplies incomplete - /// inputs. This always corresponds to `getOption("continue"). We send + /// inputs. This always corresponds to `getOption("continue")`. We send /// it to frontends along with `prompt` because some frontends such as /// Positron do not send incomplete inputs to Ark and take charge of /// continuation prompts themselves. For frontends that can send @@ -306,16 +446,8 @@ pub struct PromptInfo { /// error on them rather than requesting that this be shown. continuation_prompt: String, - /// Whether this is a `browser()` prompt. A browser prompt can be - /// incomplete but is never a user request. - browser: bool, - - /// Whether the last input didn't fully parse and R is waiting for more input - incomplete: bool, - - /// Whether this is a prompt from a fresh REPL iteration (browser or - /// top level) or a prompt from some user code, e.g. via `readline()` - input_request: bool, + /// The kind of prompt we're handling. + kind: PromptKind, } pub enum ConsoleInput { @@ -323,11 +455,13 @@ pub enum ConsoleInput { Input(String), } -pub enum ConsoleResult { +#[derive(Debug)] +pub(crate) enum ConsoleResult { NewInput, + NewPendingInput(PendingInput), Interrupt, Disconnected, - Error(amalthea::Error), + Error(String), } impl RMain { @@ -475,6 +609,9 @@ impl RMain { if let Err(err) = apply_default_repos(default_repos) { log::error!("Error setting default repositories: {err:?}"); } + + // Initialise Ark's last value + libr::SETCDR(r_symbol!(".ark_last_value"), harp::r_null()); } // Now that R has started (emitting any startup messages that we capture in the @@ -603,9 +740,7 @@ impl RMain { execution_count: 0, autoprint_output: String::new(), ui_comm_tx: None, - error_occurred: false, - error_message: String::new(), - error_traceback: Vec::new(), + last_error: None, help_event_tx: None, help_port: None, lsp_events_tx: None, @@ -616,14 +751,19 @@ impl RMain { pending_futures: HashMap::new(), session_mode, positron_ns: None, - pending_lines: Vec::new(), banner: None, r_error_buffer: None, captured_output: String::new(), debug_preserve_focus: false, debug_last_stack: vec![], - debug_env: None, debug_session_index: 1, + pending_inputs: None, + read_console_depth: Cell::new(0), + read_console_nested_return: Cell::new(false), + read_console_threw_error: Cell::new(false), + read_console_next_input: Cell::new(None), + read_console_frame: RefCell::new(RObject::new(unsafe { libr::R_GlobalEnv })), + read_console_shutdown: Cell::new(false), } } @@ -731,11 +871,16 @@ impl RMain { /// * `prompt` - The prompt shown to the user /// * `buf` - Pointer to buffer to receive the user's input (type `CONSOLE_BUFFER_CHAR`) /// * `buflen` - Size of the buffer to receiver user's input - /// * `hist` - Whether to add the input to the history (1) or not (0) + /// * `_hist` - Whether to add the input to the history (1) or not (0) /// - /// Returns a tuple. First value is to be passed on to `ReadConsole()` and - /// indicates whether new input is available. Second value indicates whether - /// we need to call `Rf_onintr()` to process an interrupt. + /// This does two things: + /// - Move the Console state machine to the next state: + /// - Wait for input + /// - Set an active execute request and a list of pending expressions + /// - Set `self.dap.is_debugging()` depending on presence or absence of debugger prompt + /// - Evaluate next pending expression + /// - Close active execute request if pending list is empty + /// - Run an event loop while waiting for input fn read_console( &mut self, prompt: *const c_char, @@ -743,54 +888,40 @@ impl RMain { buflen: c_int, _hist: c_int, ) -> ConsoleResult { + self.dap.handle_read_console(); + + // State machine part of ReadConsole + let info = self.prompt_info(prompt); log::trace!("R prompt: {}", info.input_prompt); - // Upon entering read-console, finalize any debug call text that we were capturing. - // At this point, the user can either advance the debugger, causing us to capture - // a new expression, or execute arbitrary code, where we will reuse a finalized - // debug call text to maintain the debug state. - self.dap.finalize_call_text(); + // Invariant: If we detect a browser prompt, `self.dap.is_debugging()` + // is true. Otherwise it is false. + if matches!(info.kind, PromptKind::Browser) { + // Start or continue debugging with the `debug_preserve_focus` hint + // from the last expression we evaluated + self.start_debug(self.debug_preserve_focus); + } else if self.dap.is_debugging() { + self.stop_debug(); + } - // We get called here everytime R needs more input. This handler - // represents the driving event of a small state machine that manages - // communication between R and the frontend. In the following order: - // - // - If we detect an input request prompt, then we forward the request - // on to the frontend and then fall through to the event loop to wait - // on the input reply. - // - // - If the vector of pending lines is not empty, R might be waiting for - // us to complete an incomplete expression, or we might just have - // completed an intermediate expression (e.g. from an ExecuteRequest - // like `foo\nbar` where `foo` is intermediate and `bar` is final). - // Send the next line to R. - // - // - If the vector of pending lines is empty, and if the prompt is for - // new R code, we close the active ExecuteRequest and send an - // ExecuteReply to the frontend. We then fall through to the event - // loop to wait for more input. - // - // This state machine depends on being able to reliably distinguish - // between readline prompts (from `readline()`, `scan()`, or `menu()`), - // and actual R code prompts (either top-level or from a nested debug - // REPL). A readline prompt should never change our state (in - // particular our vector of pending inputs). We think we are making this - // distinction sufficiently robustly but ideally R would let us know the - // prompt type so there is no ambiguity at all. - // - // R might throw an error at any time while we are working on our vector - // of pending lines, either from a syntax error or from an evaluation - // error. When this happens, we abort evaluation and clear the pending - // lines. - // - // If the vector of pending lines is empty and we detect an incomplete - // prompt, this is a panic. We check ahead of time for complete - // expressions before breaking up an ExecuteRequest in multiple lines, - // so this should not happen. - if let Some(console_result) = self.handle_active_request(&info, buf, buflen) { - return console_result; - }; + if let Some(exception) = self.take_exception() { + // Clear any pending inputs, if any + self.pending_inputs = None; + + // Reply to active request with error + self.handle_active_request(&info, ConsoleValue::Error(exception)); + } else if matches!(info.kind, PromptKind::InputRequest) { + // Request input reply to the frontend and return it to R + return self.handle_input_request(&info, buf, buflen); + } else if let Some(input) = self.pop_pending() { + // Evaluate pending expression if there is any remaining + return self.handle_pending_input(input, buf, buflen); + } else { + // Otherwise reply to active request with accumulated result + let result = self.take_result(); + self.handle_active_request(&info, ConsoleValue::Success(result)); + } // In the future we'll also send browser information, see // https://github.com/posit-dev/positron/issues/3001. Currently this is @@ -800,21 +931,30 @@ impl RMain { // often. We'd still push a `DidChangeConsoleInputs` notification from // here, but only containing high-level information such as `search()` // contents and `ls(rho)`. - if !info.browser && !info.incomplete && !info.input_request { + if !self.dap.is_debugging() && !matches!(info.kind, PromptKind::InputRequest) { self.refresh_lsp(); } // Signal prompt EVENTS.console_prompt.emit(()); - if info.browser { - self.start_debug(); - } else { - if self.dap.is_debugging() { - self.stop_debug(); - } - } + self.run_event_loop(&info, buf, buflen, WaitFor::ExecuteRequest) + } + /// Runs the ReadConsole event loop. + /// This handles events for: + /// - Reception of either input replies or execute requests (as determined + /// by `wait_for`) + /// - Idle-time and interrupt-time tasks + /// - Requests from the frontend (currently only used for establishing UI comm) + /// - R's polled events + fn run_event_loop( + &mut self, + info: &PromptInfo, + buf: *mut c_uchar, + buflen: c_int, + wait_for: WaitFor, + ) -> ConsoleResult { let mut select = crossbeam::channel::Select::new(); // Cloning is necessary to avoid a double mutable borrow error @@ -830,22 +970,28 @@ impl RMain { // package. 50ms seems to be more in line with RStudio (posit-dev/positron#7235). let polled_events_rx = crossbeam::channel::tick(Duration::from_millis(50)); - let r_request_index = select.recv(&r_request_rx); - let stdin_reply_index = select.recv(&stdin_reply_rx); + // This is the main kind of message from the frontend that we are expecting. + // We either wait for `input_reply` messages on StdIn, or for + // `execute_request` on Shell. + let (r_request_index, stdin_reply_index) = match wait_for { + WaitFor::ExecuteRequest => (Some(select.recv(&r_request_rx)), None), + WaitFor::InputReply => (None, Some(select.recv(&stdin_reply_rx))), + }; + let kernel_request_index = select.recv(&kernel_request_rx); let tasks_interrupt_index = select.recv(&tasks_interrupt_rx); let polled_events_index = select.recv(&polled_events_rx); - // Don't process idle tasks in browser prompts. We currently don't want + // Don't process idle tasks unless at top level. We currently don't want // idle tasks (e.g. for srcref generation) to run when the call stack is - // empty. We could make this configurable though if needed, i.e. some + // not empty. We could make this configurable though if needed, i.e. some // idle tasks would be able to run in the browser. Those should be sent // to a dedicated channel that would always be included in the set of // recv channels. - let tasks_idle_index = if info.browser { - None - } else { + let tasks_idle_index = if matches!(info.kind, PromptKind::TopLevel) { Some(select.recv(&tasks_idle_rx)) + } else { + None }; loop { @@ -857,7 +1003,7 @@ impl RMain { // `UserBreak`, but won't actually fire the interrupt b/c // we have them disabled, so it would end up swallowing the // user interrupt request. - if info.input_request && interrupts_pending() { + if matches!(info.kind, PromptKind::InputRequest) && interrupts_pending() { return ConsoleResult::Interrupt; } @@ -866,17 +1012,13 @@ impl RMain { // reset the flag set_interrupts_pending(false); - // FIXME: Race between interrupt and new code request. To fix - // this, we could manage the Shell and Control sockets on the - // common message event thread. The Control messages would need - // to be handled in a blocking way to ensure subscribers are - // notified before the next incoming message is processed. - // First handle execute requests outside of `select` to ensure they // have priority. `select` chooses at random. - if let Ok(req) = r_request_rx.try_recv() { - if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { - return input; + if let WaitFor::ExecuteRequest = wait_for { + if let Ok(req) = r_request_rx.try_recv() { + if let Some(input) = self.handle_execute_request(req, &info, buf, buflen) { + return input; + } } } @@ -884,7 +1026,7 @@ impl RMain { match oper.index() { // We've got an execute request from the frontend - i if i == r_request_index => { + i if Some(i) == r_request_index => { let req = oper.recv(&r_request_rx); let Ok(req) = req else { // The channel is disconnected and empty @@ -897,7 +1039,7 @@ impl RMain { }, // We've got a reply for readline - i if i == stdin_reply_index => { + i if Some(i) == stdin_reply_index => { let reply = oper.recv(&stdin_reply_rx).unwrap(); return self.handle_input_reply(reply, buf, buflen); }, @@ -941,8 +1083,9 @@ impl RMain { let prompt_slice = unsafe { CStr::from_ptr(prompt_c) }; let prompt = prompt_slice.to_string_lossy().into_owned(); + // Sent to the frontend after each top-level command so users can + // customise their prompts let continuation_prompt: String = harp::get_option("continue").try_into().unwrap(); - let matches_continuation = prompt == continuation_prompt; // Detect browser prompt by matching the prompt string // https://github.com/posit-dev/positron/issues/4742. @@ -950,99 +1093,124 @@ impl RMain { // `options(prompt =, continue = ` to something that looks like // a browser prompt, or doing the same with `readline()`. We have // chosen to not support these edge cases. - // Additionally, we send code to R one line at a time, so even if we are debugging - // it can look like we are in a continuation state. To try and detect that, we - // detect if we matched the continuation prompt while the DAP is active. - let browser = - RE_DEBUG_PROMPT.is_match(&prompt) || (self.dap.is_debugging() && matches_continuation); - - // If there are frames on the stack and we're not in a browser prompt, - // this means some user code is requesting input, e.g. via `readline()` - let user_request = !browser && n_frame > 0; - - // The request is incomplete if we see the continue prompt, except if - // we're in a user request, e.g. `readline("+ ")`. To guard against - // this, we check that we are at top-level (call stack is empty or we - // have a debug prompt). - let top_level = n_frame == 0 || browser; - let incomplete = matches_continuation && top_level; + let browser = RE_DEBUG_PROMPT.is_match(&prompt); + + // Determine the prompt kind based on context + let kind = if browser { + PromptKind::Browser + } else if n_frame > 0 { + // If there are frames on the stack and we're not in a browser prompt, + // this means some user code is requesting input, e.g. via `readline()` + PromptKind::InputRequest + } else { + PromptKind::TopLevel + }; return PromptInfo { input_prompt: prompt, continuation_prompt, - browser, - incomplete, - input_request: user_request, + kind, }; } - fn read_console_cleanup(&mut self) { - // The debug environment is only valid while R is idle - self.debug_env = None; - } + /// Take result from `self.autoprint_output` and R's `.Last.value` object + fn take_result(&mut self) -> serde_json::Map { + // TODO: Implement rich printing of certain outputs. + // Will we need something similar to the RStudio model, + // where we implement custom print() methods? Or can + // we make the stub below behave sensibly even when + // streaming R output? + let mut data = serde_json::Map::new(); - /// Returns: - /// - `None` if we should fall through to the event loop to wait for more user input - /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` - fn handle_active_request( - &mut self, - info: &PromptInfo, - buf: *mut c_uchar, - buflen: c_int, - ) -> Option { - // TODO: Can we remove this below code? - // If the prompt begins with "Save workspace", respond with (n) - // and allow R to immediately exit. - // - // NOTE: Should be able to overwrite the `Cleanup` frontend method. - // This would also help with detecting normal exits versus crashes. - if info.input_prompt.starts_with("Save workspace") { - match Self::on_console_input(buf, buflen, String::from("n")) { - Ok(()) => return Some(ConsoleResult::NewInput), - Err(err) => return Some(ConsoleResult::Error(err)), - } + // The output generated by autoprint is emitted as an + // `execute_result` message. + let mut autoprint = std::mem::take(&mut self.autoprint_output); + + if autoprint.ends_with('\n') { + // Remove the trailing newlines that R adds to outputs but that + // Jupyter frontends are not expecting. Is it worth taking a + // mutable self ref across calling methods to avoid the clone? + autoprint.pop(); + } + if autoprint.len() != 0 { + data.insert("text/plain".to_string(), json!(autoprint)); } - // First check if we are inside request for user input, like a `readline()` or `menu()`. - // It's entirely possible that we still have more pending lines, but an intermediate line - // put us into an `input_request` state. We must respond to that request before processing - // the rest of the pending lines. - if info.input_request { - if let Some(req) = &self.active_request { - // Send request to frontend. We'll wait for an `input_reply` - // from the frontend in the event loop in `read_console()`. - // The active request remains active. - self.request_input(req.originator.clone(), info.input_prompt.to_string()); - return None; - } else { - // Invalid input request, propagate error to R - return Some(self.handle_invalid_input_request(buf, buflen)); + // Include HTML representation of data.frame + unsafe { + let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); + if r_is_data_frame(value) { + match to_html(value) { + Ok(html) => { + data.insert("text/html".to_string(), json!(html)); + }, + Err(err) => { + log::error!("{:?}", err); + }, + }; } } - // An incomplete prompt when we no longer have any inputs to send should - // never happen because we check for incomplete inputs ahead of time and - // respond to the frontend with an error. - if info.incomplete && self.pending_lines.is_empty() { - unreachable!("Incomplete input in `ReadConsole` handler"); + data + } + + fn take_exception(&mut self) -> Option { + let mut exception = if let Some(exception) = self.last_error.take() { + exception + } else if stack_overflow_occurred() { + // Call `base::traceback()` since we don't have a handled error + // object carrying a backtrace. This won't be formatted as a + // tree which is just as well since the recursive calls would + // push a tree too far to the right. + let traceback = r_traceback(); + + // Reset error buffer so we don't display this message again + let _ = RFunction::new("base", "stop").call(); + + Exception { + ename: String::from(""), + evalue: r_peek_error_buffer(), + traceback, + } + } else { + return None; + }; + + // Flush any accumulated output to StdOut. This can happen if + // the last input errors out during autoprint. + let autoprint = std::mem::take(&mut self.autoprint_output); + if !autoprint.is_empty() { + let message = IOPubMessage::Stream(StreamOutput { + name: Stream::Stdout, + text: autoprint, + }); + self.iopub_tx.send(message).unwrap(); } - // Next check if we have any pending lines. If we do, we are in the middle of - // evaluating a multi line selection, so immediately write the next line into R's buffer. - // The active request remains active. - if let Some(console_result) = self.handle_pending_line(buf, buflen) { - return Some(console_result); + // Jupyter clients typically discard the `evalue` when a `traceback` is + // present. Jupyter-Console even disregards `evalue` in all cases. So + // include it here if we are in Notebook mode. But should Positron + // implement similar behaviour as the other frontends eventually? The + // first component of `traceback` could be compared to `evalue` and + // discarded from the traceback if the same. + if let SessionMode::Notebook = self.session_mode { + exception.traceback.insert(0, exception.evalue.clone()); } - // Finally, check if we have an active request from a previous `read_console()` - // iteration. If so, we `take()` and clear the `active_request` as we're about - // to complete it and send a reply to unblock the active Shell - // request. + Some(exception) + } + + /// Returns: + /// - `None` if we should fall through to the event loop to wait for more user input + /// - `Some(ConsoleResult)` if we should immediately exit `read_console()` + fn handle_active_request(&mut self, info: &PromptInfo, value: ConsoleValue) { + // If we get here we finished evaluating all pending inputs. Check if we + // have an active request from a previous `read_console()` iteration. If + // so, we `take()` and clear the `active_request` as we're about to + // complete it and send a reply to unblock the active Shell request. if let Some(req) = std::mem::take(&mut self.active_request) { - // FIXME: Race condition between the comm and shell socket threads. - // - // Perform a refresh of the frontend state - // (Prompts, working directory, etc) + // Perform a refresh of the frontend state (Prompts, working + // directory, etc) self.with_mut_ui_comm_tx(|ui_comm_tx| { let input_prompt = info.input_prompt.clone(); let continuation_prompt = info.continuation_prompt.clone(); @@ -1057,13 +1225,15 @@ impl RMain { // Let frontend know the last request is complete. This turns us // back to Idle. - self.reply_execute_request(req, &info); + Self::reply_execute_request(&self.iopub_tx, req, &info, value); + } else { + log::info!("No active request to handle, discarding: {value:?}"); } - - // Prepare for the next user input - None } + // Called from Ark's ReadConsole event loop when we get a new execute + // request. It's not possible to get one while an active request is ongoing + // because of Jupyter's queueing of Shell messages. fn handle_execute_request( &mut self, req: RRequest, @@ -1071,7 +1241,7 @@ impl RMain { buf: *mut c_uchar, buflen: c_int, ) -> Option { - if info.input_request { + if matches!(info.kind, PromptKind::InputRequest) { panic!("Unexpected `execute_request` while waiting for `input_reply`."); } @@ -1105,48 +1275,166 @@ impl RMain { }, }; - // Clear error flag - self.error_occurred = false; - match input { ConsoleInput::Input(code) => { - // Handle commands for the debug interpreter - if self.dap.is_debugging() { - let continue_cmds = vec!["n", "f", "c", "cont"]; - if continue_cmds.contains(&&code[..]) { - // We're stepping so we want to focus the next location we stop at - self.debug_preserve_focus = false; - self.dap.send_dap(DapBackendEvent::Continued); - } else { - // The user is evaluating some other expression so preserve current focus - // https://github.com/posit-dev/positron/issues/3151 - self.debug_preserve_focus = true; - } + // Parse input into pending expressions + match PendingInputs::read(&code) { + Ok(ParseResult::Success(inputs)) => { + self.pending_inputs = inputs; + }, + Ok(ParseResult::SyntaxError(message)) => { + return Some(ConsoleResult::Error(message)) + }, + Err(err) => { + return Some(ConsoleResult::Error(format!( + "Error while parsing input: {err:?}" + ))) + }, } - // If the input is invalid (e.g. incomplete), don't send it to R - // at all, reply with an error right away - if let Err(err) = Self::check_console_input(code.as_str()) { - return Some(ConsoleResult::Error(err)); + // Evaluate first expression if there is one + if let Some(input) = self.pop_pending() { + return Some(self.handle_pending_input(input, buf, buflen)); } - // Split input by lines, retrieve first line, and store - // remaining lines in a buffer. This helps with long inputs - // because R has a fixed input buffer size of 4096 bytes at the - // time of writing. - let code = self.buffer_console_input(code.as_str()); - - // Store input in R's buffer and return sentinel indicating some - // new input is ready - match Self::on_console_input(buf, buflen, code) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), - } + // Otherwise we got an empty input, e.g. `""` and there's + // nothing to do. Close active request. + self.handle_active_request(info, ConsoleValue::Success(Default::default())); + + // And return to event loop + None }, + ConsoleInput::EOF => Some(ConsoleResult::Disconnected), } } + /// Handles user input requests (e.g., readline, menu) and special prompts. + /// Runs the ReadConsole event loop until a reply comes in. + fn handle_input_request( + &mut self, + info: &PromptInfo, + buf: *mut c_uchar, + buflen: c_int, + ) -> ConsoleResult { + if let Some(req) = &self.active_request { + // Send request to frontend. We'll wait for an `input_reply` + // from the frontend in the event loop in `read_console()`. + // The active request remains active. + self.request_input(req.originator.clone(), String::from(&info.input_prompt)); + + // Run the event loop, waiting for stdin replies but not execute requests + self.run_event_loop(info, buf, buflen, WaitFor::InputReply) + } else { + // Invalid input request, propagate error to R + self.handle_invalid_input_request(buf, buflen) + } + } + + fn handle_pending_input( + &mut self, + input: PendingInput, + buf: *mut c_uchar, + buflen: c_int, + ) -> ConsoleResult { + // Default: preserve current focus for evaluated expressions. + // This only has an effect if we're debugging. + // https://github.com/posit-dev/positron/issues/3151 + self.debug_preserve_focus = true; + + if self.dap.is_debugging() { + // Try to interpret this pending input as a symbol (debug commands + // are entered as symbols). Whether or not it parses as a symbol, + // if we're currently debugging we must set `debug_preserve_focus`. + if let Ok(sym) = harp::RSymbol::new(input.expr.sexp) { + // All debug commands as documented in `?browser` + const DEBUG_COMMANDS: &[&str] = + &["c", "cont", "f", "help", "n", "s", "where", "r", "Q"]; + + // The subset of debug commands that continue execution + const DEBUG_COMMANDS_CONTINUE: &[&str] = &["n", "f", "c", "cont"]; + + let sym = String::from(sym); + + if DEBUG_COMMANDS.contains(&&sym[..]) { + if DEBUG_COMMANDS_CONTINUE.contains(&&sym[..]) { + // For continue-like commands, we do not preserve focus, + // i.e. we let the cursor jump to the stopped + // position. Set the preserve focus hint for the + // next iteration of ReadConsole. + self.debug_preserve_focus = false; + + // Let the DAP client know that execution is now continuing + self.dap.send_dap(DapBackendEvent::Continued); + } + + // All debug commands are forwarded to the base REPL as + // is so that R can interpret them. + // Unwrap safety: A debug command fits in the buffer. + Self::on_console_input(buf, buflen, sym).unwrap(); + return ConsoleResult::NewInput; + } + } + } + + ConsoleResult::NewPendingInput(input) + } + + fn pop_pending(&mut self) -> Option { + let Some(pending_inputs) = self.pending_inputs.as_mut() else { + return None; + }; + + let Some(input) = pending_inputs.pop() else { + self.pending_inputs = None; + return None; + }; + + if pending_inputs.is_empty() { + self.pending_inputs = None; + } + + Some(input) + } + + // SAFETY: Call this from a POD frame. Inputs must be protected. + unsafe fn eval(expr: libr::SEXP, srcref: libr::SEXP, buf: *mut c_uchar, buflen: c_int) { + let frame = harp::r_current_frame(); + + // SAFETY: This may jump in case of error, keep this POD + unsafe { + let frame = libr::Rf_protect(frame.into()); + + // The global source reference is stored in this global variable by + // the R REPL before evaluation. We do the same here. + libr::set(libr::R_Srcref, srcref); + + // Evaluate the expression. Beware: this may throw an R longjump. + let value = libr::Rf_eval(expr, frame); + libr::Rf_protect(value); + + // Store in the base environment for robust access from (almost) any + // evaluation environment. We only require the presence of `::` so + // we can reach into base. Note that unlike regular environments + // which are stored in pairlists or hash tables, the base environment + // is stored in the `value` field of symbols, i.e. their "CDR". + libr::SETCDR(r_symbol!(".ark_last_value"), value); + + libr::Rf_unprotect(2); + value + }; + + // Back in business, Rust away + let code = if unsafe { libr::get(libr::R_Visible) == 1 } { + String::from("base::.ark_last_value") + } else { + String::from("base::invisible(base::.ark_last_value)") + }; + + // Unwrap safety: The input always fits in the buffer + Self::on_console_input(buf, buflen, code).unwrap(); + } + /// Handle an `input_request` received outside of an `execute_request` context /// /// We believe it is always invalid to receive an `input_request` that isn't @@ -1175,7 +1463,7 @@ impl RMain { log::info!("Detected `readline()` call in renv autoloader. Returning `'{input}'`."); match Self::on_console_input(buf, buflen, input) { Ok(()) => return ConsoleResult::NewInput, - Err(err) => return ConsoleResult::Error(err), + Err(err) => return ConsoleResult::Error(format!("{err}")), } } @@ -1186,19 +1474,12 @@ impl RMain { "Are you calling `readline()` or `menu()` from an `.Rprofile` or `.Rprofile.site` file? If so, that is the issue and you should remove that code." ].join("\n"); - return ConsoleResult::Error(Error::InvalidInputRequest(message)); + return ConsoleResult::Error(message); } - fn start_debug(&mut self) { + fn start_debug(&mut self, debug_preserve_focus: bool) { match self.dap.stack_info() { Ok(stack) => { - if let Some(frame) = stack.first() { - if let Some(ref env) = frame.environment { - // This is reset on exit in the cleanup phase, see `r_read_console()` - self.debug_env = Some(env.get().clone()); - } - } - // Figure out whether we changed location since last time, // e.g. because the user evaluated an expression that hit // another breakpoint. In that case we do want to move @@ -1216,11 +1497,8 @@ impl RMain { let fallback_sources = self.load_fallback_sources(&stack); self.debug_last_stack = stack_id; - self.dap.start_debug( - stack, - same_stack && self.debug_preserve_focus, - fallback_sources, - ); + self.dap + .start_debug(stack, same_stack && debug_preserve_focus, fallback_sources); }, Err(err) => log::error!("ReadConsole: Can't get stack info: {err}"), }; @@ -1353,10 +1631,10 @@ impl RMain { let input = convert_line_endings(&input.value, LineEnding::Posix); match Self::on_console_input(buf, buflen, input) { Ok(()) => ConsoleResult::NewInput, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } }, - Err(err) => ConsoleResult::Error(err), + Err(err) => ConsoleResult::Error(format!("{err:?}")), } } @@ -1544,63 +1822,6 @@ impl RMain { self.get_ui_comm_tx().is_some() } - fn handle_pending_line(&mut self, buf: *mut c_uchar, buflen: c_int) -> Option { - if self.error_occurred { - // If an error has occurred, we've already sent a complete expression that resulted in - // an error. Flush the remaining lines and return to `read_console()`, who will handle - // that error. - self.pending_lines.clear(); - return None; - } - - let Some(input) = self.pending_lines.pop() else { - // No pending lines - return None; - }; - - match Self::on_console_input(buf, buflen, input) { - Ok(()) => Some(ConsoleResult::NewInput), - Err(err) => Some(ConsoleResult::Error(err)), - } - } - - fn check_console_input(input: &str) -> amalthea::Result<()> { - let status = unwrap!(harp::parse_status(&harp::ParseInput::Text(input)), Err(err) => { - // Failed to even attempt to parse the input, something is seriously wrong - return Err(Error::InvalidConsoleInput(format!( - "Failed to parse input: {err:?}" - ))); - }); - - // - Incomplete inputs put R into a state where it expects more input that will never come, so we - // immediately reject them. Positron should never send us these, but Jupyter Notebooks may. - // - Complete statements are obviously fine. - // - Syntax errors will cause R to throw an error, which is expected. - match status { - harp::ParseResult::Incomplete => Err(Error::InvalidConsoleInput(format!( - "Can't execute incomplete input:\n{input}" - ))), - harp::ParseResult::Complete(_) => Ok(()), - harp::ParseResult::SyntaxError { .. } => Ok(()), - } - } - - fn buffer_console_input(&mut self, input: &str) -> String { - // Split into lines and reverse them to be able to `pop()` from the front - let mut lines: Vec = lines(input).rev().map(String::from).collect(); - - // SAFETY: There is always at least one line because: - // - `lines("")` returns 1 element containing `""` - // - `lines("\n")` returns 2 elements containing `""` - let first = lines.pop().unwrap(); - - // No-op if `lines` is empty - assert!(self.pending_lines.is_empty()); - self.pending_lines.append(&mut lines); - - first - } - /// Copy console input into R's internal input buffer /// /// Supposedly `buflen` is "the maximum length, in bytes, including the @@ -1650,6 +1871,13 @@ impl RMain { Ok(()) } + fn console_input(buf: *mut c_uchar, _buflen: c_int) -> String { + unsafe { + let cstr = CStr::from_ptr(buf as *const c_char); + cstr.to_string_lossy().into_owned() + } + } + // Hitting this means a SINGLE line from the user was longer than the buffer size (>4000 characters) fn buffer_overflow_error() -> amalthea::Error { Error::InvalidConsoleInput(String::from( @@ -1659,142 +1887,56 @@ impl RMain { // Reply to the previously active request. The current prompt type and // whether an error has occurred defines the reply kind. - fn reply_execute_request(&mut self, req: ActiveReadConsoleRequest, prompt_info: &PromptInfo) { + fn reply_execute_request( + iopub_tx: &Sender, + req: ActiveReadConsoleRequest, + prompt_info: &PromptInfo, + value: ConsoleValue, + ) { let prompt = &prompt_info.input_prompt; - let (reply, result) = if prompt_info.incomplete { - log::trace!("Got prompt {} signaling incomplete request", prompt); - (new_incomplete_reply(&req.request, req.exec_count), None) - } else if prompt_info.input_request { - unreachable!(); - } else { - log::trace!("Got R prompt '{}', completing execution", prompt); + log::trace!("Got R prompt '{}', completing execution", prompt); - self.make_execute_reply_error(req.exec_count) - .unwrap_or_else(|| self.make_execute_reply(req.exec_count)) - }; + let exec_count = req.exec_count; - if let Some(result) = result { - self.iopub_tx.send(result).unwrap(); - } + let (reply, result) = match value { + ConsoleValue::Success(data) => { + let reply = Ok(ExecuteReply { + status: Status::Ok, + execution_count: exec_count, + user_expressions: json!({}), + }); - log::trace!("Sending `execute_reply`: {reply:?}"); - req.reply_tx.send(reply).unwrap(); - } + let result = if data.len() > 0 { + Some(IOPubMessage::ExecuteResult(ExecuteResult { + execution_count: exec_count, + data: serde_json::Value::Object(data), + metadata: json!({}), + })) + } else { + None + }; - fn make_execute_reply_error( - &mut self, - exec_count: u32, - ) -> Option<(amalthea::Result, Option)> { - // Save and reset error occurred flag - let error_occurred = self.error_occurred; - self.error_occurred = false; - - // Error handlers are not called on stack overflow so the error flag - // isn't set. Instead we detect stack overflows by peeking at the error - // buffer. The message is explicitly not translated to save stack space - // so the matching should be reliable. - let err_buf = r_peek_error_buffer(); - let stack_overflow_occurred = RE_STACK_OVERFLOW.is_match(&err_buf); - - // Reset error buffer so we don't display this message again - if stack_overflow_occurred { - let _ = RFunction::new("base", "stop").call(); - } + (reply, result) + }, - // Send the reply to the frontend - if !error_occurred && !stack_overflow_occurred { - return None; - } + ConsoleValue::Error(exception) => { + let reply = Err(amalthea::Error::ShellErrorExecuteReply( + exception.clone(), + exec_count, + )); + let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - // We don't fill out `ename` with anything meaningful because typically - // R errors don't have names. We could consider using the condition class - // here, which r-lib/tidyverse packages have been using more heavily. - let mut exception = if error_occurred { - Exception { - ename: String::from(""), - evalue: self.error_message.clone(), - traceback: self.error_traceback.clone(), - } - } else { - // Call `base::traceback()` since we don't have a handled error - // object carrying a backtrace. This won't be formatted as a - // tree which is just as well since the recursive calls would - // push a tree too far to the right. - let traceback = r_traceback(); - Exception { - ename: String::from(""), - evalue: err_buf.clone(), - traceback, - } + (reply, Some(result)) + }, }; - // Jupyter clients typically discard the `evalue` when a `traceback` is - // present. Jupyter-Console even disregards `evalue` in all cases. So - // include it here if we are in Notebook mode. But should Positron - // implement similar behaviour as the other frontends eventually? The - // first component of `traceback` could be compared to `evalue` and - // discarded from the traceback if the same. - if let SessionMode::Notebook = self.session_mode { - exception.traceback.insert(0, exception.evalue.clone()) - } - - let reply = new_execute_reply_error(exception.clone(), exec_count); - let result = IOPubMessage::ExecuteError(ExecuteError { exception }); - - Some((reply, Some(result))) - } - - fn make_execute_reply( - &mut self, - exec_count: u32, - ) -> (amalthea::Result, Option) { - // TODO: Implement rich printing of certain outputs. - // Will we need something similar to the RStudio model, - // where we implement custom print() methods? Or can - // we make the stub below behave sensibly even when - // streaming R output? - let mut data = serde_json::Map::new(); - - // The output generated by autoprint is emitted as an - // `execute_result` message. - let mut autoprint = std::mem::take(&mut self.autoprint_output); - - if autoprint.ends_with('\n') { - // Remove the trailing newlines that R adds to outputs but that - // Jupyter frontends are not expecting. Is it worth taking a - // mutable self ref across calling methods to avoid the clone? - autoprint.pop(); - } - if autoprint.len() != 0 { - data.insert("text/plain".to_string(), json!(autoprint)); - } - - // Include HTML representation of data.frame - unsafe { - let value = Rf_findVarInFrame(R_GlobalEnv, r_symbol!(".Last.value")); - if r_is_data_frame(value) { - match to_html(value) { - Ok(html) => data.insert("text/html".to_string(), json!(html)), - Err(err) => { - log::error!("{:?}", err); - None - }, - }; - } + if let Some(result) = result { + iopub_tx.send(result).unwrap(); } - let reply = new_execute_reply(exec_count); - - let result = (data.len() > 0).then(|| { - IOPubMessage::ExecuteResult(ExecuteResult { - execution_count: exec_count, - data: serde_json::Value::Object(data), - metadata: json!({}), - }) - }); - - (reply, result) + log::trace!("Sending `execute_reply`: {reply:?}"); + req.reply_tx.send(reply).unwrap(); } /// Sends a `Wait` message to IOPub, which responds when the IOPub thread @@ -1874,7 +2016,7 @@ impl RMain { // To capture the current `debug: ` output, for use in the debugger's // match based fallback - r_main.dap.handle_stdout(&content); + r_main.dap.handle_write_console(&content); let stream = if otype == 0 { Stream::Stdout @@ -1917,7 +2059,7 @@ impl RMain { // https://github.com/posit-dev/positron/issues/1881 // Handle last expression - if r_main.pending_lines.is_empty() { + if r_main.pending_inputs.is_none() { r_main.autoprint_output.push_str(&content); return; } @@ -2210,42 +2352,12 @@ impl RMain { } } - fn propagate_error(&mut self, err: anyhow::Error) -> ! { - // Save error message to `RMain`'s buffer to avoid leaking memory when `Rf_error()` jumps. - // Some gymnastics are required to deal with the possibility of `CString` conversion failure - // since the error message comes from the frontend and might be corrupted. - self.r_error_buffer = Some(new_cstring(format!("\n{err}"))); - unsafe { Rf_error(self.r_error_buffer.as_ref().unwrap().as_ptr()) } - } - #[cfg(not(test))] // Avoid warnings in unit test - pub(crate) fn debug_env(&self) -> Option { - self.debug_env.clone() + pub(crate) fn read_console_frame(&self) -> RObject { + self.read_console_frame.borrow().clone() } } -/// Report an incomplete request to the frontend -fn new_incomplete_reply(req: &ExecuteRequest, exec_count: u32) -> amalthea::Result { - let error = Exception { - ename: "IncompleteInput".to_string(), - evalue: format!("Code fragment is not complete: {}", req.code), - traceback: vec![], - }; - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - -fn new_execute_reply(exec_count: u32) -> amalthea::Result { - Ok(ExecuteReply { - status: Status::Ok, - execution_count: exec_count, - user_expressions: json!({}), - }) -} - -fn new_execute_reply_error(error: Exception, exec_count: u32) -> amalthea::Result { - Err(amalthea::Error::ShellErrorExecuteReply(error, exec_count)) -} - /// Converts a data frame to HTML fn to_html(frame: SEXP) -> Result { unsafe { @@ -2287,9 +2399,106 @@ pub extern "C-unwind" fn r_read_console( buflen: c_int, hist: c_int, ) -> i32 { + // In this entry point we handle two kinds of state: + // - The number of nested REPLs `read_console_depth` + // - A bunch of flags that help us reset the calling R REPL + // + // The second kind is unfortunate and due to us taking charge of parsing and + // evaluation. Ideally R would extend their frontend API so that this would + // only be necessary for backward compatibility with old versions of R. + let main = RMain::get_mut(); + + // Propagate an EOF event (e.g. from a Shutdown request). We need to exit + // from all consoles on the stack to let R shut down with an `exit()`. + if main.read_console_shutdown.get() { + return 0; + } + + // We've finished evaluating a dummy value to reset state in R's REPL, + // and are now ready to evaluate the actual input + if let Some(next_input) = main.read_console_next_input.take() { + RMain::on_console_input(buf, buflen, next_input).unwrap(); + return 1; + } + + // In case of error, we haven't had a chance to evaluate ".ark_last_value". + // So we return to the R REPL to give R a chance to run the state + // restoration that occurs between `R_ReadConsole()` and `eval()`: + // - R_PPStackTop: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L227 + // - R_EvalDepth: https://github.com/r-devel/r-svn/blob/74cd0af4/src/main/main.c#L260 + // + // Technically this also resets time limits (see `base::setTimeLimit()`) but + // these aren't supported in Ark because they cause errors when we poll R + // events. + if main.last_error.is_some() && main.read_console_threw_error.get() { + main.read_console_threw_error.set(false); + + // Evaluate last value so that `base::.Last.value` remains the same + RMain::on_console_input( + buf, + buflen, + String::from("base::invisible(base::.Last.value)"), + ) + .unwrap(); + return 1; + } + + // Keep track of state that we care about + + // - Track nesting depth of ReadConsole REPLs + main.read_console_depth + .set(main.read_console_depth.get() + 1); + + // - Set current frame environment + let old_current_frame = main.read_console_frame.replace(harp::r_current_frame()); + + // Keep track of state that we use for workarounds while interacting + // with the R REPL and force it to reset state + + // - Reset flag that helps us figure out when a nested REPL returns + main.read_console_nested_return.set(false); + + // - Reset flag that helps us figure out when an error occurred and needs a + // reset of `R_EvalDepth` and friends + main.read_console_threw_error.set(true); + + exec_with_cleanup( + || { + let main = RMain::get_mut(); + let result = r_read_console_impl(main, prompt, buf, buflen, hist); + + // If we get here, there was no error + main.read_console_threw_error.set(false); + + result + }, + || { + let main = RMain::get_mut(); + + // We're exiting, decrease depth of nested consoles + main.read_console_depth + .set(main.read_console_depth.get() - 1); + + // Set flag so that parent read console, if any, can detect that a + // nested console returned (if it indeed returns instead of looping + // for another iteration) + main.read_console_nested_return.set(true); + + // Restore current frame + main.read_console_frame.replace(old_current_frame); + }, + ) +} + +fn r_read_console_impl( + main: &mut RMain, + prompt: *const c_char, + buf: *mut c_uchar, + buflen: c_int, + hist: c_int, +) -> i32 { let result = r_sandbox(|| main.read_console(prompt, buf, buflen, hist)); - main.read_console_cleanup(); let result = unwrap!(result, Err(err) => { panic!("Unexpected longjump while reading from console: {err:?}"); @@ -2299,8 +2508,53 @@ pub extern "C-unwind" fn r_read_console( // destructors. We're longjumping from here in case of interrupt. match result { - ConsoleResult::NewInput => return 1, - ConsoleResult::Disconnected => return 0, + ConsoleResult::NewPendingInput(input) => { + let PendingInput { expr, srcref } = input; + + unsafe { + // The pointer protection stack is restored by `run_Rmainloop()` + // after a longjump to top-level, so it's safe to protect here + // even if the evaluation throws + let expr = libr::Rf_protect(expr.into()); + + let srcref = match srcref { + Some(srcref) => srcref.into(), + None => r_null(), + }; + libr::Rf_protect(srcref); + + RMain::eval(expr, srcref, buf, buflen); + + // Check if a nested read_console() just returned. If that's the + // case, we need to reset the `R_ConsoleIob` by first returning + // a dummy value causing a `PARSE_NULL` event. + if main.read_console_nested_return.get() { + let next_input = RMain::console_input(buf, buflen); + main.read_console_next_input.set(Some(next_input)); + + // Evaluating a space causes a `PARSE_NULL` event. Don't + // evaluate a newline, that would cause a parent debug REPL + // to interpret it as `n`, causing it to exit instead of + // being a no-op. + RMain::on_console_input(buf, buflen, String::from(" ")).unwrap(); + main.read_console_nested_return.set(false); + } + + libr::Rf_unprotect(2); + return 1; + } + }, + + ConsoleResult::NewInput => { + return 1; + }, + + ConsoleResult::Disconnected => { + // Cause parent consoles to shutdown too + main.read_console_shutdown.set(true); + return 0; + }, + ConsoleResult::Interrupt => { log::trace!("Interrupting `ReadConsole()`"); unsafe { @@ -2311,8 +2565,14 @@ pub extern "C-unwind" fn r_read_console( log::error!("`Rf_onintr()` did not longjump"); return 0; }, - ConsoleResult::Error(err) => { - main.propagate_error(anyhow::anyhow!("{err}")); + + ConsoleResult::Error(message) => { + // Save error message in `RMain` to avoid leaking memory when + // `Rf_error()` jumps. Some gymnastics are required to deal with the + // possibility of `CString` conversion failure since the error + // message comes from the frontend and might be corrupted. + main.r_error_buffer = Some(new_cstring(message)); + unsafe { Rf_error(main.r_error_buffer.as_ref().unwrap().as_ptr()) } }, }; } @@ -2354,6 +2614,28 @@ pub unsafe extern "C-unwind" fn r_polled_events() { }; } +// For integration tests +use std::sync::Condvar; +pub static CLEANUP_SIGNAL: (Mutex, Condvar) = (Mutex::new(false), Condvar::new()); + +#[no_mangle] +pub extern "C-unwind" fn r_cleanup_for_tests(_save_act: i32, _status: i32, _run_last: i32) { + // Signal that cleanup has started + let (lock, cvar) = &CLEANUP_SIGNAL; + + let mut started = lock.lock().unwrap(); + *started = true; + + cvar.notify_all(); + drop(started); + + // Sleep to give tests time to complete before we panic + std::thread::sleep(std::time::Duration::from_secs(5)); + + // Fallthrough to R which will call `exit()`. Note that panicking from here + // would be UB, we can't panic over a C stack. +} + // This hook is called like a user onLoad hook but for every package to be // loaded in the session #[harp::register] @@ -2410,7 +2692,10 @@ fn do_resource_namespaces() -> bool { fn is_auto_printing() -> bool { let n_frame = harp::session::r_n_frame().unwrap(); - // The call-stack is empty so this must be R auto-printing an unclassed object + // The call-stack is empty so this must be R auto-printing an unclassed + // object. Note that this might wrongly return true in debug REPLs. Ideally + // we'd take note of the number of frames on the stack when we enter + // `r_read_console()`, and compare against that. if n_frame == 0 { return true; } diff --git a/crates/ark/src/lsp/completions/sources/composite/search_path.rs b/crates/ark/src/lsp/completions/sources/composite/search_path.rs index 0adfd8004..273cd7951 100644 --- a/crates/ark/src/lsp/completions/sources/composite/search_path.rs +++ b/crates/ark/src/lsp/completions/sources/composite/search_path.rs @@ -13,7 +13,6 @@ use harp::vector::CharacterVector; use harp::vector::Vector; use harp::RObject; use libr::R_EmptyEnv; -use libr::R_GlobalEnv; use libr::R_lsInternal; use libr::ENCLOS; use tower_lsp::lsp_types::CompletionItem; @@ -51,19 +50,12 @@ fn completions_from_search_path( ]; unsafe { - // Iterate through environments starting from the global environment. - let mut env = R_GlobalEnv; - - // If we're waiting for input in `read_console()` with a debugger - // prompt, start from current environment + // Iterate through environments starting from the current frame environment. #[cfg(not(test))] // Unit tests do not have an `RMain` - { - use crate::interface::RMain; - if let Some(debug_env) = &RMain::get().debug_env() { - // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` - env = debug_env.sexp; - } - } + // Mem-Safety: Object protected by `RMain` for the duration of the `r_task()` + let mut env = crate::interface::RMain::get().read_console_frame().sexp; + #[cfg(test)] + let mut env = libr::R_GlobalEnv; while env != R_EmptyEnv { let is_pkg_env = r_env_is_pkg_env(env); diff --git a/crates/ark/src/main.rs b/crates/ark/src/main.rs index b5e4d202d..746e43058 100644 --- a/crates/ark/src/main.rs +++ b/crates/ark/src/main.rs @@ -83,6 +83,9 @@ fn main() -> anyhow::Result<()> { let mut capture_streams = true; let mut default_repos = DefaultRepos::Auto; + // We don't support the asking the user whether to save the workspace data + // on exit because calling readline during shutdown puts in a precarious + // position. So effectively we're implementing "no-save" by default. // Process remaining arguments. TODO: Need an argument that can passthrough args to R while let Some(arg) = argv.next() { match arg.as_str() { @@ -325,6 +328,20 @@ fn main() -> anyhow::Result<()> { r_args.push(String::from("--interactive")); } + // Prepend the vector of arguments with our default. These can be overridden + // by user arguments (last one wins). + r_args.splice(0..0, [ + // We don't support the asking the user whether to save the workspace + // data on exit because calling readline during shutdown puts in a + // precarious position. So effectively we're implementing "no-save" by + // default. Note that there is no argument to opt into the "ask" + // behaviour, so it can't be reenabled by the user. + String::from("--no-save"), + // Since we don't save by default, we also don't restore by default for + // consistency + String::from("--no-restore-data"), + ]); + // This causes panics on background threads to propagate on the main // thread. If we don't propagate a background thread panic, the program // keeps running in an unstable state as all communications with this diff --git a/crates/ark/src/modules/positron/errors.R b/crates/ark/src/modules/positron/errors.R index 11e082cb5..12263fa76 100644 --- a/crates/ark/src/modules/positron/errors.R +++ b/crates/ark/src/modules/positron/errors.R @@ -33,6 +33,14 @@ #' @export .ps.errors.globalErrorHandler <- function(cnd) { + # Unlike C stack overflow errors, expressions nested too deeply errors allow + # calling handlers. But since we run R code, we need to temporarily bump the + # threshold to give a little room while we handle the error. + if (inherits(cnd, "expressionStackOverflowError")) { + old <- options(expressions = getOption("expressions") + 500) + defer(options(old)) + } + # This reproduces the behaviour of R's default error handler: # - Invoke `getOption("error")` # - Save backtrace for `traceback()` diff --git a/crates/ark/src/sys/unix/interface.rs b/crates/ark/src/sys/unix/interface.rs index db5bc60ce..0748ea367 100644 --- a/crates/ark/src/sys/unix/interface.rs +++ b/crates/ark/src/sys/unix/interface.rs @@ -73,6 +73,14 @@ pub fn setup_r(args: &Vec) { libr::set(ptr_R_Busy, Some(r_busy)); libr::set(ptr_R_Suicide, Some(r_suicide)); + if stdext::IS_TESTING { + use libr::ptr_R_CleanUp; + + use crate::interface::r_cleanup_for_tests; + + libr::set(ptr_R_CleanUp, Some(r_cleanup_for_tests)); + } + // In tests R may be run from various threads. This confuses R's stack // overflow checks so we disable those. This should not make it in // production builds as it causes stack overflows to crash R instead of diff --git a/crates/ark/tests/kernel.rs b/crates/ark/tests/kernel.rs index 042142cec..fe1988899 100644 --- a/crates/ark/tests/kernel.rs +++ b/crates/ark/tests/kernel.rs @@ -1,9 +1,13 @@ use amalthea::fixtures::dummy_frontend::ExecuteRequestOptions; use amalthea::wire::jupyter_message::Message; +use amalthea::wire::jupyter_message::Status; use amalthea::wire::kernel_info_request::KernelInfoRequest; use ark::fixtures::DummyArkFrontend; use stdext::assert_match; +// Avoids our global calling handler from rlangifying errors. +// This causes some test instability across configs. + #[test] fn test_kernel_info() { let frontend = DummyArkFrontend::lock(); @@ -24,70 +28,83 @@ fn test_kernel_info() { #[test] fn test_execute_request() { let frontend = DummyArkFrontend::lock(); - - let code = "42"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 42"); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("42", |result| assert_eq!(result, "[1] 42")); } #[test] fn test_execute_request_empty() { let frontend = DummyArkFrontend::lock(); - let code = ""; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); + frontend.execute_request_invisibly(""); - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); + // Equivalent to invisible output + frontend.execute_request_invisibly("invisible(1)"); +} - frontend.recv_iopub_idle(); +#[test] +fn test_execute_request_multiple_lines() { + let frontend = DummyArkFrontend::lock(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request("1 +\n 2+\n 3", |result| assert_eq!(result, "[1] 6")); +} - // Equivalent to invisible output - let code = "invisible(1)"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); +#[test] +fn test_execute_request_incomplete() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); + let frontend = DummyArkFrontend::lock(); - frontend.recv_iopub_idle(); + frontend.execute_request_invisibly("options(positron.error_entrace = FALSE)"); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_error("1 +", |error_msg| { + assert_eq!(error_msg, "Error:\nCan't execute incomplete input:\n1 +"); + }); } #[test] -fn test_execute_request_multiple_lines() { +fn test_execute_request_incomplete_multiple_lines() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n 2+\n 3"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); + frontend.execute_request_error("1 +\n2 +", |error_msg| { + assert!(error_msg.contains("Can't execute incomplete input")); + }); +} - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - assert_eq!(frontend.recv_iopub_execute_result(), "[1] 6"); +#[test] +fn test_execute_request_invalid() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); - frontend.recv_iopub_idle(); + let frontend = DummyArkFrontend::lock(); - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count) + frontend.execute_request_error("1 + )", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); + + // https://github.com/posit-dev/ark/issues/598 + frontend.execute_request_error("``", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); + + // https://github.com/posit-dev/ark/issues/722 + frontend.execute_request_error("_ + _()", |error_msg| { + assert!(error_msg.contains("Syntax error")); + assert!(!error_msg.contains("Stack backtrace:") && !error_msg.contains("std::backtrace")); + }); } #[test] -fn test_execute_request_incomplete() { +fn test_execute_request_browser() { let frontend = DummyArkFrontend::lock(); - let code = "1 +"; + let code = "browser()"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -95,22 +112,21 @@ fn test_execute_request_incomplete() { assert_eq!(input.code, code); assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .recv_iopub_execute_result() + .contains("Called from: top level")); frontend.recv_iopub_idle(); - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request_invisibly("Q"); } #[test] -fn test_execute_request_incomplete_multiple_lines() { +fn test_execute_request_browser_continue() { let frontend = DummyArkFrontend::lock(); - let code = "1 +\n2 +"; + let code = "browser()"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -118,21 +134,22 @@ fn test_execute_request_incomplete_multiple_lines() { assert_eq!(input.code, code); assert!(frontend - .recv_iopub_execute_error() - .contains("Can't execute incomplete input")); + .recv_iopub_execute_result() + .contains("Called from: top level")); frontend.recv_iopub_idle(); - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ) + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request_invisibly("n"); } #[test] -fn test_execute_request_browser() { +fn test_execute_request_browser_nested() { + // Test nested browser() calls - entering a browser within a browser let frontend = DummyArkFrontend::lock(); + // Start first browser let code = "browser()"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -148,16 +165,54 @@ fn test_execute_request_browser() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; + // Evaluate a value in the outer browser + frontend.execute_request("42", |result| assert!(result.contains("[1] 42"))); + + // Start nested browser from within the first browser + // Nested browser() produces execute_result output + frontend.execute_request("browser()", |_result| {}); + + // Evaluate a command in the nested browser + frontend.execute_request("1", |result| assert!(result.contains("[1] 1"))); + + // Evaluate another value in the nested browser + frontend.execute_request("\"hello\"", |result| assert!(result.contains("hello"))); + + // Throw an error in the nested browser + let code = "stop('error in nested')"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stderr("Error: error in nested\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Continue to exit the nested browser and return to parent + frontend.execute_request_invisibly("c"); + + // Back in the parent browser, evaluate another value + frontend.execute_request("3.14", |result| assert!(result.contains("[1] 3.14"))); + + // Throw an error in the outer browser + let code = "stop('error in parent')"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); + frontend.recv_iopub_stream_stderr("Error: error in parent\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request("NA", |result| assert!(result.contains("[1] NA"))); + // Quit the outer browser + frontend.execute_request_invisibly("Q"); } #[test] @@ -195,20 +250,16 @@ fn test_execute_request_browser_error() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] fn test_execute_request_browser_incomplete() { + // Set RUST_BACKTRACE to ensure backtraces are captured. We used to leak + // backtraces in syntax error messages, and this shouldn't happen even when + // `RUST_BACKTRACE` is set. + std::env::set_var("RUST_BACKTRACE", "1"); + let frontend = DummyArkFrontend::lock(); let code = "browser()"; @@ -233,7 +284,7 @@ fn test_execute_request_browser_incomplete() { let input = frontend.recv_iopub_execute_input(); assert_eq!(input.code, code); - frontend.recv_iopub_stream_stderr("Error: \nCan't execute incomplete input:\n1 +\n"); + frontend.recv_iopub_stream_stderr("Error: Can't execute incomplete input:\n1 +\n"); frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); @@ -290,16 +341,7 @@ fn()"; assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); - let code = "Q"; - frontend.send_execute_request(code, ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - frontend.recv_iopub_idle(); - - assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); } #[test] @@ -338,6 +380,58 @@ fn test_execute_request_browser_stdin() { frontend.recv_iopub_idle(); assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.execute_request_invisibly("Q"); +} + +#[test] +fn test_execute_request_browser_multiple_expressions() { + let frontend = DummyArkFrontend::lock(); + + // Ideally the evaluation of `1` would be cancelled + let code = "browser()\n1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Even if we could cancel pending expressions, it would still be possible + // to run multiple expressions in a debugger prompt + let code = "1\n2"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("[1] 1\n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 2"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // But getting in a nested browser session with a pending expression would + // cancel it (not the case currently) + let code = "browser()\n1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("Called from: top level \n"); + + assert_eq!(frontend.recv_iopub_execute_result(), "[1] 1"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // Quit session let code = "Q"; frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); @@ -350,17 +444,75 @@ fn test_execute_request_browser_stdin() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +#[test] +fn test_execute_request_browser_local_variable() { + let frontend = DummyArkFrontend::lock(); + + let code = "local({\n local_foo <- 1\n browser()\n})"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout( + "Called from: eval(quote({\n local_foo <- 1\n browser()\n}), new.env())\n", + ); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "local_foo"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Should ideally be `recv_iopub_execute_result()`, but auto-printing + // detection currently does not work reliably in debug REPLs + frontend.recv_iopub_stream_stdout("[1] 1\n"); + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.execute_request_invisibly("Q"); +} + #[test] fn test_execute_request_error() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("stop('foobar')", ExecuteRequestOptions::default()); + frontend.execute_request_error("stop('foobar')", |error_msg| { + assert!(error_msg.contains("foobar")); + }); +} + +#[test] +fn test_execute_request_error_with_accumulated_output() { + // Test that when the very last input output and then throws an error, + // the accumulated output is flushed before the error is reported. + // This tests the autoprint buffer flush logic in error handling. + let frontend = DummyArkFrontend::lock(); + + let code = "{ + print.foo <- function(x) { + print(unclass(x)) + stop(\"foo\") + } + structure(42, class = \"foo\") + }"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "stop('foobar')"); - assert!(frontend.recv_iopub_execute_error().contains("foobar")); + assert_eq!(input.code, code); + + // The output from print(1) should be flushed to stdout + frontend.recv_iopub_stream_stdout("[1] 42\n"); + // Then the error should be reported on stderr + assert!(frontend.recv_iopub_execute_error().contains("foo")); frontend.recv_iopub_idle(); assert_eq!( @@ -369,15 +521,58 @@ fn test_execute_request_error() { ); } +#[test] +fn test_execute_request_error_expressions_overflow() { + let frontend = DummyArkFrontend::lock(); + + // Deterministically produce an "evaluation too deeply nested" error + frontend.execute_request_error( + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); f(100)", + |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }, + ); + + // Check we can still evaluate without causing another too deeply nested error + frontend.execute_request_invisibly("f(10)"); +} + +#[test] +fn test_execute_request_error_expressions_overflow_last_value() { + let frontend = DummyArkFrontend::lock(); + + // Set state and last value + frontend.execute_request_invisibly( + "options(expressions = 100); f <- function(x) if (x > 0 ) f(x - 1); invisible('hello')", + ); + + // Check last value is set + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); + + // Deterministically produce an "evaluation too deeply nested" error + frontend.execute_request_error("f(100)", |error_msg| { + assert!(error_msg.contains("evaluation nested too deeply")); + }); + + // Check last value is still set + frontend.execute_request(".Last.value", |result| { + assert_eq!(result, "[1] \"hello\""); + }); +} + #[test] fn test_execute_request_error_multiple_expressions() { let frontend = DummyArkFrontend::lock(); - frontend.send_execute_request("1\nstop('foobar')\n2", ExecuteRequestOptions::default()); + // `print(2)` and `3` are never evaluated + let code = "1\nstop('foobar')\nprint(2)\n3"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); frontend.recv_iopub_busy(); let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, "1\nstop('foobar')\n2"); + assert_eq!(input.code, code); frontend.recv_iopub_stream_stdout("[1] 1\n"); assert!(frontend.recv_iopub_execute_error().contains("foobar")); @@ -418,28 +613,16 @@ fn test_execute_request_multiple_expressions() { fn test_execute_request_single_line_buffer_overflow() { let frontend = DummyArkFrontend::lock(); + // This used to fail back when we were passing inputs down to the REPL from + // our `ReadConsole` handler. Below is the old test description for posterity. + // The newlines do matter for what we are testing here, // due to how we internally split by newlines. We want // to test that the `aaa`s result in an immediate R error, // not in text written to the R buffer that calls `stop()`. let aaa = "a".repeat(4096); let code = format!("quote(\n{aaa}\n)"); - frontend.send_execute_request(code.as_str(), ExecuteRequestOptions::default()); - frontend.recv_iopub_busy(); - - let input = frontend.recv_iopub_execute_input(); - assert_eq!(input.code, code); - - assert!(frontend - .recv_iopub_execute_error() - .contains("Can't pass console input on to R")); - - frontend.recv_iopub_idle(); - - assert_eq!( - frontend.recv_shell_execute_reply_exception(), - input.execution_count - ); + frontend.execute_request(code.as_str(), |result| assert!(result.contains(&aaa))); } #[test] @@ -590,6 +773,98 @@ fn test_stdin_from_menu() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } +// Can debug the base environment (parent is the empty environment) +#[test] +fn test_browser_in_base_env() { + let frontend = DummyArkFrontend::lock(); + + let code = "evalq(browser(), baseenv())"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(browser(), baseenv())\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression + let code = "1 + 1"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("[1] 2\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + +// The minimal environment we can debug in: access to base via `::`. This might +// be a problem for very specialised sandboxing environment, but they can +// temporarily add `::` while debugging. +#[test] +fn test_browser_in_sandboxing_environment() { + let frontend = DummyArkFrontend::lock(); + + let code = " +env <- new.env(parent = emptyenv()) +env$`::` <- `::` +evalq(base::browser(), env)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + // Inside `evalq()` we aren't at top level, so this comes as an iopub stream + // and not an execute result + frontend.recv_iopub_stream_stdout("Called from: evalq(base::browser(), env)\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + // While paused in the debugger, evaluate a simple expression that only + // requires `::` + let code = "base::list"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_stream_stdout("function (...) .Primitive(\"list\")\n"); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + let code = "Q"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.recv_iopub_idle(); + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); +} + #[test] fn test_env_vars() { // These environment variables are set by R's shell script frontend. @@ -610,3 +885,133 @@ fn test_env_vars() { assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); } + +/// Install a SIGINT handler for shutdown tests. This overrides the test runner +/// handler so it doesn't cancel our test. +fn install_sigint_handler() { + extern "C" fn sigint_handler(_: libc::c_int) {} + #[cfg(unix)] + unsafe { + use nix::sys::signal::signal; + use nix::sys::signal::SigHandler; + use nix::sys::signal::Signal; + + signal(Signal::SIGINT, SigHandler::Handler(sigint_handler)).unwrap(); + } +} + +// Note that because of these shutdown tests you _have_ to use `cargo nextest` +// instead of `cargo test`, so that each test has its own process and R thread. +#[test] +#[cfg(unix)] +fn test_shutdown_request() { + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +#[cfg(unix)] +fn test_shutdown_request_with_restart() { + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +static SHUTDOWN_TESTS_ENABLED: bool = false; + +// Can shut down Ark when running a nested debug console +// https://github.com/posit-dev/positron/issues/6553 +#[test] +#[cfg(unix)] +fn test_shutdown_request_browser() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + let code = "browser()"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + assert!(frontend + .recv_iopub_execute_result() + .contains("Called from: top level")); + + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + + frontend.send_shutdown_request(true); + frontend.recv_iopub_busy(); + + // There is a race condition between the Control thread and the Shell + // threads. Ideally we'd wait for both the Shutdown reply and the IOPub Idle + // messages concurrently instead of sequentially. + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, true); + + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} + +#[test] +#[cfg(unix)] +fn test_shutdown_request_while_busy() { + if !SHUTDOWN_TESTS_ENABLED { + return; + } + + install_sigint_handler(); + let frontend = DummyArkFrontend::lock(); + + let code = "Sys.sleep(10)"; + frontend.send_execute_request(code, ExecuteRequestOptions::default()); + frontend.recv_iopub_busy(); + + let input = frontend.recv_iopub_execute_input(); + assert_eq!(input.code, code); + + frontend.send_shutdown_request(false); + frontend.recv_iopub_busy(); + + let reply = frontend.recv_control_shutdown_reply(); + assert_eq!(reply.status, Status::Ok); + assert_eq!(reply.restart, false); + + // It seems this isn't emitted on older R versions + frontend.recv_iopub_stream_stderr("\n"); + frontend.recv_iopub_idle(); + + assert_eq!(frontend.recv_shell_execute_reply(), input.execution_count); + frontend.recv_iopub_idle(); + + DummyArkFrontend::wait_for_cleanup(); +} diff --git a/crates/harp/src/error.rs b/crates/harp/src/error.rs index 5b4f2a5f5..5989986f0 100644 --- a/crates/harp/src/error.rs +++ b/crates/harp/src/error.rs @@ -46,7 +46,6 @@ pub enum Error { InvalidUtf8(Utf8Error), ParseSyntaxError { message: String, - line: i32, }, MissingValueError, MissingColumnError { @@ -200,8 +199,8 @@ impl fmt::Display for Error { write!(f, "Invalid UTF-8 in string: {}", error) }, - Error::ParseSyntaxError { message, line } => { - write!(f, "Syntax error on line {} when parsing: {}", line, message) + Error::ParseSyntaxError { message } => { + write!(f, "Syntax error: {}", message) }, Error::MissingValueError => { diff --git a/crates/harp/src/exec.rs b/crates/harp/src/exec.rs index 808a954da..672fcaec8 100644 --- a/crates/harp/src/exec.rs +++ b/crates/harp/src/exec.rs @@ -367,6 +367,101 @@ where } } +/// Execute a function with a cleanup handler using R's cleanup mechanism. +/// +/// This wraps `R_ExecWithCleanup` to provide execution with guaranteed cleanup, +/// even in case of an R longjump. +/// +/// In case of longjump, `cleanup()` runs but `exec_with_cleanup()` does not +/// return, the lonjump propagates. +/// +/// Note that `fun` and `cleanup` must be longjump-safe: +/// - Only POD types without Drop destructors on the stack +/// - Or protects itself from longjumps via e.g. `try_catch()` +/// ``` +pub fn exec_with_cleanup<'env, F, C, T>(fun: F, cleanup: C) -> T +where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, +{ + struct CleanupData<'a, F, C, T> + where + F: FnOnce() -> T + 'a, + C: FnOnce() + 'a, + { + // slot for the result of the closure + result: &'a mut Option, + closure: Option, + cleanup: Option, + } + + // Allocate stack memory for the result + let mut result: Option = None; + + // Move closures to the payload + let mut callback_data = CleanupData { + result: &mut result, + closure: Some(fun), + cleanup: Some(cleanup), + }; + let payload = &mut callback_data as *mut _ as *mut c_void; + + extern "C-unwind" fn exec_callback<'env, F, C, T>(data: *mut c_void) -> SEXP + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move closure here so it can be called. Required since that's an `FnOnce`. + let closure = take(&mut data.closure).unwrap(); + + // Call closure and store the result in the payload + let result = closure(); + *(data.result) = Some(result); + + // Always return R_NilValue to R_ExecWithCleanup; the real result is in `payload`. + unsafe { R_NilValue } + } + + extern "C-unwind" fn cleanup_callback<'env, F, C, T>(data: *mut c_void) + where + F: FnOnce() -> T, + F: 'env, + C: FnOnce(), + C: 'env, + T: 'env, + { + // SAFETY: `data` points to a `CleanupData` allocated on the caller's stack. + let data: &mut CleanupData = unsafe { &mut *(data as *mut CleanupData) }; + + // Move cleanup closure here so it can be called + if let Some(cleanup) = take(&mut data.cleanup) { + cleanup(); + } + } + + // Call into R; the callbacks will populate `res` and always return R_NilValue. + unsafe { + R_ExecWithCleanup( + Some(exec_callback::), + payload, + Some(cleanup_callback::), + payload, + ) + }; + + // Unwrap Safety: If we get here, we're in the happy path and the result is Some + result.unwrap() +} + pub fn r_peek_error_buffer() -> String { // SAFETY: Returns pointer to static memory buffer owned by R. let buffer = unsafe { R_curErrorBuf() }; @@ -495,6 +590,8 @@ pub fn r_check_stack(size: Option) -> Result<()> { #[cfg(test)] mod tests { use std::ffi::CString; + use std::sync::Arc; + use std::sync::Mutex; use stdext::assert_match; @@ -644,4 +741,52 @@ mod tests { }); }) } + + #[test] + fn test_exec_with_cleanup() { + crate::r_task(|| { + let cleanup_called = Arc::new(Mutex::new(false)); + let cleanup_called_clone = cleanup_called.clone(); + + let result = exec_with_cleanup( + || { + // Create a simple R object and return it directly (T = RObject) + let obj = RObject::from(unsafe { Rf_ScalarInteger(42) }); + obj + }, + || { + *cleanup_called_clone.lock().unwrap() = true; + }, + ); + + assert_eq!(unsafe { Rf_asInteger(*result) }, 42); + assert!( + *cleanup_called.lock().unwrap(), + "Cleanup should have been called" + ); + + // Test error case - cleanup should still be called. + let cleanup_called_error = Arc::new(Mutex::new(false)); + let cleanup_called_error_clone = cleanup_called_error.clone(); + + let result = try_catch(|| { + exec_with_cleanup( + || -> RObject { + let msg = CString::new("ouch").unwrap(); // This leaks + unsafe { Rf_error(msg.as_ptr()) }; + }, + || { + *cleanup_called_error_clone.lock().unwrap() = true; + }, + ) + }); + + assert!(result.is_err()); + + assert!( + *cleanup_called_error.lock().unwrap(), + "Cleanup should have been called on error" + ); + }) + } } diff --git a/crates/harp/src/lib.rs b/crates/harp/src/lib.rs index ea4d66e9f..2e5d17cc1 100644 --- a/crates/harp/src/lib.rs +++ b/crates/harp/src/lib.rs @@ -70,6 +70,7 @@ pub(crate) use harp::fixtures::r_task; pub use harp::object::list_get; pub use harp::object::list_poke; pub use harp::object::RObject; +pub use harp::session::*; pub use harp::symbol::RSymbol; pub use harp::utils::get_option; pub use harp::weak_ref::RWeakRef; diff --git a/crates/harp/src/object.rs b/crates/harp/src/object.rs index 3daa1e0f1..8d4f3eb8e 100644 --- a/crates/harp/src/object.rs +++ b/crates/harp/src/object.rs @@ -238,6 +238,10 @@ pub fn r_list_poke(x: SEXP, i: R_xlen_t, value: SEXP) { } } +pub fn r_list_get(x: SEXP, i: R_xlen_t) -> RObject { + unsafe { RObject::new(VECTOR_ELT(x, i)) } +} + pub fn r_lgl_begin(x: SEXP) -> *mut i32 { unsafe { LOGICAL(x) } } diff --git a/crates/harp/src/parse.rs b/crates/harp/src/parse.rs index 7e819a8ca..d2426876c 100644 --- a/crates/harp/src/parse.rs +++ b/crates/harp/src/parse.rs @@ -28,7 +28,7 @@ pub struct RParseOptions { pub enum ParseResult { Complete(RObject), Incomplete, - SyntaxError { message: String, line: i32 }, + SyntaxError { message: String }, } pub enum ParseInput<'a> { @@ -79,9 +79,7 @@ pub fn parse_exprs_ext<'a>(input: &ParseInput<'a>) -> crate::Result { code: parse_input_as_string(input).unwrap_or(String::from("Conversion error")), message: String::from("Incomplete code"), }), - ParseResult::SyntaxError { message, line } => { - Err(crate::Error::ParseSyntaxError { message, line }) - }, + ParseResult::SyntaxError { message } => Err(crate::Error::ParseSyntaxError { message }), } } @@ -109,17 +107,34 @@ pub fn parse_status<'a>(input: &ParseInput<'a>) -> crate::Result { ParseInput::SrcFile(srcfile) => (srcfile.lines()?, srcfile.inner.clone()), }; - let result: RObject = - try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into())?; + let result = + try_catch(|| libr::R_ParseVector(text.sexp, -1, &mut status, srcfile.sexp).into()); + + let value = match result { + Ok(value) => value, + Err(err) => match err { + // The parser sometimes throws errors instead of returning an + // error flag. Convert these errors to proper syntax errors so + // we don't leak a backtrace making it seem like an internal + // error. + // https://github.com/posit-dev/ark/issues/598 + // https://github.com/posit-dev/ark/issues/722 + crate::Error::TryCatchError { message, .. } => { + return Ok(ParseResult::SyntaxError { message }); + }, + _ => { + return Err(err); + }, + }, + }; match status { - libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(result)), + libr::ParseStatus_PARSE_OK => Ok(ParseResult::Complete(value)), libr::ParseStatus_PARSE_INCOMPLETE => Ok(ParseResult::Incomplete), libr::ParseStatus_PARSE_ERROR => Ok(ParseResult::SyntaxError { message: CStr::from_ptr(libr::get(libr::R_ParseErrorMsg).as_ptr()) .to_string_lossy() .to_string(), - line: libr::get(libr::R_ParseError) as i32, }), _ => { // Should not get here @@ -207,15 +222,16 @@ mod tests { // Error assert_match!( parse_status(&ParseInput::Text("42 + _")), - Err(_) => {} + Ok(ParseResult::SyntaxError { message }) => { + assert!(message.contains("invalid use of pipe placeholder")); + } ); // "normal" syntax error assert_match!( parse_status(&ParseInput::Text("1+1\n*42")), - Ok(ParseResult::SyntaxError {message, line}) => { + Ok(ParseResult::SyntaxError { message }) => { assert!(message.contains("unexpected")); - assert_eq!(line, 2); } ); diff --git a/crates/harp/src/parser/srcref.rs b/crates/harp/src/parser/srcref.rs index 91562fd94..65e7f58c0 100644 --- a/crates/harp/src/parser/srcref.rs +++ b/crates/harp/src/parser/srcref.rs @@ -153,6 +153,42 @@ impl TryFrom<&harp::CharacterVector> for SrcFile { } } +pub fn get_srcref(srcrefs: libr::SEXP, ind: isize) -> RObject { + if crate::r_is_null(srcrefs) { + return RObject::null(); + } + + if harp::r_length(srcrefs) <= ind { + return RObject::null(); + } + + let result = harp::list_get(srcrefs, ind); + + if crate::r_is_null(result) { + return RObject::null(); + } + + if unsafe { libr::TYPEOF(result) as u32 } != libr::INTSXP { + return RObject::null(); + } + + if harp::r_length(result) < 6 { + return RObject::null(); + } + + RObject::new(result) +} + +pub fn get_block_srcrefs(call: libr::SEXP) -> RObject { + let srcrefs = unsafe { libr::Rf_getAttrib(call, libr::R_SrcrefSymbol) }; + + if unsafe { libr::TYPEOF(srcrefs) as u32 } == libr::VECSXP { + return RObject::new(srcrefs); + } + + RObject::null() +} + #[cfg(test)] mod tests { use std::ops::Range; diff --git a/crates/harp/src/session.rs b/crates/harp/src/session.rs index 6d75e53b0..261ab0164 100644 --- a/crates/harp/src/session.rs +++ b/crates/harp/src/session.rs @@ -27,6 +27,7 @@ static SESSION_INIT: Once = Once::new(); static mut NFRAME_CALL: Option = None; static mut SYS_CALLS_CALL: Option = None; static mut SYS_FRAMES_CALL: Option = None; +static mut CURRENT_ENV_CALL: Option = None; pub fn r_n_frame() -> crate::Result { SESSION_INIT.call_once(init_interface); @@ -60,6 +61,11 @@ pub fn r_sys_frames() -> crate::Result { } } +pub fn r_current_frame() -> RObject { + SESSION_INIT.call_once(init_interface); + unsafe { libr::Rf_eval(CURRENT_ENV_CALL.unwrap_unchecked(), R_BaseEnv) }.into() +} + pub fn r_sys_functions() -> crate::Result { unsafe { let mut protect = RProtect::new(); @@ -150,5 +156,16 @@ fn init_interface() { let sys_frames_call = r_lang!(r_symbol!("sys.frames")); R_PreserveObject(sys_frames_call); SYS_FRAMES_CALL = Some(sys_frames_call); + + // Create a closure that calls `sys.frame(-1)` to get the current + // evaluation environment. We use `sys.frame(-1)` from within a closure + // because `sys.nframe()` returns the frame number where evaluation + // occurs, not the number of frames on the stack. By calling from a + // closure, we push a new frame and use negative indexing to get the + // previous frame (the actual current environment). + let closure = harp::parse_eval_base("function() sys.frame(-1)").unwrap(); + let current_env_call = r_lang!(closure.sexp); + R_PreserveObject(current_env_call); + CURRENT_ENV_CALL = Some(current_env_call); } } diff --git a/crates/libr/src/r.rs b/crates/libr/src/r.rs index eb927e48c..510a57dad 100644 --- a/crates/libr/src/r.rs +++ b/crates/libr/src/r.rs @@ -98,6 +98,13 @@ functions::generate! { data: *mut std::ffi::c_void ) -> Rboolean; + pub fn R_ExecWithCleanup( + fun: Option SEXP>, + data: *mut std::ffi::c_void, + cleanfun: Option, + cleandata: *mut std::ffi::c_void + ) -> SEXP; + pub fn R_withCallingErrorHandler( body: Option SEXP>, bdata: *mut std::ffi::c_void, @@ -135,7 +142,7 @@ functions::generate! { pub fn Rf_cons(arg1: SEXP, arg2: SEXP) -> SEXP; - pub fn Rf_defineVar(arg1: SEXP, arg2: SEXP, arg3: SEXP); + pub fn Rf_defineVar(sym: SEXP, value: SEXP, env: SEXP); pub fn Rf_eval(arg1: SEXP, arg2: SEXP) -> SEXP; @@ -617,6 +624,14 @@ constant_globals::generate! { #[default = std::ptr::null_mut()] pub static R_TripleColonSymbol: SEXP; + #[doc = "\"srcfile\""] + #[default = std::ptr::null_mut()] + pub static R_SrcfileSymbol: SEXP; + + #[doc = "\"srcref\""] + #[default = std::ptr::null_mut()] + pub static R_SrcrefSymbol: SEXP; + #[doc = "\"tsp\""] #[default = std::ptr::null_mut()] pub static R_TspSymbol: SEXP; @@ -689,6 +704,8 @@ mutable_globals::generate! { pub static mut R_Srcref: SEXP; + pub static mut R_Visible: Rboolean; + // ----------------------------------------------------------------------------------- // Unix @@ -743,6 +760,9 @@ mutable_globals::generate! { #[cfg(target_family = "unix")] pub static mut ptr_R_Suicide: Option; + #[cfg(target_family = "unix")] + pub static mut ptr_R_CleanUp: Option; + // ----------------------------------------------------------------------------------- // Windows