From 4af418fddd0ed2d9a8861007112006fc657ecbac Mon Sep 17 00:00:00 2001
From: Wez Furlong <wez@wezfurlong.org>
Date: Thu, 9 May 2024 12:52:29 -0700
Subject: [PATCH] mux: enable ssh agent forwarding

This is done with a wezterm twist: not only is the auth sock forwarded,
but the mux on the remote end will automatically maintain a symlink to
point to the auth sock of the most recently active mux client, and set
SSH_AUTH_SOCK to that symlink so that your remote panes should always be
referencing something sane.

refs: https://github.com/wez/wezterm/issues/1647
refs: https://github.com/wez/wezterm/discussions/988
---
 config/src/config.rs                          |   3 +
 docs/changelog.md                             |   5 +
 .../config/lua/config/mux_enable_ssh_agent.md |  26 +++
 mux/src/domain.rs                             |   3 +
 mux/src/lib.rs                                |  13 ++
 mux/src/ssh_agent.rs                          | 209 ++++++++++++++++++
 wezterm-mux-server-impl/src/sessionhandler.rs |   2 +
 7 files changed, 261 insertions(+)
 create mode 100644 docs/config/lua/config/mux_enable_ssh_agent.md
 create mode 100644 mux/src/ssh_agent.rs

diff --git a/config/src/config.rs b/config/src/config.rs
index 8f52b91e5..80110678a 100644
--- a/config/src/config.rs
+++ b/config/src/config.rs
@@ -377,6 +377,9 @@ pub struct Config {
     #[dynamic(default = "default_mux_output_parser_buffer_size")]
     pub mux_output_parser_buffer_size: usize,
 
+    #[dynamic(default = "default_true")]
+    pub mux_enable_ssh_agent: bool,
+
     /// How many ms to delay after reading a chunk of output
     /// in order to try to coalesce fragmented writes into
     /// a single bigger chunk of output and reduce the chances
diff --git a/docs/changelog.md b/docs/changelog.md
index e1076a4b1..150413c92 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -39,6 +39,11 @@ As features stabilize some brief notes about them will accumulate here.
 * [wezterm.serde](config/lua/wezterm.serde/index.md) module for serialization
   and deserialization of JSON, TOML and YAML. Thanks to @expnn! #4969
 * `wezterm ssh` now supports agent forwarding. Thanks to @Riatre! #5345
+* SSH multiplexer domains now support agent forwarding, and will automatically
+  keep `SSH_AUTH_SOCK` set to an appropriate value on the destination host,
+  depending on the value of the new
+  [mux_enable_ssh_agent](config/lua/config/mux_enable_ssh_agent.md) option.
+  ?988 #1647
 
 #### Fixed
 * Race condition when very quickly adjusting font scale, and other improvements
diff --git a/docs/config/lua/config/mux_enable_ssh_agent.md b/docs/config/lua/config/mux_enable_ssh_agent.md
new file mode 100644
index 000000000..92d2eaecf
--- /dev/null
+++ b/docs/config/lua/config/mux_enable_ssh_agent.md
@@ -0,0 +1,26 @@
+---
+tags:
+  - multiplexing
+  - ssh
+---
+# `mux_enable_ssh_agent = true`
+
+{{since('nightly')}}
+
+When set to `true` (the default), wezterm will configure the `SSH_AUTH_SOCK`
+environment variable for panes spawned in the `local` domain.
+
+The auth sock will point to a symbolic link that will in turn be pointed to the
+authentication socket associated with the most recently active multiplexer
+client.
+
+You can review the authentication socket that will be used for various clients
+by running `wezterm cli list-clients` and inspecting the `SSH_AUTH_SOCK`
+column.
+
+The symlink is updated within (at the time of writing this documentation) 100ms
+of the active Mux client changing.
+
+You can set `mux_enable_ssh_agent = false` to prevent wezterm from assigning
+`SSH_AUTH_SOCK` or updating the symlink.
+
diff --git a/mux/src/domain.rs b/mux/src/domain.rs
index bb7d9b67d..3cb3767d5 100644
--- a/mux/src/domain.rs
+++ b/mux/src/domain.rs
@@ -468,6 +468,9 @@ impl LocalDomain {
             cmd.env("WEZTERM_UNIX_SOCKET", sock);
         }
         cmd.env("WEZTERM_PANE", pane_id.to_string());
+        if let Some(agent) = Mux::get().agent.as_ref() {
+            cmd.env("SSH_AUTH_SOCK", agent.path());
+        }
         self.fixup_command(&mut cmd).await?;
         Ok(cmd)
     }
diff --git a/mux/src/lib.rs b/mux/src/lib.rs
index 4f6b330f1..abb2e196e 100644
--- a/mux/src/lib.rs
+++ b/mux/src/lib.rs
@@ -1,5 +1,6 @@
 use crate::client::{ClientId, ClientInfo};
 use crate::pane::{CachePolicy, Pane, PaneId};
+use crate::ssh_agent::AgentProxy;
 use crate::tab::{SplitRequest, Tab, TabId};
 use crate::window::{Window, WindowId};
 use anyhow::{anyhow, Context, Error};
@@ -38,6 +39,7 @@ pub mod localpane;
 pub mod pane;
 pub mod renderable;
 pub mod ssh;
+pub mod ssh_agent;
 pub mod tab;
 pub mod termwiztermtab;
 pub mod tmux;
@@ -108,6 +110,7 @@ pub struct Mux {
     identity: RwLock<Option<Arc<ClientId>>>,
     num_panes_by_workspace: RwLock<HashMap<String, usize>>,
     main_thread_id: std::thread::ThreadId,
+    agent: Option<AgentProxy>,
 }
 
 const BUFSIZE: usize = 1024 * 1024;
@@ -421,6 +424,12 @@ impl Mux {
             );
         }
 
+        let agent = if config::configuration().mux_enable_ssh_agent {
+            Some(AgentProxy::new())
+        } else {
+            None
+        };
+
         Self {
             tabs: RwLock::new(HashMap::new()),
             panes: RwLock::new(HashMap::new()),
@@ -434,6 +443,7 @@ impl Mux {
             identity: RwLock::new(None),
             num_panes_by_workspace: RwLock::new(HashMap::new()),
             main_thread_id: std::thread::current().id(),
+            agent,
         }
     }
 
@@ -471,6 +481,9 @@ impl Mux {
         if let Some(info) = self.clients.write().get_mut(client_id) {
             info.update_last_input();
         }
+        if let Some(agent) = &self.agent {
+            agent.update_target();
+        }
     }
 
     pub fn record_input_for_current_identity(&self) {
diff --git a/mux/src/ssh_agent.rs b/mux/src/ssh_agent.rs
new file mode 100644
index 000000000..d751ffb79
--- /dev/null
+++ b/mux/src/ssh_agent.rs
@@ -0,0 +1,209 @@
+use crate::{ClientId, Mux};
+use anyhow::Context;
+use chrono::{DateTime, Duration, Utc};
+use parking_lot::RwLock;
+#[cfg(unix)]
+use std::os::unix::fs::symlink as symlink_file;
+#[cfg(windows)]
+use std::os::windows::fs::symlink_file;
+use std::path::{Path, PathBuf};
+use std::sync::mpsc::{sync_channel, Receiver, SyncSender};
+use std::sync::Arc;
+
+/// AgentProxy manages an agent.PID symlink in the wezterm runtime
+/// directory.
+///
+/// The intent is to maintain the symlink and have it point to the
+/// appropriate ssh agent socket path for the most recently active
+/// mux client.
+///
+/// Why symlink rather than running an agent proxy socket of our own?
+/// Some agent implementations use low level unix socket operations
+/// to decide whether the client process is allowed to consume
+/// the agent or not, and us sitting in the middle breaks that.
+///
+/// As a further complication, when a wezterm proxy client is present, both
+/// the proxy and the mux instance inside a gui tend to be updated together,
+/// with the gui often being touched last.
+///
+/// To deal with that we de-bounce input events and weight
+/// proxy clients higher so that we can avoid thrashing
+/// between gui and proxy.
+///
+/// The consequence of this is that there is 100ms of artificial latency to
+/// detect a change in the active client. This number was selected because it
+/// is unlikely for a human to be able to switch devices that quickly.
+///
+/// How is this used? The Mux::client_had_input function will call
+/// AgentProxy::update_target to signal when the active client may have changed.
+pub struct AgentProxy {
+    /// Location of the symlink that this proxy maintains
+    sock_path: PathBuf,
+    /// The client whose auth sock was most recently selected as the target
+    current_target: RwLock<Option<Arc<ClientId>>>,
+    /// Wakes the updater thread; see `update_target`
+    sender: SyncSender<()>,
+}
+
+impl Drop for AgentProxy {
+    fn drop(&mut self) {
+        let _ = std::fs::remove_file(&self.sock_path); // best-effort cleanup; errors are ignored
+    }
+}
+
+/// Points the symlink `link` at `original`, replacing any existing entry at `link`.
+fn update_symlink<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> anyhow::Result<()> {
+    let (original, link) = (original.as_ref(), link.as_ref());
+
+    match symlink_file(original, link) {
+        Ok(()) => Ok(()),
+        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
+            // A symlink cannot be retargeted in place, so remove it and retry.
+            std::fs::remove_file(link)
+                .with_context(|| format!("failed to remove {}", link.display()))?;
+            // with_context attaches the retry's own error as the cause, so the
+            // stale AlreadyExists error from the first attempt is not repeated.
+            symlink_file(original, link).with_context(|| {
+                format!(
+                    "failed to create symlink {} -> {}",
+                    link.display(),
+                    original.display()
+                )
+            })
+        }
+        Err(err) => anyhow::bail!(
+            "failed to create symlink {} -> {}: {err:#}",
+            link.display(),
+            original.display()
+        ),
+    }
+}
+
+impl AgentProxy {
+    /// Creates the proxy: seeds the `agent.PID` symlink from any inherited
+    /// `SSH_AUTH_SOCK` and spawns the thread that services update requests.
+    pub fn new() -> Self {
+        let pid = std::process::id();
+        let sock_path = config::RUNTIME_DIR.join(format!("agent.{pid}"));
+
+        // var_os rather than var so that a non-UTF-8 path is still honored
+        if let Some(inherited) = std::env::var_os("SSH_AUTH_SOCK") {
+            if let Err(err) = update_symlink(&inherited, &sock_path) {
+                log::error!("failed to set {sock_path:?} to initial inherited SSH_AUTH_SOCK value of {inherited:?}: {err:#}");
+            }
+        }
+
+        let (sender, receiver) = sync_channel(16);
+
+        std::thread::spawn(move || Self::process_updates(receiver));
+
+        Self {
+            sock_path,
+            current_target: RwLock::new(None),
+            sender,
+        }
+    }
+
+    /// The path of the symlink, suitable for use as `SSH_AUTH_SOCK`.
+    pub fn path(&self) -> &Path {
+        &self.sock_path
+    }
+
+    /// Signals the updater thread that the active client may have changed.
+    pub fn update_target(&self) {
+        // If the send fails, the channel is most likely
+        // full, which means that the updater thread is
+        // going to observe the now-current state when
+        // it wakes up, so we needn't try any harder
+        self.sender.try_send(()).ok();
+    }
+
+    /// Body of the updater thread; returns once the channel is disconnected.
+    fn process_updates(receiver: Receiver<()>) {
+        while receiver.recv().is_ok() {
+            // De-bounce multiple input events so that we don't quickly
+            // thrash between the host and proxy value
+            std::thread::sleep(std::time::Duration::from_millis(100));
+            while receiver.try_recv().is_ok() {}
+
+            if let Some(agent) = &Mux::get().agent {
+                agent.update_now();
+            }
+        }
+    }
+
+    /// Points the symlink at the auth sock of the most recently active client.
+    ///
+    /// Only clients that have an auth sock are considered. If there are none and
+    /// a target was previously recorded, the symlink is pointed at `.` instead.
+    fn update_now(&self) {
+        let mut clients = Mux::get().iter_clients();
+        clients.retain(|info| info.client_id.ssh_auth_sock.is_some());
+
+        clients.sort_by(|a, b| {
+            // The biggest last_input time is most recent, so it sorts sooner.
+            // However, when using a proxy into a gui mux, both the proxy and the
+            // gui will update around the same time, with the gui often being
+            // updated fractionally after the proxy.
+            // In this situation we want the proxy to be selected, so we weight
+            // proxy entries slightly higher by adding a small Duration to
+            // the actual observed value.
+            // `via proxy pid` is coupled with the Pdu::SetClientId logic
+            // in wezterm-mux-server-impl/src/sessionhandler.rs
+            const PROXY_MARKER: &str = "via proxy pid";
+            let a_proxy = a.client_id.hostname.contains(PROXY_MARKER);
+            let b_proxy = b.client_id.hostname.contains(PROXY_MARKER);
+
+            fn adjust_for_proxy(time: DateTime<Utc>, is_proxy: bool) -> DateTime<Utc> {
+                if is_proxy {
+                    time + Duration::milliseconds(100)
+                } else {
+                    time
+                }
+            }
+
+            let a_time = adjust_for_proxy(a.last_input, a_proxy);
+            let b_time = adjust_for_proxy(b.last_input, b_proxy);
+
+            b_time.cmp(&a_time)
+        });
+
+        log::trace!("filtered to {clients:#?}");
+        match clients.first() {
+            Some(info) => {
+                let needs_update = self.current_target.read().as_ref() != Some(&info.client_id);
+
+                if needs_update {
+                    let ssh_auth_sock = info
+                        .client_id
+                        .ssh_auth_sock
+                        .as_ref()
+                        .expect("we checked in the retain above");
+                    log::trace!(
+                        "Will update {} -> {ssh_auth_sock}",
+                        self.sock_path.display(),
+                    );
+                    self.current_target.write().replace(info.client_id.clone());
+
+                    if let Err(err) = update_symlink(ssh_auth_sock, &self.sock_path) {
+                        log::error!(
+                            "Problem updating {} -> {ssh_auth_sock}: {err:#}",
+                            self.sock_path.display(),
+                        );
+                    }
+                }
+            }
+            None => {
+                if self.current_target.write().take().is_some() {
+                    log::trace!("Updating agent to be bogus");
+                    if let Err(err) = update_symlink(".", &self.sock_path) {
+                        log::error!(
+                            "Problem updating {} -> .: {err:#}",
+                            self.sock_path.display()
+                        );
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/wezterm-mux-server-impl/src/sessionhandler.rs b/wezterm-mux-server-impl/src/sessionhandler.rs
index 0c5725af8..a8a497cdf 100644
--- a/wezterm-mux-server-impl/src/sessionhandler.rs
+++ b/wezterm-mux-server-impl/src/sessionhandler.rs
@@ -311,6 +311,8 @@ impl SessionHandler {
                     // on from the `wezterm cli list-clients` information
                     if let Some(proxy_id) = &self.proxy_client_id {
                         client_id.ssh_auth_sock = proxy_id.ssh_auth_sock.clone();
+                        // Note that this `via proxy pid` string is coupled
+                        // with the logic in mux/src/ssh_agent
                         client_id.hostname =
                             format!("{} (via proxy pid {})", client_id.hostname, proxy_id.pid);
                     }