@@ -28,6 +28,7 @@ use tokio_tungstenite::tungstenite::{
2828} ;
2929use tracing:: Instrument ;
3030use url:: Url ;
31+ use uuid:: Uuid ;
3132
3233use crate :: {
3334 WebSocketHandle , custom_serve:: CustomServeTrait , errors, metrics,
@@ -1171,7 +1172,7 @@ impl ProxyService {
11711172 }
11721173
11731174 // Handle WebSocket upgrade properly with hyper_tungstenite
1174- tracing:: debug!( "Upgrading client connection to WebSocket" ) ;
1175+ tracing:: debug!( %req_path , "Upgrading client connection to WebSocket" ) ;
11751176 let ( client_response, client_ws) = match hyper_tungstenite:: upgrade ( req, None ) {
11761177 Ok ( x) => {
11771178 tracing:: debug!( "Client WebSocket upgrade successful" ) ;
@@ -1782,18 +1783,20 @@ impl ProxyService {
17821783 }
17831784 ResolveRouteOutput :: Response ( _) => unreachable ! ( ) ,
17841785 ResolveRouteOutput :: CustomServe ( mut handlers) => {
1785- tracing:: debug!( "Spawning task to handle WebSocket communication" ) ;
1786+ tracing:: debug!( %req_path , "Spawning task to handle WebSocket communication" ) ;
17861787 let mut request_context = request_context. clone ( ) ;
17871788 let req_headers = req_headers. clone ( ) ;
17881789 let req_path = req_path. clone ( ) ;
17891790 let req_host = req_host. clone ( ) ;
17901791
1791- // TODO: Handle errors here, the error message is lost
17921792 tokio:: spawn (
17931793 async move {
1794+ let request_id = Uuid :: new_v4 ( ) ;
17941795 let mut attempts = 0u32 ;
17951796
1796- let ws_handle = WebSocketHandle :: new ( client_ws) ;
1797+ let ws_handle = WebSocketHandle :: new ( client_ws)
1798+ . await
1799+ . context ( "failed initiating websocket handle" ) ?;
17971800
17981801 loop {
17991802 match handlers
@@ -1802,6 +1805,7 @@ impl ProxyService {
18021805 & req_headers,
18031806 & req_path,
18041807 & mut request_context,
1808+ request_id,
18051809 )
18061810 . await
18071811 {
@@ -1825,13 +1829,17 @@ impl ProxyService {
18251829 break ;
18261830 }
18271831 Err ( err) => {
1832+ tracing:: debug!( ?err, "websocket handler error" ) ;
1833+
18281834 attempts += 1 ;
18291835 if attempts > max_attempts || !is_retryable_ws_error ( & err) {
1836+ tracing:: debug!( ?attempts, "WebSocket failed to reconnect" ) ;
1837+
18301838 // Close WebSocket with error
18311839 ws_handle
1832- . accept_and_send ( to_hyper_close ( Some (
1833- err_to_close_frame ( err, ray_id) ,
1834- ) ) )
1840+ . send ( to_hyper_close ( Some ( err_to_close_frame (
1841+ err, ray_id,
1842+ ) ) ) )
18351843 . await ?;
18361844
18371845 // Flush to ensure close frame is sent
@@ -1846,6 +1854,13 @@ impl ProxyService {
18461854 attempts,
18471855 initial_interval,
18481856 ) ;
1857+ let backoff = Duration :: from_millis ( 100 ) ;
1858+
1859+ tracing:: debug!(
1860+ ?backoff,
1861+ "WebSocket attempt {attempts} failed (service unavailable)"
1862+ ) ;
1863+
18491864 tokio:: time:: sleep ( backoff) . await ;
18501865
18511866 match state
@@ -1864,11 +1879,9 @@ impl ProxyService {
18641879 }
18651880 Ok ( ResolveRouteOutput :: Response ( response) ) => {
18661881 ws_handle
1867- . accept_and_send ( to_hyper_close ( Some (
1868- str_to_close_frame (
1869- response. message . as_ref ( ) ,
1870- ) ,
1871- ) ) )
1882+ . send ( to_hyper_close ( Some ( str_to_close_frame (
1883+ response. message . as_ref ( ) ,
1884+ ) ) ) )
18721885 . await ?;
18731886
18741887 // Flush to ensure close frame is sent
@@ -1879,12 +1892,10 @@ impl ProxyService {
18791892 }
18801893 Ok ( ResolveRouteOutput :: Target ( _) ) => {
18811894 ws_handle
1882- . accept_and_send ( to_hyper_close ( Some (
1883- err_to_close_frame (
1884- errors:: WebSocketTargetChanged . build ( ) ,
1885- ray_id,
1886- ) ,
1887- ) ) )
1895+ . send ( to_hyper_close ( Some ( err_to_close_frame (
1896+ errors:: WebSocketTargetChanged . build ( ) ,
1897+ ray_id,
1898+ ) ) ) )
18881899 . await ?;
18891900
18901901 // Flush to ensure close frame is sent
@@ -1897,9 +1908,9 @@ impl ProxyService {
18971908 }
18981909 Err ( err) => {
18991910 ws_handle
1900- . accept_and_send ( to_hyper_close ( Some (
1901- err_to_close_frame ( err, ray_id) ,
1902- ) ) )
1911+ . send ( to_hyper_close ( Some ( err_to_close_frame (
1912+ err, ray_id,
1913+ ) ) ) )
19031914 . await ?;
19041915
19051916 // Flush to ensure close frame is sent
@@ -1947,13 +1958,17 @@ impl ProxyService {
19471958
19481959impl ProxyService {
19491960 // Process an individual request
1950- #[ tracing:: instrument( name = "guard_request" , skip_all) ]
1961+ #[ tracing:: instrument( name = "guard_request" , skip_all, fields ( ray_id , req_id ) ) ]
19511962 pub async fn process ( & self , mut req : Request < BodyIncoming > ) -> Result < Response < ResponseBody > > {
19521963 let start_time = Instant :: now ( ) ;
19531964
19541965 let request_ids = RequestIds :: new ( self . state . config . dc_label ( ) ) ;
19551966 req. extensions_mut ( ) . insert ( request_ids) ;
19561967
1968+ tracing:: Span :: current ( )
1969+ . record ( "req_id" , request_ids. req_id . to_string ( ) )
1970+ . record ( "ray_id" , request_ids. ray_id . to_string ( ) ) ;
1971+
19571972 // Create request context for analytics tracking
19581973 let mut request_context =
19591974 RequestContext :: new ( self . state . clickhouse_inserter . clone ( ) , request_ids) ;
@@ -2063,35 +2078,50 @@ impl ProxyService {
20632078
20642079 // If we receive an error during a websocket request, we attempt to open the websocket anyway
20652080 // so we can send the error via websocket instead of http. Most websocket clients don't handle
2066- // HTTP errors in a meaningful way for the user resulting in unhelpful errors
2081+ // HTTP errors in a meaningful way resulting in unhelpful errors for the user
20672082 if is_websocket {
20682083 tracing:: debug!( "Upgrading client connection to WebSocket for error proxy" ) ;
20692084 match hyper_tungstenite:: upgrade ( mock_req, None ) {
20702085 Ok ( ( client_response, client_ws) ) => {
20712086 tracing:: debug!( "Client WebSocket upgrade for error proxy successful" ) ;
20722087
2073- tokio:: spawn ( async move {
2074- let ws_handle = WebSocketHandle :: new ( client_ws) ;
2075- let frame = err_to_close_frame ( err, Some ( request_ids. ray_id ) ) ;
2088+ tokio:: spawn (
2089+ async move {
2090+ let ws_handle = match WebSocketHandle :: new ( client_ws) . await {
2091+ Ok ( ws_handle) => ws_handle,
2092+ Err ( err) => {
2093+ tracing:: debug!(
2094+ ?err,
2095+ "failed initiating websocket handle for error proxy"
2096+ ) ;
2097+ return ;
2098+ }
2099+ } ;
2100+ let frame = err_to_close_frame ( err, Some ( request_ids. ray_id ) ) ;
20762101
2077- // Manual conversion to handle different tungstenite versions
2078- let code_num: u16 = frame. code . into ( ) ;
2079- let reason = frame. reason . clone ( ) ;
2102+ // Manual conversion to handle different tungstenite versions
2103+ let code_num: u16 = frame. code . into ( ) ;
2104+ let reason = frame. reason . clone ( ) ;
20802105
2081- if let Err ( err) = ws_handle
2082- . accept_and_send (
2083- tokio_tungstenite:: tungstenite:: Message :: Close ( Some (
2106+ if let Err ( err) = ws_handle
2107+ . send ( tokio_tungstenite:: tungstenite:: Message :: Close ( Some (
20842108 tokio_tungstenite:: tungstenite:: protocol:: CloseFrame {
20852109 code : code_num. into ( ) ,
20862110 reason,
20872111 } ,
2088- ) ) ,
2089- )
2090- . await
2091- {
2092- tracing:: debug!( ?err, "failed sending error proxy" ) ;
2112+ ) ) )
2113+ . await
2114+ {
2115+ tracing:: debug!(
2116+ ?err,
2117+ "failed sending websocket error proxy"
2118+ ) ;
2119+ }
20932120 }
2094- } ) ;
2121+ . instrument (
2122+ tracing:: info_span!( "ws_error_proxy_task" , ?request_ids. ray_id) ,
2123+ ) ,
2124+ ) ;
20952125
20962126 // Return the response that will upgrade the client connection
20972127 // For proper WebSocket handshaking, we need to preserve the original response
0 commit comments