better reconnection better tracing
This commit is contained in:
		
							parent
							
								
									aefb726109
								
							
						
					
					
						commit
						642abf6aa5
					
				
					 2 changed files with 73 additions and 43 deletions
				
			
		
							
								
								
									
										114
									
								
								src/indexer.rs
									
									
									
									
									
								
							
							
						
						
									
										114
									
								
								src/indexer.rs
									
									
									
									
									
								
							|  | @ -1,6 +1,6 @@ | ||||||
| use std::{io::Cursor, sync::Arc}; | use std::{io::Cursor, sync::Arc, time::Duration}; | ||||||
| 
 | 
 | ||||||
| use anyhow::{anyhow, bail, Context, Result}; | use anyhow::{bail, Result}; | ||||||
| use atrium_api::com::atproto::sync::subscribe_repos; | use atrium_api::com::atproto::sync::subscribe_repos; | ||||||
| use bytes::Bytes; | use bytes::Bytes; | ||||||
| use fastwebsockets::{FragmentCollector, OpCode, Payload, WebSocketError}; | use fastwebsockets::{FragmentCollector, OpCode, Payload, WebSocketError}; | ||||||
|  | @ -8,14 +8,8 @@ use hyper::{header, upgrade::Upgraded, Request, Uri}; | ||||||
| use hyper_util::rt::{TokioExecutor, TokioIo}; | use hyper_util::rt::{TokioExecutor, TokioIo}; | ||||||
| use ipld_core::ipld::Ipld; | use ipld_core::ipld::Ipld; | ||||||
| use serde_ipld_dagcbor::DecodeError; | use serde_ipld_dagcbor::DecodeError; | ||||||
| use tokio::{ | use tokio::{net::TcpStream, sync::mpsc}; | ||||||
|     net::TcpStream, | use tokio_rustls::rustls::pki_types::ServerName; | ||||||
|     sync::{ |  | ||||||
|         broadcast::{self}, |  | ||||||
|         mpsc, |  | ||||||
|     }, |  | ||||||
| }; |  | ||||||
| use tokio_rustls::{rustls::pki_types::ServerName, TlsConnector}; |  | ||||||
| 
 | 
 | ||||||
| use crate::{ | use crate::{ | ||||||
|     http::body_empty, |     http::body_empty, | ||||||
|  | @ -60,22 +54,46 @@ async fn create_ws_client( | ||||||
| struct DataServerSubscription { | struct DataServerSubscription { | ||||||
|     server: Arc<RelayServer>, |     server: Arc<RelayServer>, | ||||||
|     host: String, |     host: String, | ||||||
|     raw_block_tx: broadcast::Sender<Bytes>, |  | ||||||
|     event_tx: mpsc::Sender<StreamEvent>, |     event_tx: mpsc::Sender<StreamEvent>, | ||||||
|     last_seq: Option<i128>, |     last_seq: Option<i64>, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl DataServerSubscription { | impl DataServerSubscription { | ||||||
|     fn new(server: Arc<RelayServer>, host: String) -> Self { |     fn new(server: Arc<RelayServer>, host: String) -> Self { | ||||||
|         Self { |         Self { | ||||||
|             host, |             host, | ||||||
|             raw_block_tx: server.raw_block_tx.clone(), |  | ||||||
|             event_tx: server.event_tx.clone(), |             event_tx: server.event_tx.clone(), | ||||||
|             last_seq: None, |             last_seq: None, | ||||||
|             server, |             server, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     async fn handle_commit( | ||||||
|  |         &mut self, | ||||||
|  |         payload: subscribe_repos::Commit, | ||||||
|  |     ) -> Result<Option<StreamEventPayload>> { | ||||||
|  |         let user = lookup_user(&self.server, &payload.repo).await?; | ||||||
|  |         let Some(pds) = user.pds else { | ||||||
|  |             bail!("user has no associated pds? {:?}", user); | ||||||
|  |         }; | ||||||
|  |         let uri: Uri = pds.parse()?; | ||||||
|  |         if uri.authority().map(|a| a.host()) != Some(&self.host) { | ||||||
|  |             bail!( | ||||||
|  |                 "commit from non-authoritative pds (got {} expected {})", | ||||||
|  |                 self.host, | ||||||
|  |                 pds | ||||||
|  |             ); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if user.takedown { | ||||||
|  |             tracing::debug!(did = %user.did, seq = %payload.seq, "dropping commit event from taken-down user"); | ||||||
|  |             return Ok(None); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         self.last_seq = Some(payload.seq); | ||||||
|  |         Ok(Some(StreamEventPayload::Commit(payload))) | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     async fn handle_event(&mut self, frame: Bytes) -> Result<()> { |     async fn handle_event(&mut self, frame: Bytes) -> Result<()> { | ||||||
|         // TODO: validate if this message is valid to come from this host
 |         // TODO: validate if this message is valid to come from this host
 | ||||||
| 
 | 
 | ||||||
|  | @ -92,52 +110,60 @@ impl DataServerSubscription { | ||||||
|             Some("#commit") => { |             Some("#commit") => { | ||||||
|                 let payload = |                 let payload = | ||||||
|                     serde_ipld_dagcbor::from_slice::<subscribe_repos::Commit>(payload_buf)?; |                     serde_ipld_dagcbor::from_slice::<subscribe_repos::Commit>(payload_buf)?; | ||||||
| 
 |                 self.handle_commit(payload).await? | ||||||
|                 let user = lookup_user(&self.server, &payload.repo).await?; |  | ||||||
|                 let Some(pds) = user.pds else { |  | ||||||
|                     bail!("user has no associated pds? {}", user.did); |  | ||||||
|                 }; |  | ||||||
|                 let uri: Uri = pds.parse()?; |  | ||||||
|                 if uri.authority().map(|a| a.host()) != Some(&self.host) { |  | ||||||
|                     bail!( |  | ||||||
|                         "commit from non-authoritative pds (got {} expected {})", |  | ||||||
|                         self.host, |  | ||||||
|                         pds |  | ||||||
|                     ); |  | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|                 StreamEventPayload::Commit(payload) |             Some("#handle") => { | ||||||
|  |                 // TODO
 | ||||||
|  |                 None | ||||||
|             } |             } | ||||||
|  | 
 | ||||||
|  |             Some("#info") => { | ||||||
|  |                 let payload = serde_ipld_dagcbor::from_slice::<subscribe_repos::Info>(payload_buf)?; | ||||||
|  |                 if payload.name == "OutdatedCursor" { | ||||||
|  |                     tracing::warn!(message = ?payload.message, "outdated cursor"); | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 None | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             Some(t) => { |             Some(t) => { | ||||||
|                 tracing::warn!("dropped unknown message type '{}'", t); |                 tracing::warn!("dropped unknown message type '{}'", t); | ||||||
|                 return Ok(()); |                 None | ||||||
|             } |  | ||||||
|             None => { |  | ||||||
|                 return Ok(()); |  | ||||||
|                 // skip ig
 |  | ||||||
|             } |             } | ||||||
|  |             None => None, | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|  |         if let Some(payload) = payload { | ||||||
|             self.event_tx.send((header, payload)).await?; |             self.event_tx.send((header, payload)).await?; | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| pub async fn subscribe_to_host(server: Arc<RelayServer>, host: String) -> Result<()> { | #[tracing::instrument(skip_all, fields(host = %host))] | ||||||
|     tracing::debug!(%host, "establishing connection"); | async fn host_subscription(server: Arc<RelayServer>, host: String) -> Result<()> { | ||||||
|  |     tracing::debug!("establishing connection"); | ||||||
| 
 | 
 | ||||||
|     let mut subscription = DataServerSubscription::new(server, host); |     let mut subscription = DataServerSubscription::new(server, host); | ||||||
| 
 | 
 | ||||||
|  |     // TODO: load seq from db ?
 | ||||||
|  | 
 | ||||||
|     'reconnect: loop { |     'reconnect: loop { | ||||||
|         let mut ws = create_ws_client( |         let mut ws = create_ws_client( | ||||||
|             &subscription.host, |             &subscription.host, | ||||||
|             443, |             443, | ||||||
|             "/xrpc/com.atproto.sync.subscribeRepos", |             &format!( | ||||||
|  |                 "/xrpc/com.atproto.sync.subscribeRepos{}", | ||||||
|  |                 subscription | ||||||
|  |                     .last_seq | ||||||
|  |                     .map(|c| format!("?cursor={c}")) | ||||||
|  |                     .unwrap_or_default() | ||||||
|  |             ), | ||||||
|         ) |         ) | ||||||
|         .await?; |         .await?; | ||||||
| 
 |         tracing::debug!(seq = ?subscription.last_seq, "listening"); | ||||||
|         tracing::debug!(host = %subscription.host, "listening"); |  | ||||||
| 
 | 
 | ||||||
|         loop { |         loop { | ||||||
|             match ws.read_frame().await { |             match ws.read_frame().await { | ||||||
|  | @ -153,15 +179,19 @@ pub async fn subscribe_to_host(server: Arc<RelayServer>, host: String) -> Result | ||||||
|                         tracing::error!("error handling event (skipping): {e:?}"); |                         tracing::error!("error handling event (skipping): {e:?}"); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |                 Ok(frame) if frame.opcode == OpCode::Close => { | ||||||
|  |                     tracing::debug!("got close frame. reconnecting in 10s"); | ||||||
|  |                     tokio::time::sleep(Duration::from_secs(10)).await; | ||||||
|  |                     continue 'reconnect; | ||||||
|  |                 } | ||||||
|                 Ok(frame) => { |                 Ok(frame) => { | ||||||
|                     tracing::warn!("unexpected frame type {:?}", frame.opcode); |                     tracing::warn!("unexpected frame type {:?}", frame.opcode); | ||||||
|                 } |                 } | ||||||
|                 Err(e) => { |                 Err(e) => { | ||||||
|                     tracing::error!(host = %subscription.host, "{e:?}"); |                     tracing::error!("{e:?}"); | ||||||
|                     // TODO: should we try reconnect in every situation?
 |                     // TODO: should we try to reconnect in every situation?
 | ||||||
|                     if let WebSocketError::UnexpectedEOF = e { |                     if let WebSocketError::UnexpectedEOF = e { | ||||||
|                         tracing::debug!(host = %subscription.host, "reconnecting"); |                         tracing::debug!("got unexpected EOF. reconnecting immediately"); | ||||||
|                         // TODO: should we sleep at all here
 |  | ||||||
|                         continue 'reconnect; |                         continue 'reconnect; | ||||||
|                     } else { |                     } else { | ||||||
|                         break 'reconnect; |                         break 'reconnect; | ||||||
|  | @ -171,7 +201,7 @@ pub async fn subscribe_to_host(server: Arc<RelayServer>, host: String) -> Result | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     tracing::debug!(host = %subscription.host, "disconnected"); |     tracing::debug!("disconnected"); | ||||||
| 
 | 
 | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  | @ -183,7 +213,7 @@ pub fn index_servers(server: Arc<RelayServer>, hosts: &[String]) { | ||||||
|         let host = host.to_string(); |         let host = host.to_string(); | ||||||
|         let server = Arc::clone(&server); |         let server = Arc::clone(&server); | ||||||
|         tokio::task::spawn(async move { |         tokio::task::spawn(async move { | ||||||
|             if let Err(e) = subscribe_to_host(server, host).await { |             if let Err(e) = host_subscription(server, host).await { | ||||||
|                 tracing::warn!("encountered error subscribing to PDS: {e:?}"); |                 tracing::warn!("encountered error subscribing to PDS: {e:?}"); | ||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ use crate::{ | ||||||
|     RelayServer, |     RelayServer, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #[derive(Serialize, Deserialize)] | #[derive(Serialize, Deserialize, Debug, Clone)] | ||||||
| pub struct User { | pub struct User { | ||||||
|     pub did: String, |     pub did: String, | ||||||
|     pub pds: Option<String>, |     pub pds: Option<String>, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue