Skip to content

Commit

Permalink
chore(chrome): add streaming ws
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 11, 2024
1 parent 88a3f83 commit f645643
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 64 deletions.
42 changes: 20 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 3 additions & 13 deletions examples/real_world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@ use spider::website::Website;
use spider::{
configuration::WaitForIdleNetwork, features::chrome_common::RequestInterceptConfiguration,
};
use spider_utils::spider_transformations::transformation::content::{
transform_content, ReturnFormat, TransformConfig,
};
use std::io::Result;
use std::time::Duration;

async fn crawl_website(url: &str) -> Result<()> {
let mut stdout = tokio::io::stdout();

let mut website: Website = Website::new(url)
.with_limit(1)
.with_chrome_intercept(RequestInterceptConfiguration::new(true))
Expand All @@ -24,15 +23,12 @@ async fn crawl_website(url: &str) -> Result<()> {
.with_stealth(true)
.with_return_page_links(true)
.with_fingerprint(true)
.with_proxies(Some(vec!["http://localhost:8888".into()]))
// .with_proxies(Some(vec!["http://localhost:8888".into()]))
.with_chrome_connection(Some("http://127.0.0.1:9222/json/version".into()))
.build()
.unwrap();

let mut rx2 = website.subscribe(16).unwrap();
let mut stdout = tokio::io::stdout();
let mut conf = TransformConfig::default();
conf.return_format = ReturnFormat::Markdown;

tokio::spawn(async move {
while let Ok(page) = rx2.recv().await {
Expand All @@ -51,12 +47,6 @@ async fn crawl_website(url: &str) -> Result<()> {
.as_bytes(),
)
.await;

let markup = transform_content(&page, &conf, &None, &None, &None);

let _ = stdout
.write_all(format!("- {}\n {}\n", page.get_url(), markup).as_bytes())
.await;
}
});

Expand Down
4 changes: 2 additions & 2 deletions spider/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider"
version = "2.21.8"
version = "2.21.9"
authors = [
"j-mendez <[email protected]>"
]
Expand Down Expand Up @@ -108,8 +108,8 @@ version = "2"
path = "../spider_chrome"
optional = true
features = [
"tokio-runtime",
"bytes",
"stream"
]

[features]
Expand Down
15 changes: 5 additions & 10 deletions spider_chrome/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_chrome"
version = "2.21.8"
version = "2.21.9"
rust-version = "1.70"
authors = [
"j-mendez <[email protected]>"
Expand All @@ -18,7 +18,7 @@ categories = ["web-programming", "api-bindings", "development-tools::testing"]
name = "chromiumoxide"

[dependencies]
async-tungstenite = "0.28"
tokio-tungstenite = "0.24"
serde = { version = "1", features = ["derive"] }
futures = "0.3"
chromiumoxide_types = { version = "0.7" }
Expand Down Expand Up @@ -63,8 +63,8 @@ tracing-subscriber = "0.3"
tokio = { version = "1", features = ["rt-multi-thread", "time", "macros"] }

[features]
default = ["tokio-runtime", "bytes"]
tokio-runtime = ["async-tungstenite/tokio-runtime"]
default = ["bytes"]
stream = ["tokio-tungstenite/stream"]
fetcher = []
bytes = ["dep:bytes"]
serde0 = []
Expand All @@ -77,24 +77,19 @@ _fetcher-native-tokio = ["fetcher", "chromiumoxide_fetcher/_native-tokio"]

[[example]]
name = "wiki-tokio"
required-features = ["tokio-runtime"]

[[example]]
name = "iframe-workaround"
required-features = ["tokio-runtime", "tokio"]

[[example]]
name = "storage-cookie"
required-features = ["tokio-runtime"]

[[example]]
name = "console-logs"
required-features = ["tokio-runtime"]

[[example]]
name = "httpfuture"
required-features = ["tokio-runtime"]

[[example]]
name = "fetcher-tokio"
required-features = ["tokio-runtime", "_fetcher-native-tokio"]
required-features = ["_fetcher-native-tokio"]
4 changes: 1 addition & 3 deletions spider_chrome/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,8 @@ Use `chromiumoxide` with the `async-std` runtime:
chromiumoxide = { git = "https://github.com/mattsse/chromiumoxide", branch = "main"}
```

To use the `tokio` runtime instead add `features = ["tokio-runtime"]` and set `default-features = false` to disable the default runtime (`async-std`):

```toml
chromiumoxide = { git = "https://github.com/mattsse/chromiumoxide", features = ["tokio-runtime"], default-features = false, branch = "main"}
chromiumoxide = { git = "https://github.com/mattsse/chromiumoxide", default-features = false, branch = "main"}
```

Websocket connectivity is provided by [`tokio-tungstenite`](https://github.com/snapview/tokio-tungstenite) (previously [`async-tungstenite`](https://github.com/sdroege/async-tungstenite)), so the connection runs on the `tokio` runtime.
Expand Down
16 changes: 8 additions & 8 deletions spider_chrome/src/conn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@ use std::marker::PhantomData;
use std::pin::Pin;
use std::task::ready;

use async_tungstenite::tungstenite::Message as WsMessage;
use async_tungstenite::{tungstenite::protocol::WebSocketConfig, WebSocketStream};
use futures::stream::Stream;
use futures::task::{Context, Poll};
use futures::{SinkExt, StreamExt};
use tokio_tungstenite::tungstenite::Message as WsMessage;
use tokio_tungstenite::MaybeTlsStream;
use tokio_tungstenite::{tungstenite::protocol::WebSocketConfig, WebSocketStream};

use chromiumoxide_cdp::cdp::browser_protocol::target::SessionId;
use chromiumoxide_types::{CallId, EventMessage, Message, MethodCall, MethodId};

use crate::error::CdpError;
use crate::error::Result;
use async_tungstenite::tokio::ConnectStream;

/// Underlying transport of the websocket connection: a tokio TCP stream wrapped
/// in `tokio_tungstenite::MaybeTlsStream`.
///
/// Local alias standing in for the `async_tungstenite::tokio::ConnectStream`
/// type that was imported before the switch to `tokio-tungstenite`.
type ConnectStream = MaybeTlsStream<tokio::net::TcpStream>;

/// Exchanges the messages with the websocket
#[must_use = "streams do nothing unless polled"]
Expand All @@ -40,11 +42,9 @@ impl<T: EventMessage + Unpin> Connection<T> {
..Default::default()
};

let (ws, _) = async_tungstenite::tokio::connect_async_with_config(
debug_ws_url.as_ref(),
Some(config),
)
.await?;
let (ws, _) =
tokio_tungstenite::connect_async_with_config(debug_ws_url.as_ref(), Some(config), true)
.await?;

Ok(Self {
pending_commands: Default::default(),
Expand Down
4 changes: 2 additions & 2 deletions spider_chrome/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use std::io;
use std::process::ExitStatus;
use std::time::Instant;

use async_tungstenite::tungstenite;
use async_tungstenite::tungstenite::Message;
use base64::DecodeError;
use futures::channel::mpsc::SendError;
use futures::channel::oneshot::Canceled;
use thiserror::Error;
use tokio_tungstenite::tungstenite;
use tokio_tungstenite::tungstenite::Message;

use chromiumoxide_cdp::cdp::browser_protocol::page::FrameId;

Expand Down
2 changes: 1 addition & 1 deletion spider_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
version = "2.21.8"
version = "2.21.9"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_transformations/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_transformations"
version = "2.21.8"
version = "2.21.9"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_utils"
version = "2.21.8"
version = "2.21.9"
authors = [
"j-mendez <[email protected]>"
]
Expand Down
2 changes: 1 addition & 1 deletion spider_worker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
version = "2.21.8"
version = "2.21.9"
authors = [
"j-mendez <[email protected]>"
]
Expand Down

0 comments on commit f645643

Please sign in to comment.