did some stuff. Proper error handling. speed statistics and stuff. read the code
This commit is contained in:
parent
9426f6b855
commit
d8632c9228
130
Cargo.lock
generated
130
Cargo.lock
generated
@ -26,6 +26,55 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.86"
|
||||
@ -104,6 +153,52 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||
|
||||
[[package]]
|
||||
name = "cookie"
|
||||
version = "0.16.2"
|
||||
@ -330,6 +425,12 @@ version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.9"
|
||||
@ -486,6 +587,12 @@ version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.11"
|
||||
@ -942,18 +1049,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.208"
|
||||
version = "1.0.209"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2"
|
||||
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.208"
|
||||
version = "1.0.209"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf"
|
||||
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -1035,14 +1142,23 @@ name = "stackscraper"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"fantoccini",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.6.1"
|
||||
@ -1328,6 +1444,12 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
|
@ -5,9 +5,12 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
clap = { version = "4.5.16", features = ["derive"] }
|
||||
fantoccini = "0.21.1"
|
||||
lazy_static = "1.5.0"
|
||||
log = "0.4.22"
|
||||
regex = "1.10.6"
|
||||
reqwest = "0.12.7"
|
||||
serde = { version = "1.0.209", features = ["derive"] }
|
||||
serde_json = "1.0.127"
|
||||
tokio = { version = "1.39.3", features = ["full"] }
|
||||
|
1
answers.json
Normal file
1
answers.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,5 +1,6 @@
|
||||
use fantoccini::{Client, ClientBuilder, Locator};
|
||||
use log::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
macro_rules! skip_fail {
|
||||
($res:expr) => {
|
||||
@ -26,7 +27,7 @@ macro_rules! skip_fail_opt {
|
||||
}
|
||||
|
||||
/// Holds data about stackoverflow answers
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Answer {
|
||||
upvotes: u32,
|
||||
author: String,
|
||||
@ -43,7 +44,7 @@ pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<
|
||||
let answer_loc = c.find_all(Locator::Css(".answer")).await?;
|
||||
let mut out_answers = vec![];
|
||||
for (j, answer) in answer_loc.iter().enumerate() {
|
||||
println!("Getting answer {} on page {}", j, i);
|
||||
println!("Getting answer {} on link {}", j, i);
|
||||
let text = skip_fail!(answer.text().await);
|
||||
|
||||
let score =
|
||||
@ -51,11 +52,15 @@ pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<
|
||||
skip_fail_opt!(text.clone().split('\n').collect::<Vec<&str>>().get(0))
|
||||
.parse::<u32>()
|
||||
);
|
||||
let content = text;
|
||||
let content = text
|
||||
.split("Share\nImprove this answer")
|
||||
.collect::<Vec<&str>>()[0]
|
||||
.to_string()
|
||||
.replace("\\n", "\n");
|
||||
|
||||
out_answers.push(Answer {
|
||||
upvotes: score,
|
||||
content,
|
||||
content: content,
|
||||
author: "unimplemented".to_string(),
|
||||
});
|
||||
}
|
||||
|
16
src/main.rs
16
src/main.rs
@ -1,20 +1,30 @@
|
||||
use fantoccini::{Client, ClientBuilder};
|
||||
pub mod collector;
|
||||
use clap::Parser;
|
||||
use collector::*;
|
||||
|
||||
#[derive(Debug, Parser, Clone)]
|
||||
pub struct Args {
|
||||
#[clap(default_value_t = 5)]
|
||||
pages: u16,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
println!("Spawning client");
|
||||
let c: Client = ClientBuilder::native()
|
||||
.connect("http://localhost:4444")
|
||||
.await
|
||||
.expect("failed to connect to WebDriver");
|
||||
|
||||
println!("Getting links");
|
||||
let links = get_top_links(&c, 1)
|
||||
let links = get_top_links(&c, args.pages)
|
||||
.await
|
||||
.expect("Failed to get links. Exiting");
|
||||
println!("Got {} links. Expected {}", links.len(), 5 * 15);
|
||||
println!("Got {} links. Expected {}", links.len(), args.pages * 15);
|
||||
println!("Getting answers");
|
||||
let mut answers = vec![];
|
||||
for (i, link) in links.iter().enumerate() {
|
||||
@ -30,4 +40,6 @@ async fn main() {
|
||||
start.elapsed().as_secs_f32()
|
||||
);
|
||||
c.close().await.unwrap();
|
||||
println!("Writing answers to answers.json");
|
||||
std::fs::write("answers.json", serde_json::to_string(&answers).unwrap());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user