Did some stuff: proper error handling, speed statistics, and so on. Read the code for details.

This commit is contained in:
spv 2024-08-26 16:57:52 +02:00
parent 9426f6b855
commit d8632c9228
No known key found for this signature in database
GPG Key ID: 7638A987CE28ADFA
5 changed files with 153 additions and 10 deletions

130
Cargo.lock generated
View File

@ -26,6 +26,55 @@ dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
[[package]]
name = "anstyle-parse"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
dependencies = [
"anstyle",
"windows-sys 0.52.0",
]
[[package]]
name = "anyhow"
version = "1.0.86"
@ -104,6 +153,52 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "colorchoice"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
name = "cookie"
version = "0.16.2"
@ -330,6 +425,12 @@ version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.3.9"
@ -486,6 +587,12 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.11"
@ -942,18 +1049,18 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.208"
version = "1.0.209"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2"
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.208"
version = "1.0.209"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf"
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
dependencies = [
"proc-macro2",
"quote",
@ -1035,14 +1142,23 @@ name = "stackscraper"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"fantoccini",
"lazy_static",
"log",
"regex",
"reqwest",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "subtle"
version = "2.6.1"
@ -1328,6 +1444,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "vcpkg"
version = "0.2.15"

View File

@ -5,9 +5,12 @@ edition = "2021"
[dependencies]
anyhow = "1.0.86"
clap = { version = "4.5.16", features = ["derive"] }
fantoccini = "0.21.1"
lazy_static = "1.5.0"
log = "0.4.22"
regex = "1.10.6"
reqwest = "0.12.7"
serde = { version = "1.0.209", features = ["derive"] }
serde_json = "1.0.127"
tokio = { version = "1.39.3", features = ["full"] }

1
answers.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,6 @@
use fantoccini::{Client, ClientBuilder, Locator};
use log::warn;
use serde::{Deserialize, Serialize};
macro_rules! skip_fail {
($res:expr) => {
@ -26,7 +27,7 @@ macro_rules! skip_fail_opt {
}
/// Holds data about stackoverflow answers
#[derive(Debug, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Answer {
upvotes: u32,
author: String,
@ -43,7 +44,7 @@ pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<
let answer_loc = c.find_all(Locator::Css(".answer")).await?;
let mut out_answers = vec![];
for (j, answer) in answer_loc.iter().enumerate() {
println!("Getting answer {} on page {}", j, i);
println!("Getting answer {} on link {}", j, i);
let text = skip_fail!(answer.text().await);
let score =
@ -51,11 +52,15 @@ pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<
skip_fail_opt!(text.clone().split('\n').collect::<Vec<&str>>().get(0))
.parse::<u32>()
);
let content = text;
let content = text
.split("Share\nImprove this answer")
.collect::<Vec<&str>>()[0]
.to_string()
.replace("\\n", "\n");
out_answers.push(Answer {
upvotes: score,
content,
content: content,
author: "unimplemented".to_string(),
});
}

View File

@ -1,20 +1,30 @@
use fantoccini::{Client, ClientBuilder};
pub mod collector;
use clap::Parser;
use collector::*;
// Command-line arguments for the scraper, parsed in `main` via `Args::parse()`.
// NOTE: plain `//` comments are used on purpose; with clap's derive API a `///`
// doc comment would be picked up as help text and change the CLI output.
#[derive(Debug, Parser, Clone)]
pub struct Args {
// Value passed to `get_top_links` as its second argument; defaults to 5 when
// omitted on the command line. `main` prints `pages * 15` as the expected link
// count, so this is presumably the number of listing pages at 15 links each
// (TODO: confirm against `get_top_links`).
#[clap(default_value_t = 5)]
pages: u16,
}
#[tokio::main]
async fn main() {
let args = Args::parse();
let start = std::time::Instant::now();
println!("Spawning client");
let c: Client = ClientBuilder::native()
.connect("http://localhost:4444")
.await
.expect("failed to connect to WebDriver");
println!("Getting links");
let links = get_top_links(&c, 1)
let links = get_top_links(&c, args.pages)
.await
.expect("Failed to get links. Exiting");
println!("Got {} links. Expected {}", links.len(), 5 * 15);
println!("Got {} links. Expected {}", links.len(), args.pages * 15);
println!("Getting answers");
let mut answers = vec![];
for (i, link) in links.iter().enumerate() {
@ -30,4 +40,6 @@ async fn main() {
start.elapsed().as_secs_f32()
);
c.close().await.unwrap();
println!("Writing answers to answers.json");
std::fs::write("answers.json", serde_json::to_string(&answers).unwrap());
}