added some logs. Not using a real logger. fuck you
This commit is contained in:
parent
55c7bba09d
commit
9426f6b855
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1037,6 +1037,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"fantoccini",
|
"fantoccini",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
"log",
|
||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -7,6 +7,7 @@ edition = "2021"
|
|||||||
anyhow = "1.0.86"
|
anyhow = "1.0.86"
|
||||||
fantoccini = "0.21.1"
|
fantoccini = "0.21.1"
|
||||||
lazy_static = "1.5.0"
|
lazy_static = "1.5.0"
|
||||||
|
log = "0.4.22"
|
||||||
regex = "1.10.6"
|
regex = "1.10.6"
|
||||||
reqwest = "0.12.7"
|
reqwest = "0.12.7"
|
||||||
tokio = { version = "1.39.3", features = ["full"] }
|
tokio = { version = "1.39.3", features = ["full"] }
|
||||||
|
89
src/collector.rs
Normal file
89
src/collector.rs
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
use fantoccini::{Client, ClientBuilder, Locator};
|
||||||
|
use log::warn;
|
||||||
|
|
||||||
|
/// Unwraps a `Result` inside a loop body.
///
/// Evaluates to the `Ok` payload. On `Err`, the error is reported through `warn!` and the
/// enclosing loop moves on to its next iteration via `continue`, so this macro may only be
/// invoked inside a loop.
macro_rules! skip_fail {
    ($fallible:expr) => {
        // Match directly on the expression (no intermediate `let`) so that temporaries created
        // by the argument stay alive for the whole statement at the call site.
        match $fallible {
            Err(cause) => {
                warn!("An error: {}; skipped.", cause);
                continue;
            }
            Ok(value) => value,
        }
    };
}
|
||||||
|
|
||||||
|
/// Unwraps an `Option` inside a loop body.
///
/// Evaluates to the `Some` payload. On `None`, a warning is emitted through `warn!` and the
/// enclosing loop moves on to its next iteration via `continue`, so this macro may only be
/// invoked inside a loop.
macro_rules! skip_fail_opt {
    ($maybe:expr) => {
        // Match directly on the expression (no intermediate `let`) so that temporaries created
        // by the argument stay alive for the whole statement at the call site.
        match $maybe {
            None => {
                warn!("Unexpected empty value; skipped.");
                continue;
            }
            Some(value) => value,
        }
    };
}
|
||||||
|
|
||||||
|
/// Holds data about a single Stack Overflow answer.
///
/// The fields are public so that code outside this module can read the collected data once it
/// has been returned from the scraping functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Answer {
    /// Score (upvote count) of the answer.
    pub upvotes: u32,
    /// Author of the answer.
    pub author: String,
    /// Text content of the answer.
    pub content: String,
}
|
||||||
|
|
||||||
|
/// Get all answers from a stackoverflow domain. No error handling is done so get ready to either
|
||||||
|
/// check your input or "note: run with `RUST_BACKTRACE=1` environment variable to display a
|
||||||
|
/// backtrace"
|
||||||
|
pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<Answer>> {
|
||||||
|
// first, go to the Wikipedia page for Foobar
|
||||||
|
c.goto(url).await?;
|
||||||
|
|
||||||
|
let answer_loc = c.find_all(Locator::Css(".answer")).await?;
|
||||||
|
let mut out_answers = vec![];
|
||||||
|
for (j, answer) in answer_loc.iter().enumerate() {
|
||||||
|
println!("Getting answer {} on page {}", j, i);
|
||||||
|
let text = skip_fail!(answer.text().await);
|
||||||
|
|
||||||
|
let score =
|
||||||
|
skip_fail!(
|
||||||
|
skip_fail_opt!(text.clone().split('\n').collect::<Vec<&str>>().get(0))
|
||||||
|
.parse::<u32>()
|
||||||
|
);
|
||||||
|
let content = text;
|
||||||
|
|
||||||
|
out_answers.push(Answer {
|
||||||
|
upvotes: score,
|
||||||
|
content,
|
||||||
|
author: "unimplemented".to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(out_answers)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_top_links(c: &Client, pages: u16) -> anyhow::Result<Vec<String>> {
|
||||||
|
let mut answers = vec![];
|
||||||
|
for page in 1..=pages {
|
||||||
|
skip_fail!(
|
||||||
|
c.goto(
|
||||||
|
format!(
|
||||||
|
"https://stackoverflow.com/questions?tab=votes&page={}",
|
||||||
|
page
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
);
|
||||||
|
|
||||||
|
let finds = c.find_all(Locator::Css(".s-link")).await?;
|
||||||
|
for find in finds {
|
||||||
|
if skip_fail_opt!(skip_fail!(find.attr("href").await)).contains("/questions/") {
|
||||||
|
answers.push(find.attr("href").await.unwrap().unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(answers)
|
||||||
|
}
|
96
src/main.rs
96
src/main.rs
@ -1,89 +1,33 @@
|
|||||||
use fantoccini::{elements::Element, Client, ClientBuilder, Locator};
|
use fantoccini::{Client, ClientBuilder};
|
||||||
use lazy_static::lazy_static;
|
pub mod collector;
|
||||||
use regex;
|
use collector::*;
|
||||||
|
|
||||||
/// Holds data about stackoverflow answers
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct Answer {
|
|
||||||
upvotes: u32,
|
|
||||||
author: String,
|
|
||||||
content: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get all answers from a stackoverflow domain. No error handling is done so get ready to either
|
|
||||||
/// check your input or "note: run with `RUST_BACKTRACE=1` environment variable to display a
|
|
||||||
/// backtrace"
|
|
||||||
async fn get_answers(c: &Client, url: &str) -> Vec<Answer> {
|
|
||||||
// first, go to the Wikipedia page for Foobar
|
|
||||||
c.goto(url).await.unwrap();
|
|
||||||
|
|
||||||
let answer_loc = c.find_all(Locator::Css(".answer")).await.unwrap();
|
|
||||||
let mut out_answers = vec![];
|
|
||||||
for answer in answer_loc {
|
|
||||||
let text = answer.text().await.unwrap();
|
|
||||||
|
|
||||||
let score = text
|
|
||||||
.clone()
|
|
||||||
.split('\n')
|
|
||||||
.collect::<Vec<&str>>()
|
|
||||||
.get(0)
|
|
||||||
.unwrap()
|
|
||||||
.parse::<u32>()
|
|
||||||
.unwrap();
|
|
||||||
let content = text;
|
|
||||||
|
|
||||||
out_answers.push(Answer {
|
|
||||||
upvotes: score,
|
|
||||||
content,
|
|
||||||
author: "unimplemented".to_string(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
out_answers
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_top_links(c: &Client, pages: u16) -> anyhow::Result<Vec<String>> {
|
|
||||||
let mut answers = vec![];
|
|
||||||
for page in 1..=pages {
|
|
||||||
c.goto(
|
|
||||||
format!(
|
|
||||||
"https://stackoverflow.com/questions?tab=votes&page={}",
|
|
||||||
page
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let finds = c.find_all(Locator::Css(".s-link")).await.unwrap();
|
|
||||||
for find in finds {
|
|
||||||
if find
|
|
||||||
.attr("href")
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.unwrap()
|
|
||||||
.contains("/questions/")
|
|
||||||
{
|
|
||||||
answers.push(find.attr("href").await.unwrap().unwrap());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(answers)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
println!("Spawning client");
|
||||||
let c: Client = ClientBuilder::native()
|
let c: Client = ClientBuilder::native()
|
||||||
.connect("http://localhost:4444")
|
.connect("http://localhost:4444")
|
||||||
.await
|
.await
|
||||||
.expect("failed to connect to WebDriver");
|
.expect("failed to connect to WebDriver");
|
||||||
let links = get_top_links(&c, 5).await.unwrap();
|
println!("Getting links");
|
||||||
|
let links = get_top_links(&c, 1)
|
||||||
|
.await
|
||||||
|
.expect("Failed to get links. Exiting");
|
||||||
|
println!("Got {} links. Expected {}", links.len(), 5 * 15);
|
||||||
|
println!("Getting answers");
|
||||||
let mut answers = vec![];
|
let mut answers = vec![];
|
||||||
for link in links {
|
for (i, link) in links.iter().enumerate() {
|
||||||
answers.append(
|
answers.append(
|
||||||
&mut get_answers(&c, format!("https://stackoverflow.com{}", link).as_str()).await,
|
&mut get_answers(&c, format!("https://stackoverflow.com{}", link).as_str(), i)
|
||||||
|
.await
|
||||||
|
.unwrap_or_default(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
println!(
|
||||||
|
"Got {} answers in {} sec",
|
||||||
|
answers.len(),
|
||||||
|
start.elapsed().as_secs_f32()
|
||||||
|
);
|
||||||
c.close().await.unwrap();
|
c.close().await.unwrap();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user