Added some logs. Not using a real logger.

This commit is contained in:
spv 2024-08-26 16:27:07 +02:00
parent 55c7bba09d
commit 9426f6b855
No known key found for this signature in database
GPG Key ID: 7638A987CE28ADFA
4 changed files with 111 additions and 76 deletions

1
Cargo.lock generated
View File

@ -1037,6 +1037,7 @@ dependencies = [
"anyhow",
"fantoccini",
"lazy_static",
"log",
"regex",
"reqwest",
"tokio",

View File

@ -7,6 +7,7 @@ edition = "2021"
anyhow = "1.0.86"
fantoccini = "0.21.1"
lazy_static = "1.5.0"
log = "0.4.22"
regex = "1.10.6"
reqwest = "0.12.7"
tokio = { version = "1.39.3", features = ["full"] }

89
src/collector.rs Normal file
View File

@ -0,0 +1,89 @@
use fantoccini::{Client, ClientBuilder, Locator};
use log::warn;
/// Unwraps a `Result` inside a loop body, skipping the current iteration on failure.
///
/// Evaluates to the `Ok` payload. On `Err` the error is reported through `warn!` and the
/// enclosing loop moves on via `continue`, so the macro can only be used inside a loop and
/// needs a `warn!` macro in scope at the call site.
macro_rules! skip_fail {
    ($res:expr) => {
        // Kept as a `match` directly on the expression so that temporaries created by the
        // argument stay alive for the whole enclosing statement.
        match $res {
            Err(err) => {
                warn!("An error: {}; skipped.", err);
                continue;
            }
            Ok(value) => value,
        }
    };
}
/// Unwraps an `Option` inside a loop body, skipping the current iteration when it is empty.
///
/// Evaluates to the `Some` payload. On `None` a warning is emitted through `warn!` and the
/// enclosing loop moves on via `continue`, so the macro can only be used inside a loop and
/// needs a `warn!` macro in scope at the call site.
macro_rules! skip_fail_opt {
    ($res:expr) => {
        // Kept as a `match` directly on the expression so that temporaries created by the
        // argument stay alive for the whole enclosing statement.
        match $res {
            None => {
                warn!("Unexpected empty value; skipped.");
                continue;
            }
            Some(value) => value,
        }
    };
}
/// Holds data about a single Stack Overflow answer.
///
/// The fields are public so that code outside this module can actually read what was scraped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Answer {
    /// Vote count of the answer.
    pub upvotes: u32,
    /// Name of the answer's author.
    pub author: String,
    /// Text of the answer.
    pub content: String,
}
/// Collects every answer found on a Stack Overflow question page.
///
/// Navigates `c` to `url`, selects all elements matching the `.answer` CSS selector and turns
/// each one into an [`Answer`]. The first line of an element's text is parsed as the upvote
/// count and the complete text is stored as the content. The author is not extracted yet and is
/// filled with a placeholder. `i` is only used to label the progress output.
///
/// # Errors
///
/// Returns an error if navigating to `url` or looking up the answer elements fails. An answer
/// whose text cannot be read, or whose first line is not a valid `u32` (a negative score, for
/// example), is reported through `warn!` and skipped instead of failing the whole call.
pub async fn get_answers(c: &Client, url: &str, i: usize) -> anyhow::Result<Vec<Answer>> {
    c.goto(url).await?;
    let answer_loc = c.find_all(Locator::Css(".answer")).await?;
    let mut out_answers = Vec::with_capacity(answer_loc.len());
    for (j, answer) in answer_loc.iter().enumerate() {
        println!("Getting answer {} on page {}", j, i);
        let text = skip_fail!(answer.text().await);
        // `split` always yields at least one item, so an empty text simply fails to parse below
        // and is skipped there.
        let first_line = text.split('\n').next().unwrap_or_default();
        // Tolerate stray whitespace around the number.
        let score = skip_fail!(first_line.trim().parse::<u32>());
        out_answers.push(Answer {
            upvotes: score,
            content: text,
            author: "unimplemented".to_string(),
        });
    }
    Ok(out_answers)
}
/// Collects question links from the first `pages` pages of Stack Overflow's vote-sorted
/// question listing.
///
/// For every page number in `1..=pages` the listing is opened, all elements matching the
/// `.s-link` CSS selector are inspected, and each `href` containing `/questions/` is returned
/// as found on the page.
///
/// # Errors
///
/// Returns an error if looking up the link elements on a page fails. A page that cannot be
/// opened, or an element whose `href` cannot be read or is absent, is reported through `warn!`
/// and skipped.
pub async fn get_top_links(c: &Client, pages: u16) -> anyhow::Result<Vec<String>> {
    let mut links = vec![];
    for page in 1..=pages {
        let listing_url = format!(
            "https://stackoverflow.com/questions?tab=votes&page={}",
            page
        );
        skip_fail!(c.goto(listing_url.as_str()).await);
        let finds = c.find_all(Locator::Css(".s-link")).await?;
        for find in finds {
            // Read the attribute once: a second lookup costs another WebDriver round-trip and
            // could fail (and previously panic) even though the first one succeeded.
            let href = skip_fail_opt!(skip_fail!(find.attr("href").await));
            if href.contains("/questions/") {
                links.push(href);
            }
        }
    }
    Ok(links)
}

View File

@ -1,89 +1,33 @@
use fantoccini::{elements::Element, Client, ClientBuilder, Locator};
use lazy_static::lazy_static;
use regex;
/// One scraped Stack Overflow answer.
#[derive(Clone, Debug)]
struct Answer {
    /// Vote count of the answer.
    upvotes: u32,
    /// Name of the answer's author.
    author: String,
    /// Text of the answer.
    content: String,
}
/// Scrapes every `.answer` element from the page at `url`.
///
/// The first line of each element's text is parsed as its upvote count and the full text is
/// kept as the content; the author is a fixed placeholder.
///
/// # Panics
///
/// Panics if navigation, the element lookup or reading an element's text fails, or if the
/// first line of an answer's text is not a valid `u32`.
async fn get_answers(c: &Client, url: &str) -> Vec<Answer> {
    c.goto(url).await.unwrap();
    let elements = c.find_all(Locator::Css(".answer")).await.unwrap();
    let mut collected = Vec::new();
    for element in elements {
        let body = element.text().await.unwrap();
        // `split` always produces at least one piece, so this `unwrap` cannot fail.
        let first_line = body.split('\n').next().unwrap();
        let upvotes: u32 = first_line.parse().unwrap();
        collected.push(Answer {
            upvotes,
            author: String::from("unimplemented"),
            content: body,
        });
    }
    collected
}
/// Gathers question links from the first `pages` pages of the vote-sorted question listing.
///
/// Every element matching `.s-link` whose `href` contains `/questions/` contributes that
/// `href` to the result.
///
/// # Panics
///
/// Panics if a page cannot be opened, the element lookup fails, or an element's `href` cannot
/// be read or is missing.
async fn get_top_links(c: &Client, pages: u16) -> anyhow::Result<Vec<String>> {
    let mut links = Vec::new();
    for page in 1..=pages {
        let listing_url = format!(
            "https://stackoverflow.com/questions?tab=votes&page={}",
            page
        );
        c.goto(&listing_url).await.unwrap();
        for anchor in c.find_all(Locator::Css(".s-link")).await.unwrap() {
            let href = anchor.attr("href").await.unwrap().unwrap();
            if href.contains("/questions/") {
                // The attribute is looked up a second time for the stored value.
                links.push(anchor.attr("href").await.unwrap().unwrap());
            }
        }
    }
    Ok(links)
}
use fantoccini::{Client, ClientBuilder};
pub mod collector;
use collector::*;
/// Program entry point: connects to a WebDriver at `http://localhost:4444`, gathers question
/// links, collects the answers behind each link and prints how many were found and how long the
/// run took.
// NOTE(review): this span looks like a diff rendering with removed and added lines interleaved
// (two `let links = ...` statements, two `for ... in links` headers, two arguments inside
// `append(`), so it is not compilable as shown — confirm against the real file before editing.
#[tokio::main]
async fn main() {
// Timer for the elapsed-seconds figure in the final summary line.
let start = std::time::Instant::now();
println!("Spawning client");
let c: Client = ClientBuilder::native()
.connect("http://localhost:4444")
.await
.expect("failed to connect to WebDriver");
let links = get_top_links(&c, 5).await.unwrap();
println!("Getting links");
let links = get_top_links(&c, 1)
.await
.expect("Failed to get links. Exiting");
// NOTE(review): the expected count is hard-coded as 5 * 15 while the call directly above
// requests 1 page — confirm which of the two is intended.
println!("Got {} links. Expected {}", links.len(), 5 * 15);
println!("Getting answers");
let mut answers = vec![];
for link in links {
for (i, link) in links.iter().enumerate() {
answers.append(
&mut get_answers(&c, format!("https://stackoverflow.com{}", link).as_str()).await,
// `unwrap_or_default()` turns a failed call into an empty list, so a failing link adds nothing
// to `answers` instead of aborting the run.
&mut get_answers(&c, format!("https://stackoverflow.com{}", link).as_str(), i)
.await
.unwrap_or_default(),
);
}
println!(
"Got {} answers in {} sec",
answers.len(),
start.elapsed().as_secs_f32()
);
// Ends the WebDriver session; panics if closing fails.
c.close().await.unwrap();
}