113 lines
3.3 KiB
Rust
113 lines
3.3 KiB
Rust
use fantoccini::{Client, ClientBuilder};
|
|
pub mod analyze;
|
|
pub mod collector;
|
|
use analyze::*;
|
|
use clap::Parser;
|
|
use collector::*;
|
|
use fern::{
|
|
self,
|
|
colors::{Color, ColoredLevelConfig},
|
|
};
|
|
use log::{debug, error, info, trace, warn};
|
|
use std::{path::PathBuf, process::exit};
|
|
|
|
#[derive(Debug, Parser, Clone)]
|
|
#[command(about = "Scrape stackoverflow for something idk")]
|
|
pub struct Args {
|
|
#[clap(
|
|
short,
|
|
long,
|
|
default_value_t = 5,
|
|
help = "Amount of pages to scrape for links. Sorted by top voted"
|
|
)]
|
|
pages: u16,
|
|
#[clap(short, long, default_value_t = log::LevelFilter::Info)]
|
|
log_level: log::LevelFilter,
|
|
#[clap(short, long)]
|
|
answers_file: Option<PathBuf>,
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() {
|
|
let args = Args::parse();
|
|
init_fern(args.log_level);
|
|
|
|
if let Some(path) = args.answers_file {
|
|
let answers = serde_json::from_str(&std::fs::read_to_string(path).unwrap()).unwrap();
|
|
let freqs = analyze_frequencies(answers);
|
|
let mut freqs = freqs.iter().collect::<Vec<(&String, &u16)>>();
|
|
freqs.sort_by(|a, b| b.1.cmp(&a.1));
|
|
for i in &freqs[0..100] {
|
|
println!("{} : {}", i.0, i.1);
|
|
}
|
|
} else {
|
|
let start = std::time::Instant::now();
|
|
info!("Spawning client");
|
|
let c: Client = ClientBuilder::native()
|
|
.connect("http://localhost:4444")
|
|
.await
|
|
.unwrap_or_else(|e| {
|
|
error!("Error: {e}");
|
|
panic!();
|
|
});
|
|
|
|
info!("Getting links");
|
|
let links = get_top_links(&c, args.pages)
|
|
.await
|
|
.expect("Failed to get links. Exiting");
|
|
info!("Got {} links. Expected {}", links.len(), args.pages * 15);
|
|
info!("Getting answers");
|
|
let mut answers = vec![];
|
|
for (i, link) in links.iter().enumerate() {
|
|
answers.append(
|
|
&mut get_answers(
|
|
&c,
|
|
format!("https://stackoverflow.com{}", link).as_str(),
|
|
i,
|
|
links.len(),
|
|
)
|
|
.await
|
|
.unwrap_or_default(),
|
|
);
|
|
}
|
|
info!(
|
|
"Got {} answers in {} sec",
|
|
answers.len(),
|
|
start.elapsed().as_secs_f32()
|
|
);
|
|
c.close().await.unwrap();
|
|
info!("Writing answers to answers.json");
|
|
let _ = std::fs::write(
|
|
"answers.json",
|
|
serde_json::to_string(&answers).unwrap_or_else(|e| {
|
|
error!("Error: {}", e);
|
|
panic!();
|
|
}),
|
|
);
|
|
}
|
|
}
|
|
|
|
fn init_fern(level: log::LevelFilter) -> anyhow::Result<()> {
|
|
let colors = ColoredLevelConfig::new()
|
|
.trace(Color::White)
|
|
.info(Color::Green)
|
|
.debug(Color::Magenta)
|
|
.warn(Color::Yellow)
|
|
.error(Color::Red);
|
|
|
|
fern::Dispatch::new()
|
|
.format(move |out, message, record| {
|
|
out.finish(format_args!(
|
|
"[{} {} {}] {}",
|
|
humantime::format_rfc3339_seconds(std::time::SystemTime::now()),
|
|
colors.color(record.level()),
|
|
record.target(),
|
|
message
|
|
))
|
|
})
|
|
.level(level)
|
|
.chain(std::io::stdout())
|
|
.apply()?;
|
|
Ok(())
|
|
}
|