Add a very simple analysis that prints the top 100 most common words across all answers
This commit is contained in:
parent
b38a7c1c4c
commit
6b4a54a2c9
17
src/analyze.rs
Normal file
17
src/analyze.rs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use crate::Answer;
|
||||||
|
|
||||||
|
pub fn analyze_frequencies(answers: Vec<Answer>) -> HashMap<String, u16> {
|
||||||
|
let mut out: HashMap<String, u16> = HashMap::new();
|
||||||
|
|
||||||
|
for answer in answers {
|
||||||
|
for word in answer.content.replace("\n", " ").split_whitespace() {
|
||||||
|
out.entry(word.to_string())
|
||||||
|
.and_modify(|count| *count += 1)
|
||||||
|
.or_insert(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out
|
||||||
|
}
|
@ -32,7 +32,7 @@ macro_rules! skip_fail_opt {
|
|||||||
pub struct Answer {
|
pub struct Answer {
|
||||||
upvotes: u32,
|
upvotes: u32,
|
||||||
author: String,
|
author: String,
|
||||||
content: String,
|
pub content: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get all answers from a stackoverflow domain. No error handling is done so get ready to either
|
/// Get all answers from a stackoverflow domain. No error handling is done so get ready to either
|
||||||
|
18
src/main.rs
18
src/main.rs
@ -1,5 +1,7 @@
|
|||||||
use fantoccini::{Client, ClientBuilder};
|
use fantoccini::{Client, ClientBuilder};
|
||||||
|
pub mod analyze;
|
||||||
pub mod collector;
|
pub mod collector;
|
||||||
|
use analyze::*;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use collector::*;
|
use collector::*;
|
||||||
use fern::{
|
use fern::{
|
||||||
@ -7,7 +9,7 @@ use fern::{
|
|||||||
colors::{Color, ColoredLevelConfig},
|
colors::{Color, ColoredLevelConfig},
|
||||||
};
|
};
|
||||||
use log::{debug, error, info, trace, warn};
|
use log::{debug, error, info, trace, warn};
|
||||||
use std::process::exit;
|
use std::{path::PathBuf, process::exit};
|
||||||
|
|
||||||
#[derive(Debug, Parser, Clone)]
|
#[derive(Debug, Parser, Clone)]
|
||||||
#[command(about = "Scrape stackoverflow for something idk")]
|
#[command(about = "Scrape stackoverflow for something idk")]
|
||||||
@ -21,6 +23,8 @@ pub struct Args {
|
|||||||
pages: u16,
|
pages: u16,
|
||||||
#[clap(short, long, default_value_t = log::LevelFilter::Info)]
|
#[clap(short, long, default_value_t = log::LevelFilter::Info)]
|
||||||
log_level: log::LevelFilter,
|
log_level: log::LevelFilter,
|
||||||
|
#[clap(short, long)]
|
||||||
|
answers_file: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
@ -28,6 +32,15 @@ async fn main() {
|
|||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
init_fern(args.log_level);
|
init_fern(args.log_level);
|
||||||
|
|
||||||
|
if let Some(path) = args.answers_file {
|
||||||
|
let answers = serde_json::from_str(&std::fs::read_to_string(path).unwrap()).unwrap();
|
||||||
|
let freqs = analyze_frequencies(answers);
|
||||||
|
let mut freqs = freqs.iter().collect::<Vec<(&String, &u16)>>();
|
||||||
|
freqs.sort_by(|a, b| b.1.cmp(&a.1));
|
||||||
|
for i in &freqs[0..100] {
|
||||||
|
println!("{} : {}", i.0, i.1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
info!("Spawning client");
|
info!("Spawning client");
|
||||||
let c: Client = ClientBuilder::native()
|
let c: Client = ClientBuilder::native()
|
||||||
@ -64,13 +77,14 @@ async fn main() {
|
|||||||
);
|
);
|
||||||
c.close().await.unwrap();
|
c.close().await.unwrap();
|
||||||
info!("Writing answers to answers.json");
|
info!("Writing answers to answers.json");
|
||||||
std::fs::write(
|
let _ = std::fs::write(
|
||||||
"answers.json",
|
"answers.json",
|
||||||
serde_json::to_string(&answers).unwrap_or_else(|e| {
|
serde_json::to_string(&answers).unwrap_or_else(|e| {
|
||||||
error!("Error: {}", e);
|
error!("Error: {}", e);
|
||||||
panic!();
|
panic!();
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn init_fern(level: log::LevelFilter) -> anyhow::Result<()> {
|
fn init_fern(level: log::LevelFilter) -> anyhow::Result<()> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user