added multithreading to loading and processing winners

This commit is contained in:
Jay Robson 2025-05-10 00:12:52 +10:00
parent 2256ea367c
commit 17a6c88dbc
11 changed files with 131 additions and 38 deletions

16
Cargo.lock generated
View File

@ -7,7 +7,6 @@ name = "ballot-counter"
version = "0.1.0"
dependencies = [
"itertools",
"quick-csv",
]
[[package]]
@ -24,18 +23,3 @@ checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "quick-csv"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edf4b7701db7d2e4c9c010f21eebd8676a50f79223a0cf858162d24bff47338c"
dependencies = [
"rustc-serialize",
]
[[package]]
name = "rustc-serialize"
version = "0.3.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe834bc780604f4674073badbad26d7219cadfb4a2275802db12cbae17498401"

View File

@ -5,4 +5,3 @@ edition = "2024"
[dependencies]
itertools = "0.14.0"
quick-csv = "0.1.6"

View File

@ -1,6 +1,6 @@
use itertools::Itertools;
use crate::{header::Header, stats::Stats};
use crate::{csv, header::Header, stats::Stats};
#[derive(Debug)]
@ -12,10 +12,10 @@ pub struct Ballot {
const CANDIDATE_MIN: usize = 6;
impl Ballot {
pub fn parse(row: quick_csv::Row, header: &Header, stats: &mut Stats) -> Result<Ballot, Box<dyn std::error::Error>> {
let mut cols = row.columns()?.skip(6);
pub fn parse(mut row: csv::reader::RowReader, header: &Header, stats: &mut Stats) -> Result<Ballot, Box<dyn std::error::Error>> {
let mut cols = row.by_ref().skip(6);
let place_filter = |(index, place): (usize, &str)| match place.parse::<i32>() {
let place_filter = |(index, place): (usize, String)| match place.parse::<i32>() {
Ok(place) => Some((place, index)),
Err(_) => None,
};
@ -45,6 +45,9 @@ impl Ballot {
}
else {
println!("abl votes: {votes_party:?}");
println!("btl votes: {votes_candidate:?}");
println!("at: {}", row.get_line_number());
return Err("ballot is informal".into());
}

View File

@ -1,7 +1,7 @@
use std::{sync::{atomic::{AtomicUsize, Ordering}, mpsc, Arc, Mutex}, thread};
use std::{sync::{Arc, Mutex}, thread};
use itertools::Itertools;
use crate::{ballot::Ballot, header::Header, stats::Stats, util::ScoreItem};
use crate::{ballot::Ballot, csv, header::Header, stats::Stats, util::ScoreItem};
#[derive(Debug)]
pub enum Event {
@ -20,18 +20,37 @@ pub struct Counter {
}
impl Counter {
pub fn new<T: std::io::BufRead>(mut csv: quick_csv::Csv<T>, winners: usize) -> Result<Self, Box<dyn std::error::Error>> {
let header = Arc::new(Header::parse(csv.next().ok_or("csv header missing")??, winners)?);
let mut ballots = Vec::new();
let mut stats = Stats::new();
pub fn new<'a,I>(mut csv: I, winners: usize) -> Result<Self, Box<dyn std::error::Error>>
where I: Iterator<Item=csv::reader::RowReader<'a>> + Send + Sync
{
let header = Arc::new(Header::parse(csv.next().ok_or("csv header missing")?, winners)?);
let ballots = Mutex::new(Vec::new());
let stats = Mutex::new(Stats::new());
let csv = Mutex::new(csv);
if winners > header.candidates.len() {
return Err("winners can't be smaller than the candidates list".into());
}
for row in csv {
ballots.push(Ballot::parse(row?, &header, &mut stats)?);
}
thread::scope(|s| {
let threads = std::thread::available_parallelism().unwrap().into();
for _ in 0..threads {
s.spawn(|| {
let mut l_stats = Stats::new();
let mut l_ballots = Vec::new();
while let Some(mut row) = { csv.lock().unwrap().next() } {
if !row.check_empty() {
l_ballots.push(Ballot::parse(row, &header, &mut l_stats).unwrap());
}
}
{ballots.lock().unwrap().append(&mut l_ballots)};
{stats.lock().unwrap().add(&l_stats)};
});
}
});
let ballots = ballots.into_inner().unwrap();
let stats = stats.into_inner().unwrap();
let enabled = vec![true; header.candidates.len()];
let quota = (ballots.len() as f64) / (header.winners as f64 + 1.0) + 1.0;

4
src/csv.rs Normal file
View File

@ -0,0 +1,4 @@
pub mod writer;
pub mod reader;

74
src/csv/reader.rs Normal file
View File

@ -0,0 +1,74 @@
use std::{iter::Peekable, str::Chars};
/// Iterator over the fields of a single CSV line.
///
/// Every call to `next` yields one field as an owned `String`.
///
/// A field whose first character is `"` is treated as quoted:
/// * delimiters between the quotes are kept as part of the value,
/// * a doubled quote (`""`) produces one literal `"`,
/// * a backslash makes the character that follows it literal.
///
/// Carriage returns are dropped from unquoted fields and from the part of a
/// quoted field that follows its closing quote, so lines taken from a CRLF
/// file can be passed in without trimming.
#[derive(Debug, Clone)]
pub struct RowReader<'a> {
    /// Characters of the line that have not been consumed yet.
    it: Peekable<Chars<'a>>,
    /// Character that separates two fields.
    delimiter: char,
    /// Set once the last field of the line has been handed out.
    ended: bool,
    /// Line number supplied by the caller; only reported back, never interpreted.
    at: usize,
}
impl<'a> RowReader<'a> {
    /// Creates a reader over `line`.
    ///
    /// `at` is the line number returned by [`RowReader::get_line_number`] and
    /// `delimiter` is the character that separates fields.
    pub fn new(line: &'a str, at: usize, delimiter: char) -> Self {
        Self { it: line.chars().peekable(), at, delimiter, ended: false }
    }
    /// Returns the line number that was passed to [`RowReader::new`].
    pub fn get_line_number(&self) -> usize {
        self.at
    }
    /// Returns `true` when the unread remainder of the line holds no content.
    ///
    /// A remainder made up only of carriage returns counts as empty: when a
    /// CRLF file is split on `'\n'`, a blank line shows up as `"\r"`, and the
    /// field parser discards that character anyway.
    pub fn check_empty(&mut self) -> bool {
        // Scan a clone so the reader's own position is left untouched.
        self.it.clone().all(|ch| ch == '\r')
    }
}
impl<'a> Iterator for RowReader<'a> {
    type Item = String;
    /// Returns the next field, or `None` after the last field was returned.
    ///
    /// The last field is produced when the input runs out, so an empty line
    /// yields one empty field and a trailing delimiter yields a trailing
    /// empty field.
    fn next(&mut self) -> Option<Self::Item> {
        if self.ended {
            return None;
        }
        let mut value = String::new();
        // True while the previous character was a backslash inside a quoted field.
        let mut escaped = false;
        // True after a quote that may close the field; a second quote right
        // after it turns the pair into one literal quote and clears the flag.
        let mut end_quote = false;
        // A leading quote switches the field into quoted mode and is not part
        // of the value.
        let can_escape = self.it.next_if_eq(&'"').is_some();
        for ch in self.it.by_ref() {
            if escaped {
                value.push(ch);
                escaped = false;
                continue;
            }
            if can_escape {
                if ch == '\\' {
                    escaped = true;
                    continue;
                }
                if ch == '"' {
                    if end_quote {
                        value.push(ch);
                    }
                    end_quote = !end_quote;
                    continue;
                }
            }
            // Delimiters and carriage returns are only special outside the
            // quotes: in an unquoted field, or after the closing quote.
            if !can_escape || end_quote {
                if ch == '\r' {
                    continue;
                }
                if ch == self.delimiter {
                    return Some(value);
                }
            }
            value.push(ch);
        }
        // Input exhausted: hand out what was collected as the final field.
        self.ended = true;
        Some(value)
    }
}

View File

@ -1,6 +1,6 @@
use std::collections::HashMap;
use itertools::Itertools;
use crate::{candidate::{Candidate, CandidateName}, party::Party};
use crate::{candidate::{Candidate, CandidateName}, csv, party::Party};
#[derive(Debug)]
@ -11,12 +11,12 @@ pub struct Header {
}
impl Header {
pub fn parse(row: quick_csv::Row, winners: usize) -> Result<Header, Box<dyn std::error::Error>> {
pub fn parse(row: csv::reader::RowReader, winners: usize) -> Result<Header, Box<dyn std::error::Error>> {
let mut parties = Vec::<Party>::new();
let mut candidates = Vec::<Candidate>::new();
let mut parties_lookup = HashMap::<&str, usize>::new();
for col in row.columns()?.skip(6) {
for col in row.skip(6).collect_vec().iter() {
let [party, name] = col.split(':').next_array().ok_or("Missing ':'")?;
if party == "UG" { // independents

View File

@ -1,4 +1,5 @@
use std::env;
use std::{env, fs};
use counter::Counter;
use util::Percent;
@ -8,6 +9,7 @@ pub mod ballot;
pub mod header;
pub mod party;
pub mod stats;
pub mod csv;
mod counter;
fn main() {
@ -19,8 +21,11 @@ fn main() {
}
(args[1].clone(), args[2].parse::<usize>().unwrap())
};
let csv = quick_csv::Csv::from_file(csv_path).unwrap().flexible(true);
let counter = Counter::new(csv, winner_count).unwrap();
let csv = String::from_utf8(fs::read(&csv_path).unwrap()).unwrap();
let rows = csv.split('\n').enumerate().map(|(i,v)| csv::reader::RowReader::new(v, i, ','));
let counter = Counter::new(rows, winner_count).unwrap();
let mut winners = Vec::with_capacity(winner_count);
let total = counter.ballots.len() as f64;
let header = counter.header.clone();

View File

@ -20,5 +20,12 @@ impl Stats {
both: 0,
}
}
pub fn add(&mut self, rhs: &Self) {
self.total += rhs.total;
self.party += rhs.party;
self.party_single += rhs.party_single;
self.candidate += rhs.candidate;
self.both += rhs.both;
}
}

View File

@ -2,10 +2,8 @@
pub mod escape;
pub mod percent;
pub mod score_item;
pub mod csv;
pub use score_item::ScoreItem;
pub use escape::{EscapeWriter, EscapeWriterOpts};
pub use percent::Percent;
pub use csv::CsvWriter;