diff --git a/Cargo.lock b/Cargo.lock index 0a7180b..b2bc9b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,7 +7,6 @@ name = "ballot-counter" version = "0.1.0" dependencies = [ "itertools", - "quick-csv", ] [[package]] @@ -24,18 +23,3 @@ checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] - -[[package]] -name = "quick-csv" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edf4b7701db7d2e4c9c010f21eebd8676a50f79223a0cf858162d24bff47338c" -dependencies = [ - "rustc-serialize", -] - -[[package]] -name = "rustc-serialize" -version = "0.3.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe834bc780604f4674073badbad26d7219cadfb4a2275802db12cbae17498401" diff --git a/Cargo.toml b/Cargo.toml index 1b6162a..b89c495 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,3 @@ edition = "2024" [dependencies] itertools = "0.14.0" -quick-csv = "0.1.6" diff --git a/src/ballot.rs b/src/ballot.rs index 3001ce5..107afc6 100644 --- a/src/ballot.rs +++ b/src/ballot.rs @@ -1,6 +1,6 @@ use itertools::Itertools; -use crate::{header::Header, stats::Stats}; +use crate::{csv, header::Header, stats::Stats}; #[derive(Debug)] @@ -12,10 +12,10 @@ pub struct Ballot { const CANDIDATE_MIN: usize = 6; impl Ballot { - pub fn parse(row: quick_csv::Row, header: &Header, stats: &mut Stats) -> Result> { - let mut cols = row.columns()?.skip(6); + pub fn parse(mut row: csv::reader::RowReader, header: &Header, stats: &mut Stats) -> Result> { + let mut cols = row.by_ref().skip(6); - let place_filter = |(index, place): (usize, &str)| match place.parse::() { + let place_filter = |(index, place): (usize, String)| match place.parse::() { Ok(place) => Some((place, index)), Err(_) => None, }; @@ -45,6 +45,9 @@ impl Ballot { } else { + println!("abl votes: {votes_party:?}"); + println!("btl votes: {votes_candidate:?}"); + println!("at: {}", row.get_line_number()); return Err("ballot is informal".into()); } diff --git a/src/counter.rs b/src/counter.rs index 7d06779..d2054dd 100644 --- a/src/counter.rs +++ b/src/counter.rs @@ -1,7 +1,7 @@ -use std::{sync::{atomic::{AtomicUsize, Ordering}, mpsc, Arc, Mutex}, thread}; +use std::{sync::{Arc, Mutex}, thread}; use itertools::Itertools; -use crate::{ballot::Ballot, header::Header, stats::Stats, util::ScoreItem}; +use crate::{ballot::Ballot, csv, header::Header, stats::Stats, util::ScoreItem}; #[derive(Debug)] pub enum Event { @@ -20,18 +20,37 @@ pub struct Counter { } impl Counter { - pub fn new(mut csv: quick_csv::Csv, winners: usize) -> Result> { - let header = Arc::new(Header::parse(csv.next().ok_or("csv header missing")??, winners)?); - let mut ballots = Vec::new(); - let mut stats = Stats::new(); + pub fn new<'a,I>(mut csv: I, winners: usize) -> Result> + where I: Iterator> + Send + Sync + { + let header = Arc::new(Header::parse(csv.next().ok_or("csv header missing")?, winners)?); + let ballots = Mutex::new(Vec::new()); + let stats = Mutex::new(Stats::new()); + let csv = Mutex::new(csv); if winners > header.candidates.len() { return Err("winners can't be smaller than the candidates list".into()); } - for row in csv { - ballots.push(Ballot::parse(row?, &header, &mut stats)?); - } + thread::scope(|s| { + let threads = std::thread::available_parallelism().unwrap().into(); + for _ in 0..threads { + s.spawn(|| { + let mut l_stats = Stats::new(); + let mut l_ballots = Vec::new(); + while let Some(mut row) = { csv.lock().unwrap().next() } { + if !row.check_empty() { + l_ballots.push(Ballot::parse(row, &header, &mut l_stats).unwrap()); + } + } + {ballots.lock().unwrap().append(&mut l_ballots)}; + {stats.lock().unwrap().add(&l_stats)}; + }); + } + }); + + let ballots = ballots.into_inner().unwrap(); + let stats = stats.into_inner().unwrap(); let enabled = vec![true; header.candidates.len()]; let quota = (ballots.len() as f64) / (header.winners as f64 + 1.0) + 1.0; diff --git a/src/csv.rs b/src/csv.rs new file mode 100644 index 0000000..ab3ea24 --- /dev/null +++ b/src/csv.rs @@ -0,0 +1,4 @@ + +pub mod writer; +pub mod reader; + diff --git a/src/csv/reader.rs b/src/csv/reader.rs new file mode 100644 index 0000000..9669320 --- /dev/null +++ b/src/csv/reader.rs @@ -0,0 +1,74 @@ +use std::{iter::Peekable, str::Chars}; + + +pub struct RowReader<'a> { + it: Peekable>, + delimiter: char, + ended: bool, + at: usize, +} + +impl<'a> RowReader<'a> { + pub fn new(line: &'a str, at: usize, delimiter: char) -> Self { + Self { it: line.chars().peekable(), at, delimiter, ended: false } + } + pub fn get_line_number(&self) -> usize { + self.at + } + pub fn check_empty(&mut self) -> bool { + self.it.peek().is_none() + } +} + +impl<'a> Iterator for RowReader<'a> { + type Item = String; + + fn next(&mut self) -> Option { + if self.ended { + return None; + } + + let mut value = String::new(); + let mut escaped = false; + let mut end_quote = false; + let can_escape = self.it.peek().copied() == Some('"'); + + if can_escape { + self.it.next(); + } + + for ch in self.it.by_ref() { + if escaped { + value.push(ch); + escaped = false; + continue; + } + if can_escape { + if ch == '\\' { + escaped = true; + continue; + } + if ch == '"' { + if end_quote { + value.push(ch); + } + end_quote = !end_quote; + continue; + } + } + if !can_escape || end_quote { + if ch == '\r' { + continue; + } + if ch == self.delimiter { + return Some(value); + } + } + value.push(ch); + } + + self.ended = true; + Some(value) + } +} + diff --git a/src/util/csv.rs b/src/csv/writer.rs similarity index 100% rename from src/util/csv.rs rename to src/csv/writer.rs diff --git a/src/header.rs b/src/header.rs index 8464760..7a341c6 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use itertools::Itertools; -use crate::{candidate::{Candidate, CandidateName}, party::Party}; +use crate::{candidate::{Candidate, CandidateName}, csv, party::Party}; #[derive(Debug)] @@ -11,12 +11,12 @@ pub struct Header { } impl Header { - pub fn parse(row: quick_csv::Row, winners: usize) -> Result> { + pub fn parse(row: csv::reader::RowReader, winners: usize) -> Result> { let mut parties = Vec::::new(); let mut candidates = Vec::::new(); let mut parties_lookup = HashMap::<&str, usize>::new(); - for col in row.columns()?.skip(6) { + for col in row.skip(6).collect_vec().iter() { let [party, name] = col.split(':').next_array().ok_or("Missing ':'")?; if party == "UG" { // independents diff --git a/src/main.rs b/src/main.rs index c05c724..c934cd2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ -use std::env; +use std::{env, fs}; + use counter::Counter; use util::Percent; @@ -8,6 +9,7 @@ pub mod ballot; pub mod header; pub mod party; pub mod stats; +pub mod csv; mod counter; fn main() { @@ -19,8 +21,11 @@ fn main() { } (args[1].clone(), args[2].parse::().unwrap()) }; - let csv = quick_csv::Csv::from_file(csv_path).unwrap().flexible(true); - let counter = Counter::new(csv, winner_count).unwrap(); + + let csv = String::from_utf8(fs::read(&csv_path).unwrap()).unwrap(); + let rows = csv.split('\n').enumerate().map(|(i,v)| csv::reader::RowReader::new(v, i, ',')); + + let counter = Counter::new(rows, winner_count).unwrap(); let mut winners = Vec::with_capacity(winner_count); let total = counter.ballots.len() as f64; let header = counter.header.clone(); diff --git a/src/stats.rs b/src/stats.rs index 12c594c..b8f503e 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -20,5 +20,12 @@ impl Stats { both: 0, } } + pub fn add(&mut self, rhs: &Self) { + self.total += rhs.total; + self.party += rhs.party; + self.party_single += rhs.party_single; + self.candidate += rhs.candidate; + self.both += rhs.both; + } } diff --git a/src/util.rs b/src/util.rs index 5858cef..e725187 100644 --- a/src/util.rs +++ b/src/util.rs @@ -2,10 +2,8 @@ pub mod escape; pub mod percent; pub mod score_item; -pub mod csv; pub use score_item::ScoreItem; pub use escape::{EscapeWriter, EscapeWriterOpts}; pub use percent::Percent; -pub use csv::CsvWriter;