added optimisations
This commit is contained in:
parent
17a6c88dbc
commit
beeea3597e
|
|
@ -1,6 +1,6 @@
|
|||
use itertools::Itertools;
|
||||
|
||||
use crate::{csv, header::Header, stats::Stats};
|
||||
use crate::{csv, header::Header, stats::Stats, util::ScoreItem};
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -12,7 +12,7 @@ pub struct Ballot {
|
|||
const CANDIDATE_MIN: usize = 6;
|
||||
|
||||
impl Ballot {
|
||||
pub fn parse(mut row: csv::reader::RowReader, header: &Header, stats: &mut Stats) -> Result<Ballot, Box<dyn std::error::Error>> {
|
||||
pub fn parse(mut row: csv::reader::Row, header: &Header, stats: &mut Stats) -> Result<Ballot, Box<dyn std::error::Error>> {
|
||||
let mut cols = row.by_ref().skip(6);
|
||||
|
||||
let place_filter = |(index, place): (usize, String)| match place.parse::<i32>() {
|
||||
|
|
@ -45,22 +45,32 @@ impl Ballot {
|
|||
}
|
||||
|
||||
else {
|
||||
println!("abl votes: {votes_party:?}");
|
||||
println!("btl votes: {votes_candidate:?}");
|
||||
println!("at: {}", row.get_line_number());
|
||||
return Err("ballot is informal".into());
|
||||
}
|
||||
|
||||
stats.total += 1;
|
||||
Ok(Ballot { weight: 1.0, votes })
|
||||
}
|
||||
pub fn get_weight(&self) -> f64 {
|
||||
self.weight
|
||||
pub fn count_primary(&self, scores: &mut [f64], enabled: &[bool]) {
|
||||
if let Some(index) = self.get_primary_index(enabled) {
|
||||
scores[index] += self.weight;
|
||||
}
|
||||
pub fn apply_weight(&mut self, weight: f64) {
|
||||
}
|
||||
pub fn apply_winners(&mut self, enabled: &[bool], winners: &[ScoreItem], quota: f64) {
|
||||
let index = match self.get_primary_index(enabled) {
|
||||
Some(v) => v,
|
||||
None => return,
|
||||
};
|
||||
if let Some(winner) = winners.iter().filter(|&v| v.index == index).next() {
|
||||
self.apply_weight((winner.value - quota) / winner.value);
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
fn apply_weight(&mut self, weight: f64) {
|
||||
self.weight *= weight;
|
||||
}
|
||||
pub fn get_primary_index(&self, enabled: &[bool]) -> Option<usize> {
|
||||
#[inline]
|
||||
fn get_primary_index(&self, enabled: &[bool]) -> Option<usize> {
|
||||
for &id in self.votes.iter() {
|
||||
if enabled[id] {
|
||||
return Some(id);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use std::{sync::{Arc, Mutex}, thread};
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::{ballot::Ballot, csv, header::Header, stats::Stats, util::ScoreItem};
|
||||
use crate::{ballot::Ballot, csv, header::Header, stats::Stats, util::{self, ScoreItem}};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Event {
|
||||
|
|
@ -19,10 +19,10 @@ pub struct Counter {
|
|||
quota: f64,
|
||||
}
|
||||
|
||||
const CHUNK_SIZE: usize = 1024;
|
||||
|
||||
impl Counter {
|
||||
pub fn new<'a,I>(mut csv: I, winners: usize) -> Result<Self, Box<dyn std::error::Error>>
|
||||
where I: Iterator<Item=csv::reader::RowReader<'a>> + Send + Sync
|
||||
{
|
||||
pub fn new(mut csv: csv::reader::CsvReader, winners: usize) -> Result<Self, Box<dyn std::error::Error + Sync + Send>> {
|
||||
let header = Arc::new(Header::parse(csv.next().ok_or("csv header missing")?, winners)?);
|
||||
let ballots = Mutex::new(Vec::new());
|
||||
let stats = Mutex::new(Stats::new());
|
||||
|
|
@ -32,22 +32,25 @@ impl Counter {
|
|||
return Err("winners can't be smaller than the candidates list".into());
|
||||
}
|
||||
|
||||
thread::scope(|s| {
|
||||
let threads = std::thread::available_parallelism().unwrap().into();
|
||||
for _ in 0..threads {
|
||||
s.spawn(|| {
|
||||
util::thread::spawn_all_with_result(|| -> Result<(), Box<dyn std::error::Error + Sync + Send>> {
|
||||
let mut l_stats = Stats::new();
|
||||
let mut l_ballots = Vec::new();
|
||||
while let Some(mut row) = { csv.lock().unwrap().next() } {
|
||||
if !row.check_empty() {
|
||||
loop {
|
||||
let rows = csv.lock().unwrap().by_ref().take(CHUNK_SIZE).collect_vec();
|
||||
|
||||
if rows.len() == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
for row in rows {
|
||||
l_ballots.push(Ballot::parse(row, &header, &mut l_stats).unwrap());
|
||||
}
|
||||
|
||||
ballots.lock().unwrap().append(&mut l_ballots);
|
||||
}
|
||||
{ballots.lock().unwrap().append(&mut l_ballots)};
|
||||
{stats.lock().unwrap().add(&l_stats)};
|
||||
});
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
let ballots = ballots.into_inner().unwrap();
|
||||
let stats = stats.into_inner().unwrap();
|
||||
|
|
@ -67,26 +70,17 @@ impl Counter {
|
|||
}
|
||||
fn count_primaries(&self) -> Vec<ScoreItem> {
|
||||
let mut scores = Mutex::new(vec![0.0; self.enabled.len()]);
|
||||
let ballot_chunks = Mutex::new(self.ballots.chunks(1024));
|
||||
let ballot_chunks = Mutex::new(self.ballots.chunks(CHUNK_SIZE));
|
||||
|
||||
thread::scope(|s| {
|
||||
let threads = std::thread::available_parallelism().unwrap().into();
|
||||
for _ in 0..threads {
|
||||
s.spawn(|| {
|
||||
util::thread::spawn_all(|| {
|
||||
let mut l_scores = vec![0.0; self.enabled.len()];
|
||||
while let Some(ballots) = { ballot_chunks.lock().unwrap().next() } {
|
||||
for ballot in ballots {
|
||||
if let Some(index) = ballot.get_primary_index(&self.enabled) {
|
||||
l_scores[index] += ballot.get_weight();
|
||||
}
|
||||
}
|
||||
for ballot in util::mutex::chunk_iter(&ballot_chunks) {
|
||||
ballot.count_primary(&mut l_scores, &self.enabled);
|
||||
}
|
||||
for (dst, src) in scores.lock().unwrap().iter_mut().zip(l_scores.into_iter()) {
|
||||
*dst += src;
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
scores.get_mut().unwrap().iter().copied().enumerate().filter_map(|(index, value)| match self.enabled[index] {
|
||||
true => Some(ScoreItem::new(index, value)),
|
||||
|
|
@ -94,27 +88,15 @@ impl Counter {
|
|||
}).collect_vec()
|
||||
}
|
||||
fn apply_weights(&mut self, winners: &[ScoreItem]) {
|
||||
let ballot_chunks = Mutex::new(self.ballots.chunks_mut(1024));
|
||||
thread::scope(|s| {
|
||||
let threads = std::thread::available_parallelism().unwrap().into();
|
||||
for _ in 0..threads {
|
||||
s.spawn(|| {
|
||||
while let Some(ballots) = { ballot_chunks.lock().unwrap().next() } {
|
||||
for ballot in ballots {
|
||||
let index = match ballot.get_primary_index(&self.enabled) {
|
||||
Some(v) => v,
|
||||
None => continue,
|
||||
};
|
||||
if let Some(winner) = winners.iter().filter(|&v| v.index == index).next() {
|
||||
let surplus = winner.value - self.quota;
|
||||
ballot.apply_weight(surplus / winner.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
let ballot_chunks = Mutex::new(self.ballots.chunks_mut(CHUNK_SIZE));
|
||||
|
||||
util::thread::spawn_all(|| {
|
||||
for ballot in util::mutex::chunk_iter_mut(&ballot_chunks) {
|
||||
ballot.apply_winners(&self.enabled, winners, self.quota);
|
||||
}
|
||||
});
|
||||
}
|
||||
#[inline]
|
||||
pub fn get_quota(&self) -> f64 {
|
||||
self.quota
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,74 +1,33 @@
|
|||
use std::{iter::Peekable, str::Chars};
|
||||
use std::str::Split;
|
||||
|
||||
pub use row::Row;
|
||||
pub mod row;
|
||||
|
||||
pub struct RowReader<'a> {
|
||||
it: Peekable<Chars<'a>>,
|
||||
pub struct CsvReader<'a> {
|
||||
it: Split<'a, char>,
|
||||
delimiter: char,
|
||||
ended: bool,
|
||||
at: usize,
|
||||
}
|
||||
|
||||
impl<'a> RowReader<'a> {
|
||||
pub fn new(line: &'a str, at: usize, delimiter: char) -> Self {
|
||||
Self { it: line.chars().peekable(), at, delimiter, ended: false }
|
||||
}
|
||||
pub fn get_line_number(&self) -> usize {
|
||||
self.at
|
||||
}
|
||||
pub fn check_empty(&mut self) -> bool {
|
||||
self.it.peek().is_none()
|
||||
impl<'a> CsvReader<'a> {
|
||||
pub fn new(text: &'a str, delimiter: char) -> Self {
|
||||
Self {it: text.split('\n'), delimiter }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for RowReader<'a> {
|
||||
type Item = String;
|
||||
impl<'a> Iterator for CsvReader<'a> {
|
||||
type Item = Row<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.ended {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut value = String::new();
|
||||
let mut escaped = false;
|
||||
let mut end_quote = false;
|
||||
let can_escape = self.it.peek().copied() == Some('"');
|
||||
|
||||
if can_escape {
|
||||
self.it.next();
|
||||
}
|
||||
|
||||
for ch in self.it.by_ref() {
|
||||
if escaped {
|
||||
value.push(ch);
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if can_escape {
|
||||
if ch == '\\' {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if ch == '"' {
|
||||
if end_quote {
|
||||
value.push(ch);
|
||||
}
|
||||
end_quote = !end_quote;
|
||||
continue;
|
||||
for line in self.it.by_ref() {
|
||||
let row = Row::new(line, self.delimiter);
|
||||
if row.has_next() {
|
||||
return Some(row);
|
||||
}
|
||||
}
|
||||
if !can_escape || end_quote {
|
||||
if ch == '\r' {
|
||||
continue;
|
||||
None
|
||||
}
|
||||
if ch == self.delimiter {
|
||||
return Some(value);
|
||||
}
|
||||
}
|
||||
value.push(ch);
|
||||
}
|
||||
|
||||
self.ended = true;
|
||||
Some(value)
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(0, self.it.size_hint().1)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
|
||||
pub struct Row<'a> {
|
||||
it: Peekable<Chars<'a>>,
|
||||
delimiter: char,
|
||||
ended: bool,
|
||||
}
|
||||
|
||||
impl<'a> Row<'a> {
|
||||
pub fn new(line: &'a str, delimiter: char) -> Self {
|
||||
let mut it = line.chars().peekable();
|
||||
let ended = match it.peek().copied() {
|
||||
Some('\r') => true,
|
||||
Some(_) => false,
|
||||
None => true,
|
||||
};
|
||||
Row { it, ended, delimiter }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Row<'a> {
|
||||
pub fn has_next(&self) -> bool {
|
||||
!self.ended
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Row<'a> {
|
||||
type Item = String;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.ended {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut value = String::new();
|
||||
let mut escaped = false;
|
||||
let mut end_quote = false;
|
||||
let can_escape = self.it.peek().copied() == Some('"');
|
||||
|
||||
if can_escape {
|
||||
self.it.next();
|
||||
}
|
||||
|
||||
for ch in self.it.by_ref() {
|
||||
if escaped {
|
||||
value.push(ch);
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if can_escape {
|
||||
if ch == '\\' {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
if ch == '"' {
|
||||
if end_quote {
|
||||
value.push(ch);
|
||||
}
|
||||
end_quote = !end_quote;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if !can_escape || end_quote {
|
||||
if ch == '\r' {
|
||||
continue;
|
||||
}
|
||||
if ch == self.delimiter {
|
||||
return Some(value);
|
||||
}
|
||||
}
|
||||
value.push(ch);
|
||||
}
|
||||
|
||||
self.ended = true;
|
||||
Some(value)
|
||||
}
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
match self.ended {
|
||||
true => (0, Some(0)),
|
||||
false => (1, None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -11,12 +11,12 @@ pub struct Header {
|
|||
}
|
||||
|
||||
impl Header {
|
||||
pub fn parse(row: csv::reader::RowReader, winners: usize) -> Result<Header, Box<dyn std::error::Error>> {
|
||||
pub fn parse(row: csv::reader::Row, winners: usize) -> Result<Header, Box<dyn std::error::Error + Sync + Send>> {
|
||||
let mut parties = Vec::<Party>::new();
|
||||
let mut candidates = Vec::<Candidate>::new();
|
||||
let mut parties_lookup = HashMap::<&str, usize>::new();
|
||||
let mut parties_lookup = HashMap::<String, usize>::new();
|
||||
|
||||
for col in row.skip(6).collect_vec().iter() {
|
||||
for col in row.skip(6) {
|
||||
let [party, name] = col.split(':').next_array().ok_or("Missing ':'")?;
|
||||
|
||||
if party == "UG" { // independents
|
||||
|
|
@ -33,7 +33,7 @@ impl Header {
|
|||
});
|
||||
}
|
||||
else {
|
||||
parties_lookup.insert(party, parties.len());
|
||||
parties_lookup.insert(party.to_owned(), parties.len());
|
||||
parties.push(Party::new(name.to_owned()));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::{env, fs};
|
||||
|
||||
use counter::Counter;
|
||||
use csv::reader::CsvReader;
|
||||
use util::Percent;
|
||||
|
||||
pub mod util;
|
||||
|
|
@ -22,10 +23,10 @@ fn main() {
|
|||
(args[1].clone(), args[2].parse::<usize>().unwrap())
|
||||
};
|
||||
|
||||
let csv = String::from_utf8(fs::read(&csv_path).unwrap()).unwrap();
|
||||
let rows = csv.split('\n').enumerate().map(|(i,v)| csv::reader::RowReader::new(v, i, ','));
|
||||
let csv_text = String::from_utf8(fs::read(&csv_path).unwrap()).unwrap();
|
||||
let csv = CsvReader::new(&csv_text, ',');
|
||||
|
||||
let counter = Counter::new(rows, winner_count).unwrap();
|
||||
let counter = Counter::new(csv, winner_count).unwrap();
|
||||
let mut winners = Vec::with_capacity(winner_count);
|
||||
let total = counter.ballots.len() as f64;
|
||||
let header = counter.header.clone();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
pub mod escape;
|
||||
pub mod percent;
|
||||
pub mod score_item;
|
||||
pub mod mutex;
|
||||
pub mod thread;
|
||||
|
||||
pub use score_item::ScoreItem;
|
||||
pub use escape::{EscapeWriter, EscapeWriterOpts};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
use std::{slice::{Chunks, ChunksMut}, sync::Mutex};
|
||||
|
||||
|
||||
pub fn chunk_iter<'a,'b,T>(mtx: &'a Mutex<Chunks<'b,T>>) -> impl Iterator<Item=&'b T> {
|
||||
std::iter::repeat_with(|| { mtx.lock().unwrap().next() }).take_while(|v| v.is_some()).filter_map(|v| match v {
|
||||
Some(v) => Some(v.iter()),
|
||||
None => None,
|
||||
}).flatten()
|
||||
}
|
||||
|
||||
pub fn chunk_iter_mut<'a,'b,T>(mtx: &'a Mutex<ChunksMut<'b,T>>) -> impl Iterator<Item=&'b mut T> {
|
||||
std::iter::repeat_with(|| { mtx.lock().unwrap().next() }).take_while(|v| v.is_some()).filter_map(|v| match v {
|
||||
Some(v) => Some(v.iter_mut()),
|
||||
None => None,
|
||||
}).flatten()
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
use std::sync::Mutex;
|
||||
|
||||
#[inline]
|
||||
pub fn spawn_all<F>(f: F) where F: Fn() + Sync {
|
||||
std::thread::scope(|s| {
|
||||
for _ in 0..std::thread::available_parallelism().unwrap().into() {
|
||||
s.spawn(|| f());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn spawn_all_with_result<F, T>(f: F) -> Result<(), T>
|
||||
where
|
||||
F: Fn() -> Result<(), T> + Sync,
|
||||
T: Send
|
||||
{
|
||||
let res = Mutex::new(None);
|
||||
|
||||
spawn_all(|| {
|
||||
if let Err(err) = f() {
|
||||
*res.lock().unwrap() = Some(err);
|
||||
}
|
||||
});
|
||||
|
||||
match res.into_inner().unwrap() {
|
||||
Some(err) => Err(err),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue