%%% Senate_estimator.m – a MATLAB script

%%% Copyright 2008, 2014 by Samuel S.-H. Wang

%%% Noncommercial-use-only license:

%%% You may use or modify this software, but only for noncommercial purposes.

%%% To seek a commercial-use license, contact the author at sswang@princeton.edu.

% Likelihood analysis of all possible outcomes of election based

% on the meta-analytical methods of Prof. Sam Wang, Princeton University.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Senate_estimator.m

%

% This script loads 'polls.median.2014Senate.txt' and generates or updates/replaces 4 CSV files:

%

% Senate_estimates.csv

% all in one line:

% 2 values – median_seats for the two parties, where the first value is for Democrats/Independents

% 1 value – Dem/Ind control probability

% 3 values – assigned (>95% prob) seats for each party, with a third entry for undecided;

% 4 values – confidence intervals for party 1's seat count: +/-1 sigma, then

% 95% band; and

% 1 value – number of state polls used to make the estimates.

% 1 value – (calculated by Senate_metamargin and appended) the meta-margin.

%

% Another file, Senate_estimate_history, is updated with the same

% information as Senate_estimates.csv plus 1 value for the date.

%

% stateprobs.csv

% An N-line file giving percentage probabilities for Dem/Ind win of the popular vote, state by state.

% Note that this is the same as the EV calculation, except with 1 seat per race.

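% The second field on each line is the current median polling margin.

% The third and fourth fields are the Dem/Ind win probabilities (percent) under hypothetical D+2% and R+2% polling biases.

% The fifth field on each line is the two-letter postal abbreviation.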

%

% Senate_histogram.csv

% A 100-line file giving the probability histogram of each seat-count outcome. Line 1 is

% the probability of party #1 (Democrats/Independents) getting 1 seat. Line 2 is 2 seats, and so on.

% Note that 0 seats is left out of this histogram for ease of indexing.

%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% This routine uses the workspace variables biaspct, analysisdate and metacalc if they already exist; any that are missing default to 0, 0 and 1 respectively.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%% Initialize variables %%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% polls.state=[

% ‘AL,AK,AZ,AR,CA,CO,CT,DC,DE,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,ME,MD,MA,MI,MN,MS,MO,MT,NE,NV,NH,NJ,NM,NY,NC,ND,OH,OK,OR,PA,RI,SC,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY ‘];

% Race list: each contested seat is a two-letter postal code followed by a
% one-character separator, so every race occupies exactly 3 characters. The
% trailing blank keeps the length a multiple of 3 (used for the count below).
polls.state='AK,AR,CO,GA,HI,IA,KS,KY,LA,MI,MN,MS,MT,NC,NH,OR,SD,VA,WV '; % 19 races

%AK Begich Sullivan
%AR Pryor Cotton
%CO Udall Gardner
%GA Nunn Kingston
%HI Schatz
%IA Braley Ernst
%KS Roberts Taylor
%KY Grimes McConnell
%LA Landrieu Cassidy
%MI Peters Land
%MN Franken McFadden
%MS Childers Cochran
%MT Walsh Daines
%NC Hagan Tillis
%NH Shaheen Brown
%OR Merkley Wehby
%SD Weiland Rounds
%VA Warner Gillespie
%WV Tennant Capito

% One seat per race; the race count is derived from the 3-char-per-race string.
polls.EV=ones(1, length(polls.state)/3);
num_states=size(polls.EV,2);

% assignedEV slots: 1=Dem, 2=GOP, 3=up for election
assignedEV(3)=sum(polls.EV);
assignedEV(1)=40; assignedEV(2)=41; % these are the seats not up for election
Demsafe=assignedEV(1);

% checksum to make sure no double assignment or missed assignment
if (sum(assignedEV)~=100)
    warning('Warning: Senate seats do not sum to 100!')
    assignedEV
end

% Optional inputs: honour values already present in the workspace, otherwise
% fall back to defaults.
if ~exist('biaspct','var')
    biaspct=0;
end

% Remember whether this is an unbiased run; the history file is only appended
% to when this flag is true.
forhistory=biaspct==0;

if ~exist('analysisdate','var')
    analysisdate=0; % 0 selects the first num_states rows of the data file
end

if ~exist('metacalc','var')
    metacalc=1; % 0 skips the meta-margin search and reports -999 instead
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%% Load and parse polling data %%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Load the numeric poll summary table. Columns referenced by this script:
%   1 = summed into totalpollsused (presumably polls per race -- TODO confirm)
%   3 = median margin, 4 = SEM, 5 = date (compared against analysisdate)
polldata=load('polls.median.2014Senate.txt');
numlines = size(polldata,1);

% The file is expected to hold whole snapshots of num_states rows each.
if mod(numlines,num_states)>0
    warning('Warning: polls.median.2014Senate.txt is not a multiple of num_states lines long');
end

% Currently we are using median and effective SEM of the last 3 polls.
% To de-emphasize extreme outliers, in place of SD we use (median absolute deviation)/0.6745

% find the desired data within the file
if analysisdate>0 && numlines>num_states
    foo=find(polldata(:,5)==analysisdate,1,'first');
    % ind=min([size(polldata,1)-50 foo']);
    foo2=find(polldata(:,5)==max(polldata(:,5)),1,'first');
    % If analysisdate is not present, foo is empty and the most recent date
    % (foo2) is used instead.
    ind=max([foo2 foo]); %assume reverse time order
    % Take exactly one snapshot of num_states rows. (This used to be a fixed
    % 51-row slice inherited from the 51-jurisdiction presidential script,
    % which does not match the num_states-long SEM floor applied below.)
    polldata=polldata(ind:ind+num_states-1,:);
    clear foo2 foo ind
elseif numlines>num_states
    % polldata = polldata(numlines-num_states+1:numlines,:);
    polldata = polldata(1:num_states,:);
end

% Use statistics from data file
polls.margin=polldata(:,3)';
polls.SEM=polldata(:,4)';

% Floor the SEM at 2; the explicit num_states-long vector also acts as a size
% check on the selected rows. No semicolon: the result is echoed to the console.
polls.SEM=max(polls.SEM,zeros(1,num_states)+2)

% NOTE(review): DC is not in the race list above, so the "-1" looks like a
% leftover from the presidential script -- confirm before changing, since the
% value is written to the output CSVs.
totalpollsused=sum(polldata(:,1))-1 % assume DC has no polls

% mock data in case we ever need to do a dry run

% Use three poll (as of 23 July)

%polls.margin=[-14 -7 -10 -10 24 7 20 81 9 -2 -9 30 -13 13 1 10 -20 -16 -19 10 13 16 5 18 -6 0 5 -16 2 3 11 5 13 -4 0 -6 -14 9 4 24 -9 -4 -15 -9 -24 34 0 12 -8 11 -13];

%polls.SEM=zeros(1,num_states)+3;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%% Where the magic happens! %%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Senate_median is not defined in this file. The code below reads
% median_seats(1), histogram, Senateseats, cumulative_prob, stateprobs and the
% D_/R_Senate_control_probability values without assigning them here, so they
% are presumably produced by Senate_median from polls and biaspct -- TODO confirm.
Senate_median

% NOTE(review): no trailing semicolon. If stateprobs is a workspace variable
% this line only echoes it to the console; if it is a script, it runs it --
% TODO confirm which.
stateprobs

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%% Plot the histogram %%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Draw the seat-count probability histogram for the current figure and, for an
% unbiased run, save it as a JPEG. Reads histogram, Senateseats and the two
% control probabilities, none of which are assigned in this script.
close

% Vertical red marker at 49.5 seats; y-range is in percent with 5% headroom.
phandle=plot([49.5 49.5],[0 max(histogram)*105],'-r','LineWidth',1.5);
EVticks=200:20:380; % not referenced anywhere else in this script
grid on
hold on

% Bins 5-9 are drawn in red and bins 10-14 in blue.
bar(Senateseats(5:9),histogram(5:9)*100,'r')
bar(Senateseats(10:14),histogram(10:14)*100,'b')
axis([Senateseats(5)-0.5 Senateseats(14)+0.5 0 max(histogram)*105])

xlabel('Democratic/Independent Senate seats','FontSize',14);
ylabel('Probability (%)','FontSize',14)
set(gcf, 'InvertHardCopy', 'off');
title('Distribution of all possible outcomes','FontSize',14)

% Control-probability captions, rounded to whole percent.
Dstr=['D control: ',num2str(round(D_Senate_control_probability*100)),'%'];
Rstr=['R control: ',num2str(round(R_Senate_control_probability*100)),'%'];
text(Senateseats(5)-0.35,max(histogram)*99,Rstr,'FontSize',18)
text(Senateseats(14)-3.2,max(histogram)*99,Dstr,'FontSize',18)

% Date stamp: today's date unless an explicit analysis date was requested.
if analysisdate==0
    datelabel=datestr(now);
else
    datelabel=datestr(analysisdate);
end
% Only the first six characters of the date string are shown.
text(44.6,max(histogram)*92,datelabel(1:6),'FontSize',12)
text(44.6,max(histogram)*86,'election.princeton.edu','FontSize',12)

% Only the unbiased run overwrites the published image.
if biaspct==0
    set(gcf,'PaperPositionMode','auto')
    print -djpeg EV_histogram_today.jpg
end

% Summary statistics derived from the seat distribution.
% confidenceintervals layout: [1-sigma low, 1-sigma high, 95% low, 95% high].
confidenceintervals(3)=Senateseats(find(cumulative_prob<=0.025,1,'last')); % 95-pct lower limit
confidenceintervals(1)=Senateseats(find(cumulative_prob<=0.15865,1,'last')); % 1-sigma lower limit
% The next assignment used to be fused into the comment of the line above, so
% it never executed and the 1-sigma upper limit was left at 0.
confidenceintervals(2)=Senateseats(find(cumulative_prob>=0.84135,1,'first')); % 1-sigma upper limit
confidenceintervals(4)=Senateseats(find(cumulative_prob>=0.975,1,'first')); % 95-pct upper limit

% Most probable outcome. Take the first maximum so that a tie between two bins
% does not make the scalar assignment fail.
% NOTE(review): this stores the histogram index, not Senateseats(index) --
% confirm that the two coincide.
mode_seats(1)=find(histogram==max(histogram),1,'first');

median_seats(2)=100-median_seats(1); % assume no seats go to a third candidate
mode_seats(2)=100-mode_seats(1); % assume no seats go to a third candidate

% Re-calculate safe EV for each party
% (>=95% counts as safe for party 1, <=5% as safe for party 2; stateprobs is in percent)
assignedEV(1)=sum(polls.EV(stateprobs>=95));
assignedEV(2)=sum(polls.EV(stateprobs<=5));
assignedEV(3)=100-assignedEV(1)-assignedEV(2);

% Races strictly between the two thresholds, listed as a space-separated string.
uncertain=find(stateprobs<95 & stateprobs>5);
uncertainstates='';
% numel (rather than max(size(...))) so that an empty 1x0 result skips the loop.
for i=1:numel(uncertain)
    uncertainstates=[uncertainstates statename2(uncertain(i),polls.state) ' '];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%% Daily file update %%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Assemble the one-line summary:
% [median seats (2), Dem/Ind control probability, assigned seats (3),
%  confidence intervals (4), number of polls used].
outputs=[median_seats D_Senate_control_probability assignedEV confidenceintervals totalpollsused];

% Write a file of unbiased statewise percentage probabilities
% Only write this file if bias is zero!
if biaspct==0
    % Export probability histogram:
    dlmwrite('Senate_histogram.csv',histogram')

    % Export state-by-state percentage probabilities as CSV, with 2-letter state abbreviations:
    % Each line includes hypothetical probabilities for D+2% and R+2% biases
    % Start from a clean file because the rows below are appended one by one.
    if exist('stateprobs.csv','file')
        delete('stateprobs.csv')
    end

    % (erf(z/sqrt(2))+1)*50 is the normal CDF of z expressed in percent.
    foo=(polls.margin+2)./polls.SEM;
    D2probs=round((erf(foo/sqrt(2))+1)*50);
    foo=(polls.margin-2)./polls.SEM;
    R2probs=round((erf(foo/sqrt(2))+1)*50);

    % Row layout: probability, margin, D+2 probability, R+2 probability, state.
    for i=1:num_states
        foo=[num2str(stateprobs(i)) ',' num2str(polls.margin(i)) ',' num2str(D2probs(i)) ',' num2str(R2probs(i)) ',' statename2(i,polls.state)];
        % Empty delimiter: the pre-formatted text is written out verbatim.
        dlmwrite('stateprobs.csv',foo,'-append','delimiter','')
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%% The meta-margin %%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Meta-margin: sweep biaspct upward from -7 in steps of 0.02, re-running
% Senate_median each time, until median_seats(1) is no longer below 50. The
% meta-margin is the negative of the bias at which the sweep stops.
reality=R_Senate_control_probability;

if metacalc==0
    metamargin=-999; % sentinel: meta-margin calculation was skipped
else
    foo=biaspct; % stash the caller's bias so it can be restored afterwards

    % biaspct=round((269-median_seats(1))/1.25)/10-2; % clever way to start
    % range
    biaspct=-7; % just brute force
    while true
        Senate_median
        if ~(median_seats(1) < 50)
            break
        end
        biaspct=biaspct+.02;
    end

    metamargin=-biaspct % no semicolon: echoed to the console
    biaspct=foo;
    clear foo
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%% Daily and History Update %%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Append the meta-margin and overwrite the one-line daily summary file.
outputs = [outputs metamargin];
dlmwrite('Senate_estimates.csv', outputs)

% For unbiased runs whose data carries a date column (column 5), also append a
% history row consisting of the date followed by the same values.
if forhistory && size(polldata,2)==5
    dlmwrite('Senate_estimate_history.csv',[polldata(1,5) outputs],'-append')
end