Opinion polls for the 2021 German elections

The Guardian newspaper has an election poll tracker for the upcoming German election. The list of opinion polls since Jan 2021 can be found on Wikipedia.

The following code scrapes the Wikipedia page and imports the table into a data frame.

# Wikipedia article listing the opinion polls for the 2021 German federal election
url <- "https://en.wikipedia.org/wiki/Opinion_polling_for_the_2021_German_federal_election"
# Other links kept for reference:
# https://www.economist.com/graphic-detail/who-will-succeed-angela-merkel
# https://www.theguardian.com/world/2021/jun/21/german-election-poll-tracker-who-will-be-the-next-chancellor


# Download the page and collect every <table> node found in it
tables <- html_nodes(read_html(url), css = "table")


# Convert each scraped HTML table into a data frame with cleaned column names.
# purrr::map() yields a list holding one data frame per table on the page.
polls <- map(
  tables,
  function(tbl) {
    janitor::clean_names(html_table(tbl, fill = TRUE))
  }
)


# List of opinion polls.
# The first table on the page contains the list of all opinion polls.
german_election_polls <- polls[[1]] %>%
  # Drop the first row (it repeats the variable names) and the last row
  # (the 2017 results). Negative indices are used instead of 2:(n() - 1)
  # because that sequence counts backwards when the table has fewer than
  # three rows.
  slice(-c(1, n())) %>%
  mutate(
    # Polls are shown to run from-to, e.g. 9-13 Aug 2021. We keep the last
    # date (13 Aug here) as the poll date. It is extracted as the trailing
    # "<day> <month> <year>" of the field rather than a fixed number of
    # characters, so a longer month spelling (e.g. "13 Sept 2021") cannot
    # chop off the leading digit of the day.
    end_date = str_extract(
      str_trim(fieldwork_date),
      "\\d{1,2}\\s+[[:alpha:]]+\\.?\\s+\\d{4}$"
    ),

    # end_date is still a string, so convert it into a date object with
    # lubridate::dmy(); rows without a trailing date become NA.
    end_date = dmy(end_date),

    # Month and ISO week number of the poll date, for analysis by
    # month, week, etc.
    month = month(end_date),
    week = isoweek(end_date)
  )
# Figure size for knitr chunks. This used to be chained onto the plot with
# `+`, which is not a ggplot2 layer and hid a global side effect inside the
# plot expression. knitr reads fig.width / fig.height as plain numbers in
# inches, so grid::unit() objects are not appropriate here.
# NOTE(review): the original passed unit(10, "cm"); if 10 cm was really
# intended, use 10 / 2.54 instead of 10 -- confirm the desired size.
knitr::opts_chunk$set(fig.width = 10, fig.height = 10)

# Scatter plot of every poll result per party over time, with a smoothed
# trend line per party.
german_election_polls %>%
  select(union, spd, af_d, fdp, linke, grune, end_date) %>%
  # One row per (poll, party) so that party can be mapped to colour
  pivot_longer(
    cols = c(union, spd, af_d, fdp, linke, grune),
    names_to = "Political_Parties",
    values_to = "val"
  ) %>%
  ggplot(
    aes(
      x = end_date,
      y = val,
      fill = Political_Parties,
      colour = Political_Parties
    )
  ) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  scale_x_date(date_labels = "%b %y") +
  xlab("Date") +
  ylab("Percentage of votes") +
  labs(title = "German Election Polls 2021") +
  # Colours are named so each party keeps its colour regardless of level
  # order; this is the same mapping the unnamed vector produced through
  # alphabetical matching.
  scale_color_manual(
    values = c(
      af_d = "#0099FF",
      fdp = "#FFFF00",
      grune = "#339933",
      linke = "#CC0099",
      spd = "#FF3300",
      union = "#000000"
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5))