library(votesmart)

The first step to using the votesmart package is to
register an API key and store it in an environment variable by following
these
instructions.
Let’s make sure our API key is set.
# `Sys.getenv("VOTESMART_API_KEY")` returns `""` (i.e. a string of `nchar` 0)
# when no key is registered in this environment variable, so a non-empty
# string means the key is set.
key <- Sys.getenv("VOTESMART_API_KEY")
key_exists <- nzchar(key)
if (!key_exists) knitr::knit_exit()

We’ll also attach dplyr for working with dataframes.
suppressPackageStartupMessages(library(dplyr))
conflicted::conflict_prefer("filter", "dplyr")
#> [conflicted] Will prefer dplyr::filter over any other package.

Some of these functions are necessary precursors to obtain data you
might want. For instance, in order to get candidates’ ratings by SIGs,
you’ll need to get office_level_ids in order to get
office_ids, which is a required argument to get candidate
information using candidates_get_by_office_state. We’ll go
through what might be a typical example of how you might use the
votesmart package.
There are currently three functions for getting data on VoteSmart
candidates: candidates_get_by_lastname,
candidates_get_by_levenshtein, and
candidates_get_by_office_state.
Let’s search for former US House Rep Barney Frank using
candidates_get_by_lastname.
From ?candidates_get_by_lastname, this function’s
defaults are:
candidates_get_by_lastname(
last_names,
election_years = lubridate::year(lubridate::today()),
stage_ids = "",
all = TRUE,
verbose = TRUE
)
Since the default election year is the current year and Barney Frank left office in 2013, we’ll specify a few years in which he ran for office.
# Look up every candidate named "frank" who ran in the given election years,
# then print the result.
franks <- candidates_get_by_lastname(
  last_names = "frank",
  election_years = c(2000, 2004)
)
franks
#> Requesting data for {last_name: frank, election_year: 2000, stage_id: }.
#> Requesting data for {last_name: frank, election_year: 2004, stage_id: }.
#> # A tibble: 13 × 32
#> candidate_id first_name nick_name middle_name last_name suffix title
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 12063 A. T. <NA> <NA> Frank <NA> <NA>
#> 2 26897 Barney <NA> <NA> Frank <NA> <NA>
#> 3 54614 Floyd <NA> <NA> Frank <NA> <NA>
#> 4 36663 Jo Anne <NA> <NA> Frank <NA> <NA>
#> 5 1507 Lonnie Danell <NA> <NA> Frank <NA> <NA>
#> 6 54827 Terrence Terry D. Frank <NA> <NA>
#> 7 26897 Barney <NA> <NA> Frank <NA> <NA>
#> 8 50597 Craig <NA> A. Frank <NA> <NA>
#> 9 37152 Deborah <NA> L. Frank <NA> <NA>
#> 10 50318 Douglas <NA> <NA> Frank <NA> <NA>
#> 11 33210 Keith <NA> R. Frank <NA> <NA>
#> 12 1507 Lonnie Danell <NA> <NA> Frank <NA> <NA>
#> 13 51171 William Bill R. Frank <NA> <NA>
#> # ℹ 25 more variables: ballot_name <chr>, stage_id <chr>, election_year <chr>,
#> # preferred_name <chr>, election_parties <chr>, election_status <chr>,
#> # election_stage <chr>, election_district_id <chr>,
#> # election_district_name <chr>, election_office <chr>,
#> # election_office_id <chr>, election_state_id <chr>,
#> # election_office_type_id <chr>, election_special <lgl>, election_date <chr>,
#> # office_parties <chr>, office_status <chr>, office_district_id <chr>, …

Looking at the first_name column, there are a number of
non-Barneys returned. We can next filter our results to Barney.
# Keep only the rows for Barney and the handful of identifying columns,
# then print the result.
barneys <- franks %>%
  filter(first_name == "Barney") %>%
  select(
    candidate_id, first_name, last_name,
    election_year, election_state_id, election_office
  )
barneys
#> # A tibble: 2 × 6
#> candidate_id first_name last_name election_year election_state_id
#> <chr> <chr> <chr> <chr> <chr>
#> 1 26897 Barney Frank 2000 MA
#> 2 26897 Barney Frank 2004 MA
#> # ℹ 1 more variable: election_office <chr>

The two rows returned correspond to the two
election_years we specified. Each candidate gets their own
unique candidate_id, which we can pull
out.
# Both rows share one candidate_id, so de-duplicate after pulling the column.
barney_id <- barneys %>%
  pull(candidate_id) %>%
  unique()
barney_id
#> [1] "26897"

One of the most powerful things about VoteSmart is its wealth of information about candidates’ positions on issues as rated by a number of Special Interest Groups, or SIGs.
Given a candidate_id, we can ask for those ratings using
rating_get_candidate_ratings.
# An empty `sig_ids` string asks for ratings from all SIGs.
barney_ratings <- rating_get_candidate_ratings(
  candidate_ids = barney_id,
  sig_ids = ""
)
barney_ratings
#> Requesting data for {candidate_id: 26897, sig_id: }.
#> # A tibble: 1,640 × 19
#> rating_id candidate_id sig_id rating rating_name timespan rating_text
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 8661 26897 2419 63 Positions 2013-20… Barney Fra…
#> 2 6598 26897 1985 9 Lifetime Positions 2013 Bsed on le…
#> 3 6093 26897 1578 100 Lifetime Positions… 2012 <NA>
#> 4 6305 26897 2086 0 Positions 2012 <NA>
#> 5 6408 26897 2023 75 Positions 2012 <NA>
#> 6 6481 26897 1084 91 Positions 2012 <NA>
#> 7 6616 26897 2159 75 Positions on Techn… 2012 Barney Fra…
#> 8 6642 26897 230 50 Positions 2012 Barney Fra…
#> 9 6725 26897 1734 21 Positions 2012 Barney Fra…
#> 10 6732 26897 329 92 Global Issues Score 2012 Barney Fra…
#> # ℹ 1,630 more rows
#> # ℹ 12 more variables: category_id_1 <chr>, category_name_1 <chr>,
#> # category_id_2 <chr>, category_name_2 <chr>, category_id_3 <chr>,
#> # category_name_3 <chr>, category_id_4 <chr>, category_name_4 <chr>,
#> # category_id_5 <chr>, category_name_5 <chr>, category_id_6 <chr>,
#> # category_name_6 <chr>

There are a lot of columns here because some ratings are tagged with multiple categories.
main_cols <- c("rating", "category_name_1", "sig_id", "timespan")

We’ll filter to Barney’s ratings on the environment using just the first category name.
# Keep only the ratings whose first category is "Environment", restricted to
# the columns named in `main_cols`.
# `main_cols` is an external character vector, so it is wrapped in `all_of()`;
# passing it bare to `select()` is deprecated since tidyselect 1.1.0.
(barney_on_env <-
  barney_ratings %>%
  filter(category_name_1 == "Environment") %>%
  select(all_of(main_cols))
)
#> # A tibble: 39 × 4
#> rating category_name_1 sig_id timespan
#> <chr> <chr> <chr> <chr>
#> 1 92 Environment 1012 2012
#> 2 89 Environment 1012 2012
#> 3 91 Environment 1012 2011-2012
#> 4 100 Environment 1938 2011-2012
#> 5 88 Environment 1826 2011-2012
#> 6 71 Environment 922 2011-2012
#> 7 94 Environment 1012 2011
#> 8 92 Environment 1012 2011
#> 9 100 Environment 1197 2011
#> 10 96 Environment 1826 2011
#> # ℹ 29 more rows

Something to be aware of is that some SIGs give ratings as letter grades:
# Ratings containing an uppercase letter are letter grades rather than numbers.
barney_ratings %>%
  filter(stringr::str_detect(rating, "[A-Z]")) %>%
  select(rating, category_name_1)
#> # A tibble: 26 × 2
#> rating category_name_1
#> <chr> <chr>
#> 1 F Guns
#> 2 A Foreign Affairs
#> 3 F Social
#> 4 F Guns
#> 5 F- Guns
#> 6 A Foreign Affairs
#> 7 A+ Foreign Affairs
#> 8 F Fiscally Conservative
#> 9 C Foreign Affairs
#> 10 F Immigration
#> # ℹ 16 more rows

But using just Barney’s number grades, we can get his average rating
on this category per timespan:
# Average the ratings (converted to numeric) within each timespan, then sort
# by timespan in descending order.
barney_on_env %>%
  group_by(timespan) %>%
  summarise(avg_rating = mean(as.numeric(rating), na.rm = TRUE)) %>%
  arrange(desc(timespan))
#> # A tibble: 23 × 2
#> timespan avg_rating
#> <chr> <dbl>
#> 1 2012 90.5
#> 2 2011-2012 87.5
#> 3 2011 95.5
#> 4 2010 86
#> 5 2009-2010 83.5
#> 6 2009 100
#> 7 2008 92
#> 8 2007-2008 88
#> 9 2007 90
#> 10 2006 100
#> # ℹ 13 more rows

Keep in mind that these are ratings given by SIGs, which often have very different baseline stances on issues. For example, a pro-life group might give a candidate a rating of 0 whereas a pro-choice group might give that same candidate a 100.
# Show each abortion-related rating next to the SIG that issued it.
barney_ratings %>%
  filter(category_name_1 == "Abortion") %>%
  select(rating, sig_id, category_name_1)
#> # A tibble: 36 × 3
#> rating sig_id category_name_1
#> <chr> <chr> <chr>
#> 1 100 1016 Abortion
#> 2 0 252 Abortion
#> 3 100 1016 Abortion
#> 4 0 252 Abortion
#> 5 0 1195 Abortion
#> 6 100 1016 Abortion
#> 7 0 1086 Abortion
#> 8 0 252 Abortion
#> 9 100 1016 Abortion
#> 10 0 1086 Abortion
#> # ℹ 26 more rows

When it comes to the Special Interest Groups themselves, the result
of rating_get_candidate_ratings only supplies us with a
sig_id.
We can get more information about these SIGs given these IDs with
rating_get_sig.
# Draw three distinct SIG ids at random from Barney's ratings.
(some_sigs <-
  barney_ratings %>%
  pull(sig_id) %>%
  unique() %>%
  sample(3)
)
#> [1] "1654" "989" "253"

# Look up the details of those SIGs. The call opens on its own line so that
# it is not swallowed by the preceding `#>` output comment.
rating_get_sig(
  sig_ids = some_sigs
)
#> Requesting data for {sig_id: 1654}.
#> Requesting data for {sig_id: 989}.
#> Requesting data for {sig_id: 253}.
#> # A tibble: 3 × 14
#> sig_id name description state_id address city state zip phone_1 phone_2
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1654 Nationa… The Nation… <NA> Post O… Wash… DC 20091 202-78… <NA>
#> 2 989 Nationa… The Nation… <NA> 1605 K… Alex… VA 22314 703-52… <NA>
#> 3 253 Nationa… The Nation… <NA> 25 Mas… Wash… DC 20001 202-39… <NA>
#> # ℹ 4 more variables: fax <chr>, email <chr>, url <chr>, contact_name <chr>

Or, if we don’t yet know any sig_ids, we can get a
dataframe of them with the function
rating_get_sig_list.
That function requires a vector of issue category_ids,
however, so let’s first get a vector of some
category_ids.
# `state_ids = NA` requests the national categories.
category_df <-
  rating_get_categories(state_ids = NA) %>%
  distinct() %>%
  # Shuffle all rows so the 10 rows printed show a mix of categories
  sample_n(nrow(.))
category_df
#> Beginning to get categories for state NA.
#> # A tibble: 40 × 3
#> category_id name state_id
#> <chr> <chr> <chr>
#> 1 67 Unemployed and Low-Income <NA>
#> 2 71 Death Penalty <NA>
#> 3 68 Women <NA>
#> 4 11 Business and Consumers <NA>
#> 5 66 Veterans <NA>
#> 6 12 Elections <NA>
#> 7 73 Gambling and Gaming <NA>
#> 8 39 Housing and Property <NA>
#> 9 20 Criminal Justice <NA>
#> 10 29 Energy <NA>
#> # ℹ 30 more rows

Now we can get our dataframe of SIGs given some categories.
# Pick three category ids at random.
(some_categories <- category_df$category_id %>% sample(3))
#> [1] "64" "71" "13"

# Fetch the national (`state_ids = NA`) SIGs for those categories, keep the
# identifying columns, and shuffle the rows. The assignment opens on its own
# line so that it is not swallowed by the preceding `#>` output comment.
(sigs <-
  rating_get_sig_list(
    category_ids = some_categories,
    state_ids = NA
  ) %>%
  select(sig_id, name, category_id, state_id) %>%
  sample_n(nrow(.))
)
#> Requesting data for {category_id: 64, state_id: NA}.
#> Requesting data for {category_id: 71, state_id: NA}.
#> Requesting data for {category_id: 13, state_id: NA}.
#> # A tibble: 58 × 4
#> sig_id name category_id state_id
#> <chr> <chr> <chr> <chr>
#> 1 1268 United Automobile, Aerospace and Agricultural Im… 64 <NA>
#> 2 2024 National Religious Campaign Against Torture 13 <NA>
#> 3 1378 American Civil Liberties Union (ACLU) 13 <NA>
#> 4 1419 Center for International Policy 13 <NA>
#> 5 545 National LGBTQ Task Force 13 <NA>
#> 6 847 Transportation Communications Union 64 <NA>
#> 7 3107 One Fair Wage Action 13 <NA>
#> 8 2550 Emgage Action 13 <NA>
#> 9 3127 Moms In Office 13 <NA>
#> 10 318 Campaign for a Color Blind America 13 <NA>
#> # ℹ 48 more rows

We already have the category names corresponding to those
category_ids in our category_df, so we can
join category_df onto sigs to attach
category_name_1s to each of those SIGs.
# Both tables have a `name` column, so each one is renamed to say what it
# names; the category names are then attached by state and category id, and
# the rows are shuffled.
sigs %>%
  rename(sig_name = name) %>%
  left_join(
    rename(category_df, category_name_1 = name),
    by = c("state_id", "category_id")
  ) %>%
  sample_n(nrow(.))
#> # A tibble: 58 × 5
#> sig_id sig_name category_id state_id category_name_1
#> <chr> <chr> <chr> <chr> <chr>
#> 1 3020 RootsAction 13 <NA> Civil Libertie…
#> 2 1059 League of United Latin American … 13 <NA> Civil Libertie…
#> 3 545 National LGBTQ Task Force 13 <NA> Civil Libertie…
#> 4 3127 Moms In Office 13 <NA> Civil Libertie…
#> 5 978 Americans for Democratic Action … 13 <NA> Civil Libertie…
#> 6 3001 Brand New Congress 13 <NA> Civil Libertie…
#> 7 1419 Center for International Policy 13 <NA> Civil Libertie…
#> 8 2983 RAICES Action Fund 13 <NA> Civil Libertie…
#> 9 3126 The Jewish Vote 13 <NA> Civil Libertie…
#> 10 437 Friends Committee on National Le… 13 <NA> Civil Libertie…
#> # ℹ 48 more rows

For more info or to report a bug to VoteSmart, please refer to the VoteSmart API docs!