U.S. Census Data API in R#
by Michael T. Moen and Adam M. Nguyen
The U.S. Census Data API provides programmatic access to demographic, economic, and geographic data collected by the U.S. Census Bureau. It enables users to retrieve and analyze a wide variety of data sets, including Census surveys and population statistics.
Please see the following resources for more information on API usage:
Documentation
Terms
Data Reuse
NOTE: Please see the official documentation for access details and request rate limits for this API.
These recipe examples were tested on March 23, 2026.
Setup#
Load Libraries#
The httr and jsonlite packages need to be installed in your environment to run the code examples in this tutorial. These packages can be installed with install.packages().
We load the libraries used in this tutorial below:
library(httr)
library(jsonlite)
Import API Key#
An API key is required to access the U.S. Census Data API. You can sign up for one at the Key Signup page.
We keep our API key in a .Renviron file that is stored in the working directory and use Sys.getenv() to access it. The .Renviron file should have an entry like the one below.
CENSUS_API_KEY="PUT_YOUR_API_KEY_HERE"
Below, we can test whether the key was successfully imported.
# Confirm that CENSUS_API_KEY was picked up from the environment; an unset
# variable comes back from Sys.getenv() as the empty string
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  warning("API key not found or is empty.")
} else {
  print("API key successfully loaded.")
}
## [1] "API key successfully loaded."
1. Get Population Estimates of Counties by State#
Note: This data includes the District of Columbia and Puerto Rico
For obtaining data from the Census API, it is helpful to first obtain a list of state IDs:
# Set the base URL that will be used throughout this tutorial
BASE_URL <- "https://api.census.gov/data/"

# The parameters specify what data we want to retrieve: the NAME variable for
# every state-level geography
params <- list(
  get = "NAME",
  `for` = "state:*", # "*" is a wildcard, so this grabs the names of all states
  key = Sys.getenv("CENSUS_API_KEY")
)
year <- 2019

# Make the request to the Census API
response <- GET(paste0(BASE_URL, year, "/pep/population"), query = params)
# Stop here with the HTTP status if the request failed, instead of letting the
# JSON parser fail on an error body further down
stop_for_status(response)

# Decode the body explicitly as UTF-8 and parse the JSON into a data frame.
# The header values ("NAME", "state") are still the first row at this point.
states <- data.frame(
  fromJSON(content(response, as = "text", encoding = "UTF-8"))
)

# Print first 6 rows of the resulting data frame
head(states)
## X1 X2
## 1 NAME state
## 2 Alabama 01
## 3 Alaska 02
## 4 Arizona 04
## 5 Arkansas 05
## 6 California 06
# The first row holds the API's header values ("NAME", "state") rather than
# data: drop it and give the two columns descriptive names instead
states <- setNames(states[-1, ], c("state", "fips_state"))
head(states, n = 6L)
## state fips_state
## 2 Alabama 01
## 3 Alaska 02
## 4 Arizona 04
## 5 Arkansas 05
## 6 California 06
## 7 Colorado 08
# Request the name and population estimate (POP) of every county
params <- list(
  get = "NAME,POP",
  `for` = "county:*",
  key = Sys.getenv("CENSUS_API_KEY")
)
year <- 2019

response <- GET(paste0(BASE_URL, year, "/pep/population"), query = params)
# Stop with the HTTP status if the request failed
stop_for_status(response)

# Decode the body explicitly as UTF-8 so that any non-ASCII characters in the
# place names do not depend on the session locale, then parse the JSON
raw_df <- data.frame(
  fromJSON(content(response, as = "text", encoding = "UTF-8"))
)
# The first row carries the variable names; promote it to column names
names(raw_df) <- unlist(raw_df[1, ], use.names = FALSE)
raw_df <- raw_df[-1, ]

df <- data.frame(
  # Merge State and County FIPS codes
  fips = paste0(raw_df$state, raw_df$county),
  # Split County and State name into different columns: NAME is split at its
  # first comma, with whitespace after the comma discarded
  county = sub(",.*$", "", raw_df$NAME),
  state = sub("^[^,]*,\\s*", "", raw_df$NAME),
  # POP is parsed as text here, so convert it for numeric comparisons
  pop_2019 = as.integer(raw_df$POP)
)
# Count the rows of the data frame (one per U.S. county returned by the API)
NROW(df)
## [1] 3220
# Preview the top of the county data frame (first 6 rows)
head(df, n = 6L)
## fips county state pop_2019
## 1 17051 Fayette County Illinois 21336
## 2 17107 Logan County Illinois 28618
## 3 17165 Saline County Illinois 23491
## 4 17127 Massac County Illinois 13772
## 5 18069 Huntington County Indiana 36520
## 6 18075 Jay County Indiana 20436
# Keep only the rows whose state column is "Missouri" and preview them
head(df[df[["state"]] == "Missouri", ], n = 6L)
## fips county state pop_2019
## 219 29041 Chariton County Missouri 7426
## 220 29201 Scott County Missouri 38280
## 221 29073 Gasconade County Missouri 14706
## 222 29186 Ste. Genevieve County Missouri 17894
## 223 29067 Douglas County Missouri 13185
## 224 29083 Henry County Missouri 21824
# Show every county whose 2019 estimate exceeds three million
df[df[["pop_2019"]] > 3e6, ]
## fips county state pop_2019
## 204 06073 San Diego County California 3338330
## 347 06059 Orange County California 3175692
## 703 17031 Cook County Illinois 5150233
## 1198 04013 Maricopa County Arizona 4485414
## 1876 06037 Los Angeles County California 10039107
## 2771 48201 Harris County Texas 4713325
2. Get Population Estimates Over a Range of Years#
We can use code similar to before, but now loop through the population estimate datasets for different years. The endpoints used are https://api.census.gov/data/{year}/pep/population for each year from 2015 through 2018.
# County-level query used for every year. GEONAME is requested alongside POP,
# but only the FIPS codes and POP are used below.
params <- list(
  get = "GEONAME,POP",
  `for` = "county:*",
  key = Sys.getenv("CENSUS_API_KEY")
)

for (year in 2015:2018) {
  response <- GET(paste0(BASE_URL, year, "/pep/population"), query = params)
  # Stop with the HTTP status if this year's request failed
  stop_for_status(response)
  # Pause for a second between consecutive requests
  Sys.sleep(1)

  # Process data from the response: parse the JSON, then promote the first row
  # (the variable names) to column names
  raw_df <- data.frame(
    fromJSON(content(response, as = "text", encoding = "UTF-8"))
  )
  names(raw_df) <- unlist(raw_df[1, ], use.names = FALSE)
  raw_df <- raw_df[-1, ]

  # Keep only the join key (state + county FIPS codes) and this year's
  # estimate, stored in a column named pop_<year>
  pop_col <- paste0("pop_", year)
  raw_df <- setNames(
    data.frame(paste0(raw_df$state, raw_df$county), as.integer(raw_df$POP)),
    c("fips", pop_col)
  )

  # Drop any pop_<year> column left over from an earlier run so that re-running
  # this loop does not produce duplicated ".x"/".y" columns
  df[[pop_col]] <- NULL
  # Merge the new data with the overall data frame; merge() keeps only the
  # FIPS codes that are present in both data frames
  df <- merge(df, raw_df, by = "fips")
}

# Reorder columns
df <- df[, c("state", "county", "fips", paste0("pop_", 2015:2019))]

# Print updated data frame
head(df)
## state county fips pop_2015 pop_2016 pop_2017 pop_2018 pop_2019
## 1 Alabama Autauga County 01001 55347 55416 55504 55601 55869
## 2 Alabama Baldwin County 01003 203709 208563 212628 218022 223234
## 3 Alabama Barbour County 01005 26489 25965 25270 24881 24686
## 4 Alabama Bibb County 01007 22583 22643 22668 22400 22394
## 5 Alabama Blount County 01009 57673 57704 58013 57840 57826
## 6 Alabama Bullock County 01011 10696 10362 10309 10138 10101
3. Plot Population Change#
This data is based on the 2021 Population Estimates dataset.
The percentage change in population is from July 1, 2020 to July 1, 2021 for states (including the District of Columbia and Puerto Rico).
# Request each state's name, 2021 population estimate and percent population
# change
params <- list(
  get = "NAME,POP_2021,PPOPCHG_2021",
  `for` = "state:*",
  key = Sys.getenv("CENSUS_API_KEY")
)
year <- 2021

response <- GET(paste0(BASE_URL, year, "/pep/population"), query = params)
# Stop with the HTTP status if the request failed
stop_for_status(response)

# Decode the body explicitly as UTF-8 and parse the JSON into a data frame
data <- data.frame(
  fromJSON(content(response, as = "text", encoding = "UTF-8"))
)
# Drop the first row, which holds the variable names rather than data
data <- data[-1, ]

# Print number of results
nrow(data)
## [1] 52
# Give the four columns descriptive names, in the order they were returned
colnames(data) <- c(
  "state", "pop_2021", "percent_pop_change_2021", "fips_state"
)

# Arrange the rows alphabetically by state name
data <- data[order(data$state), ]

# Store state as a factor for plotting, with levels in reverse alphabetical
# order
data$state <- factor(data$state, levels = sort(data$state, decreasing = TRUE))

# Preview the first 6 states
head(data, n = 6L)
## state pop_2021 percent_pop_change_2021 fips_state
## 50 Alabama 5039877 0.2999918604 01
## 53 Alaska 732673 0.0316749062 02
## 47 Arizona 7276316 1.3698828613 04
## 14 Arkansas 3025891 0.4534511286 05
## 21 California 39237836 -0.6630474360 06
## 30 Colorado 5812069 0.4799364073 08
# Expand left margin to make room for the state labels, remembering the
# previous settings so they can be restored after the plot is drawn
old_par <- par(mar = c(3, 7, 3, 3))

# Make a scatter plot of percent change (x) against state (y). Both vectors
# are converted explicitly: the percent change to numeric, and the state
# factor to its integer level codes, which are the y positions labelled below.
plot(
  as.numeric(data$percent_pop_change_2021),
  as.integer(data$state),
  pch = 21,
  bg = adjustcolor("#1f77b4", 0.7),
  col = "white",
  cex = 1.2,
  xlab = "% Population Change",
  ylab = "",
  main = "Population Change from 2020 to 2021",
  frame.plot = FALSE,
  yaxt = "n" # suppress the default y-axis; a labelled one is added below
)

# Add line at x = 0
abline(v = 0, col = "gray", lty = 2)

# Add state labels along y-axis, one tick per factor level
axis(
  side = 2,
  at = seq_along(levels(data$state)),
  labels = levels(data$state),
  las = 2,
  cex.axis = 0.7
)

# Restore the graphics parameters that were in effect before this plot
par(old_par)
