Introduction

This data notebook contains the analysis that generated facts in the story US deems migrant seafood workers ‘essential’ but limits their COVID-19 protections. For each sentence in the story generated by original data analysis, we have provided the corresponding code and results.

Load Libraries, Settings and Global Functions

# For general data science
library(tidyverse)

# For Data Cleaning
library(janitor)

# For loading Excel Files
library(readxl)

# For working with datetime
library(lubridate)

# For pulling Bureau of Labor Statistics data
library(blscrapeR)

# For pretty Tables
library(DT)
library(kableExtra)
library(knitr)

## Function for formatted table output
output_formatted_table = function(table, text){
  
  table %>% 
    kable(caption = text) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 14, fixed_thead = T) %>%
    scroll_box(width = "100%")
}

Load and Clean Data

Federal H-2B Data

We examined H-2B data published by the Department of Labor. We looked at H-2B performance data from fiscal year 2017 to fiscal year 2020. The dataset provides information about employers’ H-2B applications to the department’s Office of Foreign Labor Certification. Each fiscal year covers applications from Oct. 1 of the year prior through Sept. 30 of the year reported. For example, FY 2020 covers Oct. 1, 2019 to Sept. 30, 2020. The data can be downloaded from the Department of Labor’s website.

## Load 2017-2020 H2-B data

# Data for 2020 
h2b_federal <- read_excel("data/H2B_Disclosure_Data_FY2020.xlsx")%>%
  clean_names()

# Data for 2019
H2B_2019 <- read_excel("data/H2B_2019.xlsx")%>%
  clean_names()

# Data for 2018
H2B_2018 <- read_excel("data/H2B_2018.xlsx")%>%
  clean_names()

# Data for 2017
H2B_2017 <- read_excel("data/H2B_2017.xlsx")%>%
  clean_names()

## All years, joined

# Create truncated h2b 2017 dataframe 
h2b_2017_select_columns<- H2B_2017%>%
  select(total_workers_certified = nbr_workers_certified, naics_code)%>%
  mutate(fy = "2017")

# Create truncated h2b 2018 dataframe 
h2b_2018_select_columns<- H2B_2018%>%
  select(total_workers_certified = nbr_workers_certified, naics_code)%>%
  mutate(fy = "2018")

# Create truncated h2b 2019 dataframe 
h2b_2019_select_columns<- H2B_2019%>%
  select(total_workers_certified = nbr_workers_certified, naics_code)%>%
  mutate(fy = "2019")

# Create truncated h2b 2020 dataframe
h2b_2020_select_columns<- h2b_federal%>%
  select(total_workers_certified, naics_code)%>%
  mutate(fy = "2020")

# Bind new dataframes
h2b_federal_all_years<- h2b_2017_select_columns%>%
  bind_rows(h2b_2018_select_columns)%>%
  bind_rows(h2b_2019_select_columns)%>%
  bind_rows(h2b_2020_select_columns)%>%
  mutate(total_workers_certified=replace_na(total_workers_certified,0))

Federal OSHA Complaint Data

We examined COVID-19 complaints published by the federal Occupational Safety and Health Administration. The data includes all valid complaints received by OSHA since the start of the pandemic. Federal and state OSHA offices submit their data to the federal database. However, certain data may be unique to the state’s operating system or may not be transferred to federal OSHA. This means that, while state OSHA complaints are accessible via the federal OSHA website, totals may not always be comprehensive. OSHA updates the complaints data weekly.Variable names involving “OIS,” which stands for OSHA Information System, indicate the federal database. The data can be downloaded from OSHA’s website.

# Load the complaints to OSHA
ois_closed_complaints_20210328 = read_excel("../../../etl/federal_osha_complaints/input_data/Closed_Federal_State_Plan_Valid_COVID-19_Complaints_Through_0328_2021.xlsx")
ois_open_complaints_20210328 = read_excel("../../../etl/federal_osha_complaints/input_data/Open_Federal_State_Plan_Valid_COVID-19_Complaints_Through_0328_2021.xlsx")

## Combine open and closed complaints

# Clean open complaints --------------------
open_complaints <- ois_open_complaints_20210328  %>%
  # Format the data set — take out the first two rows because they aren't row headings
  remove_empty() %>%
  slice(-1) %>%
  # Bring row headings to the first line of the data set
  row_to_names(1) %>%
  clean_names() %>%
  # Create a new column to check the status of the complaint -- open or closed
  mutate(complaint_status = "open_complaint")


# Clean closed complaints --------------------
closed_complaints <- ois_closed_complaints_20210328 %>%
  # Same as part 1 -- take out the first two rows because they aren't row headings
  remove_empty() %>%
  slice(-1) %>%
  # Bring row headings to the first line of the data set
  row_to_names(1) %>%
  clean_names() %>%
  # Create a new column to check the status of the complaint -- open or closed
  mutate(complaint_status = "closed_complaint")


# Combine open and closed complaints --------------------
federal_osha_complaints_data <- closed_complaints %>%
  # Use bind_rows to join the two data sets
  bind_rows(open_complaints) %>%
  # Convert dates to a number
  mutate(upa_receipt_date = as.numeric(upa_receipt_date)) %>%
  # Use the "excel_numeric_to_date" function to return an actual date
  mutate(upa_receipt_date = excel_numeric_to_date(upa_receipt_date)) %>%
  # Separate the number of alleged hazards and number of employees exposed into two columns
  separate(number_alleged_hazards_emp_exposed, sep = "/", into = c("number_alleged_hazards","number_employees_exposed"))%>%
  # Remove spaces after alleged hazards using str_trim
  mutate(number_alleged_hazards = str_trim(number_alleged_hazards, side = "right")) %>%
  # Columns are stored as characters, so convert them to numbers
  mutate(number_alleged_hazards = as.numeric(number_alleged_hazards)) %>%
  mutate(number_employees_exposed = as.numeric(number_employees_exposed)) %>%
  # Separate the two NAICS codes into two columns: primary_site_naics_1 and primary_site_naics_2
  separate(primary_site_naics, sep = "/", into = c("primary_site_naics_1","primary_site_naics_2"))%>%
  # Remove spaces before and after the codes using str_trim
  mutate(primary_site_naics_1 = str_trim(primary_site_naics_1, side = "both"))%>%
  mutate(primary_site_naics_2 = str_trim(primary_site_naics_2, side = "both"))%>%
  # If/else statement: if primary_site_naics_1 equals primary_site_naics_2, return true in another column, else, return false
  mutate(naics_check = case_when(
    primary_site_naics_1 == primary_site_naics_2 ~ "true",
    TRUE ~ "false"
  )) %>%
  # Break apart the RID to interpret: first two numbers are OSHA office region, if the 3rd digit is 5 it is a state_plan office, else its a federal plan office
  mutate(osha_office_region = substr(rid, 1,2), office_type_number = substr(rid, 3,3), office_type = ifelse(office_type_number == "5", "state_plan_office", "federal_plan_office") ) %>%
  # Create column to identify if an inspection occurred
  mutate(inspection_occured = ifelse(is.na(insp_number) == FALSE,'yes', 'no'))

NAICS Codes

We used the North American Industry Classification System classification codes to identify companies in the “seafood product preparation and packaging industries.” That industry’s NAICS codes begin with 3117.

# Load NAICS codes from blscraperR
naics_codes = naics

Line-By-Line Fact-Check

FACT: In FY2020, 12k workers authorized for seafood — 2,232 at 41 companies in MD, VA and NC [CQ | Full & AP & Caption]

“Between October 2019 and September 2020, more than 12,000 H-2B workers were authorized to work at seafood companies across the U.S, including 2,232 at 41 companies across these states according to an analysis by the Howard Center for Investigative Journalism at the University of Maryland.”

Explanation

We used FY2020 H-2B data from the U.S. Labor Department to find all the certified workers in the seafood product preparation and packaging sector nationwide between October 2019 and September 2020 for all states and in Maryland, Virginia and North Carolina. Some companies that have different names are listed under the same address in the data. We chose to treat these as separate companies.

Supporting code and output

# Find all workers certified in seafood product packaging and preparation nationwide from October 2019 to September 2020
total_seafood <- h2b_federal %>% 
  clean_names() %>% 
  mutate(primary_naics_group = str_sub(naics_code, end=4)) %>% 
  select(job_title, total_workers_certified, total_workers_requested, nature_of_temporary_need, employer_name, employer_address1, employer_city, employer_state, employer_postal_code, primary_naics_group, naics_code, employer_poc_first_name, employer_poc_last_name, employer_poc_job_title, decision_date) %>% 
  left_join(naics, by=c("primary_naics_group"="industry_code"))%>%
  mutate(total_workers_certified=replace_na(total_workers_certified,0)) %>% 
  filter(str_detect(naics_code, "^3117")) %>% 
  summarise(total_workers_certified = sum(total_workers_certified),
            min_cert_date = min(decision_date), 
            max_cert_date = max(decision_date))

# Results --------------------
output_formatted_table(total_seafood, "In FY2020, 12k workers authorized for seafood")
In FY2020, 12k workers authorized for seafood
total_workers_certified min_cert_date max_cert_date
12633 2019-10-07 2020-09-29
# Identify target states: Maryland, Virginia and North Carolina
mdvanc_list=c("MD","VA","NC")

# Identify companies within target states and finds total certified H-2B employees
va_nc_md_h2b <-h2b_federal %>%
  clean_names() %>% 
  mutate(primary_naics_group = str_sub(naics_code, end=4))%>%
  select(job_title, total_workers_certified, total_workers_requested, nature_of_temporary_need, employer_name, employer_address1, employer_city, employer_state, employer_postal_code, primary_naics_group, naics_code, employer_poc_first_name, employer_poc_last_name, employer_poc_job_title, decision_date) %>% 
  left_join(naics, by=c("primary_naics_group"="industry_code"))%>%
  mutate(total_workers_certified=replace_na(total_workers_certified,0)) %>% 
  filter(employer_state%in%mdvanc_list, 
         str_detect(naics_code, "^3117")) %>% 
  mutate(employer_name = tolower(employer_name)) %>% 
  mutate(employer_name = case_when(
    employer_name == "g. w. hall & son, llc" ~ "g. w. hall & son llc",
    TRUE ~ employer_name
  )) %>% 
  group_by(employer_name) %>% 
  summarise(total_workers_certified = sum(total_workers_certified),
            min_cert_date = min(decision_date), 
            max_cert_date = max(decision_date)) %>% 
            #total_companies = n()) %>% 
  summarise(total_workers_certified = sum(total_workers_certified),
            min_cert_date = min(min_cert_date), 
            max_cert_date = max(max_cert_date),
            total_companies = n())

# Results --------------------
output_formatted_table(va_nc_md_h2b, "In FY2020, 2,232 workers were certified at 41 companies in MD, VA and NC")
In FY2020, 2,232 workers were certified at 41 companies in MD, VA and NC
total_workers_certified min_cert_date max_cert_date total_companies
2232 2019-10-15 2020-09-25 41

FACT: OSHA received 63,455 workplace COVID-19 complaints, 32 for seafood processing companies, more H-2B workers than any other industry except landscaping. [CQ | Full & AP]

“Since January 2020, OSHA received 63,455 workplace complaints related to COVID-19 across the U.S. Of those complaints, 32 were against seafood-processing companies, which have more H-2B workers than any other industry except landscaping, a Howard Center analysis found.”

Explanation

We examined open and closed COVID-19 complaints published by the federal Occupational Safety and Health Administration to find the total number of workplace complaints related to COVID-19 across the U.S. since January 2020. We identified complaints in the seafood product preparation and packaging sector. Finally, using the FY2020 H-2B data, we found the top five industries with the most H-2B workers workers certified between October 2019 and September 2020.

Supporting code and output

# Find total federal OSHA complaints within the seafood product preparation and packaging industry
seafood_complaints <- federal_osha_complaints_data %>%
  filter(str_detect(primary_site_naics_1, "^3117") | str_detect(primary_site_naics_2, "^3117") ) %>% 
  select(estab_name, primary_site_naics_1, primary_site_naics_2) %>% 
  count()

# Find total federal OSHA complaints nationwide
total_complaints <- federal_osha_complaints_data %>% 
  count()

# Results --------------------
output_formatted_table(seafood_complaints, "Since January 2020, OSHA received 32 workplace safety complaints related to seafood product preparation and packaging")
Since January 2020, OSHA received 32 workplace safety complaints related to seafood product preparation and packaging
n
32
output_formatted_table(total_complaints, "OSHA received 63,455 workplace safety complaints since January 2020")
OSHA received 63,455 workplace safety complaints since January 2020
n
63455
# Find total H-2B workers certified by industry in FY2020
more_h2b_2020 <- h2b_federal_all_years%>%
  mutate(naics_code = str_sub(naics_code, end=5))%>%
  mutate(original_length = str_length(naics_code))%>%
  mutate(naics_code = str_pad(naics_code, 5, "right", "1"))%>%
  mutate(naics_code = case_when(
    naics_code == "851" ~ "8510",
    TRUE ~ naics_code))%>%
  left_join(naics, by=c("naics_code"="industry_code"))%>%
  mutate(length = str_length(naics_code))%>%
  group_by(fy,naics_code, industry_title)%>%
  summarize(total_workers_certified_by_naics = sum(total_workers_certified))%>%
  arrange(fy, desc(total_workers_certified_by_naics))%>%
  ungroup()%>%
  group_by(fy)%>%
  slice(1:5)%>%
  filter(fy == "2020")%>%
  arrange(desc(total_workers_certified_by_naics))

# Results --------------------
output_formatted_table(more_h2b_2020, "There were more H-2B workers in the seafood industry than any other industry except landscaping in FY2020")
There were more H-2B workers in the seafood industry than any other industry except landscaping in FY2020
fy naics_code industry_title total_workers_certified_by_naics
2020 56173 NAICS 56173 Landscaping services 72134
2020 31171 NAICS 31171 Seafood product preparation and packaging 12633
2020 72111 NAICS 72111 Hotels and motels, except casino hotels 12239
2020 11531 NAICS 11531 Support activities for forestry 11063
2020 71399 NAICS 71399 All other amusement and recreation industries 8108

FACT: 907 H-2B recipients in NC seafood plants [CQ | Full & AP]

“In August, North Carolina Gov. Roy Cooper, a Democrat, considered an executive order that would have ensured COVID-safety precautions in migrant housing, transportation and worksites. It would have protected the 907 H-2B recipients certified to work in North Carolina seafood plants between January and August 2020.”

Explanation

Using the FY2020 H-2B data between October 2019 and September 2020, we found the total number of H-2B recipients certified to work in the seafood industry in North Carolina. We limited the industry codes to four digits and searched for seafood product preparation and packaging and determined the earliest and latest certification date.

Supporting code and output

# Find total number of H-2B workers certified in the seafood product preparation and packaging industry in North Carolina in FY2020
nc_h2b <-h2b_federal %>%
  clean_names() %>% 
  mutate(primary_naics_group = str_sub(naics_code, end=4))%>%
  mutate(total_workers_certified=replace_na(total_workers_certified,0)) %>%
  filter(employer_state == "NC") %>% 
  filter(str_detect(naics_code, "^3117")) %>% 
  summarise(total_workers_certified = sum(total_workers_certified),
            min_cert_date = min(decision_date), 
            max_cert_date = max(decision_date))

# Results --------------------
output_formatted_table(nc_h2b, "H-2B workers certified in the seafood product preparation and packaging industry in North Carolina in FY2020")
H-2B workers certified in the seafood product preparation and packaging industry in North Carolina in FY2020
total_workers_certified min_cert_date max_cert_date
907 2020-01-24 2020-08-27

FACT: More than 250 H-2B seafood workers at Bevans Oyster Company and Bonums Oyster Company [CQ | Full & AP]

In the coastal town of Kinsale, Virginia, Ronald and Shirley Bevans received federal approval last year to hire 110 H-2B baitfish and shellfish packers, 130 H-2B oyster shuckers and 14 seafood processors at their seafood-wholesale businesses: Bevans Oyster Company and Bonums Oyster Company.

Explanation

Using the FY2020 H-2B data, we found the number of H-2B workers certified to work for Bevans Oyster Company and Bonums Oyster Company.

Supporting code and output

# Find total H-2B workers certified to work in seafood product preparation and packaging in Virginia for Bevans Oyster Company and Bonums Oyster Company in FY2020
h2b_bevans <- h2b_federal %>% 
  clean_names() %>% 
  filter(employer_state == "VA") %>% 
  filter(str_detect(naics_code, "^3117")) %>%
  filter(str_detect(employer_name, "Bevans|Bonums")) %>% 
  select(employer_name, total_workers_certified, case_status, received_date, job_title)

# Results --------------------
output_formatted_table(h2b_bevans, "Bevans Oyster Company and Bonums Oyster Company received federal approval to hire 110 H-2B baitfish and shellfish packers, 130 H-2B oyster shuckers and 14 seafood processors in FY2020")
Bevans Oyster Company and Bonums Oyster Company received federal approval to hire 110 H-2B baitfish and shellfish packers, 130 H-2B oyster shuckers and 14 seafood processors in FY2020
employer_name total_workers_certified case_status received_date job_title
Bonums Oyster Company, Inc.  130 Determination Issued - Certification 2020-07-03 11:09:33 Oyster Shucker
Bevans Oyster Company, Inc.  14 Determination Issued - Certification 2020-01-02 00:02:53 Seafood Processor
Bevans Oyster Company, Inc.  110 Determination Issued - Certification 2020-01-02 00:07:19 Baitfish/Shellfish Packer

GRAPHIC: Total H2B visa certifications by state in the seafood product preparation and packaging subsector

“In 2020, only 12 states had workers certified in seafood product preparation and packaging.”

Explanation

We used the H-2B FY2020 data to identify certified H-2B seafood product preparation and packaging workers in each state.

Supporting code and output

# Find H-2B workers certified in the seafood product preparation and packaging in each state in FY2020
h2b_seafood_by_state <-h2b_federal %>%
  clean_names() %>% 
  mutate(primary_naics_group = str_sub(naics_code, end=4))%>%
  mutate(total_workers_certified=replace_na(total_workers_certified,0)) %>%
  filter(str_detect(naics_code, "^3117")) %>% 
  group_by(employer_state)%>%
  summarise(total_workers_certified = sum(total_workers_certified))%>%
  arrange(desc(total_workers_certified)) 

# Results --------------------
output_formatted_table(h2b_seafood_by_state, "Total H2B visa certifications by state in the seafood product preparation and packaging subsector by state in FY 2020")
Total H2B visa certifications by state in the seafood product preparation and packaging subsector by state in FY 2020
employer_state total_workers_certified
WA 6791
LA 1829
NC 907
OR 846
MD 678
VA 647
TX 548
MS 205
AK 91
AL 69
FL 18
ME 4

GRAPHIC: Top five industry sub-sectors with most H2B workers certified across four years

"Landscaping services had the most H-2B workers certified across all industry subsectors across all years. Between 2017 and 2020, H-2B visa certifications nationwide in seafood product preparation and packaging increased by 84%.

Explanation

For this graphic, used H-2B data from FY2017 through FY2020. Using NAICS industry codes, we extracted the top five industries with the most certified H-2B workers in each year. We also calculated the change in certified H-2B seafood workers between 2017 and 2020.

Supporting code and output

# Find the top five industry sectors with most H2-B certified workers 
top_five_graphic <- h2b_federal_all_years%>%
  mutate(naics_code = str_sub(naics_code, end=5))%>%
  mutate(original_length = str_length(naics_code))%>%
  mutate(naics_code = str_pad(naics_code, 5, "right", "1"))%>%
  mutate(naics_code = case_when(
    naics_code == "851" ~ "8510",
    TRUE ~ naics_code))%>%
  left_join(naics, by=c("naics_code"="industry_code"))%>%
  mutate(length = str_length(naics_code))%>%
  group_by(fy,naics_code, industry_title)%>%
  summarize(total_workers_certified_by_naics = sum(total_workers_certified))%>%
  arrange(fy, desc(total_workers_certified_by_naics))%>%
  ungroup()%>%
  group_by(fy)%>%
  slice(1:5)

top_five_graphic_output <- top_five_graphic %>%
  select(industry_title,fy,total_workers_certified_by_naics) %>%
  pivot_wider(names_from=fy,values_from=total_workers_certified_by_naics) %>%
  arrange(desc(`2020`))

# Calculate percent change from FY2017 - FY2020
seafood_change_2017_2020 <- top_five_graphic%>%
  filter(fy %in% c("2017", "2020") & str_detect(industry_title, "Seafood"))%>%
  pivot_wider(names_from = fy, values_from = total_workers_certified_by_naics)%>%
  mutate(percent_change = round(((`2020` - `2017`)/`2017`)*100,2)) %>%
  select(-naics_code)

# Results --------------------
output_formatted_table(top_five_graphic_output, "Top five industry sub-sectors with most H2B workers certified across four years.")
Top five industry sub-sectors with most H2B workers certified across four years.
industry_title 2017 2018 2019 2020
NAICS 56173 Landscaping services 50450 63388 63299 72134
NAICS 31171 Seafood product preparation and packaging 6872 6592 8773 12633
NAICS 72111 Hotels and motels, except casino hotels 11872 11718 12109 12239
NAICS 11531 Support activities for forestry 10791 10372 10983 11063
NAICS 71399 All other amusement and recreation industries 7210 7737 7936 8108
output_formatted_table(seafood_change_2017_2020, "Percentage change from FY2017 to FY 2020 in seafood product packaging")
Percentage change from FY2017 to FY 2020 in seafood product packaging
industry_title 2017 2020 percent_change
NAICS 31171 Seafood product preparation and packaging 6872 12633 83.83