Across

In this lesson, we will introduce the across() function, which can be used to apply dplyr functions like mutate, summarize, select, etc to subsets of columns that satisfy certain criteria, such as to all numeric columns, or all columns that start with age_.

# load the tidyverse and demographics dataset
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
demographics <- read_csv("data/demographics.csv")
Rows: 10175 Columns: 19
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): interview_examination, gender, race, marital_status, pregnant
dbl (9): respondent_id, age_years, age_months_sc_0_2yr, six_month_period, ag...
lgl (5): served_active_duty_us, served_active_duty_foreign, born_usa, citize...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(demographics)
# A tibble: 6 × 19
  respondent_id interview_examination gender age_years age_months_sc_0_2yr race 
          <dbl> <chr>                 <chr>      <dbl>               <dbl> <chr>
1         73557 both interview and e… male          69                  NA black
2         73558 both interview and e… male          54                  NA white
3         73559 both interview and e… male          72                  NA white
4         73560 both interview and e… male           9                  NA white
5         73561 both interview and e… female        73                  NA white
6         73562 both interview and e… male          56                  NA mexi…
# ℹ 13 more variables: six_month_period <dbl>, age_months_ex_0_19yr <dbl>,
#   served_active_duty_us <lgl>, served_active_duty_foreign <lgl>,
#   born_usa <lgl>, citizen_usa <lgl>, time_in_us <dbl>, education_youth <dbl>,
#   education <dbl>, marital_status <chr>, pregnant <chr>,
#   language_english <lgl>, household_income <dbl>

Let’s suppose that our task is to convert all character columns of the demographics data to upper-case.

# apply toupper() to the string value "abc"
toupper("abc")
[1] "ABC"

Before introducing across, let’s first see one way that we could do this using the map() functions that we have just learned:

# create a copy of demographics
demographics_tmp <- demographics
# create a logical vector for identifying character columns using map_chr()
character_cols <- map_chr(demographics_tmp, class) == "character"
# re-assign the character columns to an upper-case version of themselves 
# using map_df() applied to demographics_tmp[character_cols] with toupper()
demographics_tmp[character_cols] <- map_df(demographics_tmp[character_cols], toupper)

# look at demographics_tmp
demographics_tmp
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 BOTH INTERVIEW AND EXAM MALE          69                  NA
 2         73558 BOTH INTERVIEW AND EXAM MALE          54                  NA
 3         73559 BOTH INTERVIEW AND EXAM MALE          72                  NA
 4         73560 BOTH INTERVIEW AND EXAM MALE           9                  NA
 5         73561 BOTH INTERVIEW AND EXAM FEMALE        73                  NA
 6         73562 BOTH INTERVIEW AND EXAM MALE          56                  NA
 7         73563 BOTH INTERVIEW AND EXAM MALE           0                   5
 8         73564 BOTH INTERVIEW AND EXAM FEMALE        61                  NA
 9         73565 INTERVIEW ONLY          MALE          42                  NA
10         73566 BOTH INTERVIEW AND EXAM FEMALE        56                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>
# look at demographics (it should still have lowercase)
demographics
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male          69                  NA
 2         73558 both interview and exam male          54                  NA
 3         73559 both interview and exam male          72                  NA
 4         73560 both interview and exam male           9                  NA
 5         73561 both interview and exam female        73                  NA
 6         73562 both interview and exam male          56                  NA
 7         73563 both interview and exam male           0                   5
 8         73564 both interview and exam female        61                  NA
 9         73565 interview only          male          42                  NA
10         73566 both interview and exam female        56                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>

While this works, it is fairly convoluted, is hard to read, and involves defining an intermediate variable (character_cols) – in the tidyverse style, it is generally recommended to avoid defining intermediate variables whenever possible.

Introducing across()

We could instead do this kind of operation using a mutate() operation in conjunction with across().

Let’s figure out the code for just one column first

# apply toupper to interview_examination with mutate
demographics |> mutate(interview_examination = toupper(interview_examination))
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 BOTH INTERVIEW AND EXAM male          69                  NA
 2         73558 BOTH INTERVIEW AND EXAM male          54                  NA
 3         73559 BOTH INTERVIEW AND EXAM male          72                  NA
 4         73560 BOTH INTERVIEW AND EXAM male           9                  NA
 5         73561 BOTH INTERVIEW AND EXAM female        73                  NA
 6         73562 BOTH INTERVIEW AND EXAM male          56                  NA
 7         73563 BOTH INTERVIEW AND EXAM male           0                   5
 8         73564 BOTH INTERVIEW AND EXAM female        61                  NA
 9         73565 INTERVIEW ONLY          male          42                  NA
10         73566 BOTH INTERVIEW AND EXAM female        56                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>

To do this for all character columns, you need to provide across() to the argument of your mutate function. The across() function has two arguments:

  • The first argument is a “select statement” that specifies which columns you want to apply your function to.

  • The second argument is the function name, toupper, that we want to apply to all columns that are selected as a result of the first argument.

The result is that toupper() is applied to all columns “where” is.character() would result in TRUE:

# use across inside mutate to apply toupper() to all columns 
# where(is.character) is TRUE...
demographics |> mutate(across(where(is.character), toupper))
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 BOTH INTERVIEW AND EXAM MALE          69                  NA
 2         73558 BOTH INTERVIEW AND EXAM MALE          54                  NA
 3         73559 BOTH INTERVIEW AND EXAM MALE          72                  NA
 4         73560 BOTH INTERVIEW AND EXAM MALE           9                  NA
 5         73561 BOTH INTERVIEW AND EXAM FEMALE        73                  NA
 6         73562 BOTH INTERVIEW AND EXAM MALE          56                  NA
 7         73563 BOTH INTERVIEW AND EXAM MALE           0                   5
 8         73564 BOTH INTERVIEW AND EXAM FEMALE        61                  NA
 9         73565 INTERVIEW ONLY          MALE          42                  NA
10         73566 BOTH INTERVIEW AND EXAM FEMALE        56                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>

More across examples

Add 100 to all age_ columns

If we were to do this to just the age_years column, our code would look like:

# use mutate to add 100 to age_years
demographics |> mutate(age_years = age_years + 100)
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male         169                  NA
 2         73558 both interview and exam male         154                  NA
 3         73559 both interview and exam male         172                  NA
 4         73560 both interview and exam male         109                  NA
 5         73561 both interview and exam female       173                  NA
 6         73562 both interview and exam male         156                  NA
 7         73563 both interview and exam male         100                   5
 8         73564 both interview and exam female       161                  NA
 9         73565 interview only          male         142                  NA
10         73566 both interview and exam female       156                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>

To generalize this to all columns that start with age_ using across(), we can use the starts_with() select helper, and apply the anonymous function ~{. + 100}:

# use across inside mutate to apply ~{. + 100} to all columns 
# that starts_with("age_")...
demographics |> mutate(across(starts_with("age_"), ~{. + 100}))
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male         169                  NA
 2         73558 both interview and exam male         154                  NA
 3         73559 both interview and exam male         172                  NA
 4         73560 both interview and exam male         109                  NA
 5         73561 both interview and exam female       173                  NA
 6         73562 both interview and exam male         156                  NA
 7         73563 both interview and exam male         100                 105
 8         73564 both interview and exam female       161                  NA
 9         73565 interview only          male         142                  NA
10         73566 both interview and exam female       156                  NA
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>

Convert born_usa and citizen_usa to numeric (binary) columns using ends_with()

# take a look at born_usa and citizen_use using select()
demographics |> select(born_usa, citizen_usa)
# A tibble: 10,175 × 2
   born_usa citizen_usa
   <lgl>    <lgl>      
 1 TRUE     TRUE       
 2 TRUE     TRUE       
 3 TRUE     TRUE       
 4 TRUE     TRUE       
 5 TRUE     TRUE       
 6 TRUE     TRUE       
 7 TRUE     TRUE       
 8 TRUE     TRUE       
 9 TRUE     TRUE       
10 TRUE     TRUE       
# ℹ 10,165 more rows
# use across inside mutate to apply as.numeric to all columns 
# that ends_with("_usa")...
demographics |> mutate(across(ends_with("_usa"), as.numeric)) |> 
  print(width = Inf)
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male          69                  NA
 2         73558 both interview and exam male          54                  NA
 3         73559 both interview and exam male          72                  NA
 4         73560 both interview and exam male           9                  NA
 5         73561 both interview and exam female        73                  NA
 6         73562 both interview and exam male          56                  NA
 7         73563 both interview and exam male           0                   5
 8         73564 both interview and exam female        61                  NA
 9         73565 interview only          male          42                  NA
10         73566 both interview and exam female        56                  NA
   race             six_month_period age_months_ex_0_19yr served_active_duty_us
   <chr>                       <dbl>                <dbl> <lgl>                
 1 black                           1                   NA TRUE                 
 2 white                           1                   NA FALSE                
 3 white                           2                   NA TRUE                 
 4 white                           1                  119 NA                   
 5 white                           1                   NA FALSE                
 6 mexican american                1                   NA TRUE                 
 7 white                           2                    6 NA                   
 8 white                           2                   NA FALSE                
 9 other hispanic                 NA                   NA FALSE                
10 white                           1                   NA FALSE                
   served_active_duty_foreign born_usa citizen_usa time_in_us education_youth
   <lgl>                         <dbl>       <dbl>      <dbl>           <dbl>
 1 TRUE                              1           1         NA              NA
 2 NA                                1           1         NA              NA
 3 TRUE                              1           1         NA              NA
 4 NA                                1           1         NA               3
 5 NA                                1           1         NA              NA
 6 FALSE                             1           1         NA              NA
 7 NA                                1           1         NA              NA
 8 NA                                1           1         NA              NA
 9 NA                                1           1         NA              NA
10 NA                                1           1         NA              NA
   education marital_status pregnant language_english household_income
       <dbl> <chr>          <chr>    <lgl>                       <dbl>
 1         3 separated      <NA>     TRUE                        17500
 2         3 married        <NA>     TRUE                        40000
 3         4 married        <NA>     TRUE                        70000
 4        NA <NA>           <NA>     TRUE                        60000
 5         5 married        <NA>     TRUE                       100000
 6         4 divorced       <NA>     TRUE                        60000
 7        NA <NA>           <NA>     TRUE                       100000
 8         5 widowed        <NA>     TRUE                        70000
 9         3 married        <NA>     TRUE                       100000
10         3 divorced       <NA>     TRUE                        17500
# ℹ 10,165 more rows

Convert born_usa and citizen_usa to numeric (binary) columns using a vector of column names

Suppose we had a vector of column names that we wanted to modify using mutate():

# define a vector selected_columns with "born_usa" and "citizen_usa"
selected_cols <- c("born_usa", "citizen_usa")

Then we could use all_of(selected_cols) in across() to apply the function to all columns listed in the selected_cols vector as follows:

# use across inside mutate to apply as.numeric to all_of(selected_columns)
demographics |> mutate(across(all_of(selected_cols), as.numeric)) |>
  print(width = Inf)
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male          69                  NA
 2         73558 both interview and exam male          54                  NA
 3         73559 both interview and exam male          72                  NA
 4         73560 both interview and exam male           9                  NA
 5         73561 both interview and exam female        73                  NA
 6         73562 both interview and exam male          56                  NA
 7         73563 both interview and exam male           0                   5
 8         73564 both interview and exam female        61                  NA
 9         73565 interview only          male          42                  NA
10         73566 both interview and exam female        56                  NA
   race             six_month_period age_months_ex_0_19yr served_active_duty_us
   <chr>                       <dbl>                <dbl> <lgl>                
 1 black                           1                   NA TRUE                 
 2 white                           1                   NA FALSE                
 3 white                           2                   NA TRUE                 
 4 white                           1                  119 NA                   
 5 white                           1                   NA FALSE                
 6 mexican american                1                   NA TRUE                 
 7 white                           2                    6 NA                   
 8 white                           2                   NA FALSE                
 9 other hispanic                 NA                   NA FALSE                
10 white                           1                   NA FALSE                
   served_active_duty_foreign born_usa citizen_usa time_in_us education_youth
   <lgl>                         <dbl>       <dbl>      <dbl>           <dbl>
 1 TRUE                              1           1         NA              NA
 2 NA                                1           1         NA              NA
 3 TRUE                              1           1         NA              NA
 4 NA                                1           1         NA               3
 5 NA                                1           1         NA              NA
 6 FALSE                             1           1         NA              NA
 7 NA                                1           1         NA              NA
 8 NA                                1           1         NA              NA
 9 NA                                1           1         NA              NA
10 NA                                1           1         NA              NA
   education marital_status pregnant language_english household_income
       <dbl> <chr>          <chr>    <lgl>                       <dbl>
 1         3 separated      <NA>     TRUE                        17500
 2         3 married        <NA>     TRUE                        40000
 3         4 married        <NA>     TRUE                        70000
 4        NA <NA>           <NA>     TRUE                        60000
 5         5 married        <NA>     TRUE                       100000
 6         4 divorced       <NA>     TRUE                        60000
 7        NA <NA>           <NA>     TRUE                       100000
 8         5 widowed        <NA>     TRUE                        70000
 9         3 married        <NA>     TRUE                       100000
10         3 divorced       <NA>     TRUE                        17500
# ℹ 10,165 more rows

Select helpers summary

So far we have seen the following “select helpers” that can be used to specify the columns inside across():

  • where() – across to all columns that satisfy a logical condition (across(where(is.character), fun))

  • starts_with() and ends_with() – across all columns that start or ends with a specified string (across(starts_with("abc"), fun))

  • contains() – across all columns that contain a specified string (across(contains("abc"), fun))

  • all_of() and any_of() – across all columns whose names are contained in a character vector, where all_of() requires all columns in the vector be present. (across(all_of(vec_names), fun))

  • everything() – across all columns

These can also be used within select()

# select all columns that contain "age"
demographics |> select(contains("age"))
# A tibble: 10,175 × 4
   age_years age_months_sc_0_2yr age_months_ex_0_19yr language_english
       <dbl>               <dbl>                <dbl> <lgl>           
 1        69                  NA                   NA TRUE            
 2        54                  NA                   NA TRUE            
 3        72                  NA                   NA TRUE            
 4         9                  NA                  119 TRUE            
 5        73                  NA                   NA TRUE            
 6        56                  NA                   NA TRUE            
 7         0                   5                    6 TRUE            
 8        61                  NA                   NA TRUE            
 9        42                  NA                   NA TRUE            
10        56                  NA                   NA TRUE            
# ℹ 10,165 more rows
# select all columns that start with "age"
demographics |> select(starts_with("age"))
# A tibble: 10,175 × 3
   age_years age_months_sc_0_2yr age_months_ex_0_19yr
       <dbl>               <dbl>                <dbl>
 1        69                  NA                   NA
 2        54                  NA                   NA
 3        72                  NA                   NA
 4         9                  NA                  119
 5        73                  NA                   NA
 6        56                  NA                   NA
 7         0                   5                    6
 8        61                  NA                   NA
 9        42                  NA                   NA
10        56                  NA                   NA
# ℹ 10,165 more rows
# select all columns that have a logical type
demographics |> select(where(is.logical))
# A tibble: 10,175 × 5
   served_active_duty_us served_active_duty_foreign born_usa citizen_usa
   <lgl>                 <lgl>                      <lgl>    <lgl>      
 1 TRUE                  TRUE                       TRUE     TRUE       
 2 FALSE                 NA                         TRUE     TRUE       
 3 TRUE                  TRUE                       TRUE     TRUE       
 4 NA                    NA                         TRUE     TRUE       
 5 FALSE                 NA                         TRUE     TRUE       
 6 TRUE                  FALSE                      TRUE     TRUE       
 7 NA                    NA                         TRUE     TRUE       
 8 FALSE                 NA                         TRUE     TRUE       
 9 FALSE                 NA                         TRUE     TRUE       
10 FALSE                 NA                         TRUE     TRUE       
# ℹ 10,165 more rows
# ℹ 1 more variable: language_english <lgl>

Exercise

  1. For all columns that contain the word “education” (education_youth and education), add the number 5 to each value

  2. For all numeric columns, standardize the values by subtracting the mean and dividing by the standard deviation

  3. Compute the average age across all columns that start with age_ (Hint: use across() within summarize())

  4. Compute the total number of TRUE values in the born_usa, citizen_usa, and language_english columns (Hint: use across() within summarize())

Solution

demographics |> mutate(
  across(
    contains("education"), 
    ~{. + 5}
  )
) |> 
  print(width = Inf)
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         73557 both interview and exam male          69                  NA
 2         73558 both interview and exam male          54                  NA
 3         73559 both interview and exam male          72                  NA
 4         73560 both interview and exam male           9                  NA
 5         73561 both interview and exam female        73                  NA
 6         73562 both interview and exam male          56                  NA
 7         73563 both interview and exam male           0                   5
 8         73564 both interview and exam female        61                  NA
 9         73565 interview only          male          42                  NA
10         73566 both interview and exam female        56                  NA
   race             six_month_period age_months_ex_0_19yr served_active_duty_us
   <chr>                       <dbl>                <dbl> <lgl>                
 1 black                           1                   NA TRUE                 
 2 white                           1                   NA FALSE                
 3 white                           2                   NA TRUE                 
 4 white                           1                  119 NA                   
 5 white                           1                   NA FALSE                
 6 mexican american                1                   NA TRUE                 
 7 white                           2                    6 NA                   
 8 white                           2                   NA FALSE                
 9 other hispanic                 NA                   NA FALSE                
10 white                           1                   NA FALSE                
   served_active_duty_foreign born_usa citizen_usa time_in_us education_youth
   <lgl>                      <lgl>    <lgl>            <dbl>           <dbl>
 1 TRUE                       TRUE     TRUE                NA              NA
 2 NA                         TRUE     TRUE                NA              NA
 3 TRUE                       TRUE     TRUE                NA              NA
 4 NA                         TRUE     TRUE                NA               8
 5 NA                         TRUE     TRUE                NA              NA
 6 FALSE                      TRUE     TRUE                NA              NA
 7 NA                         TRUE     TRUE                NA              NA
 8 NA                         TRUE     TRUE                NA              NA
 9 NA                         TRUE     TRUE                NA              NA
10 NA                         TRUE     TRUE                NA              NA
   education marital_status pregnant language_english household_income
       <dbl> <chr>          <chr>    <lgl>                       <dbl>
 1         8 separated      <NA>     TRUE                        17500
 2         8 married        <NA>     TRUE                        40000
 3         9 married        <NA>     TRUE                        70000
 4        NA <NA>           <NA>     TRUE                        60000
 5        10 married        <NA>     TRUE                       100000
 6         9 divorced       <NA>     TRUE                        60000
 7        NA <NA>           <NA>     TRUE                       100000
 8        10 widowed        <NA>     TRUE                        70000
 9         8 married        <NA>     TRUE                       100000
10         8 divorced       <NA>     TRUE                        17500
# ℹ 10,165 more rows
# standardize all numeric columns
demographics |> mutate(
  across(
    where(is.numeric), 
    ~{(. - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)}
  )
)
# A tibble: 10,175 × 19
   respondent_id interview_examination   gender age_years age_months_sc_0_2yr
           <dbl> <chr>                   <chr>      <dbl>               <dbl>
 1         -1.73 both interview and exam male       1.54               NA    
 2         -1.73 both interview and exam male       0.922              NA    
 3         -1.73 both interview and exam male       1.66               NA    
 4         -1.73 both interview and exam male      -0.921              NA    
 5         -1.73 both interview and exam female     1.70               NA    
 6         -1.73 both interview and exam male       1.00               NA    
 7         -1.73 both interview and exam male      -1.29               -0.776
 8         -1.73 both interview and exam female     1.21               NA    
 9         -1.73 interview only          male       0.431              NA    
10         -1.73 both interview and exam female     1.00               NA    
# ℹ 10,165 more rows
# ℹ 14 more variables: race <chr>, six_month_period <dbl>,
#   age_months_ex_0_19yr <dbl>, served_active_duty_us <lgl>,
#   served_active_duty_foreign <lgl>, born_usa <lgl>, citizen_usa <lgl>,
#   time_in_us <dbl>, education_youth <dbl>, education <dbl>,
#   marital_status <chr>, pregnant <chr>, language_english <lgl>,
#   household_income <dbl>
# compute average age across all age columns
demographics |> summarize(
  across(
    starts_with("age"), 
    ~mean(., na.rm = TRUE)
  )
)
# A tibble: 1 × 3
  age_years age_months_sc_0_2yr age_months_ex_0_19yr
      <dbl>               <dbl>                <dbl>
1      31.5                10.3                 107.
# compute the total in the `born_usa`, `citizen_usa`, and `language_english`
demographics |> summarise(
  across(
    any_of(c("born_usa", "citizen_usa", "language_english")),
    ~sum(., na.rm = TRUE)
  )
)
# A tibble: 1 × 3
  born_usa citizen_usa language_english
     <int>       <int>            <int>
1     8262        9220             9100