Combine Columns to Remove NAs

Combine column to remove NA's

A dplyr::coalesce based solution could be as:

# dplyr supplies %>%, mutate(), coalesce() and select(); load it so the
# snippet runs on its own.
library(dplyr)

# coalesce() returns, per row, the first non-missing value among x, y and z.
# Only the key column `a` and the combined column are kept.
data %>%
  mutate(mycol = coalesce(x, y, z)) %>%
  select(a, mycol)
# a mycol
# 1 A 1
# 2 B 2
# 3 C 3
# 4 D 4
# 5 E 5

Data

# Example input: a key column `a` plus three numeric columns in which each row
# carries a single non-missing value.
data <- data.frame(
  a = c("A", "B", "C", "D", "E"),
  x = c(1, 2, NA, NA, NA),
  y = c(NA, NA, 3, NA, NA),
  z = c(NA, NA, NA, 4, 5)
)

Merge two columns containing NA values in complementing rows

We can try using the coalesce function from the dplyr package:

# coalesce() comes from dplyr; load it so the snippet runs on its own.
library(dplyr)

# merged: the value of x where it is present, otherwise the value of y.
df$merged <- coalesce(df$x, df$y)
# flag: 0 when y is missing, 1 when y is present.
df$flag <- ifelse(is.na(df$y), 0, 1)
df

x y merged flag
1 1 NA 1 0
2 NA 2 2 1
3 NA 3 3 1
4 4 NA 4 0
5 5 NA 5 0
6 NA 6 6 1

How to combine multiple character columns into one column and remove NA without knowing column numbers

Here is a base R method

# For every row, join the non-missing entries of all columns except the first
# into one space-separated string.
input$ALL <- apply(
  input[-1],
  1,
  function(row) paste(row[!is.na(row)], collapse = " ")
)
input$ALL
#[1] "tv" "web" "book" "web tv"

How to combine columns within one data.frame that contain NA's in order to remove NA's

tidyr's unite() has an na.rm argument, which is FALSE by default; setting it to TRUE drops the NA values before the columns are pasted together

library(tidyr)

# Paste VoS, Value and Total.Value into a single `Total` column; na.rm = TRUE
# removes missing values before pasting, so no "NA" text ends up in the result.
unite(
  all_data,
  col = "Total",
  VoS, Value, Total.Value,
  na.rm = TRUE
)
# Total
#1 1
#2 41
#3 13
#4 76
#5 4
#6 7
#7 22

In the OP's original data, convert the columns of interest to character from factor and then do the unite

library(dplyr)
library(tidyr) # unite() lives in tidyr, not dplyr

# Columns 3, 6, 7 and 11 are the ones to merge. They are converted to
# character first (some are factors), then united with NAs dropped.
# across() replaces the superseded mutate_at(); the same positions are passed
# straight to unite().
all_data_new %>%
  mutate(across(c(3, 6, 7, 11), as.character)) %>%
  unite(New, c(3, 6, 7, 11), na.rm = TRUE)
# Geographic.area.name Year New X2007.NAICS.codes.and.NAICS.based.rollup.code
#1 Alabama 2009 90,530,746 31-33
#2 Alabama 2008 116,401,285 31-33
#3 Alabama 2009 9,932,542 311
#4 Alabama 2008 9,661,432 311
#5 Alabama 2009 1,819,728 3111
#6 Alabama 2008 1,744,928 3111
# Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code
#1 Manufacturing
#2 Manufacturing
#3 Food manufacturing
#4 Food manufacturing
#5 Animal food manufacturing
#6 Animal food manufacturing
#Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... X2012.NAICS.code
#1 <NA> <NA>
#2 <NA> <NA>
#3 <NA> <NA>
#4 <NA> <NA>
#5 <NA> <NA>
#6 <NA> <NA>
# Meaning.of.2012.NAICS.code
#1 <NA>
#2 <NA>
#3 <NA>
#4 <NA>
#5 <NA>
#6 <NA>

Or another option is coalesce

library(dplyr)

# Pass every column of the piped data frame to coalesce() as a separate
# argument, keeping only the resulting `Total` column.
all_data %>%
  transmute(Total = do.call(coalesce, .))
# Total
#1 1
#2 41
#3 13
#4 76
#5 4
#6 7
#7 22

Or in base R with pmax

# Row-wise maximum across all columns, ignoring NA, folded pairwise.
Reduce(function(lhs, rhs) pmax(lhs, rhs, na.rm = TRUE), all_data)

Or using pmin

# Row-wise minimum across all columns, ignoring NA, folded pairwise.
Reduce(function(lhs, rhs) pmin(lhs, rhs, na.rm = TRUE), all_data)

data

# Seven-row example: three integer columns, one non-missing value per row.
all_data <- data.frame(
  VoS = c(1L, NA, NA, 76L, 4L, NA, NA),
  Value = c(NA, NA, 13L, NA, NA, 7L, NA),
  Total.Value = c(NA, 41L, NA, NA, NA, NA, 22L)
)

# Six-row, eleven-column sample written as a structure() literal.
# Column layout by position:
#   1  Geographic.area.name   factor (single level "Alabama")
#   2  Year                   integer
#   3  Total.value.of.shipments...1.000.   character (the only populated value column)
#   4, 5  2007 NAICS code and its meaning  factors
#   6, 7  X.Total.value...                 factors with no levels, all NA
#   8  Relative.standard.error...          character, all NA
#   9, 10 2012 NAICS code and its meaning  factors with no levels, all NA
#   11 Total.value.of.shipments.and.receipts.for.services...1.000.  character, all NA
all_data_new <- structure(list(Geographic.area.name = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "Alabama", class = "factor"), Year = c(2009L,
2008L, 2009L, 2008L, 2009L, 2008L), Total.value.of.shipments...1.000. = c("90,530,746",
"116,401,285", "9,932,542", "9,661,432", "1,819,728", "1,744,928"
), X2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("31-33", "311", "3111"), class = "factor"),
Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(3L,
3L, 2L, 2L, 1L, 1L), .Label = c("Animal food manufacturing",
"Food manufacturing", "Manufacturing"), class = "factor"),
X.Total.value.of.shipments...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), X.Total.value.of.shipments.and.receipts.for.services...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), X2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Meaning.of.2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Total.value.of.shipments.and.receipts.for.services...1.000. = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), row.names = c(NA, 6L), class = "data.frame")

remove NA values and combine non NA values into a single column

We can use pmax

# Row-wise maximum across all columns, ignoring NA, folded pairwise.
Reduce(function(lhs, rhs) pmax(lhs, rhs, na.rm = TRUE), fb_all_data)

If a row can contain more than one non-NA element and you want to combine them into a single string, a simple base R option would be

# Per row, drop the NA entries and join what is left with ", ".
data.frame(
  final = apply(
    fb_all_data,
    1,
    function(row) paste(na.omit(row), collapse = ", ")
  )
)

Or using coalesce

library(dplyr)
library(tibble)

# Move the row names into a column `rn` so they pass through the dplyr verbs,
# take the first non-missing value of v1..v5 per row, then restore the row
# names from `rn`.
fb_all_data %>%
  rownames_to_column("rn") %>%
  mutate(final = coalesce(v1, v2, v3, v4, v5)) %>%
  select(rn, final) %>%
  column_to_rownames("rn")
# final
#a 1
#b 2
#c 3
#d 4
#e 5

Or using tidyverse, for multiple non-NA elements

# pmap_chr() is exported by purrr, which dplyr and tibble do not attach.
library(purrr)

# Row names are parked in `rn`; pmap_chr() then walks the remaining columns
# row by row, drops the NAs and joins the surviving values with ", ".
fb_all_data %>%
  rownames_to_column("rn") %>%
  transmute(rn, final = pmap_chr(.[-1], ~ c(...) %>%
    na.omit() %>%
    toString())) %>%
  column_to_rownames("rn")

NOTE: Here we are showing data that the OP showed as example and not some other dataset

data

# Five-row example with row names a..e: each row has its single non-missing
# integer in a different column.
fb_all_data <- data.frame(
  v1 = c(1L, NA, NA, NA, NA),
  v2 = c(NA, 2L, NA, NA, NA),
  v3 = c(NA, NA, 3L, NA, NA),
  v4 = c(NA, NA, NA, 4L, NA),
  v5 = c(NA, NA, NA, NA, 5L),
  row.names = c("a", "b", "c", "d", "e")
)

Combining more than 2 columns by removing NA's in R

You can use apply for this. If df is your data frame:

# Keep only the non-missing values of every row of `df`.
# The shape apply() returns depends on how many values each row keeps:
#   - same count (> 1) for all rows -> matrix with one column per row of `df`
#   - exactly one value per row     -> plain vector
#   - differing counts              -> list
#   - nothing kept at all           -> zero-length result
# Each shape is normalised to a "kept values x rows" matrix so that t() always
# gives one output row per input row.
df2 <- apply(df, 1, function(x) x[!is.na(x)])
if (is.list(df2)) {
  # Ragged rows: pad the shorter ones with NA on the right.
  width <- max(lengths(df2))
  df2 <- do.call(cbind, lapply(df2, `length<-`, width))
} else if (length(df2) == 0L) {
  df2 <- matrix(NA, nrow = 0L, ncol = nrow(df))
} else if (!is.matrix(df2)) {
  # One value per row: without this, t() would produce a single wide row.
  df2 <- matrix(df2, nrow = 1L, dimnames = list(NULL, names(df2)))
}
df3 <- data.frame(t(df2))
# Reuse the leading column names of `df`; seq_len() is safe when df3 has no columns.
colnames(df3) <- colnames(df)[seq_len(ncol(df3))]

Output:

#      col1 col2
# 1 13
# 10 18
# 7 15
# 4 16

R: Combine columns ignoring NAs

Using base R...

# For every row, store the first non-missing value found in columns 2 to 4
# (NA when all three are missing) in `mycol`.
data$mycol <- apply(
  data[, 2:4],
  1,
  function(row) na.omit(row)[1]
)

data
a x y z mycol
1 A 1 NA NA 1
2 B 2 NA NA 2
3 C NA 3 NA 3
4 D NA NA 4 4
5 E NA NA 5 5
6 F NA NA NA NA


Related Topics



Leave a reply



Submit