Filtering DataFrames in R Using Base R and Dplyr

Filtering DataFrames in R

In this example, we will show you how to filter dataframes in R using base R functions and dplyr.

Base R Method

We start by putting our dataframes into a list using mget. Then we use lapply to apply an anonymous function to each dataframe in the list. This function returns the row with the minimum value for the RMSE column.

# Example data: each data frame has a lookback column (`*_lb`) followed by an
# RMSE column (`*_RMSE`). The code below relies on that column order.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Name the data frames explicitly, in the order the result should be reported.
# This avoids picking up unrelated workspace objects whose names happen to end
# in "bb", and removes the need to re-sort the result afterwards.
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

# For every data frame keep only the row with the smallest RMSE (column 2).
# which.min() returns integer(0) when there is no non-missing value, in which
# case the data frame contributes nothing.
tmp <- lapply(df_list, function(x) {
  i <- which.min(x[[2]])
  if (length(i) > 0L) {
    data.frame(lb = x[i, 1], RMSE = x[i, 2])
  } else {
    NULL
  }
})

# Drop empty results first so the name column and the stacked rows always
# have the same length.
tmp <- Filter(Negate(is.null), tmp)
res <- do.call(rbind, tmp)
res <- cbind.data.frame(df = names(tmp), res)
res

This returns one row per data frame, namely the row with the smallest RMSE, listed in the order nbb, mbb, cbb, tmbb, tcbb.

Dplyr Method

Alternatively, we can use dplyr to achieve the same result with more concise code:

library(dplyr)

# Example data: each data frame has a lookback column (`*_lb`) followed by an
# RMSE column (`*_RMSE`). The code below relies on that column order.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Name the data frames explicitly, in the order the result should be reported,
# rather than pattern-matching on whatever is in the workspace.
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

res <- df_list %>%
  # Give every data frame the same column names so they can be stacked.
  lapply(setNames, nm = c("lb", "RMSE")) %>%
  # Stack them; the list names become the `df` identifier column.
  bind_rows(.id = "df") %>%
  group_by(df) %>%
  # Keep exactly one row per data frame: the one with the smallest RMSE.
  slice_min(RMSE, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # group_by() sorts groups alphabetically; restore the requested order.
  arrange(match(df, df_names))

res

This produces the same result as the base R method above: one row per data frame, holding its minimum RMSE.


Last modified on 2024-07-21