Filtering DataFrames in R Using Base R and Dplyr
Filtering DataFrames in R
In this example, we will show you how to filter dataframes in R using base R functions and dplyr.
Base R Method
We start by putting our data frames into a list using `mget`. Then we use `lapply` to apply an anonymous function to each data frame in the list. This function returns the row with the minimum value in the RMSE column.
# Sample data: each data frame has a `<name>_lb` column followed by a
# `<name>_RMSE` column.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Name the data frames explicitly, in the order they should appear in the
# result. Unlike ls(pattern = "bb$"), this cannot pick up unrelated objects
# whose names happen to end in "bb", and it removes the need to reorder the
# rows afterwards (order() on the names is not the right permutation in
# general; match() or name-based indexing would be).
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

# For each data frame, keep the row whose second column (RMSE) is smallest.
# which.min() returns integer(0) when there is no minimum (empty or all-NA
# column); in that case the data frame contributes NULL.
tmp <- lapply(df_list, function(x) {
  i <- which.min(x[[2]])
  if (length(i) > 0L) {
    data.frame(lb = x[i, 1], RMSE = x[i, 2])
  } else {
    NULL
  }
})

# Drop NULL entries before binding so the `df` labels below are taken from
# exactly the data frames that produced a row.
tmp <- Filter(Negate(is.null), tmp)
res <- do.call(rbind, tmp)
res <- cbind.data.frame(df = names(tmp), res)
res
This gives the same output as our example: one row per data frame, holding the `lb` value with the smallest RMSE.
Dplyr Method
Alternatively, we can use dplyr to achieve the same result with more concise code:
library(dplyr)

# Sample data: each data frame has a `<name>_lb` column followed by a
# `<name>_RMSE` column.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Name the data frames explicitly, in the order they should appear in the
# result, rather than relying on ls(pattern = "bb$"), which returns names
# alphabetically and matches any object whose name ends in "bb".
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

# For each data frame: give the two columns common names so the results can
# be stacked, sort ascending by RMSE, and keep the first (smallest) row.
# bind_rows(.id = "df") then stacks the one-row results and records the list
# name each row came from in a `df` column.
res <- df_list %>%
  lapply(function(x) {
    x %>%
      setNames(c("lb", "RMSE")) %>%
      arrange(RMSE) %>%
      slice(1)
  }) %>%
  bind_rows(.id = "df")
res
This will also give the same output as our example.
Last modified on 2024-07-21