Using Regular Expressions to Filter Data with the Tidyverse for More Accurate Matches
Here’s how you can use the tidyverse, together with some regular-expression matching, to filter your data:
library(tidyverse)
# Example inputs, written row-wise so each record reads across one line.
# `Data` holds one row per well; `Output` is kept as character strings.
Data <- tribble(
  ~Name,      ~Pos, ~Output,
  "100",      "A3", "20.00",
  "100",      "A4", "20.10",
  "200",      "B3", "21.67",
  "250",      "B4", "23.24",
  "1E5",      "C3", "21.97",
  "1E5",      "C4", "22.03",
  "Negative", "D3", "38.99",
  "Negative", "D4", "38.99"
)
# `Replicates` holds one row per pair of wells, stored as a "<well>, <well>"
# string alongside its `Mean.Cq` and `STD.Cq` values.
Replicates <- tribble(
  ~Replicates, ~Mean.Cq, ~STD.Cq,
  "A3, A4",    20.05,    0.05,
  "C3, C4",    22.00,    0.03,
  "D3, D4",    38.99,    0.00
)
# Split `Replicates$Replicates` (e.g. "A3, A4") into two fields, R1 and R2.
# The greedy `.*` before the comma makes both patterns split on the last comma.
# `\\s*` makes the whitespace after the comma optional, so "A3,A4" is handled
# the same way as "A3, A4"; trimws() removes any padding left around a field.
# An entry that contains no comma is left unchanged in both R1 and R2.
Replicates$R1 <- trimws(sub("^(.*),.*", "\\1", Replicates$Replicates))
Replicates$R2 <- trimws(sub(".*,\\s*(.*)", "\\1", Replicates$Replicates))
# Match Data rows to the replicate summaries through each of the two
# Replicates fields. inner_join() keeps only the wells whose `Pos` appears in
# R1 (respectively R2), so nothing is matched unless it exists in both tables.
# `Pos` is kept in the selection because it is needed as a key further down.
df_R1 <- Data %>%
  inner_join(Replicates, by = c("Pos" = "R1")) %>%
  select(Name, Pos, Mean.Cq, STD.Cq)
df_R2 <- Data %>%
  inner_join(Replicates, by = c("Pos" = "R2")) %>%
  select(Name, Pos, Mean.Cq, STD.Cq)
# Re-attach the matched summaries to every row of Data. left_join() keeps the
# wells that belong to no replicate pair, leaving NA in Mean.Cq and STD.Cq.
df <- Data %>%
  left_join(bind_rows(df_R1, df_R2), by = c("Name", "Pos"))
# Both wells of a pair carry the same summary, so collapse to one row per
# distinct Name / Mean.Cq / STD.Cq combination for display.
distinct(df, Name, Mean.Cq, STD.Cq)
#> # A tibble: 5 x 3
#> Name Mean.Cq STD.Cq
#> <chr> <dbl> <dbl>
#> 1 100 20.0 0.05
#> 2 200 NA NA
#> 3 250 NA NA
#> 4 1E5 22 0.03
#> 5 Negative 39.0 0
This will give you the same results as your original answer, but with a few extra steps to make sure we’re only matching data that actually exists in `Data`.
Last modified on 2024-01-07