我在 R 中加载了一个大型数据集(见下面的简短版本)。我想针对每个 Cruiseid、Samplenr、Species 和 Age 的组合计算一个值(即基于四个分组条件):
Cruiseid Samplenr ……
早上好,
我觉得这个问题描述得不够清楚,不过你可以尝试类似下面的做法(使用 dplyr):
# Requires dplyr for `%>%`, `mutate()`, `group_by()` and `summarise()`.
library(dplyr)

# Step 1: compute the per-row value and group the rows by the four keys.
# NOTE(review): `sample` is the asker's data frame here; the name shadows
# base::sample() — kept as-is so the snippet matches the question.
sample <- sample %>%
  mutate(calculate = ((TNumLK / TNumStat) * 0.5 + 0.25) * 10) %>%
  group_by(Cruiseid, Samplenr, Species, Age)

# Step 2: average the per-row value within each group.
summarisedDF <- sample %>%
  summarise(avg.calculate = mean(calculate))
我注意到你的 "Length"、"TNumStat"、"TNumLK" 这几列用 , 而不是 . 作为小数点,因此这些字符(因子)格式的列无法直接强制转换为数值,需要先把逗号替换成小数点。
"Length", "TNumStat", "TNumLK"
,
.
# Convert the decimal-comma columns to numeric: take the text of each
# value, swap "," for "." and parse the result as a number.
TNumStat[c("TNumStat", "TNumLK")] <- lapply(
  TNumStat[c("TNumStat", "TNumLK")],
  function(column) {
    as.numeric(chartr(",", ".", as.character(column)))
  }
)
这可能取决于你的系统区域设置;如果你的数据已经能正确读取为数值,可以跳过这一步。
然后,你可以使用 by 按组应用你的公式。
by
# Apply the formula once per Cruiseid/Samplenr/Species group. Each group
# yields a small data frame: the distinct values of the first three
# columns plus the computed `value`.
# NOTE(review): the question also lists Age as a grouping condition; add
# "Age" to the grouping columns (and to the key columns kept below) if a
# per-Age result is wanted — confirm with the asker.
l <- by(
  TNumStat,
  TNumStat[c("Cruiseid", "Samplenr", "Species")],
  function(grp) {
    ratio <- mean(grp[["TNumLK"]]) / mean(grp[["TNumStat"]])
    cbind(unique(grp[1:3]), value = (ratio * 0.5 + 0.25) * 10)
  }
)
这会得到一个列表,你可以用 rbind 把它合并起来得到结果。
rbind
# Stack the per-group data frames in `l` into one data frame, then print it.
TNumStat.new <- do.call(rbind, l)
TNumStat.new
#    Cruiseid Samplenr Species     value
# 6    197502       35     154 148.46288
# 10   197502       36     154  85.14956
# 1    197502       37     154 149.61421
# 12   197502       41     154 174.24600
# 26   197503       53     154 106.86347
# 20   197503       54     154 159.17545
# 16   197503       56     154 131.26698
数据
# Example data as dput() output: a 29-row data frame with integer columns
# Cruiseid, Samplenr, Species, Age and LK, and factor columns Length,
# TNumStat and TNumLK whose labels use "," as the decimal separator.
TNumStat <- structure(list(Cruiseid = c(197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197502L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L, 197503L), Samplenr = c(37L, 37L, 37L, 37L, 37L, 35L, 35L, 35L, 35L, 36L, 36L, 41L, 41L, 41L, 41L, 56L, 56L, 56L, 56L, 54L, 54L, 54L, 54L, 54L, 54L, 53L, 53L, 53L, 53L ), Species = c(154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L, 154L), Age = c(0L, 0L, 2L, 2L, 2L, 0L, 2L, 2L, 2L, 0L, 2L, 0L, 2L, 2L, 2L, 0L, 0L, 2L, 2L, 0L, 0L, 0L, 2L, 2L, 2L, 0L, 0L, 0L, 0L), Length = structure(c(3L, 8L, 9L, 10L, 11L, 2L, 13L, 16L, 17L, 3L, 11L, 8L, 14L, 15L, 18L, 8L, 5L, 9L, 12L, 3L, 6L, 7L, 9L, 12L, 16L, 3L, 19L, 1L, 4L), .Label = c("107,5", "112,5", "12,5", "142,5", "147,5", "157,5", "167,5", "17,5", "172,5", "177,5", "182,5", "187,5", "197,5", "202,5", "212,5", "217,5", "232,5", "242,5", "97,5"), class = "factor"), LK = c(2L, 3L, 34L, 35L, 36L, 22L, 39L, 43L, 46L, 2L, 36L, 3L, 40L, 42L, 48L, 3L, 29L, 34L, 37L, 2L, 31L, 33L, 34L, 37L, 43L, 2L, 19L, 21L, 28L), TNumStat = structure(c(16L, 11L, 2L, 5L, 9L, 3L, 10L, 10L, 10L, 21L, 19L, 13L, 20L, 12L, 12L, 24L, 1L, 6L, 14L, 18L, 4L, 7L, 8L, 15L, 15L, 17L, 22L, 23L, 25L), .Label = c("10,30952381", "11,54166667", "11,85654008", "12", "12,0625", "13,19047619", "13,25", "13,85", "2,083333333", "2,109704641", "2,166666667", "2,173913043", "2,260869565", "2,380952381", "2,5", "2,791666667", "2,875536481", "3,35", "3,496503497", "4,347826087", "4,685314685", "4,806866953", "5,622317597", "7,428571429", "8,776824034"), class = "factor"), TNumLK = structure(c(16L, 18L, 11L, 12L, 21L, 8L, 22L, 25L, 29L, 24L, 4L, 20L, 5L, 26L, 1L, 6L, 9L, 14L, 23L, 19L, 10L, 13L, 15L, 28L, 2L, 17L, 27L, 3L, 7L), .Label = c("104,3478", "107,5", "118,0687", 
"125,8741", "173,913", "22,28571", "245,7511", "260,8439", "298,9762", "372", "392,4167", "422,1875", "437,25", "448,4762", "470,9", "5,583333", "5,751073", "6,5", "6,7", "6,782609", "75", "82,27848", "88,09524", "9,370629", "90,7173", "91,30435", "91,33047", "92,5", "97,04641"), class = "factor")), class = "data.frame", row.names = c(NA, -29L))