https://cloud.tencent.com/developer/article/1666764 #GRID_input_new_p10 group group library(doBy) #使用其中的 summaryBy() 以方便按分组计算均值、中位数 #读取数据 gene <- read.table('gene.txt', sep = '\t', row.names = 1, header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) group <- read.table('group.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) result <- NULL #使用循环,逐一对各基因进行两组间 wilcox 秩和检验 for (n in 1:nrow(gene)) { gene_n <- data.frame(t(gene[n,subset(group, group %in% c(1, 2))$sample])) gene_id <- names(gene_n)[1] names(gene_n)[1] <- 'gene' gene_n$sample <- rownames(gene_n) gene_n <- merge(gene_n, group, by = 'sample', all.x = TRUE) gene_n$group <- factor(gene_n$group) p_value <- wilcox.test(gene~group, gene_n)$p.value if (!is.na(p_value) & p_value < 0.05) { stat <- summaryBy(gene~group, gene_n, FUN = c(mean, median)) result <- rbind(result, c(gene_id, as.character(stat[1,1]), stat[1,2], stat[1,3], as.character(stat[2,1]), stat[2,2], stat[2,3], p_value)) } } #输出统计结果 result <- data.frame(result) names(result) <- c('gene_id', 'group1', 'mean1', 'median1', 'group2', 'mean2', 'median2', 'p_value') result$p_adjust <- p.adjust(result$p_value, method = 'BH') #推荐加个 p 值校正的过程 write.table(result, 'gene.wilcox.txt', sep = '\t', row.names = FALSE, quote = FALSE) > gene <- read.table('GRID_input_new_p10.csv', sep = '\t', row.names = 1, header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) > group <- read.table('group.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) > gene_2 <- data.frame(t(gene[2,subset(group, group %in% c(1, 2))$sample])) > gene_2 [1] Micrococcus.aloeverae <0 rows> (or 0-length row.names) > gene_id <- names(gene_2)[1] > names(gene_2)[1] <- 'gene' > rownames(gene_2) character(0) > for (n in 1:nrow(gene)) { + gene_n <- data.frame(t(gene[n,subset(group, group %in% c(1, 2))$sample])) + gene_id <- names(gene_n)[1] + names(gene_n)[1] <- 'gene' + + gene_n$sample <- rownames(gene_n) + gene_n <- merge(gene_n, group, by = 'sample', all.x = TRUE) + + gene_n$group <- factor(gene_n$group) + p_value <- wilcox.test(gene~group, gene_n)$p.value + if (!is.na(p_value) & p_value < 0.05) { + stat <- summaryBy(gene~group, gene_n, FUN = c(mean, median)) + result <- rbind(result, c(gene_id, as.character(stat[1,1]), stat[1,2], stat[1,3], as.character(stat[2,1]), stat[2,2], stat[2,3], p_value)) + } + } Error in wilcox.test.formula(gene ~ group, gene_n) : grouping factor must have exactly 2 levels > group <- read.table('group.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, check.names = FALSE) > group sample group 1 LOR002C 1 2 LOR003C 1 3 LOR005C 1 4 LOR006C 1 5 LOR007C 1 6 LOR008C 1 7 LOR009C 1 8 LOR011C 1 9 LOR012C 1 10 LOR013C 1 11 LOR014C 1 12 LOR015C 1 13 LOR016C 1 14 LOR017C 1 15 LOR018C 1 16 LOR019C 1 17 LOR020C 1 18 LOR021C 1 19 LOR022C 1 20 LOR023C 1 21 LOR024C 1 > for (n in 1:nrow(gene)) { + gene_n <- data.frame(t(gene[n,subset(group, group %in% c(1, 2))$sample])) + gene_id <- names(gene_n)[1] + names(gene_n)[1] <- 'gene' + + gene_n$sample <- rownames(gene_n) + gene_n <- merge(gene_n, group, by = 'sample', all.x = TRUE) + + gene_n$group <- factor(gene_n$group) + p_value <- wilcox.test(gene~group, gene_n)$p.value + if (!is.na(p_value)){ + stat <- summaryBy(gene~group, gene_n, FUN = c(mean, median)) + result <- rbind(result, c(gene_id, as.character(stat[1,1]), stat[1,2], stat[1,3], as.character(stat[2,1]), stat[2,2], stat[2,3], p_value)) + } + } > result <- data.frame(result) > names(result) <- c('gene_id', 'group1', 'mean1', 'median1', 'group2', 'mean2', 'median2', 'p_value') > result$p_adjust <- p.adjust(result$p_value, method = 'BH') #推荐加个 p 值校正的过程 > write.table(result, 'gene.wilcox.all.csv', sep = '\t', row.names = FALSE, quote = FALSE) > for (n in 1:nrow(gene)) { + gene_n <- data.frame(t(gene[n,subset(group, group %in% c(1, 2))$sample])) + gene_id <- names(gene_n)[1] + names(gene_n)[1] <- 'gene' + + gene_n$sample <- rownames(gene_n) + gene_n <- merge(gene_n, group, by = 'sample', all.x = TRUE) + + gene_n$group <- factor(gene_n$group) + p_value <- wilcox.test(gene~group, gene_n)$p.value + if (!is.na(p_value) & p_value < 0.5) { + stat <- summaryBy(gene~group, gene_n, FUN = c(mean, median)) + result <- rbind(result, c(gene_id, as.character(stat[1,1]), stat[1,2], stat[1,3], as.character(stat[2,1]), stat[2,2], stat[2,3], p_value)) + } + } > result <- data.frame(result) > names(result) <- c('gene_id', 'group1', 'mean1', 'median1', 'group2', 'mean2', 'median2', 'p_value') > result$p_adjust <- p.adjust(result$p_value, method = 'BH') #推荐加个 p 值校正的过程 > write.table(result, 'gene.wilcox.P0.1.csv', sep = '\t', row.names = FALSE, quote = FALSE) >
Recent Comments