项目作者: cparsania

项目描述 :
An R package to generate heatbox and scatter plots from a feature matrix.
高级语言: R
项目地址: git://github.com/cparsania/corplot.git
创建时间: 2020-04-01T14:29:18Z
项目社区:https://github.com/cparsania/corplot

开源协议:Other

下载


corplot



Motivation

Genomics data are often stored in a matrix-like format, where each row is a
feature (gene, transcript, protein etc.) and each column is a variable
(e.g. signal intensity of experiments such as RNA-seq, ChIP-seq, Pol-II
ChIP-seq etc.). Variables are often grouped by replicates, time-points
or specific experimental conditions such as wild type, deletion,
control, treatment etc. With such multidimensional data, plotting an x-y
scatter plot between different groups requires a lot of data wrangling
before the data can be passed to ggplot.

corplot has functions to generate heatbox and pairwise scatter plots
directly from a feature matrix given in a tbl format. Let’s have a look
at the required input data and the resulting plots from corplot.

Install

  1. if(require("devtools")){
  2. devtools::install_github("cparsania/corplot")
  3. } else{
  4. install.packages("devtools")
  5. devtools::install_github("cparsania/corplot")
  6. }

Correlation heatbox

All samples vs all samples

  1. expr_mat_file <- system.file("extdata" ,"example_data_expr_mat_01.txt" , package = "corplot")
  2. expr_mat <- readr::read_delim(expr_mat_file , delim = "\t")
  3. expr_mat
  4. #> # A tibble: 6,338 x 9
  5. #> gene_name Control_Rep.A Control_Rep.B Treat1_Rep.A Treat1_Rep.B Treat2_Rep.A
  6. #> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
  7. #> 1 C1_00010… 1 0 1 2.81 5.13
  8. #> 2 C1_00020… 9.65 9.32 9.15 9.32 10.9
  9. #> 3 C1_00030… 5.46 4.70 4.64 5.36 6.15
  10. #> 4 C1_00040… 10.9 10.5 11.4 12.2 11.3
  11. #> 5 C1_00050… 0 1 1 5.88 5.43
  12. #> 6 C1_00060… 13.4 13.0 12.9 13.1 13.2
  13. #> 7 C1_00070… 12.9 12.7 12.6 12.2 11.2
  14. #> 8 C1_00080… 10.4 9.81 10.2 9.96 10.4
  15. #> 9 C1_00090… 7.33 6.61 6.13 6.88 7
  16. #> 10 C1_00100… 10.0 10.1 10.9 10.9 10.8
  17. #> # … with 6,328 more rows, and 3 more variables: Treat2_Rep.B <dbl>,
  18. #> # Treat3_Rep.A <dbl>, Treat3_Rep.B <dbl>
  19. ## calculate pairwise correlation
  20. cor_tbl <- corplot::get_pairwise_cor_tbl(expr_mat , var = "gene_name" , method = "pearson")
  21. cor_tbl
  22. #> # A tibble: 64 x 3
  23. #> var1 var2 corr
  24. #> <chr> <chr> <dbl>
  25. #> 1 Control_Rep.A Control_Rep.A 1
  26. #> 2 Control_Rep.B Control_Rep.A 0.96
  27. #> 3 Treat1_Rep.A Control_Rep.A 0.93
  28. #> 4 Treat1_Rep.B Control_Rep.A 0.9
  29. #> 5 Treat2_Rep.A Control_Rep.A 0.86
  30. #> 6 Treat2_Rep.B Control_Rep.A 0.86
  31. #> 7 Treat3_Rep.A Control_Rep.A 0.91
  32. #> 8 Treat3_Rep.B Control_Rep.A 0.91
  33. #> 9 Control_Rep.A Control_Rep.B 0.96
  34. #> 10 Control_Rep.B Control_Rep.B 1
  35. #> # … with 54 more rows
  36. cp <- corplot::get_corr_heat_box(cor_tbl,var1 = var1, var2 = var2 ,value = corr)
  37. cp + viridis::scale_fill_viridis() + ggplot2::theme(axis.text.x = ggplot2::element_text(angle=90))

Group by replicates

The all-samples vs all-samples correlation heatbox has redundant samples on
each axis, which makes the plot less readable. An alternative is to plot the
samples of one replicate (Rep.A) against the samples of the other (Rep.B).

  1. cor_tbl2 <- cor_tbl %>% dplyr::filter(grepl("Rep.A", var1) ) %>% dplyr::filter(grepl("Rep.B", var2) )
  2. cor_tbl2
  3. #> # A tibble: 16 x 3
  4. #> var1 var2 corr
  5. #> <chr> <chr> <dbl>
  6. #> 1 Control_Rep.A Control_Rep.B 0.96
  7. #> 2 Treat1_Rep.A Control_Rep.B 0.95
  8. #> 3 Treat2_Rep.A Control_Rep.B 0.86
  9. #> 4 Treat3_Rep.A Control_Rep.B 0.91
  10. #> 5 Control_Rep.A Treat1_Rep.B 0.9
  11. #> 6 Treat1_Rep.A Treat1_Rep.B 0.94
  12. #> 7 Treat2_Rep.A Treat1_Rep.B 0.93
  13. #> 8 Treat3_Rep.A Treat1_Rep.B 0.9
  14. #> 9 Control_Rep.A Treat2_Rep.B 0.86
  15. #> 10 Treat1_Rep.A Treat2_Rep.B 0.9
  16. #> 11 Treat2_Rep.A Treat2_Rep.B 0.99
  17. #> 12 Treat3_Rep.A Treat2_Rep.B 0.9
  18. #> 13 Control_Rep.A Treat3_Rep.B 0.91
  19. #> 14 Treat1_Rep.A Treat3_Rep.B 0.92
  20. #> 15 Treat2_Rep.A Treat3_Rep.B 0.9
  21. #> 16 Treat3_Rep.A Treat3_Rep.B 0.97
  22. corplot::get_corr_heat_box(cor_tbl2,var1 = var1, var2 = var2, value = corr) +
  23. viridis::scale_fill_viridis()

Scatter plot

Group by replicates : All combinations

  1. groups_file <- expr_mat_file <- system.file("extdata" ,"example_data_01_sample_groups.txt" , package = "corplot")
  2. groups <- readr::read_delim(file = groups_file,delim = "\t")
  3. groups
  4. #> # A tibble: 8 x 3
  5. #> samples condition repl
  6. #> <chr> <chr> <chr>
  7. #> 1 Control_Rep.A Control Rep.A
  8. #> 2 Control_Rep.B Control Rep.B
  9. #> 3 Treat1_Rep.A Treat1 Rep.A
  10. #> 4 Treat1_Rep.B Treat1 Rep.B
  11. #> 5 Treat2_Rep.A Treat2 Rep.A
  12. #> 6 Treat2_Rep.B Treat2 Rep.B
  13. #> 7 Treat3_Rep.A Treat3 Rep.A
  14. #> 8 Treat3_Rep.B Treat3 Rep.B
  15. csp <- corplot::get_pair_wise_scatter(dat_tbl = expr_mat, group_tbl = groups,var_plot = condition, var_plot_group = repl,dat_id = gene_name)
  16. csp

Display corr value

  1. cor_tbl2 <- cor_tbl %>% dplyr::rename(`Rep.A`=var1, `Rep.B` = var2) %>%
  2. dplyr::filter(grepl("Rep.A" ,`Rep.A`)) %>%
  3. dplyr::filter(grepl("Rep.B" ,`Rep.B`)) %>%
  4. TidyWrappers::tbl_replace_string("_.*" , "")
  5. cor_tbl2
  6. #> # A tibble: 16 x 3
  7. #> Rep.A Rep.B corr
  8. #> <chr> <chr> <dbl>
  9. #> 1 Control Control 0.96
  10. #> 2 Treat1 Control 0.95
  11. #> 3 Treat2 Control 0.86
  12. #> 4 Treat3 Control 0.91
  13. #> 5 Control Treat1 0.9
  14. #> 6 Treat1 Treat1 0.94
  15. #> 7 Treat2 Treat1 0.93
  16. #> 8 Treat3 Treat1 0.9
  17. #> 9 Control Treat2 0.86
  18. #> 10 Treat1 Treat2 0.9
  19. #> 11 Treat2 Treat2 0.99
  20. #> 12 Treat3 Treat2 0.9
  21. #> 13 Control Treat3 0.91
  22. #> 14 Treat1 Treat3 0.92
  23. #> 15 Treat2 Treat3 0.9
  24. #> 16 Treat3 Treat3 0.97
  25. csp + ggplot2::geom_text(data = cor_tbl2, x = 4, y = 18, ggplot2::aes(label = paste("r","=",corr , sep = "")) ,
  26. fontface="italic" , col = "red",size = 5)

Group by replicates : Only replicate pairs

  1. csp2 <- corplot::get_pair_wise_scatter(dat_tbl = expr_mat, group_tbl = groups,var_plot = condition, var_plot_group = repl,dat_id = gene_name,view_matrix = FALSE)
  2. csp2

Display corr value

  1. cor_tbl3 <- cor_tbl2 %>% dplyr::filter(`Rep.A` == `Rep.B`)
  2. csp3 <- corplot::get_pair_wise_scatter(dat_tbl = expr_mat, group_tbl = groups,var_plot = condition, var_plot_group = repl,dat_id = gene_name,view_matrix = FALSE)
  3. csp2 + ggplot2::geom_text(data = cor_tbl3, x = 3, y = 18, ggplot2::aes(label = paste("r","=",corr , sep = "")) ,
  4. fontface="italic" , col = "red")