Workflow Steps and Code Snippets
729 tagged steps and code snippets that match the keyword 'tidyverse'
Bioinfo Macro Host Genome Short Variant Discovery Workflow
library(getopt)
library(tidyverse)

option_matrix <- matrix(
  c(
    "infile", "i", 1, "character",
    "outfile", "o", 1, "character"
  ),
  byrow = TRUE,
  ncol = 4
)

options <- getopt(option_matrix)

read_gtcheck <- function(gtcheck_filename) {
  read_tsv(
    file = gtcheck_filename,
    skip = 20,
    col_names = c(
      "dc", "query_sample", "genotyped_sample",
      "discordance", "log_p_hwe", "number_of_sites"
    )
  ) %>%
    select(-dc)
}

create_distance_matrix <- function(gtcheck_long) {
  gtcheck_diagonal <- gtcheck_long %>%
    select(query_sample, genotyped_sample) %>%
    mutate(discordance = 0) %>%
    pivot_longer(
      query_sample:genotyped_sample,
      names_to = "type",
      values_to = "query_sample"
    ) %>%
    select(query_sample) %>%
    mutate(
      genotyped_sample = query_sample,
      discordance = 0.0
    ) %>%
    distinct()

  gtcheck_upper <- gtcheck_long %>%
    select(query_sample, genotyped_sample, discordance)

  gtcheck_lower <- gtcheck_long %>%
    select(
      genotyped_sample = query_sample,
      query_sample = genotyped_sample,
      discordance
    )

  gtcheck_discordances <-
    bind_rows(gtcheck_diagonal, gtcheck_upper, gtcheck_lower) %>%
    arrange(query_sample)

  gtcheck_discordances_matrix <- gtcheck_discordances %>%
    pivot_wider(
      names_from = genotyped_sample,
      values_from = discordance
    ) %>%
    column_to_rownames("query_sample") %>%
    as.matrix()

  return(gtcheck_discordances_matrix)
}

if (!interactive()) {
  gtcheck_long <- read_gtcheck(options$infile)
  gtcheck_discordances_matrix <- create_distance_matrix(gtcheck_long)
  pdf(file = options$outfile, paper = "a4")
  heatmap(gtcheck_discordances_matrix)
  dev.off()
}
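A minimal sketch of how create_distance_matrix() behaves, assuming the script above has been sourced; the sample names and discordance values below are invented for illustration.

library(tidyverse)

# Hypothetical gtcheck-style input: one row per sample pair.
gtcheck_long <- tribble(
  ~query_sample, ~genotyped_sample, ~discordance, ~log_p_hwe, ~number_of_sites,
  "sample_A",    "sample_B",        0.12,         -3.4,       1500,
  "sample_A",    "sample_C",        0.30,         -2.1,       1480,
  "sample_B",    "sample_C",        0.05,         -4.0,       1510
)

# Mirror the pairwise rows and add a zero diagonal, giving a symmetric
# sample-by-sample discordance matrix suitable for heatmap().
m <- create_distance_matrix(gtcheck_long)  # defined in the script above
m["sample_A", "sample_B"]  # 0.12; diagonal entries are 0
heatmap(m)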
Repository containing bioinformatic code for macro-scale host transcriptomic data processing (v0.0.1)
library(tidyverse)
library(argparse)

read_star_counts <- function(filename) {
  # Read only the first two columns. The other two are for strand-specific data.
  # Extract the file name, since we have to match the Illumina name with the
  # sample name.
  star_column_names <- c(
    "gene_id", "unstranded", "stranded_forward", "stranded_reverse"
  )
  filename %>%
    read_tsv(
      col_names = star_column_names,
      col_select = 1:2,
      skip = 4,
      show_col_types = FALSE
    ) %>%
    mutate(
      sample_id = filename %>%
        basename() %>%
        str_remove(".ReadsPerGene.out.tab")
    ) %>%
    select(sample_id, gene_id, counts = unstranded)
}

parser <- ArgumentParser()
parser$add_argument(
  "-i", "--input-folder",
  type = "character",
  dest = "input_folder",
  help = paste(
    "Folder that contains the STAR counts. Will search recursively for",
    "files ending in \".ReadsPerGene.out.tab\"."
  )
)
parser$add_argument(
  "-o", "--output-file",
  type = "character",
  dest = "output_file",
  help = "Output file containing the table with all the counts together"
)
args <- parser$parse_args()

files <- list.files(
  path = args$input_folder,
  pattern = "*.ReadsPerGene.out.tab",
  recursive = TRUE,
  full.names = TRUE
)

counts_raw <- files %>%
  map(read_star_counts) %>%
  bind_rows() %>%
  pivot_wider(names_from = sample_id, values_from = counts)

dir.create(dirname(args$output_file), recursive = TRUE, showWarnings = FALSE)
write_tsv(counts_raw, args$output_file)
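As a small illustration of the final reshape step above, here is a hypothetical long counts table pivoted into the gene-by-sample layout that the script writes out; the sample and gene identifiers are made up.

library(tidyverse)

counts_long <- tribble(
  ~sample_id, ~gene_id,    ~counts,
  "sampleA",  "ENSG00001", 10,
  "sampleA",  "ENSG00002", 0,
  "sampleB",  "ENSG00001", 7,
  "sampleB",  "ENSG00002", 3
)

# One row per gene, one count column per sample, matching the table
# produced by pivot_wider() and written with write_tsv() above.
counts_long %>%
  pivot_wider(names_from = sample_id, values_from = counts)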
Pipeline to pull microbial reads from WGS data and perform metagenomic analysis (v1.1)
library(tidyverse)
library(MetBrewer)

#### Load Data ####
concatenated_alignments <- read_tsv(
  snakemake@input[["alignment_stats"]],
  col_names = c("SN", "sequence", "num_sequences", "file_samples")
)

concatenated_alignments$file_samples[
  concatenated_alignments$file_samples == "# excluding supplementary and secondary reads"
] <- ""

#### Get List of Sample Names ####
sample_names <- concatenated_alignments %>%
  separate(file_samples, c("file", "sample"), sep = "stats_ffq/") %>%
  separate(sample, c("sample", "junk"), sep = "_align") %>%
  select(sequence, num_sequences, sample) %>%
  group_by(sample) %>%
  summarise(n()) %>%
  select(sample) %>%
  filter(sample != "NA")

sample_names <- as.vector(sample_names$sample)

#### Clean Data ####
concatenated_alignments_clean <- concatenated_alignments %>%
  select(!SN) %>%
  separate(file_samples, c("file", "sample"), sep = "stats_ffq/") %>%
  separate(sample, c("sample", "junk"), sep = "_align") %>%
  select(sequence, num_sequences, sample) %>%
  mutate(sample = replace(sample, is.na(sample), sample_names))

##### Generate Summary Tables ######
# Wider, 'human readable' format with proportions
concatenated_alignments_clean_wide <- concatenated_alignments_clean %>%
  group_by(sample) %>%
  pivot_wider(names_from = sequence, values_from = num_sequences) %>%
  mutate(proportion_mapped = `reads mapped:` / `raw total sequences:` * 100)

write.table(concatenated_alignments_clean_wide,
            file = snakemake@output[["human_read_tbl"]],
            sep = "\t", row.names = FALSE)

write.table(concatenated_alignments_clean,
            file = snakemake@output[["long_read_tbl"]],
            sep = "\t", row.names = FALSE)

#### Plot Summaries ####
alignment_statistics_plot <- concatenated_alignments_clean %>%
  ggplot() +
  aes(x = sample, y = num_sequences, fill = sequence) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw(base_size = 15) +
  scale_fill_manual(values = met.brewer("Redon", n = 3, type = "discrete"),
                    labels = c("Raw Read Number", "Reads Mapped", "Reads Unmapped")) +
  xlab("Sample") +
  ylab("Read Number") +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
        legend.position = "top")

ggsave(snakemake@output[["alignment_stat_plot"]], alignment_statistics_plot)
library(tidyverse)

genus_delim <- function() {
  if (snakemake@params[["kraken_db_type"]]) {
    return("g__")
  } else {
    return("g__g__")
  }
}

spp_delim <- function() {
  if (snakemake@params[["kraken_db_type"]]) {
    return("s__")
  } else {
    return("s__s__")
  }
}

genus <- genus_delim()
spp <- spp_delim()

data <- read_tsv(snakemake@input[["combined_report"]])

##### Get Domain #####
kingdom_data <- data %>%
  filter(!grepl("p__|g__|s__|c__|o__|f__", `#Classification`)) %>%
  separate(`#Classification`, c("junk", "kingdom"), sep = "^k__") %>%
  select(!junk)

kingdom_data_long <- kingdom_data %>%
  pivot_longer(cols = !kingdom, names_to = "study", values_to = "read_number")

##### Get genus #####
genus_data <- data %>%
  filter(grepl("g__", `#Classification`)) %>%
  separate(`#Classification`, c("junk", "genus"), sep = genus) %>%
  select(!junk) %>%
  filter(!grepl("s__", genus))

genus_data_long <- genus_data %>%
  pivot_longer(cols = !genus, names_to = "study", values_to = "read_number")

##### Get species #####
species_data <- data %>%
  filter(grepl("s__", `#Classification`)) %>%
  separate(`#Classification`, c("junk", "species"), sep = spp) %>%
  select(!junk)

species_data_long <- species_data %>%
  pivot_longer(cols = !species, names_to = "study", values_to = "read_number")

##### How many reads per study? #####
total_reads_per_study <- species_data_long %>%
  group_by(study) %>%
  summarise(total_reads = sum(read_number))

classified_reads_plot <- total_reads_per_study %>%
  ggplot() +
  aes(x = study, y = total_reads) +
  geom_col() +
  theme_classic() +
  coord_flip()

##### Species classification passing a specific threshold (unfinished) #####
species_data_filtered <- species_data_long %>%
  filter(read_number > 1000)

genus_data_filtered <- genus_data_long %>%
  filter(read_number > 1000)

#### Export Dataframes to TSV files ####
write.table(kingdom_data_long, file = snakemake@output[["kingdom_table"]], sep = "\t", row.names = FALSE)
write.table(genus_data_long, file = snakemake@output[["genus_table"]], sep = "\t", row.names = FALSE)
write.table(species_data_long, file = snakemake@output[["spp_table"]], sep = "\t", row.names = FALSE)
write.table(total_reads_per_study, file = snakemake@output[["classified_reads"]], sep = "\t", row.names = FALSE)

#### Export Classified Reads Plot ####
pdf(snakemake@output[["classified_reads_plot"]])
print(classified_reads_plot)
dev.off()
library(tidyverse)

metaphlan_tbl <- read_tsv(snakemake@input[["concat_tbl"]], skip = 1) %>%
  select(!NCBI_tax_id)

#### Get Kingdom ####
metaphlan_tbl_king <- metaphlan_tbl %>%
  filter(!grepl("p__|g__|s__|c__|o__|f__", clade_name)) %>%
  mutate_at("clade_name", str_replace, "k__", "")

metaphlan_tbl_king_long <- metaphlan_tbl_king %>%
  pivot_longer(cols = !clade_name, names_to = "samples", values_to = "rel_abund")

#### Get Genus ####
metaphlan_tbl_genus <- metaphlan_tbl %>%
  filter(grepl("g__", clade_name)) %>%
  separate(clade_name, c("junk", "genus"), sep = "g__") %>%
  select(!junk) %>%
  filter(!grepl("s__", genus))

metaphlan_tbl_genus_long <- metaphlan_tbl_genus %>%
  pivot_longer(cols = !genus, names_to = "samples", values_to = "rel_abund")

#### Get Species ####
metaphlan_tbl_spp <- metaphlan_tbl %>%
  filter(grepl("s__", clade_name)) %>%
  separate(clade_name, c("junk", "species"), sep = "s__") %>%
  select(!junk)

metaphlan_tbl_spp_long <- metaphlan_tbl_spp %>%
  pivot_longer(cols = !species, names_to = "samples", values_to = "rel_abund")

#### Export Dataframes to TSV files ####
write.table(metaphlan_tbl_king_long, file = snakemake@output[["kingdom_table"]], sep = "\t", row.names = FALSE)
write.table(metaphlan_tbl_genus_long, file = snakemake@output[["genus_table"]], sep = "\t", row.names = FALSE)
write.table(metaphlan_tbl_spp_long, file = snakemake@output[["spp_table"]], sep = "\t", row.names = FALSE)
library(tidyverse)

# Get colour palette
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# Set Read Filter
filter <- as.character(snakemake@params[["bracken_threshold"]])
filter <- as.numeric(filter)

#### Load Data #####
conc_data <- read_tsv(
  snakemake@input[["conc_input"]],
  col_names = c("species", "taxa_id", "taxa_lvl", "kraken_assigned_reads",
                "added_reads", "new_est_reads", "fraction_total", "file_path")
) %>%
  filter(species != "name")

conc_data_clean <- conc_data %>%
  separate(file_path, c("file", "sample"), sep = "out/") %>%
  separate(sample, c("sample", "junk"), sep = "_bracken") %>%
  select(sample, species, taxa_id, taxa_lvl, kraken_assigned_reads,
         added_reads, new_est_reads, fraction_total) %>%
  mutate_at(c("taxa_id", "kraken_assigned_reads", "added_reads",
              "new_est_reads", "fraction_total"), as.numeric)

#### Added Reads Across Samples ####
sample_summary <- conc_data_clean %>%
  group_by(sample) %>%
  summarise(kraken_reads = sum(kraken_assigned_reads),
            added_reads = sum(added_reads),
            total_new_reads = sum(new_est_reads))

sample_summary_long <- sample_summary %>%
  pivot_longer(cols = !sample, names_to = "classification_type", values_to = "num_reads")

sample_summary_plot <- sample_summary %>%
  pivot_longer(cols = !sample, names_to = "classification_type", values_to = "num_reads") %>%
  filter(classification_type != "total_new_reads") %>%
  ggplot() +
  aes(x = reorder(sample, desc(num_reads)), y = num_reads, fill = classification_type) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  scale_fill_manual(values = cbPalette) +
  theme_classic(base_size = 25) +
  xlab("Sample") +
  ylab("Number of Reads")

ggsave(snakemake@output[["added_reads_plot"]], height = 15, width = 20)

#### Stratified Heatmap of Relative Abundance Per Sample ####
conc_data_clean$species <- sub(" ", "_", conc_data_clean$species)

conc_data_clean_sep <- conc_data_clean %>%
  separate(species, into = c("Genus", "Species"), sep = "_") %>%
  filter(kraken_assigned_reads > filter)

conc_data_clean_genus_group <- conc_data_clean_sep %>%
  group_by(Genus) %>%
  summarise(n())

plot_stratified_heatmap <- function() {
  for (i in conc_data_clean_genus_group$Genus) {
    print(
      conc_data_clean_sep %>%
        filter(Genus == i) %>%
        ggplot() +
        aes(x = sample, y = Species) +
        geom_tile(aes(fill = new_est_reads), linejoin = "round") +
        scale_fill_gradientn(colours = c("white", "#F0E442", "#E69F00")) +
        facet_grid(rows = "Genus") +
        theme_classic(base_size = 25) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
        xlab("Sample")
    )
  }
}

pdf(snakemake@output[["bracken_stratified_heatmap"]], height = 20, width = 20)
plot_stratified_heatmap()
dev.off()

##### Heatmap of Species #####
plot_heatmap_per_sample <- function(data, filter_thresh) {
  data %>%
    filter(new_est_reads > filter_thresh) %>%
    select(sample, species, fraction_total) %>%
    ggplot() +
    aes(x = sample, y = species) +
    geom_tile(aes(fill = log(fraction_total))) +
    scale_fill_gradientn(colours = c("white", "orange", "blue")) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5)) +
    ggtitle(paste("Log Relative Abundance of Microbial Species, Filtered > ", filter_thresh, sep = ""))
}

pdf(snakemake@output[["bracken_overall_heatmap"]], height = 20, width = 20)
plot_heatmap_per_sample(conc_data_clean, filter)
dev.off()
library(tidyverse)
library(MetBrewer)

# Define the expected CheckM columns
columns <- c("bin", "marker_lineage", "num_genomes", "num_markers", "num_marker_sets",
             "completeness", "contamination", "strain_heterogen", "genome_size_bp",
             "num_ambiguous_bases", "num_scaffolds", "num_contigs", "N50_scaffold",
             "N50_contig", "mean_scaffold_length", "mean_contig_length",
             "longest_scaffold", "longest_contig", "GC", "GC_std", "coding_density",
             "translation_table", "num_predicted_genes", "0", "1", "2", "3", "4", "5+",
             "sample")

# Read data, skip the first row and remove the redundant header rows from the previous step
bin_stats <- read_tsv(snakemake@input[["checkm_conc_result"]], col_names = columns, skip = 1) %>%
  filter(bin != "Bin Id")

# Convert to numeric columns
bin_stats$completeness <- as.numeric(bin_stats$completeness)
bin_stats$contamination <- as.numeric(bin_stats$contamination)
bin_stats$genome_size_bp <- as.numeric(bin_stats$genome_size_bp)
bin_stats$num_contigs <- as.numeric(bin_stats$num_contigs)
bin_stats$mean_contig_length <- as.numeric(bin_stats$mean_contig_length)
bin_stats$N50_contig <- as.numeric(bin_stats$N50_contig)

# Get breaks for plotting
bin_stats$col <- cut(bin_stats$completeness, breaks = c(-Inf, 80, Inf), labels = c("<80", ">=80"))
bin_stats$contig_cut <- cut(bin_stats$num_contigs, breaks = c(-Inf, 250, Inf), labels = c("<=250", ">250"))

# Plot Completeness vs Contamination
CompletenessVsContam <- bin_stats %>%
  ggplot() +
  aes(x = completeness, y = contamination, size = genome_size_bp, col = col) +
  geom_point(alpha = 0.5) +
  scale_x_continuous(limits = c(0, 100)) +
  theme_minimal() +
  scale_color_manual(values = c("blue", "orange"))

pdf(snakemake@output[["CompletenessVsContam"]])
print(CompletenessVsContam)
dev.off()

#### Plot Completeness vs Contamination Per Sample
bin_stats_sample_split <- bin_stats %>%
  separate(bin, into = c("sample", "bin_num"), sep = "\\.")

compvscontam_samp <- bin_stats_sample_split %>%
  ggplot() +
  aes(x = completeness, y = contamination, col = sample) +
  geom_point(alpha = 0.8, size = 4) +
  scale_x_continuous(limits = c(0, 100)) +
  geom_vline(xintercept = 80, linetype = "dotted", color = "green", size = 1) +
  geom_vline(xintercept = 50, linetype = "dotted", color = "orange", size = 1) +
  theme_bw(base_size = 18) +
  scale_color_manual(values = met.brewer("Redon", 10, type = "discrete")) +
  xlab("Completeness") +
  ylab("Contamination") +
  labs(colour = "Sample") +
  theme(legend.position = "bottom", legend.direction = "horizontal",
        legend.text = element_text(size = 10))

pdf(snakemake@output[["CompletenessVsContam_Per_Sample"]])
print(compvscontam_samp)
dev.off()

#### Plot Bar Chart with Completeness vs Contamination Per Sample
bin_stats_bar <- bin_stats_sample_split %>%
  unite("sample_bin", sample:bin_num) %>%
  select(completeness, contamination, sample_bin) %>%
  pivot_longer(c(completeness, contamination), names_to = "bin_rank", values_to = "value") %>%
  ggplot() +
  aes(x = reorder(sample_bin, desc(value)), y = value, fill = bin_rank) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw(base_size = 15) +
  coord_flip() +
  scale_fill_manual(values = met.brewer("Redon", 2, type = "discrete"),
                    labels = c("Completeness", "Contamination")) +
  geom_hline(yintercept = 80, linetype = "dotted", color = "green", size = 1) +
  geom_hline(yintercept = 50, linetype = "dotted", color = "orange", size = 1) +
  xlab("Sample") +
  ylab("Contamination and Completion Score (%)") +
  labs(fill = "CheckM Completion vs Contamination Score") +
  theme(legend.position = "bottom", legend.direction = "vertical")

pdf(snakemake@output[["BarChartCompletenessContamination"]])
print(bin_stats_bar)
dev.off()

#### Plot the number of contigs vs completeness
NumContigsVsCompleteness <- bin_stats %>%
  ggplot() +
  aes(x = num_contigs, y = completeness, size = mean_contig_length, col = contig_cut) +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  scale_color_manual(values = c("blue", "orange"))

pdf(snakemake@output[["NumContigsVsCompleteness"]])
print(NumContigsVsCompleteness)
dev.off()
library(tidyverse)
library(MetBrewer)

#### Import Kraken Data
concatenated_kraken_summaries <- read_tsv(
  snakemake@input[["concatenated_kraken_summary"]],
  col_names = c("proportion_of_reads", "number_of_reads", "number_of_reads2",
                "read_code", "read_code2", "classification_status", "file_path")
)

concatenated_kraken_summaries_clean <- concatenated_kraken_summaries %>%
  separate(file_path, into = c("junk", "filename"), sep = "classified_summary/") %>%
  separate(filename, into = c("sample", "junk2"), sep = "_classified_summary.txt") %>%
  select(sample, proportion_of_reads, number_of_reads, classification_status)

#### Plot Proportion of Classified vs Unclassified Reads from Kraken
plot_kraken_proportion <- function(kraken_table) {
  concatenated_kraken_summaries_clean_plot <- kraken_table %>%
    ggplot() +
    aes(x = sample, y = number_of_reads, fill = classification_status) +
    geom_bar(stat = "identity", position = "fill") +
    theme_bw(base_size = 15) +
    scale_fill_manual(values = met.brewer("Redon", n = 2, type = "continuous"),
                      labels = c("Classified Reads", "Unclassified Reads")) +
    xlab("Sample") +
    ylab("Proportion of Reads") +
    theme(legend.title = element_blank(),
          axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
          legend.position = "top")
  return(concatenated_kraken_summaries_clean_plot)
}

kraken_classification_proportions <- plot_kraken_proportion(concatenated_kraken_summaries_clean)

#### Save file
ggsave(snakemake@output[["classified_proportions"]], kraken_classification_proportions)
library(tidyverse)
library(MetBrewer)

##### Import Data #####
spp_tbl <- read_tsv(snakemake@input[["spp_table"]])
genus_tbl <- read_tsv(snakemake@input[["genus_table"]])

##### Define User Read Threshold #####
filter_threshold <- as.character(snakemake@params[["strat_thresh"]])
filter_threshold <- as.numeric(filter_threshold)

genus_thres <- as.character(snakemake@params[["genus_thresh"]])
genus_thres <- as.numeric(genus_thres)

spp_thres <- as.character(snakemake@params[["species_thresh"]])
spp_thres <- as.numeric(spp_thres)

##### Clean and Prep Data #####
genus_tbl_clean <- genus_tbl %>%
  separate(study, into = c("sample", "junk"), sep = "_") %>%
  select(!junk) %>%
  rename("phylo_level" = genus)

spp_tbl_clean <- spp_tbl %>%
  separate(study, into = c("sample", "junk"), sep = "_") %>%
  select(!junk) %>%
  rename("phylo_level" = species)

##### Plot Function #####
kraken_heatmap <- function(clean_kraken_table, read_thresh, type) {
  heatmap <- clean_kraken_table %>%
    filter(read_number > read_thresh) %>%
    filter(phylo_level != "Homo_sapiens") %>%
    ggplot() +
    aes(x = sample, y = phylo_level) +
    geom_tile(aes(fill = log(read_number)), linejoin = "round") +
    scale_fill_gradientn(colours = met.brewer("Hiroshige", 4, type = "discrete")) +
    theme_bw(base_size = 20) +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
          legend.position = "top",
          legend.text = element_text(size = 15),
          legend.title = element_text(size = 15)) +
    labs(fill = "Read Number (log)") +
    xlab("Sample") +
    ylab(type)
  return(heatmap)
}

species_heatmap <- kraken_heatmap(spp_tbl_clean, spp_thres, "Species")
genus_heatmap <- kraken_heatmap(genus_tbl_clean, genus_thres, "Genus")

ggsave(snakemake@output[["genus_heatmap"]], genus_heatmap, height = 20, width = 15)
ggsave(snakemake@output[["species_heatmap"]], species_heatmap, height = 20, width = 15)

##### Faceted Plot By Genus #####
spp_tbl_split <- spp_tbl_clean %>%
  separate(phylo_level, into = c("Genus", "Species1", "Species2"), sep = "_") %>%
  unite("Species", Species1:Species2, na.rm = TRUE)

genus_list <- spp_tbl_split %>%
  group_by(Genus) %>%
  summarise(n())

plot_species_facets <- function() {
  for (i in genus_list$Genus) {
    print(
      spp_tbl_split %>%
        filter(Genus == i) %>%
        ggplot() +
        aes(x = sample, y = Species) +
        geom_tile(aes(fill = read_number), linejoin = "round") +
        scale_fill_gradientn(colours = c("white", "#F0E442", "#E69F00")) +
        facet_grid(rows = "Genus") +
        theme_classic(base_size = 25) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
    )
  }
}

pdf(snakemake@output[["stratified_heatmap"]], height = 20, width = 20)
plot_species_facets()
dev.off()
library(tidyverse)
library(MetBrewer)
library(treemapify)

##### Import Data #####
spp_tbl <- read_tsv(snakemake@input[["spp_table"]])
genus_tbl <- read_tsv(snakemake@input[["genus_table"]])

##### Define User Read Threshold #####
filter_threshold <- as.character(snakemake@params[["strat_thresh"]])
filter_threshold <- as.numeric(filter_threshold)

genus_thres <- as.character(snakemake@params[["genus_thresh"]])
genus_thres <- as.numeric(genus_thres)

spp_thres <- as.character(snakemake@params[["species_thresh"]])
spp_thres <- as.numeric(spp_thres)

##### Clean and Prep Data #####
genus_tbl_clean <- genus_tbl %>%
  separate(study, into = c("sample", "junk"), sep = "_") %>%
  select(!junk) %>%
  rename("phylo_level" = genus)

spp_tbl_clean <- spp_tbl %>%
  separate(study, into = c("sample", "junk"), sep = "_") %>%
  select(!junk) %>%
  rename("phylo_level" = species)

##### Plot Spatial Plots #####
plot_spatial_classification <- function(classification_tbl, read_thresh) {
  tbl_clean_sum <- classification_tbl %>%
    group_by(phylo_level) %>%
    summarise(summed_read_num = sum(read_number)) %>%
    filter(summed_read_num > read_thresh)

  spatial_plot <- tbl_clean_sum %>%
    filter(phylo_level != "Homo_sapiens") %>%
    filter(summed_read_num > read_thresh) %>%
    ggplot(aes(area = summed_read_num, fill = summed_read_num,
               label = phylo_level, subgroup = phylo_level)) +
    geom_treemap() +
    geom_treemap_subgroup_border(colour = "white", size = 1) +
    geom_treemap_text(colour = "white", place = "center", reflow = TRUE) +
    scale_fill_gradientn(colours = met.brewer("Hiroshige", 10, type = "discrete"),
                         name = "Total Read Number") +
    theme_minimal(base_size = 15) +
    theme(legend.position = "right")

  return(spatial_plot)
}

species_spatial_plot <- plot_spatial_classification(spp_tbl_clean, spp_thres)
genus_spatial_plot <- plot_spatial_classification(genus_tbl_clean, genus_thres)

ggsave(snakemake@output[["genus_spatial_plot"]], genus_spatial_plot, height = 20, width = 15)
ggsave(snakemake@output[["species_spatial_plot"]], species_spatial_plot, height = 20, width = 15)
tool / cran
tidyverse
Easily Install and Load the 'Tidyverse': The 'tidyverse' is a set of packages that work in harmony because they share common data representations and 'API' design. This package is designed to make it easy to install and load multiple 'tidyverse' packages in a single step. Learn more about the 'tidyverse' at https://www.tidyverse.org.
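A minimal usage sketch (not part of the listed workflows): install the meta-package once, then attach the core tidyverse packages in a single step.

# Install the meta-package once, then load the core tidyverse packages
# (e.g. ggplot2, dplyr, tidyr, readr, purrr, tibble, stringr, forcats)
# with a single library() call.
install.packages("tidyverse")
library(tidyverse)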