% Conference paper in the PDGC 2022 proceedings. Names are stored in the
% unambiguous "Last, First" form; the proceedings title lives in booktitle only.
@inproceedings{a9cc8f93b1b14d3194a3765eced4d599,
  author    = {Goswami, Rohit and Ruhila, S.},
  title     = {High Throughput Reproducible Literate Phylogenetic Analysis},
  booktitle = {{PDGC} 2022 - 2022 7th International Conference on Parallel, Distributed and Grid Computing},
  editor    = {Rawat, Hari Singh and Bhatt, Ravindara and Gupta, Pradeep Kumar and Seghal, Vivek Kumar},
  pages     = {337--340},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/PDGC56933.2022.10053210},
  language  = {English},
  keywords  = {high-performance-computing, literate-programming, phylogenetics, r-lang, reproducible-research},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 7th International Conference on Parallel, Distributed and Grid Computing, PDGC 2022 ; Conference date: 25-11-2022 Through 27-11-2022},
  abstract  = {We present a holistic approach from a literate programming perspective to frame and solve systems biology problems. In particular, given the large data-sets required for answering questions relating to evolutionary histories we focus on the generalization and workflow required on a typical SLURM or PBS TORQUE queue driven high performance computing cluster. We demonstrate how to leverage multiple CLI tools compiled for efficient use in a portable manner on heterogeneous computational resources and further demonstrating the use of R to generate literate data-driven plots and analysis. High Performance Computing cluster (HPC) bottlenecks and installation barriers are also discussed and mitigation strategies are developed. As a concrete example we demonstrate the estimation of a phylogenetic tree, used to pose and answer questions on evolutionary lineages. In this manner, a generalized approach which can be used for systems biology is elucidated for manipulating phylogenetic data, including its validation, multiple sequence alignment, tree estimation through different models and reproduction.},
}