% Emu (Curry et al.), Nature Methods 19(7):845--853, July 2022.
% Title words wrapped in braces are case-protected so sentence-casing styles
% do not lowercase them. The note field carries the funding statement once,
% followed by the publisher copyright line.
@article{0e15221801b84257a469780c9b8b44f0,
  author    = {Curry, Kristen D. and Wang, Qi and Nute, Michael G. and Tyshaieva, Alona and Reeves, Elizabeth and Soriano, Sirena and Wu, Qinglong and Graeber, Enid and Finzer, Patrick and Mendling, Werner and Savidge, Tor and Villapol, Sonia and Dilthey, Alexander and Treangen, Todd J.},
  title     = {{Emu}: species-level microbial community profiling of full-length {16S} {rRNA} {Oxford} {Nanopore} sequencing data},
  journal   = {Nature Methods},
  year      = {2022},
  month     = jul,
  volume    = {19},
  number    = {7},
  pages     = {845--853},
  doi       = {10.1038/s41592-022-01520-4},
  issn      = {1548-7091},
  publisher = {Springer Nature},
  language  = {English (US)},
  keywords  = {Animals, Bacteria/genetics, Dromaiidae/genetics, High-Throughput Nucleotide Sequencing/methods, Microbiota/genetics, Nanopore Sequencing, Phylogeny, RNA, Ribosomal, 16S/genetics, Sequence Analysis, DNA/methods},
  abstract  = {16S ribosomal RNA-based analysis is the established standard for elucidating the composition of microbial communities. While short-read 16S rRNA analyses are largely confined to genus-level resolution at best, given that only a portion of the gene is sequenced, full-length 16S rRNA gene amplicon sequences have the potential to provide species-level accuracy. However, existing taxonomic identification algorithms are not optimized for the increased read length and error rate often observed in long-read data. Here we present Emu, an approach that uses an expectation--maximization algorithm to generate taxonomic abundance profiles from full-length 16S rRNA reads. Results produced from simulated datasets and mock communities show that Emu is capable of accurate microbial community profiling while obtaining fewer false positives and false negatives than alternative methods. Additionally, we illustrate a real-world application of Emu by comparing clinical sample composition estimates generated by an established whole-genome shotgun sequencing workflow with those returned by full-length 16S rRNA gene sequences processed with Emu.},
  note      = {Funding Information: We thank two additional members of the Treangen Laboratory, B. Kille for technical support and N. Sapoval for algorithm development. Computational support and infrastructure were provided by the Centre for Information and Media Technology (ZIM) at the University of D{\"u}sseldorf (Germany). This work has been supported by J{\"u}rgen Manchot Foundation and Deutsche Forschungsgemeinschaft (DFG) award 428994620 (A.D., A.T., W.M., P.F. and E.G.). This work has also been supported by NIH grants from NIDDK P30-DK56338, NIAID R01-AI10091401, U01-AI24290 and P01-AI152999, and NINR R01-NR013497 (T.S. and Q. Wu). Q. Wang and S.V. were supported in part by NIH grant R21NS106640 from the National Institute for Neurological Disorders and Stroke (NINDS). K.D.C. was supported in part by a Ken Kennedy Institute Computational Science and Engineering Graduate Recruiting Fellowship. K.D.C., M.G.N. and T.J.T. were supported in part by NIH grant P01-AI152999 from the National Institute of Allergy and Infectious Diseases (NIAID). K.D.C. and T.J.T. were supported in part by NSF EF-2126387. M.G.N. was funded by a fellowship from the National Library of Medicine Training Program in Biomedical Informatics and Data Science (T15LM007093, PI: Kavraki). Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive licence to Springer Nature America, Inc.},
}