% Jin et al. (2023), HydRA, Molecular Cell 83(14). Exported record, hand-cleaned:
% acronyms in the title are brace-protected so sentence-casing styles keep their
% capitalisation; the note keeps a single copy of the funding statement.
@article{f4fa20904cf14ac9a5e4554700b35c58,
  author    = {Jin, Wenhao and Brannan, Kristopher W. and Kapeli, Katannya and Park, Samuel S. and Tan, Hui Qing and Gosztyla, Maya L. and Mujumdar, Mayuresh and Ahdout, Joshua and Henroid, Bryce and Rothamel, Katherine and Xiang, Joy S. and Wong, Limsoon and Yeo, Gene W.},
  title     = {{HydRA}: Deep-learning models for predicting {RNA}-binding capacity from protein interaction association context and protein sequence},
  journal   = {Molecular Cell},
  volume    = {83},
  number    = {14},
  pages     = {2595--2611.e11},
  year      = {2023},
  month     = jul,
  day       = {20},
  publisher = {Cell Press},
  issn      = {1097-2765},
  doi       = {10.1016/j.molcel.2023.06.019},
  language  = {English (US)},
  keywords  = {machine learning, protein-protein interaction network, RNA-binding domains, RNA-binding proteins, RNA/metabolism, Animals, Humans, Protein Binding, Binding Sites/genetics, Deep Learning, Hydra/genetics},
  abstract  = {RNA-binding proteins (RBPs) control RNA metabolism to orchestrate gene expression and, when dysfunctional, underlie human diseases. Proteome-wide discovery efforts predict thousands of RBP candidates, many of which lack canonical RNA-binding domains (RBDs). Here, we present a hybrid ensemble RBP classifier (HydRA), which leverages information from both intermolecular protein interactions and internal protein sequence patterns to predict RNA-binding capacity with unparalleled specificity and sensitivity using support vector machines (SVMs), convolutional neural networks (CNNs), and Transformer-based protein language models. Occlusion mapping by HydRA robustly detects known RBDs and predicts hundreds of uncharacterized RNA-binding associated domains. Enhanced CLIP (eCLIP) for HydRA-predicted RBP candidates reveals transcriptome-wide RNA targets and confirms RNA-binding activity for HydRA-predicted RNA-binding associated domains. HydRA accelerates construction of a comprehensive RBP catalog and expands the diversity of RNA-binding associated domains.},
  note      = {Funding Information: G.W.Y. is supported by NIH R01 HG004659, NIH U24 HG009889, an Allen Distinguished Investigator Award, and a Paul G. Allen Frontiers Group advised grant of the Paul G. Allen Foundation. K.W.B. is supported by NIH/NINDS K22 K22NS112678 and CPRIT Award RR220017. W.J. and G.W.Y. conceived the study. W.J. designed HydRA algorithm, developed the software, tested the software, collected data, analyzed data, and visualized results. K.W.B., K.K., G.W.Y., W.J., and J.S.X. designed wet-lab validation experiments. K.W.B., K.K., S.S.P., H.Q.T., M.L.G., M.M., and J.A. carried out the experimental work. W.J., G.W.Y., and L.W. designed computational validation experiments. W.J. and B.H. carried out the computational validation experiments. W.J., K.W.B., and K.K. wrote the original manuscript draft. W.J., K.W.B., K.K., G.W.Y., S.S.P., and K.R. reviewed and edited the manuscript. G.W.Y. acquired the funding and supervised the study. G.W.Y. is a co-founder, member of the Board of Directors, on the SAB, equity holder, and paid consultant for Locanabio and Eclipse BioInnovations. G.W.Y. is a visiting professor at the National University of Singapore. G.W.Y.{\textquoteright}s interests have been reviewed and approved by the University of California, San Diego in accordance with its conflict-of-interest policies. Publisher Copyright: {\textcopyright} 2023 University of California San Diego},
}