% DeepATPseq: sequence-based prediction of protein-ATP binding residues (Analytical Biochemistry 626, 2021).
@article{HU2021114241,
  author   = {Hu, Jun and Zheng, Lin-Lin and Bai, Yan-Song and Zhang, Ke-Wen and Yu, Dong-Jun and Zhang, Gui-Jun},
  title    = {Accurate prediction of protein-{ATP} binding residues using position-specific frequency matrix},
  journal  = {Analytical Biochemistry},
  volume   = {626},
  pages    = {114241},
  year     = {2021},
  issn     = {0003-2697},
  doi      = {10.1016/j.ab.2021.114241},
  url      = {https://www.sciencedirect.com/science/article/pii/S0003269721001421},
  keywords = {Protein-ATP Binding residues, Deep convolutional neural network, Supporting vector machine, Protein sequence information},
  abstract = {Knowledge of protein-ATP interaction can help for protein functional annotation and drug discovery. Accurately identifying protein-ATP binding residues is an important but challenging task to gain the knowledge of protein-ATP interactions, especially for the case where only protein sequence information is given. In this study, we propose a novel method, named DeepATPseq, to predict protein-ATP binding residues without using any information about protein three-dimension structure or sequence-derived structural information. In DeepATPseq, the HHBlits-generated position-specific frequency matrix (PSFM) profile is first employed to extract the feature information of each residue. Then, for each residue, the PSFM-based feature is fed into two prediction models, which are generated by the algorithms of deep convolutional neural network (DCNN) and support vector machine (SVM) separately. The final ATP-binding probability of the corresponding residue is calculated by the weighted sum of the outputted values of DCNN-based and SVM-based models. Experimental results on the independent validation data set demonstrate that DeepATPseq could achieve an accuracy of 77.71\%, covering 57.42\% of all ATP-binding residues, while achieving a Matthew's correlation coefficient value (0.655) that is significantly higher than that of existing sequence-based methods and comparable to that of the state-of-the-art structure-based predictors. Detailed data analysis show that the major advantage of DeepATPseq lies at the combination utilization of DCNN and SVM that helps dig out more discriminative information from the PSFM profiles. The online server and standalone package of DeepATPseq are freely available at: https://jun-csbio.github.io/DeepATPseq/ for academic use.},
}