分享

Python脚本:将mol2分子库文件拆分为单个mol2文件

 DrugAI 2022-04-19

用法:

python split_multimol2.py multi-mol2.mol2 out_dir

注释:命令中各部分依次为 python 解释器、脚本文件(split_multimol2.py)、待拆分的 mol2 分子库文件(multi-mol2.mol2)、输出目录(out_dir)


split_multimol2.py:

  1. #Python2 or Python3

  2. #AspirinCode 2018

  3. #Script that splits a multi-mol2 file into individual mol2 files.

  4. #python split_multimol2.py multi-mol2.mol2 out_dir

  5. import sys

  6. import os

  7. def split_multimol2(multimol2):

  8. """

  9.    Splits a multi-mol2 file.

  10.    Parameters

  11.    ----------

  12.    multimol2 : str

  13.      Path to the multi-mol2 file.

  14.    Returns

  15.    ----------

  16.    A generator object for lists for every extracted mol2-file. Lists contain

  17.      the molecule ID and the mol2 file contents.

  18.      e.g., ['ID1234', '@<TRIPOS>MOLECULE...'

  19.    """

  20. with open(multimol2, 'r') as mol2file:

  21. line = mol2file.readline()

  22. while not mol2file.tell() == os.fstat(mol2file.fileno()).st_size:

  23. if line.startswith("@<TRIPOS>MOLECULE"):

  24. mol2cont = []

  25. mol2cont.append(line)

  26. line = mol2file.readline()

  27. molecule_id = line.strip()

  28. while not line.startswith("@<TRIPOS>MOLECULE"):

  29. mol2cont.append(line)

  30. line = mol2file.readline()

  31. if mol2file.tell() == os.fstat(mol2file.fileno()).st_size:

  32. mol2cont.append(line)

  33. break

  34. mol2cont[-1] = mol2cont[-1].rstrip() # removes blank line at file end

  35. yield [molecule_id, "".join(mol2cont)]

  36. def write_multimol2(multimol2, out_dir):

  37. """

  38.    Splits a multi-mol2 file into smaller multi-mol2 files.

  39.    Parameters

  40.    -----------

  41.    multimol2 : str

  42.      Path to the multi-mol2 file.

  43.    out_dir : str:

  44.      Output directory. New files will be named

  45.      <molecule_name_1>.mol2, ... <molecule_name_n>.mol2

  46.    Returns

  47.    -----------

  48.    chunks : int

  49.      Number of files written.

  50.    """

  51. if not out_dir:

  52. os.mkdir(out_dir)

  53. single_mol2s = split_multimol2(args.MOL2_FILE)

  54. for mol2 in single_mol2s:

  55. out_mol2 = os.path.join(args.OUT_DIR, mol2[0]) + '.mol2'

  56. with open(out_mol2, 'w') as out_file:

  57. for line in mol2[1]:

  58. out_file.write(line)

  59. out_file.write('\n')

  60. def write_multimol2_chunks(multimol2, chunk_size, out_dir):

  61. """

  62.    Splits a multi-mol2 file into smaller multi-mol2 files.

  63.    Parameters

  64.    -----------

  65.    multimol2 : str

  66.      Path to the multi-mol2 file.

  67.    chunksize : int

  68.      Number of mol2 files per chunk.

  69.    out_dir : str:

  70.      Output directory. New files will be named

  71.      <multimol2>_1.mol2, ... <multimol2>_n.mol2

  72.    Returns

  73.    -----------

  74.    chunks : int

  75.      Number of files written.

  76.    """

  77. if not os.path.exists(out_dir):

  78. os.mkdir(out_dir)

  79. out_path_stem = os.path.dirname(multimol2)

  80. out_file_stem = os.path.basename(multimol2).split('.mol2')[0]

  81. cnt = 0

  82. chunks = 1

  83. out_file = open(os.path.join(out_dir, out_file_stem)+'_%d.mol2' % chunks, 'w')

  84. for mol2 in split_multimol2(multimol2):

  85. cnt += 1

  86. if cnt == chunk_size:

  87. cnt = 0

  88. chunks += 1

  89. out_file.close()

  90. out_file = open(os.path.join(out_dir, out_file_stem)+'_%d.mol2' % chunks, 'w')

  91. out_file.write(mol2[1] + '\n')

  92. out_file.close()

  93. return chunks

  94. if __name__ == '__main__':

  95. import argparse

  96. parser = argparse.ArgumentParser(

  97. description='Splits a multi-mol2 file into individual mol2 files',

  98. formatter_class=argparse.RawTextHelpFormatter

  99. )

  100. parser.add_argument('MOL2_FILE')

  101. parser.add_argument('OUT_DIR')

  102. parser.add_argument('-c', '--chunksize', help='Number of MOL2 structures per file (1 by default)', type=int)

  103. parser.add_argument('-v', '--version', action='version', version='split_multimol2 v. 1.1')

  104. args = parser.parse_args()

  105. if args.chunksize:

  106. write_multimol2_chunks(multimol2=args.MOL2_FILE, chunk_size=args.chunksize, out_dir=args.OUT_DIR)

  107. else:

  108. write_multimol2(multimol2=args.MOL2_FILE, out_dir=args.OUT_DIR)

    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约