Source code for WaterNetworkAnalysis.WaterNetworkAnalysis

from __future__ import annotations

import os
import stat

import MDAnalysis
import MDAnalysis as mda
import MDAnalysis.analysis
import MDAnalysis.analysis.align
import MDAnalysis.core.topologyattrs
import numpy as np
from ConservedWaterSearch.utils import (
    get_orientations_from_positions,
    read_results,
)
from MDAnalysis.analysis.density import Density, DensityAnalysis
from MDAnalysis.topology.guessers import guess_types
from scipy.ndimage import gaussian_filter


def generate_water_selection_string():
    """Returns a selection string for selecting water molecules.

    This function generates a string that can be used in MDAnalysis's
    select_atoms method to select water molecules from a molecular dynamics
    simulation. The selection string is based on common residue names used
    for water molecules across different simulation packages and water models.

    Returns:
        str: Selection command in form of a string, which can be used with
             MDAnalysis's select_atoms method.

    Example::

        # Generate water selection string
        water_selection_string = generate_water_selection_string()

        # Use the selection string to select water molecules
        u = mda.Universe('topology_file.top', 'trajectory_file.trr')
        water_molecules = u.select_atoms(water_selection_string)
    """
    # List of common residue names for water
    water_resnames = [
        "SOL",
        "WAT",
        "H2O",
        "TIP3",
        "TIP4",
        "TIP5",
        "SPC",
        "SPCE",
        "HOH",
        "DOD",
        "T3P",
        "T4P",
    ]

    # Creating the selection string
    return " or ".join(f"resname {resname}" for resname in water_resnames)


[docs] def get_selection_string_from_resnums( resids: list[int], selection_type: str = "MDA" ) -> str: """Return selection string for given residue ids. Returns the selection command string for different programs based on amioacid residue IDs list given. Args: resids (list[int]): list of aminoacid residue ids. selection_type (str, optional): selection program language. Options:"MDA", "PYMOL", "PROBIS" and "NGLVIEW" Defaults to "MDA". Returns: str: selection command in form of a string Example:: # list of resids resids = [8,12,143,144] # print PYMOL selection string get_selection_string_from_resnums(resids, selection_type = "PYMOL") """ selection: str = "" for i in resids: if selection_type == "MDA": selection += "resnum " + str(i) + " or " if selection_type == "PYMOL": selection += str(i) + "+" if selection_type == "PROBIS": selection += str(i) + "," if selection_type == "NGLVIEW": selection += str(i) + " or " if selection_type in ("MDA", "NGLVIEW"): selection = selection[: len(selection) - 3] if selection_type == "PYMOL": selection = selection[: len(selection) - 1] selection = "resi " + selection if selection_type == "PROBIS": selection = selection[: len(selection) - 1] selection = ( ' -motif1 "[:A and (' + selection + ')]" -motif2 "[:A and (' + selection + ')]" ' ) return selection
[docs] def get_center_of_selection( selection: str, trajectory: str, topology: str | None = None ) -> np.ndarray: """Compute centre of selection with MDAnalysis. Calculates coordinates in xyz of the centre of selection using MDAnalysis. Args: selection (str): selection string for MDAnalysis trajectory (str): trajectory filename - anything that is accepted by MDAnalysis should work topology (str | None, optional): topology filename. Defaults to None. Returns: np.ndarray: returns array that contains coordinates of center of selection Example:: # find center of active site defined by residue ids resids = [8,12,143,144] get_center_of_selection(get_selection_string_from_resnums(resids)) """ if topology: u: mda.Universe = mda.Universe(topology, trajectory) else: u = mda.Universe(trajectory) s: mda.AtomGroup = u.select_atoms(selection) return s.center(None)
[docs] def calculate_oxygen_density_map( selection_center: np.ndarray, trajectory: str, topology: str | None = None, dist: float = 12.0, delta: float = 0.4, every: int = 1, SOL: str | None = None, OW: str | None = None, output_name: str = "water.dx", ) -> Density: """Generate oxygen density maps. Generate oxygen density maps using MDAnalysis. Args: selection_center (np.ndarray): center of selection around which waters will be selected. trajectory (str): trajectory filename. topology (str | None, optional): Topology filename if available. Defaults to None. dist (float, optional): distance around selection center inside which the oxygen will be selected. Defaults to 12.0. delta (float, optional): bin size for density map. Defaults to 0.4 Angstroms. every (int, optional): Take every ``n_every`` snapshot instead of taking all the snapshots (every = 1) for alignment. Defaults to 1. SOL (str, optional): Residue name of the water residue. If ``None`` it will be determined automatically. Defaults to None. OW (str, optional): Name of the oxygen atom. If ``None`` it will be determined automatically. Defaults to None. output_name (str, optional): name of the output file, it should end with '.dx' . Defaults to "water.dx". Returns: Density: returns MDA Density object containing the density map Example:: # Generate water oxygen density map near active site resids = [8,12,143,144] calculate_oxygen_density_map( get_center_of_selection( get_selection_string_from_resnums(resids)), trajectory = 'trajectory.pdb' ) ) """ if topology: u: mda.Universe = mda.Universe(topology, trajectory) else: u = mda.Universe(trajectory) if SOL is None: water_selection = generate_water_selection_string() else: water_selection = "resname " + SOL if OW is None: oxygen_selection = "element O" guessed_and_read_props = [ type(i) for i in list(u._topology.read_attributes) + list(u._topology.guessed_attributes) ] if MDAnalysis.core.topologyattrs.Elements not in guessed_and_read_props: u.add_TopologyAttr("elements", guess_types(u.atoms.names)) else: oxygen_selection = "name " + OW ss: mda.AtomGroup = u.select_atoms( ( f"{oxygen_selection} and {water_selection} and point " f"{selection_center[0]} {selection_center[1]} {selection_center[2]} " f"{dist}" ), updating=True, ) D: DensityAnalysis = DensityAnalysis( ss, delta=delta, gridcenter=selection_center, xdim=dist * 2, ydim=dist * 2, zdim=dist * 2, ) D.run(step=every) # oxygen vdw radius vdw_radius = 1.52 sigma = vdw_radius / delta probability_density = D.results.density.grid / D.results.density.grid.sum() smoothed_density = gaussian_filter(probability_density, sigma) smoothed_density = smoothed_density / np.max(smoothed_density) # Put smoothed density into a Density object smoothed_density_obj = Density( grid=smoothed_density, edges=D._edges, parameters={"isDensity": True}, ) smoothed_density_obj.export(output_name, type="double") return smoothed_density_obj
[docs] def make_results_pdb_MDA( water_type: list[str], waterO: np.ndarray, waterH1: np.ndarray, waterH2: np.ndarray, output_fname: str, protein_file: str | None = None, ligand_name: str | None = None, mode: str = "SOL", ) -> None: """Generate pdb file with clustering results. The water molecules determined by the clustering procedure are written in a pdb file which also contains protein and the ligand. Waters are labeled based on their hydrogen orientations (FCW for fully conserved, HCW for half conserved and WCW for weakly conserved). Uses MDAnalysis for construction of the pdb file. First 4 arguments of the function can be read from the results file by using cws.utils.read_results() or directly from the ``cws.water_clustering.WaterClustering`` class. Args: water_type (list[str]): List of water types. waterO (np.ndarray): numpy array containing coordinates of conserved waters' oxygens. waterH1 (np.ndarray): numpy array containing coordinates of conserved waters' first hydrogen waterH2 (np.ndarray): numpy array containing coordinates of conserved waters' second hydrogen output_fname (str): name of the output pdb file. Must end in '.pdb'. protein_file (str | None, optional): file which contains protein and ligand. It should be aligned in the same way as the trajectory used for calculation of conserved waters. If None no protein is saved. Defaults to None. ligand_name (str | None, optional): residue name of the ligand. If None no ligand is saved. Defaults to None. mode (str, optional): mode in which conserved waters will be saved. Options: - "SOL" - default mode. Saves water molecules as SOL so that visualisation softwares can recognise them as waters. No distinction is made between different types of conserved waters. - "cathegorise" - cathegorises the waters according to hydrogen orienation into fully conserved (FCW), half-coserved (HCW) and weakly conserved (WCW). This mode makes visualisers not able to recognise the waters as water/sol but usefull for interpreting results. Example:: # Generate pdb results file make_results_pdb_MDA( *cws.utils.read_results(), output_fname = 'results.pdb', mode = 'cathegorise' ) """ rescntr: int = 2 if protein_file is not None: us: mda.Universe = mda.Universe(protein_file) protein: mda.AtomGroup = us.select_atoms("protein") # TODO for multiple confs ligand: list = [] if ligand_name: ligand.append(us.select_atoms("resname " + str(ligand_name))) # ne radi? # if (ligand[0]==None): # print ("no ligand with given name "+str(ligand_name)) rescntr += len(ligand) + len(protein.residues) # waters n_residues: int = len(water_type) n_atoms: int = n_residues * 3 # create resindex list resindices: np.ndarray = np.repeat(range(n_residues), 3) # all water molecules belong to 1 segment segindices: list[int] = [0] * n_residues waters = mda.Universe.empty( n_atoms, n_residues=n_residues, atom_resindex=resindices, residue_segindex=segindices, trajectory=True, ) # necessary for adding coordintes waters.add_TopologyAttr("name", ["O", "H1", "H2"] * n_residues) waters.add_TopologyAttr("type", ["O", "H", "H"] * n_residues) # waters.add_TopologyAttr('segid', ['SOL']) coordinates: list[list[float]] = [] rns: list[str] = [] resid: list[int] = [] looper = zip(waterO, waterH1, waterH2, water_type, strict=False) for Opos, H1pos, H2pos, tip in looper: h2o = np.array( [ [Opos[0], Opos[1], Opos[2]], # oxygen [H1pos[0], H1pos[1], H1pos[2]], # hydrogen [H2pos[0], H2pos[1], H2pos[2]], # hydrogen ] ) coordinates.extend(h2o) if mode == "cathegorise": if tip == "FCW": rns.append("FCW") if tip == "HCW": rns.append("HCW") if tip == "WCW": rns.append("WCW") if tip == "onlyO": rns.append("onlyO") elif mode == "SOL": rns.append("SOL") resid.append(rescntr) rescntr += 1 waters.add_TopologyAttr("resname", rns) waters.add_TopologyAttr("resid", resid) coord_array = np.array(coordinates) waters.atoms.positions = coord_array bonds: list = [] for o in range(0, n_atoms, 3): bonds.extend([(o, o + 1), (o, o + 2)]) waters.add_TopologyAttr("bonds", bonds) if protein_file is not None: newU = mda.Merge( protein.atoms, *ligand, waters.atoms, ) else: newU = mda.Merge(waters.atoms) newU.dimensions = 3 if output_fname is None: if mode == "SOL": output_fname = "final_results_SOL.pdb" if mode == "cathegorise": output_fname = "final_results_cathegorise.pdb" newU.atoms.write(output_fname)
[docs] def extract_waters_from_trajectory( selection_center: np.ndarray, trajectory: str, topology: str | None = None, dist: float = 12.0, every: int = 1, SOL: str | None = None, OW: str | None = None, HW: str | None = None, extract_only_O: bool = False, save_file: str | None = None, ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """Extract waters for clustering analysis. Calculates water (oxygen and hydrogen) coordinates for all the waters in the aligned trajectory using MDAnalysis for further use in water clustering. The trajectory should be aligned previously. Args: selection_center (np.ndarray): coordinates of selection center around which waters will be selected. trajectory (str): Trajectory file name. topology (str | None, optional): Topology file name. Defaults to None. dist (float, optional): Distance around the center of selection inside which water molecules will be sampled. Defaults to 12.0. every (int, optional): Take every ``every`` snapshot instead of taking all the snapshots (every = 1) for alignment. Defaults to 1. SOL (str, optional): Residue name of the water residue. If ``None`` it will be determined automatically. Defaults to None. OW (str, optional): Name of the oxygen atom. If ``None`` it will be determined automatically. Defaults to None. HW (str, optional): Name of the hydrogen atom in water. Names checked will be the provided name and the name with a 1 or 2 appended. If ``None`` it will be determined automatically. Defaults to None. extract_only_O (bool, optional): If ``True`` only oxygen atom positions. Defaults to False. save_file (str | None, optional): File to which coordinates will be saved. If none doesn't save to a file. Defaults to None. Returns: np.ndarray or tuple[np.ndarray, np.ndarray]: returns xyz numpy arrays that contain coordinates of oxygens, and combined array of hydrogen 1 and hydrogen 2 coordinates. If ``extract_only_O`` is True, returns only oxygen coordinates. Example:: # Generate water coordinates for clustering analysis resids = [8,12,143,144] coordO, coordH = extract_waters_from_trajectory( get_center_of_selection(get_selection_string_from_resnums(resids)), trajectory = 'trajectory.xtc', topology = 'topology.tpr' ) """ if topology: u = mda.Universe(topology, trajectory) else: u = mda.Universe(trajectory) if SOL is None: water_selection = generate_water_selection_string() else: water_selection = "resname " + SOL if OW is None: oxygen_selection = "element O" else: oxygen_selection = "name " + OW if HW is None: hydrogen_selection = "element H" else: hydrogen_selection = "name " + HW + " or name " + HW + "1 or name " + HW + "2" if (HW is None and extract_only_O is False) or OW is None: guessed_and_read_props = [ type(i) for i in list(u._topology.read_attributes) + list(u._topology.guessed_attributes) ] if MDAnalysis.core.topologyattrs.Elements not in guessed_and_read_props: u.add_TopologyAttr("elements", guess_types(u.atoms.names)) if extract_only_O is False: coordsH = [] coordsO = [] mismatch_detected = False if is_pdb_file(trajectory): # Detect if any frame has a different number of atoms expected_n_atoms = len(u.atoms) try: for ts in u.trajectory: if ts.n_atoms != expected_n_atoms: mismatch_detected = True break except ValueError: mismatch_detected = True if mismatch_detected: model_counter = 0 inside_model = False buffer_lines = [] with open(trajectory) as f: for line in f: # Start of a new MODEL block if line.startswith("MODEL"): if model_counter % every == 0: inside_model = True model_counter += 1 buffer_lines = [line] continue # End of that MODEL block if inside_model and line.startswith("ENDMDL"): buffer_lines.append(line) # We have collected a full MODEL … ENDMDL block in buffer_lines # Write it to a temp PDB file frame_name = f"_temp_frame_{model_counter:03d}.pdb" with open(frame_name, "w") as tmpf: tmpf.writelines(buffer_lines) tmpf.close() # Load and process that single frame PDB u_frame = mda.Universe(frame_name) oxygens = u_frame.select_atoms( f"{oxygen_selection} and {water_selection} and point " f"{selection_center[0]} {selection_center[1]} " f"{selection_center[2]} {dist}" ) for oxygen in oxygens: coordsO.append(oxygen.position.copy()) if not extract_only_O: hydrogens = u_frame.select_atoms( f"resid {oxygen.resid} and " f"({hydrogen_selection}) and " f"({water_selection})" ) if len(hydrogens) != 2: msg = ( f"Water {oxygen.resid} has too many hydrogens " f"({len(hydrogens)}) in frame {model_counter}." ) os.remove(frame_name) raise Exception(msg) for hpos in hydrogens.positions: coordsH.append(hpos.copy()) # Delete the temp file before reading the next model os.remove(frame_name) # Reset for next MODEL block inside_model = False buffer_lines = [] continue # If we are inside a MODEL…ENDMDL block, keep adding lines if inside_model: buffer_lines.append(line) else: # loop over for _ in u.trajectory[::every]: oxygens = u.select_atoms( ( f"{oxygen_selection} and {water_selection} and point " f"{selection_center[0]} {selection_center[1]} {selection_center[2]}" f" {dist}" ), ) for oxygen in oxygens: coordsO.append(oxygen.position) if extract_only_O is False: hydrogens = u.select_atoms( f"resid {oxygen.resid} and " f"({hydrogen_selection}) and " f"({water_selection})" ) if len(hydrogens) != 2: exception_string: str = ( f"Water {oxygen.resid} has too many" f" hydrogens ({len(hydrogens)})." ) raise Exception(exception_string) for hydrogen_positions in hydrogens.positions: coordsH.append(hydrogen_positions) Odata: np.ndarray = np.asarray(coordsO) if extract_only_O is False: coordsH = np.asarray(coordsH) Opos, H1, H2 = get_orientations_from_positions(Odata, coordsH) # SAVEs full XYZ coordinates, not O coordinates and h orientations!!!!! if save_file is not None: np.savetxt(save_file, np.c_[Opos, H1, H2]) return Odata, coordsH if save_file is not None: np.savetxt(save_file, Odata) return Odata
def __align_mda( unaligned_trj_file: str, pdb_to_align_to: str, output_trj_file: str, topology: str | None = None, align_selection: str = "protein", ) -> None: """Alignment via MDAnalysis. This function is not ment to be used directly. Use align_trajectory instead. Args: unaligned_trj_file (str): trajectory file containing unaligned trajectory. pdb_to_align_to (str): pdb file that containes the reference state to align to. output_trj_file (str): output file trajectory will be saved to. topology (str | None, optional): File containing topology. Defaults to None. align_selection (str, optional): selection to align to. Defaults to "protein". """ if topology: mob: mda.Universe = mda.Universe(topology, unaligned_trj_file) else: mob = mda.Universe(unaligned_trj_file) ref: mda.Universe = mda.Universe(pdb_to_align_to) mda.analysis.align.AlignTraj( mob, ref, select=align_selection, filename=output_trj_file ).run() def __align_probis( unaligned_trj_file: str, pdb_to_align_to: str, output_trj_file: str, probis_exec: str, topology: str | None = None, align_selection: str = "protein", ) -> None: """Alignment function that uses probis. This function is not ment to be used directly. Use align_trajectory instead.Uses probis algorithm for alignment. See `this link <http://insilab.org/probis-algorithm/>`__ for more info. Args: unaligned_trj_file (str): trajectory file containing unaligned trajectory. pdb_to_align_to (str): pdb file that containes the reference state to align to. output_trj_file (str): output file trajectory will be saved to. probis_exec (str | None, optional): Executable to run probis algorithm. topology (str | None, optional): File containing topology. Defaults to None. align_selection (str, optional): selection to align to. Defaults to "protein". """ if probis_exec is None: msg = ( "probis_exec must be provided or probis must be installed in the " "current working directory." ) raise Exception(msg) st: os.stat_result = os.stat(probis_exec) os.chmod(probis_exec, st.st_mode | stat.S_IEXEC) if align_selection == "protein": sele: str = "" else: sele = align_selection if topology: u: mda.Universe = mda.Universe(topology, unaligned_trj_file) else: u = mda.Universe(unaligned_trj_file) mobile_pdb: str = "mobile.pdb" u.select_atoms(align_selection).segments.segids = "A" u.add_TopologyAttr("chainIDs") u.select_atoms(align_selection).chainIDs = "A" forwrite = u.atoms with mda.Writer(output_trj_file, multiframe=True) as W: for snap in u.trajectory: with mda.Writer(mobile_pdb) as mw: mw.write(forwrite) # string to run runstring: str = ( probis_exec + " -compare -super " + sele + " -f1 " + str(pdb_to_align_to) + " -c1 A -f2 " + str(mobile_pdb) + " -c2 A" ) if probis_exec == "probis": runstring = "./" + runstring # run probis print(runstring) os.system(runstring) # read in rota.pdb files and remove all other files newal: mda.Universe = mda.Universe( pdb_to_align_to[:4] + "A_" + mobile_pdb[:4] + "A.0.rota.pdb" ) # remove aligned pdb files removestring: str = "rm *.rota.pdb probis*.tmp " + mobile_pdb os.system(removestring) # add cell dimensions and other missing info? newal.dimensions = snap.dimensions # append snapshot to new trajectory selnew = newal.atoms W.write(selnew)
[docs] def align_trajectory( trajectory: str, output_trj_file: str, align_target_file_name: str, topology: str | None = None, every: int = 1, align_mode: str = "mda", align_target: int | None = -1, align_selection: str = "protein", probis_exec: str | None = None, ) -> None: """Align the trajectory. Before running water clustering for identification of conserved water molecules the trajectory should be aligned first. Alignment can be done via MDAnalysis or using the probis algorithm. Whole protein is aligned by default. To select the align reference state either select an integer for ``align_target`` and specify a file name to which the align target will be saved to with ``align_target_file_name`` OR set align_target to ``None`` and ``align_target_file_name`` will be read and used as align target. The trajectory or topology should contain information on bond topology for alignment. Supported topology file types: DATA DMS GSD MMTF MOL2 PARMED PDB ENT PSF TOP PRMTOP PARM7 TPR TXYZ ARC XML XPDB Alternatively the whole trajectory can be provided in some of the above given file types as well. Args: trajectory (str): File name containing unaligned trajectory. output_trj_file (str): output file name for aligned trajectory. align_target_file_name (str): File name for saving the align target (usually pdb) if ``align_target`` is int. If align target is ``None``, the align target will be read from this file instead. topology (str | None, optional): Topology file name. Defaults to ``None``. every (int, optional): Take every ``every`` snapshot instead of taking all the snapshots (every = 1) for alignment. Defaults to 1. align_mode (str, optional): Align algorithm to use. "mda" uses MDAnalysis while "probis" uses the probis algorithm. Defaults to "mda". align_target (int | None, optional): Align target. If ``None`` the align target is read from the ``align_target_file_name``. If a number is given uses the given snapshot of the trajectory as the align target. If -1 uses the last snapshot. Defaults to -1. align_selection (str, optional): Selection to align to. Defaults to "protein". probis_exec (str | None, optional): location of probis executable if probis is used. Must be provided if ``align_mode`` is "probis". Example:: # align the trajectory and save to a file align_trajectory( trajectory="trajectory.xtc", output_trj_file="aligned_trajectory.xtc", align_target_file_name='aligned.pdb', align_mode="mda", align_target=0, align_selection="protein", topology="topology.tpr", ) """ # check that output_trj_file name is not the same as trajectory, topology or # aligntarget if output_trj_file in {trajectory, topology, align_target_file_name}: exception_string = ( "output_trj_file name cannot be the same as " "trajectory, topology or align_target_file_name" ) raise Exception(exception_string) if align_target_file_name in {trajectory, topology}: exception_string = ( "align_target_file_name name cannot be the same as " "trajectory or topology" ) raise Exception(exception_string) if topology is not None: mob = mda.Universe(topology, trajectory) ref: mda.Universe = mda.Universe(topology, trajectory) else: mob: mda.Universe = mda.Universe(trajectory) ref = mda.Universe(trajectory) # center the box to center of mass if topology is None: if not ( trajectory.upper().endswith( ( "DATA", "DMS", "GSD", "MMTF", "MOL2", "PARMED", "PDB", "ENT", "PSF", "TOP", "PRMTOP", "PARM7", "TPR", "TXYZ", "ARC", "XML", "XPDB", "PDB", ) ) ): exception_string: str = ( "unsupported trajectory file format. bond " "topology information is needed for alignment. Consider " "providing a topology file." ) elif isinstance(topology, str): if not ( topology.upper().endswith( ( "DATA", "DMS", "GSD", "MMTF", "MOL2", "PARMED", "PDB", "ENT", "PSF", "TOP", "PRMTOP", "PARM7", "TPR", "TXYZ", "ARC", "XML", "XPDB", "PDB", ) ) ): exception_string: str = ( "unsupported topology file type. Bond information is " "needed for alignment." ) raise Exception(exception_string) ref.select_atoms(align_selection).segments.segids = "A" ref.add_TopologyAttr("chainIDs") ref.select_atoms(align_selection).chainIDs = "A" if isinstance(align_target, int): wr = ref.atoms wr.write(align_target_file_name, frames=ref.trajectory[[align_target]]) if every > 1: unaligned_trj_file: str = ( trajectory[: trajectory.rfind(".") - 1] + f"every_{every}" + trajectory[trajectory.rfind(".") :] ) with mda.Writer(unaligned_trj_file, multiframe=True) as W: for _ in mob.trajectory[::every]: W.write(mob.atoms) elif every == 1: unaligned_trj_file: str = trajectory else: exception_string: str = "every must be a positive integer" raise Exception(exception_string) if align_mode == "probis": __align_probis( unaligned_trj_file=unaligned_trj_file, pdb_to_align_to=align_target_file_name, output_trj_file=output_trj_file, probis_exec=probis_exec, topology=topology, align_selection=align_selection, ) elif align_mode == "mda": __align_mda( unaligned_trj_file=unaligned_trj_file, pdb_to_align_to=align_target_file_name, output_trj_file=output_trj_file, topology=topology, align_selection=align_selection, ) else: exception_string: str = "wrong alignemnt mode, must be probis or mda" raise Exception(exception_string)
[docs] def align_and_extract_waters( center_for_water_selection: np.ndarray, trajectory: str, aligned_trajectory_filename: str, align_target_file_name: str, topology: str | None = None, every: int = 1, align_mode: str = "mda", align_target: int | None = -1, align_selection: str = "protein", probis_exec: str | None = None, dist: float = 12.0, SOL: str = "SOL", OW: str = "OW", HW: str = "HW", ) -> tuple[np.ndarray]: """Align and extracts waters from trajectory. Aligns the trajectory first and then extracts water molecules for further water clustering analysis. If trajectory has already been aligned, one can use :py:meth:`extract_waters_from_trajectory` to extract the water molecules for water clustering analysis. Args: center_for_water_selection (np.ndarray): Coordiantes around which all water molecules inside a radius ``dist`` will be seleceted for water clustering analysis. trajectory (str): File name of the trajectory from which waters will be extracted. aligned_trajectory_filename (str): File name to which aligned trajectory will be saved. align_target_file_name (str): File name for saving the align target (usually pdb) if ``align_target`` is ``int``. If align target is ``None``, the align target will be read from this file instead! topology (str | None, optional): Topology file name. Defaults to ``None``. every (int, optional): Take every ``every`` snapshot instead of taking all the snapshots (every = 1) for alignment. Defaults to 1. align_mode (str, optional): Align algorithm to use. "mda" uses MDAnalysis while "probis" uses the probis algorithm. Defaults to "mda". align_target (int | None, optional): Align target. If ``None`` the align target is read from the align_target_file_name. If a number is given uses the given snapshot of the trajectory as the align target. If -1 uses the last snapshot. Defaults to -1. align_selection (str, optional): Selection to align to. Defaults to "protein". probis_exec (str | None, optional): location of probis executable if probis is used. If None it is downloaded from the internet. Defaults to None. dist (float, optional): Radius around ``center_for_water_selection`` to be used for extraction of water molecules. Defaults to 12.0. SOL (str, optional): Residue name for waters. Defaults to "SOL". OW (str, optional): Name of the oxygen atom. Defaults to "OW". HW (str, optional): Name of the hydrogen atom. Defaults to "HW". Returns: tuple[np.ndarray, np.ndarray]: Returns coordinates of oxygen atoms, first hydrogen atom and second hydrogen atom in three seperate numpy arrays. Each row in each array makes up coordinates of a single water molecule. Example:: # Generate water coordinates for clustering analysis from unaligned trajectory resids = [8,12,143,144] align_and_extract_waters( get_center_of_selection(get_selection_string_from_resnums(resids)), trajectory = 'trajectory.xtc', aligned_trajectory_filename = 'aligned_trj.xtc', align_target_file_name = 'aligned.pdb', topology = 'topology.tpr', every = 1, align_mode = "mda", align_target= 0, align_selection = "protein", dist = 10.0, ) """ align_trajectory( trajectory=trajectory, output_trj_file=aligned_trajectory_filename, align_target_file_name=align_target_file_name, topology=topology, every=every, align_mode=align_mode, align_target=align_target, align_selection=align_selection, probis_exec=probis_exec, ) return extract_waters_from_trajectory( center_for_water_selection, trajectory=aligned_trajectory_filename, dist=dist, topology=topology, SOL=SOL, OW=OW, HW=HW, )
[docs] def read_results_and_make_pdb( fname: str = "Clustering_results.dat", typefname: str = "Type_Clustering_results.dat", protein_file: str = "aligned.pdb", ligand_name: str | None = None, output_fname: str | None = None, mode: str = "SOL", ) -> None: """Read results from files and generate a pdb file. Args: fname (str, optional): File name with clustering results - coordinates of water molecules. Defaults to "Clustering_results.dat". typefname (str, optional): File name which contains the types of each water molecule. Defaults to "Type_Clustering_results.dat". protein_file (str, optional): File name which contains the reference structure trajectory has been aligned to. Defaults to "aligned.pdb". ligand_name (str | None, optional): Residue name for the ligand. If none is given, no ligand is extracted and visualised/saved in the pdb file. Defaults to None. output_fname (str | None, optional): Name of the output file (pdb prefered). Defaults to None. mode (str, optional): mode in which conserved waters will besaved. Options: - "SOL" - default mode. Saves water molecules as SOL so that visualisation softwares can recognise them as waters. No distinction is made between different types of conserved waters. - "cathegorise" - cathegorises the waters according to hydrogen orienation into fully conserved (FCW), half-coserved (HCW) and weakly conserved (WCW). This mode makes visualisers not able to recognise the waters as water/sol but usefull for interpreting results. Example:: # Generate pdb results file read_results_and_make_pdb( fname = 'Results.dat', typefname = 'TypeResults.dat', protein_file = 'aligned_protein.pdb', ligand_name = 'UBY', output_fname = 'results.pdb', mode = 'cathegorise', ) """ water_type, waterO, waterH1, waterH2 = read_results(fname, typefname) make_results_pdb_MDA( water_type=water_type, waterO=waterO, waterH1=waterH1, waterH2=waterH2, protein_file=protein_file, ligand_name=ligand_name, output_fname=output_fname, mode=mode, )
def is_pdb_file(filepath: str) -> bool: """Return True if `filepath` is probably a PDB. If it's binary (like XTC/DCD), this will return False safely. """ ext = os.path.splitext(filepath)[1].lower() if ext in {".pdb", ".ent", ".pdb1", ".pdb2"}: return True # If extension isn't a known PDB, peek at first few lines: try: with open(filepath, encoding="utf-8") as f: for _ in range(10): line = next(f) if line.startswith(("ATOM ", "HETATM", "HEADER", "TITLE")): return True except (UnicodeDecodeError, OSError, StopIteration): # If it fails to decode (binary), or can't open, it's not a PDB. return False return False