from __future__ import annotations
import os
import stat
import MDAnalysis
import MDAnalysis as mda
import MDAnalysis.analysis
import MDAnalysis.analysis.align
import MDAnalysis.core.topologyattrs
import numpy as np
from ConservedWaterSearch.utils import (
get_orientations_from_positions,
read_results,
)
from MDAnalysis.analysis.density import Density, DensityAnalysis
from MDAnalysis.topology.guessers import guess_types
from scipy.ndimage import gaussian_filter
def generate_water_selection_string():
"""Returns a selection string for selecting water molecules.
This function generates a string that can be used in MDAnalysis's
select_atoms method to select water molecules from a molecular dynamics
simulation. The selection string is based on common residue names used
for water molecules across different simulation packages and water models.
Returns:
str: Selection command in form of a string, which can be used with
MDAnalysis's select_atoms method.
Example::
# Generate water selection string
water_selection_string = generate_water_selection_string()
# Use the selection string to select water molecules
u = mda.Universe('topology_file.top', 'trajectory_file.trr')
water_molecules = u.select_atoms(water_selection_string)
"""
# List of common residue names for water
water_resnames = [
"SOL",
"WAT",
"H2O",
"TIP3",
"TIP4",
"TIP5",
"SPC",
"SPCE",
"HOH",
"DOD",
"T3P",
"T4P",
]
# Creating the selection string
return " or ".join(f"resname {resname}" for resname in water_resnames)
[docs]
def get_selection_string_from_resnums(
resids: list[int], selection_type: str = "MDA"
) -> str:
"""Return selection string for given residue ids.
Returns the selection command string for different programs based on
amioacid residue IDs list given.
Args:
resids (list[int]): list of aminoacid residue ids.
selection_type (str, optional): selection program language.
Options:"MDA", "PYMOL", "PROBIS" and "NGLVIEW" Defaults to
"MDA".
Returns:
str: selection command in form of a string
Example::
# list of resids
resids = [8,12,143,144] # print PYMOL selection string
get_selection_string_from_resnums(resids, selection_type =
"PYMOL")
"""
selection: str = ""
for i in resids:
if selection_type == "MDA":
selection += "resnum " + str(i) + " or "
if selection_type == "PYMOL":
selection += str(i) + "+"
if selection_type == "PROBIS":
selection += str(i) + ","
if selection_type == "NGLVIEW":
selection += str(i) + " or "
if selection_type in ("MDA", "NGLVIEW"):
selection = selection[: len(selection) - 3]
if selection_type == "PYMOL":
selection = selection[: len(selection) - 1]
selection = "resi " + selection
if selection_type == "PROBIS":
selection = selection[: len(selection) - 1]
selection = (
' -motif1 "[:A and ('
+ selection
+ ')]" -motif2 "[:A and ('
+ selection
+ ')]" '
)
return selection
[docs]
def get_center_of_selection(
selection: str, trajectory: str, topology: str | None = None
) -> np.ndarray:
"""Compute centre of selection with MDAnalysis.
Calculates coordinates in xyz of the centre of selection using
MDAnalysis.
Args:
selection (str): selection string for MDAnalysis
trajectory (str): trajectory filename - anything that is
accepted by MDAnalysis should work
topology (str | None, optional): topology filename. Defaults to
None.
Returns:
np.ndarray:
returns array that contains coordinates of center of selection
Example::
# find center of active site defined by residue ids
resids = [8,12,143,144]
get_center_of_selection(get_selection_string_from_resnums(resids))
"""
if topology:
u: mda.Universe = mda.Universe(topology, trajectory)
else:
u = mda.Universe(trajectory)
s: mda.AtomGroup = u.select_atoms(selection)
return s.center(None)
[docs]
def calculate_oxygen_density_map(
selection_center: np.ndarray,
trajectory: str,
topology: str | None = None,
dist: float = 12.0,
delta: float = 0.4,
every: int = 1,
SOL: str | None = None,
OW: str | None = None,
output_name: str = "water.dx",
) -> Density:
"""Generate oxygen density maps.
Generate oxygen density maps using MDAnalysis.
Args:
selection_center (np.ndarray): center of selection
around which waters will be selected.
trajectory (str): trajectory filename.
topology (str | None, optional): Topology filename if available.
Defaults to None.
dist (float, optional): distance around selection center inside
which the oxygen will be selected. Defaults to 12.0.
delta (float, optional): bin size for density map. Defaults to
0.4 Angstroms.
every (int, optional): Take every ``n_every`` snapshot instead
of taking all the snapshots (every = 1) for alignment.
Defaults to 1.
SOL (str, optional): Residue name of the water residue. If ``None`` it
will be determined automatically. Defaults to None.
OW (str, optional): Name of the oxygen atom. If ``None`` it will
be determined automatically. Defaults to None.
output_name (str, optional): name of the output file, it should
end with '.dx' . Defaults to "water.dx".
Returns:
Density:
returns MDA Density object containing the density map
Example::
# Generate water oxygen density map near active site
resids = [8,12,143,144] calculate_oxygen_density_map(
get_center_of_selection(
get_selection_string_from_resnums(resids)), trajectory =
'trajectory.pdb'
)
)
"""
if topology:
u: mda.Universe = mda.Universe(topology, trajectory)
else:
u = mda.Universe(trajectory)
if SOL is None:
water_selection = generate_water_selection_string()
else:
water_selection = "resname " + SOL
if OW is None:
oxygen_selection = "element O"
guessed_and_read_props = [
type(i)
for i in list(u._topology.read_attributes)
+ list(u._topology.guessed_attributes)
]
if MDAnalysis.core.topologyattrs.Elements not in guessed_and_read_props:
u.add_TopologyAttr("elements", guess_types(u.atoms.names))
else:
oxygen_selection = "name " + OW
ss: mda.AtomGroup = u.select_atoms(
(
f"{oxygen_selection} and {water_selection} and point "
f"{selection_center[0]} {selection_center[1]} {selection_center[2]} "
f"{dist}"
),
updating=True,
)
D: DensityAnalysis = DensityAnalysis(
ss,
delta=delta,
gridcenter=selection_center,
xdim=dist * 2,
ydim=dist * 2,
zdim=dist * 2,
)
D.run(step=every)
# oxygen vdw radius
vdw_radius = 1.52
sigma = vdw_radius / delta
probability_density = D.results.density.grid / D.results.density.grid.sum()
smoothed_density = gaussian_filter(probability_density, sigma)
smoothed_density = smoothed_density / np.max(smoothed_density)
# Put smoothed density into a Density object
smoothed_density_obj = Density(
grid=smoothed_density,
edges=D._edges,
parameters={"isDensity": True},
)
smoothed_density_obj.export(output_name, type="double")
return smoothed_density_obj
[docs]
def make_results_pdb_MDA(
water_type: list[str],
waterO: np.ndarray,
waterH1: np.ndarray,
waterH2: np.ndarray,
output_fname: str,
protein_file: str | None = None,
ligand_name: str | None = None,
mode: str = "SOL",
) -> None:
"""Generate pdb file with clustering results.
The water molecules determined by the clustering procedure are
written in a pdb file which also contains protein and the ligand.
Waters are labeled based on their hydrogen orientations (FCW for
fully conserved, HCW for half conserved and WCW for weakly
conserved). Uses MDAnalysis for construction of the pdb file.
First 4 arguments of the function can be read from the results file
by using cws.utils.read_results() or directly from the
``cws.water_clustering.WaterClustering`` class.
Args:
water_type (list[str]): List of water types.
waterO (np.ndarray): numpy array containing coordinates
of conserved waters' oxygens.
waterH1 (np.ndarray): numpy array containing coordinates
of conserved waters' first hydrogen
waterH2 (np.ndarray): numpy array containing coordinates
of conserved waters' second hydrogen
output_fname (str): name of the output pdb file. Must end in '.pdb'.
protein_file (str | None, optional): file which contains protein
and ligand. It should be aligned in the same way as the
trajectory used for calculation of conserved waters. If None no
protein is saved. Defaults to None.
ligand_name (str | None, optional): residue name of the ligand.
If None no ligand is saved. Defaults to None.
mode (str, optional): mode in which conserved waters will be
saved. Options:
- "SOL" - default mode. Saves water molecules as SOL so that
visualisation softwares can recognise them as waters. No
distinction is made between different types of conserved
waters.
- "cathegorise" - cathegorises the waters according to
hydrogen orienation into fully conserved (FCW),
half-coserved (HCW) and weakly conserved (WCW). This mode
makes visualisers not able to recognise the waters as
water/sol but usefull for interpreting results.
Example::
# Generate pdb results file
make_results_pdb_MDA(
*cws.utils.read_results(),
output_fname = 'results.pdb',
mode = 'cathegorise'
)
"""
rescntr: int = 2
if protein_file is not None:
us: mda.Universe = mda.Universe(protein_file)
protein: mda.AtomGroup = us.select_atoms("protein")
# TODO for multiple confs
ligand: list = []
if ligand_name:
ligand.append(us.select_atoms("resname " + str(ligand_name)))
# ne radi?
# if (ligand[0]==None):
# print ("no ligand with given name "+str(ligand_name))
rescntr += len(ligand) + len(protein.residues)
# waters
n_residues: int = len(water_type)
n_atoms: int = n_residues * 3
# create resindex list
resindices: np.ndarray = np.repeat(range(n_residues), 3)
# all water molecules belong to 1 segment
segindices: list[int] = [0] * n_residues
waters = mda.Universe.empty(
n_atoms,
n_residues=n_residues,
atom_resindex=resindices,
residue_segindex=segindices,
trajectory=True,
) # necessary for adding coordintes
waters.add_TopologyAttr("name", ["O", "H1", "H2"] * n_residues)
waters.add_TopologyAttr("type", ["O", "H", "H"] * n_residues)
# waters.add_TopologyAttr('segid', ['SOL'])
coordinates: list[list[float]] = []
rns: list[str] = []
resid: list[int] = []
looper = zip(waterO, waterH1, waterH2, water_type, strict=False)
for Opos, H1pos, H2pos, tip in looper:
h2o = np.array(
[
[Opos[0], Opos[1], Opos[2]], # oxygen
[H1pos[0], H1pos[1], H1pos[2]], # hydrogen
[H2pos[0], H2pos[1], H2pos[2]], # hydrogen
]
)
coordinates.extend(h2o)
if mode == "cathegorise":
if tip == "FCW":
rns.append("FCW")
if tip == "HCW":
rns.append("HCW")
if tip == "WCW":
rns.append("WCW")
if tip == "onlyO":
rns.append("onlyO")
elif mode == "SOL":
rns.append("SOL")
resid.append(rescntr)
rescntr += 1
waters.add_TopologyAttr("resname", rns)
waters.add_TopologyAttr("resid", resid)
coord_array = np.array(coordinates)
waters.atoms.positions = coord_array
bonds: list = []
for o in range(0, n_atoms, 3):
bonds.extend([(o, o + 1), (o, o + 2)])
waters.add_TopologyAttr("bonds", bonds)
if protein_file is not None:
newU = mda.Merge(
protein.atoms,
*ligand,
waters.atoms,
)
else:
newU = mda.Merge(waters.atoms)
newU.dimensions = 3
if output_fname is None:
if mode == "SOL":
output_fname = "final_results_SOL.pdb"
if mode == "cathegorise":
output_fname = "final_results_cathegorise.pdb"
newU.atoms.write(output_fname)
def __align_mda(
unaligned_trj_file: str,
pdb_to_align_to: str,
output_trj_file: str,
topology: str | None = None,
align_selection: str = "protein",
) -> None:
"""Alignment via MDAnalysis.
This function is not ment to be used directly. Use align_trajectory instead.
Args:
unaligned_trj_file (str): trajectory file containing unaligned
trajectory.
pdb_to_align_to (str): pdb file that containes the reference
state to align to.
output_trj_file (str): output file trajectory will be saved to.
topology (str | None, optional): File containing topology.
Defaults to None.
align_selection (str, optional): selection to align to. Defaults
to "protein".
"""
if topology:
mob: mda.Universe = mda.Universe(topology, unaligned_trj_file)
else:
mob = mda.Universe(unaligned_trj_file)
ref: mda.Universe = mda.Universe(pdb_to_align_to)
mda.analysis.align.AlignTraj(
mob, ref, select=align_selection, filename=output_trj_file
).run()
def __align_probis(
unaligned_trj_file: str,
pdb_to_align_to: str,
output_trj_file: str,
probis_exec: str,
topology: str | None = None,
align_selection: str = "protein",
) -> None:
"""Alignment function that uses probis.
This function is not ment to be used directly. Use align_trajectory
instead.Uses probis algorithm for alignment. See
`this link <http://insilab.org/probis-algorithm/>`__ for more info.
Args:
unaligned_trj_file (str): trajectory file containing unaligned
trajectory.
pdb_to_align_to (str): pdb file that containes the reference
state to align to.
output_trj_file (str): output file trajectory will be saved to.
probis_exec (str | None, optional): Executable to run probis
algorithm.
topology (str | None, optional): File containing topology.
Defaults to None.
align_selection (str, optional): selection to align to. Defaults
to "protein".
"""
if probis_exec is None:
msg = (
"probis_exec must be provided or probis must be installed in the "
"current working directory."
)
raise Exception(msg)
st: os.stat_result = os.stat(probis_exec)
os.chmod(probis_exec, st.st_mode | stat.S_IEXEC)
if align_selection == "protein":
sele: str = ""
else:
sele = align_selection
if topology:
u: mda.Universe = mda.Universe(topology, unaligned_trj_file)
else:
u = mda.Universe(unaligned_trj_file)
mobile_pdb: str = "mobile.pdb"
u.select_atoms(align_selection).segments.segids = "A"
u.add_TopologyAttr("chainIDs")
u.select_atoms(align_selection).chainIDs = "A"
forwrite = u.atoms
with mda.Writer(output_trj_file, multiframe=True) as W:
for snap in u.trajectory:
with mda.Writer(mobile_pdb) as mw:
mw.write(forwrite)
# string to run
runstring: str = (
probis_exec
+ " -compare -super "
+ sele
+ " -f1 "
+ str(pdb_to_align_to)
+ " -c1 A -f2 "
+ str(mobile_pdb)
+ " -c2 A"
)
if probis_exec == "probis":
runstring = "./" + runstring
# run probis
print(runstring)
os.system(runstring)
# read in rota.pdb files and remove all other files
newal: mda.Universe = mda.Universe(
pdb_to_align_to[:4] + "A_" + mobile_pdb[:4] + "A.0.rota.pdb"
)
# remove aligned pdb files
removestring: str = "rm *.rota.pdb probis*.tmp " + mobile_pdb
os.system(removestring)
# add cell dimensions and other missing info?
newal.dimensions = snap.dimensions
# append snapshot to new trajectory
selnew = newal.atoms
W.write(selnew)
[docs]
def align_trajectory(
trajectory: str,
output_trj_file: str,
align_target_file_name: str,
topology: str | None = None,
every: int = 1,
align_mode: str = "mda",
align_target: int | None = -1,
align_selection: str = "protein",
probis_exec: str | None = None,
) -> None:
"""Align the trajectory.
Before running water clustering for identification of conserved
water molecules the trajectory should be aligned first. Alignment
can be done via MDAnalysis or using the probis algorithm. Whole
protein is aligned by default. To select the align reference state
either select an integer for ``align_target`` and specify a file name to
which the align target will be saved to with ``align_target_file_name``
OR set align_target to ``None`` and ``align_target_file_name`` will be read
and used as align target.
The trajectory or topology should contain information on bond topology
for alignment. Supported topology file types:
DATA
DMS
GSD
MMTF
MOL2
PARMED
PDB
ENT
PSF
TOP
PRMTOP
PARM7
TPR
TXYZ
ARC
XML
XPDB
Alternatively the whole trajectory can be provided in some of the above
given file types as well.
Args:
trajectory (str): File name containing unaligned trajectory.
output_trj_file (str): output file name for aligned trajectory.
align_target_file_name (str): File name for saving the align
target (usually pdb) if ``align_target`` is int. If align target is
``None``, the align target will be read from this file instead.
topology (str | None, optional): Topology file name. Defaults to
``None``.
every (int, optional): Take every ``every`` snapshot instead
of taking all the snapshots (every = 1) for alignment. Defaults
to 1.
align_mode (str, optional): Align algorithm to use. "mda"
uses MDAnalysis while "probis" uses the probis algorithm.
Defaults to "mda".
align_target (int | None, optional): Align target. If ``None`` the
align target is read from the ``align_target_file_name``. If a
number is given uses the given snapshot of the trajectory as
the align target. If -1 uses the last snapshot. Defaults to
-1.
align_selection (str, optional): Selection to align to. Defaults
to "protein".
probis_exec (str | None, optional): location of probis
executable if probis is used. Must be provided if ``align_mode`` is
"probis".
Example::
# align the trajectory and save to a file
align_trajectory(
trajectory="trajectory.xtc",
output_trj_file="aligned_trajectory.xtc",
align_target_file_name='aligned.pdb', align_mode="mda",
align_target=0, align_selection="protein",
topology="topology.tpr",
)
"""
# check that output_trj_file name is not the same as trajectory, topology or
# aligntarget
if output_trj_file in {trajectory, topology, align_target_file_name}:
exception_string = (
"output_trj_file name cannot be the same as "
"trajectory, topology or align_target_file_name"
)
raise Exception(exception_string)
if align_target_file_name in {trajectory, topology}:
exception_string = (
"align_target_file_name name cannot be the same as " "trajectory or topology"
)
raise Exception(exception_string)
if topology is not None:
mob = mda.Universe(topology, trajectory)
ref: mda.Universe = mda.Universe(topology, trajectory)
else:
mob: mda.Universe = mda.Universe(trajectory)
ref = mda.Universe(trajectory)
# center the box to center of mass
if topology is None:
if not (
trajectory.upper().endswith(
(
"DATA",
"DMS",
"GSD",
"MMTF",
"MOL2",
"PARMED",
"PDB",
"ENT",
"PSF",
"TOP",
"PRMTOP",
"PARM7",
"TPR",
"TXYZ",
"ARC",
"XML",
"XPDB",
"PDB",
)
)
):
exception_string: str = (
"unsupported trajectory file format. bond "
"topology information is needed for alignment. Consider "
"providing a topology file."
)
elif isinstance(topology, str):
if not (
topology.upper().endswith(
(
"DATA",
"DMS",
"GSD",
"MMTF",
"MOL2",
"PARMED",
"PDB",
"ENT",
"PSF",
"TOP",
"PRMTOP",
"PARM7",
"TPR",
"TXYZ",
"ARC",
"XML",
"XPDB",
"PDB",
)
)
):
exception_string: str = (
"unsupported topology file type. Bond information is "
"needed for alignment."
)
raise Exception(exception_string)
ref.select_atoms(align_selection).segments.segids = "A"
ref.add_TopologyAttr("chainIDs")
ref.select_atoms(align_selection).chainIDs = "A"
if isinstance(align_target, int):
wr = ref.atoms
wr.write(align_target_file_name, frames=ref.trajectory[[align_target]])
if every > 1:
unaligned_trj_file: str = (
trajectory[: trajectory.rfind(".") - 1]
+ f"every_{every}"
+ trajectory[trajectory.rfind(".") :]
)
with mda.Writer(unaligned_trj_file, multiframe=True) as W:
for _ in mob.trajectory[::every]:
W.write(mob.atoms)
elif every == 1:
unaligned_trj_file: str = trajectory
else:
exception_string: str = "every must be a positive integer"
raise Exception(exception_string)
if align_mode == "probis":
__align_probis(
unaligned_trj_file=unaligned_trj_file,
pdb_to_align_to=align_target_file_name,
output_trj_file=output_trj_file,
probis_exec=probis_exec,
topology=topology,
align_selection=align_selection,
)
elif align_mode == "mda":
__align_mda(
unaligned_trj_file=unaligned_trj_file,
pdb_to_align_to=align_target_file_name,
output_trj_file=output_trj_file,
topology=topology,
align_selection=align_selection,
)
else:
exception_string: str = "wrong alignemnt mode, must be probis or mda"
raise Exception(exception_string)
[docs]
def align_and_extract_waters(
center_for_water_selection: np.ndarray,
trajectory: str,
aligned_trajectory_filename: str,
align_target_file_name: str,
topology: str | None = None,
every: int = 1,
align_mode: str = "mda",
align_target: int | None = -1,
align_selection: str = "protein",
probis_exec: str | None = None,
dist: float = 12.0,
SOL: str = "SOL",
OW: str = "OW",
HW: str = "HW",
) -> tuple[np.ndarray]:
"""Align and extracts waters from trajectory.
Aligns the trajectory first and then extracts water molecules for
further water clustering analysis. If trajectory has already been
aligned, one can use :py:meth:`extract_waters_from_trajectory` to
extract the water molecules for water clustering analysis.
Args:
center_for_water_selection (np.ndarray): Coordiantes
around which all water molecules inside a radius ``dist``
will be seleceted for water clustering analysis.
trajectory (str): File name of the trajectory from which waters
will be extracted.
aligned_trajectory_filename (str): File name to which aligned
trajectory will be saved.
align_target_file_name (str): File name for saving the align
target (usually pdb) if ``align_target`` is ``int``. If
align target is ``None``, the align target will be read
from this file instead!
topology (str | None, optional): Topology file name. Defaults
to ``None``.
every (int, optional): Take every ``every`` snapshot instead of
taking all the snapshots (every = 1) for alignment.
Defaults to 1.
align_mode (str, optional): Align algorithm to use. "mda" uses
MDAnalysis while "probis" uses the probis algorithm.
Defaults to "mda".
align_target (int | None, optional): Align target. If ``None``
the align target is read from the align_target_file_name.
If a number is given uses the given snapshot of the
trajectory as the align target. If -1 uses the last
snapshot. Defaults to -1.
align_selection (str, optional): Selection to align to. Defaults
to "protein".
probis_exec (str | None, optional): location of probis
executable if probis is used. If None it is downloaded
from the internet. Defaults to None.
dist (float, optional): Radius around
``center_for_water_selection`` to be used for extraction of
water molecules. Defaults to 12.0.
SOL (str, optional): Residue name for waters. Defaults to "SOL".
OW (str, optional): Name of the oxygen atom. Defaults to "OW".
HW (str, optional): Name of the hydrogen atom. Defaults to "HW".
Returns:
tuple[np.ndarray, np.ndarray]:
Returns coordinates of oxygen atoms, first
hydrogen atom and second hydrogen atom in three seperate numpy
arrays. Each row in each array makes up coordinates of a single
water molecule.
Example::
# Generate water coordinates for clustering analysis from unaligned trajectory
resids = [8,12,143,144]
align_and_extract_waters(
get_center_of_selection(get_selection_string_from_resnums(resids)),
trajectory = 'trajectory.xtc',
aligned_trajectory_filename = 'aligned_trj.xtc',
align_target_file_name = 'aligned.pdb',
topology = 'topology.tpr',
every = 1,
align_mode = "mda",
align_target= 0,
align_selection = "protein",
dist = 10.0,
)
"""
align_trajectory(
trajectory=trajectory,
output_trj_file=aligned_trajectory_filename,
align_target_file_name=align_target_file_name,
topology=topology,
every=every,
align_mode=align_mode,
align_target=align_target,
align_selection=align_selection,
probis_exec=probis_exec,
)
return extract_waters_from_trajectory(
center_for_water_selection,
trajectory=aligned_trajectory_filename,
dist=dist,
topology=topology,
SOL=SOL,
OW=OW,
HW=HW,
)
[docs]
def read_results_and_make_pdb(
fname: str = "Clustering_results.dat",
typefname: str = "Type_Clustering_results.dat",
protein_file: str = "aligned.pdb",
ligand_name: str | None = None,
output_fname: str | None = None,
mode: str = "SOL",
) -> None:
"""Read results from files and generate a pdb file.
Args:
fname (str, optional): File name with clustering results -
coordinates of water molecules. Defaults to "Clustering_results.dat".
typefname (str, optional): File name which contains the types of
each water molecule. Defaults to "Type_Clustering_results.dat".
protein_file (str, optional): File name which contains the
reference structure trajectory has been aligned to. Defaults
to "aligned.pdb".
ligand_name (str | None, optional): Residue name for the ligand.
If none is given, no ligand is extracted and
visualised/saved in the pdb file. Defaults to None.
output_fname (str | None, optional): Name of the output file
(pdb prefered). Defaults to None.
mode (str, optional): mode in which conserved waters will
besaved. Options:
- "SOL" - default mode. Saves water molecules as SOL so that
visualisation softwares can recognise them as waters. No
distinction is made between different types of conserved
waters.
- "cathegorise" - cathegorises the waters according to
hydrogen orienation into fully conserved (FCW),
half-coserved (HCW) and weakly conserved (WCW). This mode
makes visualisers not able to recognise the waters as
water/sol but usefull for interpreting results.
Example::
# Generate pdb results file
read_results_and_make_pdb(
fname = 'Results.dat',
typefname = 'TypeResults.dat',
protein_file = 'aligned_protein.pdb',
ligand_name = 'UBY',
output_fname = 'results.pdb',
mode = 'cathegorise',
)
"""
water_type, waterO, waterH1, waterH2 = read_results(fname, typefname)
make_results_pdb_MDA(
water_type=water_type,
waterO=waterO,
waterH1=waterH1,
waterH2=waterH2,
protein_file=protein_file,
ligand_name=ligand_name,
output_fname=output_fname,
mode=mode,
)
def is_pdb_file(filepath: str) -> bool:
"""Return True if `filepath` is probably a PDB.
If it's binary (like XTC/DCD), this will return False safely.
"""
ext = os.path.splitext(filepath)[1].lower()
if ext in {".pdb", ".ent", ".pdb1", ".pdb2"}:
return True
# If extension isn't a known PDB, peek at first few lines:
try:
with open(filepath, encoding="utf-8") as f:
for _ in range(10):
line = next(f)
if line.startswith(("ATOM ", "HETATM", "HEADER", "TITLE")):
return True
except (UnicodeDecodeError, OSError, StopIteration):
# If it fails to decode (binary), or can't open, it's not a PDB.
return False
return False