Source code for pydft_qmmm.system.file_manager

"""Utilities for reading and writing PDB files.
"""
from __future__ import annotations

__all__ = [
    "load_system",
    "write_to_pdb",
]

import pathlib
from typing import TYPE_CHECKING

import numpy as np

from pydft_qmmm.utils import ELEMENT_TO_MASS
from pydft_qmmm.utils import compute_lattice_constants
from pydft_qmmm.utils import compute_lattice_vectors
from pydft_qmmm.utils import check_array

from .atom import Atom

if TYPE_CHECKING:
    from numpy.typing import NDArray
    from pydft_qmmm import System


[docs] def load_system(*args: str) -> tuple[list[Atom], NDArray[np.float64]]: """Load system data from PDB files. Args: args: The PDB file or list of PDB files with position, element, name, residue, residue name, chain, and lattice vector data. Returns: A list of atom objects and the box vectors. """ atoms = [] boxes = [] residue = 0 for filename in args: tmp_atoms, tmp_box = _read_pdb(filename, residue) residue = tmp_atoms[-1].residue + 1 atoms.extend(tmp_atoms) boxes.append(tmp_box) if len(boxes) == 1: return atoms, boxes[0] for i in range(len(boxes) - 1): if not np.allclose(boxes[i], boxes[i+1]): raise OSError( "Multiple PDB files have been loaded with different lattices.", ) return atoms, boxes[-1]
def _read_pdb( pdb_file: str, residue: int = 0 ) -> tuple[list[Atom], NDArray[np.float64]]: """Extract system data from a PDB file. Args: pdb_file: A PDB file with position, element, name, residue, residue name, chain, and lattice vector data. Returns: A list of atom objects and the box vectors. """ atoms: list[Atom] = [] box = np.zeros((3, 3)) resnum_old = 0 # Read the PDB file. with open(pdb_file) as fh: lines = fh.readlines() for line in lines: # Extract lattice information and construct box. if line.startswith("CRYST1"): a = float(line[6:15].strip()) b = float(line[15:24].strip()) c = float(line[24:33].strip()) A = float(line[33:40].strip()) B = float(line[40:47].strip()) G = float(line[47:54].strip()) box = compute_lattice_vectors(a, b, c, A, B, G) # Add an atom to the list of Atom objects. if line.startswith("ATOM") or line.startswith("HETATM"): name = line[12:16].strip() resname = line[17:21].strip() chain = line[21].strip() resnum = int(line[22:26]) # Advance the residue counter if the residue name, residue # sequence number, or chain identifier changes once there is # at least one Atom in the list of Atom objects. if atoms: if (atoms[-1].residue_name != resname or atoms[-1].chain != chain or resnum_old != resnum): residue += 1 position = np.array( [float(line[30:38].strip()), float(line[38:46].strip()), float(line[46:54].strip())], ) element = line[76:78].strip().lower().capitalize() mass = ELEMENT_TO_MASS.get(element, 0.) atom = Atom( position=position, mass=mass, residue=residue, element=element, name=name, residue_name=resname, chain=chain, ) atoms.append(atom) resnum_old = resnum return atoms, box
[docs] def write_to_pdb(name: str, system: System) -> None: r"""Write a system to a PDB file. Args: name: The PDB file to which the system will be written. system: The system whose data will be written to a PDB file. """ filename = pathlib.Path(name).with_suffix(".pdb") a, b, c, A, B, G = compute_lattice_constants(system.box) residues: dict[str, list[int]] = {} for i in range(len(system)): residue = residues.get(system.residue_names[i], []) if not system.residues[i] in residue: residue.append(system.residues[i]) residues[system.residue_names[i]] = residue if check_array(system.positions): raise TypeError with open(filename, "w") as fh: fh.write( (f"CRYST1{a:9.3f}{b:9.3f}{c:9.3f}{A:7.2f}{B:7.2f}" f"{G:7.2f} P 1 1 \n"), ) for i, _ in enumerate(system.names): residue = residues[system.residue_names[i]] resid = residue.index(system.residues[i]) + 1 line = "HETATM" line += f"{i+1:5d} " line += f"{system.names[i]:4s}" line += f"{system.residue_names[i]:4s}" line += f"{system.chains[i]}{resid:4d} " line += f"{system.positions[i, 0]:8.3f}" line += f"{system.positions[i, 1]:8.3f}" line += f"{system.positions[i, 2]:8.3f}" line += " 1.00 0.00 " line += f"{system.elements[i]:2s} \n" fh.write(line) fh.write("END")