Source code for twodanalysis.MembProp

"""
MembProp
=============
Class created mainly to analyze lipid membranes in different ways

Classes
-------

.. autoclass:: MembProp
    :members:
    :undoc-members:
    :show-inheritance:


"""

import MDAnalysis as mda
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import re

def natural_key(node_name):
    """
    Key function to sort strings like C1, C2, C10 numerically.
    Extracts the prefix and numeric part.
    """
    match = re.match(r"([A-Za-z]+)(\d+)", str(node_name))
    if match:
        prefix, number = match.groups()
        return (prefix, int(number))
    else:
        return (node_name, 0)  # fallback



[docs]
class MembProp:

    def __init__(
                self,
                obj,
                lipid_list = None,
                connection_chains = None,
                working_lip = None,
                verbose = False,
                forcefield = "charmm",
            ):
        """Class set up common properties and to automatically guess useful membrane properties
         useful for the analyses that can ber performed. It sets up lipid heads (used to identify membrane middle),
         periodicity (used to introduce periodicity), default edges for the 2D maps. On the other hand it helps with the guess
        of properties such as chain length for order parameter, last carbon for splay angle, polarity information and radii
        for packing defects.


        Parameters
        ----------
        obj : Universe or AtomGroup
            MDanalysis universe or AtomGroup with or without trajectory
        lipid_list : List of lipids in the membrane, optional
            If not provided, the code will guess them based on eliminating protein and RNA
            from the atoms. If you upload a trajectory with water and ions and does not specify this
            information it can lead to errors, by default None
        connection_chains : dict, optional
            Dictionary with the information of the atoms that connect the lipid tails and the hidrophilic head.
            This dictionary should be as follows {"CHL1" : [("O3", "C3")], "DODMA" : [("C21", "C22"), ("C31", "C32")],}
            and should be provided for each lipid. Defaults to None (If None we use ([("C21", "C22"), ("C31", "C32")] for most lipids))
        working_lip : dict, optional
            Dictionary with the information of the lipid heads to use for the multiple analysis. The dictionary should be as follows
            {"CHL1" : {"head" : "O3"}, "DODMA" : {"head" : "N1"}, "DSPC" : {"head":"P"}} and should have entries for all the lipids
        verbose : bool, optional
            Print information of the membrane, by default False
        """



        # Read trajectory depending if tpr is provided or not
        if isinstance(obj, mda.Universe):
            self.u = obj
        elif isinstance(obj,mda.core.groups.AtomGroup):

            self.u = mda.core.universe.Merge(obj)
        else:
            raise TypeError("Input must be an MDAnalysis Universe or AtomGroup")




        # Select elements in the membrane (in principle only lipids)
        if lipid_list is None: # Select only elements of the membrane
            self.memb = self.u.select_atoms("all and not protein and not\
                                             (resname URA or resname GUA\
                                             or resname ADE or resname CYT\
                                             or resname THY)")
            self.lipid_list = list(set(self.memb.residues.resnames))
        else:
            self.memb = self.u.select_atoms(f"{self.build_resname(list(lipid_list))}")
            self.lipid_list = list(set(self.memb.residues.resnames))


        # Set percentage for periodicity
        self.periodicity = 0.1


        # Set radius sizes of different elements
        self.radii_dict = None
        self.forcefield = forcefield
        self.chain_info = {}
        self.chain_conections = {}
        self.non_polar_dict = {}
        self.polar_dict = {}
        self.first_lipids = {}
        self.non_polar_visualize = {}

        self.amber_heads = ["PC", "PE", "PS", "PGR", "PGS", "PH", "SPM"]


        self.working_lip = {
                                "CHL1" : {"head" :"O3", "charge" : 0},
                                "DODMA" : {"head" :"N1", "charge" : -0.21},
                                "DSPC" : {"head" :"P", "charge" : 1.1},
                            }


        self.connection_chains = {

            "CHL1" : [("O3", "C3")],
            "DODMA" : [("C21", "C22"), ("C31", "C32")],
            "DSPC" : [("C21", "C22"), ("C31", "C32")],

        }

        #self.connection_chains = self.connection_chains if connection_chains is None else connection_chains
        if connection_chains is not None:
            for lip in connection_chains:
                self.connection_chains[lip] = connection_chains[lip]

        for lipid in self.lipid_list:
            test_lips = self.working_lip.keys()
            if lipid not in test_lips:
                self.working_lip[lipid] = {"head" : "P", "charge" : 0}
            if lipid not in list(self.connection_chains.keys()):
                self.connection_chains[lipid] = [("C21", "C22"), ("C31", "C32")]


        self.working_lip = self.working_lip if working_lip is None else working_lip


        head_atoms = set([self.working_lip[lipid]["head"] for lipid in self.working_lip])
        if "P" in head_atoms:
            self.all_head = self.memb.select_atoms(self.build_resname(self.lipid_list) + " and name P")
        else:
            self.all_head = self.memb.select_atoms(self.build_resname_head(self.lipid_list))


        self.map_layers = {"top" : " > ",
                           "bot": " < ",
                           "up" : " > ",
                           "down" : " < "}



        self.verbose = verbose
        if verbose:
            print(f"This system contains the following lipids : {self.lipid_list}\n\n")
            print(f"The chain length is : \n{self.print_dict(self.chain_info)}\n")
            print(f"We will use the following heads and charges for the following lipids.\
                   If the lipid is not here we will use P as head as default \n{self.print_dict(self.working_lip)}\n")
            print("Note: To compute the middle of the membrane we use only P heads\n\n")
            #print(f"The default start frame is {self.start}, final {self.final}, step {self.step}\n\n")


    def add_radii(self, radii_dict = None):
        if radii_dict is None:
            self.radii_dict = {"H": 1.1,
                            "N": 1.55,
                            "C": 1.7,
                            "P": 1.8,
                            "O": 1.52,
                            "S" : 1.8,
                            }
        # Add radii as a topology attribute for Mdanalysis
        try:
            string_array = self.memb.elements
        except:
            guessed_elements = mda.topology.guessers.guess_types(self.u.atoms.names)
            self.u.add_TopologyAttr("elements", guessed_elements)
            string_array = self.memb.elements
        radii_array = np.array([self.radii_dict[element] for element in string_array])
        self.u.add_TopologyAttr("radii")
        self.memb.radii = radii_array


    #def infer_chain_names(self, lipid, chain):
    #    lipid_w = self.memb.select_atoms(f"resname {lipid}")



    def guess_chain_lenght(self):
        # Guess the chain length of lipids. Chain sn2 start with C2 and chain sn1 start with C3
        for lipid in self.lipid_list:
            first_lipid = self.memb.select_atoms(f"resname {lipid}").resids[0]
            #print("###########",self.memb.select_atoms(f"resname {lipid}").resids)
            #print("first lipid", first_lipid)
            chain_l = []
            tail = self.memb.select_atoms(f"resid {first_lipid}")


            if self.forcefield == "charmm":
                for bond in self.connection_chains[lipid]:

                    _ , chaintemp = self.cut_structure(
                        tail,
                        bond,
                        )
                    c_in_node = [n for n in chaintemp.nodes if "C" in str(n)]
                    chain_l.append(len(c_in_node))
                #print(chain_l, lipid)
                try:
                    if len(chain_l) == 2:
                        chain_l = [chain_l[1]] + [chain_l[0]]
                    else:
                        chain_l = [chain_l[1]] +[chain_l[0]]  + [chain_l[2:]]
                except:
                    pass
                self.chain_info[lipid] = chain_l

            elif self.forcefield == "amber":
                if lipid != "CHL":

                    chain1 = tail.resids[0] - 1
                    chain2 = tail.resids[0] + 1

                    atoms_c1 = self.u.select_atoms(f"resid {chain1}")
                    atoms_c2 = self.u.select_atoms(f"resid {chain2}")
                    c1_in_node = [n for n in atoms_c1.names if "C" in str(n)]
                    c2_in_node = [n for n in atoms_c2.names if "C" in str(n)]
                    chain_l = [len(c1_in_node), len(c2_in_node)]
                else:
                    chain_l = [7]
                self.chain_info[lipid] = chain_l

            #print("################", self.chain_info)



    def guess_last_cs(self):

        if self.forcefield == "charmm":
            for lipid in self.lipid_list:
                first_lipid = self.memb.select_atoms(f"resname {lipid}").resids[0]
                actual_sn1 = self.memb.select_atoms(f"resid {first_lipid} and name C3*")
                actual_sn2 = self.memb.select_atoms(f"resid {first_lipid} and name C2*")
                actual_sn1 = actual_sn1.names
                actual_sn2 = actual_sn2.names
                self.working_lip[lipid]["last_c"] = [actual_sn1[-1], actual_sn2[-1]]
        elif self.forcefield == "amber":
            for lipid in self.lipid_list:
                structure = self.extract_chain_info(lipid)
                carbons_1 = [atom for conn in structure[0] for atom in conn if "C" in atom]
                carbons_2 = [atom for conn in structure[0] for atom in conn if "C" in atom]
                self.working_lip[lipid]["last_c"] = [carbons_1[-1], carbons_2[-1]]




        return self.working_lip


    def guess_polarity(self):
        self.non_polar_dict = {}
        self.first_lipids = {}
        self.non_polar_visualize = {}
        for lipid in self.lipid_list:
            first_lipid = self.memb.select_atoms(f"resname {lipid}").resids[0]
            self.first_lipids[lipid] = first_lipid
            if lipid == "CHL1":
                non_polar = self.memb.select_atoms(f"resid {first_lipid} and not (name O3 or name H3')")
            else:
                non_polar = self.memb.select_atoms(f"resid {first_lipid} and (name *C3* or name H*Y or name H*X or name H*Z  or name *C2* or name H*R or name H*S or name H*T) and not (name C3 or name C31 or name HY or name HX or name HZ  or name C2 or name C21 or name HR or name HS or name HT)")
            self.non_polar_dict[lipid] = list(non_polar.names)



    @staticmethod
    def build_resname(resnames_list):
        resnames_list = list(resnames_list)
        string = " (resname " + resnames_list[0]
        for resname in resnames_list[1:]:
            string = string + " or resname " + resname
        string = string + ") "
        return string


    def build_resname_head(self, resnames_list):
        resnames_list = list(resnames_list)
        string = f"( (resname {resnames_list[0]}  and name {self.working_lip[resnames_list[0]]['head']}) "

        for resname in resnames_list[1:]:
            string += f" or (resname {resname}  and name {self.working_lip[resname]['head']}) "

        string +=  " ) "
        return string

    @staticmethod
    def build_name(resnames_list):
        if isinstance(resnames_list, str):
            return f"(name {resnames_list})"
        string = " (name " + resnames_list[0]
        if len(resnames_list) > 1:
            for resname in resnames_list[1:]:
                string = string + " or name " + resname
        string = string + ") "
        return string


[docs]
    def visualize_polarity(self, lipids = "all"):
        """visualize_polarity

        This function is used to visualize what atoms are considered in polarity

        Parameters
        ----------
        lipids : str, optional
            Lipids to show polarity, by default "all"
        """
        aspect_ratio = [1, 1, 1]
        # Get lipids to work
        if lipids == "all":
            lipids = self.lipid_list
        else:
            if isinstance(lipids, str):
                lipids = [lipids]
        #Guess bonds if needed
        try:
            self.u.bonds
        except:
            bonds = mda.topology.guessers.guess_bonds(self.u.atoms, self.u.atoms.positions)
            self.u.add_TopologyAttr("bonds", bonds)
            #print(mda.topology.guessers.guess_bonds(first_lipid, first_lipid.positions))

        # Make a 3D plot of the individual lipids and shows it
        if not self.non_polar_dict:
            print("Non polar atoms are not yet established, please set self.non_polar_dict first")
            return
        else:
            nplots = len(lipids)
            fig = plt.figure(figsize=(5 * nplots, 5))


            count = 0
            for lipid in lipids:
                first_lipid = self.memb.select_atoms(f"resid {self.first_lipids[lipid]} and resname {lipid}")
                lipid_pos = first_lipid.positions
                lipid_ats = first_lipid.names
                polarity = ["blue" if name in self.non_polar_dict[lipid] else "red" for name in lipid_ats]
                #print(polarity, lipid_ats)
                ax = fig.add_subplot(1,nplots, count+1, projection = "3d")
                ax.scatter(*lipid_pos.T, s = 10, c = polarity)
                dists = []
                mids = []
                for i in range(3):
                    value = np.max(lipid_pos[:,i]) - np.min(lipid_pos[:,i])
                    dists.append(value)
                    mids.append(np.min(lipid_pos[:,i] + value/2))
                dists = max(dists)
                dists = dists + 0.01*dists
                for atom in first_lipid:
                    bonds = atom.bonded_atoms
                    for bond in bonds:
                        vector = np.array([atom.position, bond.position])
                        plt.plot(*vector.T, color = "black")
                for i in range(len(lipid_ats)):
                    if lipid_ats[i] in self.non_polar_dict[lipid]:
                        ax.text(lipid_pos[i,0], lipid_pos[i,1],  lipid_pos[i,2], lipid_ats[i], color = "black", fontsize = 6)
                    else:
                        ax.text(lipid_pos[i,0], lipid_pos[i,1], lipid_pos[i,2], lipid_ats[i], color = "black", fontsize = 6)
                ax.set_box_aspect(aspect_ratio)
                ax.set_title(lipid)
                ax.set_xlim(mids[0]-dists/2, mids[0]+dists/2)
                ax.set_ylim(mids[1]-dists/2, mids[1]+dists/2)
                ax.set_zlim(mids[2]-dists/2, mids[2]+dists/2)
                count += 1
        plt.show()




[docs]
    @staticmethod
    def extend_data(data, dimensions, percentage, others = None):
        """ Function to extent data for periodicity purposes

        Parameters
        ----------
        data : ndarray(:,2)
            data in 2D fashion
        dimensions : ndarray(2)
            periodicity dimension (usually gottem from u.ts.dimensions[:2])
        percentage : float
            Percentage of data to replicate with periodicity
        others : list(ndarray(:), ndarray(:),...,ndarray(:)), optional
            List of other data to replicate with periodicity (ndarrays initially match with data axis 0 dimension), by default None

        Returns
        -------
        ndarray
            Data extended
        ndarray, list
            Data extended, others extended
        """
        xmin = np.min(data[:,0])
        xmax = np.max(data[:,0])
        ymin = np.min(data[:,1])
        ymax = np.max(data[:,1])



        dist_x = xmax - xmin
        dist_y = ymax - ymin
        if len(data[0]) == 2:
            left_add = data[data[:,0] <= xmin + percentage*dist_x] + [dimensions[0],0]
            right_add = data[data[:,0] >= xmax - percentage*dist_x] - [dimensions[0],0]
        else:
            left_add = data[data[:,0] <= xmin + percentage*dist_x] + [dimensions[0],0,0]
            right_add = data[data[:,0] >= xmax - percentage*dist_x] - [dimensions[0],0,0]


        if others is not None:
            temp_right = []
            temp_left = []
            for i in range(len(others)):
                temp_left.append(others[i][data[:,0] <= xmin + percentage*dist_x])
                temp_right.append(others[i][data[:,0] >= xmax - percentage*dist_x])




        data = np.concatenate([data, left_add, right_add], axis = 0)

        if others is not None:
            for i in range(len(others)):
                others[i] = np.concatenate([others[i], temp_left[i], temp_right[i]], axis = 0)

        # Extent in y
        if len(data[0]) == 2:
            up_add = data[data[:,1] <= ymin + percentage*dist_y] + [0,dimensions[1]]
            low_add = data[data[:,1] >= ymax - percentage*dist_y] - [0,dimensions[1]]
        else:
            up_add = data[data[:,1] <= ymin + percentage*dist_y] + [0,dimensions[1],0]
            low_add = data[data[:,1] >= ymax - percentage*dist_y] - [0,dimensions[1],0]


        if others is not None:
            temp_up = []
            temp_low = []
            for i in range(len(others)):
                temp_up.append(others[i][data[:,1] <= ymin + percentage*dist_y])
                temp_low.append(others[i][data[:,1] >= ymax - percentage*dist_y])



        data = np.concatenate([data, up_add, low_add], axis = 0)
        if others is not None:
            for i in range(len(others)):
                others[i] = np.concatenate([others[i], temp_up[i], temp_low[i]], axis = 0)

        if others is not None:
            return data, others
        return data






    def nx_polarity(self):



        self.non_polar_dict = {}
        self.polar_dict = {}
        self.first_lipids = {}
        self.non_polar_visualize = {}

        for lipid in self.lipid_list:
            first_lipid = self.memb.select_atoms(f"resname {lipid}")
            first_id = first_lipid.resids[0]
            first_lipid = first_lipid.select_atoms(f"resid {first_id}")
            self.first_lipids[lipid] = first_id



            if self.forcefield == "charmm":
                G = self.atgroup2nx(first_lipid)
                non_polar = []
                for edge in self.connection_chains[lipid]:
                    G.remove_edge(edge[0], edge[1])
                    components = list(nx.connected_components(G))
                    non_polar.append(next(comp for comp in components if edge[1] in comp)) # Stores the tails

                self.non_polar_dict[lipid] = [elem for s in non_polar for elem in s]
                polar = next(comp for comp in components if self.connection_chains[lipid][0][0] in comp)
                self.polar_dict[lipid] = polar

            elif self.forcefield == "amber":


                if lipid != "CHL":
                    heads_ats = first_lipid
                    self.polar_dict[lipid] = heads_ats.names

                    tail1 = self.u.select_atoms(f"resid {first_id -1}")
                    tail2 = self.u.select_atoms(f"resid {first_id +1}")
                    #print("Here: ########", tail1.names, tail2.names, list(tail1.names) + list(tail2.names))
                    non_polar = list(set(list(tail1.names) + list(tail2.names)))
                    self.non_polar_dict[lipid] = non_polar
                else:
                    self.polar_dict[lipid] = ["O1", "HO1"]
                    non_polar = first_lipid.names
                    non_polar_final = [name for name in non_polar if name not in self.polar_dict[lipid]]
                    self.non_polar_dict[lipid] = non_polar_final




            #print(lipid , ":", self.polar_dict[lipid])

    def _store_fist_lipids(self):
        self.first_lipids = {}
        for lipid in self.lipid_list:
            first_lipid = self.memb.select_atoms(f"resname {lipid}")
            first_id = first_lipid.resids[0]
            first_lipid = first_lipid.select_atoms(f"resid {first_id}")
            self.first_lipids[lipid] = first_id

    def nx_chain_names(self):

        self.first_lipids = {}
        carbon_conections = {}

        for lipid in self.lipid_list:
            first_lipid = self.memb.select_atoms(f"resname {lipid}")
            first_id = first_lipid.resids[0]
            first_lipid = first_lipid.select_atoms(f"resid {first_id}")
            self.first_lipids[lipid] = first_id


            if lipid != "CHL1":


                G = self.atgroup2nx(first_lipid)


                chains = []
                for edge in self.connection_chains[lipid]:
                    G.remove_edge(edge[0], edge[1])
                    components = list(nx.connected_components(G))
                    chains.append(next(comp for comp in components if edge[1] in comp)) # Store the tail of the lipids

                chain_graphs = []
                carbons = []
                for chain in chains:
                    chain_graphs.append(G.subgraph(chain))
                    filtered_sorted_carbs = [item for item in chain if "C" in item]
                    carbons.append(filtered_sorted_carbs)


                carbon_conections = {}

                for chain, carbs in zip(chain_graphs,carbons):
                    for carbon in carbs:
                        connected_atoms = list(chain.neighbors(carbon))
                        connected_atoms = [atom for atom in connected_atoms if "C" not in atom]
                        carbon_conections[carbon] = connected_atoms


                #print("here", carbon_conections)

            self.chain_conections[lipid] = carbon_conections

        #print(self.chain_conections)


[docs]
    def cut_structure(self, structure, bond):
        """Tool needed to cut structures in two parts based on a bond.
        This function is used to cut the structure in two parts based on a bond.
        It uses NetworkX to create a graph of the structure and then removes the bond.
        It then finds the connected components of the graph and returns the two parts.
        If the bond does not cut the structure into two parts, it raises a ValueError.


        Parameters
        ----------
        structure : MDAnalysis Universe or AtomGroup
            Any structure that we aim to cut
        bond : tuple
            tuple with the names of the atoms of the bond we want to cut
            e.g. ("C21", "C22") or ("C31", "C32")

        Returns
        -------
        part1, part2 : NetworkX Graph
            Two parts of the structure after cutting it based on the bond.
        Raises
        ------
        ValueError
            If the bond does not cut the structure into two parts, it raises a ValueError.

        """

        G = self.atgroup2nx(structure)
        G.remove_edge(bond[0], bond[1])
        components = list(nx.connected_components(G))
        subgraphs = [G.subgraph(c).copy() for c in components]
        if len(subgraphs) != 2:
            raise ValueError("The bond does not cut the structure into two parts")

        part1 = subgraphs[0] if bond[0] in subgraphs[0].nodes else subgraphs[1]
        part2 = subgraphs[1] if bond[1] in subgraphs[1].nodes else subgraphs[0]

        return part1, part2



[docs]
    def atgroup2nx(self, atgroup):

        """Function to convert an MDAnalysis AtomGroup to a NetworkX Graph.

        Parameters
        ----------
        atgroup : MDAnalysis AtomGroup
            AtomGroup to convert

        Returns
        -------
        NetworkX Graph
            Graph representing the structure of the AtomGroup
        """
        try:
            bonds = atgroup.bonds.indices
            #print(bonds)
        except:
            bonds = mda.topology.guessers.guess_bonds(atgroup, atgroup.positions)
        nodes = set()
        for edge in bonds:
            nodes.update(edge)
        nodes = sorted(nodes)

        names = atgroup.names
        map_node_name = {nodes[i]:names[i] for i in range(len(nodes))}

        edges_labeled = [(map_node_name[u], map_node_name[v]) for u,v in bonds]

        G = nx.Graph()
        G.add_edges_from(edges_labeled)

        return G




[docs]
    def chain_structure(self, chain):
        """Function to get the structure of the chain in the shape [C,Hi,Hj] for any nomemclature.

        Parameters
        ----------
        chain : NetworkX Graph
            Graph representing a lipid tail

        Returns
        -------
        list
            List containing various list of the form [C, Hi, Hj] where C is the carbon and Hi and Hj are the hydrogens connected to it.
        """
        c_in_node = [n for n in chain.nodes if "C" in str(n)]
        c_in_node = sorted(c_in_node, key=natural_key)
        neighbors = [list(chain.neighbors(n)) for n in c_in_node]
        neighbors = [[c_in_node[i]] + [item for item in neighbors[i] if "C" not in item] for i in range(len(c_in_node))]
        return neighbors


    def extract_chain_info(self, lipid):
        self._store_fist_lipids()
        lipids_ids = self.first_lipids


        chains = []

        if self.forcefield == "charmm":
            for chain in self.connection_chains[lipid]:

                _, chaintemp = self.cut_structure(
                        self.memb.select_atoms(f"resid {lipids_ids[lipid]}"),
                        chain,
                        )
                chains.append(chaintemp)

        elif self.forcefield == "amber":
            if lipid != "CHL":
                chain1 = lipids_ids[lipid] - 1
                chain2 = lipids_ids[lipid] + 1

                atoms_c1 = self.u.select_atoms(f"resid {chain1}")
                atoms_c2 = self.u.select_atoms(f"resid {chain2}")

                Ch1 = self.atgroup2nx(atoms_c1)
                Ch2 = self.atgroup2nx(atoms_c2)

                chains = [Ch1, Ch2]
            if lipid == "CHL" and "CHL" in self.connection_chains:
                for chain in self.connection_chains[lipid]:

                    _, chaintemp = self.cut_structure(
                            self.memb.select_atoms(f"resid {lipids_ids[lipid]}"),
                            chain,
                            )

                    chains.append(chaintemp)


        return [self.chain_structure(tail) for tail in chains]



    @staticmethod
    def print_dict(dictio):
        string = ""
        for key in dictio.keys():
            string += f"{key} : {dictio[key]}\n"
        return string