import MDAnalysis as mda
import numpy as np
import pandas as pd
from twodanalysis import MembProp
from scipy.ndimage import label
[docs]
class PackingDefects(MembProp):
def __init__(self,
universe,
connection_chains = None,
lipid_list = None,
verbose = False,
edges = None,
nbins = 100,
working_lip = None,
forcefield = "charmm",
):
super().__init__(universe,
lipid_list=lipid_list,
connection_chains=connection_chains,
verbose=verbose, forcefield=forcefield, working_lip=working_lip)
self.forcefield = forcefield
if edges is None:
positions = self.all_head.positions[:,:2]
self.v_min = np.min(positions)
self.v_max = np.max(positions)
self.edges = [self.v_min, self.v_max, self.v_min, self.v_max]
else:
self.edges = edges
self.v_min = edges[0]
self.v_max = edges[1]
self.start = 0
self.final = -1
self.step = 1
self.nbins = nbins
try:
self.nx_polarity()
except:
raise """ Our code does not recognize the bond between the lipid head and the lipid tails,
or your lipid is not standard. We have set up the standard cuts in the dictionary self.connection_chains, that are
for example self.connection_chains["DOPC"] = [("C21", "C22"), ("C31", "C32")] for the first and second chain.
If this error occur to you, please provide connection chains when defining PackingDefects. For instance:
membrane = PackingDefects(universe = universe, connection_chains = {"CHL1" : [("O3", "C3")], "DOPC" : [("C21", "C22"), ("C31", "C32")]})
"""
self.build_polarity_dict()
self.radii_dict = {"H": 1.1,
"N": 1.55,
"C": 1.7,
"P": 1.8,
"O": 1.52,
"S" : 1.8,
"K" : 1.2,
"CL" : 1.81,
"CS" : 1.7
}
self.add_radii(self.radii_dict)
[docs]
def packing_defects(self,
layer = 'top',
nbins = None,
height = False,
periodic = False,
edges = None,
area = True,
count = True,
):
"""Compute packing defects based on packmem: https://doi.org/10.1016/j.bpj.2018.06.025
Parameters
----------
layer : str, optional
working layer (top/bot). Defaults to 'top', by default 'top'
nbins : int, optional
Number of bins of the xy grid, by default None
height : bool, optional
Store height matrix (To study deepness of th packing defects), by default False
periodic : bool, optional
Defines if using periodicity or not. When True, takes into acount periodicity and
returns a 2D grid with of the size of the periodic box, by default False
edges : list(float)
List with edges in the shape [xmin,xmax,ymin,ymax], defaults to None
area : bool
If True computes tha area of the packing defects for return
count : bool
If True computes the number of packing defects in the region
Returns
-------
ndarray
If height == Flase: matrix with packing defects
ndarray, ndarray
If height === True: matrix with packing defects, matrix with height information
"""
if nbins is None:
nbins = self.nbins
if edges is not None:
xmin = edges[0]
xmax = edges[1]
ymin = edges[2]
ymax = edges[3]
dx = xmax-xmin
dy = ymax-ymin
if int(dx) != int(dy):
print(dx,dy)
print("Distances in x and y must be equal")
return
else:
xmin = self.v_min
ymin = self.v_min
xmax = self.v_max
ymax = self.v_max
grid_size = abs(xmin - xmax)/nbins
n_aug = int(4/grid_size)
# Extend the grid 5 Amstrong to make sure all the packing defects are correctly taken
xmin_ex = xmin - n_aug * grid_size
xmax_ex = xmax + n_aug * grid_size
ymin_ex = ymin - n_aug * grid_size
ymax_ex = ymax + n_aug * grid_size
nbins_aug = nbins + 2 * n_aug
edges = [xmin,xmax,ymin,ymax]
if layer == "top":
sign = " > "
elif layer == "bot":
sign = " < "
##### Select all the P atoms to find the middle of the membrane
all_p = self.all_head
#### Define radious to be used for the different atoms
mat_radii_dict = {}
for atom in self.radii_dict.keys():
mat_radii_dict[atom] = self.create_circle_array(grid_size, self.radii_dict[atom])
# Create matrix to be filled
matrix = np.zeros((nbins_aug+2, nbins_aug+2))
if height:
matrix_height = np.zeros((nbins_aug, nbins_aug))
positions = all_p.positions[:,2]
mean_z = positions.mean()
dims = self.u.trajectory.ts.dimensions[:2]
for lipid in self.lipid_list:
selection_string = f"byres ((resname {lipid} and name {self.working_lip[lipid]['head']}) and prop z {sign} {mean_z})"
#print(selection_string)
layer_at = self.memb.select_atoms(selection_string)
pos_ats = layer_at.positions
elements = layer_at.elements
names = layer_at.names
if periodic:
#temp = pos_ats.copy()
pos_ats, others = self.extend_data(pos_ats, dims, self.periodicity, [elements, names])
elements = others[0]
names = others[1]
if not height:
indexes = self.get_indexes(pos_ats[:,:2], nbins_aug, edges=[xmin_ex,xmax_ex,ymin_ex,ymax_ex])
else:
indexes, matrix_temp = self.get_indexes(pos_ats,
nbins_aug,
edges=[xmin_ex,xmax_ex,ymin_ex,ymax_ex],
matrix_height = True)
matrix_height = np.maximum(matrix_height.copy(), matrix_temp.copy())
matrix = self.add_defects(matrix, indexes,elements, names, lipid, mat_radii_dict)
#print(f"Shapeeeeeee::::: {matrix.shape}, {matrix_height.shape}, dims {dims}")
core1 = 2*(slice(n_aug+1,-(n_aug+1)),)
core = 2*(slice(n_aug,-n_aug),)
matrix = matrix[core1]
#print(f"Shapeeeeeee::::: {matrix.shape}, {matrix_height.shape}, dims {dims}")
if periodic:
n = round((dims[0]/grid_size))
#print(n, dims[0]/grid_size, grid_size)
xmax = xmin + n*grid_size
ymax = ymin + n*grid_size
matrix = matrix[:n, :n]
if height:
matrix_height = matrix_height[:n, :n]
edges = [xmin,xmax,ymin,ymax]
#print(f"Shapeeeeeee::::: {matrix.shape}, {matrix_height.shape}, dims {dims}")
defects = matrix
defects = np.where(matrix < 1, matrix, np.nan)
#defects = np.where(defects > 0, defects, np.nan)
defects = np.rot90(defects)
return_dict = {
"edges" : edges,
}
if area:
non_nan = ~np.isnan(defects)
count = np.sum(non_nan)
area_v = count*grid_size*grid_size
area_tot = (defects.shape[0])**2 * grid_size * grid_size
return_dict["area"] = {"defects" : area_v,
"total": area_tot}
if count:
binary_matrix = ~np.isnan(defects)
structure = np.array([[1,1,1], [1,1,1], [1,1,1]])
labeled_array, num_features = label(binary_matrix, structure = structure)
cluster_sizes = np.bincount(labeled_array.ravel())[1:]
return_dict["count"] = {"number":num_features, "sizes":cluster_sizes}
return_dict["grid_size"] = grid_size
#plt.hist(cluster_sizes, bins=40)
#plt.show()
#print(return_dict)
if height:
matrix_height = matrix_height[core]
matrix_height[matrix_height == 0 ] = np.nan
return defects, matrix_height, return_dict
return defects, return_dict
[docs]
@staticmethod
def create_circle_array(grid_size, radius_A, center=None):
"""Create a small matrix with a inner circle of the size of radius_A
Parameters
----------
grid_size : float
define the grid size to create the optimun grid (Amstrongs)
radius_A : float
define the radius for the matrix (amstrongs)
center : bool, optional
Bool to set the center of the circle, by default None
Returns
-------
ndarray
array with a circle of ones
"""
n = int(2*radius_A /grid_size) + 1
if n % 2 == 0:
n += 1
# Create an n x n array initialized to 1
array = np.zeros((n, n))
# Default the center to the middle of the array if not provided
if center is None:
center = (n // 2, n // 2)
# Generate a grid of indices
y, x = np.ogrid[:n, :n]
# Calculate the distance from each grid point to the center
distance_from_center = (x - center[1])**2 + (y - center[0])**2
# Set values to 2 within the circle of the given radius
array[distance_from_center <= (radius_A/grid_size)**2] = 1
return array
[docs]
@staticmethod
def add_small_matrix(big_matrix, small_matrix, center_i, center_j):
"""Add smmall matrix to a big matrix
Parameters
----------
big_matrix : ndarray(n,n)
big matrix where a small matrix will be added
small_matrix : ndarray(m,m)
small matrix to be added
center_i : int
i coordinate
center_j : int
j coordinate
Returns
-------
ndarray(n,n)
big matrix modified
"""
# Calculate the top-left corner of the submatrix in big_matrix
start_i = center_i - small_matrix.shape[0] // 2
start_j = center_j - small_matrix.shape[1] // 2
end_i = start_i + small_matrix.shape[0]
end_j = start_j + small_matrix.shape[1]
# Handle boundaries to ensure indices stay within big_matrix
big_start_i = max(0, start_i)
big_start_j = max(0, start_j)
big_end_i = min(big_matrix.shape[0], end_i)
big_end_j = min(big_matrix.shape[1], end_j)
# Calculate the overlapping region for small_matrix
small_start_i = big_start_i - start_i
small_start_j = big_start_j - start_j
small_end_i = small_start_i + (big_end_i - big_start_i)
small_end_j = small_start_j + (big_end_j - big_start_j)
# Add the overlapping region of small_matrix to the big_matrix
big_matrix[big_start_i:big_end_i, big_start_j:big_end_j] += small_matrix[small_start_i:small_end_i, small_start_j:small_end_j]
return big_matrix
[docs]
def add_defects(self,
matrix,
indexes,
elements,
names,
lipid,
mat_radii_dict):
"""Code to easily add defects in the 2d matrix
Parameters
----------
matrix : ndarray(n,n)
Matrix where the defects are going to be added
indexes : ndarray(i,j)
List of indexes i,j in the matrix where the defects should be added
elements : list
type of element (needed to put the right radious)
names : list
names of the atoms (needed to map hydrophobic and not hydrophobic atoms)
lipid : str
lipid name
mat_radii_dict : dict
dictionary with the radii
Returns
-------
ndarray
matrix matrix filled with the defects
"""
for i in range(len(indexes[0])):
small_matrix = mat_radii_dict[elements[i]]
#if names[i] in self.non_polar_dict[lipid]:
# small_matrix = small_matrix * 0.0001
small_matrix = small_matrix * self.polarity_dict[lipid][names[i]]
self.add_small_matrix(matrix, small_matrix, indexes[0][i], indexes[1][i])
return matrix
[docs]
@staticmethod
def get_highest(data, min_lenght):
""" Code to get the highest value given two columns that are to be ordered in a 2D grid
Parameters
----------
data : (ndarray(:,2))
Array with two columns (column1: map to a 2D grid, column2: values)
min_lenght : (int)
Size of squares in the 2D grid
Returns
-------
ndarray(:,2)
With the maximun of each grid square
"""
columns = ["index", "weight"] # Data expected is an np array with columns ["index", "x", "y", "z"]
df = pd.DataFrame(data, columns = columns)
result = df.groupby('index', as_index=False)['weight'].max()
result_dict = dict(zip(result['index'], result['weight']))
hist = []
for i in range(min_lenght):
try:
hist.append(result_dict[i])
except:
hist.append(np.nan)
return np.array(hist)
# Computes and return the indexes of the data if where arranegd in a 2d histogram
[docs]
def get_indexes(self,
data,
bins = 10,
edges = None,
matrix_height = False):
"""given data in 2D, the code returns the indexes to locate the data in the 2D grid defined by edges and bins
Parameters
----------
data : ndarray(n,2 o 3)
Data in 2D fashion, (3 columns if height = True)
bins : int, optional
number of bins of the grid, by default 10
edges : list, optional
Edges in the following way [xmin,xmax,ymin,ymax], by default None
matrix_height : bool, optional
returns the height matrix (matrix with only the lipids closer to water), by default False
Returns
-------
tuple
tuple with the indexes
tuple, ndarray(bins,bins)
tuple with indexes and 2D data of the highest point
"""
edges = self.edges if edges is None else edges
xmin, xmax, ymin, ymax = edges
ran = [[xmin,xmax],[ymin,ymax]]
nbin = np.empty(2,np.intp)
edges = 2*[None]
for i in range(2):
edges[i] = np.linspace(ran[i][0], ran[i][1], bins +1)
nbin[i] = len(edges[i]) + 1
if not matrix_height:
indexes = (tuple(np.searchsorted(edges[i], data[:,i], side = "right") for i in range(2)))
else:
indexes = (tuple(np.searchsorted(edges[i], data[:,i], side = "right") for i in range(2)))
xy = np.ravel_multi_index(indexes, nbin)
xy_test = xy.reshape(-1,1)
#print("last shape", data[:,2].reshape(-1,1).shape, xy_test.shape)
xy_test = np.concatenate((xy_test, data[:,2].reshape(-1,1)), axis = 1)
hist = self.get_highest(xy_test, nbin.prod())
hist = hist.reshape(nbin)
hist = hist.astype(float, casting = "safe")
hist[np.isnan(hist)] = 0
core = 2*(slice(1,-1),)
hist = hist[core]
#print("here", hist[20,:])
return indexes, hist
#for i in range(2):
# on_edge = (data[:,i] == edges[i][-1])
# Ncount[i][on_edge] -= 1
#print(np.min(Ncount[0]), np.max(Ncount[0]), np.min(Ncount[1]), np.max(Ncount[1]))
#print(len(edges[0]), "edges len")
return indexes
[docs]
def packing_defects_stats(self,
nbins = 180,
layer = "top",
edges = None,
periodic = False,
start = 0,
final = -1,
step = 1,
area_size = True,
):
""" Run packing defects from `start` to `final` and stores data
about area of defects, total area, number of defects,
and size of defects
Parameters
----------
nbins : int, optional
number of bins to consider, by default 180
layer : str, optional
layer to consider, can be top, bot, by default "top"
edges : list(float), optional
Edges in the shape [xmin,xmax,ymin,ymax], by default None
periodic : bool, optional
Consider or not periodicity, by default False
start : int, optional
stat frame, by default 0
final : int, optional
final frame, by default -1
step : int, optional
frames to skip, by default 1
area_size : bool, optional
If true return the areas of the different defects, by default True
Returns
-------
pd.DataFrame, np.array
pandas dataframe with the area information of packing defects over time and informationabout the size of the packing defects
"""
results = []
sizes = []
for ts in self.u.trajectory[start:final:step]:
_, packing_dict = self.packing_defects(layer = layer,
nbins = nbins,
height = False,
periodic = periodic,
edges = edges,
area =True,
count = True,
)
if self.verbose:
print(f"Runing packing defects on frame {ts.frame}", end="\r")
results.append([packing_dict["area"]["defects"],
packing_dict["area"]["total"],
packing_dict["count"]["number"],
])
if area_size:
sizes.append(packing_dict["count"]["sizes"]*(packing_dict["grid_size"]*packing_dict["grid_size"]))
else:
sizes.append(packing_dict["count"]["sizes"])
results = pd.DataFrame(results, columns = ["defects_area", "total_area", "n_defects"])
sizes = np.concatenate(sizes, axis = 0)
return results, sizes
def build_polarity_dict(self):
polarity_dict = {}
for key in self.polar_dict.keys():
polarity_dict[key] = {atom : 1 for atom in self.polar_dict[key]} | {atom : 0.001 for atom in self.non_polar_dict[key]}
self.polarity_dict = polarity_dict