1 #!/usr/bin/env python3
2
3 ## README #####################################################################
4 #
5 # This script is meant to measure pi-pi stacking between the solute and solvent
6 # atoms. It will evaluate only those solvent atoms within a certain cutoff,
7 # defined by the user (below).
8
9 cutoff = 10.0
10
11 # This cutoff distance is based upon the approximated center of the ring
12 # containing the pi system. The center is approximated by using two of the
13 # three coordinates given by the user. The two atoms which will approximate the
14 # center need to be the second and third atoms defined in the lists below. The
15 # atoms are defined by the atom labels as they appear in the pdb file. Please
16 # note, this script works with pdb files ONLY.
17
18 solute_atoms = ['C0C', 'C0E', 'C09']
19 solvent_atoms = ['C02', 'N04', 'C05']
20
21 # Please ensure that all three atoms are in the aromatic ring, as these three
22 # atoms will be used to form the plane in which the pi system lies.
23 #
24 # The script will find the angles between the planes involved in the (hopefully
25 # found) pi-pi stacking, as well as take an average and standard deviation.
26 # Output for every solvent molecule evaluated is printed in an output file
27 # named 'output_pi-pi.txt'.
28 #
29 # !!The above two variables should be all that is required for the user to
30 # change!!
31 #
32 ## INVOCATION #################################################################
33 #
34 # Ensure that the script is marked executable or explicitly invoke python
35 # (version 3.2 minimal) to run the script. Any pdb file which you'd like to
36 # analyze should be given as an argument. Shell expansion is handled
37 # appropriately.
38 #
39 # Example:
40 # ./pipi.py *pdb
41 # ./pipi.py d50plt5 d50plt10 d50plt15
42 #
43 # Note that the filenames do not need a pdb suffix, but the script relies on
44 # the pdb format.
45 ##############################################################################
46 #
47 # Author: Billy Wayne McCann
48 # email : thebillywayne@gmail.com
49 # NOTE: My code is purposefully verbose. don't hate.
50 ###############################################################################
51
52 from sys import argv, exit
53 from math import pow, degrees, sqrt, acos
54 from sysconfig import get_python_version
55
56 # make sure we're using at least version 3.2 of Python
57 if float(get_python_version()) < 3.2:
58 print("Requires Python 3.2 or higher.")
59 exit(0)
60
61 # make sure arguments have been given to the script
62 if len(argv[1:]) == 0:
63 print("Give pdb files as an argument to this script.")
64 exit(0)
65 else:
66 pdbs = argv[1:]
67
68 # species are separated in the pdb file by 'TER '
69 ter = 'TER '
70
71 # file in which to place output
72 output = open('output_pi-pi.txt', 'w')
73
74 ### FUNCTIONS ###############################################################
75
76 def find_TERs(content):
77 ''' Find where the TER's in the pdb file occur'''
78
79 terlines = []
80 terline = content.index(ter)
81 terlines.append(terline)
82 while True:
83 try:
84 terline = content.index(ter, terline+1)
85 terlines.append(terline)
86 except:
87 return terlines
88
89 def get_coordinates(species, atoms):
90 ''' Find coordinates of atoms of interest of the species of interest,
91 whether the solute or the solvent. The atoms should have been defined in
92 the solute_atoms and solvent_atoms variables'''
93
94 atom_arrays = []
95 coordinates = []
96
97 # scan through species for specific atom entry and store into atom_arrays
98 for entry in species:
99 for atom in atoms:
100 if atom in entry:
101 atom_arrays.append(entry)
102
103 # extract only the desired elements from the atom_arrays and store them in
104 # coordinates list.
105 for element in atom_arrays:
106 array = element.split()
107 coordinates.append(array[4])
108 coordinates.append(array[5:8])
109
110 # in binary solvents, the coordinates will sometimes not be found in a
111 # particular solvent molecule. return None and test for it in the main
112 # body.
113 if not coordinates:
114 return [None,None]
115
116 # the molecular id is the first entry in the coordinates list
117 mol_id = coordinates[0]
118
119 # the actual coordinates are the even entries in the coordinates list
120 coordinates = [coordinates[i] for i in range(len(coordinates)) if float(i)
121 % 2 != 0]
122 return mol_id, coordinates
123
124 def vectorize(coordinate1, coordinate2):
125 '''Take coordinates and return a vector'''
126
127 # extract x, y, z values from coordinates
128 # make them floats for the subtraction operations in the return statement
129 x1 = float(coordinate1[0])
130 x2 = float(coordinate2[0])
131 y1 = float(coordinate1[1])
132 y2 = float(coordinate2[1])
133 z1 = float(coordinate1[2])
134 z2 = float(coordinate2[2])
135 return [x2-x1, y2-y1, z2-z1]
136
137 def dotproduct(vector1, vector2):
138 '''Return the dot product between two vectors'''
139
140 return vector1[0]*vector2[0]+vector1[1]*vector2[1]+vector1[2]*vector2[2]
141
142 def crossproduct(vector1, vector2):
143 '''Find the cross product between two vectors'''
144
145 return [vector1[1]*vector2[2]-vector1[2]*vector2[1],
146 vector1[2]*vector2[0]-vector1[0]*vector2[2],
147 vector1[0]*vector2[1]-vector1[1]*vector2[0]]
148
149 def magnitude(vector):
150 '''Return the magnitude of a vector'''
151
152 return sqrt(vector[0]*vector[0]+vector[1]*vector[1]+
153 vector[2]*vector[2])
154
155 def unit(vector):
156 '''Return the unit vector of a vector'''
157
158 mag = magnitude(vector)
159 unit_vector = []
160 for scalar in vector:
161 unit_vector.append(scalar/mag)
162 return unit_vector
163
164 def center(coordinate1, coordinate2):
165 ''' Given two coordinates, find the midpoint between them.
166 This function is used to approximate the center of a species. '''
167 x1 = float(coordinate1[0])
168 x2 = float(coordinate2[0])
169 y1 = float(coordinate1[1])
170 y2 = float(coordinate2[1])
171 z1 = float(coordinate1[2])
172 z2 = float(coordinate2[2])
173
174 return [(x2+x1)/2, (y2+y1)/2, (z2+z1)/2]
175
176 def distance(coordinate1, coordinate2):
177 ''' Find the distance between two points'''
178 x1 = float(coordinate1[0])
179 x2 = float(coordinate2[0])
180 y1 = float(coordinate1[1])
181 y2 = float(coordinate2[1])
182 z1 = float(coordinate1[2])
183 z2 = float(coordinate2[2])
184
185 return sqrt((x2-x1)**2 + (y2-y1)**2 + (z2-z1)**2)
186
187 def normal(coordinates):
188 ''' Given three coordinates, find the normal to the plane created by the
189 three coordinates'''
190
191 vector1 = vectorize(coordinates[0], coordinates[1])
192 vector2 = vectorize(coordinates[1], coordinates[2])
193
194 return crossproduct(vector1, vector2)
195
196 def calculate_angle(normal1, normal2):
197 ''' Calculate the angle between two planes.'''
198
199 # make normals into unit vectors first
200 normal1 = unit(normal1)
201 normal2 = unit(normal2)
202
203 # the angle between the two planes of atomic coordinates
204 # is the arccosine of dot product of the two normals.
205 return degrees(acos(dotproduct(normal1, normal2)))
206
207 def stddev(all_values, average_value):
208 '''Find the standard deviation of the angles'''
209
210 # create a new list which is the difference between the average of the
211 # values and each individual value.
212 square_diff_from_average = [pow((i-average_value), 2) for i in all_values]
213
214 # the standard deviation is square root of the sum of the differences
215 # between each each value and the average squared divided by the number of
216 # values in the list ('population' standard deviation).
217 return sqrt(sum(square_diff_from_average)/len(square_diff_from_average))
218
219 ## MAIN ######################################################################
220
221 # initialize dictionary to store solvent data into
222 solvent_data = {}
223
224 print("Using a cutoff of {0} Angstroms.".format(cutoff))
225 output.write("Using a cutoff of {0} Angstroms.\n\n".format(cutoff))
226
227 for pdb in pdbs:
228 with open(pdb, 'r') as pdb_file:
229 contents = pdb_file.read().split('\n')
230 TERlines = find_TERs(contents)
231
232 # find solutes coordinates, center, and normal vector
233 solute = contents[0:TERlines[0]]
234 solute_coordinates = get_coordinates(solute, solute_atoms)[1]
235 solute_center = center(solute_coordinates[1], solute_coordinates[2])
236 solute_normal = normal(solute_coordinates)
237
238 solvents = []
239 for i in range(len(TERlines)-1):
240 solvent = contents[TERlines[i]:TERlines[i+1]]
241 solvents.append(solvent)
242
243 # keep track of how many solvents are within cutoff in each pdb
244 j = 0
245 for solvent in solvents:
246 solvent_name, solvent_coordinates = get_coordinates(solvent, solvent_atoms)
247
248 if solvent_coordinates is None:
249 continue
250
251 solvent_name = int(solvent_name)
252 solvent_center = center(solvent_coordinates[1],
253 solvent_coordinates[2])
254 radius = distance(solute_center, solvent_center)
255
256 if radius < cutoff:
257 solvent_normal = normal(solvent_coordinates)
258 angle = calculate_angle(solute_normal, solvent_normal)
259 if solvent_name not in solvent_data:
260 solvent_data[solvent_name] = {'pdb': [],
261 'distance': [], 'angle': []}
262 solvent_data[solvent_name]['pdb'].append(pdb)
263 solvent_data[solvent_name]['distance'].append(radius)
264 solvent_data[solvent_name]['angle'].append(angle)
265 j += 1
266
267 else:
268 continue
269
270 print("PDB {0} contains {1} solvent(s) within the cutoff.".format(pdb, j))
271 output.write("PDB {0} contains {1} solvent(s) within the cutoff.\n".format(pdb, j))
272
273 # end of for pdb loop
274
275 output.write("\n")
276
277 # analyze the accepted solvent data.
278 for accepted in sorted(solvent_data):
279 output.write('Solvent ID: {0}\n'.format(accepted))
280
281 for index in range(len(solvent_data[accepted]['pdb'])):
282 output.write('+ PDB name: {0}\t\t'.format(solvent_data[accepted]['pdb'][index]))
283 output.write('Distance: {0:.2f}\t'.format(solvent_data[accepted]['distance'][index]))
284 output.write('Angle: {0:.2f}\t\n'.format(solvent_data[accepted]['angle'][index]))
285
286 output.write('== AVERAGES ==\n')
287 average_distance = sum(solvent_data[accepted]['distance'])/len(solvent_data[accepted]['distance'])
288 distance_stddev = stddev(solvent_data[accepted]['distance'],
289 average_distance)
290 average_angle = sum(solvent_data[accepted]['angle'])/len(solvent_data[accepted]['angle'])
291 angle_stddev = stddev(solvent_data[accepted]['angle'], average_angle)
292 output.write("Distance:\t{0:.2f} +- ".format(average_distance))
293 output.write("{0:.2f}.\n".format(distance_stddev))
294 output.write("Angle: \t{0:.2f} +- ".format(average_angle))
295 output.write("{0:.2f}.\n\n".format(angle_stddev))
296
297 output.close()
298 exit(0)