1 '''
2 Created 2012
3 @author: GieseS
4
5
6 Little plotting script which is called in the analysis of different mappings to an artificial reference genome.
7 It produces the following plots:
8
9
10
11
12 1) ROC Curve
13 2) Overview histograms for FP / TP.
14
15
16 '''
17
18 import numpy as np
19 import matplotlib.pyplot as plt
20 from mpl_toolkits.mplot3d import Axes3D
21 import random
22 import time
23 import pylab as p
24
25
26
28 """Approximates the integral through the points a,b"""
29 index = [i+1 for i in xrange(len(x)-1)]
30 xdiff = np.array([x[i]-x[i-1] for i in index])
31 ysum = np.array([y[i]+y[i-1] for i in index])
32 return(np.dot(xdiff,ysum)/2)
33
34
35
36
37
38 """
39 abbreviations:
40 tp = True positives
41 fp = false positives
42 NM = number of mismtaches
43 mq = mapping quality
44 rq = readquality
45 subs = substitutions in artifical reference genome (ARG)
46 """
47
48
49
50
51
52 -def CalculateRoc2(dataArray,prefix,readsize,uniquehits,mappedreads,filename):
53 """
54 Calculates the adjusted ROC curve as well as the AUC value derived from the adjusted points
55 and writes the ROC tables to .txt files.
56 """
57 starttime= time.time()
58 uniquehits = float(uniquehits)
59 readsize = float(readsize)
60
61
62 entries = len(dataArray)
63
64
65 resultmatrix = np.arange(entries*2)
66 resultmatrix = resultmatrix.reshape(2,entries)
67
68 maxrq = max(x.rq for x in dataArray)
69 maxnm = max(x.nm[0] for x in dataArray)
70 maxGaps= max(x.gaps[0] for x in dataArray)
71 maxMism= max(x.mism[0] for x in dataArray)
72
73
74 minrq = min(x.rq for x in dataArray)
75 minnm = min(x.nm[0] for x in dataArray)
76 minmq= min(x.mq[0] for x in dataArray)
77 minGaps= min(x.gaps[0] for x in dataArray)
78 minMism= min(x.mism[0] for x in dataArray)
79
80
81
82 quants = [1,2,3,4,5]
83 tempa = maxrq-minrq
84 stepsize = tempa/5
85
86 rqQuants = [round(minrq+(i-1)*stepsize,3) for i in quants]
87 rqQuants.reverse()
88 rqQuants[-1] =0
89
90 nmQuants = [i*maxnm/5 for i in quants]
91 GapsQuants = [i*maxGaps/5 for i in quants]
92 MismQuants = [i*maxMism/5 for i in quants]
93
94 rocvector = []
95
96
97 for l in quants:
98 for k in quants:
99 for j in quants:
100 temparray = [m for m in dataArray if m.gaps[0] <= GapsQuants[k-1] and m.mism[0] <= MismQuants[j-1] and m.rq >=rqQuants[l-1]]
101
102
103 tempids = [m.id for m in temparray]
104 uniquereads = {}
105 for i in xrange(0,len(tempids)):
106 uniquereads[tempids[i]] = ""
107
108 mappedreads = len(uniquereads)
109
110
111
112 templength = len(temparray)
113
114 if templength == 0:
115 continue
116 else:
117 tempTP = sum(x.mr[0] for x in temparray)
118 tempFP =templength-tempTP
119 F = round((float(mappedreads)/ readsize) ,3)
120 sens = round((tempTP/ uniquehits) * F,3)
121 if tempFP == 0:
122 spec = 0
123 else:
124 spec = round((tempFP / uniquehits) * F,3)
125
126 rocvector.append([rqQuants[l-1],GapsQuants[k-1],MismQuants[j-1],tempTP,tempFP,templength,sens,spec,F])
127
128
129
130
131
132
133
134 rocvector.append([0,0,0,0,0,0,0,0,0])
135 nproc = np.array(rocvector)
136
137
138
139
140 sens = [i[-3] for i in nproc]
141 spez = [i[-2] for i in nproc]
142
143
144
145
146 spez[-1] = 1
147 sens[-1] = sens[-2]
148
149
150 rocarray1 = np.array([sens,spez])
151 rocarray1 = rocarray1.flatten('F')
152 rocarray1= rocarray1.reshape((len(spez),2))
153
154 rocarray = np.array([sens,spez])
155 rocarray = rocarray.flatten('F')
156 rocarray = rocarray.reshape((len(spez),2))
157 rocarray = np.sort(rocarray.view('float,float'), order=['f0','f1'], axis=0).view(np.float)
158
159 rocarrayCorrected = rocarray
160
161
162
163 for m in range(len(rocarrayCorrected)-2,-1,-1):
164 if (rocarrayCorrected[m,1] >= rocarrayCorrected[m+1,1]):
165 rocarrayCorrected[m,1] = rocarrayCorrected[m+1,1]
166
167
168
169 plt.hold(True)
170 plt.figure()
171 plt.subplot(111)
172
173
174 plt.plot(rocarrayCorrected[:,1],rocarrayCorrected[:,0], marker='o', markersize=7,linestyle='--', color='r', label='projected')
175 plt.plot(rocarray1[:,1], rocarray1[:,0], linestyle="None",label='real',marker='.',color='g')
176 plt.xlabel('1-specificity')
177 plt.ylabel('sensitivity')
178 plt.title(r'ROC:'+filename)
179 plt.axis([-0.1,1.1,-0.1,1.1])
180 plt.grid(True)
181 plt.legend(loc='lower right')
182 plt.tight_layout()
183 plt.savefig(prefix + "_ROC.pdf",format='pdf')
184 plt.clf
185
186
187 AUC = trapezoidal_rule(rocarrayCorrected[:,1], rocarrayCorrected[:,0])
188
189 fobj = open(prefix+"_roctable.txt","w")
190 fobj.write("RQ\tGAPS\tMM\tPTP\tFP\tP\tSn\t1-Sp\tF\r\n")
191 for i in xrange(0,len(rocvector),1):
192 temp = [str(k) for k in rocvector[i]]
193 tempstr = "\t".join(temp)
194 fobj.write(tempstr+"\r\n")
195
196 endtime= time.time()
197 print ("X"),
198 return(round(AUC,3))
199
201 """Plots true positives and false positives into 2 different histogram subplots. """
202 prefix2 = "/".join(prefix.split("/")[0:-1])+"/"
203 fobj = open(prefix2+"indexMappingTools.txt","w")
204 for i in range(0,len(label)):
205 fobj.write("%s - %s\r\n" %(i+1,mappernames[i]))
206 fobj.close()
207
208 x = [i for i in range(1,len(fp)*3,3)]
209 xmax = max(x)+1
210 ymaxTP = max(tp)+0.1
211 ymaxFP = max(fp)+0.1
212
213
214 y = tp
215 x =x
216 z = fp
217
218
219 fig = p.figure()
220
221 if len(label) <= 7:
222 widthp = 0.7
223 ticks = label
224 else:
225 widthp = 0.3
226 ticks = [i if i%2 == 0 else "" for i in label]
227
228
229 ax1 = fig.add_subplot(1,2,1)
230 ax1.bar(x,y,width=widthp, facecolor='darkgreen')
231 ax1.set_ylabel('#TP hits')
232 ax1.set_xlabel('index mapping tool')
233 ax1.set_title("Global comparison #TP hits")
234 p.xticks(x,ticks)
235 p.grid(True)
236 p.axis([0,xmax,0,ymaxTP+ymaxTP*10/100])
237
238
239 ax2 = fig.add_subplot(1,2,2)
240 ax2.bar(x,z,width=widthp, facecolor='darkred')
241 ax2.set_ylabel('#FP hits')
242 ax2.set_xlabel('index mapping tool')
243 ax2.set_title("Global comparison #FP hits")
244 p.axis([0,xmax,0,ymaxFP+ymaxFP*10/100])
245 p.xticks(x,ticks)
246 p.grid(True)
247 plt.tight_layout()
248 p.savefig(prefix2 + "Overall_histabs.pdf",format='pdf')
249 p.clf()
250
251
252 tpsum =sum(tp)
253 fpsum =sum(fp)
254 y = [i/float(tpsum) for i in tp]
255 x =x
256 z = [i/float(fpsum) for i in fp]
257 ymax = max(max(z),max(y))+0.2
258
259 fig = p.figure()
260
261 if len(label) <= 7:
262 ticks = label
263 else:
264 ticks = [i if i%2 == 0 else "" for i in label]
265
266
267 ax1 = fig.add_subplot(1,2,1)
268
269
270 ax1.bar(x,y,width=widthp, facecolor='darkgreen')
271 ax1.set_ylabel('%TP hits')
272 ax1.set_xlabel('index mapping tool')
273 ax1.set_title("Global comparison %TP hits")
274 p.xticks(x,ticks)
275 p.grid(True)
276 p.axis([0,xmax,0,1.1])
277
278
279 ax2 = fig.add_subplot(1,2,2)
280 ax2.bar(x,z,width=widthp, facecolor='darkred')
281 ax2.set_ylabel('%FP hits')
282 ax2.set_xlabel('index mapping tool')
283 ax2.set_title("Global comparison %FP hits")
284 p.axis([0,xmax,0,1.1])
285 p.xticks(x,ticks)
286 p.grid(True)
287 plt.tight_layout()
288 p.savefig(prefix2 + "Overall_histper.pdf",format='pdf')
289 p.clf()
290
291 print ("X"),
292