File: water.R

package info (click to toggle)
isospec 2.3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,476 kB
  • sloc: cpp: 9,530; python: 2,095; makefile: 180; ansic: 100; sh: 88
file content (72 lines) | stat: -rw-r--r-- 3,251 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
library(IsoSpecR)

water = c(H=2,O=1) # A water molecule:
p = .99 # joint probability of the output

# calculate one of the smallest sets of isotopologues with
# probability at least equal to 99%
WATER = IsoSpecify(molecule = water, stopCondition = p)
print(WATER)

# the outcome looks small, but water can only have 3*3 = 9 isotopologues.
# (for a general formula, see the formula in the supplement to our paper)
# you can see all of them by setting the probability to 1: this will include all isotopologues.
WATER = IsoSpecify(molecule = water, stopCondition = 1)
print(WATER)

# The default output consists of a matrix with two columns: mass and probability of the 
# isotopologues
print(WATER[,"mass"])
print(WATER[,"prob"])

# to get the probabilities, simply run
WATER = IsoSpecify(molecule = water, stopCondition = 1, showCounts=T)
print(WATER)

# Let's get a bigger example: bovine insulin
bovine_insulin = c(C=254, H=377, N=65, O=75, S=6)
BOVINE_INSULIN = IsoSpecify(bovine_insulin, .999)
# as you can see, there are 1301 isotopologs such that their probability summed together
# exceed .99. This is one of the smallest such sets.
print(nrow(BOVINE_INSULIN))

total_probability = sum(BOVINE_INSULIN[,'prob'])
print(total_probability)

# there are other ways to get this outcome, but this one is the fastest.
# if you want to get the isotopologues sorted by probability, you can run the
# priority-queue version of the algorithm.
BOVINE_INSULIN = IsoSpecify(bovine_insulin, .999, algo=1)
is.unsorted(BOVINE_INSULIN[,'prob'][nrow(BOVINE_INSULIN):1])
# is.unsorted == FALSE is the same as sorted == TRUE. It's elementary :)
# using the priority queue has poorer time complexity, which is O(n*log(n)).
T0 = Sys.time()
BOVINE_INSULIN = IsoSpecify(bovine_insulin, .999, algo=1)
T1 = Sys.time()
BOVINE_INSULIN = IsoSpecify(bovine_insulin, .999, algo=0)
T2 = Sys.time()

print(T1 - T0)
print(T2 - T1)
# If, for some reason, you just want to generate peaks above certain probability threshold,
# you could use the third algorithm
BOVINE_INSULIN_peaks_more_probable_than_10_percent_each = IsoSpecify(bovine_insulin, .1, algo=3)
print(BOVINE_INSULIN_peaks_more_probable_than_10_percent_each[,'prob'])

# ATTENTION: it is not a good idea to do it.
# Remember, that we supply you with infinitely resolved peaks.
# your experimental peaks are most likely generated by an mass spctrometer,
# that does not share this quality. Hence, the smallest observed peak might 
# be actually a sum of some smaller, unresolved peaks, giving you false impression of security.

# Anyway, if you still want to, you can also precise the limiting height of the peak w.r.t.
# the height of the heighest peak.
# You can achieve this with algorithm 3:
BOVINE_INSULIN_peaks_higher_than_the_one_ten_thousandth_of_the_height_of_the_heighest_peak = IsoSpecify(bovine_insulin, 1e-04, algo=3)
LP = BOVINE_INSULIN_peaks_higher_than_the_one_ten_thousandth_of_the_height_of_the_heighest_peak[,'prob']
print(LP[length(LP)] - LP[1])
# so, as you can see, this is a peak very close to 1/10,000, but still above the threshold.
# The next most probable that was not yet calculated, would have had that ratio below this number.
# You are invited to test it yourself :)

# Matteo and MichaƂ.