1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
#!/usr/bin/env python
# A demonstration of using khmer for exact k-mer counting. The memory required
# is 4^k, which limits this to small values of k.
from __future__ import print_function
import khmer
# Note:
#   - The forward and reverse complements will be collapsed since in this case
#     k is even (see the 'TTTTTT' example below, which is tallied under
#     'AAAAAA').
#     NOTE(review): confirm against the khmer docs whether the collapsing
#     really depends on k being even.
#   - There are 4^k possible sequences of length k.
#   - If the table size provided to the countgraph is not a prime number, it
#     will select the next lowest prime number. So here we are requesting a
#     table size of *slightly more* than 4^k rather than *slightly less* so we
#     can avoid any false positives.
ksize = 6
nkmers = 4**ksize
tablesize = nkmers + 10

# Initialize countgraph.
# The trailing 1 is presumably the number of tables -- TODO confirm against
# the khmer Countgraph API.
cg = khmer.Countgraph(ksize, tablesize, 1)
print('Created a countgraph with', cg.hashsizes(), 'buckets')

# Increment the count of some k-mers
cg.count('ATGGCA')
cg.count('ATGGCA')
cg.count('ACATGG')
cg.count('AAAAAA')
cg.count('TTTTTT')  # this will be counted towards AAAAAA

# Show all >0 k-mer abundances from the table: every integer in
# [0, nkmers) is passed to get(), and the ones with a non-zero count are
# turned back into their k-mer string with reverse_hash().
for i in range(nkmers):
    count = cg.get(i)  # look up once; reused for the test and the output
    if count:
        print(cg.reverse_hash(i), count)

# Note: The reverse_hash function is only available for Countgraph and
# Nodegraph, not Counttable and Nodetable.
|