File: algorithms.py

package info (click to toggle)
python-pynlpl 1.2.9-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,900 kB
  • sloc: python: 25,677; sh: 73; makefile: 3
file content (65 lines) | stat: -rw-r--r-- 2,177 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

###############################################################9
# PyNLPl - Algorithms
#   by Maarten van Gompel
#   Centre for Language Studies
#   Radboud University Nijmegen
#   http://www.github.com/proycon/pynlpl
#   proycon AT anaproy DOT nl
#
#       Licensed under GPLv3
#
###############################################################

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

def sum_to_n(n, size, limit=None): #from http://stackoverflow.com/questions/2065553/python-get-all-numbers-that-add-up-to-a-number
    """Produce all lists of `size` positive integers in decreasing order
    that add up to `n`."""
    if size == 1:
        yield [n]
        return
    if limit is None:
        limit = n
    start = (n + size - 1) // size
    stop = min(limit, n - size + 1) + 1
    for i in range(start, stop):
        for tail in sum_to_n(n - i, size - 1, i):
            yield [i] + tail


def consecutivegaps(n, leftmargin = 0, rightmargin = 0):
    """Compute all possible single consecutive gaps in any sequence of the specified length. Returns
    (beginindex, length) tuples. Runs in  O(n(n+1) / 2) time. Argument is the length of the sequence rather than the sequence itself"""
    begin = leftmargin
    while begin < n:
        length = (n - rightmargin) - begin
        while length > 0:
            yield (begin, length)
            length -= 1
        begin += 1

def possiblesplits(n, minsplits=2, maxsplits=0):
    """Returns lists of (index,length) tuples, representing all possible splits of a sequence of length n."""
    if not maxsplits: maxsplits = n
    for nrsplits in range(minsplits,maxsplits + 1):
        for split in sum_to_n(n,nrsplits):
            split_with_indices = []
            begin = 0
            for length in split:
                split_with_indices.append( (begin, length) )
                begin += length
            yield split_with_indices

def bytesize(n):
    """Return the required size in bytes to encode the specified integer"""
    for i in range(1, 1000):
        if n < 2**(8*i):
            return i