1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
|
#include <Python.h>
#include "2bit.h"
/* Version string of the py2bit extension (NOTE(review): presumably exported as the module's version attribute — confirm where it is consumed). */
#define pyTwoBitVersion "0.3.3"
/*
 * Instance layout of the Python object that wraps a 2bit file.
 * PyObject_HEAD must remain the first member so the struct can be used as a
 * PyObject.
 */
typedef struct {
PyObject_HEAD
TwoBit *tb; // Underlying 2bit handle (TwoBit is presumably declared in 2bit.h — confirm)
int storeMasked; //Whether storeMasked was set. 0 = False, 1 = True
} pyTwoBit_t;
/* Module-level entry point, registered in tbMethods as "open". */
static PyObject *py2bitOpen(PyObject *self, PyObject *args, PyObject *kwds);

/* Object lifecycle: registered in tbObjMethods as __enter__, close and __exit__. */
static PyObject *py2bitEnter(pyTwoBit_t *pybw, PyObject *args);
static PyObject *py2bitClose(pyTwoBit_t *pybw, PyObject *args);

/* Queries taking positional arguments only (METH_VARARGS in tbObjMethods). */
static PyObject *py2bitInfo(pyTwoBit_t *pybw, PyObject *args);
static PyObject *py2bitChroms(pyTwoBit_t *pybw, PyObject *args);

/* Queries accepting keyword arguments (METH_VARARGS|METH_KEYWORDS in tbObjMethods). */
static PyObject *py2bitSequence(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *py2bitBases(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
static PyObject *py2bitSoftMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);

/* Destructor installed as tp_dealloc of the pyTwoBit type object. */
static void py2bitDealloc(pyTwoBit_t *pybw);
/*
 * Module-level method table: the functions callable directly on the py2bit
 * module. The docstring is assembled from adjacent string literals, one per
 * output line, and the table ends with the all-NULL sentinel entry.
 */
static PyMethodDef tbMethods[] = {
    {"open", (PyCFunction)py2bitOpen, METH_VARARGS|METH_KEYWORDS,
     "Open a 2bit file.\n"
     "\n"
     "Returns:\n"
     "A TwoBit object on success, otherwise None.\n"
     "\n"
     "Arguments:\n"
     "file: The name of a 2bit file.\n"
     "\n"
     "Optional arguments:\n"
     "storeMasked: Whether to store information about soft-masking (default False).\n"
     "\n"
     "Note that storing soft-masking information can be memory intensive and doing so\n"
     "will result in soft-masked bases being lower case if the sequence is fetched\n"
     "(see the sequence() function)\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"some_file.2bit\")\n"
     "\n"
     "To store soft-masking information:\n"
     ">>> tb = py2bit.open(\"some_file.2bit\", True)"},
    {NULL, NULL, 0, NULL} /* sentinel */
};
/*
 * Method table of the TwoBit object (referenced from the type object's
 * tp_methods slot). Each entry is {python name, C implementation, calling
 * convention flags, docstring}; the table ends with the all-NULL sentinel.
 *
 * Docstrings are assembled from adjacent string literals, one per output
 * line. The usage examples in them are what users see via help(), so they
 * must be valid calls: methods are invoked on the object (tb.bases("chr1"),
 * not tb.bases(tb, "chr1")) and integer counts from bases() require
 * fraction=False because fraction defaults to True.
 */
static PyMethodDef tbObjMethods[] = {
    {"info", (PyCFunction)py2bitInfo, METH_VARARGS,
     "Returns a dictionary containing the following key:value pairs: \n"
     "\n"
     "* The file size, in bytes ('file size').\n"
     "* The number of chromosomes/contigs ('nChroms').\n"
     "* The total sequence length ('sequence length').\n"
     "* The total hard-masked length ('hard-masked length').\n"
     "* The total soft-masked length, if available ('soft-masked length').\n"
     "\n"
     "A base is hard-masked if it is an N and soft-masked if it's lower case. Note that soft-masking is ignored by default (you must specify 'storeMasked=True' when you open the file).\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"some_file.2bit\")\n"
     ">>> tb.info()\n"
     "{'file size': 160L, 'nChroms': 2L, 'sequence length': 250L, 'hard-masked length': 150L, 'soft-masked length': 8L}\n"
     ">>> tb.close()\n"},
    {"close", (PyCFunction)py2bitClose, METH_VARARGS,
     "Close a 2bit file.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"some_file.2bit\")\n"
     ">>> tb.close()\n"},
    {"chroms", (PyCFunction)py2bitChroms, METH_VARARGS,
     "Return a chromosome: length dictionary. The order is typically not\n"
     "alphabetical and the lengths are long (thus the 'L' suffix).\n"
     "\n"
     "Optional arguments:\n"
     "chrom: An optional chromosome name\n"
     "\n"
     "Returns:\n"
     "A list of chromosome lengths or a dictionary of them.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"test/test.2bit\")\n"
     ">>> tb.chroms()\n"
     "{'chr1': 150L, 'chr2': 100L}\n"
     "\n"
     "Note that you may optionally supply a specific chromosome:\n"
     "\n"
     ">>> tb.chroms(\"chr1\")\n"
     "150L\n"
     "\n"
     "If you specify a non-existent chromosome then no output is produced:\n"
     "\n"
     ">>> tb.chroms(\"foo\")\n"
     ">>>\n"},
    {"sequence", (PyCFunction)py2bitSequence, METH_VARARGS|METH_KEYWORDS,
     "Retrieve the sequence of a chromosome, or subset of it. On error, a runtime\n"
     "exception is thrown.\n"
     "\n"
     "Positional arguments:\n"
     "chr: Chromosome name\n"
     "\n"
     "Keyword arguments:\n"
     "start: Starting position (0-based)\n"
     "end: Ending position (1-based)\n"
     "\n"
     "Returns:\n"
     "A string containing the sequence.\n"
     "\n"
     "If start and end aren't specified, the entire chromosome is returned. If the\n"
     "end value is beyond the end of the chromosome then it is adjusted accordingly.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"test/test.2bit\")\n"
     ">>> tb.sequence(\"chr1\")\n"
     "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATCGATCGTAGCTAGCTAGCTAGCTGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n"
     ">>> tb.sequence(\"chr1\", 24, 74)\n"
     "NNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATC\n"
     ">>> tb.close()"},
    {"bases", (PyCFunction)py2bitBases, METH_VARARGS|METH_KEYWORDS,
     "Retrieve the percentage or number of A, C, T, and Gs in a chromosome or subset\n"
     "thereof. On error, a runtime exception is thrown.\n"
     "\n"
     "Positional arguments:\n"
     "chr: Chromosome name\n"
     "\n"
     "Optional keyword arguments:\n"
     "start: Starting position (0-based)\n"
     "end: Ending position (1-based)\n"
     "fraction: Whether to return fractional or integer values (default 'True',\n"
     "so fractional values are returned)\n"
     "\n"
     "Returns:\n"
     "A dictionary with nucleotide as the key and fraction (or count) as the\n"
     "value.\n"
     "\n"
     "If start and end aren't specified, the entire chromosome is returned. If the\n"
     "end value is beyond the end of the chromosome then it is adjusted accordingly.\n"
     "\n"
     "Note that the fractions will sum to much less than 1 if there are hard-masked\n"
     "bases. Counts may sum to less than the length of the region for the same reason.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"test/test.2bit\")\n"
     ">>> tb.bases(\"chr1\")\n"
     "{'A': 0.08, 'C': 0.08, 'T': 0.08666666666666667, 'G': 0.08666666666666667}\n"
     ">>> tb.bases(\"chr1\", 24, 74)\n"
     "{'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12}\n"
     ">>> tb.bases(\"chr1\", 24, 74, False)\n"
     "{'A': 6, 'C': 6, 'T': 6, 'G': 6}\n"
     ">>> tb.close()"},
    {"hardMaskedBlocks", (PyCFunction)py2bitHardMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
     "Retrieve a list of hard-masked blocks on a single-chromosome (or range on it).\n"
     "\n"
     "Positional arguments:\n"
     "chr: Chromosome name\n"
     "\n"
     "Optional keyword arguments:\n"
     "start: Starting position (0-based)\n"
     "end: Ending position (1-based)\n"
     "\n"
     "Returns:\n"
     "A list of tuples, with items start and end.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"test/test.2bit\")\n"
     ">>> print(tb.hardMaskedBlocks(\"chr1\"))\n"
     "[(0, 50), (100, 150)]\n"
     ">>> print(tb.hardMaskedBlocks(\"chr1\", 75, 100))\n"
     "[]\n"
     ">>> print(tb.hardMaskedBlocks(\"chr1\", 75, 101))\n"
     "[(100, 150)]\n"
     ">>> tb.close()"},
    {"softMaskedBlocks", (PyCFunction)py2bitSoftMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
     "Retrieve a list of soft-masked blocks on a single-chromosome (or range on it).\n"
     "\n"
     "Positional arguments:\n"
     "chr: Chromosome name\n"
     "\n"
     "Optional keyword arguments:\n"
     "start: Starting position (0-based)\n"
     "end: Ending position (1-based)\n"
     "\n"
     "Returns:\n"
     "A list of tuples, with items start and end.\n"
     "\n"
     ">>> import py2bit\n"
     ">>> tb = py2bit.open(\"test/test.2bit\", storeMasked=True)\n"
     ">>> print(tb.softMaskedBlocks(\"chr1\"))\n"
     "[(62, 70)]\n"
     ">>> print(tb.softMaskedBlocks(\"chr1\", 0, 50))\n"
     "[]\n"
     ">>> tb.close()"},
    /* Context-manager protocol: __exit__ reuses the close() implementation. */
    {"__enter__", (PyCFunction) py2bitEnter, METH_NOARGS, NULL},
    {"__exit__", (PyCFunction) py2bitClose, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL} /* sentinel */
};
#if PY_MAJOR_VERSION >= 3
/* Layout of the per-module state block that GETSTATE() casts to. */
struct py2bitmodule_state {
    PyObject *error;
};

/* Fetch the module state of module object m, typed as py2bitmodule_state. */
#define GETSTATE(m) ((struct py2bitmodule_state*)PyModule_GetState(m))

/*
 * Python 3 module definition for "py2bit". Members are named explicitly;
 * every member not listed here is zero-initialised (NULL).
 */
static PyModuleDef py2bitmodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "py2bit",
    .m_doc = "A python module for accessing 2bit files",
    .m_size = -1,
    .m_methods = tbMethods
};
#endif
/*
 * Python type object for pyTwoBit_t instances, exposed as "py2bit.pyTwoBit".
 *
 * The initialiser is positional so that it works with both the Python 2 and
 * Python 3 layouts of PyTypeObject; the slot each value lands in is named in
 * the trailing comment. Instance methods come from tbObjMethods and
 * destruction is handled by py2bitDealloc. tp_new is left empty here.
 */
static PyTypeObject pyTwoBit = {
#if PY_MAJOR_VERSION >= 3
    PyVarObject_HEAD_INIT(NULL, 0)
#else
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
#endif
    "py2bit.pyTwoBit",         /*tp_name*/
    /* Size of one instance. This must be the instance struct (pyTwoBit_t);
     * sizeof(pyTwoBit) would be the size of this PyTypeObject and would
     * over-allocate every object. */
    sizeof(pyTwoBit_t),        /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    (destructor)py2bitDealloc, /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    0,                         /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash*/
    0,                         /*tp_call*/
    0,                         /*tp_str*/
    PyObject_GenericGetAttr,   /*tp_getattro*/
    PyObject_GenericSetAttr,   /*tp_setattro*/
    0,                         /*tp_as_buffer*/
#if PY_MAJOR_VERSION >= 3
    Py_TPFLAGS_DEFAULT,        /*tp_flags*/
#else
    Py_TPFLAGS_HAVE_CLASS,     /*tp_flags*/
#endif
    "2bit File",               /*tp_doc*/
    0,                         /*tp_traverse*/
    0,                         /*tp_clear*/
    0,                         /*tp_richcompare*/
    0,                         /*tp_weaklistoffset*/
    0,                         /*tp_iter*/
    0,                         /*tp_iternext*/
    tbObjMethods,              /*tp_methods*/
    0,                         /*tp_members*/
    0,                         /*tp_getset*/
    0,                         /*tp_base*/
    0,                         /*tp_dict*/
    0,                         /*tp_descr_get*/
    0,                         /*tp_descr_set*/
    0,                         /*tp_dictoffset*/
    0,                         /*tp_init*/
    0,                         /*tp_alloc*/
    0,                         /*tp_new*/
    0,0,0,0,0,0                /*remaining slots left empty*/
};
|