File: gtf_io.py

package info (click to toggle)
python-bioframe 0.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,000 kB
  • sloc: python: 5,860; makefile: 38; sh: 13
file content (16 lines) | stat: -rw-r--r-- 555 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import pandas as pd


def parse_gtf_attributes(attrs, kv_sep="=", item_sep=";", quotechar='"', **kwargs):
    """
    Expand a column of GTF/GFF attribute strings into a table of attributes.

    Each string is split into items on ``item_sep``; each item is split into
    a key and a value on the *first* occurrence of ``kv_sep``. Keys and values
    are stripped of surrounding spaces and ``quotechar`` characters.

    Parameters
    ----------
    attrs : pandas.Series
        Series of attribute strings, e.g. ``'ID=gene1;Name="abc"'``.
    kv_sep : str, optional
        Separator between a key and its value. Default is ``"="``
        (use ``" "`` for GTF-style ``gene_id "X"`` pairs).
    item_sep : str, optional
        Separator between key-value items. Passed to ``Series.str.split``.
        Default is ``";"``.
    quotechar : str, optional
        Quote character(s) stripped from both ends of keys and values.
        Default is ``'"'``.
    **kwargs
        Passed through to ``pandas.DataFrame.from_records``.

    Returns
    -------
    pandas.DataFrame
        One row per input string (in input order) and one column per
        attribute key. Attributes absent from a row are missing values.

    Notes
    -----
    Items without ``kv_sep`` (such as the empty item after a trailing
    ``item_sep``) are skipped. If a key is repeated within one string, the
    last occurrence wins. Missing input entries yield rows with no attributes.
    """
    stripchars = quotechar + " "

    def to_record(items):
        # ``str.split`` leaves missing entries (NaN/None/NA) as scalars;
        # treat them as rows without any attributes instead of failing.
        if not pd.api.types.is_list_like(items):
            return {}
        record = {}
        for item in items:
            # Split on the first separator only, so that values which
            # themselves contain ``kv_sep`` are kept rather than dropped.
            key, sep, value = item.strip().partition(kv_sep)
            if not sep:
                continue
            record[key.strip(stripchars)] = value.strip(stripchars)
        return record

    # Build a plain list so that record construction does not depend on the
    # index labels of the input Series.
    kv_records = [to_record(items) for items in attrs.str.split(item_sep)]
    return pd.DataFrame.from_records(kv_records, **kwargs)