File: SegmentInformationRecords.proto

package info (click to toggle)
libgoby-java 3.3.1%2Bdfsg2-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 58,108 kB
  • sloc: java: 78,105; cpp: 5,011; xml: 3,170; python: 2,108; sh: 1,575; ansic: 277; makefile: 114
file content (106 lines) | stat: -rw-r--r-- 3,089 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/** This schema is written in the proto2 language (fields carry explicit optional/repeated labels). */
syntax = "proto2";

/** Protobuf package that scopes every message declared in this file. */
package org.campagnelab.dl.varanalysis.protobuf;

/** Java package for the generated classes; identical to the protobuf package above. */
option java_package = "org.campagnelab.dl.varanalysis.protobuf";

/** Ask protoc to generate code optimized for speed (this is also protoc's default setting). */
option optimize_for = SPEED;

/**
 * Container message holding zero or more SegmentInformation records.
 */
message SegmentInformationCollection {
    /** The segments stored in this collection, one entry per segment. */
    repeated SegmentInformation records = 1;
}

/**
 * Describes one observed segment of the genome. A separate SegmentInformation
 * message is produced for every segment that is observed.
 *
 * NOTE(review): field number 3 is not used by this message. If it belonged to
 * a field that was removed, consider declaring it reserved so it is never reused.
 */
message SegmentInformation {
    /** Position in the reference sequence of the genome where the segment starts. */
    optional ReferencePosition start_position = 1;

    /**
     * Position in the reference sequence of the genome where the segment ends.
     * NOTE(review): whether this bound is inclusive is not stated here; confirm
     * against the code that writes these records.
     */
    optional ReferencePosition end_position = 2;

    /**
     * Number of bases contained in this block/segment, documented as
     * end_position.location - start_position.location.
     */
    optional uint32 length = 5;

    /** Samples aligned over this block/segment. */
    repeated Sample sample = 4;
}
/**
 * Segment information for a single sample. An .ssi file may store more than
 * one sample; when it does, the segment positions are guaranteed to be aligned
 * across samples.
 */
message Sample {
    /** Per-base records of this sample. */
    repeated Base base = 1;
}


/**
 * Per-base record: feature and label values plus the genotype annotations
 * attached to one base of a sample.
 *
 * The repeated numeric fields are declared packed. In proto2 a repeated scalar
 * is otherwise written as one tagged entry per element; packing writes the
 * elements as a single length-delimited run. Parsers accept both encodings, so
 * data written before this change remains readable and the generated API is
 * unchanged.
 */
message Base {

    /** Feature values recorded for this base. */
    repeated float features = 1 [packed = true];

    /** Label values recorded for this base. */
    repeated float labels = 2 [packed = true];

    /** Color of the chromosome that contains the genotype. Used to reconstitute haplotype continuity for indels.
        Color is used to assemble called bases at successive positions into alleles, such as A--C from called alleles
        at four positions. There are exactly ploidy number of colors in this list. Each integer encodes the color of the
        chromosome that carries the allele. */
    repeated uint32 color = 6 [packed = true];

    /** True genotype, given as a list of alleles. Each allele is a single base drawn from {A,C,T,G,-},
        as found at the genomic site in this genome.
    */
    repeated string trueLabel = 3;

    /** The genotype before segment post-processing. */
    optional string prePostProcessingGenotype = 10;

    /** The offset used when creating the record copy during post-processing. */
    optional uint32 offset = 11;

    /** True when the base contains some counts for indels. This does not mean that the base contains a true
        indel, but simply that it contains a potential indel.
    */
    optional bool hasCandidateIndel = 4;

    /** True when the base actually overlaps an indel in one of its alleles. This is the set of bases used
        to evaluate F1 over indels (at the base level). Other bases are used to evaluate performance of SNPs. */
    optional bool hasTrueIndel = 5;

    /** True when this base has at least one allele that does not match the reference. Those bases where
        isVariant==true are used to estimate precision/recall and F1 for SNPs and indels.
    */
    optional bool isVariant = 7;

    /** Presumably the reference allele at this base; the expected format is not documented here (TODO confirm). */
    optional string referenceAllele = 8;

    /**
        The formatted counts from the sbi record at this base.
    */
    optional string formattedCounts = 9;

    /**
        The location of this base.
    */
    optional uint32 location = 12;

}

/**
 * Identifies a position in the genome.
 */
message ReferencePosition {
    /** Presumably the numeric index of the reference sequence (TODO confirm against the writer). */
    optional uint32 reference_index = 2;

    /** Presumably the textual identifier of the reference sequence (TODO confirm against the writer). */
    optional string reference_id = 3;

    /**
     * Location of the position. NOTE(review): the coordinate base (0- or 1-based)
     * is not stated in this file; confirm before relying on it.
     */
    optional uint32 location = 1;
}