1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
/* This schema is written against the proto2 language version. */
syntax = "proto2";
/* Declarations in this file belong to the "goby" protobuf package. */
package goby;
/* Java classes generated from this file are placed in this Java package. */
option java_package = "org.campagnelab.goby.reads";
/* Ask the code generator to optimize the generated code for speed. */
option optimize_for = SPEED;
/*
 * A collection of reads: holds zero or more ReadEntry messages.
 */
message ReadCollection {
  /* The reads in this collection. */
  repeated ReadEntry reads = 1;
}
message ReadEntry {
  /* Index of the read. */
  required uint32 read_index = 1;

  /* Index of the barcode, when there is one. */
  optional uint32 barcode_index = 10;

  /* Identifier/name of the read; may or may not be present. */
  optional string read_identifier = 23;

  /* Additional description of the read (taken from the Fasta/Q format). */
  optional string description = 22;

  /* Length of the sequence. */
  required uint32 read_length = 2;

  /* The sequence, encoded as ASCII characters stored in single bytes. */
  optional bytes sequence = 3;

  /*
   * The second sequence of a pair. Stored the same way as the `sequence`
   * field.
   */
  optional bytes sequence_pair = 5;

  /* Length of the second sequence of a pair. */
  optional uint32 read_length_pair = 6;

  /* Quality scores in Phred units, stored as single bytes (0-255). */
  optional bytes quality_scores = 4;

  /*
   * Quality scores for the second sequence of a pair. Stored the same way as
   * the `quality_scores` field.
   */
  optional bytes quality_scores_pair = 7;

  /*
   * Compressed stream of data. The first byte indicates the
   * compression/decompression method (codec). The remaining bytes are the
   * content compressed with that codec.
   */
  optional bytes compressed_data = 8;

  /*
   * Meta-data about the reads, stored as key/value pairs. Meta-data is
   * typically stored in the very first read of a read collection, with the
   * understanding that it applies to all the reads in the collection. It can
   * be used to record when the sample was sequenced, or any other information
   * of interest. The key/value format is flexible enough to accommodate a
   * variety of needs.
   *
   * The keys below are pre-defined. Please use pre-defined keys so that
   * automated tools can use meta-data in a relatively standard way. Some keys
   * also prescribe a format for the value; follow that format to guarantee
   * that the meta-data can be processed in a fully automatic manner.
   *
   *   key="sequencing-run-start-date" value="MM/DD/YYYY"
   *     Records when the sequencing run was initiated on the instrument.
   *     Can be used to detect batch effects in a large set of samples.
   *
   *   key="platform" value="<free-text>"
   *     The value is free text, but the following terms are pre-defined:
   *       Illumina GaIIx
   *       Illumina HiSeq 1000
   *       Illumina HiSeq 2000
   *       Helicos Heliscope
   *       LifeTech 5500 SOLiD
   *       LifeTech 5500xl SOLiD
   *       Roche 454 GS FLX Ti
   *
   *   key="organism" value="species name"
   *
   * Since Goby 1.9.1
   */
  repeated MetaData meta_data = 25;
}
/*
 * A single key/value pair representing a piece of meta-data about reads.
 * Since Goby 1.9.1
 */
message MetaData {
  /*
   * The key of the pair. See the documentation of ReadEntry.meta_data for
   * examples.
   */
  required string key = 1;

  /*
   * The value associated with the key. See the documentation of
   * ReadEntry.meta_data for examples.
   */
  required string value = 2;
}
|