File: format_specimens.rs

package info (click to toggle)
rust-needletail 0.6.3-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,780 kB
  • sloc: makefile: 2
file content (94 lines) | stat: -rw-r--r-- 2,819 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
use std::fs;

use needletail::errors::ParseError;
use needletail::parser::parse_fastx_file;
use serde_derive::Deserialize;

/// One specimen entry as listed in a specimen `index.toml`.
#[derive(Debug, Deserialize)]
struct TestCase {
    /// File name of the specimen; the tests join it onto the specimen directory
    /// (e.g. `tests/specimen/FASTA/`) to build the path to parse.
    filename: String,
    // Not deserialized: presumably an `origin` key exists in the index, but the
    // tests in this file never read it.
    // origin: String,
    /// Optional tags attached to the specimen; the FASTA test uses the
    /// `comments` tag to skip specimens.
    tags: Option<Vec<String>>,
    // Not deserialized: presumably a `comments` key exists in the index, but the
    // tests in this file never read it.
    // comments: Option<Vec<String>>,
}

/// Deserialized contents of a specimen `index.toml`.
#[derive(Debug, Deserialize)]
struct TestIndex {
    /// Specimens listed under `valid`; the tests expect these to parse
    /// (apart from the ones they explicitly skip).
    valid: Vec<TestCase>,
    /// Specimens listed under `invalid`, if the index has that section; the
    /// FASTQ test expects these to fail (apart from the ones it explicitly skips).
    invalid: Option<Vec<TestCase>>,
}

/// Opens `path` with `parse_fastx_file` and reads it to the end, discarding
/// every record.
///
/// # Errors
///
/// Returns the `ParseError` from opening the file, or the first one produced
/// while reading records.
fn test_fastx_file(path: &str) -> Result<(), ParseError> {
    let mut records = parse_fastx_file(path)?;
    loop {
        match records.next() {
            // A good record: nothing to keep, move on to the next one.
            Some(Ok(_)) => {}
            // Stop at the first record that fails to parse.
            Some(Err(err)) => return Err(err),
            // End of input without any error.
            None => return Ok(()),
        }
    }
}

/// Checks that every specimen listed as `valid` in the FASTA index parses
/// without error.
///
/// Specimens tagged `comments` are skipped.
#[test]
fn test_specimen_fasta() {
    let raw_index = fs::read_to_string("tests/specimen/FASTA/index.toml")
        .expect("Could not read FASTA specimen index");
    let index: TestIndex = toml::from_str(&raw_index).expect("Could not deserialize index");
    for test in index.valid {
        // what kind of sicko puts comments in FASTAs?
        // Borrow the tag list (empty when absent) and compare in place, so no
        // `Vec` or `String` is built just to answer a membership question.
        let has_comments = test
            .tags
            .as_deref()
            .unwrap_or_default()
            .iter()
            .any(|tag| tag == "comments");
        if has_comments {
            continue;
        }

        let path = format!("tests/specimen/FASTA/{}", test.filename);
        // Name the specimen on failure; the left/right values printed by
        // `assert_eq!` still show the actual parse error.
        assert_eq!(
            test_fastx_file(&path),
            Ok(()),
            "File {} is bad?",
            test.filename
        );
    }
}

/// Runs the parser over the FASTQ specimen index: `valid` specimens must
/// parse, `invalid` specimens must fail, minus the documented exceptions.
#[test]
fn test_specimen_fastq() {
    // may god have mercy upon us if someone ever tries a file like this
    // (sequences are one-line, but quality scores are line-wrapped)
    const SKIPPED_VALID: [&str; 3] = [
        "wrapping_original_sanger.fastq",
        "longreads_original_sanger.fastq",
        "tricky.fastq",
    ];

    // Invalid specimens that are not expected to be rejected.
    let tolerated_invalid = |name: &str| {
        // we don't care if the sequence ID doesn't match the quality id?
        let mismatched_ids = name == "error_diff_ids.fastq";
        // We don't check for ascii validity since it's a big hit perf wise
        // This means some invalid sequences are considered ok but it's not a big issue
        // in practice
        let unchecked_bytes = name.starts_with("error_qual_")
            || matches!(name, "error_spaces.fastq" | "error_tabs.fastq");
        mismatched_ids || unchecked_bytes
    };

    let index_text = fs::read_to_string("tests/specimen/FASTQ/index.toml").unwrap();
    let index: TestIndex = toml::from_str(&index_text).expect("Could not deserialize index");

    let must_parse = index
        .valid
        .into_iter()
        .filter(|case| !SKIPPED_VALID.contains(&case.filename.as_str()));
    for case in must_parse {
        let specimen = format!("tests/specimen/FASTQ/{}", case.filename);
        let outcome = test_fastx_file(&specimen);
        assert!(outcome.is_ok(), "File {} is bad?", case.filename);
    }

    let must_fail = index
        .invalid
        .unwrap_or_default()
        .into_iter()
        .filter(|case| !tolerated_invalid(case.filename.as_str()));
    for case in must_fail {
        let specimen = format!("tests/specimen/FASTQ/{}", case.filename);
        let outcome = test_fastx_file(&specimen);
        assert!(outcome.is_err(), "File {} is good?", case.filename);
    }
}