/*
 * File: ShortReadUtil.cpp
 *
 * Package: perm 0.4.0-8 (area: main; suites: bookworm, forky, sid, trixie)
 * Package size: 976 kB; sloc: cpp: 13,499; makefile: 98; sh: 12
 * File content: 252 lines, 6,428 bytes (-rw-r--r--)
 */
#include "stdafx.h"
#include "ShortReadUtil.h"

/*
 * Upper-case the first `length` characters of caArray in place.
 *
 * caArray : writable buffer holding at least `length` characters.
 * length  : number of characters to convert; values <= 0 leave the
 *           buffer untouched.
 *
 * No terminator is looked for or written; exactly `length` characters
 * are visited.
 */
void toUpperCase(char* caArray, int length)
{
    for (int i = 0; i < length; i++) {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain (possibly negative) char must be converted
        // first to avoid undefined behaviour on bytes >= 0x80.
        caArray[i] = (char)toupper((unsigned char)caArray[i]);
    }
}

/*
 * Mutate exactly No_of_mutation distinct positions of the NUL-terminated
 * string Kmer in place and return Kmer.
 *
 * Kmer           : writable, NUL-terminated read; NULL is returned unchanged.
 * No_of_mutation : number of distinct positions to mutate. If it exceeds
 *                  the read length a warning is printed and every position
 *                  is mutated instead.
 *
 * Each selected position is handed to mutateBase().
 *
 * Positions are chosen by sequential selection sampling: walking the read
 * left to right, position `pos` is taken with probability
 * (mutations still needed) / (positions still unvisited). This selects
 * exactly No_of_mutation distinct positions without any bookkeeping array,
 * so there is no fixed upper limit on No_of_mutation (the previous
 * implementation wrote past a 64-entry stack array for larger requests)
 * and no retry loop. Note that the sequence of rand() calls, and therefore
 * the result for a given seed, differs from the previous implementation.
 */
char* mutateRead(char* Kmer , unsigned int No_of_mutation)
{
    if (Kmer == NULL) {
        return(Kmer);
    }
    unsigned int l = (unsigned int)strlen(Kmer);

    if (No_of_mutation > l) {
        cout << "More mutation than it could be" << endl;
        No_of_mutation = l;
    }

    unsigned int remaining = No_of_mutation;
    for (unsigned int pos = 0; pos < l && remaining > 0; pos++) {
        // l - pos >= 1 here, so the modulo is always well defined. When
        // remaining == l - pos the test is always true, which guarantees
        // that all requested mutations are placed before the read ends.
        if ((unsigned int)rand() % (l - pos) < remaining) {
            mutateBase(&Kmer[pos]);
            remaining--;
        }
    }
    return(Kmer);
}

/*
 * Replace the nucleotide pointed to by Base with a different one, chosen
 * with rand().
 *
 * Base : pointer to a single writable character.
 *
 * A/C/G/T (either case) are mapped to indices 0..3; a random offset of
 * 1..3 is added modulo 4, so the result is always an upper-case
 * nucleotide different from the input base. Any other input character is
 * treated like 'A' (index 0) and therefore becomes 'C', 'G' or 'T'.
 *
 * Exactly one rand() call is made per invocation.
 */
void mutateBase(char* Base)
{
    static const char NUCLEOTIDES[] = "ACGT";

    int index;
    switch (*Base) {
    case 'C':
    case 'c':
        index = 1;
        break;
    case 'G':
    case 'g':
        index = 2;
        break;
    case 'T':
    case 't':
        index = 3;
        break;
    default: // 'A', 'a' and every unrecognized character
        index = 0;
        break;
    }

    // Offset is 1..3, never 0, so the base cannot map back onto itself.
    const int shift = rand() % 3 + 1;
    // (index + shift) % 4 is always within 0..3, so no out-of-range
    // branch is needed.
    *Base = NUCLEOTIDES[(index + shift) % 4];
}

/*
 * Mutate no_of_mutated_pairs pairs of adjacent bases of the NUL-terminated
 * string Kmer in place and return Kmer.
 *
 * Kmer                : writable, NUL-terminated read; NULL is returned
 *                       unchanged.
 * no_of_mutated_pairs : number of pairs to mutate. Each pair is identified
 *                       by its start position p and covers p and p + 1.
 *
 * A read of length l has only l - 1 possible start positions. If more
 * pairs are requested than start positions exist, a message is printed and
 * the read is returned unmodified. (The previous check compared against l,
 * which let a request for l pairs spin forever, divided by zero for a read
 * of length 1, and overflowed a 50-entry stack array for large requests.)
 *
 * Start positions are distinct but pairs may overlap: if p and p + 1 are
 * both selected, the shared base is passed to mutateBase() twice.
 *
 * Start positions are chosen by sequential selection sampling, which needs
 * no bookkeeping array and no retry loop. The sequence of rand() calls,
 * and therefore the result for a given seed, differs from the previous
 * implementation.
 */
char* mutatePairsOfConsecutiveBases(char* Kmer, unsigned int no_of_mutated_pairs)
{
    if (Kmer == NULL) {
        return Kmer;
    }
    unsigned int l = (unsigned int)strlen(Kmer);
    // Number of valid pair start positions; guards the l == 0 and l == 1
    // cases against unsigned wrap-around and modulo by zero.
    unsigned int starts = (l >= 2) ? l - 1 : 0;
    if (no_of_mutated_pairs > starts) {
        cout << "Mutate too many pairs" << endl;
        return Kmer;
    }

    unsigned int remaining = no_of_mutated_pairs;
    for (unsigned int pos = 0; pos < starts && remaining > 0; pos++) {
        // Take pos with probability remaining / (starts - pos); when the
        // two are equal the test always succeeds, so exactly the requested
        // number of start positions is selected.
        if ((unsigned int)rand() % (starts - pos) < remaining) {
            mutateBase(&(Kmer[pos]));
            mutateBase(&(Kmer[pos + 1]));
            remaining--;
        }
    }
    return(Kmer);
}

/*
 * Report whether any of the first ReadLength characters of Read fails the
 * isACGT() check.
 *
 * Returns true (bad read) at the first failing character, false (good
 * read) if all ReadLength characters pass. A ReadLength of 0 yields false.
 */
bool isBadRead(const char* Read, unsigned int ReadLength)
{
    unsigned int pos = 0;
    while (pos < ReadLength) {
        if (isACGT(Read[pos])) {
            ++pos;
            continue;
        }
        return true;  // found a character that is not accepted
    }
    return false;     // every inspected character was accepted
}

/*
 * Report whether a read fails the SOLiD layout check performed here: the
 * first character must pass isACGT() and every following character, up to
 * ReadLength, must pass is0123().
 *
 * Returns true (bad read) as soon as one character fails, false otherwise.
 * Read[0] is always inspected, regardless of ReadLength.
 */
bool isBadSOLiDRead(const char* Read, unsigned int ReadLength)
{
    // The leading character is checked with isACGT().
    bool bad = !isACGT(Read[0]);

    // The remaining characters are checked with is0123(); stop at the
    // first failure.
    for (unsigned int pos = 1; !bad && pos < ReadLength; ++pos) {
        bad = !is0123(Read[pos]);
    }
    return bad;
}

/*
 * Dispatch the bad-read check according to the read type: isBadSOLiDRead()
 * when isSOLiD is true, otherwise the two-argument isBadRead().
 */
bool isBadRead(bool isSOLiD, const char* Read, unsigned int ReadLength)
{
    return isSOLiD ? isBadSOLiDRead(Read, ReadLength)
                   : isBadRead(Read, ReadLength);
}

/*
 * Reverse the NUL-terminated string Kmer in place.
 *
 * Returns Kmer itself; a NULL argument is returned unchanged.
 */
char* reverseKmer(char* Kmer)
{
    if (Kmer == NULL) {
        return Kmer;
    }

    // `head` walks forward from the first character, `tail` is kept one
    // past the next character to swap from the back; stop when they meet.
    unsigned int head = 0;
    unsigned int tail = (unsigned int)strlen(Kmer);
    while (head + 1 < tail) {
        --tail;
        swap(Kmer[head], Kmer[tail]);
        ++head;
    }
    return Kmer;
}

// Turn Kmer into its reverse complement in place (the original content is
// overwritten): the string is reversed first, then every character is
// replaced by the result of complimentBase().
// Returns Kmer itself; a NULL argument is returned unchanged.
char* reverseComplementKmer(char* Kmer)
{
    if (Kmer == NULL) {
        return Kmer;
    }
    const unsigned int length = (unsigned int)strlen(Kmer);

    // Step 1: reverse, swapping from both ends towards the middle.
    unsigned int head = 0;
    unsigned int tail = length;
    while (head + 1 < tail) {
        --tail;
        swap(Kmer[head], Kmer[tail]);
        ++head;
    }

    // Step 2: complement every position, front to back.
    for (unsigned int pos = 0; pos != length; ++pos) {
        Kmer[pos] = complimentBase(Kmer[pos]);
    }
    return Kmer;
}

/*
 * Count the positions, among the first l characters, at which str1 and
 * str2 differ.
 *
 * Both buffers must hold at least l characters; terminators are not
 * treated specially. An l of zero or less gives 0.
 */
unsigned int strComp(char* str1, char* str2, int l)
{
    unsigned int mismatches = 0;
    const char* lhs = str1;
    const char* rhs = str2;
    for (int remaining = l; remaining > 0; --remaining, ++lhs, ++rhs) {
        if (*lhs != *rhs) {
            ++mismatches;
        }
    }
    return mismatches;
}

/*
 * Compare two NUL-terminated strings position by position, up to the end
 * of the shorter one. Wherever they differ, the character in the second
 * string (str2) is converted to lower case in place so the mismatch is
 * visible.
 *
 * str1 : reference string, only read.
 * str2 : string to compare; mismatching positions are overwritten.
 *
 * Returns the number of mismatching positions.
 */
unsigned int strCompMarkDiff(char* str1, char* str2)
{
    unsigned int iDiff = 0;
    for (int i = 0; str1[i] != '\0' && str2[i] != '\0'; i++) {
        if (str1[i] != str2[i]) {
            iDiff++;
            // tolower() is only defined for EOF and values representable
            // as unsigned char, so convert the plain char first.
            str2[i] = (char)tolower((unsigned char)str2[i]);
        }
    }
    return(iDiff);
}
/*
 * Count the positions, among the first readlength characters, at which
 * diNtWildCardComp(read1[i], read2[i]) reports no match, i.e. the number
 * of wild-card mismatches between the two reads.
 */
unsigned int diNtStrWildCardComp(char* read1, char* read2, unsigned int readlength)
{
    unsigned int mismatches = 0;
    unsigned int pos = 0;
    while (pos < readlength) {
        mismatches += diNtWildCardComp(read1[pos], read2[pos]) ? 0u : 1u;
        ++pos;
    }
    return mismatches;
}

/*
 * Report whether the NUL-terminated string `read` contains a run of at
 * least dummyT identical consecutive characters.
 *
 * Returns false when dummyT is not positive or when the read is empty.
 * With dummyT == 1 every non-empty read qualifies.
 */
bool isDummyRead(const char* read, int dummyT)
{
    // The threshold is tested first so `read` is never touched when the
    // check is disabled.
    if (dummyT <= 0 || read[0] == '\0') {
        return false;
    }

    // Length of the run of equal characters ending at the current position.
    int runLength = 1;
    for (const char* cur = read + 1; *cur != '\0' && runLength < dummyT; ++cur) {
        runLength = (*cur == *(cur - 1)) ? runLength + 1 : 1;
    }
    return runLength >= dummyT;
}

/*
 * Starting from nt, fold the first `pos` entries of `colors` into a single
 * character by repeatedly applying base2color(current, colors[i]).
 *
 * Returns 'N' as soon as an entry fails the is0123() check; otherwise the
 * final folded value. With pos <= 0, nt is returned unchanged.
 *
 * NOTE(review): presumably this decodes a colour-space read into the base
 * at position `pos` - confirm against base2color().
 * TODO The base2color helper should be named color2base.
 * TODO This function should be moved to color space read.
 */
char getBaseFromColors(char nt, const char* colors, int pos)
{
    char current = nt;
    int idx = 0;
    while (idx < pos) {
        if (!is0123(colors[idx])) {
            return 'N';
        }
        current = base2color(current, colors[idx]);
        ++idx;
    }
    return current;
}