File: ApproxNeedlemanWunsch.java

package info (click to toggle)
libsecondstring-java 0.1~dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 764 kB
  • sloc: java: 9,592; xml: 114; makefile: 6
file content (111 lines) | stat: -rwxr-xr-x 3,919 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package com.wcohen.ss;

import com.wcohen.ss.api.*;
import java.io.*;

import org.apache.log4j.Logger;

/**
 * Needleman-Wunsch string distance, following Durban et al. 
 * Sec 2.3, but using an approximate string distance.
 */

public class ApproxNeedlemanWunsch extends AbstractStringDistance
{
    private static Logger log=Logger.getLogger(ApproxNeedlemanWunsch.class);

    private static final int DEFAULT_WIDTH = 40;
    private CharMatchScore charMatchScore;
    private double gapCost;
    private MyMatrix mat;
    private int width = DEFAULT_WIDTH;
	
    public ApproxNeedlemanWunsch() { this(CharMatchScore.DIST_01, 1.0 ); }

    public ApproxNeedlemanWunsch(CharMatchScore charMatchScore,double gapCost) {
        this.charMatchScore = charMatchScore;
        this.gapCost = gapCost;
    }
	
    public void setWidth(int w) { this.width=w; }

    public double score(StringWrapper s,StringWrapper t) {
        mat = new MyMatrix( s, t );
        // fill matrix forward to prevent deep recursion
        for (int i=1; i<=s.length(); i++) {
            int j = (int)Math.round(i * mat.getScale());
            if (j>=1 && j<=t.length()) {
                double forceComputatationHere = mat.get( i, j);                
            }
        }
        return mat.get(s.length(), t.length() );
    }
	
    public String explainScore(StringWrapper s,StringWrapper t) {
        mat = new MyMatrix( s, t );
        double d = mat.get(s.length(), t.length() );
        mat.setPrintNegativeValues(true);
        return mat.toString() + "\nScore = "+d;
    }
	

    /** Find a character in the first string, s, that can be aligned
     * with the i-th character in the second string, t. */
    public int getAlignedChar(int iMinusOne,boolean preferHigherIndices)
    {
        // internally to this package, strings are indexed 1...N, so
        // we need to convert from the usual 0...N-1 Java convention
        int i = iMinusOne+1;

        int bestJ = -1;
        double bestScore = -Double.MAX_VALUE;
        for (int j=mat.getFirstStoredEntryInRow(i); j<=mat.getLastStoredEntryInRow(i); j++) {
            if (mat.outOfRange(i,j)) log.error("out of range: "+i+","+j);
            double score = mat.get(i,j);
            if ((score>bestScore) || (score==bestScore && preferHigherIndices)) {
                bestScore = score; bestJ = j;
            }
        }
        // convert back to the usual 0...N-1 Java convention
        return bestJ-1;
    }
    
    private class MyMatrix extends ApproxMemoMatrix 
    {
        public MyMatrix(StringWrapper s,StringWrapper t) {
	    super(s,t,width,-Double.MAX_VALUE);
        }
        public double compute(int i,int j) {
	    if (i==0) return -j*gapCost;
	    if (j==0) return -i*gapCost;
	    return max3( get(i-1,j-1) + charMatchScore.matchScore( sAt(i), tAt(j) ),
                         get(i-1, j) - gapCost,
                         get(i, j-1) - gapCost);
        }
    }
	

    static public void main(String[] argv) throws Exception
    {
        if (argv.length==3) {
            // -f file1 file2
            String s = readFile(new File(argv[1]));
            String t = readFile(new File(argv[2]));
            long t0 = System.currentTimeMillis();
            double score = new ApproxNeedlemanWunsch().score(s,t);
            long tf = System.currentTimeMillis();
            System.out.println("score = "+score+" runtime = "+((tf-t0)/1000.0)+" sec"); 
        } else {
            doMain(new ApproxNeedlemanWunsch(), argv);
        }
    }

    private static String readFile(File in) throws IOException
    {
        InputStream inputStream = new FileInputStream(in);
        byte[] bytes = new byte[inputStream.available()];
        inputStream.read(bytes);
        inputStream.close();
        return new String(bytes);
    }
}