1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 1999-2008 Soeren Sonnenburg
* Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#include "lib/common.h"
#include "kernel/FixedDegreeStringKernel.h"
#include "features/Features.h"
#include "features/StringFeatures.h"
#include "lib/io.h"
/**
 * Construct a kernel that has no features attached yet.
 *
 * @param size forwarded unchanged to the CStringKernel<CHAR> constructor
 * @param d    value stored in the degree member (the substring length that
 *             compute() compares)
 */
CFixedDegreeStringKernel::CFixedDegreeStringKernel(INT size, INT d)
    : CStringKernel<CHAR>(size),
      degree(d),
      sqrtdiag_lhs(NULL),
      sqrtdiag_rhs(NULL),
      initialized(false)
{
    // Both normalization tables start out empty; init() allocates them.
}
/**
 * Construct a kernel and immediately initialize it on the given features.
 *
 * The base class is constructed with the fixed argument 10.
 *
 * @param l left-hand side string features, passed to init()
 * @param r right-hand side string features, passed to init()
 * @param d value stored in the degree member
 */
CFixedDegreeStringKernel::CFixedDegreeStringKernel(
    CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT d)
    : CStringKernel<CHAR>(10),
      degree(d),
      sqrtdiag_lhs(NULL),
      sqrtdiag_rhs(NULL),
      initialized(false)
{
    // The members above must be set before this call: init() reads and
    // frees the sqrtdiag pointers. Its return value is not inspected here.
    init(l, r);
}
/**
 * Destructor: releases the normalization tables by delegating to cleanup().
 */
CFixedDegreeStringKernel::~CFixedDegreeStringKernel()
{
    this->cleanup();
}
/**
 * Initialize the kernel on a pair of feature objects and rebuild the
 * normalization tables sqrtdiag_lhs / sqrtdiag_rhs that compute() divides by.
 *
 * @param l left-hand side features (cast to CStringFeatures<CHAR>* below)
 * @param r right-hand side features; when r is the same object as l, both
 *          tables share a single array
 * @return the value returned by CStringKernel<CHAR>::init(l, r)
 */
bool CFixedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
{
    // NOTE(review): lhs/rhs are dereferenced below, so this presumably
    // relies on the base init having set them - confirm against CKernel.
    bool result = CStringKernel<CHAR>::init(l, r);

    // While this flag is false, compute() uses a normalizer of 1.0, so the
    // init_sqrt_diag() calls below work on unnormalized kernel values.
    initialized = false;

    // Release the previous tables. When both pointers refer to the same
    // array it must be freed only once.
    if (sqrtdiag_lhs!=sqrtdiag_rhs)
        delete[] sqrtdiag_rhs;
    sqrtdiag_rhs=NULL;
    delete[] sqrtdiag_lhs;
    // Reset before allocating: if the allocation below throws, the member
    // must not keep pointing at freed memory, otherwise cleanup() (run by
    // the destructor) would delete[] it a second time.
    sqrtdiag_lhs=NULL;

    sqrtdiag_lhs=new DREAL[lhs->get_num_vectors()];
    if (l==r)
        sqrtdiag_rhs=sqrtdiag_lhs;
    else
        sqrtdiag_rhs=new DREAL[rhs->get_num_vectors()];

    // Temporarily point both sides at l so that the diagonal is evaluated
    // on (l, l).
    this->lhs=(CStringFeatures<CHAR>*) l;
    this->rhs=(CStringFeatures<CHAR>*) l;
    CKernel::init_sqrt_diag(sqrtdiag_lhs, lhs->get_num_vectors());

    // if lhs is different from rhs (train/test data)
    // compute also the normalization for rhs
    if (sqrtdiag_lhs!=sqrtdiag_rhs)
    {
        this->lhs = (CStringFeatures<CHAR>*) r;
        this->rhs = (CStringFeatures<CHAR>*) r;
        CKernel::init_sqrt_diag(sqrtdiag_rhs, rhs->get_num_vectors());
    }

    // Restore the real left/right assignment and switch normalization on.
    this->lhs = (CStringFeatures<CHAR>*) l;
    this->rhs = (CStringFeatures<CHAR>*) r;
    initialized = true;
    return result;
}
/**
 * Free the normalization tables, mark the kernel as uninitialized and then
 * run CKernel::cleanup().
 */
void CFixedDegreeStringKernel::cleanup()
{
    // The two members may refer to one shared array (init() does this when
    // both feature objects are the same); in that case free it only once.
    const bool rhs_has_own_table = (sqrtdiag_lhs != sqrtdiag_rhs);
    if (rhs_has_own_table)
        delete[] sqrtdiag_rhs;
    delete[] sqrtdiag_lhs;

    sqrtdiag_rhs = NULL;
    sqrtdiag_lhs = NULL;

    initialized = false;

    CKernel::cleanup();
}
/**
 * Loading kernel initialization data is not implemented for this kernel.
 *
 * @param src input stream; never read
 * @return always false
 */
bool CFixedDegreeStringKernel::load_init(FILE* src)
{
    const bool loaded = false;
    return loaded;
}
/**
 * Saving kernel initialization data is not implemented for this kernel.
 *
 * @param dest output stream; never written
 * @return always false
 */
bool CFixedDegreeStringKernel::save_init(FILE* dest)
{
    const bool saved = false;
    return saved;
}
/**
 * Evaluate the kernel for one pair of strings.
 *
 * Counts the start positions at which the next `degree` characters of both
 * strings are identical, then divides that count by the normalizer
 * sqrtdiag_lhs[idx_a]*sqrtdiag_rhs[idx_b] (or by 1.0 while the kernel is
 * not yet marked initialized).
 *
 * @param idx_a index of the vector taken from lhs
 * @param idx_b index of the vector taken from rhs
 * @return match count divided by the normalizer
 */
DREAL CFixedDegreeStringKernel::compute(INT idx_a, INT idx_b)
{
    INT len_a, len_b;
    CHAR* str_a = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, len_a);
    CHAR* str_b = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, len_b);

    // both strings are required to have the same length
    ASSERT(len_a==len_b);

    DREAL normalizer = 1.0;
    if (initialized)
        normalizer = sqrtdiag_lhs[idx_a]*sqrtdiag_rhs[idx_b];

    LONG num_matches = 0;
    for (INT start = 0; start<len_a-degree+1; start++)
    {
        // advance while characters agree; stop at the first mismatch
        INT offset = 0;
        while (offset<degree && str_a[start+offset]==str_b[start+offset])
            offset++;

        // the whole window matched only if no mismatch ended the scan early
        if (offset>=degree)
            num_matches++;
    }

    return ((DREAL) num_matches)/normalizer;
}
|