File: SelfHash.h

package info (click to toggle)
rcpp 1.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,480 kB
  • sloc: cpp: 27,436; ansic: 7,778; sh: 53; makefile: 2
file content (129 lines) | stat: -rw-r--r-- 4,005 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; tab-width: 4 -*-
//
// hash.h: Rcpp R/C++ interface class library -- hashing utility, inspired
// from Simon's fastmatch package
//
// Copyright (C) 2010, 2011  Simon Urbanek
// Copyright (C) 2012  Dirk Eddelbuettel and Romain Francois
//
// This file is part of Rcpp.
//
// Rcpp is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// Rcpp is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Rcpp.  If not, see <http://www.gnu.org/licenses/>.

#ifndef RCPP__HASH__SELF_HASH_H
#define RCPP__HASH__SELF_HASH_H

namespace Rcpp{
namespace sugar{


    template <int RTYPE>
    class SelfHash {
    public:
        typedef typename traits::storage_type<RTYPE>::type STORAGE ;
        typedef Vector<RTYPE> VECTOR ;

        SelfHash( SEXP table ) : n(Rf_length(table)), m(2), k(1),
            src( (STORAGE*)dataptr(table) ), data(), indices(), size_(0)
        {
            int desired = n*2 ;
            while( m < desired ){ m *= 2 ; k++ ; }
            data.resize( m ) ;
            indices.resize( m ) ;
        }

        inline IntegerVector fill_and_self_match(){
            IntegerVector result = no_init(n) ;
            int* res = INTEGER(result) ;
            for( int i=0; i<n; i++) res[i] = add_value_get_index(i) ;
            return result ;
        }

        inline int size() const {
            return size_ ;
        }

        int n, m, k ;
        STORAGE* src ;
        std::vector<int> data ;
        std::vector<int> indices ;
        int size_ ;

        int add_value_get_index(int i){
            STORAGE val = src[i++] ;
            unsigned int addr = get_addr(val) ;
            while (data[addr] && src[data[addr] - 1] != val) {
              addr++;
              if (addr == static_cast<unsigned int>(m)) addr = 0;
            }
            if (!data[addr]) {
                data[addr] = i ;
                indices[addr] = ++size_ ;
            }
            return indices[addr] ;
        }

        /* NOTE: we are returning a 1-based index ! */
        unsigned int get_index(STORAGE value) const {
            unsigned int addr = get_addr(value) ;
            while (data[addr]) {
              if (src[data[addr] - 1] == value)
                return data[addr];
              addr++;
              if (addr == static_cast<unsigned int>(m)) addr = 0;
            }
            return NA_INTEGER;
        }

        // defined below
        unsigned int get_addr(STORAGE value) const ;
    } ;

    template <>
    inline unsigned int SelfHash<INTSXP>::get_addr(int value) const {
        return RCPP_HASH(value) ;
    }
    template <>
    inline unsigned int SelfHash<REALSXP>::get_addr(double val) const {
      unsigned int addr;
      union dint_u {
          double d;
          unsigned int u[2];
        };
      union dint_u val_u;
      /* double is a bit tricky - we nave to normalize 0.0, NA and NaN */
      if (val == 0.0) val = 0.0;
      if (internal::Rcpp_IsNA(val)) val = NA_REAL;
      else if (internal::Rcpp_IsNaN(val)) val = R_NaN;
      val_u.d = val;
      addr = RCPP_HASH(val_u.u[0] + val_u.u[1]);
      return addr ;
    }

    template <>
    inline unsigned int SelfHash<STRSXP>::get_addr(SEXP value) const {
        intptr_t val = (intptr_t) value;
        unsigned int addr;
        #if (defined _LP64) || (defined __LP64__) || (defined WIN64)
          addr = RCPP_HASH((val & 0xffffffff) ^ (val >> 32));
        #else
          addr = RCPP_HASH(val);
        #endif
        return addr ;
    }

} // sugar
} // Rcpp

#endif