1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "stdmac.asm"
; Macros for doing Huffman Encoding
; Table geometry for struct isal_hufftables (layout sketched in the comment
; that follows these definitions).
; With LONGER_HUFFTABLE the distance lookup table has 8192 entries, which is
; only accepted when the history size D is at most 8192; otherwise the table
; has 2 entries and larger distances are encoded arithmetically.
%ifdef LONGER_HUFFTABLE
%if (D > 8192)
%error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
; NOTE(review): the next line is not a regular directive; presumably a
; deliberate second trigger to make sure assembly fails - confirm.
% error
%else
%define DIST_TABLE_SIZE 8192
%define DECODE_OFFSET 26
%endif
%else
%define DIST_TABLE_SIZE 2
%define DECODE_OFFSET 0
%endif
; Entry counts of the length and literal tables.
%define LEN_TABLE_SIZE 256
%define LIT_TABLE_SIZE 257
; First byte after the header bytes and the two uint32_t header counters.
%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)
; Distance table base biased back by one 4-byte entry: an index of "dist"
; addresses entry dist - 1, and readers that want entry "dist" add 1.
%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1)
; Length table base biased back by three 4-byte entries, so indexing with a
; length addresses entry length - 3.
%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)
; Literal code table (2 bytes per entry) follows the distance and length tables.
%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
; Literal code-length table (1 byte per entry) follows the literal code table.
%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
; Distance code table (2 bytes per entry) follows the literal size bytes plus
; one extra byte (presumably alignment padding - confirm); the base is biased
; back by DECODE_OFFSET entries so it can be indexed by the symbol directly.
%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
; Distance code-length table (1 byte per entry); with the DECODE_OFFSET bias
; already in DCODE_TABLE_OFFSET this is likewise indexed by the symbol directly.
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
;; struct isal_hufftables {
;; // deflate huffman tree header
;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
;;
;; //!< Number of whole bytes in deflate_huff_hdr
;; uint32_t deflate_huff_hdr_count;
;;
;; //!< Number of bits in the partial byte in header
;; uint32_t deflate_huff_hdr_extra_bits;
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t dist_table[DIST_TABLE_SIZE];
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t len_table[LEN_TABLE_SIZE];
;;
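;; //!< literal codes, one 16-bit code per symbol (read unshifted by get_lit_code)
;; uint16_t lit_table[LIT_TABLE_SIZE];
;;
;; //!< literal code lengths, one byte per symbol (read at LIT_TABLE_SIZES_OFFSET)
;; //!< NOTE(review): field name inferred from the offset macro - confirm against the C header
;; uint8_t lit_table_sizes[LIT_TABLE_SIZE];
;;
;; //!< distance codes, one 16-bit code per symbol (read at DCODE_TABLE_OFFSET)
;; uint16_t dcodes[30 - DECODE_OFFSET];
;;
;; //!< distance code lengths, one byte per symbol (read at DCODE_TABLE_SIZE_OFFSET)
;; //!< NOTE(review): field name inferred from the offset macro - confirm against the C header
;; uint8_t dcodes_sizes[30 - DECODE_OFFSET];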
;; };
%ifdef LONGER_HUFFTABLE
; get_dist_code dist, code, len, hufftables
;
; Table-driven distance lookup for LONGER_HUFFTABLE builds.
; Loads the packed 32-bit dist_table entry for "dist" and splits it into the
; code (bits 31:5) and the code length (bits 4:0).
;   dist        IN   64-bit register used as the table index; only read
;   code        OUT  register name; the macro appends "d" to address its 32-bit form
;   len         OUT  register name; the macro appends "d" to address its 32-bit form
;   hufftables  IN   base address of the hufftables structure
; No registers other than code and len are written.
%macro get_dist_code 4
%define %%distance	%1
%define %%out_code	%2d
%define %%out_len	%3d
%define %%table_base	%4

	; DIST_TABLE_OFFSET sits one entry before the table, hence the "+ 1".
	mov	%%out_len, [%%table_base + DIST_TABLE_OFFSET + 4*(%%distance + 1)]
	mov	%%out_code, %%out_len		; keep a copy of the packed entry
	and	%%out_len, 0x1F			; len  = entry[4:0]
	shr	%%out_code, 5			; code = entry[31:5]
%endm
; get_packed_dist_code dist, code_len, hufftables
;
; Loads one packed dist_table entry without splitting it (see
; unpack_dist_code for the split).
;   dist        IN   64-bit register used as the table index
;   code_len    OUT  register name; the macro appends "d" to address its 32-bit form
;   hufftables  IN   base address of the hufftables structure
; The index is applied to DIST_TABLE_OFFSET as-is, so "dist" selects table
; entry dist - 1.
%macro get_packed_dist_code 3
%define %%distance	%1
%define %%packed	%2d
%define %%table_base	%3

	mov	%%packed, [%%table_base + DIST_TABLE_OFFSET + 4*%%distance]
%endm
; unpack_dist_code code, len
;
; Splits a packed table entry in place.
;   code  IN/OUT  holds the packed entry on entry, the code (bits 31:5) on exit
;   len   OUT     receives the code length (bits 4:0)
; Both arguments are register names; the macro appends "d" to use their
; 32-bit forms.
%macro unpack_dist_code 2
%define %%packed	%1d
%define %%bit_count	%2d

	mov	%%bit_count, %%packed
	and	%%bit_count, 0x1F		; len  = entry[4:0]
	shr	%%packed, 5			; code = entry[31:5]
%endm
%else
; compute_dist_code dist, code, len, hufftables
;
; Derives the distance symbol arithmetically, fetches that symbol's Huffman
; code and code length from the dcodes tables, and appends the extra bits.
;   dist        IN   distance value; clobbered (ends up holding the symbol's code)
;   code        OUT  symbol code | (extra_bits << symbol code length)
;   len         OUT  number of extra bits + symbol code length
;   hufftables  IN   base address of the hufftables structure
; Assumes (dist != 0). Clobbers RCX.
; NOTE(review): for dist == 1, bsr yields 0 and the extra-bit count becomes
; -1; get_dist_code only reaches this macro for dist > DIST_TABLE_SIZE - 1.
; BZHI, SHRX and SHLX are macros defined outside this file; they are assumed
; to behave like the BMI2 instructions of the same names - confirm.
%macro compute_dist_code 4
%define %%dist %1		; IN, clobbered
%define %%distq %1
%define %%code %2		; OUT
%define %%len %3		; OUT
%define %%hufftables %4		; base address of the hufftables

	bsr	rcx, %%dist		; rcx = msb = bsr(dist)
	dec	rcx			; rcx = num_extra_bits = msb - 1
	BZHI	%%code, %%dist, rcx, %%len	; code = extra_bits (len is BZHI's 4th argument)
	SHRX	%%dist, %%dist, rcx	; dist >>= num_extra_bits
	lea	%%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2
	mov	%%len, rcx		; len = num_extra_bits
	; Address the tables through the macro argument rather than through a
	; name that happens to exist in the invoking scope.
	movzx	rcx, byte [%%hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
	movzx	%%dist, word [%%hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
	SHLX	%%code, %%code, rcx	; code = extra_bits << sym_code_len
	or	%%code, %%dist		; code = sym_code | (extra_bits << sym_code_len)
	add	%%len, rcx		; len = num_extra_bits + sym_code_len
%endm
; get_dist_code dist, code, len, hufftables
;
; Distance code lookup for builds without LONGER_HUFFTABLE.
; Distances up to DIST_TABLE_SIZE - 1 (signed compare) are served from the
; packed dist_table: len = entry[4:0], code = entry[31:5]. Larger distances
; are handed to compute_dist_code.
;   dist        IN   distance value; clobbered on the compute path
;   code        OUT  Huffman code (with extra bits on the compute path)
;   len         OUT  total bit length of code
;   hufftables  IN   base address of the hufftables structure
; Uses RCX on the compute path.
; When IACA is defined the table path is not assembled and every distance
; goes through compute_dist_code.
%macro get_dist_code 4
%define %%dist %1		; IN, clobbered
%define %%distq %1		; same register, used in address expressions
%define %%code %2		; OUT
%define %%len %3		; OUT
%define %%hufftables %4		; base address of the hufftables

	cmp	%%dist, DIST_TABLE_SIZE - 1
	jg	%%do_compute
%ifndef IACA
	; DIST_TABLE_OFFSET sits one entry before the table, hence the "+ 1".
	; The base comes from the macro argument rather than from a name that
	; happens to exist in the invoking scope.
	mov	%%len %+ d, dword [%%hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
	mov	%%code, %%len
	and	%%len, 0x1F		; len  = entry[4:0]
	shr	%%code, 5		; code = entry[31:5]
	jmp	%%done
%endif
%%do_compute:
	compute_dist_code	%%distq, %%code, %%len, %%hufftables
%%done:
%endm
; get_packed_dist_code dist, code_len, hufftables
;
; Variant assembled when LONGER_HUFFTABLE is not defined. It accepts the same
; three arguments as the LONGER_HUFFTABLE variant but expands to no
; instructions, so code_len is left unmodified.
; NOTE(review): presumably callers do not consume code_len in this
; configuration - confirm.
%macro get_packed_dist_code 3
%define %%distance	%1	; 64-bit IN (unused)
%define %%packed	%2d	; 32-bit OUT (never written)
%define %%table_base	%3	; address of the hufftable (unused)
%endm
%endif
; Macros producing distance codes in the intermediate (ICF) layout
;
; compute_dist_icf_code dist, code, tmp1
;
; Derives the distance symbol and its extra bits arithmetically and packs
; them as: code = (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET)) + sym.
;   dist  IN   distance value; clobbered (holds sym on exit)
;   code  OUT  packed symbol and extra bits
;   tmp1       scratch register handed to BZHI as its 4th argument
; Assumes (dist != 0). Clobbers RCX (holds the extra-bit count on exit).
; BZHI and SHRX are macros defined outside this file; they are assumed to
; behave like the BMI2 instructions of the same names - confirm.
%macro compute_dist_icf_code 3
%define %%distance	%1
%define %%packed	%2
%define %%scratch	%3

	bsr	rcx, %%distance			; rcx = index of the highest set bit
	dec	rcx				; rcx = extra-bit count = msb - 1
	BZHI	%%packed, %%distance, rcx, %%scratch	; packed = extra bits
	SHRX	%%distance, %%distance, rcx	; drop the extra bits from the distance
	lea	%%distance, [%%distance + 2*rcx]	; sym = remaining bits + 2 * extra-bit count
	shl	%%packed, EXTRA_BITS_OFFSET - DIST_OFFSET
	add	%%packed, %%distance		; packed = shifted extra bits + sym
%endm
; get_dist_icf_code dist, code, tmp1
;
; Produces the ICF distance field for "dist".
; Distances of at most 1 (signed compare) are used as the symbol directly;
; anything larger goes through compute_dist_icf_code. In both cases the
; result is finally shifted left by DIST_OFFSET.
;   dist  IN   distance value; clobbered on the compute path
;   code  OUT  distance field; may be the same register as dist
;   tmp1       scratch register forwarded to compute_dist_icf_code
; Uses RCX on the compute path.
%macro get_dist_icf_code 3
%define %%distance	%1
%define %%distance_q	%1
%define %%packed	%2
%define %%scratch	%3

	cmp	%%distance, 1
	jg	%%needs_compute
	; Small distance: the symbol equals the distance itself. The copy is
	; skipped when both arguments name the same register.
%ifnidn %%packed, %%distance
	mov	%%packed, %%distance
%endif
	jmp	%%finish
%%needs_compute:
	compute_dist_icf_code	%%distance_q, %%packed, %%scratch
%%finish:
	shl	%%packed, DIST_OFFSET
%endm
; "len" can be same register as "length"
; get_len_code length, code, len
%macro get_len_code 4
; Arguments: length, code, len, hufftables
; Loads the packed 32-bit len_table entry for "length" and splits it into
; the code (bits 31:5) and the code length (bits 4:0). code and len are
; register names; the macro appends "d" to use their 32-bit forms.
%define %%match_len	%1	; IN: 64-bit register used as the table index
%define %%out_code	%2d	; OUT: code
%define %%out_len	%3d	; OUT: code length
%define %%table_base	%4	; IN: base address of the hufftables

	mov	%%out_len, [%%table_base + LEN_TABLE_OFFSET + 4 * %%match_len]
	mov	%%out_code, %%out_len		; keep a copy of the packed entry
	and	%%out_len, 0x1F			; len  = entry[4:0]
	shr	%%out_code, 5			; code = entry[31:5]
%endm
; get_lit_code lit, code, len, hufftables
;
; Fetches the Huffman code and code length of a literal from two separate
; tables: a byte table of lengths and a 16-bit table of codes.
;   lit         IN   64-bit register or constant used as the table index
;   code        OUT  register name; the macro appends "d" to address its 32-bit form
;   len         OUT  register name; the macro appends "d" to address its 32-bit form
;   hufftables  IN   base address of the hufftables structure
; The length is loaded before the code.
%macro get_lit_code 4
%define %%symbol	%1
%define %%out_code	%2d
%define %%out_len	%3d
%define %%table_base	%4

	movzx	%%out_len, byte [%%table_base + LIT_TABLE_SIZES_OFFSET + %%symbol]
	movzx	%%out_code, word [%%table_base + LIT_TABLE_OFFSET + 2 * %%symbol]
%endm
;; compute_hash result, data
;;
;; Hashes "data" with the crc32 instruction, starting from a zero seed.
;;   result  OUT  register name; the macro appends "d" to address its 32-bit form
;;   data    IN   register name; the macro appends "d" to address its 32-bit form; only read
;; NOTE(review): the instruction consumes the whole 32-bit operand; the
;; earlier description mentioned "first 3 bytes of data", so callers
;; presumably arrange for that themselves - confirm.
%macro compute_hash 2
%define %%hash_out	%1d
%define %%hash_in	%2d

	xor	%%hash_out, %%hash_out		; seed = 0
	crc32	%%hash_out, %%hash_in
%endm
|