1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
|
import math
import re
from zxcvbn.matching import (KEYBOARD_STARTING_POSITIONS, KEYBOARD_AVERAGE_DEGREE,
KEYPAD_STARTING_POSITIONS, KEYPAD_AVERAGE_DEGREE)
def binom(n, k):
"""
Returns binomial coefficient (n choose k).
"""
# http://blog.plover.com/math/choose.html
if k > n:
return 0
if k == 0:
return 1
result = 1
for denom in range(1, k + 1):
result *= n
result /= denom
n -= 1
return result
def lg(n):
"""
Returns logarithm of n in base 2.
"""
return math.log(n, 2)
# ------------------------------------------------------------------------------
# minimum entropy search -------------------------------------------------------
# ------------------------------------------------------------------------------
#
# takes a list of overlapping matches, returns the non-overlapping sublist with
# minimum entropy. O(nm) dp alg for length-n password with m candidate matches.
# ------------------------------------------------------------------------------
def get(a, i):
if i < 0 or i >= len(a):
return 0
return a[i]
def minimum_entropy_match_sequence(password, matches):
"""
Returns minimum entropy
Takes a list of overlapping matches, returns the non-overlapping sublist with
minimum entropy. O(nm) dp alg for length-n password with m candidate matches.
"""
bruteforce_cardinality = calc_bruteforce_cardinality(password) # e.g. 26 for lowercase
up_to_k = [0] * len(password) # minimum entropy up to k.
# for the optimal sequence of matches up to k, holds the final match (match['j'] == k). null means the sequence ends
# without a brute-force character.
backpointers = []
for k in range(0, len(password)):
# starting scenario to try and beat: adding a brute-force character to the minimum entropy sequence at k-1.
up_to_k[k] = get(up_to_k, k-1) + lg(bruteforce_cardinality)
backpointers.append(None)
for match in matches:
if match['j'] != k:
continue
i, j = match['i'], match['j']
# see if best entropy up to i-1 + entropy of this match is less than the current minimum at j.
up_to = get(up_to_k, i-1)
candidate_entropy = up_to + calc_entropy(match)
if candidate_entropy < up_to_k[j]:
#print "New minimum: using " + str(match)
#print "Entropy: " + str(candidate_entropy)
up_to_k[j] = candidate_entropy
backpointers[j] = match
# walk backwards and decode the best sequence
match_sequence = []
k = len(password) - 1
while k >= 0:
match = backpointers[k]
if match:
match_sequence.append(match)
k = match['i'] - 1
else:
k -= 1
match_sequence.reverse()
# fill in the blanks between pattern matches with bruteforce "matches"
# that way the match sequence fully covers the password: match1.j == match2.i - 1 for every adjacent match1, match2.
def make_bruteforce_match(i, j):
return {
'pattern': 'bruteforce',
'i': i,
'j': j,
'token': password[i:j+1],
'entropy': lg(math.pow(bruteforce_cardinality, j - i + 1)),
'cardinality': bruteforce_cardinality,
}
k = 0
match_sequence_copy = []
for match in match_sequence:
i, j = match['i'], match['j']
if i - k > 0:
match_sequence_copy.append(make_bruteforce_match(k, i - 1))
k = j + 1
match_sequence_copy.append(match)
if k < len(password):
match_sequence_copy.append(make_bruteforce_match(k, len(password) - 1))
match_sequence = match_sequence_copy
min_entropy = 0 if len(password) == 0 else up_to_k[len(password) - 1] # corner case is for an empty password ''
crack_time = entropy_to_crack_time(min_entropy)
# final result object
return {
'password': password,
'entropy': round_to_x_digits(min_entropy, 3),
'match_sequence': match_sequence,
'crack_time': round_to_x_digits(crack_time, 3),
'crack_time_display': display_time(crack_time),
'score': crack_time_to_score(crack_time),
}
def round_to_x_digits(number, digits):
"""
Returns 'number' rounded to 'digits' digits.
"""
return round(number * math.pow(10, digits)) / math.pow(10, digits)
# ------------------------------------------------------------------------------
# threat model -- stolen hash catastrophe scenario -----------------------------
# ------------------------------------------------------------------------------
#
# assumes:
# * passwords are stored as salted hashes, different random salt per user.
# (making rainbow attacks infeasable.)
# * hashes and salts were stolen. attacker is guessing passwords at max rate.
# * attacker has several CPUs at their disposal.
# ------------------------------------------------------------------------------
# for a hash function like bcrypt/scrypt/PBKDF2, 10ms per guess is a safe lower bound.
# (usually a guess would take longer -- this assumes fast hardware and a small work factor.)
# adjust for your site accordingly if you use another hash function, possibly by
# several orders of magnitude!
SINGLE_GUESS = .010
NUM_ATTACKERS = 100 # number of cores guessing in parallel.
SECONDS_PER_GUESS = SINGLE_GUESS / NUM_ATTACKERS
def entropy_to_crack_time(entropy):
return (0.5 * math.pow(2, entropy)) * SECONDS_PER_GUESS # average, not total
def crack_time_to_score(seconds):
if seconds < math.pow(10, 2):
return 0
if seconds < math.pow(10, 4):
return 1
if seconds < math.pow(10, 6):
return 2
if seconds < math.pow(10, 8):
return 3
return 4
# ------------------------------------------------------------------------------
# entropy calcs -- one function per match pattern ------------------------------
# ------------------------------------------------------------------------------
def calc_entropy(match):
if 'entropy' in match: return match['entropy']
if match['pattern'] == 'repeat':
entropy_func = repeat_entropy
elif match['pattern'] == 'sequence':
entropy_func = sequence_entropy
elif match['pattern'] == 'digits':
entropy_func = digits_entropy
elif match['pattern'] == 'year':
entropy_func = year_entropy
elif match['pattern'] == 'date':
entropy_func = date_entropy
elif match['pattern'] == 'spatial':
entropy_func = spatial_entropy
elif match['pattern'] == 'dictionary':
entropy_func = dictionary_entropy
match['entropy'] = entropy_func(match)
return match['entropy']
def repeat_entropy(match):
cardinality = calc_bruteforce_cardinality(match['token'])
return lg(cardinality * len(match['token']))
def sequence_entropy(match):
first_chr = match['token'][0]
if first_chr in ['a', '1']:
base_entropy = 1
else:
if first_chr.isdigit():
base_entropy = lg(10) # digits
elif first_chr.isalpha():
base_entropy = lg(26) # lower
else:
base_entropy = lg(26) + 1 # extra bit for uppercase
if not match['ascending']:
base_entropy += 1 # extra bit for descending instead of ascending
return base_entropy + lg(len(match['token']))
def digits_entropy(match):
return lg(math.pow(10, len(match['token'])))
NUM_YEARS = 119 # years match against 1900 - 2019
NUM_MONTHS = 12
NUM_DAYS = 31
def year_entropy(match):
return lg(NUM_YEARS)
def date_entropy(match):
if match['year'] < 100:
entropy = lg(NUM_DAYS * NUM_MONTHS * 100) # two-digit year
else:
entropy = lg(NUM_DAYS * NUM_MONTHS * NUM_YEARS) # four-digit year
if match['separator']:
entropy += 2 # add two bits for separator selection [/,-,.,etc]
return entropy
def spatial_entropy(match):
if match['graph'] in ['qwerty', 'dvorak']:
s = KEYBOARD_STARTING_POSITIONS
d = KEYBOARD_AVERAGE_DEGREE
else:
s = KEYPAD_STARTING_POSITIONS
d = KEYPAD_AVERAGE_DEGREE
possibilities = 0
L = len(match['token'])
t = match['turns']
# estimate the number of possible patterns w/ length L or less with t turns or less.
for i in range(2, L + 1):
possible_turns = min(t, i - 1)
for j in range(1, possible_turns+1):
x = binom(i - 1, j - 1) * s * math.pow(d, j)
possibilities += x
entropy = lg(possibilities)
# add extra entropy for shifted keys. (% instead of 5, A instead of a.)
# math is similar to extra entropy from uppercase letters in dictionary matches.
if 'shifted_count' in match:
S = match['shifted_count']
U = L - S # unshifted count
possibilities = sum(binom(S + U, i) for i in xrange(0, min(S, U) + 1))
entropy += lg(possibilities)
return entropy
def dictionary_entropy(match):
match['base_entropy'] = lg(match['rank']) # keep these as properties for display purposes
match['uppercase_entropy'] = extra_uppercase_entropy(match)
match['l33t_entropy'] = extra_l33t_entropy(match)
ret = match['base_entropy'] + match['uppercase_entropy'] + match['l33t_entropy']
return ret
START_UPPER = re.compile('^[A-Z][^A-Z]+$')
END_UPPER = re.compile('^[^A-Z]+[A-Z]$')
ALL_UPPER = re.compile('^[A-Z]+$')
def extra_uppercase_entropy(match):
word = match['token']
if word.islower():
return 0
# a capitalized word is the most common capitalization scheme,
# so it only doubles the search space (uncapitalized + capitalized): 1 extra bit of entropy.
# allcaps and end-capitalized are common enough too, underestimate as 1 extra bit to be safe.
for regex in [START_UPPER, END_UPPER, ALL_UPPER]:
if regex.match(word):
return 1
# Otherwise calculate the number of ways to capitalize U+L uppercase+lowercase letters with U uppercase letters or
# less. Or, if there's more uppercase than lower (for e.g. PASSwORD), the number of ways to lowercase U+L letters
# with L lowercase letters or less.
upp_len = len([x for x in word if x.isupper()])
low_len = len([x for x in word if x.islower()])
possibilities = sum(binom(upp_len + low_len, i) for i in range(0, min(upp_len, low_len) + 1))
return lg(possibilities)
def extra_l33t_entropy(match):
if 'l33t' not in match or not match['l33t']:
return 0
possibilities = 0
for subbed, unsubbed in match['sub'].items():
sub_len = len([x for x in match['token'] if x == subbed])
unsub_len = len([x for x in match['token'] if x == unsubbed])
possibilities += sum(binom(unsub_len + sub_len, i) for i in range(0, min(unsub_len, sub_len) + 1))
# corner: return 1 bit for single-letter subs, like 4pple -> apple, instead of 0.
if possibilities <= 1:
return 1
return lg(possibilities)
# utilities --------------------------------------------------------------------
def calc_bruteforce_cardinality(password):
lower, upper, digits, symbols = 0, 0, 0, 0
for char in password:
if char.islower():
lower = 26
elif char.isdigit():
digits = 10
elif char.isupper():
upper = 26
else:
symbols = 33
cardinality = lower + digits + upper + symbols
return cardinality
def display_time(seconds):
minute = 60
hour = minute * 60
day = hour * 24
month = day * 31
year = month * 12
century = year * 100
if seconds < minute:
return 'instant'
elif seconds < hour:
return str(1 + math.ceil(seconds / minute)) + " minutes"
elif seconds < day:
return str(1 + math.ceil(seconds / hour)) + " hours"
elif seconds < month:
return str(1 + math.ceil(seconds / day)) + " days"
elif seconds < year:
return str(1 + math.ceil(seconds / month)) + " months"
elif seconds < century:
return str(1 + math.ceil(seconds / year)) + " years"
else:
return 'centuries'
|