1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
|
from typing import List, Tuple
import torch
def greedy_knapsack(
memory: List[float], runtimes: List[float], max_memory: float
) -> Tuple[float, List[int], List[int]]:
n = len(runtimes)
items = list(range(n))
# Sort items based on the ratio of runtime to memory in descending order
items = sorted(items, key=lambda i: runtimes[i] / memory[i], reverse=True)
total_memory = 0.0
total_runtime = 0.0
items_to_save = []
items_to_allow_recomputing = []
for i in items:
if total_memory + memory[i] <= max_memory:
total_memory += memory[i]
total_runtime += runtimes[i]
items_to_save.append(i)
else:
items_to_allow_recomputing.append(i)
return total_runtime, items_to_save, items_to_allow_recomputing
def ilp_knapsack(
memory: List[float], runtimes: List[float], max_memory: float
) -> Tuple[float, List[int], List[int]]:
import numpy as np
try:
from scipy.optimize import Bounds, LinearConstraint, milp
except ImportError:
raise RuntimeError(
"To use the ILP for memory budget checkpointing you need to install scipy"
) from None
np_memory = np.array(memory)
np_runtimes = np.array(runtimes)
c = -np_runtimes # type: ignore[operator]
memory_constraint = LinearConstraint(A=np_memory, ub=np.array(max_memory))
constraints = [memory_constraint]
integrality = np.ones_like(c)
res = milp(
c=c, constraints=constraints, integrality=integrality, bounds=Bounds(0, 1)
)
if not res.success:
raise RuntimeError("Somehow scipy solving failed")
items_to_save = []
items_to_allow_recomputing = []
for idx, i in enumerate(res.x):
if i == 1:
items_to_save.append(idx)
else:
items_to_allow_recomputing.append(idx)
return -res.fun, items_to_save, items_to_allow_recomputing
def dp_knapsack(
memory: List[float], runtime: List[float], max_memory: float
) -> Tuple[float, List[int], List[int]]:
# Scaling factor to convert floating point weights to integers
S = 10000
# Quantize the memory weights
quantized_memory = torch.tensor(
[int(round(m * S)) for m in memory], dtype=torch.long, device="cpu"
)
runtimes = torch.tensor(runtime, dtype=torch.float32, device="cpu")
# Quantized pseudopolynomial DP for 0-1 Knapsack
quantized_max_memory = int(round(max_memory * S))
n = len(memory)
# Initialize the DP table
# TODO(chilli): I think if needed, this memory can be optimized with sliding
# window trick + Hirschberg trick:
# https://codeforces.com/blog/entry/47247?#comment-316200
dp = torch.zeros(
(n + 1, quantized_max_memory + 1), dtype=torch.float32, device="cpu"
)
for i in range(1, n + 1):
current_memory = quantized_memory[i - 1]
current_runtime = runtimes[i - 1]
# Copy the previous row
dp[i, :] = dp[i - 1, :]
# Update dp[i, j] for all j >= current_memory
if current_memory == 0:
dp[i, :] = dp[i - 1, :] + current_runtime
else:
dp[i, current_memory:] = torch.maximum(
dp[i - 1, current_memory:],
dp[i - 1, :-current_memory] + current_runtime,
)
# Backtrack to find the items included in the knapsack
saved_items = []
recomputable_items = []
j: int = quantized_max_memory
for i in range(n, 0, -1):
if dp[i][j] != dp[i - 1][j]:
saved_items.append(i - 1) # Include this item (indexing from 0)
j -= int(quantized_memory[i - 1].item())
else:
recomputable_items.append(i - 1)
saved_items.reverse() # To get items in the order they were added
# The maximum runtime that can be achieved within the max_memory constraint
max_runtime = dp[n][quantized_max_memory].item()
return max_runtime, saved_items, recomputable_items
|