1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
const {
SmartTabGroupingManager,
CLUSTER_METHODS,
ANCHOR_METHODS,
getBestAnchorClusterInfo,
ClusterRepresentation,
SMART_TAB_GROUPING_CONFIG,
isSearchTab,
} = ChromeUtils.importESModule(
"moz-src:///browser/components/tabbrowser/SmartTabGrouping.sys.mjs"
);
/**
 * Checks whether two numbers are equal once both are rounded to
 * `decimalPoints` decimal places.
 *
 * The comparison is made on the `toFixed` text of each number. `toFixed`
 * keeps the minus sign of a negative value that rounds to zero ("-0.00"),
 * so that sign is dropped before comparing; otherwise -0.001 and 0.001 would
 * be reported as different at two decimal places.
 *
 * @param {number} a
 * @param {number} b
 * @param {number} decimalPoints Number of decimal places to keep (default 2)
 * @returns {boolean} True if both numbers round to the same value
 */
function numberLooseEquals(a, b, decimalPoints = 2) {
  const toComparableText = value => {
    const text = value.toFixed(decimalPoints);
    // Number("-0.00") is -0 and -0 === 0, so only rounded zeros lose their sign.
    return Number(text) === 0 ? text.replace(/^-/, "") : text;
  };
  return toComparableText(a) === toComparableText(b);
}
/**
 * Compares two vectors element by element after rounding every element to
 * `decimalPoints` decimal places.
 *
 * Vectors of different lengths are never equal. Elements are compared through
 * their `toFixed` text, with the minus sign of a value that rounds to zero
 * ("-0.00") dropped so that -0.001 and 0.001 match at two decimal places.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @param {number} decimalPoints Number of decimal places to keep (default 2)
 * @returns {boolean} True if the vectors have the same length and every pair
 *   of elements rounds to the same value
 */
function vectorLooseEquals(a, b, decimalPoints = 2) {
  // Without this guard a shorter `a` would match a longer `b`, and a longer
  // `a` would throw when reading past the end of `b`.
  if (a.length !== b.length) {
    return false;
  }
  const toComparableText = value => {
    const text = value.toFixed(decimalPoints);
    return Number(text) === 0 ? text.replace(/^-/, "") : text;
  };
  return a.every(
    (item, index) => toComparableText(item) === toComparableText(b[index])
  );
}
/**
 * Builds a deterministic stand-in for a random number generator, for use in
 * tests. The returned function cycles through a fixed list of ten numbers
 * between 0 and 1.
 *
 * The seed selects the starting position in the list. The position is
 * advanced before each value is returned, so the first value produced is the
 * one that follows the seed position. Negative seeds wrap around the list and
 * non-finite seeds are treated as 0, so every call yields a number.
 *
 * @param {number} seed Starting position; floored and wrapped to the list size
 * @returns {function(): number} Function returning the next number in the cycle
 */
function simpleNumberSequence(seed = 0) {
  const values = [
    0.42, 0.145, 0.5, 0.9234, 0.343, 0.1324, 0.8343, 0.534, 0.634, 0.3233,
  ];
  const flooredSeed = Math.floor(seed);
  const start = Number.isFinite(flooredSeed) ? flooredSeed : 0;
  // `%` keeps the sign of the dividend, so wrap negatives back into range;
  // a negative index would read `undefined` from the list.
  let counter = ((start % values.length) + values.length) % values.length;
  return () => {
    counter = (counter + 1) % values.length;
    return values[counter];
  };
}
/**
 * Shuffles an array in place, walking from the last position down to the
 * first and swapping each position with a randomly chosen position at or
 * before it. Nothing is returned.
 *
 * The random source is called exactly once per element.
 *
 * @param {object[]} array Items to shuffle; modified in place
 * @param {Function} randFunc Function returning a number between 0 and 1.
 *   Math.random is used when this is null or undefined.
 */
function shuffleArray(array, randFunc) {
  const nextRandom = randFunc ?? Math.random;
  let upper = array.length;
  while (upper > 0) {
    // Pick among the first `upper` positions, then swap with the last of them.
    const pick = Math.floor(nextRandom() * upper);
    upper -= 1;
    const incoming = array[pick];
    const outgoing = array[upper];
    array[upper] = incoming;
    array[pick] = outgoing;
  }
}
/**
 * Averages a list of stats objects key by key.
 *
 * The keys are taken from the first object only. For each of those keys the
 * values across all objects are summed and divided by the number of objects.
 * An empty list produces an empty object.
 *
 * @param {object[]} itemArray List of objects holding numeric values
 * @returns {object} Object mapping each key of the first item to its average
 */
function averageStatsValues(itemArray) {
  const count = itemArray.length;
  if (count === 0) {
    return {};
  }
  const averages = {};
  for (const field of Object.keys(itemArray[0])) {
    const sum = itemArray.reduce(
      (running, entry) => running + entry[field],
      0.0
    );
    averages[field] = sum / count;
  }
  return averages;
}
/**
 * Parses a tab-separated string whose first row holds the column names.
 *
 * The whole string is trimmed first, then split into rows on LF or CRLF line
 * endings so that Windows-style files do not leave a stray "\r" on the last
 * column name or on the last cell of each row. Each remaining row becomes an
 * object keyed by the header names; a row with fewer cells than the header
 * gets `undefined` for the missing columns.
 *
 * @param {string} tsvString String to read from
 * @returns {object[]} One object per data row, keyed by column name, with
 *   string values
 */
function parseTsvStructured(tsvString) {
  const [headerRow, ...dataRows] = tsvString.trim().split(/\r?\n/);
  const keys = headerRow.split("\t");
  return dataRows.map(row => {
    const values = row.split("\t");
    // Map keys to corresponding values
    const dict = {};
    keys.forEach((key, index) => {
      dict[key] = values[index];
    });
    return dict;
  });
}
/**
 * Parses a tab-separated string of numbers into a list of vectors.
 *
 * The string is trimmed and split on newlines; every row is split on tabs and
 * each cell is converted with parseFloat. There is no header row. Cells that
 * cannot be parsed become NaN.
 *
 * @param {string} tsvString String with one embedding per line
 * @returns {number[][]} One array of numbers per line of input
 */
function parseTsvEmbeddings(tsvString) {
  const embeddings = [];
  for (const line of tsvString.trim().split("\n")) {
    const vector = [];
    for (const cell of line.split("\t")) {
      vector.push(parseFloat(cell));
    }
    embeddings.push(vector);
  }
  return embeddings;
}
/**
 * Configures a new SmartTabGroupingManager, runs `generateClusters`
 * `iterations` times and returns the averaged accuracy stats of the anchor
 * cluster.
 *
 * On every run the anchor cluster is printed and an assertion checks that the
 * title of every pre-grouped tab is present in the anchor cluster. The same
 * deterministic number sequence is shared by all runs.
 *
 * @param {string} clusterMethod Value passed to setClusteringMethod
 * @param {string} umapMethod Value passed to setDimensionReductionMethod
 * @param {object[]} tabs Tabs to cluster; each is read through its "title" key
 * @param {object[]} embeddings Precomputed embeddings for the tabs
 * @param {number} iterations Number of clustering runs to average over
 * @param {number[]} preGroupedTabIndices Indices into `tabs` of the tabs that
 *   are already in the group
 * @param {string} anchorMethod Value passed to setAnchorMethod
 * @param {number} silBoost Value passed to setSilBoost; skipped when undefined
 * @returns {Promise<object>} Average of the per-run stats objects
 */
async function testAugmentGroup(
  clusterMethod,
  umapMethod,
  tabs,
  embeddings,
  iterations = 1,
  preGroupedTabIndices,
  anchorMethod = ANCHOR_METHODS.FIXED,
  silBoost = undefined
) {
  const TITLE_FIELD = "title";
  const titleOf = tab => tab[TITLE_FIELD];

  const manager = new SmartTabGroupingManager();
  manager.setAnchorMethod(anchorMethod);
  if (silBoost !== undefined) {
    manager.setSilBoost(silBoost);
  }
  const nextPseudoRandom = simpleNumberSequence();
  manager.setDataTitleKey(TITLE_FIELD);
  manager.setClusteringMethod(clusterMethod);
  manager.setDimensionReductionMethod(umapMethod);

  const perRunStats = [];
  let run = 0;
  while (run < iterations) {
    const clusters = await manager.generateClusters(
      tabs,
      embeddings,
      0,
      nextPseudoRandom,
      preGroupedTabIndices
    );

    const clusteredTitles = new Set(
      clusters.getAnchorCluster().tabs.map(tab => titleOf(tab))
    );
    clusters.getAnchorCluster().print();
    const expectedTitles = new Set(
      preGroupedTabIndices.map(index => titleOf(tabs[index]))
    );
    // Every pre-grouped title must appear among the anchor cluster's titles.
    Assert.equal(
      clusteredTitles.intersection(expectedTitles).size,
      expectedTitles.size,
      "All anchor indices in target cluster"
    );

    perRunStats.push(
      clusters.getAccuracyStatsForCluster(
        "smart_group_label",
        clusters.getAnchorCluster().tabs[0].smart_group_label
      )
    );
    run += 1;
  }
  return averageStatsValues(perRunStats);
}
/**
 * Runs the anchor-tab clustering test once for each entry of an internal
 * parameter table, which currently holds a single entry using
 * CLUSTER_METHODS.KMEANS and no dimension reduction method.
 *
 * @param {object[]} data Tabs to run the test on
 * @param {object[]} precomputedEmbeddings Embeddings for the tabs
 * @param {number[]} anchorGroupIndices Indices of tabs already present in the
 *   group
 * @param {string} anchorMethod Anchor method forwarded to testAugmentGroup
 * @param {number} silBoost Silhouette boost forwarded to testAugmentGroup
 * @returns {Promise<{}|null>} Stats of the run when the table has exactly one
 *   entry, otherwise null
 */
async function runAnchorTabTest(
  data,
  precomputedEmbeddings = null,
  anchorGroupIndices,
  anchorMethod = ANCHOR_METHODS.FIXED,
  silBoost = undefined
) {
  // Each entry is [clusterMethod, umapMethod]; a missing second item is
  // passed on as undefined.
  const parameterSets = [[CLUSTER_METHODS.KMEANS]];
  let latestStats;
  for (const [clusterMethod, umapMethod] of parameterSets) {
    latestStats = await testAugmentGroup(
      clusterMethod,
      umapMethod,
      data,
      precomputedEmbeddings,
      1,
      anchorGroupIndices,
      anchorMethod,
      silBoost
    );
  }
  return parameterSets.length === 1 ? latestStats : null;
}
/**
 * Fetches a file with an asynchronous XMLHttpRequest GET and resolves with its
 * text. The URL is the prefix immediately followed by the file name.
 *
 * @param {string} host_prefix Root data folder path, prepended to the file name
 * @param {string} filename Name of the file
 * @returns {Promise<string>} Resolves with the response text on HTTP status
 *   200; rejects with an Error on any other status or on a network error
 */
function fetchFile(host_prefix, filename) {
  return new Promise((resolve, reject) => {
    const request = new XMLHttpRequest();
    const url = `${host_prefix}${filename}`;
    request.open("GET", url, true);
    request.onerror = () => reject(new Error(`Network error getting ${url}`));
    request.onload = () => {
      if (request.status !== 200) {
        reject(new Error(`Failed to fetch data: ${request.statusText}`));
        return;
      }
      resolve(request.responseText);
    };
    request.send();
  });
}
/**
 * Builds a plain object standing in for a tab in tests. It exposes
 * `linkedBrowser.getAttribute`, `linkedBrowser.currentURI.spec` and `label`.
 *
 * @param {object} options
 * @param {string|null} options.searchURL - Value returned by
 *   getAttribute("triggeringSearchEngineURL"); any other attribute name
 *   returns null.
 * @param {string} options.currentURL - Value exposed as
 *   linkedBrowser.currentURI.spec.
 * @param {string|null} options.title - Value exposed as the tab's label.
 * @returns {object} The mock tab object.
 */
function createMockTab({ searchURL, currentURL, title }) {
  const linkedBrowser = {
    getAttribute(name) {
      return name === "triggeringSearchEngineURL" ? searchURL : null;
    },
    currentURI: { spec: currentURL },
  };
  return { linkedBrowser, label: title };
}
|