File: llm.py

from __future__ import annotations

import json
import re
from collections import defaultdict
from pathlib import Path
from typing import Any

from tools.stats.import_test_stats import ADDITIONAL_CI_FILES_FOLDER
from tools.testing.target_determination.heuristics.interface import (
    HeuristicInterface,
    TestPrioritizations,
)
from tools.testing.target_determination.heuristics.utils import normalize_ratings
from tools.testing.test_run import TestRun


REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent.parent


class LLM(HeuristicInterface):
    """Prioritize tests using precomputed, LLM-derived relevance scores."""

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

    def get_prediction_confidence(self, tests: list[str]) -> TestPrioritizations:
        """Score the given tests against the LLM mappings and return prioritizations."""
        critical_tests = self.get_mappings()
        # Keep only mapped tests that are part of the current test list
        filter_valid_tests = {
            TestRun(test): score
            for test, score in critical_tests.items()
            if test in tests
        }
        normalized_scores = normalize_ratings(filter_valid_tests, 0.25)
        return TestPrioritizations(tests, normalized_scores)

    def get_mappings(self) -> dict[str, float]:
        """Load the LLM indexer scores and average them per test file."""
        path = (
            REPO_ROOT
            / ADDITIONAL_CI_FILES_FOLDER
            / "llm_results/mappings/indexer-files-gitdiff-output.json"
        )
        if not path.exists():
            print(f"could not find path {path}")
            return {}
        with open(path) as f:
            # Group by file
            r = defaultdict(list)
            for key, value in json.load(f).items():
                # Strip the ".py" suffix so scores are grouped by test file
                re_match = re.match(r"(.*)\.py", key)
                if re_match:
                    file = re_match.group(1)
                    r[file].append(value)
            # Average the scores for each file
            r = {file: sum(scores) / len(scores) for file, scores in r.items()}
            return r
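
As a rough, standalone illustration of what get_mappings does with the indexer output, the sketch below groups hypothetical per-test scores by test file and averages them, mirroring the logic above. The key format ("<test_file>.py::<TestClass>::<test_name>") and the sample scores are assumptions for demonstration only, not something this file specifies.

# Illustrative sketch only; the keys and scores below are assumed.
import re
from collections import defaultdict

sample = {
    "test_nn.py::TestNN::test_linear": 1.0,
    "test_nn.py::TestNN::test_conv": 0.5,
    "test_torch.py::TestTorch::test_add": 0.25,
}

grouped = defaultdict(list)
for key, score in sample.items():
    match = re.match(r"(.*)\.py", key)  # same grouping pattern as get_mappings
    if match:
        grouped[match.group(1)].append(score)

averaged = {file: sum(scores) / len(scores) for file, scores in grouped.items()}
print(averaged)  # {'test_nn': 0.75, 'test_torch': 0.25}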