File: output_old_packages.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (172 lines) | stat: -rw-r--r-- 5,699 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

import os
import typing
import csv
import argparse
import pathlib
import glob
import datetime

import requests

from ci_tools.parsing import get_config_setting
from pypi_tools.pypi import PyPIClient

INACTIVE_CLASSIFIER = "Development Status :: 7 - Inactive"


def get_newer(current: datetime.date, contender: datetime.date) -> datetime.date:
    """Return the later of the two dates.

    ``current`` wins only when it is strictly greater; on a tie ``contender``
    is the object handed back.
    """
    return current if current > contender else contender


def write_csv(packages: typing.Mapping[str, str]) -> None:
    """Write the collected packages to ``./old_packages.csv``.

    Each key of ``packages`` is a package name and each value is indexed by
    "version", "date", "status" and "downloads_90d" to fill one row. When
    ``packages`` is empty a message is printed and no file is created.
    """
    if not packages:
        print("No packages found.")
        return

    header = [
        "Package",
        "Last released version",
        "Last released date",
        "Status",
        "Downloads (last 90 days)",
    ]
    # Keys read from each package entry, in the same order as the header columns.
    fields = ("version", "date", "status", "downloads_90d")

    with open("./old_packages.csv", mode="w", newline="", encoding="utf-8") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        out.writerows(
            [name, *(details[field] for field in fields)]
            for name, details in packages.items()
        )


def get_latest_release(
    project: typing.Mapping[str, typing.Any]
) -> typing.Mapping[str, typing.Any]:
    """Return details of the most recently uploaded release of a project.

    :param project: Mapping with a "releases" entry (version -> list of
        uploaded files, each carrying an "upload_time" string formatted as
        ``%Y-%m-%dT%H:%M:%S``) and an "info" entry holding "classifiers".
    :returns: A dict with "version", "date" (a ``datetime.date``) and
        "status". "status" is the first classifier starting with
        "Development Status ::"; if there is none, the first classifier is
        used, or an empty string when the project has no classifiers at all.
    :raises ValueError: If no release has any uploaded file, so there is no
        upload date to report.
    """
    latest_version = None
    latest_date = None

    for version, files in project["releases"].items():
        if not files:
            # somehow we release without whl/sdist?
            continue

        # The upload time of the first listed file stands in for the release date.
        release_date = datetime.datetime.strptime(
            files[0]["upload_time"], "%Y-%m-%dT%H:%M:%S"
        ).date()
        # ">=" keeps the original tie-break: on equal dates the later entry wins.
        if latest_date is None or release_date >= latest_date:
            latest_version, latest_date = version, release_date

    if latest_date is None:
        # Previously this path fell through to an unbound local variable.
        raise ValueError("Project has no release with uploaded files.")

    classifiers = project["info"]["classifiers"]
    # Look the status up by prefix rather than trusting it to be listed first.
    status = next(
        (c for c in classifiers if c.startswith("Development Status ::")),
        None,
    )
    if status is None:
        status = classifiers[0] if classifiers else ""

    return {
        "version": latest_version,
        "date": latest_date,
        "status": status,
    }


def apply_filters(pkg_path: str, release: typing.Mapping[str, str]) -> bool:
    """Decide whether a package passes the filters and should be reported.

    Returns False when the release status equals the Inactive classifier, or
    when the package's ``verify_status_by`` setting is today or later.
    Returns True otherwise, including when no ``verify_status_by`` is set.
    """
    is_inactive = release["status"] == INACTIVE_CLASSIFIER
    if is_inactive:
        return False

    deadline = get_config_setting(pkg_path, "verify_status_by", default=None)
    if deadline is None:
        return True

    current_day = datetime.datetime.today().date()
    # A deadline that has not passed yet (or falls on today) suppresses the package.
    deadline_not_passed = get_newer(current_day, deadline) == deadline
    return not deadline_not_passed


class PepyClient:
    """Client to interact with the Pepy API to fetch package download data."""

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Initialize the client with your API key - https://www.pepy.tech/pepy-api (register first)

        :param api_key: Key sent in the ``x-api-key`` header of every request.
        :param timeout: Seconds to wait on a request before giving up.
        """
        self.api_key = api_key
        self.timeout = timeout

    def get_downloads_90d(self, package: str) -> int:
        """Get the total downloads reported by Pepy for a given package.

        Sums every count found under the response's "downloads" mapping.
        NOTE(review): the 90-day window is whatever the API returns for this
        key; it is not enforced here - confirm against the Pepy API docs.

        :param package: Project name to look up.
        :returns: The summed download count, or -1 if the request failed or
            the response could not be decoded.
        """
        url = f"https://api.pepy.tech/api/v2/projects/{package}"
        headers = {"x-api-key": self.api_key}
        try:
            # Without a timeout a stalled connection would block the script forever.
            response = requests.get(url, headers=headers, timeout=self.timeout)
            response.raise_for_status()
            # "or {}" also covers a payload where "downloads" is present but null.
            downloads_90d = response.json().get("downloads") or {}
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures that are not raised as
            # a RequestException.
            print(f"Request failed: {e}")
            return -1

        return sum(
            downloads
            for versions in downloads_90d.values()
            for downloads in versions.values()
        )


if __name__ == "__main__":
    # Walk every azure* package directory under <repo>/sdk/<service>/, look it
    # up on PyPI, and write the ones whose latest release is older than the
    # cutoff to a CSV together with their Pepy download count.
    parser = argparse.ArgumentParser(description="Output old packages in the repo.")

    parser.add_argument(
        "-y",
        "--years",
        dest="years",
        help="How many years since last release. Defaults to 2.",
        type=int,
        default=2,
    )

    # store_false: args.filter is True unless --disable-filter is passed.
    parser.add_argument(
        "-f",
        "--disable-filter",
        dest="filter",
        help="Disable the filter which removes Inactive packages and ones with verify_status_by dates in the future.",
        action="store_false",
    )

    args = parser.parse_args()

    # Fail with a readable message instead of a bare KeyError traceback.
    pepy_api_key = os.environ.get("PEPY_API_KEY")
    if pepy_api_key is None:
        parser.error("The PEPY_API_KEY environment variable must be set.")

    sdk_path = pathlib.Path(__file__).parent.parent.parent / "sdk"
    service_directories = glob.glob(f"{sdk_path}/*/", recursive=True)
    pypi_client = PyPIClient()
    pepy_client = PepyClient(pepy_api_key)
    old_packages = {}

    years = args.years
    # A year is approximated as 365 days; leap days are ignored.
    timepoint = datetime.datetime.today().date() - datetime.timedelta(days=365 * years)

    for service in service_directories:
        package_paths = glob.glob(f"{service}*/", recursive=True)
        for package_path in package_paths:
            package_name = pathlib.Path(package_path).name
            if not package_name.startswith("azure"):
                continue

            pypi_project = pypi_client.project(package_name)
            if not pypi_project.get("releases"):
                # not yet released: the key is missing, or the project exists
                # but its release mapping is still empty
                continue

            latest_release = get_latest_release(pypi_project)

            # Only packages whose latest release is on or before the cutoff are old.
            is_old = get_newer(latest_release["date"], timepoint) == timepoint
            if not is_old:
                continue

            # The filters are consulted only while filtering is enabled.
            if args.filter and not apply_filters(package_path, latest_release):
                continue

            old_packages[package_name] = latest_release
            old_packages[package_name]["downloads_90d"] = (
                pepy_client.get_downloads_90d(package_name)
            )

    write_csv(old_packages)