File: ml_samples_featurestore.py

package info (click to toggle)
python-azure 20250603%2Bgit-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 851,724 kB
  • sloc: python: 7,362,925; ansic: 804; javascript: 287; makefile: 195; sh: 145; xml: 109
file content (141 lines) | stat: -rw-r--r-- 5,650 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# coding: utf-8

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

"""
FILE: ml_samples_featurestore.py
DESCRIPTION:
    These samples demonstrate different ways to configure Feature Store and related resources.
USAGE:
    python ml_samples_featurestore.py

"""

import os


class FeatureStoreConfigurationOptions(object):
    """Sample that creates a feature store and configures related resources.

    The ``# [START ...]`` / ``# [END ...]`` comment pairs delimit the individual
    snippets. The code between each pair is kept unchanged and at method-body
    indentation.
    """

    def feature_store(self):
        """Create a feature store, configure sample resources in it, then delete it.

        The subscription id and resource group are read from the
        ``AZUREML_ARM_SUBSCRIPTION`` and ``AZUREML_ARM_RESOURCEGROUP`` environment
        variables, and ``DefaultAzureCredential`` is used for authentication.

        Once the feature store has been created, its deletion is requested in a
        ``finally`` clause, so a failure in a later step does not leave the
        feature store behind.

        :raises KeyError: If either environment variable is not set.
        """
        from azure.ai.ml import MLClient
        from azure.identity import DefaultAzureCredential

        subscription_id = os.environ["AZUREML_ARM_SUBSCRIPTION"]
        resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"]
        storage_account_name = "<FEATURE_STORAGE_ACCOUNT_NAME>"
        storage_file_system_name = "offlinestore"
        redis_cache_name = "onlinestore"
        credential = DefaultAzureCredential()

        # Client scoped to the resource group; used to create and delete the feature store.
        ml_client = MLClient(
            credential=credential,
            subscription_id=subscription_id,
            resource_group_name=resource_group,
        )

        # The settings object built in the next snippet is only constructed for
        # illustration; it is not assigned or passed to any service call.
        # [START configure_feature_store_settings]
        from azure.ai.ml.entities import ComputeRuntime, FeatureStoreSettings

        offline_store_target = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Storage/storageAccounts/{storage_account_name}/blobServices/default/containers/{storage_file_system_name}"

        online_store_target = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Cache/Redis/{redis_cache_name}"

        FeatureStoreSettings(
            compute_runtime=ComputeRuntime(spark_runtime_version="3.4.0"),
            offline_store_connection_name=offline_store_target,
            online_store_connection_name=online_store_target,
        )
        # [END configure_feature_store_settings]

        # [START create_feature_store]
        from azure.ai.ml.entities import FeatureStore

        featurestore_name = "my-featurestore"
        featurestore_location = "eastus"
        featurestore = FeatureStore(name=featurestore_name, location=featurestore_location)

        # wait for featurestore creation
        fs_poller = ml_client.feature_stores.begin_create(featurestore, update_dependent_resources=True)
        print(fs_poller.result())
        # [END create_feature_store]

        # From here on the feature store exists, so every remaining step runs
        # under try/finally to guarantee that the cleanup request is issued.
        try:
            # Second client, scoped to the newly created feature store.
            featurestore_client = MLClient(
                credential=credential,
                subscription_id=subscription_id,
                resource_group_name=resource_group,
                workspace_name=featurestore_name,
            )

            self._configure_feature_store_resources(
                featurestore_client,
                subscription_id,
                resource_group,
                storage_account_name,
                storage_file_system_name,
                featurestore_name,
            )
        finally:
            # Clean up created feature store for sample
            ml_client.feature_stores.begin_delete(featurestore.name, delete_dependent_resources=True)

    def _configure_feature_store_resources(
        self,
        featurestore_client,
        subscription_id,
        resource_group,
        storage_account_name,
        storage_file_system_name,
        featurestore_name,
    ):
        """Create the sample entity and feature set and build an offline-store configuration.

        :param featurestore_client: Client whose ``feature_store_entities`` and
            ``feature_sets`` operations are used to submit the entity and feature set.
        :param subscription_id: Subscription id interpolated into the storage container ARM id.
        :param resource_group: Resource group name interpolated into the storage container ARM id.
        :param storage_account_name: Storage account name interpolated into the ARM id.
        :param storage_file_system_name: Container name interpolated into the ARM id.
        :param featurestore_name: Name given to the ``FeatureStore`` configuration object.
        """
        # Imported here because the last snippet below builds a FeatureStore and
        # its own import line does not include that name.
        from azure.ai.ml.entities import FeatureStore

        # [START configure_feature_store_entity]
        from azure.ai.ml.entities import DataColumn, DataColumnType, FeatureStoreEntity

        account_column = DataColumn(name="accountID", type=DataColumnType.STRING)

        account_entity_config = FeatureStoreEntity(
            name="account",
            version="1",
            index_columns=[account_column],
            stage="Development",
            description="This entity represents user account index key accountID.",
            tags={"data_type": "nonPII"},
        )

        # wait for featurestore entity creation
        fs_entity_poller = featurestore_client.feature_store_entities.begin_create_or_update(account_entity_config)
        print(fs_entity_poller.result())
        # [END configure_feature_store_entity]

        # [START configure_feature_set]
        from azure.ai.ml.entities import FeatureSet, FeatureSetSpecification

        transaction_fset_config = FeatureSet(
            name="transactions",
            version="1",
            description="7-day and 3-day rolling aggregation of transactions featureset",
            entities=["azureml:account:1"],
            stage="Development",
            specification=FeatureSetSpecification(path="../azure-ai-ml/tests/test_configs/feature_set/code_sample/"),
            tags={"data_type": "nonPII"},
        )

        feature_set_poller = featurestore_client.feature_sets.begin_create_or_update(transaction_fset_config)
        print(feature_set_poller.result())
        # [END configure_feature_set]

        # The ``fs`` object built in the next snippet is only constructed to show
        # the configuration; it is not submitted to the service.
        # [START configure_materialization_store]
        from azure.ai.ml.entities import ManagedIdentityConfiguration, MaterializationStore

        gen2_container_arm_id = "/subscriptions/{sub_id}/resourceGroups/{rg}/providers/Microsoft.Storage/storageAccounts/{account}/blobServices/default/containers/{container}".format(
            sub_id=subscription_id,
            rg=resource_group,
            account=storage_account_name,
            container=storage_file_system_name,
        )

        offline_store = MaterializationStore(
            type="azure_data_lake_gen2",
            target=gen2_container_arm_id,
        )

        # Must define materialization identity when defining offline/online store.
        fs = FeatureStore(
            name=featurestore_name,
            offline_store=offline_store,
            materialization_identity=ManagedIdentityConfiguration(
                client_id="<YOUR-UAI-CLIENT-ID>",
                resource_id="<YOUR-UAI-RESOURCE-ID>",
                principal_id="<YOUR-UAI-PRINCIPAL-ID>",
            ),
        )
        # [END configure_materialization_store]


if __name__ == "__main__":
    # Run the feature store sample end to end when executed as a script.
    FeatureStoreConfigurationOptions().feature_store()