1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
|
# coding: utf-8
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
FILE: datalake_samples_query.py
DESCRIPTION:
This sample demonstrates how to read file data using the quick query feature.
USAGE: python datalake_samples_query.py
Set the environment variables with your own values before running the sample.
1) DATALAKE_STORAGE_CONNECTION_STRING - the connection string to your storage account
"""
import os
import sys
from azure.storage.filedatalake import DataLakeServiceClient, DelimitedJsonDialect, DelimitedTextDialect
# Sample CSV payload (bytes, CRLF row terminators) that main() uploads and then
# queries. Columns: Service, Package, Version, RepoPath, MissingDocs.
# NOTE(review): the second record ("Event Hubs") is truncated (missing its
# trailing fields) and the header row repeats twice mid-data — presumably
# intentional so the on_error callback below has malformed rows to report;
# confirm before "fixing" the data.
CSV_DATA: bytes = b'Service,Package,Version,RepoPath,MissingDocs\r\nApp Configuration,' \
b'azure-data-appconfiguration,1,appconfiguration,FALSE\r\nEvent Hubs' \
b'\r\nEvent Hubs - Azure Storage CheckpointStore,' \
b'azure-messaging-eventhubs-checkpointstore-blob,1.0.1,eventhubs,FALSE\r\nIdentity,azure-identity,' \
b'1.1.0-beta.1,identity,FALSE\r\nKey Vault - Certificates,azure-security-keyvault-certificates,' \
b'4.0.0,keyvault,FALSE\r\nKey Vault - Keys,azure-security-keyvault-keys,4.2.0-beta.1,keyvault,' \
b'FALSE\r\nKey Vault - Secrets,azure-security-keyvault-secrets,4.1.0,keyvault,FALSE\r\n' \
b'Storage - Blobs,azure-storage-blob,12.4.0,storage,FALSE\r\nStorage - Blobs Batch,' \
b'azure-storage-blob-batch,12.4.0-beta.1,storage,FALSE\r\nStorage - Blobs Cryptography,' \
b'azure-storage-blob-cryptography,12.4.0,storage,FALSE\r\nStorage - File Shares,' \
b'azure-storage-file-share,12.2.0,storage,FALSE\r\nStorage - Queues,' \
b'azure-storage-queue,12.3.0,storage,FALSE\r\nText Analytics,' \
b'azure-ai-textanalytics,1.0.0-beta.2,textanalytics,FALSE\r\nTracing,' \
b'azure-core-tracing-opentelemetry,1.0.0-beta.2,core,FALSE\r\nService,Package,Version,RepoPath,' \
b'MissingDocs\r\nApp Configuration,azure-data-appconfiguration,1.0.1,appconfiguration,FALSE\r\n' \
b'Event Hubs,azure-messaging-eventhubs,5.0.1,eventhubs,FALSE\r\n' \
b'Event Hubs - Azure Storage CheckpointStore,azure-messaging-eventhubs-checkpointstore-blob,' \
b'1.0.1,eventhubs,FALSE\r\nIdentity,azure-identity,1.1.0-beta.1,identity,FALSE\r\n' \
b'Key Vault - Certificates,azure-security-keyvault-certificates,4.0.0,keyvault,FALSE\r\n' \
b'Key Vault - Keys,azure-security-keyvault-keys,4.2.0-beta.1,keyvault,FALSE\r\n' \
b'Key Vault - Secrets,azure-security-keyvault-secrets,4.1.0,keyvault,FALSE\r\n' \
b'Storage - Blobs,azure-storage-blob,12.4.0,storage,FALSE\r\n' \
b'Storage - Blobs Batch,azure-storage-blob-batch,12.4.0-beta.1,storage,FALSE\r\n' \
b'Storage - Blobs Cryptography,azure-storage-blob-cryptography,12.4.0,storage,FALSE\r\n' \
b'Storage - File Shares,azure-storage-file-share,12.2.0,storage,FALSE\r\n' \
b'Storage - Queues,azure-storage-queue,12.3.0,storage,FALSE\r\n' \
b'Text Analytics,azure-ai-textanalytics,1.0.0-beta.2,textanalytics,FALSE\r\n' \
b'Tracing,azure-core-tracing-opentelemetry,1.0.0-beta.2,core,FALSE\r\n' \
b'Service,Package,Version,RepoPath,MissingDocs\r\n' \
b'App Configuration,azure-data-appconfiguration,1.0.1,appconfiguration,FALSE\r\n' \
b'Event Hubs,azure-messaging-eventhubs,5.0.1,eventhubs,FALSE\r\n'
def main():
    """Upload the sample CSV data and run a quick query against it.

    Reads the connection string from the DATALAKE_STORAGE_CONNECTION_STRING
    environment variable (exits with status 1 if unset), uploads CSV_DATA to
    a file in the "quickqueryfilesystem" filesystem, queries the second CSV
    column, prints the result, and deletes the filesystem.
    """
    try:
        connection_string = os.environ['DATALAKE_STORAGE_CONNECTION_STRING']
    except KeyError:
        print("DATALAKE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    datalake_service_client = DataLakeServiceClient.from_connection_string(connection_string)
    filesystem_name = "quickqueryfilesystem"
    filesystem_client = datalake_service_client.get_file_system_client(filesystem_name)
    try:
        filesystem_client.create_file_system()
    except Exception:
        # Best-effort create: the filesystem may already exist from a previous
        # run. Narrowed from a bare `except:`, which would also have swallowed
        # SystemExit and KeyboardInterrupt.
        pass

    # [START query]
    # Collect any per-record parse errors reported by the service during the query.
    errors = []
    def on_error(error):
        errors.append(error)

    # upload the csv file
    file_client = datalake_service_client.get_file_client(filesystem_name, "csvfile")
    file_client.upload_data(CSV_DATA, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from DataLakeStorage"
    input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar="", has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = file_client.query_file(query_expression, on_error=on_error, file_format=input_format, output_format=output_format)
    content = reader.readall()
    # [END query]

    print(content)
    # Clean up the filesystem created (or reused) above.
    filesystem_client.delete_file_system()


if __name__ == "__main__":
    main()
|