File: exampleProtoCriteo.proto

package info (click to toggle)
python-confluent-kafka 2.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,232 kB
  • sloc: python: 36,571; ansic: 9,717; sh: 1,519; makefile: 198
file content (81 lines) | stat: -rw-r--r-- 2,139 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// Schema written in proto3 syntax; every definition in this file lives in the
// Criteo.Glup protobuf package.
syntax = "proto3";
package Criteo.Glup;
// Java package used for the classes generated from this file.
option java_package = "com.criteo.glup";

import "tests/integration/schema_registry/data/proto/metadata_proto.proto";
import "tests/integration/schema_registry/data/proto/common_proto.proto";

// ClickCas message: a set of custom options describing its Kafka, dataset and
// HDFS configuration, followed by the payload fields.
//
// NOTE(review): the (contains_nullable_fields), (glup) and (json) extensions
// and the Origin, Partition and ControlMessage.Watermark types are not
// declared in this file; presumably they come from the imported
// metadata_proto.proto / common_proto.proto - confirm.
message ClickCas {
  // Marks this message as containing nullable fields.
  option (contains_nullable_fields) = true;

  // Producers: Kafka is enabled.
  option (glup).producers = { kafka: true };

  // Kafka topic configured for this message.
  option (glup).kafka = { topic: "glup_click_cas" };

  // Dataset description: identity, owner, retention and the two storage
  // formats (JSON_PAIL and PROTOBUF_PARQUET), each with its own priority.
  option (glup).dataset = {
    id: "click_cas"
    java_class: "com.criteo.glup.ClickCasProto$ClickCas"
    kind: TIMESERIES
    partition_scheme: PLATFORM_HOURLY
    owner: "enginejoins"
    retention_days: 390

    // Switch time: 2018-03-20-11 timestamp: 1521543600
    format {
      path: "/glup/datasets/click_cas/data/full/JSON_PAIL"
      file_format: JSON_PAIL
      priority: 100
      label: "FEDERATED_JSON_PAIL"
    }

    format {
      path: "/glup/datasets/click_cas/data/full/PROTOBUF_PARQUET"
      file_format: PROTOBUF_PARQUET
      priority: 50
      label: "FEDERATED_PROTOBUF_PARQUET"
    }
  };

  // HDFS import description: two generators (a kafka2hdfs step and a
  // transcoding step), both targeting env PROD / dc PA4, plus a Hive view.
  option (glup).hdfs = {
    import {
      owner: "enginejoins"
      name: "click_cas"
      partitioning: PLATFORM_HOURLY

      // Reads the Kafka topic and writes the dataset under the
      // FEDERATED_JSON_PAIL format label.
      generator {
        kafka2hdfs {
          topic: "glup_click_cas"
          output_dataset_id: "click_cas"
          output_format_label: "FEDERATED_JSON_PAIL"
        }
        to {
          env: PROD
          dc: PA4
        }
      }

      // Transcoding to Parquet to prepare the migration.
      generator {
        transcoding {
          input_dataset_id: "click_cas"
          input_dataset_label: "FEDERATED_JSON_PAIL"
          output_dataset_id: "click_cas"
          output_dataset_label: "FEDERATED_PROTOBUF_PARQUET"
        }
        to {
          env: PROD
          dc: PA4
        }
      }

      view {
        hive {
          partitioning: PLATFORM_HOURLY
        }
      }
    }
  };

  // Origin of the message (type declared outside this file).
  Origin glup_origin = 1;

  // Partition of the message (type declared outside this file).
  Partition partition = 2;

  // Identifier string.
  // NOTE(review): presumably a user id - confirm with the schema owners.
  string uid = 5;

  // Schema field
  // (...)

  // Nullable tracking special field.
  // NOTE(review): the int32 key is presumably a field number and the bool
  // records whether that field was set - confirm.
  map<int32, bool> set_fields = 50010;

  // Standard glup field; its (json).name option is set to "__metadata".
  repeated ControlMessage.Watermark control_message = 2097151
      [(json).name = "__metadata"];

  // Field numbers and names that must not be reused in this message.
  reserved 70 to 73, 75;
  reserved "obsolete", "obsolete2";
}