File: writer.go

package info (click to toggle)
golang-github-apache-arrow-go 18.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 32,200 kB
  • sloc: asm: 477,547; ansic: 5,369; cpp: 759; sh: 585; makefile: 319; python: 190; sed: 5
file content (101 lines) | stat: -rw-r--r-- 2,800 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package arrjson

import (
	"fmt"
	"io"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
	"github.com/apache/arrow-go/v18/arrow/arrio"
	"github.com/apache/arrow-go/v18/arrow/internal/dictutils"
	"github.com/apache/arrow-go/v18/internal/json"
)

// Whitespace strings used when laying out the JSON output.
//
// jsonIndent is the per-level indent handed to the encoder's SetIndent call
// in (*Writer).Close.
//
// NOTE(review): jsonPrefix and jsonRecPrefix are not referenced by the code
// visible in this file; presumably they are used elsewhere in the package —
// confirm before removing.
const (
	jsonIndent    = "  "
	jsonPrefix    = "  "
	jsonRecPrefix = "    "
)

// rawJSON is the top-level value that (*Writer).Close passes to the JSON
// encoder.
//
// Schema is encoded under the "schema" key, Records under "batches" (note the
// key differs from the field name), and Dictionaries under "dictionaries",
// which is dropped from the output when the slice is empty (omitempty).
type rawJSON struct {
	Schema       Schema       `json:"schema"`
	Records      []Record     `json:"batches"`
	Dictionaries []Dictionary `json:"dictionaries,omitempty"`
}

// Writer accumulates a schema, dictionaries and records in their JSON form
// and encodes them as a single document to an io.Writer when Close is called.
//
// Fields:
//   - w is the destination that Close encodes to; Close is a no-op when it is nil.
//   - nrecs counts the records passed to Write; Write only collects
//     dictionaries while it is still zero.
//   - raw is the document being built up and eventually encoded.
//   - mapper is populated from the schema in NewWriter and handed to
//     schemaToJSON and dictutils.CollectDictionaries.
type Writer struct {
	w io.Writer

	nrecs  int64
	raw    rawJSON
	mapper dictutils.Mapper
}

// NewWriter builds a Writer that will encode its JSON document to w.
//
// The schema is imported into the writer's dictionary mapper and converted to
// its JSON form up front. The record list starts as an empty, non-nil slice.
// The returned error is always nil.
func NewWriter(w io.Writer, schema *arrow.Schema) (*Writer, error) {
	out := new(Writer)
	out.w = w
	out.raw.Records = []Record{}

	// The mapper has to know the schema before schemaToJSON consults it.
	out.mapper.ImportSchema(schema)
	out.raw.Schema = schemaToJSON(schema, &out.mapper)

	return out, nil
}

// Write converts rec to its JSON form and appends it to the buffered
// document. Write itself does not touch the underlying io.Writer; the
// document is encoded by Close.
//
// While no record has been written yet (nrecs == 0), the dictionaries of rec
// are collected through the writer's mapper and stored alongside the records.
// Each dictionary is wrapped in a one-column record named "DICT<id>" so it can
// be converted with recordToJSON.
//
// It returns the error from dictutils.CollectDictionaries unchanged, in which
// case rec is not appended and the record count is not advanced.
func (w *Writer) Write(rec arrow.Record) error {
	if w.nrecs == 0 {
		pairs, err := dictutils.CollectDictionaries(rec, &w.mapper)
		if err != nil {
			return err
		}

		if len(pairs) > 0 {
			w.raw.Dictionaries = make([]Dictionary, 0, len(pairs))
		}

		for _, p := range pairs {
			// Run each iteration in its own function so the deferred releases
			// fire at the end of the iteration instead of piling up until
			// Write returns.
			func() {
				defer p.Dict.Release()
				sc := arrow.NewSchema([]arrow.Field{{Name: fmt.Sprintf("DICT%d", p.ID), Type: p.Dict.DataType(), Nullable: true}}, nil)
				dummy := array.NewRecord(sc, []arrow.Array{p.Dict}, int64(p.Dict.Len()))
				defer dummy.Release()
				w.raw.Dictionaries = append(w.raw.Dictionaries, Dictionary{ID: p.ID, Data: recordToJSON(dummy)})
			}()
		}
	}

	w.raw.Records = append(w.raw.Records, recordToJSON(rec))
	w.nrecs++
	return nil
}

// Close encodes the accumulated document (schema, dictionaries and records)
// as indented JSON to the underlying io.Writer and returns the encoder's
// error, if any.
//
// Close is a no-op returning nil when the Writer has no destination, which
// includes every call after the first: the destination is detached before
// encoding so that a repeated Close cannot append a second document to the
// same stream.
func (w *Writer) Close() error {
	if w.w == nil {
		return nil
	}

	// Detach first; after this point the Writer is considered closed even if
	// encoding fails, since a retry would write after partial output.
	dst := w.w
	w.w = nil

	enc := json.NewEncoder(dst)
	enc.SetIndent("", jsonIndent)
	// Do not escape <, > and & to their \uXXXX forms: the output is not used
	// in an HTML context, and leaving them as-is lets the generated JSON
	// files match the expected ones.
	enc.SetEscapeHTML(false)
	return enc.Encode(w.raw)
}

// Compile-time check that *Writer satisfies arrio.Writer.
var _ arrio.Writer = (*Writer)(nil)