File: transformer.rb

package info (click to toggle)
gitlab 17.6.5-19
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 629,368 kB
  • sloc: ruby: 1,915,304; javascript: 557,307; sql: 60,639; xml: 6,509; sh: 4,567; makefile: 1,239; python: 406
file content (113 lines) | stat: -rw-r--r-- 3,422 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# frozen_string_literal: true

require 'json'
require 'yaml'
require 'ipynb_diff/output_transformer'
require 'ipynb_diff/symbolized_markdown_helper'
require 'ipynb_diff/symbol_map'
require 'ipynb_diff/transformed_notebook'
require 'oj'

module IpynbDiff
  # Raised by Transformer#validate_notebook when the input is not a usable
  # notebook: it cannot be parsed as JSON, or the parsed document is not a
  # JSON object carrying a 'cells' array.
  InvalidNotebookError = Class.new(StandardError)

  # Returns a markdown version of the Jupyter Notebook
  #
  # The notebook is parsed as JSON, every cell is rendered as a list of rows
  # (built through the `___` / `symbolize_array` helpers, presumably provided
  # by SymbolizedMarkdownHelper), and the rows are handed to
  # TransformedNotebook together with a SymbolMap of the raw notebook.
  class Transformer
    include SymbolizedMarkdownHelper

    # @param include_frontmatter [Boolean] when true, the rows produced by
    #   #transform_metadata are placed before the cells
    # @param hide_images [Boolean] stored and forwarded to OutputTransformer
    def initialize(include_frontmatter: true, hide_images: false)
      @include_frontmatter = include_frontmatter
      @hide_images = hide_images
      @out_transformer = OutputTransformer.new(hide_images)
    end

    # Parses the raw notebook and checks that it has the minimal shape the
    # transformer relies on.
    #
    # @param notebook [String] raw notebook JSON
    # @return [Hash] the parsed notebook
    # @raise [InvalidNotebookError] if parsing fails, or the parsed document is
    #   not a Hash whose 'cells' entry is an Array
    def validate_notebook(notebook)
      notebook_json = Oj::Parser.usual.parse(notebook)

      # Valid JSON is not necessarily a notebook: a top-level array or scalar
      # has no #key?, and a non-array 'cells' would blow up in
      # #transform_document. Report both as an invalid notebook instead of
      # leaking a NoMethodError to the caller.
      raise InvalidNotebookError unless notebook_json.is_a?(Hash) && notebook_json['cells'].is_a?(Array)

      notebook_json
    rescue EncodingError, Oj::ParseError, JSON::ParserError
      raise InvalidNotebookError
    end

    # Entry point: converts a raw notebook into a TransformedNotebook.
    #
    # @param notebook [String, nil] raw notebook JSON
    # @return [TransformedNotebook] built without arguments when notebook is
    #   nil/false, otherwise built from the transformed rows and the symbol map
    # @raise [InvalidNotebookError] see #validate_notebook
    def transform(notebook)
      return TransformedNotebook.new unless notebook

      notebook_json = validate_notebook(notebook)
      transformed = transform_document(notebook_json)
      # The symbol map is built from the raw text, not from the parsed Hash.
      symbol_map = SymbolMap.parse(notebook)

      TransformedNotebook.new(transformed, symbol_map)
    end

    # Renders every cell (and, if enabled, the frontmatter) into one flat list
    # of rows.
    #
    # @param notebook [Hash] parsed notebook with a 'cells' array
    # @return [Array] flattened rows
    def transform_document(notebook)
      cells_symbol = JsonSymbol.new('.cells')

      transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
        cell_symbol = cells_symbol / idx
        decorate_cell(transform_cell(cell, notebook, cell_symbol), cell, cell_symbol)
      end

      transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
      transformed_blocks.flatten
    end

    # Wraps the rows of a cell with a header row describing the cell.
    #
    # @param rows [Array] rows produced by #transform_cell
    # @param cell [Hash] the cell being rendered
    # @param symbol [JsonSymbol] symbol of the cell, attached to the header row
    # @return [Array] header row, an argument-less `___` row, the rows, and a
    #   trailing argument-less `___` row (presumably blank lines — confirm in
    #   SymbolizedMarkdownHelper)
    def decorate_cell(rows, cell, symbol)
      # tags is nil when the cell has no metadata, or when 'tags' is explicitly
      # null; the safe navigation below then renders an empty tag list.
      tags = cell['metadata']&.fetch('tags', [])
      type = cell['cell_type'] || 'raw'

      [
        ___(symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')})),
        ___,
        rows,
        ___
      ]
    end

    # Dispatches on the cell type: 'code' cells get a fenced block plus
    # outputs, every other type is rendered as text.
    #
    # @param cell [Hash]
    # @param notebook [Hash] parsed notebook (used for the kernel language)
    # @param symbol [JsonSymbol] symbol of the cell
    # @return [Array] rows for the cell
    def transform_cell(cell, notebook, symbol)
      cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
    end

    # Renders a code cell as a fenced code block followed by its outputs.
    #
    # @param cell [Hash]
    # @param notebook [Hash] parsed notebook; the fence is annotated with
    #   metadata.kernelspec.language when present
    # @param symbol [JsonSymbol] symbol of the cell
    # @return [Array] opening fence, source rows, closing fence, output rows
    def transform_code_cell(cell, notebook, symbol)
      [
        ___(symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''})),
        symbolize_array(symbol / 'source', cell['source'], &:rstrip),
        ___(nil, '```'),
        transform_outputs(cell['outputs'], symbol)
      ]
    end

    # Renders the outputs of a code cell.
    #
    # @param outputs [Array, nil] the cell's 'outputs' entry
    # @param symbol [JsonSymbol] symbol of the owning cell
    # @return [Array] empty when outputs is nil; otherwise a two-element array
    #   holding the '%% Output' header rows (empty when nothing was rendered)
    #   and the rendered outputs
    def transform_outputs(outputs, symbol)
      return [] unless outputs

      # Outputs for which the output transformer returns nil are dropped.
      transformed = outputs.map
                           .with_index { |output, i| @out_transformer.transform(output, symbol / ['outputs', i]) }
                           .compact
                           .map { |el| [___, el] }

      [
        transformed.empty? ? [] : [___, ___(symbol / 'outputs', '%% Output')],
        transformed
      ]
    end

    # Renders a non-code cell: its source lines, right-stripped.
    #
    # @param cell [Hash]
    # @param symbol [JsonSymbol] symbol of the cell
    # @return [Array] rows for the cell source
    def transform_text_cell(cell, symbol)
      symbolize_array(symbol / 'source', cell['source'], &:rstrip)
    end

    # Builds the YAML frontmatter rows describing the notebook.
    #
    # @param notebook_json [Hash] parsed notebook
    # @return [Array] one row per YAML line, then a '---' row and an
    #   argument-less `___` row
    def transform_metadata(notebook_json)
      # 'metadata' is not checked by #validate_notebook, so a notebook may
      # reach this point without it; fall back to an empty Hash so that the
      # frontmatter is emitted with null entries instead of raising.
      metadata = notebook_json['metadata'] || {}

      as_yaml = {
        'jupyter' => {
          'kernelspec' => metadata['kernelspec'],
          'language_info' => metadata['language_info'],
          'nbformat' => notebook_json['nbformat'],
          'nbformat_minor' => notebook_json['nbformat_minor']
        }
      }.to_yaml

      as_yaml.split("\n").map { |l| ___(nil, l) }.append(___(nil, '---'), ___)
    end
  end
end