File: transformer.rb

package info (click to toggle)
ruby-ipynbdiff 0.4.7-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 148 kB
  • sloc: ruby: 363; makefile: 5
file content (101 lines) | stat: -rw-r--r-- 3,108 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# frozen_string_literal: true

module IpynbDiff
  # Raised by Transformer#validate_notebook when the input cannot be parsed as
  # JSON or the parsed document has no `cells` key.
  class InvalidNotebookError < StandardError; end

  # Converts the JSON source of a Jupyter Notebook into a markdown-style list of rows (see #transform)
  class Transformer
    require 'json'
    require 'yaml'
    require 'output_transformer'
    require 'symbolized_markdown_helper'
    require 'ipynb_symbol_map'
    require 'transformed_notebook'
    include SymbolizedMarkdownHelper

    # NOTE(review): these assignments run in the class body, so they set
    # instance variables on the Transformer class object itself, not on its
    # instances. Each instance assigns its own @include_frontmatter in
    # #initialize. @objects_to_ignore is not read by any method visible in this
    # class -- confirm whether it is still needed.
    @include_frontmatter = true
    @objects_to_ignore = ['application/javascript', 'application/vnd.holoviews_load.v0+json']

    # Builds a transformer.
    #
    # @param include_frontmatter [Boolean] when truthy, #transform_document
    #   puts the rows from #transform_metadata in front of the cell rows
    # @param hide_images [Boolean] stored and forwarded unchanged to the
    #   OutputTransformer that handles code-cell outputs
    def initialize(include_frontmatter: true, hide_images: false)
      @hide_images = hide_images
      @include_frontmatter = include_frontmatter
      @output_transformer = OutputTransformer.new(hide_images: @hide_images)
    end

    # Parses the raw notebook and checks that it looks like a notebook.
    #
    # @param notebook [String] raw JSON text of the notebook
    # @return [Hash] the parsed document; it is a JSON object with a `cells` key
    # @raise [InvalidNotebookError] when the text is not valid JSON, when the
    #   top-level JSON value is not an object, or when it has no `cells` key
    def validate_notebook(notebook)
      notebook_json = JSON.parse(notebook)

      # Documents such as `[]`, `null`, `42` or `"text"` are valid JSON but are
      # not notebooks. Checking for a Hash first turns them into the documented
      # error instead of a NoMethodError from calling #key? on the wrong type.
      return notebook_json if notebook_json.is_a?(Hash) && notebook_json.key?('cells')

      raise InvalidNotebookError
    rescue JSON::ParserError
      raise InvalidNotebookError
    end

    # Entry point: turns raw notebook JSON into a TransformedNotebook.
    #
    # @param notebook [String, nil] raw JSON text of the notebook
    # @return [TransformedNotebook] built without arguments when +notebook+ is
    #   nil/false, otherwise built from the transformed rows and the symbol map
    #   that IpynbSymbolMap.parse produces for the same raw text
    # @raise [InvalidNotebookError] propagated from #validate_notebook
    def transform(notebook)
      if notebook
        parsed = validate_notebook(notebook)
        rows = transform_document(parsed)

        TransformedNotebook.new(rows, IpynbSymbolMap.parse(notebook))
      else
        TransformedNotebook.new
      end
    end

    # Transforms every cell of a parsed notebook and returns one flat list.
    #
    # @param notebook [Hash] parsed notebook; `notebook['cells']` is iterated
    # @return [Array] flattened rows for all cells, preceded by the rows from
    #   #transform_metadata when @include_frontmatter is truthy
    def transform_document(notebook)
      cells_symbol = JsonSymbol.new('.cells')

      blocks = notebook['cells'].each_with_index.map do |cell, position|
        rows = transform_cell(cell, notebook, cells_symbol / position)
        decorate_cell(rows, cell, cells_symbol / position)
      end

      blocks.unshift(transform_metadata(notebook)) if @include_frontmatter
      blocks.flatten
    end

    # Wraps the rows of one cell with a header row and separator rows.
    #
    # The header has the form `%% Cell type:<type> id:<id> tags:<a,b>`. The type
    # falls back to `raw` when the cell has no `cell_type`; the tag list is
    # empty when the cell has no metadata or no tags.
    #
    # @param rows [Array] already transformed rows of the cell
    # @param cell [Hash] the cell as parsed from the notebook
    # @param symbol [Object] symbol attached to the header row
    # @return [Array] header row, a row from `_` with no arguments, +rows+
    #   (still nested), and another row from `_` with no arguments
    def decorate_cell(rows, cell, symbol)
      metadata = cell['metadata']
      tags = metadata.nil? ? nil : metadata.fetch('tags', [])
      kind = cell['cell_type'] || 'raw'
      header = "%% Cell type:#{kind} id:#{cell['id']} tags:#{tags&.join(',')}"

      [_(symbol, header), _, rows, _]
    end

    # Dispatches a cell to the matching transformer: `code` cells go to
    # #transform_code_cell, every other cell type goes to #transform_text_cell.
    #
    # @param cell [Hash] the cell as parsed from the notebook
    # @param notebook [Hash] the whole parsed notebook (only used for code cells)
    # @param symbol [Object] symbol of the cell
    # @return [Array] whatever the selected transformer returns
    def transform_cell(cell, notebook, symbol)
      return transform_text_cell(cell, symbol) unless cell['cell_type'] == 'code'

      transform_code_cell(cell, notebook, symbol)
    end

    # Renders a code cell as a fenced code block followed by its outputs.
    #
    # @param cell [Hash] the code cell; reads `source` and `outputs`
    # @param notebook [Hash] the parsed notebook; the fence language is taken
    #   from `metadata.kernelspec.language` and is empty when that is absent
    # @param symbol [Object] symbol of the cell; must respond to `/`
    # @return [Array] opening fence row, source rows (right-stripped), closing
    #   fence row, and an array with one transformed entry per output
    def transform_code_cell(cell, notebook, symbol)
      language = notebook.dig('metadata', 'kernelspec', 'language') || ''
      # A code cell without an `outputs` key is rendered as having no outputs
      # instead of failing with NoMethodError on nil.
      outputs = cell['outputs'] || []

      [
        _(symbol / 'source', %(``` #{language})),
        symbolize_array(symbol / 'source', cell['source'], &:rstrip),
        _(nil, '```'),
        outputs.map.with_index do |output, idx|
          @output_transformer.transform(output, symbol / ['outputs', idx])
        end
      ]
    end

    # Renders a non-code cell: its `source` is passed to symbolize_array under
    # the cell's `source` symbol, with trailing whitespace stripped per entry.
    #
    # @param cell [Hash] the cell; reads `source`
    # @param symbol [Object] symbol of the cell; must respond to `/`
    # @return [Array] whatever symbolize_array returns for the source
    def transform_text_cell(cell, symbol)
      source_symbol = symbol / 'source'
      symbolize_array(source_symbol, cell['source'], &:rstrip)
    end

    # Builds the front matter rows for a notebook.
    #
    # The notebook's kernelspec, language_info, nbformat and nbformat_minor are
    # dumped as YAML under a top-level `jupyter` key. Each YAML line becomes one
    # row with a nil symbol, followed by a closing `---` row and one row from
    # `_` with no arguments.
    #
    # @param notebook_json [Hash] the parsed notebook
    # @return [Array] the front matter rows
    def transform_metadata(notebook_json)
      # A notebook without a `metadata` object still gets front matter (with
      # empty kernelspec/language_info) rather than a NoMethodError on nil.
      metadata = notebook_json['metadata'] || {}

      as_yaml = {
        'jupyter' => {
          'kernelspec' => metadata['kernelspec'],
          'language_info' => metadata['language_info'],
          'nbformat' => notebook_json['nbformat'],
          'nbformat_minor' => notebook_json['nbformat_minor']
        }
      }.to_yaml

      as_yaml.split("\n").map { |l| _(nil, l) }.append(_(nil, '---'), _)
    end
  end
end