File: protocol_0.go

package info (click to toggle)
golang-github-hydrogen18-stalecucumber 0.0~git20161112.0.1e918be-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 248 kB
  • sloc: python: 71; makefile: 2
file content (723 lines) | stat: -rw-r--r-- 18,652 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
package stalecucumber

import "strconv"
import "fmt"
import "math/big"
import "errors"

//import "unicode/utf8"
import "unicode/utf16"

/**
Opcode: INT
Push an integer or bool.

      The argument is a newline-terminated decimal literal string.

      The intent may have been that this always fit in a short Python int,
      but INT can be generated in pickles written on a 64-bit box that
      require a Python long on a 32-bit box.  The difference between this
      and LONG then is that INT skips a trailing 'L', and produces a short
      int whenever possible.

      Another difference is due to that, when bool was introduced as a
      distinct type in 2.3, builtin names True and False were also added to
      2.2.2, mapping to ints 1 and 0.  For compatibility in both directions,
      True gets pickled as INT + "I01\n", and False as INT + "I00\n".
      Leading zeroes are never produced for a genuine integer.  The 2.3
      (and later) unpicklers special-case these and return bool instead;
      earlier unpicklers ignore the leading "0" and return the int.
      **
Stack before: []
Stack after: [int_or_bool]
**/
// opcode_INT reads the opcode's string argument via pm.readString and pushes
// the value it denotes: the literals "01" and "00" become the booleans true
// and false, anything else is parsed as a base-10 int64. Read and parse errors
// are returned as-is and nothing is pushed in that case.
func (pm *PickleMachine) opcode_INT() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	// Booleans are pickled as INT with a leading zero, a form that is never
	// produced for a genuine integer.
	if literal == "01" {
		pm.push(true)
		return nil
	}
	if literal == "00" {
		pm.push(false)
		return nil
	}

	value, parseErr := strconv.ParseInt(literal, 10, 64)
	if parseErr != nil {
		return parseErr
	}

	pm.push(value)
	return nil
}

/**
Opcode: LONG
Push a long integer.

      The same as INT, except that the literal ends with 'L', and always
      unpickles to a Python long.  There doesn't seem a real purpose to the
      trailing 'L'.

      Note that LONG takes time quadratic in the number of digits when
      unpickling (this is simply due to the nature of decimal->binary
      conversion).  Proto 2 added linear-time (in C; still quadratic-time
      in Python) LONG1 and LONG4 opcodes.
      **
Stack before: []
Stack after: [long]
**/
// opcode_LONG reads the opcode's string argument via pm.readString, requires
// it to be non-empty and to end in 'L', and pushes the digits before the 'L'
// as a *big.Int. The digits are parsed with fmt.Sscan; any scan error is
// returned and nothing is pushed.
func (pm *PickleMachine) opcode_LONG() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	size := len(literal)
	if size == 0 {
		return fmt.Errorf("String for LONG opcode cannot be zero length")
	}

	// The trailing 'L' is mandatory for this opcode.
	if suffix := literal[size-1]; suffix != 'L' {
		return fmt.Errorf("String for LONG opcode must end with %q not %q", "L", suffix)
	}

	value := new(big.Int)
	if _, err = fmt.Sscan(literal[:size-1], value); err != nil {
		return err
	}

	pm.push(value)
	return nil
}

/**
Opcode: STRING
Push a Python string object.

      The argument is a repr-style string, with bracketing quote characters,
      and perhaps embedded escapes.  The argument extends until the next
      newline character.
      **
Stack before: []
Stack after: [str]
**/

// unquoteInputs lists the values passed, in this order, as the quote argument
// of strconv.UnquoteChar by opcode_STRING and opcode_UNICODE: single quote
// (0x27), double quote (0x22) and NUL (0x0). Each mode is tried until one of
// them decodes the next character without error.
var unquoteInputs = []byte{0x27, 0x22, 0x0}

// opcode_STRING reads the quoted, repr-style argument via pm.readString,
// strips the bracketing quote characters, decodes backslash escapes one
// character at a time and pushes the result as a Go string.
//
// Python's repr() brackets a string with double quotes when the string
// contains a single quote and no double quote (for example "it's"), so both
// bracketing styles are accepted as long as the opening and closing quote
// characters match. Anything else is reported as a poorly formed value.
func (pm *PickleMachine) opcode_STRING() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}

	//The string is quoted, so it must hold at least the two bracketing
	//quote characters
	if len(str) < 2 {
		return fmt.Errorf("For STRING opcode, argument has invalid length %d", len(str))
	}

	//The first and last character must be the same quote character, either
	//the single or the double quote
	quote := str[0]
	if (quote != '\'' && quote != '"') || str[len(str)-1] != quote {
		return fmt.Errorf("For STRING opcode, argument has poorly formed value %q", str)
	}

	v := str[1 : len(str)-1]

	f := make([]rune, 0, len(v))

	for len(v) != 0 {
		var vr rune
		var replacement string
		//Try every quoting mode in turn; the first one that decodes the
		//next character wins. "replacement" is the input left after it.
		for _, i := range unquoteInputs {
			vr, _, replacement, err = strconv.UnquoteChar(v, i)
			if err == nil {
				break
			}
		}

		if err != nil {
			c := v[0]
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), c, err)
		}
		v = replacement

		f = append(f, vr)
	}

	pm.push(string(f))
	return nil
}

/**
Opcode: NONE
Push None on the stack.**
Stack before: []
Stack after: [None]
**/
// opcode_NONE pushes a PickleNone value, the stand-in used for Python's None,
// onto the stack. It never fails.
func (pm *PickleMachine) opcode_NONE() error {
	var none PickleNone
	pm.push(none)
	return nil
}

/**
Opcode: UNICODE
Push a Python Unicode string object.

      The argument is a raw-unicode-escape encoding of a Unicode string,
      and so may contain embedded escape sequences.  The argument extends
      until the next newline character.
      **
Stack before: []
Stack after: [unicode]
**/
// opcode_UNICODE reads the raw-unicode-escape encoded argument via
// pm.readBytes, decodes it character by character and pushes the result as a
// Go string.
//
// Bytes above 127 are taken as the code point of the same value; everything
// else is handed to strconv.UnquoteChar so that escape sequences are expanded.
// On a decoding failure the error names the character at which decoding
// stopped and nothing is pushed.
func (pm *PickleMachine) opcode_UNICODE() error {
	raw, err := pm.readBytes()
	if err != nil {
		return err
	}

	//Convert once up front so each UnquoteChar call below works on a slice
	//of the same string instead of copying the remaining input every time
	s := string(raw)

	f := make([]rune, 0, len(s))

	for consumed := 0; consumed != len(s); {
		h := s[consumed]

		//Python 'raw-unicode-escape' doesn't
		//escape extended ascii
		if h > 127 {
			ea := utf16.Decode([]uint16{uint16(h)})
			f = append(f, ea...)
			consumed++
			continue
		}

		//Multibyte unicode points are escaped
		//so use "UnquoteChar" to handle those
		var vr rune
		rest := s[consumed:]
		for _, q := range unquoteInputs {
			var post string
			vr, _, post, err = strconv.UnquoteChar(rest, q)
			if err == nil {
				//Advance by however many bytes this character used
				consumed += len(rest) - len(post)
				break
			}
		}

		if err != nil {
			//Report the character that failed, not the first one of the input
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(f), h, err)
		}

		f = append(f, vr)
	}

	pm.push(string(f))

	return nil
}

/**
Opcode: FLOAT
Newline-terminated decimal float literal.

      The argument is repr(a_float), and in general requires 17 significant
      digits for roundtrip conversion to be an identity (this is so for
      IEEE-754 double precision values, which is what Python float maps to
      on most boxes).

      In general, FLOAT cannot be used to transport infinities, NaNs, or
      minus zero across boxes (or even on a single box, if the platform C
      library can't read the strings it produces for such things -- Windows
      is like that), but may do less damage than BINFLOAT on boxes with
      greater precision or dynamic range than IEEE-754 double.
      **
Stack before: []
Stack after: [float]
**/
// opcode_FLOAT reads the opcode's string argument via pm.readString, scans it
// as a float64 with fmt.Sscanf("%f") and pushes the result. Read and scan
// errors are returned as-is and nothing is pushed in that case.
func (pm *PickleMachine) opcode_FLOAT() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	var value float64
	if _, scanErr := fmt.Sscanf(literal, "%f", &value); scanErr != nil {
		return scanErr
	}

	pm.push(value)
	return nil
}

/**
Opcode: APPEND
Append an object to a list.

      Stack before:  ... pylist anyobject
      Stack after:   ... pylist+[anyobject]

      although pylist is really extended in-place.
      **
Stack before: [list, any]
Stack after: [list]
**/
// opcode_APPEND pops the value on top of the stack and the list beneath it,
// appends the value to the list and pushes the resulting slice back.
//
// It returns the error from pm.pop when the stack runs out, and an error when
// the item beneath the value is not a []interface{}. In both cases the items
// already popped are not put back.
func (pm *PickleMachine) opcode_APPEND() error {
	v, err := pm.pop()
	if err != nil {
		return err
	}

	listI, err := pm.pop()
	if err != nil {
		return err
	}

	list, ok := listI.([]interface{})
	if !ok {
		// The error must be returned: otherwise list is nil here and the
		// append below would silently replace the operand with a new
		// one-element list.
		return fmt.Errorf("Second item on top of stack must be of %T not %T", list, listI)
	}
	list = append(list, v)
	pm.push(list)
	return nil
}

/**
Opcode: LIST
Build a list out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python list, which single list object replaces all of the
      stack from the topmost markobject onward.  For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after:  ... [1, 2, 3, 'abc']
      **
Stack before: [mark, stackslice]
Stack after: [list]
**/
// opcode_LIST collects every stack entry above the mark located by
// pm.findMark into a new []interface{}, calls pm.popAfterIndex with the mark's
// index and pushes the new slice. An error from pm.findMark is returned
// unchanged.
func (pm *PickleMachine) opcode_LIST() error {
	markIndex, err := pm.findMark()
	if err != nil {
		return err
	}

	// Entries are appended one by one to a fresh slice, so the result does
	// not share storage with the stack.
	items := []interface{}{}
	for _, entry := range pm.Stack[markIndex+1:] {
		items = append(items, entry)
	}

	// Drop the collected values from the stack before pushing the list.
	pm.popAfterIndex(markIndex)
	pm.push(items)

	return nil
}

/**
Opcode: TUPLE
Build a tuple out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python tuple, which single tuple object replaces all of the
      stack from the topmost markobject onward.  For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after:  ... (1, 2, 3, 'abc')
      **
Stack before: [mark, stackslice]
Stack after: [tuple]
**/
// opcode_TUPLE delegates to opcode_LIST: a tuple is represented by the same
// []interface{} value that a list is, and any error comes from opcode_LIST.
func (pm *PickleMachine) opcode_TUPLE() error {
	return pm.opcode_LIST()
}

/**
Opcode: DICT
Build a dict out of the topmost stack slice, after markobject.

      All the stack entries following the topmost markobject are placed into
      a single Python dict, which single dict object replaces all of the
      stack from the topmost markobject onward.  The stack slice alternates
      key, value, key, value, ....  For example,

      Stack before: ... markobject 1 2 3 'abc'
      Stack after:  ... {1: 2, 3: 'abc'}
      **
Stack before: [mark, stackslice]
Stack after: [dict]
**/
// opcode_DICT builds a map[interface{}]interface{} from the stack entries
// above the mark located by pm.findMark, reading them as alternating key,
// value, key, value. It then calls pm.popAfterIndex with the mark's index and
// pushes the map.
//
// An odd number of entries is an error. A panic raised while building the map
// is recovered and returned as an error instead of propagating.
func (pm *PickleMachine) opcode_DICT() (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Turn whatever was panicked with into the returned error.
		if msg, isString := r.(string); isString {
			err = errors.New(msg)
		} else if cause, isError := r.(error); isError {
			err = cause
		} else {
			err = errors.New("Unknown panic")
		}
	}()

	markIndex, err := pm.findMark()
	if err != nil {
		return err
	}

	result := make(map[interface{}]interface{})

	// pendingKey holds a key waiting for its value; nil means the next
	// entry is a key.
	var pendingKey interface{}
	for _, item := range pm.Stack[markIndex+1:] {
		if pendingKey != nil {
			result[pendingKey] = item
			pendingKey = nil
			continue
		}
		pendingKey = item
	}

	// A key left without a value means the slice had an odd length.
	if pendingKey != nil {
		return fmt.Errorf("For opcode DICT stack after mark contained an odd number of items, this is not valid")
	}

	pm.popAfterIndex(markIndex)
	pm.push(result)

	return nil
}

/**
Opcode: SETITEM
Add a key+value pair to an existing dict.

      Stack before:  ... pydict key value
      Stack after:   ... pydict

      where pydict has been modified via pydict[key] = value.
      **
Stack before: [dict, any, any]
Stack after: [dict]
**/
// opcode_SETITEM pops a value, a key and a dict (in that order), stores
// dict[key] = value and pushes the dict back.
//
// Errors from pm.pop are returned unchanged. An error is also returned when
// the third popped item is not a map[interface{}]interface{}. A panic raised
// by the map assignment is recovered and returned as an error. Popped items
// are not put back on failure.
func (pm *PickleMachine) opcode_SETITEM() (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Turn whatever was panicked with into the returned error.
		if msg, isString := r.(string); isString {
			err = errors.New(msg)
		} else if cause, isError := r.(error); isError {
			err = cause
		} else {
			err = errors.New("Unknown panic")
		}
	}()

	value, err := pm.pop()
	if err != nil {
		return err
	}

	key, err := pm.pop()
	if err != nil {
		return err
	}

	target, err := pm.pop()
	if err != nil {
		return err
	}

	dict, isDict := target.(map[interface{}]interface{})
	if !isDict {
		return fmt.Errorf("For opcode SETITEM stack item 2 from top must be of type %T not %T", dict, target)
	}

	dict[key] = value
	pm.push(dict)

	return nil
}

/**
Opcode: POP
Discard the top stack item, shrinking the stack by one item.**
Stack before: [any]
Stack after: []
**/
// opcode_POP discards the top stack item by calling pm.pop and returns
// whatever error pm.pop reports.
func (pm *PickleMachine) opcode_POP() error {
	if _, err := pm.pop(); err != nil {
		return err
	}
	return nil
}

/**
Opcode: DUP
Push the top stack item onto the stack again, duplicating it.**
Stack before: [any]
Stack after: [any, any]
**/
// opcode_DUP is not implemented: it always returns ErrOpcodeNotImplemented
// and leaves the stack untouched.
func (pm *PickleMachine) opcode_DUP() error {
	return ErrOpcodeNotImplemented
}

/**
Opcode: MARK
Push markobject onto the stack.

      markobject is a unique object, used by other opcodes to identify a
      region of the stack containing a variable number of objects for them
      to work on.  See markobject.doc for more detail.
      **
Stack before: []
Stack after: [mark]
**/
// opcode_MARK records the index the mark is about to occupy in pm.lastMark
// and then pushes a PickleMark value onto the stack. It never fails.
func (pm *PickleMachine) opcode_MARK() error {
	// The mark lands at the current stack length, i.e. the next free slot.
	position := len(pm.Stack)
	pm.lastMark = position

	var mark PickleMark
	pm.push(mark)

	return nil
}

/**
Opcode: GET
Read an object from the memo and push it on the stack.

      The index of the memo object to push is given by the newline-terminated
      decimal string following.  BINGET and LONG_BINGET are space-optimized
      versions.
      **
Stack before: []
Stack after: [any]
**/
// opcode_GET reads the opcode's string argument via pm.readString, parses it
// as a decimal memo index with strconv.Atoi, fetches that entry with
// pm.readFromMemo and pushes it. Errors from any of the three steps are
// returned unchanged and nothing is pushed.
func (pm *PickleMachine) opcode_GET() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	memoIndex, err := strconv.Atoi(literal)
	if err != nil {
		return err
	}

	entry, err := pm.readFromMemo(int64(memoIndex))
	if err != nil {
		return err
	}

	//TODO the entry is pushed as-is; if it is mutable it
	//needs to be somehow deep copied first
	pm.push(entry)

	return nil
}

/**
Opcode: PUT
Store the stack top into the memo.  The stack is not popped.

      The index of the memo location to write into is given by the newline-
      terminated decimal string following.  BINPUT and LONG_BINPUT are
      space-optimized versions.
      **
Stack before: []
Stack after: []
**/
// opcode_PUT stores the item on top of the stack in the memo without popping
// it. The memo index is the opcode's string argument, read via pm.readString
// and parsed with strconv.Atoi.
//
// It returns ErrStackTooSmall when the stack is empty (checked before the
// argument is read), and otherwise any read or parse error.
func (pm *PickleMachine) opcode_PUT() error {
	if len(pm.Stack) == 0 {
		return ErrStackTooSmall
	}

	literal, err := pm.readString()
	if err != nil {
		return err
	}

	memoIndex, err := strconv.Atoi(literal)
	if err != nil {
		return err
	}

	top := pm.Stack[len(pm.Stack)-1]
	pm.storeMemo(int64(memoIndex), top)

	return nil
}

/**
Opcode: GLOBAL
Push a global object (module.attr) on the stack.

      Two newline-terminated strings follow the GLOBAL opcode.  The first is
      taken as a module name, and the second as a class name.  The class
      object module.class is pushed on the stack.  More accurately, the
      object returned by self.find_class(module, class) is pushed on the
      stack, so unpickling subclasses can override this form of lookup.
      **
Stack before: []
Stack after: [any]
**/
// opcode_GLOBAL is not implemented: it always returns
// ErrOpcodeNotImplemented. It does not read the opcode's two string arguments
// and does not touch the stack.
func (pm *PickleMachine) opcode_GLOBAL() error {
	//TODO push an object that represents the result of this operation
	return ErrOpcodeNotImplemented
}

/**
Opcode: REDUCE
Push an object built from a callable and an argument tuple.

      The opcode is named to remind of the __reduce__() method.

      Stack before: ... callable pytuple
      Stack after:  ... callable(*pytuple)

      The callable and the argument tuple are the first two items returned
      by a __reduce__ method.  Applying the callable to the argtuple is
      supposed to reproduce the original object, or at least get it started.
      If the __reduce__ method returns a 3-tuple, the last component is an
      argument to be passed to the object's __setstate__, and then the REDUCE
      opcode is followed by code to create setstate's argument, and then a
      BUILD opcode to apply  __setstate__ to that argument.

      If type(callable) is not ClassType, REDUCE complains unless the
      callable has been registered with the copy_reg module's
      safe_constructors dict, or the callable has a magic
      '__safe_for_unpickling__' attribute with a true value.  I'm not sure
      why it does this, but I've sure seen this complaint often enough when
      I didn't want to <wink>.
      **
Stack before: [any, any]
Stack after: [any]
**/
// opcode_REDUCE is not implemented: it always returns
// ErrOpcodeNotImplemented and leaves the stack untouched.
func (pm *PickleMachine) opcode_REDUCE() error {
	//TODO push an object that represents the result of this operation
	return ErrOpcodeNotImplemented
}

/**
Opcode: BUILD
Finish building an object, via __setstate__ or dict update.

      Stack before: ... anyobject argument
      Stack after:  ... anyobject

      where anyobject may have been mutated, as follows:

      If the object has a __setstate__ method,

          anyobject.__setstate__(argument)

      is called.

      Else the argument must be a dict, the object must have a __dict__, and
      the object is updated via

          anyobject.__dict__.update(argument)

      This may raise RuntimeError in restricted execution mode (which
      disallows access to __dict__ directly); in that case, the object
      is updated instead via

          for k, v in argument.items():
              anyobject[k] = v
      **
Stack before: [any, any]
Stack after: [any]
**/
// opcode_BUILD is not implemented: it always returns ErrOpcodeNotImplemented
// and leaves the stack untouched.
func (pm *PickleMachine) opcode_BUILD() error {
	return ErrOpcodeNotImplemented
}

/**
Opcode: INST
Build a class instance.

      This is the protocol 0 version of protocol 1's OBJ opcode.
      INST is followed by two newline-terminated strings, giving a
      module and class name, just as for the GLOBAL opcode (and see
      GLOBAL for more details about that).  self.find_class(module, name)
      is used to get a class object.

      In addition, all the objects on the stack following the topmost
      markobject are gathered into a tuple and popped (along with the
      topmost markobject), just as for the TUPLE opcode.

      Now it gets complicated.  If all of these are true:

        + The argtuple is empty (markobject was at the top of the stack
          at the start).

        + It's an old-style class object (the type of the class object is
          ClassType).

        + The class object does not have a __getinitargs__ attribute.

      then we want to create an old-style class instance without invoking
      its __init__() method (pickle has waffled on this over the years; not
      calling __init__() is current wisdom).  In this case, an instance of
      an old-style dummy class is created, and then we try to rebind its
      __class__ attribute to the desired class object.  If this succeeds,
      the new instance object is pushed on the stack, and we're done.  In
      restricted execution mode it can fail (assignment to __class__ is
      disallowed), and I'm not really sure what happens then -- it looks
      like the code ends up calling the class object's __init__ anyway,
      via falling into the next case.

      Else (the argtuple is not empty, it's not an old-style class object,
      or the class object does have a __getinitargs__ attribute), the code
      first insists that the class object have a __safe_for_unpickling__
      attribute.  Unlike as for the __safe_for_unpickling__ check in REDUCE,
      it doesn't matter whether this attribute has a true or false value, it
      only matters whether it exists (XXX this is a bug; cPickle
      requires the attribute to be true).  If __safe_for_unpickling__
      doesn't exist, UnpicklingError is raised.

      Else (the class object does have a __safe_for_unpickling__ attr),
      the class object obtained from INST's arguments is applied to the
      argtuple obtained from the stack, and the resulting instance object
      is pushed on the stack.

      NOTE:  checks for __safe_for_unpickling__ went away in Python 2.3.
      **
Stack before: [mark, stackslice]
Stack after: [any]
**/
// opcode_INST is not implemented: it always returns ErrOpcodeNotImplemented.
// It does not read the opcode's arguments and does not touch the stack.
func (pm *PickleMachine) opcode_INST() error {
	return ErrOpcodeNotImplemented
}

/**
Opcode: STOP
Stop the unpickling machine.

      Every pickle ends with this opcode.  The object at the top of the stack
      is popped, and that's the result of unpickling.  The stack should be
      empty then.
      **
Stack before: [any]
Stack after: []
**/
// opcode_STOP always returns ErrOpcodeStopped; it does not pop or inspect the
// stack itself.
// NOTE(review): presumably the caller treats this sentinel as the normal end
// of unpickling and takes the result from the stack — confirm at the call
// site.
func (pm *PickleMachine) opcode_STOP() error {
	return ErrOpcodeStopped
}

/**
Opcode: PERSID
Push an object identified by a persistent ID.

      The pickle module doesn't define what a persistent ID means.  PERSID's
      argument is a newline-terminated str-style (no embedded escapes, no
      bracketing quote characters) string, which *is* "the persistent ID".
      The unpickler passes this string to self.persistent_load().  Whatever
      object that returns is pushed on the stack.  There is no implementation
      of persistent_load() in Python's unpickler:  it must be supplied by an
      unpickler subclass.
      **
Stack before: []
Stack after: [any]
**/
// opcode_PERSID is not implemented: it always returns
// ErrOpcodeNotImplemented. It does not read the persistent ID argument and
// does not touch the stack.
func (pm *PickleMachine) opcode_PERSID() error {
	return ErrOpcodeNotImplemented
}