File: stext-iterator.c

package info (click to toggle)
mupdf 1.27.0%2Bds1-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 29,260 kB
  • sloc: ansic: 335,322; python: 20,906; java: 7,520; javascript: 2,213; makefile: 1,152; xml: 675; cpp: 639; sh: 513; cs: 307; awk: 10; sed: 7; lisp: 3
file content (127 lines) | stat: -rw-r--r-- 3,719 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Copyright (C) 2004-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#include "mupdf/fitz.h"

/* Build an iterator for the given page.
 *
 * The returned iterator has no parent structure and rests on
 * page->first_block. A NULL page is accepted: the iterator then
 * carries a NULL page and a NULL block. */
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin(fz_stext_page *page)
{
	fz_stext_page_block_iterator it;

	/* Start from the "nothing to visit" state... */
	it.block = NULL;
	it.parent = NULL;
	it.page = page;

	/* ...and only pick up a first block if there is a page to read it from. */
	if (page != NULL)
		it.block = page->first_block;

	return it;
}

/* Iterates along, stopping at every block. Stops at the end of the run.
 *
 * Only pos.block is ever changed; pos.parent and pos.page are passed
 * through untouched. The iterator is returned unchanged when it has no
 * page, or when it is already at the end of the run (pos.block == NULL). */
fz_stext_page_block_iterator fz_stext_page_block_iterator_next(fz_stext_page_block_iterator pos)
{
	/* Advance only if there is both a page and a current block to step from. */
	if (pos.page != NULL && pos.block != NULL)
		pos.block = pos.block->next;

	return pos;
}

/* Step down into the structure block that the iterator rests on.
 *
 * On success pos.parent becomes the block's child structure
 * (block->u.s.down) and pos.block becomes that structure's first_block
 * (which may be NULL).
 *
 * Can't throw here, so trying to move down on illegal nodes will just
 * do nothing: pos is returned unchanged if there is no current block,
 * if the block is not of type FZ_STEXT_BLOCK_STRUCT, or if the block
 * has no child structure attached. */
fz_stext_page_block_iterator fz_stext_page_block_iterator_down(fz_stext_page_block_iterator pos)
{
	if (pos.block == NULL)
		return pos;
	if (pos.block->type != FZ_STEXT_BLOCK_STRUCT)
		return pos;
	/* Defensive: never dereference a missing child structure. */
	if (pos.block->u.s.down == NULL)
		return pos;

	pos.parent = pos.block->u.s.down;
	pos.block = pos.parent->first_block;

	return pos;
}

/* Step back up out of the structure currently being traversed.
 *
 * If the iterator has no parent it is returned unchanged. Otherwise
 * the iterator is moved onto parent->up and its parent becomes
 * parent->parent. */
fz_stext_page_block_iterator fz_stext_page_block_iterator_up(fz_stext_page_block_iterator pos)
{
	if (pos.parent != NULL)
	{
		/* parent->up is the struct block whose children we were
		 * walking, so that is the block to resume 'next' from. */
		pos.block = pos.parent->up;
		/* parent->parent is the struct that owns that block. This
		 * must be read after pos.block, as it overwrites pos.parent. */
		pos.parent = pos.parent->parent;
	}

	return pos;
}

/* Iterates along, and automatically (silently) goes down at structure
 * nodes and up at the end of runs.
 *
 * The block the iterator currently rests on is stepped over exactly
 * once (it is not descended into, even if it is a structure block).
 * From there the walk continues until it reaches either:
 *  - a block that is not of type FZ_STEXT_BLOCK_STRUCT, which is
 *    returned, or
 *  - the end of a run with no parent left to return to, in which case
 *    the returned iterator has pos.block == NULL and pos.parent == NULL.
 *
 * Structure blocks with no child structure attached are skipped.
 * An iterator with a NULL page is returned unchanged. */
fz_stext_page_block_iterator fz_stext_page_block_iterator_next_dfs(fz_stext_page_block_iterator pos)
{
	/* If page == NULL, then this iterator can never go anywhere.
	 * Bail out now: 'next' refuses to advance such an iterator, so
	 * the loop below could otherwise revisit the same structure
	 * forever. */
	if (pos.page == NULL)
		return pos;

	/* Step off the block we are currently on. */
	pos = fz_stext_page_block_iterator_next(pos);

	while (1)
	{
		if (pos.block)
		{
			if (pos.block->type != FZ_STEXT_BLOCK_STRUCT)
				return pos;

			if (pos.block->u.s.down == NULL)
			{
				/* Nothing to descend into; step over it. */
				pos = fz_stext_page_block_iterator_next(pos);
				continue;
			}

			/* Move down. And loop, so that the first child is
			 * examined itself rather than being stepped over. */
			pos.parent = pos.block->u.s.down;
			pos.block = pos.parent->first_block;
			continue;
		}

		/* We've hit the end of the row. Move up. */
		/* If no parent, we've really hit the EOD. */
		if (pos.parent == NULL)
			return pos; /* EOF */
		/* pos.parent->up is the struct block we are currently traversing the
		 * children of. So it's where we want to do 'next' from. */
		pos.block = pos.parent->up;
		/* pos.parent->parent is the struct that owns the new pos.block */
		pos.parent = pos.parent->parent;
		/* We have finished with that struct block; move past it. */
		pos = fz_stext_page_block_iterator_next(pos);
	}
}

/* Returns 1 if the iterator has no current block (i.e. it has run off
 * the end of the run it was walking), 0 otherwise. Parent structures
 * are not considered. */
int fz_stext_page_block_iterator_eod(fz_stext_page_block_iterator pos)
{
	if (pos.block != NULL)
		return 0;

	return 1;
}

/* End-of-data test for depth-first iteration.
 *
 * Returns 0 if the iterator has a current block. Otherwise it climbs
 * the chain of parents (taking parent->up as the block and
 * parent->parent as the next parent at each step) and returns 0 as
 * soon as a non-NULL block is found, or 1 once there is no parent left
 * to climb to. Only the local copy of pos is modified. */
int fz_stext_page_block_iterator_eod_dfs(fz_stext_page_block_iterator pos)
{
	while (pos.block == NULL)
	{
		/* Out of blocks and nowhere left to climb: truly at the end. */
		if (pos.parent == NULL)
			return 1;

		/* Climb one level; read 'up' before overwriting pos.parent. */
		pos.block = pos.parent->up;
		pos.parent = pos.parent->parent;
	}

	return 0;
}