File: textwords.c

package info (click to toggle)
k2pdfopt 2.51+ds-1
  • links: PTS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 4,480 kB
  • sloc: ansic: 81,694; cpp: 5,829; makefile: 5
file content (124 lines) | stat: -rw-r--r-- 3,921 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*
** textwords.c  Functions to parse region into words.
**
** Copyright (C) 2013  http://willus.com
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU Affero General Public License as
** published by the Free Software Foundation, either version 3 of the
** License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU Affero General Public License for more details.
**
** You should have received a copy of the GNU Affero General Public License
** along with this program.  If not, see <http://www.gnu.org/licenses/>.
**
*/

#include "k2pdfopt.h"




/*
** Fill in the gap, gapblank, and rowheight fields of every entry in
** textwords->textrow[] from the c1/c2 values of the entries.
**
** For every entry except the last:
**     gap       = (next entry's c1) - (this entry's c2) - 1
**     gapblank  = gap
**     rowheight = (next entry's c1) - (this entry's c1)
**
** For the last entry, the passed value c2 stands in for the missing
** neighbor:
**     gap       = c2 - (last entry's c2)
**     gapblank  = gap
**     rowheight = (last entry's c2) - (last entry's c1)
**
** Nothing is done if textwords->n <= 0.
*/
void textwords_compute_col_gaps(TEXTWORDS *textwords,int c2)

    {
    int idx,last;

    if (textwords->n<=0)
        return;
    last=textwords->n-1;

    /* Final entry:  measured against the c2 passed by the caller. */
    textwords->textrow[last].gap = c2 - textwords->textrow[last].c2;
    textwords->textrow[last].gapblank = textwords->textrow[last].gap;
    textwords->textrow[last].rowheight = textwords->textrow[last].c2 - textwords->textrow[last].c1;

    /* All other entries:  measured against the entry that follows. */
    idx=0;
    while (idx<last)
        {
        textwords->textrow[idx].gap = textwords->textrow[idx+1].c1 - textwords->textrow[idx].c2 - 1;
        textwords->textrow[idx].gapblank = textwords->textrow[idx].gap;
        textwords->textrow[idx].rowheight = textwords->textrow[idx+1].c1 - textwords->textrow[idx].c1;
        idx++;
        }
    }


/*
** Merge adjacent entries of textwords->textrow[] that are separated by
** a gap which is too small.
**
** An entry's gap is divided by lcheight and compared against the larger
** of mingap and word_spacing.  If it is below that threshold, the entry
** absorbs its successor:  it takes the successor's c2 and gap, its r1/r2
** range is widened to include the successor's r1/r2, and the successor is
** removed from the array (textwords->n is decremented).  The merged entry
** is then checked again against its new successor.
**
** The gap of the last entry is never examined.
*/
void textwords_remove_small_col_gaps(TEXTWORDS *textwords,int lcheight,double mingap,
                                     double word_spacing)

    {
    int cur,k;
    double threshold;

    threshold = (mingap < word_spacing) ? word_spacing : mingap;
    cur=0;
    while (cur<textwords->n-1)
        {
        double scaled_gap;

        scaled_gap=(double)textwords->textrow[cur].gap / lcheight;
        if (scaled_gap >= threshold)
            {
            /* Gap is big enough--keep this entry and move on. */
            cur++;
            continue;
            }

        /* Absorb entry cur+1 into entry cur. */
        textwords->textrow[cur].c2 = textwords->textrow[cur+1].c2;
        textwords->textrow[cur].gap = textwords->textrow[cur+1].gap;
        if (textwords->textrow[cur+1].r1 < textwords->textrow[cur].r1)
            textwords->textrow[cur].r1 = textwords->textrow[cur+1].r1;
        if (textwords->textrow[cur+1].r2 > textwords->textrow[cur].r2)
            textwords->textrow[cur].r2 = textwords->textrow[cur+1].r2;

        /* Close the hole left by the absorbed entry. */
        for (k=cur+2;k<textwords->n;k++)
            textwords->textrow[k-1] = textwords->textrow[k];
        textwords->n--;
        /* cur is deliberately not advanced:  re-test the merged entry. */
        }
    }


/*
** Track gaps between words so that we can tell when one is out of family.
** lcheight = height of a lowercase letter.
**
** The function keeps a private (static) history of the most recently
** stored gap values and acts according to which pointers are NULL:
**
**     textwords==NULL and median_gap==NULL
**         Clear the history and return.
**
**     textwords!=NULL and textwords->n > 1
**         For every entry of textwords->textrow[] except the last, compute
**         gap/lcheight and store it in the history if it is >= word_spacing.
**         When the history is full, the oldest value is overwritten.
**
**     median_gap!=NULL
**         Set (*median_gap) to element [n/2] of a sorted copy of the
**         history (n = number of stored values), or to 0.7 if the history
**         is empty.
**
** If both pointers are non-NULL, the gaps are stored first and then the
** median is computed.
*/
void textwords_add_word_gaps(TEXTWORDS *textwords,int lcheight,double *median_gap,
                             double word_spacing)

    {
    enum { GAP_HISTORY_MAX=1024 };  /* Capacity of the gap history */
    static int ngaps=0;  /* Number of valid values in gap[]; never exceeds GAP_HISTORY_MAX */
    static int inext=0;  /* Index in gap[] where the next value will be stored */
    static double gap[GAP_HISTORY_MAX];
    /* Label passed to the willus memory routines (differs from this function's name). */
    static char *funcname="word_gaps_add";

    if (textwords==NULL && median_gap==NULL)
        {
        ngaps=0;
        inext=0;
        return;
        }
    if (textwords!=NULL && textwords->n>1)
        {
        int i;

        for (i=0;i<textwords->n-1;i++)
            {
            double g;

            g = (double)textwords->textrow[i].gap / lcheight;
            if (g>=word_spacing)
                {
                /*
                ** The write index wraps and the count saturates, so neither
                ** can overflow no matter how many gaps are stored between
                ** resets.
                */
                gap[inext]=g;
                inext++;
                if (inext>=GAP_HISTORY_MAX)
                    inext=0;
                if (ngaps<GAP_HISTORY_MAX)
                    ngaps++;
                }
            }
        }
    if (median_gap!=NULL)
        {
        if (ngaps>0)
            {
            int n;
            double *gap_sorted;  /* v2.02--this variable is no longer static */

            n = ngaps;
            /* Sort a copy so that the stored order of the history is preserved. */
            willus_dmem_alloc_warn(28,(void **)&gap_sorted,sizeof(double)*n,funcname,10);
            memcpy(gap_sorted,gap,n*sizeof(double));
            sortd(gap_sorted,n);
            (*median_gap)=gap_sorted[n/2];
            willus_dmem_free(28,&gap_sorted,funcname);
            }
        else
            (*median_gap)=0.7;
        }
    }