File: preprocessor.go

package info (click to toggle)
c2go 0.26.11-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,052 kB
  • sloc: ansic: 6,037; sh: 82; makefile: 5
file content (338 lines) | stat: -rw-r--r-- 8,288 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
package preprocessor

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"text/scanner"

	"github.com/elliotchance/c2go/program"
	"github.com/elliotchance/c2go/util"
)

// entity is one section of the preprocessed output: a line marker such as
//
//	# 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4
//
// together with the lines that follow it up to the next marker.
type entity struct {
	// positionInSource is the line number written back when the marker is
	// re-emitted as `# <positionInSource> "<include>"`.
	positionInSource int
	// include is the file name written back in the re-emitted marker and
	// recorded as the File of comments found in this entity.
	include          string
	// other is compared by isSame but is never written to the output;
	// presumably it holds the trailing marker flags (e.g. "2 3 4") — TODO
	// confirm against parseIncludePreprocessorLine.
	other            string

	// lines[0] is the raw line marker itself, which looks like:
	// # 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4
	// lines[1:] are the zero or more lines of code that follow it.
	lines []*string
}

// parseComments scans the body of the entity (every line except the leading
// line marker) and appends each comment token it finds to *comments.
//
// Each recorded comment carries e.include as its file and a line number equal
// to the scanner's line within the body shifted by positionInSource - 1.
func (e *entity) parseComments(comments *[]program.Comment) {
	// lines[0] is the line marker, not code, so the body starts at index 1.
	var body strings.Builder
	for i := 1; i < len(e.lines); i++ {
		body.WriteString(*e.lines[i])
		body.WriteByte('\n')
	}

	var sc scanner.Scanner
	sc.Init(strings.NewReader(body.String()))
	// Mode is overridden after Init so that only comments are recognised as
	// multi-character tokens; everything else comes back rune by rune.
	sc.Mode = scanner.ScanComments
	sc.Filename = e.include

	for {
		tok := sc.Scan()
		if tok == scanner.EOF {
			break
		}
		if tok != scanner.Comment {
			continue
		}
		*comments = append(*comments, program.Comment{
			File:    e.include,
			Line:    e.positionInSource + sc.Position.Line - 1,
			Comment: sc.TokenText(),
		})
	}
}

// isSame reports whether e and x describe the same piece of preprocessed
// code: identical include name, position, trailing marker text and exactly
// the same sequence of lines.
func (e *entity) isSame(x *entity) bool {
	// Cheap scalar comparisons first; any mismatch settles the question.
	if e.include != x.include ||
		e.positionInSource != x.positionInSource ||
		e.other != x.other ||
		len(e.lines) != len(x.lines) {
		return false
	}
	// Both slices have the same length here, so one index walks both.
	for idx, mine := range e.lines {
		if *mine != *x.lines[idx] {
			return false
		}
	}
	return true
}

// Analyze preprocesses inputFiles with clang, splits the output into
// entities (one per line marker) and merges the unique ones back together.
//
// It returns:
//   - pp: the merged preprocessed source, where every kept entity is
//     introduced by a simplified `# <line> "<file>"` marker;
//   - comments: the comments found in entities that belong to user sources;
//   - includes: every header reported by clang, flagged as user or not;
//   - err: the first error reported by clang or by the output parser.
func Analyze(inputFiles, clangFlags []string, verbose bool) (pp []byte,
	comments []program.Comment, includes []program.IncludeHeader, err error) {

	var allItems []entity
	allItems, err = analyzeFiles(inputFiles, clangFlags, verbose)
	if err != nil {
		return
	}

	// Collect the user files and the complete header list.
	var us []string
	us, err = GetIncludeListWithUserSource(inputFiles, clangFlags)
	if err != nil {
		return
	}
	var all []string
	all, err = GetIncludeFullList(inputFiles, clangFlags)
	if err != nil {
		return
	}
	// Generate C header list
	includes = generateIncludeList(us, all)

	userSource := make(map[string]bool, len(us))
	for _, name := range us {
		userSource[name] = true
	}

	// Merge the entities
	var lines []string
	for i := range allItems {
		item := &allItems[i]

		// An entity identical to an earlier one is dropped so the same
		// code is not handed to the transpiler twice.
		duplicate := false
		for j := 0; j < i; j++ {
			if item.isSame(&allItems[j]) {
				duplicate = true
				break
			}
		}
		if duplicate {
			continue
		}

		// Parse comments only for user sources. The name may carry a
		// leading "./" that the user source list does not have.
		// HasPrefix keeps this safe for names shorter than two bytes,
		// which direct indexing would panic on.
		if userSource[item.include] ||
			(strings.HasPrefix(item.include, "./") && userSource[item.include[2:]]) {
			item.parseComments(&comments)
		}

		// Field "other" is deliberately left out of the marker to avoid
		// errors like:
		// ./tests/multi/head.h:4:28: error: invalid line marker flag '2': cannot pop empty include stack
		// # 2 "./tests/multi/main.c" 2
		//                            ^
		lines = append(lines,
			fmt.Sprintf("# %d \"%s\"", item.positionInSource, item.include))
		// item.lines[0] is the original marker, replaced by the one above.
		for k := 1; k < len(item.lines); k++ {
			lines = append(lines, *item.lines[k])
		}
	}
	pp = []byte(strings.Join(lines, "\n"))

	return
}

// analyzeFiles runs the clang preprocessor over inputFiles and splits its
// output into entities: each line marker (`# <n> "<file>" ...`) starts a new
// entity and every following line, up to the next marker, is appended to it.
// The marker line itself is stored as the first line of its entity.
//
// An error is returned when clang fails, when a marker cannot be parsed,
// when the output cannot be read line by line, or when a line appears before
// the first marker (which would otherwise dereference a nil entity).
func analyzeFiles(inputFiles, clangFlags []string, verbose bool) (items []entity, err error) {
	// See : https://clang.llvm.org/docs/CommandGuide/clang.html
	// clang -E <file>    Run the preprocessor stage.
	var out bytes.Buffer
	out, err = getPreprocessSources(inputFiles, clangFlags, verbose)
	if err != nil {
		return
	}

	// Parsing preprocessor file
	lineScanner := bufio.NewScanner(bytes.NewReader(out.Bytes()))
	// The default limit is 64 KiB per line; allow a line as long as the
	// whole output so that long generated lines are not rejected.
	lineScanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), out.Len()+1)

	reg := util.GetRegex("# (\\d+) \".*\".*")

	// item is the entity currently being filled.
	var item *entity
	for lineScanner.Scan() {
		// line is declared inside the loop body, so the pointer stored
		// below refers to a distinct string on every iteration.
		line := lineScanner.Text()
		if reg.MatchString(line) {
			if item != nil {
				items = append(items, *item)
			}
			item, err = parseIncludePreprocessorLine(line)
			if err != nil {
				err = fmt.Errorf("Cannot parse line : %s with error: %w", line, err)
				return
			}
			if item.positionInSource == 0 {
				// Must not be less than 1, to avoid problems with
				// identification of the "0" AST base element.
				item.positionInSource = 1
			}
			item.lines = make([]*string, 0)
		}
		if item == nil {
			err = fmt.Errorf("unexpected line before the first line marker: %q", line)
			return
		}
		item.lines = append(item.lines, &line)
	}
	if err = lineScanner.Err(); err != nil {
		err = fmt.Errorf("reading preprocessor output: %w", err)
		return
	}
	if item != nil {
		items = append(items, *item)
	}
	return
}

// getPreprocessSources runs `clang -E -C` over all inputFiles at once and
// returns the preprocessor's standard output.
//
// The files are combined through a temporary "union" C file that #includes
// each input by absolute path; the temporary directory is removed before
// returning. clangFlags are passed through, followed by -D_XOPEN_SOURCE on
// darwin or -D_GNU_SOURCE elsewhere. When verbose is true the command line is
// printed to standard output. The caller's clangFlags slice is not modified.
//
// See : https://clang.llvm.org/docs/CommandGuide/clang.html
// clang -E <file>    Run the preprocessor stage.
func getPreprocessSources(inputFiles, clangFlags []string, verbose bool) (out bytes.Buffer, err error) {
	// get temp dir
	dir, err := os.MkdirTemp("", "c2go-union")
	if err != nil {
		return
	}
	// Best-effort cleanup: a failure to remove the directory must not
	// mask the result of the preprocessing itself.
	defer func() { _ = os.RemoveAll(dir) }()

	// file name union file
	unionFileName := filepath.Join(dir, "unionFileName.c")

	// create a body for union file
	var unionBody strings.Builder
	for _, name := range inputFiles {
		var absPath string
		absPath, err = filepath.Abs(name)
		if err != nil {
			return
		}
		fmt.Fprintf(&unionBody, "#include \"%s\"\n", absPath)
	}

	// write a union file
	err = os.WriteFile(unionFileName, []byte(unionBody.String()), 0644)
	if err != nil {
		return
	}

	// Build the argument list in a fresh slice: appending to clangFlags
	// directly could write into the caller's backing array.
	args := make([]string, 0, len(clangFlags)+4)
	args = append(args, "-E", "-C")
	args = append(args, clangFlags...)
	// Add open source defines
	if runtime.GOOS == "darwin" {
		args = append(args, "-D_XOPEN_SOURCE")
	} else {
		args = append(args, "-D_GNU_SOURCE")
	}
	args = append(args, unionFileName) // All inputFiles

	if verbose {
		fmt.Println("executing clang:")
		fmt.Println("clang", strings.Join(args, " "))
	}

	// preprocessor clang
	var stderr bytes.Buffer
	cmd := exec.Command("clang", args...)
	cmd.Stdout = &out
	cmd.Stderr = &stderr
	err = cmd.Run()
	if err != nil {
		// Do not hand back partial output together with an error.
		out.Reset()
		err = fmt.Errorf("preprocess for file: %v\nfailed: %w\nStdErr = %v", inputFiles, err, stderr.String())
		return
	}

	return
}

// generateIncludeList builds one IncludeHeader per entry of allList, in the
// same order, marking an entry as user source when the identical string also
// appears in userList.
func generateIncludeList(userList, allList []string) (
	includes []program.IncludeHeader) {

	// Index the user files once so each header needs a single lookup.
	userSet := make(map[string]struct{}, len(userList))
	for _, name := range userList {
		userSet[name] = struct{}{}
	}

	for _, header := range allList {
		_, fromUser := userSet[header]
		includes = append(includes, program.IncludeHeader{
			HeaderName:   header,
			IsUserSource: fromUser,
		})
	}
	return includes
}

// GetIncludeListWithUserSource returns the files clang reports for
// inputFiles when invoked with -MM.
//
// Example:
//
//	$ clang  -MM -c exit.c
//	exit.o: exit.c tests.h
func GetIncludeListWithUserSource(inputFiles, clangFlags []string) (lines []string, err error) {
	lines, err = getIncludeList(inputFiles, clangFlags, "-MM")
	return lines, err
}

// GetIncludeFullList returns the complete list of files clang reports for
// inputFiles when invoked with -M.
//
// Example:
//
//	$ clang -M -c triangle.c
//	triangle.o: triangle.c /usr/include/stdio.h /usr/include/features.h \
//	  /usr/include/stdc-predef.h /usr/include/x86_64-linux-gnu/sys/cdefs.h \
//	  /usr/include/x86_64-linux-gnu/bits/wordsize.h \
//	  /usr/include/x86_64-linux-gnu/gnu/stubs.h \
//	  /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
//	  / ........ and other
func GetIncludeFullList(inputFiles, clangFlags []string) (lines []string, err error) {
	lines, err = getIncludeList(inputFiles, clangFlags, "-M")
	return lines, err
}

// getIncludeList runs `clang <flag> -c <absolute input files> <clangFlags>`
// and hands clang's standard output to parseIncludeList.
//
// flag selects the dependency mode ("-M" or "-MM" in this file). Input paths
// are made absolute in a private argument list, so the caller's inputFiles
// slice is left untouched. If clang exits with an error, the returned error
// wraps it and includes everything clang wrote to standard error.
func getIncludeList(inputFiles, clangFlags []string, flag string) (lines []string, err error) {
	args := make([]string, 0, 2+len(inputFiles)+len(clangFlags))
	args = append(args, flag, "-c")
	for _, name := range inputFiles {
		var absPath string
		absPath, err = filepath.Abs(name)
		if err != nil {
			return
		}
		args = append(args, absPath)
	}
	args = append(args, clangFlags...)

	var out, stderr bytes.Buffer
	cmd := exec.Command("clang", args...)
	cmd.Stdout = &out
	cmd.Stderr = &stderr
	if err = cmd.Run(); err != nil {
		err = fmt.Errorf("preprocess failed: %w\nStdErr = %v", err, stderr.String())
		return
	}
	return parseIncludeList(out.String())
}