File: Compiler.cpp

package info (click to toggle)
clickhouse 18.16.1%2Bds-7.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid
  • size: 40,292 kB
  • sloc: cpp: 223,075; sql: 21,608; python: 6,596; sh: 4,299; ansic: 3,889; xml: 3,312; perl: 155; makefile: 57; asm: 34
file content (322 lines) | stat: -rw-r--r-- 11,359 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
#include <Poco/DirectoryIterator.h>
#include <Poco/Util/Application.h>
#include <ext/unlock_guard.h>
#include <Common/ClickHouseRevision.h>
#include <Common/SipHash.h>
#include <Common/ShellCommand.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFile.h>
#include <Interpreters/Compiler.h>

#if __has_include(<Interpreters/config_compile.h>)
#include <Interpreters/config_compile.h>
#endif

/// Profile event counters defined outside this file.
/// Compiler::compile increments CompileAttempt on entry and CompileSuccess
/// after the library has been built, loaded and registered.
namespace ProfileEvents
{
    extern const Event CompileAttempt;
    extern const Event CompileSuccess;
}

namespace DB
{

/// Error codes defined outside this file.
/// CANNOT_DLOPEN is the only code that Compiler::getOrCount tolerates when loading a leftover library;
/// CANNOT_COMPILE_CODE is thrown by Compiler::compile.
namespace ErrorCodes
{
    extern const int CANNOT_DLOPEN;
    extern const int CANNOT_COMPILE_CODE;
}

/** Prepares the working directory and takes inventory of libraries already present in it.
  *
  * path_   - directory that holds the generated sources and the compiled libraries;
  *           createDirectory() is called on it.
  * threads - forwarded to the constructor of `pool`.
  */
Compiler::Compiler(const std::string & path_, size_t threads)
    : path(path_), pool(threads)
{
    Poco::File(path).createDirectory();

    /// Every directory entry named "<something>.so" is remembered as "<something>".
    Poco::DirectoryIterator listing_end;
    Poco::DirectoryIterator entry(path);
    while (listing_end != entry)
    {
        const std::string & entry_name = entry.name();

        if (endsWith(entry_name, ".so"))
            files.insert(entry_name.substr(0, entry_name.size() - 3));

        ++entry;
    }

    LOG_INFO(log, "Having " << files.size() << " compiled files from previous start.");
}

/// Logs and then calls pool.wait() before the members are destroyed.
/// Jobs scheduled by getOrCount call compile() on this object, so - judging by the log message -
/// the wait is what keeps them from outliving the Compiler.
Compiler::~Compiler()
{
    LOG_DEBUG(log, "Waiting for threads to finish.");
    pool.wait();
}


/// Computes a 128-bit SipHash over the server revision followed by the bytes of `key`.
/// The revision is mixed in first, so the same key gives a different hash for a different revision.
static Compiler::HashedKey getHash(const std::string & key)
{
    Compiler::HashedKey result;

    auto server_revision = ClickHouseRevision::get();

    SipHash hasher;
    hasher.update(server_revision);
    hasher.update(key.data(), key.size());
    hasher.get128(result.low, result.high);

    return result;
}


/// Renders a hashed key as "<low>_<high>".
/// The result has neither a directory part nor the ".so" extension.
static std::string hashedKeyToFileName(Compiler::HashedKey hashed_key)
{
    WriteBufferFromOwnString name_buf;

    name_buf << hashed_key.low;
    name_buf << '_';
    name_buf << hashed_key.high;

    return name_buf.str();
}


/** Returns the library for `key` if one can be obtained right away; otherwise counts the request
  * and, once enough requests were seen, starts compilation.
  *
  * key                       - identifies the code; hashed together with the server revision.
  * min_count_to_compile      - number of requests after which compilation is started;
  *                             zero means: compile synchronously within this call.
  * additional_compiler_flags - passed through to compile().
  * get_code                  - passed through to compile().
  * on_ready                  - passed through to compile(); it is not invoked on the paths
  *                             that return an already known or leftover library.
  *
  * Returns nullptr when the library is not available (not yet requested often enough, being compiled,
  * compilation failed earlier, no free thread, or a leftover file could not be dlopened).
  * Exceptions from synchronous compilation and from loading a leftover file
  * (other than CANNOT_DLOPEN) propagate to the caller.
  */
SharedLibraryPtr Compiler::getOrCount(
    const std::string & key,
    UInt32 min_count_to_compile,
    const std::string & additional_compiler_flags,
    CodeGenerator get_code,
    ReadyCallback on_ready)
{
    const HashedKey hashed_key = getHash(key);

    std::lock_guard<std::mutex> lock(mutex);

    /// The request is counted regardless of which branch below is taken.
    const UInt32 request_count = ++counts[hashed_key];

    /// A known key maps either to an opened library or to nullptr
    /// (the library is being compiled, or its compilation did not produce a result).
    const auto known_it = libraries.find(hashed_key);
    if (known_it != libraries.end())
    {
        if (!known_it->second)
            LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is already compiling or compilation was failed.");

        /// TODO In this case, after the compilation is finished, the callback will not be called.

        return known_it->second;
    }

    /// Maybe the library file survived from the previous launch.
    const std::string file_name = hashedKeyToFileName(hashed_key);
    const auto leftover_it = files.find(file_name);
    if (leftover_it != files.end())
    {
        const std::string so_file_path = path + '/' + file_name + ".so";
        LOG_INFO(log, "Loading existing library " << so_file_path);

        SharedLibraryPtr loaded;

        try
        {
            loaded = std::make_shared<SharedLibrary>(so_file_path);
        }
        catch (const Exception & e)
        {
            if (e.code() != ErrorCodes::CANNOT_DLOPEN)
                throw;

            /// The .so file is broken (or cannot be dlopened for whatever reason),
            /// for example because the filesystem was corrupted after a server restart.
            /// Forget and delete the file - it will be recompiled on a next attempt.

            tryLogCurrentException(log);

            files.erase(leftover_it);
            Poco::File(so_file_path).remove();
            return nullptr;
        }

        libraries[hashed_key] = loaded;
        return loaded;
    }

    /// Not requested often enough yet.
    if (request_count < min_count_to_compile)
        return nullptr;

    /// Zero min_count_to_compile requests synchronous compilation, which does not need a pool thread.
    const bool synchronous = (min_count_to_compile == 0);

    if (!synchronous && pool.active() >= pool.size())
    {
        LOG_INFO(log, "All threads are busy.");
        return nullptr;
    }

    /// The nullptr entry marks the library as being compiled.
    libraries[hashed_key] = nullptr;

    LOG_INFO(log, "Compiling code " << file_name << ", key: " << key);

    if (!synchronous)
    {
        /// Everything is captured by value: the job runs after this call has returned.
        pool.schedule([=]
        {
            try
            {
                compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
            }
            catch (...)
            {
                tryLogCurrentException("Compiler");
            }
        });

        return nullptr;
    }

    {
        /// compile() locks the mutex itself, so release it for the duration of the call.
        ext::unlock_guard<std::mutex> unlock(mutex);
        compile(hashed_key, file_name, additional_compiler_flags, get_code, on_ready);
    }

    return libraries[hashed_key];
}


/// Writes a preamble for the generated source: it includes <Common/config_version.h> and makes
/// compilation fail with #error unless VERSION_REVISION from the headers equals the revision of the running server.
static void addCodeToAssertHeadersMatch(WriteBuffer & out)
{
    const auto runtime_revision = ClickHouseRevision::get();

    /// Helpers to stringify VERSION_REVISION for the diagnostic message.
    out << "#define STRING2(x) #x\n"
           "#define STRING(x) STRING2(x)\n"
           "#include <Common/config_version.h>\n";

    out << "#if VERSION_REVISION != " << runtime_revision << "\n";
    out << "#pragma message \"ClickHouse headers revision = \" STRING(VERSION_REVISION) \n";
    out << "#error \"ClickHouse headers revision doesn't match runtime revision of the server (" << runtime_revision << ").\"\n";
    out << "#endif\n\n";
}


/** Generates the source for one library, compiles it with the external compiler, loads the result
  * and registers it in `libraries`.
  *
  * hashed_key                - key under which the loaded library is stored in `libraries`.
  * file_name                 - base name (without directory and extension) of the .cpp / .so files inside `path`.
  * additional_compiler_flags - inserted verbatim into the compiler command line.
  * get_code                  - called once; its result is written to the .cpp file after the headers check.
  * on_ready                  - called with the loaded library as the last step, outside of the mutex.
  *
  * Throws Exception with CANNOT_COMPILE_CODE if the build has no INTERNAL_COMPILER_EXECUTABLE,
  * or if the compiler printed something and did not leave an executable output file.
  * In the latter case the .cpp file is left in place for inspection.
  *
  * The method locks `mutex` itself, so it must be called without the mutex held.
  */
void Compiler::compile(
    HashedKey hashed_key,
    std::string file_name,
    const std::string & additional_compiler_flags,
    CodeGenerator get_code,
    ReadyCallback on_ready)
{
    ProfileEvents::increment(ProfileEvents::CompileAttempt);

#if !defined(INTERNAL_COMPILER_EXECUTABLE)
    throw Exception("Cannot compile code: Compiler disabled", ErrorCodes::CANNOT_COMPILE_CODE);
#else
    std::string prefix = path + "/" + file_name;
    std::string cpp_file_path = prefix + ".cpp";
    std::string so_file_path = prefix + ".so";
    /// The compiler writes to a temporary name; the file gets its final name only after a successful build.
    std::string so_tmp_file_path = prefix + ".so.tmp";

    {
        /// Scoped, so that the buffer is destroyed before the compiler is started.
        WriteBufferFromFile out(cpp_file_path);

        addCodeToAssertHeadersMatch(out);
        out << get_code();
    }

    std::stringstream command;

    /// Each location may be overridden in the server config; the defaults come from build-time macros.
    auto compiler_executable_root = Poco::Util::Application::instance().config().getString("compiler_executable_root", INTERNAL_COMPILER_BIN_ROOT);
    auto compiler_headers = Poco::Util::Application::instance().config().getString("compiler_headers", INTERNAL_COMPILER_HEADERS);
    auto compiler_headers_root = Poco::Util::Application::instance().config().getString("compiler_headers_root", INTERNAL_COMPILER_HEADERS_ROOT);
    LOG_DEBUG(log, "Using internal compiler: compiler_executable_root=" << compiler_executable_root << "; compiler_headers_root=" << compiler_headers_root << "; compiler_headers=" << compiler_headers);

    /// Slightly inconvenient.
    /// The command runs in a subshell with stderr redirected to stdout, and the return code is echoed on failure.
    /// Therefore any output at all means that the compiler either failed or printed some message.
    command <<
        "("
            INTERNAL_COMPILER_ENV
            " " << compiler_executable_root << INTERNAL_COMPILER_EXECUTABLE
            " " INTERNAL_COMPILER_FLAGS
            /// It is hard to correctly call a ld program manually, because it is easy to skip critical flags, which might lead to
            /// unhandled exceptions. Therefore pass path to llvm's lld directly to clang.
            " -fuse-ld=" << compiler_executable_root << INTERNAL_LINKER_EXECUTABLE
            " -fdiagnostics-color=never"

    #if INTERNAL_COMPILER_CUSTOM_ROOT
            /// To get correct order merge this results carefully:
            /// echo | clang -x c++ -E -Wp,-v -
            /// echo | g++ -x c++ -E -Wp,-v -

            " -isystem " << compiler_headers_root << "/usr/include/c++/*"
        #if defined(CMAKE_LIBRARY_ARCHITECTURE)
            " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE "/c++/*"
        #endif
            " -isystem " << compiler_headers_root << "/usr/include/c++/*/backward"
            " -isystem " << compiler_headers_root << "/usr/include/clang/*/include"                  /// if compiler is clang (from package)
            " -isystem " << compiler_headers_root << "/usr/local/lib/clang/*/include"                /// if clang installed manually
            " -isystem " << compiler_headers_root << "/usr/lib/clang/*/include"                      /// if clang build from submodules
        #if defined(CMAKE_LIBRARY_ARCHITECTURE)
            " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include-fixed"
            " -isystem " << compiler_headers_root << "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include"
        #endif
            " -isystem " << compiler_headers_root << "/usr/local/include"                            /// if something installed manually
        #if defined(CMAKE_LIBRARY_ARCHITECTURE)
            " -isystem " << compiler_headers_root << "/usr/include/" CMAKE_LIBRARY_ARCHITECTURE
        #endif
            " -isystem " << compiler_headers_root << "/usr/include"
    #endif
            " -I " << compiler_headers << "/dbms/src/"
            " -isystem " << compiler_headers << "/contrib/cityhash102/include/"
            " -isystem " << compiler_headers << "/contrib/libpcg-random/include/"
            " -isystem " << compiler_headers << INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR
            " -isystem " << compiler_headers << INTERNAL_Poco_Foundation_INCLUDE_DIR
            " -isystem " << compiler_headers << INTERNAL_Boost_INCLUDE_DIRS
            " -I " << compiler_headers << "/libs/libcommon/include/"
            " " << additional_compiler_flags <<
            " -shared -o " << so_tmp_file_path << " " << cpp_file_path
            << " 2>&1"
        ") || echo Return code: $?";

#if !NDEBUG
    LOG_TRACE(log, "Compile command: " << command.str());
#endif

    std::string compile_result;

    {
        /// Read the whole output of the command before waiting for the process.
        auto process = ShellCommand::execute(command.str());
        readStringUntilEOF(compile_result, process->out);
        process->wait();
    }

    if (!compile_result.empty())
    {
        std::string error_message = "Cannot compile code:\n\n" + command.str() + "\n\n" + compile_result;

        Poco::File so_tmp_file(so_tmp_file_path);
        if (so_tmp_file.exists() && so_tmp_file.canExecute())
        {
            /// Compiler may emit information messages. This is suspicious, but we still can use compiled result.
            LOG_WARNING(log, error_message);
        }
        else
            throw Exception(error_message, ErrorCodes::CANNOT_COMPILE_CODE);
    }

    /// If there was an error before, the file with the code remains for viewing.
    Poco::File(cpp_file_path).remove();

    Poco::File(so_tmp_file_path).renameTo(so_file_path);

    /// Created the same way as in getOrCount.
    SharedLibraryPtr lib = std::make_shared<SharedLibrary>(so_file_path);

    {
        /// Replaces the nullptr placeholder that getOrCount stored for this key.
        std::lock_guard<std::mutex> lock(mutex);
        libraries[hashed_key] = lib;
    }

    LOG_INFO(log, "Compiled code " << file_name);
    ProfileEvents::increment(ProfileEvents::CompileSuccess);

    on_ready(lib);

#endif
}


}