From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sun, 11 Aug 2024 03:30:44 +0300
Subject: JITModule: rework/fix `__udivdi3` handling

The original workaround is very partial,
and was not really working in my experience,
even after making it non-GCC specific.

Instead:
1. Ensure that the library that actually provides that symbol
   (as per the compiler used!) is actually linked into.
   This was not enough still.
2. Replace `HalideJITMemoryManager` hack with a more direct approach
   of actually telling the JIT the address of the symbol.
3. While there, move the symbol's forward definition to outside
   of namespaces. It's a global symbol, it makes sense to place it there.

This makes python binding tests pass on i386,
and i'm really happy about that.

Refs. https://github.com/llvm/llvm-project/issues/61289
Inspired by https://github.com/root-project/root/pull/13286

Forwarded: https://github.com/halide/Halide/pull/8389
---
 src/CMakeLists.txt | 28 ++++++++++++++++++++++++++++
 src/JITModule.cpp  | 47 ++++++++++++++++++++++++-----------------------
 2 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cf23d7e..d66448f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -579,6 +579,34 @@ if (WITH_SERIALIZATION_JIT_ROUNDTRIP_TESTING)
     target_compile_definitions(Halide PRIVATE WITH_SERIALIZATION_JIT_ROUNDTRIP_TESTING)
 endif ()
 
+if (NOT DEFINED Halide_COMPILER_BUILTIN_LIBRARY)
+    # FIXME: `execute_process()` can not use `$<TARGET_FILE:clang>`, workarounding that.
+    get_property(clang_LOCATION TARGET clang PROPERTY LOCATION)
+    execute_process(
+        COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} ${CMAKE_C_COMPILER_LAUNCHER} ${clang_LOCATION} "--print-libgcc-file-name"
+        OUTPUT_VARIABLE Halide_COMPILER_BUILTIN_LIBRARY
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_VARIABLE Halide_COMPILER_BUILTIN_LIBRARY_stderr
+        ERROR_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE Halide_COMPILER_BUILTIN_LIBRARY_exit_code
+    )
+    if (NOT "${Halide_COMPILER_BUILTIN_LIBRARY_exit_code}" STREQUAL "0")
+        message(FATAL_ERROR "Unable to invoke clang --print-libgcc-file-name: ${Halide_COMPILER_BUILTIN_LIBRARY_stderr}")
+    endif()
+endif ()
+
+set(Halide_COMPILER_BUILTIN_LIBRARY "${Halide_COMPILER_BUILTIN_LIBRARY}"
+    CACHE FILEPATH "Library containing __udivdi3 symbol, either “libgcc.a” or “libclang_rt.builtins.*.a”.")
+
+add_library(Halide::__udivdi3 UNKNOWN IMPORTED)
+set_target_properties(
+  Halide::__udivdi3
+  PROPERTIES
+  IMPORTED_LOCATION "${Halide_COMPILER_BUILTIN_LIBRARY}"
+)
+
+target_link_libraries(Halide PRIVATE Halide::__udivdi3)
+
 # Note that we (deliberately) redeclare these versions here, even though the macros
 # with identical versions are expected to be defined in source; this allows us to
 # ensure that the versions defined between all build systems are identical.
diff --git a/src/JITModule.cpp b/src/JITModule.cpp
index 3aa30a8..176531c 100644
--- a/src/JITModule.cpp
+++ b/src/JITModule.cpp
@@ -23,15 +23,13 @@
 #include "Pipeline.h"
 #include "WasmExecutor.h"
 
+extern "C" unsigned long __udivdi3(unsigned long a, unsigned long b);
+
 namespace Halide {
 namespace Internal {
 
 using std::string;
 
-#if defined(__GNUC__) && defined(__i386__)
-extern "C" unsigned long __udivdi3(unsigned long a, unsigned long b);
-#endif
-
 #ifdef _WIN32
 void *get_symbol_address(const char *s) {
     return (void *)GetProcAddress(GetModuleHandle(nullptr), s);
@@ -135,6 +133,26 @@ void load_webgpu() {
                                << "(Try setting the env var HL_WEBGPU_NATIVE_LIB to an explicit path to fix this.)\n";
 }
 
+llvm::orc::SymbolMap GetListOfAdditionalSymbols(const llvm::orc::LLJIT &Jit) {
+    // Inject a number of symbols that may be in libgcc.a where they are
+    // not found automatically. See also the upstream issue:
+    // https://github.com/llvm/llvm-project/issues/61289.
+
+    static const std::pair<const char *, const void *> NamePtrList[] = {
+        // NOTE: at least libgcc does not provide this symbol on 64-bit x86_64, only on 32-bit i386.
+        {"__udivdi3", (((CHAR_BIT * sizeof(void *)) == 32) ? ((void *)&__udivdi3) : nullptr)},
+    };
+
+    llvm::orc::SymbolMap AdditionalSymbols;
+    for (const auto &NamePtr : NamePtrList) {
+        auto Addr = static_cast<llvm::orc::ExecutorAddr>(
+            reinterpret_cast<uintptr_t>(NamePtr.second));
+        AdditionalSymbols[Jit.mangleAndIntern(NamePtr.first)] =
+            llvm::orc::ExecutorSymbolDef(Addr, llvm::JITSymbolFlags::Exported);
+    }
+    return AdditionalSymbols;
+}
+
 }  // namespace
 
 using namespace llvm;
@@ -215,25 +233,6 @@ public:
             }
         }
         uint64_t result = SectionMemoryManager::getSymbolAddress(name);
-#if defined(__GNUC__) && defined(__i386__)
-        // This is a workaround for an odd corner case (cross-compiling + testing
-        // Python bindings x86-32 on an x86-64 system): __udivdi3 is a helper function
-        // that GCC uses to do u64/u64 division on 32-bit systems; it's usually included
-        // by the linker on these systems as needed. When we JIT, LLVM will include references
-        // to this call; MCJIT fixes up these references by doing (roughly) dlopen(NULL)
-        // to look up the symbol. For normal JIT tests, this works fine, as dlopen(NULL)
-        // finds the test executable, which has the right lookups to locate it inside libHalide.so.
-        // If, however, we are running a JIT-via-Python test, dlopen(NULL) returns the
-        // CPython executable... which apparently *doesn't* include this as an exported
-        // function, so the lookup fails and crashiness ensues. So our workaround here is
-        // a bit icky, but expedient: check for this name if we can't find it elsewhere,
-        // and if so, return the one we know should be present. (Obviously, if other runtime
-        // helper functions of this sort crop up in the future, this should be expanded
-        // into a "builtins map".)
-        if (result == 0 && name == "__udivdi3") {
-            result = (uint64_t)&__udivdi3;
-        }
-#endif
         internal_assert(result != 0)
             << "HalideJITMemoryManager: unable to find address for " << name << "\n";
         return result;
@@ -365,6 +364,8 @@ void JITModule::compile_module(std::unique_ptr<llvm::Module> m, const string &fu
     internal_assert(gen) << llvm::toString(gen.takeError()) << "\n";
     JIT->getMainJITDylib().addGenerator(std::move(gen.get()));
 
+    cantFail(JIT->getMainJITDylib().define(absoluteSymbols(GetListOfAdditionalSymbols(*JIT))));
+
     llvm::orc::ThreadSafeModule tsm(std::move(m), std::move(jit_module->context));
     auto err = JIT->addIRModule(std::move(tsm));
     internal_assert(!err) << llvm::toString(std::move(err)) << "\n";
