File: s4u-platform-failures.cpp

package info (click to toggle)
simgrid 4.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 39,192 kB
  • sloc: cpp: 124,913; ansic: 66,744; python: 8,560; java: 6,773; fortran: 6,079; f90: 5,123; xml: 4,587; sh: 2,194; perl: 1,436; makefile: 111; lisp: 49; javascript: 7; sed: 6
file content (142 lines) | stat: -rw-r--r-- 5,575 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/* Copyright (c) 2007-2025. The SimGrid Team. All rights reserved.          */

/* This program is free software; you can redistribute it and/or modify it
 * under the terms of the license (GNU LGPL) which comes with this package. */

/* This example shows how to work with the state profile of a host or a link,
 * specifying when the resource must be turned on or off.
 *
 * To set such a profile, the first way is to use a file in the XML, while the second is to use the programmatic
 * interface, as exemplified in the main() below. Once this profile is in place, the resource will automatically
 * be turned on and off.
 *
 * The actors running on a host that is turned off are forcefully killed
 * once their on_exit callbacks are executed. They cannot avoid this fate.
 * Since we specified on_failure="RESTART" for each actor in the XML file,
 * they will be automatically restarted when the host starts again.
 *
 * Communications using failed links will... fail.
 */

#include "simgrid/kernel/ProfileBuilder.hpp"
#include "simgrid/s4u.hpp"

namespace sg4 = simgrid::s4u;

XBT_LOG_NEW_DEFAULT_CATEGORY(s4u_test, "Messages specific for this s4u example");

/* Master actor: dispatches the tasks to the workers in round-robin order, then tells every worker to stop.
 *
 * Expected arguments (args[0] being the actor name):
 *  - args[1]: number of tasks to dispatch
 *  - args[2]: computation size of each task (this value is the payload sent to the worker)
 *  - args[3]: communication size of each task (passed as the simulated size of the message)
 *  - args[4]: number of workers; worker i listens on the mailbox "worker-<i>"
 *
 * A send that times out or hits a network failure is logged and skipped: the master never retries.
 */
static void master(std::vector<std::string> args)
{
  xbt_assert(args.size() == 5,
             "Expecting four parameters: tasks count, computation size, communication size and workers count");

  const long number_of_tasks = std::stol(args[1]);
  const double comp_size     = std::stod(args[2]);
  const long comm_size       = std::stol(args[3]);
  const long workers_count   = std::stol(args[4]);

  /* The round-robin below computes i % workers_count, which is undefined with zero workers */
  xbt_assert(number_of_tasks <= 0 || workers_count > 0, "Cannot dispatch %ld tasks to %ld workers", number_of_tasks,
             workers_count);

  XBT_INFO("Got %ld workers and %ld tasks to process", workers_count, number_of_tasks);

  for (long i = 0; i < number_of_tasks; i++) {
    /* - Select a worker in a round-robin way */
    sg4::Mailbox* mailbox = sg4::Mailbox::by_name("worker-" + std::to_string(i % workers_count));
    /* The payload is consumed by the worker (through get_unique()) when the send succeeds.
     * When the send fails, nobody else got it, so it must be freed here. */
    auto* payload = new double(comp_size);
    try {
      XBT_INFO("Send a message to %s", mailbox->get_cname());
      mailbox->put(payload, comm_size, 10.0); /* give up after a timeout of 10 */
      XBT_INFO("Send to %s completed", mailbox->get_cname());
    } catch (const simgrid::TimeoutException&) {
      delete payload;
      XBT_INFO("Mmh. Got timeouted while speaking to '%s'. Nevermind. Let's keep going!", mailbox->get_cname());
    } catch (const simgrid::NetworkFailureException&) {
      delete payload;
      XBT_INFO("Mmh. The communication with '%s' failed. Nevermind. Let's keep going!", mailbox->get_cname());
    }
  }

  XBT_INFO("All tasks have been dispatched. Let's tell everybody the computation is over.");
  for (long i = 0; i < workers_count; i++) {
    /* - Eventually tell all the workers to stop by sending a "finalize" task (a negative computation size) */
    sg4::Mailbox* mailbox = sg4::Mailbox::by_name("worker-" + std::to_string(i));
    auto* payload         = new double(-1.0);
    try {
      mailbox->put(payload, 0, 1.0); /* empty message, with a shorter timeout of 1 */
    } catch (const simgrid::TimeoutException&) {
      delete payload;
      XBT_INFO("Mmh. Got timeouted while speaking to '%s'. Nevermind. Let's keep going!", mailbox->get_cname());
    } catch (const simgrid::NetworkFailureException&) {
      delete payload;
      XBT_INFO("Mmh. Something went wrong with '%s'. Nevermind. Let's keep going!", mailbox->get_cname());
    }
  }

  XBT_INFO("Goodbye now!");
}

static void worker(std::vector<std::string> args)
{
  xbt_assert(args.size() == 2, "Expecting one parameter");
  long id               = std::stol(args[1]);
  sg4::Mailbox* mailbox = sg4::Mailbox::by_name("worker-" + std::to_string(id));
  while (true) {
    try {
      XBT_INFO("Waiting a message on %s", mailbox->get_cname());
      auto payload = mailbox->get_unique<double>();
      xbt_assert(payload != nullptr, "mailbox->get() failed");
      double comp_size = *payload;
      if (comp_size < 0) { /* - Exit when -1.0 is received */
        XBT_INFO("I'm done. See you!");
        break;
      }
      /*  - Otherwise, process the task */
      XBT_INFO("Start execution...");
      sg4::this_actor::execute(comp_size);
      XBT_INFO("Execution complete.");
    } catch (const simgrid::NetworkFailureException&) {
      XBT_INFO("Mmh. Something went wrong. Nevermind. Let's keep going!");
    }
  }
}

int main(int argc, char* argv[])
{
  sg4::Engine e(&argc, argv);

  // This is how to attach a profile to an host that is created from the XML file.
  // This should be done before calling load_platform(), as the on_creation() event is fired when loading the platform.
  // You can never set a new profile to a resource that already have one.
  sg4::Host::on_creation_cb([](sg4::Host& h) {
    if (h.get_name() == "Bourrassa") {
      h.set_state_profile(simgrid::kernel::profile::ProfileBuilder::from_string("bourassa_profile", "67 0\n70 1\n", 0));
    }
  });
  e.load_platform(argv[1]);

  e.register_function("master", master);
  e.register_function("worker", worker);
  e.load_deployment(argv[2]);

  // Add a new host programatically, and attach a state profile to it
  auto* root     = e.netzone_by_name_or_null("AS0");
  auto* lilibeth = root->add_host("Lilibeth", 1e15);
  auto link      = e.link_by_name("10");
  root->add_route(e.host_by_name("Tremblay"), lilibeth, {link});
  lilibeth->set_state_profile(simgrid::kernel::profile::ProfileBuilder::from_string("lilibeth_profile", R"(
4 0
5 1
)",
                                                                                    10));
  lilibeth->seal();

  // Create an actor on that new host, to monitor its own state
  auto actor = lilibeth->add_actor("sleeper", []() {
    XBT_INFO("Start sleeping...");
    sg4::this_actor::sleep_for(1);
    XBT_INFO("done sleeping.");
  });
  actor->set_auto_restart(true);

  e.run();

  XBT_INFO("Simulation time %g", e.get_clock());
  return 0;
}