File: ipmi-sel.c

package info (click to toggle)
skiboot 5.3.3-1
  • links: PTS
  • area: main
  • in suites: stretch
  • size: 13,280 kB
  • ctags: 13,840
  • sloc: ansic: 77,199; asm: 1,002; sh: 997; cpp: 894; tcl: 408; makefile: 325; python: 166; pascal: 65
file content (579 lines) | stat: -rw-r--r-- 15,730 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
/* Copyright 2013-2014 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define pr_fmt(fmt) "IPMI: " fmt
#include <skiboot.h>
#include <stdlib.h>
#include <string.h>
#include <ipmi.h>
#include <device.h>
#include <opal.h>
#include <lock.h>
#include <errorlog.h>
#include <pel.h>
#include <opal-msg.h>

/* OEM SEL fields */
#define SEL_OEM_ID_0		0x55
#define SEL_OEM_ID_1		0x55
#define SEL_RECORD_TYPE_OEM	0xC0
#define SEL_RECORD_TYPE_EVENT	0x02

#define SEL_NETFN_IBM		0x3a

/* OEM SEL Commands */
#define CMD_AMI_POWER		0x04
#define CMD_AMI_PNOR_ACCESS	0x07
#define CMD_AMI_OCC_RESET	0x0e

#define SOFT_OFF	        0x00
#define SOFT_REBOOT	        0x01

#define RELEASE_PNOR		0x00
#define REQUEST_PNOR		0x01

/* 32.1 SEL Event Records type */
#define SEL_REC_TYPE_SYS_EVENT	0x02
#define SEL_REC_TYPE_AMI_ESEL	0xDF

/* OEM SEL generator ID for AMI */
#define SEL_GENERATOR_ID_AMI	0x2000

/* IPMI SEL version */
#define SEL_EVM_VER_1		0x03
#define SEL_EVM_VER_2		0x04

/*
 * Sensor type for System events
 *
 * Sensor information (type, number, etc) is passed to us via
 * device tree. Currently we are using System Event type to
 * log OPAL events.
 */
#define SENSOR_TYPE_SYS_EVENT	0x12

/*
 * 42.1 Event/Reading Type Codes
 *
 * Note that device hotplug and availability related events
 * are not defined as we are not using those events type.
 */
#define SEL_EVENT_DIR_TYPE_UNSPECIFIED	0x00
#define SEL_EVENT_DIR_TYPE_THRESHOLD	0x01
#define SEL_EVENT_DIR_TYPE_STATE	0x03
#define SEL_EVENT_DIR_TYPE_PREDICTIVE	0x04
#define SEL_EVENT_DIR_TYPE_LIMIT	0x05
#define SEL_EVENT_DIR_TYPE_PERFORMANCE	0x06
#define SEL_EVENT_DIR_TYPE_TRANSITION	0x07
#define SEL_EVENT_DIR_TYPE_OEM		0x70

/*
 * 42.1 Event/Reading Type Codes
 */
#define SEL_DATA1_AMI			0xAA
#define SEL_DATA1_DEASSERTED		0x00
#define SEL_DATA1_ASSERTED		0x01
#define SEL_DATA1_OK			0x00
#define SEL_DATA1_NON_CRIT_FROM_OK	0x01
#define SEL_DATA1_CRIT_FROM_LESS_SEV	0x02
#define SEL_DATA1_NON_REC_FROM_LESS_SEV	0x03
#define SEL_DATA1_NON_CRIT		0x04
#define SEL_DATA1_CRITICAL		0x05
#define SEL_DATA1_NON_RECOVERABLE	0X06
#define SEL_DATA1_MONITOR		0x07
#define SEL_DATA1_INFORMATIONAL		0x08

/* SEL Record Entry */
struct sel_record {
	le16		record_id;
	uint8_t		record_type;
	le32		timestamp;
	le16		generator_id;
	uint8_t		evm_ver;
	uint8_t		sensor_type;
	uint8_t		sensor_number;
	uint8_t		event_dir_type;
	uint8_t		event_data1;
	uint8_t		event_data2;
	uint8_t		event_data3;
} __packed;

static struct sel_record sel_record;

struct oem_sel {
	/* SEL header */
	uint8_t id[2];
	uint8_t type;
	uint8_t timestamp[4];
	uint8_t manuf_id[3];
	/* OEM SEL data (6 bytes) follows */
	uint8_t netfun;
	uint8_t cmd;
	uint8_t data[4];
};

#define ESEL_HDR_SIZE 7

/* Used for sending PANIC events like abort() path */
struct ipmi_sel_panic_msg {
	bool		busy;
	struct ipmi_msg	*msg;
	struct lock	lock;
};
static struct ipmi_sel_panic_msg ipmi_sel_panic_msg;

/* Forward declaration */
static void ipmi_elog_poll(struct ipmi_msg *msg);

void ipmi_sel_init(void)
{
	/* Already done */
	if (ipmi_sel_panic_msg.msg != NULL)
		return;

	memset(&ipmi_sel_panic_msg, 0, sizeof(struct ipmi_sel_panic_msg));
	ipmi_sel_panic_msg.msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE,
					    IPMI_RESERVE_SEL, ipmi_elog_poll,
					    NULL, NULL, IPMI_MAX_REQ_SIZE, 2);
}

/*
 * Allocate IPMI message
 *  For normal event, allocate memory using ipmi_mkmsg and for PANIC
 *  event, use pre-allocated buffer.
 */
static struct ipmi_msg *ipmi_sel_alloc_msg(struct errorlog *elog_buf)
{
	struct ipmi_msg *msg = NULL;

	if (elog_buf->event_severity == OPAL_ERROR_PANIC) {
		/* Called before initialization completes */
		if (ipmi_sel_panic_msg.msg == NULL) {
			ipmi_sel_init();	/* Try to allocate IPMI message */
			if (ipmi_sel_panic_msg.msg == NULL)
				return NULL;
		}

		if (ipmi_sel_panic_msg.busy == true)
			return NULL;

		lock(&ipmi_sel_panic_msg.lock);
		msg = ipmi_sel_panic_msg.msg;
		ipmi_sel_panic_msg.busy = true;
		unlock(&ipmi_sel_panic_msg.lock);

		ipmi_init_msg(msg, IPMI_DEFAULT_INTERFACE,
			      IPMI_RESERVE_SEL, ipmi_elog_poll,
			      elog_buf, IPMI_MAX_REQ_SIZE, 2);
	} else {
		msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_RESERVE_SEL,
				 ipmi_elog_poll, elog_buf,
				 NULL, IPMI_MAX_REQ_SIZE, 2);
	}

	return msg;
}

static void ipmi_sel_free_msg(struct ipmi_msg *msg)
{
	if (msg == ipmi_sel_panic_msg.msg) {
		lock(&ipmi_sel_panic_msg.lock);
		ipmi_sel_panic_msg.busy = false;
		unlock(&ipmi_sel_panic_msg.lock);
	} else {
		ipmi_free_msg(msg);
	}
	msg = NULL;
}

/* Initialize eSEL record */
static void ipmi_init_esel_record(void)
{
	memset(&sel_record, 0, sizeof(struct sel_record));
	sel_record.record_type = SEL_REC_TYPE_AMI_ESEL;
	sel_record.generator_id = SEL_GENERATOR_ID_AMI;
	sel_record.evm_ver = SEL_EVM_VER_2;
	sel_record.sensor_type	= SENSOR_TYPE_SYS_EVENT;
	sel_record.sensor_number =
		ipmi_get_sensor_number(SENSOR_TYPE_SYS_EVENT);
	sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_OEM;
	sel_record.event_data1 = SEL_DATA1_AMI;
}

/* Update required fields in SEL record */
static void ipmi_update_sel_record(uint8_t event_severity, uint16_t esel_record_id)
{
	sel_record.record_type = SEL_REC_TYPE_SYS_EVENT;
	sel_record.event_data2 = (esel_record_id >> 8) & 0xff;
	sel_record.event_data3 = esel_record_id & 0xff;

	switch (event_severity) {
	case OPAL_ERROR_PANIC:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_TRANSITION;
		sel_record.event_data1 = SEL_DATA1_CRITICAL;
		break;
	case OPAL_UNRECOVERABLE_ERR_GENERAL:	/* Fall through */
	case OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF:
	case OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY:
	case OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF:
	case OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_TRANSITION;
		sel_record.event_data1 = SEL_DATA1_NON_RECOVERABLE;
		break;
	case OPAL_PREDICTIVE_ERR_GENERAL:	/* Fall through */
	case OPAL_PREDICTIVE_ERR_DEGRADED_PERF:
	case OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT:
	case OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF:
	case OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_PREDICTIVE;
		sel_record.event_data1 = SEL_DATA1_NON_CRIT_FROM_OK;
		break;
	case OPAL_RECOVERED_ERR_GENERAL:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_TRANSITION;
		sel_record.event_data1 = SEL_DATA1_OK;
		break;
	case OPAL_INFO:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_TRANSITION;
		sel_record.event_data1 = SEL_DATA1_INFORMATIONAL;
		break;
	default:
		sel_record.event_dir_type = SEL_EVENT_DIR_TYPE_STATE;
		sel_record.event_data1 = SEL_DATA1_ASSERTED;
		break;
	}
}

static void ipmi_elog_error(struct ipmi_msg *msg)
{
	if (msg->cc == IPMI_LOST_ARBITRATION_ERR)
		/* Retry due to SEL erase */
		ipmi_queue_msg(msg);
	else {
		opal_elog_complete(msg->user_data, false);
		ipmi_sel_free_msg(msg);
	}
}

static void ipmi_log_sel_event_error(struct ipmi_msg *msg)
{
	if (msg->cc != IPMI_CC_NO_ERROR)
		prlog(PR_INFO, "SEL: Failed to log SEL event\n");

	ipmi_sel_free_msg(msg);
}

static void ipmi_log_sel_event_complete(struct ipmi_msg *msg)
{
	prlog(PR_INFO, "SEL: New event logged [ID : %x%x]\n",
	      msg->data[1], msg->data[0]);

	ipmi_sel_free_msg(msg);
}

/* Log SEL event with eSEL record ID */
static void ipmi_log_sel_event(struct ipmi_msg *msg,
			       uint8_t event_severity, uint16_t esel_record_id)
{
	/* Fill required SEL event fields */
	ipmi_update_sel_record(event_severity, esel_record_id);

	/* Fill IPMI message */
	ipmi_init_msg(msg, IPMI_DEFAULT_INTERFACE, IPMI_ADD_SEL_EVENT,
		      ipmi_log_sel_event_complete, NULL,
		      sizeof(struct sel_record), 2);

	/* Copy SEL data */
	memcpy(msg->data, &sel_record, sizeof(struct sel_record));

	msg->error = ipmi_log_sel_event_error;
	ipmi_queue_msg_head(msg);
}

/* Goes through the required steps to add a complete eSEL:
 *
 *  1. Get a reservation
 *  2. Add eSEL header
 *  3. Partially add data to the SEL
 *
 * Because a reservation is needed we need to ensure eSEL's are added
 * as a single transaction as concurrent/interleaved adds would cancel
 * the reservation. We guarantee this by always adding our messages to
 * the head of the transmission queue, blocking any other messages
 * being sent until we have completed sending this message.
 *
 * There is still a very small chance that we will accidentally
 * interleave a message if there is another one waiting at the head of
 * the ipmi queue and another cpu calls the ipmi poller before we
 * complete. However this should just cause a resevation cancelled
 * error which we have to deal with anyway (eg. because there may be a
 * SEL erase in progress) so it shouldn't cause any problems.
 */
static void ipmi_elog_poll(struct ipmi_msg *msg)
{
	static bool first = false;
	static char pel_buf[IPMI_MAX_PEL_SIZE];
	static size_t pel_size;
	static size_t esel_size;
	static int esel_index = 0;
	int pel_index;
	static unsigned int reservation_id = 0;
	static unsigned int record_id = 0;
	struct errorlog *elog_buf = (struct errorlog *) msg->user_data;
	size_t req_size;

	ipmi_init_esel_record();

	if (msg->cmd == IPMI_CMD(IPMI_RESERVE_SEL)) {
		first = true;
		reservation_id = msg->data[0];
		reservation_id |= msg->data[1] << 8;
		if (!reservation_id) {
			/* According to specification we should never
			 * get here, but just in case we do we cancel
			 * sending the message. */
			prerror("Invalid reservation id");
			opal_elog_complete(elog_buf, false);
			ipmi_sel_free_msg(msg);
			return;
		}

		pel_size = create_pel_log(elog_buf,
					  pel_buf, IPMI_MAX_PEL_SIZE);
		esel_size = pel_size + sizeof(struct sel_record);
		esel_index = 0;
		record_id = 0;
	} else {
		record_id = msg->data[0];
		record_id |= msg->data[1] << 8;
	}

	/* Start or continue the IPMI_PARTIAL_ADD_SEL */
	if (esel_index >= esel_size) {
		/* We're all done. Invalidate the resevation id to
		 * ensure we get an error if we cut in on another eSEL
		 * message. */
		reservation_id = 0;
		esel_index = 0;

		/* Log SEL event and free ipmi message */
		ipmi_log_sel_event(msg, elog_buf->event_severity, record_id);

		opal_elog_complete(elog_buf, true);
		return;
	}

	if ((esel_size - esel_index) <= (IPMI_MAX_REQ_SIZE - ESEL_HDR_SIZE)) {
		/* Last data to send */
		msg->data[6] = 1;
		req_size = esel_size - esel_index + ESEL_HDR_SIZE;
	} else {
		msg->data[6] = 0;
		req_size = IPMI_MAX_REQ_SIZE;
	}

	ipmi_init_msg(msg, IPMI_DEFAULT_INTERFACE, IPMI_PARTIAL_ADD_ESEL,
		      ipmi_elog_poll, elog_buf, req_size, 2);

	msg->data[0] = reservation_id & 0xff;
	msg->data[1] = (reservation_id >> 8) & 0xff;
	msg->data[2] = record_id & 0xff;
	msg->data[3] = (record_id >> 8) & 0xff;
	msg->data[4] = esel_index & 0xff;
	msg->data[5] = (esel_index >> 8) & 0xff;

	if (first) {
		first = false;
		memcpy(&msg->data[ESEL_HDR_SIZE],
		       &sel_record, sizeof(struct sel_record));
		esel_index = sizeof(struct sel_record);
		msg->req_size = esel_index + ESEL_HDR_SIZE;
	} else {
		pel_index = esel_index - sizeof(struct sel_record);
		memcpy(&msg->data[ESEL_HDR_SIZE],
		       &pel_buf[pel_index], msg->req_size - ESEL_HDR_SIZE);
		esel_index += msg->req_size - ESEL_HDR_SIZE;
	}

	ipmi_queue_msg_head(msg);
	return;
}

int ipmi_elog_commit(struct errorlog *elog_buf)
{
	struct ipmi_msg *msg;

	/* Only log events that needs attention */
	if (elog_buf->event_severity < OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT ||
	    elog_buf->elog_origin != ORG_SAPPHIRE) {
		prlog(PR_INFO, "dropping non severe PEL event\n");
		opal_elog_complete(elog_buf, true);
		return 0;
	}

	/* We pass a large request size in to mkmsg so that we have a
	 * large enough allocation to reuse the message to pass the
	 * PEL data via a series of partial add commands.  */
	msg = ipmi_sel_alloc_msg(elog_buf);
	if (!msg) {
		opal_elog_complete(elog_buf, false);
		return OPAL_RESOURCE;
	}
	msg->error = ipmi_elog_error;

	msg->req_size = 0;

	if (elog_buf->event_severity == OPAL_ERROR_PANIC)
		ipmi_queue_msg_sync(msg);
	else
		ipmi_queue_msg(msg);

	return 0;
}

#define ACCESS_DENIED	0x00
#define ACCESS_GRANTED	0x01

static void sel_pnor(uint8_t access)
{
	struct ipmi_msg *msg;
	uint8_t granted = ACCESS_GRANTED;

	switch (access) {
	case REQUEST_PNOR:
		prlog(PR_NOTICE, "PNOR access requested\n");
		granted = flash_reserve();
		if (granted)
			occ_pnor_set_owner(PNOR_OWNER_EXTERNAL);

		/* Ack the request */
		msg = ipmi_mkmsg_simple(IPMI_PNOR_ACCESS_STATUS, &granted, 1);
		ipmi_queue_msg(msg);
		break;
	case RELEASE_PNOR:
		prlog(PR_NOTICE, "PNOR access released\n");
		flash_release();
		occ_pnor_set_owner(PNOR_OWNER_HOST);
		break;
	default:
		/**
		 * @fwts-label InvalidPNORAccessRequest
		 * @fwts-advice In negotiating PNOR access with BMC, we
		 * got an odd/invalid request from the BMC. Likely a bug
		 * in OPAL/BMC interaction.
		 */
		prlog(PR_ERR, "invalid PNOR access requested: %02x\n",
		      access);
	}
}

static void sel_power(uint8_t power)
{
	switch (power) {
	case SOFT_OFF:
		prlog(PR_NOTICE, "Soft shutdown requested\n");
		if (!(debug_descriptor.state_flags & OPAL_BOOT_COMPLETE) &&
		    platform.cec_power_down) {
			prlog(PR_NOTICE, "Host not up, shutting down now\n");
			platform.cec_power_down(IPMI_CHASSIS_PWR_DOWN);
		} else {
			opal_queue_msg(OPAL_MSG_SHUTDOWN, NULL, NULL, SOFT_OFF);
		}
		break;
	case SOFT_REBOOT:
		prlog(PR_NOTICE, "Soft reboot requested\n");
		if (!(debug_descriptor.state_flags & OPAL_BOOT_COMPLETE) &&
		    platform.cec_reboot) {
			prlog(PR_NOTICE, "Host not up, rebooting now\n");
			platform.cec_reboot();
		} else {
			opal_queue_msg(OPAL_MSG_SHUTDOWN, NULL, NULL, SOFT_REBOOT);
		}
		break;
	default:
		prlog(PR_WARNING, "requested bad power state: %02x\n",
		      power);
	}
}

static uint32_t occ_sensor_id_to_chip(uint8_t sensor, uint32_t *chip)
{
	/* todo: lookup sensor ID node in the DT, and map to a chip id */
	(void)sensor;
	*chip = 0;
	return 0;
}

static void sel_occ_reset(uint8_t sensor)
{
	uint32_t chip;
	int rc;

	rc = occ_sensor_id_to_chip(sensor, &chip);
	if (rc) {
		/**
		 * @fwts-label: SELUnknownOCCReset
		 * @fwts-advice: Likely bug in what sent us the OCC reset.
		 */
		prlog(PR_ERR, "SEL message to reset an unknown OCC "
				"(sensor ID 0x%02x)\n", sensor);
		return;
	}

	prd_occ_reset(chip);
}

void ipmi_parse_sel(struct ipmi_msg *msg)
{
	struct oem_sel sel;

	assert(msg->resp_size <= 16);

	memcpy(&sel, msg->data, msg->resp_size);

	/* We do not process system event records */
	if (sel.type == SEL_RECORD_TYPE_EVENT) {
		prlog(PR_INFO, "dropping System Event Record SEL\n");
		return;
	}

	prlog(PR_DEBUG, "SEL received (%d bytes, netfn %d, cmd %d)\n",
			msg->resp_size, sel.netfun, sel.cmd);

	/* Only accept OEM SEL messages */
	if (sel.id[0] != SEL_OEM_ID_0 ||
	    sel.id[1] != SEL_OEM_ID_1 ||
	    sel.type != SEL_RECORD_TYPE_OEM) {
		prlog(PR_WARNING, "unknown SEL %02x%02x (type %02x)\n",
		      sel.id[0], sel.id[1], sel.type);
		return;
	}

	switch (sel.cmd) {
	case CMD_AMI_POWER:
		sel_power(sel.data[0]);
		break;
	case CMD_AMI_OCC_RESET:
		sel_occ_reset(sel.data[0]);
		break;
	case CMD_AMI_PNOR_ACCESS:
		sel_pnor(sel.data[0]);
		break;
	default:
		prlog(PR_WARNING,
		      "unknown OEM SEL command %02x received\n",
		      sel.cmd);
	}
}