linux/drivers/acpi/acpi_ipmi.c
Lv Zheng a1a69b297e ACPI / IPMI: Fix race caused by the unprotected ACPI IPMI user
This patch uses reference counting to fix the race caused by the
unprotected ACPI IPMI user.

There are two rules for using the ipmi_si APIs:
 1. In ipmi_si, ipmi_destroy_user() can ensure that no ipmi_recv_msg will
    be passed to ipmi_msg_handler(), but ipmi_request_settime() can not
    use an invalid ipmi_user_t.  This means the ipmi_si users must ensure
    that there won't be any local references on ipmi_user_t before invoking
    ipmi_destroy_user().
 2. In ipmi_si, the smi_gone()/new_smi() callbacks are protected by
    smi_watchers_mutex, so their execution is serialized.  But as a
    new smi can re-use a freed intf_num, the callback implementation
    must either not use intf_num as a means of identification, or
    ensure that all references to the previous smi are dropped before
    returning from the smi_gone() callback.

As the acpi_ipmi_device->user_interface check in acpi_ipmi_space_handler()
can happen before user_interface is set to NULL, and the code after the
check can run after user_interface becomes NULL, an on-going
acpi_ipmi_space_handler() can still pass an invalid
acpi_ipmi_device->user_interface to ipmi_request_settime().  Such race
conditions are not allowed by the IPMI layer's API design, as
ipmi_request_settime() will crash if that happens.

This patch follows the ipmi_devintf.c design:
 1. Invoke ipmi_destroy_user() after the reference count of
    acpi_ipmi_device drops to 0.  References of acpi_ipmi_device dropping
    to 0 also means tx_msg related to this acpi_ipmi_device are all freed.
    This matches the IPMI layer's API calling rule on ipmi_destroy_user()
    and ipmi_request_settime().
 2. ipmi_flush_tx_msg() is performed so that no on-going tx_msg can still be
    running in acpi_ipmi_space_handler().  And it is invoked after invoking
    __ipmi_dev_kill() where acpi_ipmi_device is deleted from the list with a
    "dead" flag set, and the "dead" flag check is also introduced to the
    point where a tx_msg is going to be added to the tx_msg_list so that no
    new tx_msg can be created after returning from the __ipmi_dev_kill().
 3. The waiting code in ipmi_flush_tx_msg() is deleted because it is no
    longer required: this patch ensures that no acpi_ipmi reference is
    still held on the ipmi_user_t before ipmi_destroy_user() is called,
    and ipmi_destroy_user() ensures that ipmi_msg_handler() can no longer
    be invoked after it returns.
 4. The flushing of tx_msg is also moved out of ipmi_lock in this patch.

The forthcoming IPMI operation region handler installation changes also
require acpi_ipmi_device to be handled in this style.

The header comment of the file is also updated due to this design change.

Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-09-30 19:46:12 +02:00

615 lines
17 KiB
C

/*
* acpi_ipmi.c - ACPI IPMI opregion
*
* Copyright (C) 2010, 2013 Intel Corporation
* Author: Zhao Yakui <yakui.zhao@intel.com>
* Lv Zheng <lv.zheng@intel.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <linux/ipmi.h>
#include <linux/device.h>
#include <linux/pnp.h>
#include <linux/spinlock.h>
/* Module metadata. */
MODULE_AUTHOR("Zhao Yakui");
MODULE_DESCRIPTION("ACPI IPMI Opregion driver");
MODULE_LICENSE("GPL");
/* Bit number in acpi_ipmi_device.flags: set while the opregion handler is installed */
#define IPMI_FLAGS_HANDLER_INSTALL 0
/* Values stored in acpi_ipmi_msg.msg_done and reported in acpi_ipmi_buffer.status */
#define ACPI_IPMI_OK 0
#define ACPI_IPMI_TIMEOUT 0x10
#define ACPI_IPMI_UNKNOWN 0x07
/* the IPMI timeout is 5s; passed as the last argument of ipmi_request_settime() */
#define IPMI_TIMEOUT (5000)
/* Capacity, in bytes, of the request/response data arrays */
#define ACPI_IPMI_MAX_MSG_LENGTH 64
/*
 * Per-interface state.  One instance is created by ipmi_dev_alloc() for each
 * IPMI interface accepted by ipmi_register_bmc().
 */
struct acpi_ipmi_device {
/* the device list attached to driver_data.ipmi_devices */
struct list_head head;
/* the IPMI request message list */
struct list_head tx_msg_list;
/* protects tx_msg_list and curr_msgid */
spinlock_t tx_msg_lock;
/* ACPI handle the opregion handler is installed on */
acpi_handle handle;
/* PNP device derived from smi_data.dev; used for the dev_warn() messages */
struct pnp_dev *pnp_dev;
/* IPMI user returned by ipmi_create_user() */
ipmi_user_t user_interface;
int ipmi_ifnum; /* IPMI interface number */
/* last message id handed out; incremented for every formatted request */
long curr_msgid;
/* bit field; see IPMI_FLAGS_HANDLER_INSTALL */
unsigned long flags;
/* copy of the SMI info passed to ipmi_dev_alloc() */
struct ipmi_smi_info smi_data;
/* set by __ipmi_dev_kill() once the device has left the device list */
bool dead;
/* reference count; ipmi_dev_release_kref() runs when it drops to zero */
struct kref kref;
};
/* Driver-wide state; the single instance in this file is driver_data. */
struct ipmi_driver_data {
/* list of acpi_ipmi_device, linked through acpi_ipmi_device.head */
struct list_head ipmi_devices;
/* SMI watcher carrying the new_smi/smi_gone callbacks */
struct ipmi_smi_watcher bmc_events;
/* handlers passed to ipmi_create_user() */
struct ipmi_user_hndl ipmi_hndlrs;
/* protects ipmi_devices; acpi_ipmi_device.dead is set and tested under it */
struct mutex ipmi_lock;
};
/*
 * One in-flight IPMI request issued by the opregion handler, together with
 * the storage for its response.
 */
struct acpi_ipmi_msg {
/* entry in acpi_ipmi_device.tx_msg_list */
struct list_head head;
/*
 * Generally speaking the addr type should be SI_ADDR_TYPE. And
 * the addr channel should be BMC.
 * In fact it can also be IPMB type. But we will have to
 * parse it from the Netfn command buffer. It is so complex
 * that it is skipped.
 */
struct ipmi_addr addr;
/* id used by ipmi_msg_handler() to match a response to this request */
long tx_msgid;
/* it is used to track whether the IPMI message is finished */
struct completion tx_complete;
/* request descriptor handed to ipmi_request_settime() */
struct kernel_ipmi_msg tx_message;
/* one of the ACPI_IPMI_* status values; starts as ACPI_IPMI_UNKNOWN */
int msg_done;
/* tx/rx data; copied from/to the ACPI object buffer */
u8 data[ACPI_IPMI_MAX_MSG_LENGTH];
/* number of valid response bytes in data[] */
u8 rx_len;
/* device this message was allocated for */
struct acpi_ipmi_device *device;
};
/*
 * IPMI request/response buffer per ACPI 4.0, sec 5.5.2.4.3.2.
 * The opregion handler's value pointer is cast to this layout for both the
 * request (length/data read) and the response (status/length/data written).
 */
struct acpi_ipmi_buffer {
/* ACPI_IPMI_* status written back by acpi_format_ipmi_response() */
u8 status;
/* number of valid bytes in data[] */
u8 length;
/* request or response payload */
u8 data[ACPI_IPMI_MAX_MSG_LENGTH];
};
/*
 * Forward declarations: these functions are referenced by the driver_data
 * initializer and by ipmi_dev_alloc()/ipmi_dev_release() before they are
 * defined.
 */
static void ipmi_register_bmc(int iface, struct device *dev);
static void ipmi_bmc_gone(int iface);
static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data);
static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi);
static void ipmi_remove_space_handler(struct acpi_ipmi_device *ipmi);
/*
 * The single driver-wide instance.  The device list head is initialized
 * statically; ipmi_lock is initialized at run time in acpi_ipmi_init().
 */
static struct ipmi_driver_data driver_data = {
.ipmi_devices = LIST_HEAD_INIT(driver_data.ipmi_devices),
/* interface arrival/removal callbacks */
.bmc_events = {
.owner = THIS_MODULE,
.new_smi = ipmi_register_bmc,
.smi_gone = ipmi_bmc_gone,
},
/* receive handler for the IPMI users created in ipmi_dev_alloc() */
.ipmi_hndlrs = {
.ipmi_recv_hndl = ipmi_msg_handler,
},
};
/*
 * Allocate and set up the acpi_ipmi_device for IPMI interface @iface.
 *
 * Takes a reference on smi_data->dev, creates an IPMI user for the
 * interface and attempts to install the IPMI opregion handler on @handle.
 * The returned device carries the initial kref reference.
 *
 * Returns the new device, or NULL when the allocation or the IPMI user
 * creation fails (the device reference is dropped again in that case).
 */
static struct acpi_ipmi_device *
ipmi_dev_alloc(int iface, struct ipmi_smi_info *smi_data, acpi_handle handle)
{
	struct acpi_ipmi_device *dev;
	ipmi_user_t ipmi_user;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return NULL;

	kref_init(&dev->kref);
	INIT_LIST_HEAD(&dev->head);
	INIT_LIST_HEAD(&dev->tx_msg_list);
	spin_lock_init(&dev->tx_msg_lock);

	dev->handle = handle;
	dev->pnp_dev = to_pnp_dev(get_device(smi_data->dev));
	memcpy(&dev->smi_data, smi_data, sizeof(struct ipmi_smi_info));
	dev->ipmi_ifnum = iface;

	if (ipmi_create_user(iface, &driver_data.ipmi_hndlrs, dev, &ipmi_user))
		goto err_free;

	dev->user_interface = ipmi_user;
	/* The result is deliberately not checked here. */
	ipmi_install_space_handler(dev);
	return dev;

err_free:
	put_device(smi_data->dev);
	kfree(dev);
	return NULL;
}
/*
 * Tear down a device: remove the opregion handler (if installed), destroy
 * the IPMI user, drop the reference on smi_data.dev and free the structure.
 * Called directly from the ipmi_register_bmc() error path and through
 * ipmi_dev_release_kref() when the last kref reference is dropped.
 */
static void ipmi_dev_release(struct acpi_ipmi_device *ipmi_device)
{
/* stop new opregion accesses before the IPMI user goes away */
ipmi_remove_space_handler(ipmi_device);
ipmi_destroy_user(ipmi_device->user_interface);
put_device(ipmi_device->smi_data.dev);
kfree(ipmi_device);
}
/* kref release callback: recover the enclosing device and release it. */
static void ipmi_dev_release_kref(struct kref *kref)
{
	ipmi_dev_release(container_of(kref, struct acpi_ipmi_device, kref));
}
/*
 * Unlink @ipmi_device from the driver's device list and mark it dead.
 * Does no locking itself; every caller in this file holds
 * driver_data.ipmi_lock around the call.
 */
static void __ipmi_dev_kill(struct acpi_ipmi_device *ipmi_device)
{
list_del(&ipmi_device->head);
/*
 * Always setting dead flag after deleting from the list or
 * list_for_each_entry() codes must get changed.
 */
ipmi_device->dead = true;
}
/*
 * Look up the device registered for IPMI interface @iface and take a
 * reference on it.  The list walk and the kref_get() both happen under
 * driver_data.ipmi_lock.
 *
 * Returns the referenced device, or NULL when no listed device matches.
 * A non-NULL result must be released with acpi_ipmi_dev_put().
 */
static struct acpi_ipmi_device *acpi_ipmi_dev_get(int iface)
{
	struct acpi_ipmi_device *pos, *found = NULL;

	mutex_lock(&driver_data.ipmi_lock);
	list_for_each_entry(pos, &driver_data.ipmi_devices, head) {
		if (pos->ipmi_ifnum != iface)
			continue;
		kref_get(&pos->kref);
		found = pos;
		break;
	}
	mutex_unlock(&driver_data.ipmi_lock);

	return found;
}
/*
 * Drop one reference on @ipmi_device; ipmi_dev_release_kref() is invoked
 * when this was the last one.
 */
static void acpi_ipmi_dev_put(struct acpi_ipmi_device *ipmi_device)
{
kref_put(&ipmi_device->kref, ipmi_dev_release_kref);
}
/*
 * Allocate a zeroed request message bound to @ipmi.
 *
 * The completion and list head are initialized and msg_done starts out as
 * ACPI_IPMI_UNKNOWN.  Returns NULL (after logging a warning) when the
 * allocation fails; the caller frees the message with kfree().
 */
static struct acpi_ipmi_msg *acpi_alloc_ipmi_msg(struct acpi_ipmi_device *ipmi)
{
	struct acpi_ipmi_msg *msg;

	msg = kzalloc(sizeof(struct acpi_ipmi_msg), GFP_KERNEL);
	if (msg == NULL) {
		dev_warn(&ipmi->pnp_dev->dev,
			 "Can't allocate memory for ipmi_msg\n");
		return NULL;
	}

	msg->device = ipmi;
	msg->msg_done = ACPI_IPMI_UNKNOWN;
	INIT_LIST_HEAD(&msg->head);
	init_completion(&msg->tx_complete);

	return msg;
}
/*
 * Decode an IPMI OpRegion offset: bits 15:8 carry the network function and
 * bits 7:0 the command.  The argument is parenthesized so that compound
 * expressions (e.g. "a | b") expand with the intended precedence.
 */
#define IPMI_OP_RGN_NETFN(offset) (((offset) >> 8) & 0xff)
#define IPMI_OP_RGN_CMD(offset) ((offset) & 0xff)
/*
 * Fill in @tx_msg from an opregion write.
 *
 * @tx_msg:  message allocated by acpi_alloc_ipmi_msg().
 * @address: opregion offset; encodes the IPMI network function and command
 *           (ACPI 4.0, sec 5.5.2.4.3).
 * @value:   pointer handed in by the opregion space handler; it points to
 *           the IPMI request buffer (struct acpi_ipmi_buffer).
 *
 * Returns 0 on success, or -EINVAL when the request length exceeds
 * ACPI_IPMI_MAX_MSG_LENGTH.
 */
static int acpi_format_ipmi_request(struct acpi_ipmi_msg *tx_msg,
				    acpi_physical_address address,
				    acpi_integer *value)
{
	struct acpi_ipmi_buffer *req = (struct acpi_ipmi_buffer *)value;
	struct kernel_ipmi_msg *kmsg = &tx_msg->tx_message;
	struct acpi_ipmi_device *ipmi;
	unsigned long irq_flags;

	kmsg->netfn = IPMI_OP_RGN_NETFN(address);
	kmsg->cmd = IPMI_OP_RGN_CMD(address);
	kmsg->data = tx_msg->data;

	/* Reject payloads that would not fit into tx_msg->data. */
	if (req->length > ACPI_IPMI_MAX_MSG_LENGTH) {
		dev_WARN_ONCE(&tx_msg->device->pnp_dev->dev, true,
			      "Unexpected request (msg len %d).\n",
			      req->length);
		return -EINVAL;
	}
	kmsg->data_len = req->length;
	memcpy(tx_msg->data, req->data, kmsg->data_len);

	/*
	 * Only the system interface address type on the BMC channel is
	 * produced.  A SEND_MESSAGE request under APP_REQUEST would need an
	 * IPMB address parsed out of the request buffer; that is not
	 * implemented (please fix me).
	 */
	tx_msg->addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
	tx_msg->addr.channel = IPMI_BMC_CHANNEL;
	tx_msg->addr.data[0] = 0;

	/* Hand out the next message id under the tx lock. */
	ipmi = tx_msg->device;
	spin_lock_irqsave(&ipmi->tx_msg_lock, irq_flags);
	tx_msg->tx_msgid = ++ipmi->curr_msgid;
	spin_unlock_irqrestore(&ipmi->tx_msg_lock, irq_flags);

	return 0;
}
/*
 * Write the outcome of @msg back into the opregion buffer.
 *
 * @value is the same pointer the request came in through and is reused as
 * the output buffer.  The status byte is always written; length and data
 * are only written when the message finished with ACPI_IPMI_OK.
 */
static void acpi_format_ipmi_response(struct acpi_ipmi_msg *msg,
				      acpi_integer *value)
{
	struct acpi_ipmi_buffer *resp = (struct acpi_ipmi_buffer *)value;

	/* Any status other than ACPI_IPMI_OK means the command did not succeed. */
	resp->status = msg->msg_done;

	if (msg->msg_done == ACPI_IPMI_OK) {
		resp->length = msg->rx_len;
		memcpy(resp->data, msg->data, msg->rx_len);
	}
}
static void ipmi_flush_tx_msg(struct acpi_ipmi_device *ipmi)
{
struct acpi_ipmi_msg *tx_msg, *temp;
unsigned long flags;
/*
* NOTE: On-going ipmi_recv_msg
* ipmi_msg_handler() may still be invoked by ipmi_si after
* flushing. But it is safe to do a fast flushing on module_exit()
* without waiting for all ipmi_recv_msg(s) to complete from
* ipmi_msg_handler() as it is ensured by ipmi_si that all
* ipmi_recv_msg(s) are freed after invoking ipmi_destroy_user().
*/
spin_lock_irqsave(&ipmi->tx_msg_lock, flags);
list_for_each_entry_safe(tx_msg, temp, &ipmi->tx_msg_list, head) {
/* wake up the sleep thread on the Tx msg */
complete(&tx_msg->tx_complete);
}
spin_unlock_irqrestore(&ipmi->tx_msg_lock, flags);
}
/*
 * Receive handler registered through driver_data.ipmi_hndlrs.
 *
 * @msg:           received IPMI message; always released with
 *                 ipmi_free_recv_msg() before returning.
 * @user_msg_data: the acpi_ipmi_device passed to ipmi_create_user().
 *
 * Looks up the pending request whose tx_msgid equals msg->msgid, stores the
 * response (or a timeout status) in it and completes it so that the waiter
 * in acpi_ipmi_space_handler() can continue.  Messages for an unexpected
 * user or with an unknown msgid are logged and dropped.
 */
static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
{
	struct acpi_ipmi_device *ipmi_device = user_msg_data;
	int msg_found = 0;
	struct acpi_ipmi_msg *tx_msg;
	struct pnp_dev *pnp_dev = ipmi_device->pnp_dev;
	unsigned long flags;

	if (msg->user != ipmi_device->user_interface) {
		dev_warn(&pnp_dev->dev, "Unexpected response is returned. "
			"returned user %p, expected user %p\n",
			msg->user, ipmi_device->user_interface);
		goto out_msg;
	}

	/* tx_msg_lock is held from the lookup until after the completion. */
	spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
	list_for_each_entry(tx_msg, &ipmi_device->tx_msg_list, head) {
		if (msg->msgid == tx_msg->tx_msgid) {
			msg_found = 1;
			break;
		}
	}
	if (!msg_found) {
		dev_warn(&pnp_dev->dev, "Unexpected response (msg id %ld) is "
			"returned.\n", msg->msgid);
		goto out_lock;
	}

	/* copy the response data to Rx_data buffer */
	if (msg->msg.data_len > ACPI_IPMI_MAX_MSG_LENGTH) {
		/* Too large for tx_msg->data: complete with msg_done untouched. */
		dev_WARN_ONCE(&pnp_dev->dev, true,
			      "Unexpected response (msg len %d).\n",
			      msg->msg.data_len);
		goto out_comp;
	}

	/*
	 * response msg is an error msg
	 *
	 * NOTE(review): recv_type is overwritten unconditionally, so the
	 * recv_type test below is always true and only data_len decides.
	 * Kept as-is to preserve behavior; confirm whether this is intended.
	 */
	msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
	if (msg->recv_type == IPMI_RESPONSE_RECV_TYPE &&
	    msg->msg.data_len == 1) {
		if (msg->msg.data[0] == IPMI_TIMEOUT_COMPLETION_CODE) {
			dev_WARN_ONCE(&pnp_dev->dev, true,
				      "Unexpected response (timeout).\n");
			tx_msg->msg_done = ACPI_IPMI_TIMEOUT;
		}
		goto out_comp;
	}

	tx_msg->rx_len = msg->msg.data_len;
	memcpy(tx_msg->data, msg->msg.data, tx_msg->rx_len);
	tx_msg->msg_done = ACPI_IPMI_OK;
out_comp:
	complete(&tx_msg->tx_complete);
out_lock:
	spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
out_msg:
	ipmi_free_recv_msg(msg);
}
static void ipmi_register_bmc(int iface, struct device *dev)
{
struct acpi_ipmi_device *ipmi_device, *temp;
struct pnp_dev *pnp_dev;
int err;
struct ipmi_smi_info smi_data;
acpi_handle handle;
err = ipmi_get_smi_info(iface, &smi_data);
if (err)
return;
if (smi_data.addr_src != SI_ACPI)
goto err_ref;
handle = smi_data.addr_info.acpi_info.acpi_handle;
if (!handle)
goto err_ref;
pnp_dev = to_pnp_dev(smi_data.dev);
ipmi_device = ipmi_dev_alloc(iface, &smi_data, handle);
if (!ipmi_device) {
dev_warn(&pnp_dev->dev, "Can't create IPMI user interface\n");
goto err_ref;
}
mutex_lock(&driver_data.ipmi_lock);
list_for_each_entry(temp, &driver_data.ipmi_devices, head) {
/*
* if the corresponding ACPI handle is already added
* to the device list, don't add it again.
*/
if (temp->handle == handle)
goto err_lock;
}
list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices);
mutex_unlock(&driver_data.ipmi_lock);
put_device(smi_data.dev);
return;
err_lock:
mutex_unlock(&driver_data.ipmi_lock);
ipmi_dev_release(ipmi_device);
err_ref:
put_device(smi_data.dev);
return;
}
/*
 * smi_gone callback: IPMI interface @iface has disappeared.
 *
 * The device registered for @iface is unlinked and marked dead under
 * driver_data.ipmi_lock.  Outside the lock, its pending requests are woken
 * up and the list's reference is dropped; the device itself is released
 * once the last in-flight user has dropped its reference too.
 */
static void ipmi_bmc_gone(int iface)
{
	struct acpi_ipmi_device *ipmi_device, *temp;
	bool dev_found = false;

	mutex_lock(&driver_data.ipmi_lock);
	list_for_each_entry_safe(ipmi_device, temp,
				 &driver_data.ipmi_devices, head) {
		/*
		 * Match the interface that went away.  Testing for
		 * inequality here would kill an unrelated device and leave
		 * the departed one registered.
		 */
		if (ipmi_device->ipmi_ifnum == iface) {
			dev_found = true;
			__ipmi_dev_kill(ipmi_device);
			break;
		}
	}
	mutex_unlock(&driver_data.ipmi_lock);

	/* ipmi_device is only meaningful when the loop exited via break. */
	if (dev_found) {
		ipmi_flush_tx_msg(ipmi_device);
		acpi_ipmi_dev_put(ipmi_device);
	}
}
/* --------------------------------------------------------------------------
* Address Space Management
* -------------------------------------------------------------------------- */
/*
 * This is the IPMI opregion space handler.
 * @function: indicates the read/write. In fact as the IPMI message is driven
 * by command, only write is meaningful.
 * @address: This contains the netfn/command of IPMI request message.
 * @bits : not used.
 * @value : it is an in/out parameter. It points to the IPMI message buffer.
 * Before the IPMI message is sent, it represents the actual request
 * IPMI message. After the IPMI message is finished, it represents
 * the response IPMI message returned by IPMI command.
 * @handler_context: the IPMI interface number, stored as a pointer-sized
 * integer by ipmi_install_space_handler().
 * @region_context: not used.
 *
 * Returns AE_TYPE for read accesses and for requests that cannot be
 * formatted, AE_NOT_EXIST when no device is listed for the interface or the
 * device has been killed, AE_NO_MEMORY when the message cannot be allocated,
 * AE_ERROR when ipmi_request_settime() fails, and AE_OK once the request has
 * been completed (the IPMI-level result is reported in the buffer's status
 * byte).
 */
static acpi_status
acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
u32 bits, acpi_integer *value,
void *handler_context, void *region_context)
{
struct acpi_ipmi_msg *tx_msg;
int iface = (long)handler_context;
struct acpi_ipmi_device *ipmi_device;
int err;
acpi_status status;
unsigned long flags;
/*
 * IPMI opregion message.
 * The IPMI message is first written to the BMC, and system software
 * then gets the response. So a read access of the IPMI opregion is
 * meaningless.
 */
if ((function & ACPI_IO_MASK) == ACPI_READ)
return AE_TYPE;
/* takes a reference that is held until out_ref */
ipmi_device = acpi_ipmi_dev_get(iface);
if (!ipmi_device)
return AE_NOT_EXIST;
tx_msg = acpi_alloc_ipmi_msg(ipmi_device);
if (!tx_msg) {
status = AE_NO_MEMORY;
goto out_ref;
}
if (acpi_format_ipmi_request(tx_msg, address, value) != 0) {
status = AE_TYPE;
goto out_msg;
}
/*
 * The dead check and the queueing happen under ipmi_lock, the same lock
 * under which __ipmi_dev_kill() sets the flag, so a message is never
 * queued on a device that has already been killed.
 */
mutex_lock(&driver_data.ipmi_lock);
/* Do not add a tx_msg that can not be flushed. */
if (ipmi_device->dead) {
status = AE_NOT_EXIST;
mutex_unlock(&driver_data.ipmi_lock);
goto out_msg;
}
spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
list_add_tail(&tx_msg->head, &ipmi_device->tx_msg_list);
spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
mutex_unlock(&driver_data.ipmi_lock);
err = ipmi_request_settime(ipmi_device->user_interface,
&tx_msg->addr,
tx_msg->tx_msgid,
&tx_msg->tx_message,
NULL, 0, 0, IPMI_TIMEOUT);
if (err) {
status = AE_ERROR;
goto out_list;
}
/* completed by ipmi_msg_handler() or by ipmi_flush_tx_msg() */
wait_for_completion(&tx_msg->tx_complete);
acpi_format_ipmi_response(tx_msg, value);
status = AE_OK;
out_list:
/* unlink under tx_msg_lock so the handlers never see a freed message */
spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
list_del(&tx_msg->head);
spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
out_msg:
kfree(tx_msg);
out_ref:
acpi_ipmi_dev_put(ipmi_device);
return status;
}
/*
 * Remove the IPMI opregion handler from ipmi->handle.  Does nothing unless
 * the IPMI_FLAGS_HANDLER_INSTALL bit is set; the bit is cleared afterwards.
 */
static void ipmi_remove_space_handler(struct acpi_ipmi_device *ipmi)
{
	if (test_bit(IPMI_FLAGS_HANDLER_INSTALL, &ipmi->flags)) {
		acpi_remove_address_space_handler(ipmi->handle,
						  ACPI_ADR_SPACE_IPMI,
						  &acpi_ipmi_space_handler);
		clear_bit(IPMI_FLAGS_HANDLER_INSTALL, &ipmi->flags);
	}
}
/*
 * Install acpi_ipmi_space_handler() for the IPMI address space on
 * ipmi->handle, passing the interface number as the handler context.
 *
 * Returns 0 when the handler is installed (or already was, according to the
 * IPMI_FLAGS_HANDLER_INSTALL bit), -EINVAL when ACPI rejects the
 * installation.
 */
static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi)
{
	acpi_status rc;

	if (test_bit(IPMI_FLAGS_HANDLER_INSTALL, &ipmi->flags))
		return 0;

	rc = acpi_install_address_space_handler(ipmi->handle,
			ACPI_ADR_SPACE_IPMI, &acpi_ipmi_space_handler,
			NULL, (void *)((long)ipmi->ipmi_ifnum));
	if (!ACPI_FAILURE(rc)) {
		set_bit(IPMI_FLAGS_HANDLER_INSTALL, &ipmi->flags);
		return 0;
	}

	dev_warn(&ipmi->pnp_dev->dev, "Can't register IPMI opregion space "
		 "handle\n");
	return -EINVAL;
}
/*
 * Module entry point.  When ACPI is disabled nothing is set up and 0 is
 * returned; otherwise the device-list mutex is initialized and the SMI
 * watcher is registered, whose result becomes the return value.
 */
static int __init acpi_ipmi_init(void)
{
	if (acpi_disabled)
		return 0;

	mutex_init(&driver_data.ipmi_lock);

	return ipmi_smi_watcher_register(&driver_data.bmc_events);
}
/*
 * Module exit: unregister the SMI watcher, then kill, flush and drop the
 * list reference of every device still on driver_data.ipmi_devices.
 * Does nothing when ACPI is disabled, mirroring acpi_ipmi_init().
 */
static void __exit acpi_ipmi_exit(void)
{
struct acpi_ipmi_device *ipmi_device;
if (acpi_disabled)
return;
ipmi_smi_watcher_unregister(&driver_data.bmc_events);
/*
 * When one smi_watcher is unregistered, it is only deleted
 * from the smi_watcher list. But the smi_gone callback function
 * is not called. So explicitly uninstall the ACPI IPMI opregion
 * handler and free it.
 */
mutex_lock(&driver_data.ipmi_lock);
while (!list_empty(&driver_data.ipmi_devices)) {
ipmi_device = list_first_entry(&driver_data.ipmi_devices,
struct acpi_ipmi_device,
head);
__ipmi_dev_kill(ipmi_device);
/* flushing and the final put are done with ipmi_lock released */
mutex_unlock(&driver_data.ipmi_lock);
ipmi_flush_tx_msg(ipmi_device);
acpi_ipmi_dev_put(ipmi_device);
mutex_lock(&driver_data.ipmi_lock);
}
mutex_unlock(&driver_data.ipmi_lock);
}
/* Register the module's init and exit entry points. */
module_init(acpi_ipmi_init);
module_exit(acpi_ipmi_exit);