linux/drivers/mtd/ubi/ubi.h
Artem Bityutskiy 43f9b25a9c UBI: bugfix: protect from volume removal
When the WL worker is moving an LEB, the volume might go away
occasionally. UBI does not handle these situations correctly.

This patch introduces a new mutex which serializes wear-levelling
worker and the the 'ubi_wl_put_peb()' function. Now, if one puts
an LEB, and its PEB is being moved, it will wait on the mutex.
And because we unmap all LEBs when removing volumes, this will make
the volume remove function to wait while the LEB movement
finishes.

Below is an example of an oops which should be fixed by this patch:

Pid: 9167, comm: io_paral Not tainted (2.6.24-rc5-ubi-2.6.git #2)
EIP: 0060:[<f884a379>] EFLAGS: 00010246 CPU: 0
EIP is at prot_tree_del+0x2a/0x63 [ubi]
EAX: f39a90e0 EBX: 00000000 ECX: 00000000 EDX: 00000134
ESI: f39a90e0 EDI: f39a90e0 EBP: f2d55ddc ESP: f2d55dd4
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process io_paral (pid: 9167, ti=f2d54000 task=f72a8030 task.ti=f2d54000)
Stack: f39a95f8 ef6aae50 f2d55e08 f884a511 f88538e1 f884ecea 00000134 00000000
       f39a9604 f39a95f0 efea8280 00000000 f39a90e0 f2d55e40 f8847261 f8850c3c
       f884eaad 00000001 000000b9 00000134 00000172 000000b9 00000134 00000001
Call Trace:
 [<c0105227>] show_trace_log_lvl+0x1a/0x30
 [<c01052e2>] show_stack_log_lvl+0xa5/0xca
 [<c01053d6>] show_registers+0xcf/0x21b
 [<c0105648>] die+0x126/0x224
 [<c0119a62>] do_page_fault+0x27f/0x60d
 [<c037dd62>] error_code+0x72/0x78
 [<f884a511>] ubi_wl_put_peb+0xf0/0x191 [ubi]
 [<f8847261>] ubi_eba_unmap_leb+0xaf/0xcc [ubi]
 [<f8843c21>] ubi_remove_volume+0x102/0x1e8 [ubi]
 [<f8846077>] ubi_cdev_ioctl+0x22a/0x383 [ubi]
 [<c017d768>] do_ioctl+0x68/0x71
 [<c017d7c6>] vfs_ioctl+0x55/0x271
 [<c017da15>] sys_ioctl+0x33/0x52
 [<c0104152>] sysenter_past_esp+0x5f/0xa5
 =======================

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
2007-12-26 19:15:16 +02:00

597 lines
19 KiB
C

/*
* Copyright (c) International Business Machines Corp., 2006
* Copyright (c) Nokia Corporation, 2006, 2007
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Artem Bityutskiy (Битюцкий Артём)
*/
#ifndef __UBI_UBI_H__
#define __UBI_UBI_H__
#include <linux/init.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
#include <mtd/ubi-header.h>
#include <linux/mtd/ubi.h>
#include "scan.h"
#include "debug.h"
/* Maximum number of supported UBI devices */
#define UBI_MAX_DEVICES 32
/* UBI name used for character devices, sysfs, etc */
#define UBI_NAME_STR "ubi"
/* Normal UBI messages */
#define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__)
/* UBI warning messages */
#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \
__FUNCTION__, ##__VA_ARGS__)
/* UBI error messages */
#define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \
__FUNCTION__, ##__VA_ARGS__)
/* Lowest number PEBs reserved for bad PEB handling */
#define MIN_RESEVED_PEBS 2
/* Background thread name pattern */
#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd"
/* This marker in the EBA table means that the LEB is um-mapped */
#define UBI_LEB_UNMAPPED -1
/*
* In case of errors, UBI tries to repeat the operation several times before
* returning error. The below constant defines how many times UBI re-tries.
*/
#define UBI_IO_RETRIES 3
/*
* Error codes returned by the I/O unit.
*
* UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
* 0xFF bytes
* UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
* valid erase counter header, and the rest are %0xFF bytes
* UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
* UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
* CRC)
* UBI_IO_BITFLIPS: bit-flips were detected and corrected
*/
enum {
UBI_IO_PEB_EMPTY = 1,
UBI_IO_PEB_FREE,
UBI_IO_BAD_EC_HDR,
UBI_IO_BAD_VID_HDR,
UBI_IO_BITFLIPS
};
/**
* struct ubi_wl_entry - wear-leveling entry.
* @rb: link in the corresponding RB-tree
* @ec: erase counter
* @pnum: physical eraseblock number
*
* This data structure is used in the WL unit. Each physical eraseblock has a
* corresponding &struct wl_entry object which may be kept in different
* RB-trees. See WL unit for details.
*/
struct ubi_wl_entry {
struct rb_node rb;
int ec;
int pnum;
};
/**
* struct ubi_ltree_entry - an entry in the lock tree.
* @rb: links RB-tree nodes
* @vol_id: volume ID of the locked logical eraseblock
* @lnum: locked logical eraseblock number
* @users: how many tasks are using this logical eraseblock or wait for it
* @mutex: read/write mutex to implement read/write access serialization to
* the (@vol_id, @lnum) logical eraseblock
*
* This data structure is used in the EBA unit to implement per-LEB locking.
* When a logical eraseblock is being locked - corresponding
* &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
* See EBA unit for details.
*/
struct ubi_ltree_entry {
struct rb_node rb;
int vol_id;
int lnum;
int users;
struct rw_semaphore mutex;
};
struct ubi_volume_desc;
/**
* struct ubi_volume - UBI volume description data structure.
* @dev: device object to make use of the the Linux device model
* @cdev: character device object to create character device
* @ubi: reference to the UBI device description object
* @vol_id: volume ID
* @ref_count: volume reference count
* @readers: number of users holding this volume in read-only mode
* @writers: number of users holding this volume in read-write mode
* @exclusive: whether somebody holds this volume in exclusive mode
* @checked: if this static volume was checked
*
* @reserved_pebs: how many physical eraseblocks are reserved for this volume
* @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
* @usable_leb_size: logical eraseblock size without padding
* @used_ebs: how many logical eraseblocks in this volume contain data
* @last_eb_bytes: how many bytes are stored in the last logical eraseblock
* @used_bytes: how many bytes of data this volume contains
* @upd_marker: non-zero if the update marker is set for this volume
* @corrupted: non-zero if the volume is corrupted (static volumes only)
* @alignment: volume alignment
* @data_pad: how many bytes are not used at the end of physical eraseblocks to
* satisfy the requested alignment
* @name_len: volume name length
* @name: volume name
*
* @updating: whether the volume is being updated
* @upd_ebs: how many eraseblocks are expected to be updated
* @upd_bytes: how many bytes are expected to be received
* @upd_received: how many update bytes were already received
* @upd_buf: update buffer which is used to collect update data
*
* @eba_tbl: EBA table of this volume (LEB->PEB mapping)
*
* @gluebi_desc: gluebi UBI volume descriptor
* @gluebi_refcount: reference count of the gluebi MTD device
* @gluebi_mtd: MTD device description object of the gluebi MTD device
*
* The @corrupted field indicates that the volume's contents is corrupted.
* Since UBI protects only static volumes, this field is not relevant to
* dynamic volumes - it is user's responsibility to assure their data
* integrity.
*
* The @upd_marker flag indicates that this volume is either being updated at
* the moment or is damaged because of an unclean reboot.
*/
struct ubi_volume {
struct device dev;
struct cdev cdev;
struct ubi_device *ubi;
int vol_id;
int ref_count;
int readers;
int writers;
int exclusive;
int checked;
int reserved_pebs;
int vol_type;
int usable_leb_size;
int used_ebs;
int last_eb_bytes;
long long used_bytes;
int upd_marker;
int corrupted;
int alignment;
int data_pad;
int name_len;
char name[UBI_VOL_NAME_MAX+1];
int updating;
int upd_ebs;
long long upd_bytes;
long long upd_received;
void *upd_buf;
int *eba_tbl;
#ifdef CONFIG_MTD_UBI_GLUEBI
/* Gluebi-related stuff may be compiled out */
struct ubi_volume_desc *gluebi_desc;
int gluebi_refcount;
struct mtd_info gluebi_mtd;
#endif
};
/**
* struct ubi_volume_desc - descriptor of the UBI volume returned when it is
* opened.
* @vol: reference to the corresponding volume description object
* @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
*/
struct ubi_volume_desc {
struct ubi_volume *vol;
int mode;
};
struct ubi_wl_entry;
/**
* struct ubi_device - UBI device description structure
* @dev: class device object to use the the Linux device model
* @cdev: character device object to create character device
* @ubi_num: UBI device number
* @ubi_name: UBI device name
* @vol_count: number of volumes in this UBI device
* @volumes: volumes of this UBI device
* @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs,
* @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count,
* @vol->readers, @vol->writers, @vol->exclusive,
* @vol->ref_count, @vol->mapping and @vol->eba_tbl.
*
* @rsvd_pebs: count of reserved physical eraseblocks
* @avail_pebs: count of available physical eraseblocks
* @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB
* handling
* @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling
*
* @vtbl_slots: how many slots are available in the volume table
* @vtbl_size: size of the volume table in bytes
* @vtbl: in-RAM volume table copy
* @volumes_mutex: protects on-flash volume table and serializes volume
* changes, like creation, deletion, update, resize
*
* @max_ec: current highest erase counter value
* @mean_ec: current mean erase counter value
*
* @global_sqnum: global sequence number
* @ltree_lock: protects the lock tree and @global_sqnum
* @ltree: the lock tree
* @alc_mutex: serializes "atomic LEB change" operations
*
* @used: RB-tree of used physical eraseblocks
* @free: RB-tree of free physical eraseblocks
* @scrub: RB-tree of physical eraseblocks which need scrubbing
* @prot: protection trees
* @prot.pnum: protection tree indexed by physical eraseblock numbers
* @prot.aec: protection tree indexed by absolute erase counter value
* @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
* @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
* fields
* @move_mutex: serializes eraseblock moves
* @wl_scheduled: non-zero if the wear-leveling was scheduled
* @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
* physical eraseblock
* @abs_ec: absolute erase counter
* @move_from: physical eraseblock from where the data is being moved
* @move_to: physical eraseblock where the data is being moved to
* @move_to_put: if the "to" PEB was put
* @works: list of pending works
* @works_count: count of pending works
* @bgt_thread: background thread description object
* @thread_enabled: if the background thread is enabled
* @bgt_name: background thread name
*
* @flash_size: underlying MTD device size (in bytes)
* @peb_count: count of physical eraseblocks on the MTD device
* @peb_size: physical eraseblock size
* @bad_peb_count: count of bad physical eraseblocks
* @good_peb_count: count of good physical eraseblocks
* @min_io_size: minimal input/output unit size of the underlying MTD device
* @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers
* @ro_mode: if the UBI device is in read-only mode
* @leb_size: logical eraseblock size
* @leb_start: starting offset of logical eraseblocks within physical
* eraseblocks
* @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
* @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
* @vid_hdr_offset: starting offset of the volume identifier header (might be
* unaligned)
* @vid_hdr_aloffset: starting offset of the VID header aligned to
* @hdrs_min_io_size
* @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
* @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
* not
* @mtd: MTD device descriptor
*
* @peb_buf1: a buffer of PEB size used for different purposes
* @peb_buf2: another buffer of PEB size used for different purposes
* @buf_mutex: proptects @peb_buf1 and @peb_buf2
* @dbg_peb_buf: buffer of PEB size used for debugging
* @dbg_buf_mutex: proptects @dbg_peb_buf
*/
struct ubi_device {
struct cdev cdev;
struct device dev;
int ubi_num;
char ubi_name[sizeof(UBI_NAME_STR)+5];
int vol_count;
struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT];
spinlock_t volumes_lock;
int rsvd_pebs;
int avail_pebs;
int beb_rsvd_pebs;
int beb_rsvd_level;
int vtbl_slots;
int vtbl_size;
struct ubi_vtbl_record *vtbl;
struct mutex volumes_mutex;
int max_ec;
int mean_ec;
/* EBA unit's stuff */
unsigned long long global_sqnum;
spinlock_t ltree_lock;
struct rb_root ltree;
struct mutex alc_mutex;
/* Wear-leveling unit's stuff */
struct rb_root used;
struct rb_root free;
struct rb_root scrub;
struct {
struct rb_root pnum;
struct rb_root aec;
} prot;
spinlock_t wl_lock;
struct mutex move_mutex;
int wl_scheduled;
struct ubi_wl_entry **lookuptbl;
unsigned long long abs_ec;
struct ubi_wl_entry *move_from;
struct ubi_wl_entry *move_to;
int move_to_put;
struct list_head works;
int works_count;
struct task_struct *bgt_thread;
int thread_enabled;
char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
/* I/O unit's stuff */
long long flash_size;
int peb_count;
int peb_size;
int bad_peb_count;
int good_peb_count;
int min_io_size;
int hdrs_min_io_size;
int ro_mode;
int leb_size;
int leb_start;
int ec_hdr_alsize;
int vid_hdr_alsize;
int vid_hdr_offset;
int vid_hdr_aloffset;
int vid_hdr_shift;
int bad_allowed;
struct mtd_info *mtd;
void *peb_buf1;
void *peb_buf2;
struct mutex buf_mutex;
#ifdef CONFIG_MTD_UBI_DEBUG
void *dbg_peb_buf;
struct mutex dbg_buf_mutex;
#endif
};
extern struct kmem_cache *ubi_ltree_slab;
extern struct kmem_cache *ubi_wl_entry_slab;
extern struct file_operations ubi_cdev_operations;
extern struct file_operations ubi_vol_cdev_operations;
extern struct ubi_device *ubi_devices[];
extern struct class *ubi_class;
/* vtbl.c */
int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
struct ubi_vtbl_record *vtbl_rec);
int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
/* vmt.c */
int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
int ubi_remove_volume(struct ubi_volume_desc *desc);
int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
/* upd.c */
int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes);
int ubi_more_update_data(struct ubi_device *ubi, int vol_id,
const void __user *buf, int count);
/* misc.c */
int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
int ubi_check_volume(struct ubi_device *ubi, int vol_id);
void ubi_calculate_reserved(struct ubi_device *ubi);
/* gluebi.c */
#ifdef CONFIG_MTD_UBI_GLUEBI
int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol);
int ubi_destroy_gluebi(struct ubi_volume *vol);
void ubi_gluebi_updated(struct ubi_volume *vol);
#else
#define ubi_create_gluebi(ubi, vol) 0
#define ubi_destroy_gluebi(vol) 0
#define ubi_gluebi_updated(vol)
#endif
/* eba.c */
int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
int lnum);
int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
void *buf, int offset, int len, int check);
int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
const void *buf, int offset, int len, int dtype);
int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
int lnum, const void *buf, int len, int dtype,
int used_ebs);
int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
int lnum, const void *buf, int len, int dtype);
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_vid_hdr *vid_hdr);
int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
void ubi_eba_close(const struct ubi_device *ubi);
/* wl.c */
int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture);
int ubi_wl_flush(struct ubi_device *ubi);
int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum);
int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
void ubi_wl_close(struct ubi_device *ubi);
/* io.c */
int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
int len);
int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
int len);
int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture);
int ubi_io_is_bad(const struct ubi_device *ubi, int pnum);
int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum);
int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
struct ubi_ec_hdr *ec_hdr, int verbose);
int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum,
struct ubi_ec_hdr *ec_hdr);
int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
struct ubi_vid_hdr *vid_hdr, int verbose);
int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum,
struct ubi_vid_hdr *vid_hdr);
/*
* ubi_rb_for_each_entry - walk an RB-tree.
* @rb: a pointer to type 'struct rb_node' to to use as a loop counter
* @pos: a pointer to RB-tree entry type to use as a loop counter
* @root: RB-tree's root
* @member: the name of the 'struct rb_node' within the RB-tree entry
*/
#define ubi_rb_for_each_entry(rb, pos, root, member) \
for (rb = rb_first(root), \
pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \
rb; \
rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member))
/**
* ubi_zalloc_vid_hdr - allocate a volume identifier header object.
* @ubi: UBI device description object
* @gfp_flags: GFP flags to allocate with
*
* This function returns a pointer to the newly allocated and zero-filled
* volume identifier header object in case of success and %NULL in case of
* failure.
*/
static inline struct ubi_vid_hdr *
ubi_zalloc_vid_hdr(const struct ubi_device *ubi, gfp_t gfp_flags)
{
void *vid_hdr;
vid_hdr = kzalloc(ubi->vid_hdr_alsize, gfp_flags);
if (!vid_hdr)
return NULL;
/*
* VID headers may be stored at un-aligned flash offsets, so we shift
* the pointer.
*/
return vid_hdr + ubi->vid_hdr_shift;
}
/**
* ubi_free_vid_hdr - free a volume identifier header object.
* @ubi: UBI device description object
* @vid_hdr: the object to free
*/
static inline void ubi_free_vid_hdr(const struct ubi_device *ubi,
struct ubi_vid_hdr *vid_hdr)
{
void *p = vid_hdr;
if (!p)
return;
kfree(p - ubi->vid_hdr_shift);
}
/*
* This function is equivalent to 'ubi_io_read()', but @offset is relative to
* the beginning of the logical eraseblock, not to the beginning of the
* physical eraseblock.
*/
static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf,
int pnum, int offset, int len)
{
ubi_assert(offset >= 0);
return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len);
}
/*
* This function is equivalent to 'ubi_io_write()', but @offset is relative to
* the beginning of the logical eraseblock, not to the beginning of the
* physical eraseblock.
*/
static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
int pnum, int offset, int len)
{
ubi_assert(offset >= 0);
return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len);
}
/**
* ubi_ro_mode - switch to read-only mode.
* @ubi: UBI device description object
*/
static inline void ubi_ro_mode(struct ubi_device *ubi)
{
if (!ubi->ro_mode) {
ubi->ro_mode = 1;
ubi_warn("switch to read-only mode");
}
}
/**
* vol_id2idx - get table index by volume ID.
* @ubi: UBI device description object
* @vol_id: volume ID
*/
static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id)
{
if (vol_id >= UBI_INTERNAL_VOL_START)
return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots;
else
return vol_id;
}
/**
* idx2vol_id - get volume ID by table index.
* @ubi: UBI device description object
* @idx: table index
*/
static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
{
if (idx >= ubi->vtbl_slots)
return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START;
else
return idx;
}
#endif /* !__UBI_UBI_H__ */