diff --git a/.gitignore b/.gitignore index fdcce40226d7..3016ed30526d 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ TAGS vmlinux* !vmlinux.lds.S System.map +Module.markers Module.symvers !.gitignore diff --git a/CREDITS b/CREDITS index da0a56e23bee..8fec7b3f96d5 100644 --- a/CREDITS +++ b/CREDITS @@ -403,6 +403,8 @@ D: Linux CD and Support Giveaway List N: Erik Inge Bolsø E: knan@mo.himolde.no D: Misc kernel hacks +D: Updated PC speaker driver for 2.3 +S: Norway N: Andreas E. Bombe E: andreas.bombe@munich.netsurf.de @@ -3116,6 +3118,12 @@ S: Post Office Box 64132 S: Sunnyvale, California 94088-4132 S: USA +N: Stas Sergeev +E: stsp@users.sourceforge.net +D: PCM PC-Speaker driver +D: misc fixes +S: Russia + N: Simon Shapiro E: shimon@i-Connect.Net W: http://www.-i-Connect.Net/~shimon diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index e8fb24671967..1977fab38656 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -25,8 +25,6 @@ DMA-API.txt - DMA API, pci_ API & extensions for non-consistent memory machines. DMA-ISA-LPC.txt - How to do DMA with ISA (and LPC) devices. -DMA-mapping.txt - - info for PCI drivers using DMA portably across all platforms. DocBook/ - directory with DocBook templates etc. for kernel documentation. HOWTO @@ -43,8 +41,6 @@ ManagementStyle - how to (attempt to) manage kernel hackers. MSI-HOWTO.txt - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ. -PCIEBUS-HOWTO.txt - - a guide describing the PCI Express Port Bus driver. RCU/ - directory with info on RCU (read-copy update). README.DAC960 @@ -167,10 +163,8 @@ highuid.txt - notes on the change from 16 bit to 32 bit user/group IDs. hpet.txt - High Precision Event Timer Driver for Linux. -hrtimer/ - - info on the timer_stats debugging facility for timer (ab)use. -hrtimers/ - - info on the hrtimers subsystem for high-resolution kernel timers. +timers/ + - info on the timer related topics hw_random.txt - info on Linux support for random number generator in i8xx chipsets. hwmon/ @@ -287,12 +281,6 @@ parport.txt - how to use the parallel-port driver. parport-lowlevel.txt - description and usage of the low level parallel port functions. -pci-error-recovery.txt - - info on PCI error recovery. -pci.txt - - info on the PCI subsystem for device driver authors. -pcieaer-howto.txt - - the PCI Express Advanced Error Reporting Driver Guide HOWTO. pcmcia/ - info on the Linux PCMCIA driver. pi-futex.txt @@ -341,8 +329,6 @@ sgi-visws.txt - short blurb on the SGI Visual Workstations. sh/ - directory with info on porting Linux to a new architecture. -smart-config.txt - - description of the Smart Config makefile feature. sound/ - directory with info on sound card support. sparc/ diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb new file mode 100644 index 000000000000..9c49d8e6c0cc --- /dev/null +++ b/Documentation/ABI/obsolete/o2cb @@ -0,0 +1,11 @@ +What: /sys/o2cb symlink +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will + be removed when new versions of ocfs2-tools which know to look + in /sys/fs/o2cb are sufficiently prevalent. Don't code new + software to look here, it should try /sys/fs/o2cb instead. + See Documentation/ABI/stable/o2cb for more information on usage. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb new file mode 100644 index 000000000000..5eb1545e0b8d --- /dev/null +++ b/Documentation/ABI/stable/o2cb @@ -0,0 +1,10 @@ +What: /sys/fs/o2cb/ (was /sys/o2cb) +Date: Dec 2005 +KernelVersion: 2.6.16 +Contact: ocfs2-devel@oss.oracle.com +Description: Ocfs2-tools looks at 'interface-revision' for versioning + information. Each logmask/ file controls a set of debug prints + and can be written into with the strings "allow", "deny", or + "off". Reading the file returns the current state. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/stable/sysfs-class-ubi b/Documentation/ABI/stable/sysfs-class-ubi new file mode 100644 index 000000000000..18d471d9faea --- /dev/null +++ b/Documentation/ABI/stable/sysfs-class-ubi @@ -0,0 +1,212 @@ +What: /sys/class/ubi/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The ubi/ class sub-directory belongs to the UBI subsystem and + provides general UBI information, per-UBI device information + and per-UBI volume information. + +What: /sys/class/ubi/version +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + This file contains version of the latest supported UBI on-media + format. Currently it is 1, and there is no plan to change this. + However, if in the future UBI needs on-flash format changes + which cannot be done in a compatible manner, a new format + version will be added. So this is a mechanism for possible + future backward-compatible (but forward-incompatible) + improvements. + +What: /sys/class/ubiX/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The /sys/class/ubi0, /sys/class/ubi1, etc directories describe + UBI devices (UBI device 0, 1, etc). They contain general UBI + device information and per UBI volume information (each UBI + device may have many UBI volumes) + +What: /sys/class/ubi/ubiX/avail_eraseblocks +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Amount of available logical eraseblock. For example, one may + create a new UBI volume which has this amount of logical + eraseblocks. + +What: /sys/class/ubi/ubiX/bad_peb_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of bad physical eraseblocks on the underlying MTD device. + +What: /sys/class/ubi/ubiX/bgt_enabled +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the UBI background thread is disabled, + and ASCII "1\n" if it is enabled. + +What: /sys/class/ubi/ubiX/dev +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Major and minor numbers of the character device corresponding + to this UBI device (in : format). + +What: /sys/class/ubi/ubiX/eraseblock_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum logical eraseblock size this UBI device may provide. UBI + volumes may have smaller logical eraseblock size because of their + alignment. + +What: /sys/class/ubi/ubiX/max_ec +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum physical eraseblock erase counter value. + +What: /sys/class/ubi/ubiX/max_vol_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum number of volumes which this UBI device may have. + +What: /sys/class/ubi/ubiX/min_io_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Minimum input/output unit size. All the I/O may only be done + in fractions of the contained number. + +What: /sys/class/ubi/ubiX/mtd_num +Date: January 2008 +KernelVersion: 2.6.25 +Contact: Artem Bityutskiy +Description: + Number of the underlying MTD device. + +What: /sys/class/ubi/ubiX/reserved_for_bad +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Number of physical eraseblocks reserved for bad block handling. + +What: /sys/class/ubi/ubiX/total_eraseblocks +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Total number of good (not marked as bad) physical eraseblocks on + the underlying MTD device. + +What: /sys/class/ubi/ubiX/volumes_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of volumes on this UBI device. + +What: /sys/class/ubi/ubiX/ubiX_Y/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/, + etc directories describe UBI volumes on UBI device X (volumes + 0, 1, etc). + +What: /sys/class/ubi/ubiX/ubiX_Y/alignment +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume alignment - the value the logical eraseblock size of + this volume has to be aligned on. For example, 2048 means that + logical eraseblock size is multiple of 2048. In other words, + volume logical eraseblock size is UBI device logical eraseblock + size aligned to the alignment value. + +What: /sys/class/ubi/ubiX/ubiX_Y/corrupted +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n" + if it is corrupted (e.g., due to an interrupted volume update). + +What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The amount of data this volume contains. This value makes sense + only for static volumes, and for dynamic volume it equivalent + to the total volume size in bytes. + +What: /sys/class/ubi/ubiX/ubiX_Y/dev +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Major and minor numbers of the character device corresponding + to this UBI volume (in : format). + +What: /sys/class/ubi/ubiX/ubiX_Y/name +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume name. + +What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of physical eraseblock reserved for this volume. + Equivalent to the volume size in logical eraseblocks. + +What: /sys/class/ubi/ubiX/ubiX_Y/type +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume type. Contains ASCII "dynamic\n" for dynamic volumes and + "static\n" for static volumes. + +What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the update marker is not set for this + volume, and "1\n" if it is set. The update marker is set when + volume update starts, and cleaned when it ends. So the presence + of the update marker indicates that the volume is being updated + at the moment of the update was interrupted. The later may be + checked using the "corrupted" sysfs file. + +What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Logical eraseblock size of this volume. Equivalent to logical + eraseblock size of the device aligned on the volume alignment + value. diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci new file mode 100644 index 000000000000..ceddcff4082a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -0,0 +1,11 @@ +What: /sys/bus/pci/devices/.../vpd +Date: February 2008 +Contact: Ben Hutchings +Description: + A file named vpd in a device directory will be a + binary file containing the Vital Product Data for the + device. It should follow the VPD format defined in + PCI Specification 2.1 or 2.2, but users should consider + that some devices may have malformatted data. If the + underlying VPD has a writable section then the + corresponding section of this file will be writable. diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft new file mode 100644 index 000000000000..c2b7d1154bec --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ibft @@ -0,0 +1,23 @@ +What: /sys/firmware/ibft/initiator +Date: November 2007 +Contact: Konrad Rzeszutek +Description: The /sys/firmware/ibft/initiator directory will contain + files that expose the iSCSI Boot Firmware Table initiator data. + Usually this contains the Initiator name. + +What: /sys/firmware/ibft/targetX +Date: November 2007 +Contact: Konrad Rzeszutek +Description: The /sys/firmware/ibft/targetX directory will contain + files that expose the iSCSI Boot Firmware Table target data. + Usually this contains the target's IP address, boot LUN, + target name, and what NIC it is associated with. It can also + contain the CHAP name (and password), the reverse CHAP + name (and password) + +What: /sys/firmware/ibft/ethernetX +Date: November 2007 +Contact: Konrad Rzeszutek +Description: The /sys/firmware/ibft/ethernetX directory will contain + files that expose the iSCSI Boot Firmware Table NIC data. + This can this can the IP address, MAC, and gateway of the NIC. diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2 new file mode 100644 index 000000000000..b7cc516a8a8a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ocfs2 @@ -0,0 +1,89 @@ +What: /sys/fs/ocfs2/ +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2 directory contains knobs used by the + ocfs2-tools to interact with the filesystem. + +What: /sys/fs/ocfs2/max_locking_protocol +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/max_locking_protocol file displays version + of ocfs2 locking supported by the filesystem. This version + covers how ocfs2 uses distributed locking between cluster + nodes. + + The protocol version has a major and minor number. Two + cluster nodes can interoperate if they have an identical + major number and an overlapping minor number - thus, + a node with version 1.10 can interoperate with a node + sporting version 1.8, as long as both use the 1.8 protocol. + + Reading from this file returns a single line, the major + number and minor number joined by a period, eg "1.10". + + This file is read-only. The value is compiled into the + driver. + +What: /sys/fs/ocfs2/loaded_cluster_plugins +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/loaded_cluster_plugins file describes + the available plugins to support ocfs2 cluster operation. + A cluster plugin is required to use ocfs2 in a cluster. + There are currently two available plugins: + + * 'o2cb' - The classic o2cb cluster stack that ocfs2 has + used since its inception. + * 'user' - A plugin supporting userspace cluster software + in conjunction with fs/dlm. + + Reading from this file returns the names of all loaded + plugins, one per line. + + This file is read-only. Its contents may change as + plugins are loaded or removed. + +What: /sys/fs/ocfs2/active_cluster_plugin +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/active_cluster_plugin displays which + cluster plugin is currently in use by the filesystem. + The active plugin will appear in the loaded_cluster_plugins + file as well. Only one plugin can be used at a time. + + Reading from this file returns the name of the active plugin + on a single line. + + This file is read-only. Which plugin is active depends on + the cluster stack in use. The contents may change + when all filesystems are unmounted and the cluster stack + is changed. + +What: /sys/fs/ocfs2/cluster_stack +Date: April 2008 +Contact: ocfs2-devel@oss.oracle.com +Description: + The /sys/fs/ocfs2/cluster_stack file contains the name + of current ocfs2 cluster stack. This value is set by + userspace tools when bringing the cluster stack online. + + Cluster stack names are 4 characters in length. + + When the 'o2cb' cluster stack is used, the 'o2cb' cluster + plugin is active. All other cluster stacks use the 'user' + cluster plugin. + + Reading from this file returns the name of the current + cluster stack on a single line. + + Writing a new stack name to this file changes the current + cluster stack unless there are mounted ocfs2 filesystems. + If there are mounted filesystems, attempts to change the + stack return an error. + +Users: + ocfs2-tools diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 300e1707893f..83966e94cc32 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -9,9 +9,10 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml networking.xml \ - kernel-api.xml filesystems.xml lsm.xml usb.xml \ + kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ - genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml + genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ + mac80211.xml ### # The build process is as follows (targets): @@ -186,8 +187,11 @@ quiet_cmd_fig2png = FIG2PNG $@ ### # Rule to convert a .c file to inline XML documentation + gen_xml = : + quiet_gen_xml = echo ' GEN $@' +silent_gen_xml = : %.xml: %.c - @echo ' GEN $@' + @$($(quiet)gen_xml) @( \ echo ""; \ expand --tabs=8 < $< | \ diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index dc0f30c3e571..b7b1482f6e04 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -119,7 +119,7 @@ X!Ilib/string.c !Elib/string.c Bit Operations -!Iinclude/asm-x86/bitops_32.h +!Iinclude/asm-x86/bitops.h @@ -297,11 +297,6 @@ X!Earch/x86/kernel/mca_32.c !Ikernel/acct.c - - Power Management -!Ekernel/power/pm.c - - Device drivers infrastructure Device Drivers Base @@ -650,4 +645,58 @@ X!Idrivers/video/console/fonts.c !Edrivers/i2c/i2c-core.c + + Clock Framework + + + The clock framework defines programming interfaces to support + software management of the system clock tree. + This framework is widely used with System-On-Chip (SOC) platforms + to support power management and various devices which may need + custom clock rates. + Note that these "clocks" don't relate to timekeeping or real + time clocks (RTCs), each of which have separate frameworks. + These struct clk instances may be used + to manage for example a 96 MHz signal that is used to shift bits + into and out of peripherals or busses, or otherwise trigger + synchronous state machine transitions in system hardware. + + + + Power management is supported by explicit software clock gating: + unused clocks are disabled, so the system doesn't waste power + changing the state of transistors that aren't in active use. + On some systems this may be backed by hardware clock gating, + where clocks are gated without being disabled in software. + Sections of chips that are powered but not clocked may be able + to retain their last state. + This low power state is often called a retention + mode. + This mode still incurs leakage currents, especially with finer + circuit geometries, but for CMOS circuits power is mostly used + by clocked state changes. + + + + Power-aware drivers only enable their clocks when the device + they manage is in active use. Also, system sleep states often + differ according to which clock domains are active: while a + "standby" state may allow wakeup from several active domains, a + "mem" (suspend-to-RAM) state may require a more wholesale shutdown + of clocks derived from higher speed PLLs and oscillators, limiting + the number of possible wakeup event sources. A driver's suspend + method may need to be aware of system-specific clock constraints + on the target sleep state. + + + + Some platforms support programmable clock generators. These + can be used by external chips of various kinds, such as other + CPUs, multimedia codecs, and devices with strict requirements + for interface clocking. + + +!Iinclude/linux/clk.h + + diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 2e9d6b41f034..77c42f40be5d 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -241,7 +241,7 @@ The third type is a semaphore - (include/asm/semaphore.h): it + (include/linux/semaphore.h): it can have more than one holder at any time (the number decided at initialization time), although it is most commonly used as a single-holder lock (a mutex). If you can't get a semaphore, your @@ -290,7 +290,7 @@ If you have a data structure which is only ever accessed from user context, then you can use a simple semaphore - (linux/asm/semaphore.h) to protect it. This + (linux/linux/semaphore.h) to protect it. This is the most trivial case: you initialize the semaphore to the number of resources available (usually 1), and call down_interruptible() to grab the semaphore, and @@ -854,7 +854,7 @@ The change is shown below, in standard patch format: the }; -static DEFINE_MUTEX(cache_lock); -+static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; ++static DEFINE_SPINLOCK(cache_lock); static LIST_HEAD(cache); static unsigned int cache_num = 0; #define MAX_CACHE_SIZE 10 @@ -1238,7 +1238,7 @@ Here is the "lock-per-object" implementation: - int popularity; }; - static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(cache_lock); @@ -77,6 +84,7 @@ obj->id = id; obj->popularity = 0; @@ -1656,7 +1656,7 @@ the amount of locking which needs to be done. #include <linux/slab.h> #include <linux/string.h> +#include <linux/rcupdate.h> - #include <asm/semaphore.h> + #include <linux/semaphore.h> #include <asm/errno.h> struct object diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl new file mode 100644 index 000000000000..97618bed4d65 --- /dev/null +++ b/Documentation/DocBook/kgdb.tmpl @@ -0,0 +1,447 @@ + + + + + + Using kgdb and the kgdb Internals + + + + Jason + Wessel + +
+ jason.wessel@windriver.com +
+
+
+
+ + + + Tom + Rini + +
+ trini@kernel.crashing.org +
+
+
+
+ + + + Amit S. + Kale + +
+ amitkale@linsyssoft.com +
+
+
+
+ + + 2008 + Wind River Systems, Inc. + + + 2004-2005 + MontaVista Software, Inc. + + + 2004 + Amit S. Kale + + + + + This file is licensed under the terms of the GNU General Public License + version 2. This program is licensed "as is" without any warranty of any + kind, whether express or implied. + + + +
+ + + + Introduction + + kgdb is a source level debugger for linux kernel. It is used along + with gdb to debug a linux kernel. The expectation is that gdb can + be used to "break in" to the kernel to inspect memory, variables + and look through a cal stack information similar to what an + application developer would use gdb for. It is possible to place + breakpoints in kernel code and perform some limited execution + stepping. + + + Two machines are required for using kgdb. One of these machines is a + development machine and the other is a test machine. The kernel + to be debugged runs on the test machine. The development machine + runs an instance of gdb against the vmlinux file which contains + the symbols (not boot image such as bzImage, zImage, uImage...). + In gdb the developer specifies the connection parameters and + connects to kgdb. Depending on which kgdb I/O modules exist in + the kernel for a given architecture, it may be possible to debug + the test machine's kernel with the development machine using a + rs232 or ethernet connection. + + + + Compiling a kernel + + To enable CONFIG_KGDB, look under the "Kernel debugging" + and then select "KGDB: kernel debugging with remote gdb". + + + Next you should choose one of more I/O drivers to interconnect debugging + host and debugged target. Early boot debugging requires a KGDB + I/O driver that supports early debugging and the driver must be + built into the kernel directly. Kgdb I/O driver configuration + takes place via kernel or module parameters, see following + chapter. + + + The kgdb test compile options are described in the kgdb test suite chapter. + + + + + Enable kgdb for debugging + + In order to use kgdb you must activate it by passing configuration + information to one of the kgdb I/O drivers. If you do not pass any + configuration information kgdb will not do anything at all. Kgdb + will only actively hook up to the kernel trap hooks if a kgdb I/O + driver is loaded and configured. If you unconfigure a kgdb I/O + driver, kgdb will unregister all the kernel hook points. + + + All drivers can be reconfigured at run time, if + CONFIG_SYSFS and CONFIG_MODULES + are enabled, by echo'ing a new config string to + /sys/module/<driver>/parameter/<option>. + The driver can be unconfigured by passing an empty string. You cannot + change the configuration while the debugger is attached. Make sure + to detach the debugger with the detach command + prior to trying unconfigure a kgdb I/O driver. + + + Kernel parameter: kgdbwait + + The Kernel command line option kgdbwait makes + kgdb wait for a debugger connection during booting of a kernel. You + can only use this option you compiled a kgdb I/O driver into the + kernel and you specified the I/O driver configuration as a kernel + command line option. The kgdbwait parameter should always follow the + configuration parameter for the kgdb I/O driver in the kernel + command line else the I/O driver will not be configured prior to + asking the kernel to use it to wait. + + + The kernel will stop and wait as early as the I/O driver and + architecture will allow when you use this option. If you build the + kgdb I/O driver as a kernel module kgdbwait will not do anything. + + + + Kernel parameter: kgdboc + + The kgdboc driver was originally an abbreviation meant to stand for + "kgdb over console". Kgdboc is designed to work with a single + serial port. It was meant to cover the circumstance + where you wanted to use a serial console as your primary console as + well as using it to perform kernel debugging. Of course you can + also use kgdboc without assigning a console to the same port. + + + Using kgdboc + + You can configure kgdboc via sysfs or a module or kernel boot line + parameter depending on if you build with CONFIG_KGDBOC as a module + or built-in. + + From the module load or build-in + kgdboc=<tty-device>,[baud] + + The example here would be if your console port was typically ttyS0, you would use something like kgdboc=ttyS0,115200 or on the ARM Versatile AB you would likely use kgdboc=ttyAMA0,115200 + + + From sysfs + echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc + + + + + NOTE: Kgdboc does not support interrupting the target via the + gdb remote protocol. You must manually send a sysrq-g unless you + have a proxy that splits console output to a terminal problem and + has a separate port for the debugger to connect to that sends the + sysrq-g for you. + + When using kgdboc with no debugger proxy, you can end up + connecting the debugger for one of two entry points. If an + exception occurs after you have loaded kgdboc a message should print + on the console stating it is waiting for the debugger. In case you + disconnect your terminal program and then connect the debugger in + its place. If you want to interrupt the target system and forcibly + enter a debug session you have to issue a Sysrq sequence and then + type the letter g. Then you disconnect the + terminal session and connect gdb. Your options if you don't like + this are to hack gdb to send the sysrq-g for you as well as on the + initial connect, or to use a debugger proxy that allows an + unmodified gdb to do the debugging. + + + + + Kernel parameter: kgdbcon + + Kgdb supports using the gdb serial protocol to send console messages + to the debugger when the debugger is connected and running. There + are two ways to activate this feature. + + Activate with the kernel command line option: + kgdbcon + + Use sysfs before configuring an io driver + + echo 1 > /sys/module/kgdb/parameters/kgdb_use_con + + + NOTE: If you do this after you configure the kgdb I/O driver, the + setting will not take effect until the next point the I/O is + reconfigured. + + + + + + IMPORTANT NOTE: Using this option with kgdb over the console + (kgdboc) or kgdb over ethernet (kgdboe) is not supported. + + + + + Connecting gdb + + If you are using kgdboc, you need to have used kgdbwait as a boot + argument, issued a sysrq-g, or the system you are going to debug + has already taken an exception and is waiting for the debugger to + attach before you can connect gdb. + + + If you are not using different kgdb I/O driver other than kgdboc, + you should be able to connect and the target will automatically + respond. + + + Example (using a serial port): + + + % gdb ./vmlinux + (gdb) set remotebaud 115200 + (gdb) target remote /dev/ttyS0 + + + Example (kgdb to a terminal server): + + + % gdb ./vmlinux + (gdb) target remote udp:192.168.2.2:6443 + + + Example (kgdb over ethernet): + + + % gdb ./vmlinux + (gdb) target remote udp:192.168.2.2:6443 + + + Once connected, you can debug a kernel the way you would debug an + application program. + + + If you are having problems connecting or something is going + seriously wrong while debugging, it will most often be the case + that you want to enable gdb to be verbose about its target + communications. You do this prior to issuing the target + remote command by typing in: set remote debug 1 + + + + kgdb Test Suite + + When kgdb is enabled in the kernel config you can also elect to + enable the config parameter KGDB_TESTS. Turning this on will + enable a special kgdb I/O module which is designed to test the + kgdb internal functions. + + + The kgdb tests are mainly intended for developers to test the kgdb + internals as well as a tool for developing a new kgdb architecture + specific implementation. These tests are not really for end users + of the Linux kernel. The primary source of documentation would be + to look in the drivers/misc/kgdbts.c file. + + + The kgdb test suite can also be configured at compile time to run + the core set of tests by setting the kernel config parameter + KGDB_TESTS_ON_BOOT. This particular option is aimed at automated + regression testing and does not require modifying the kernel boot + config arguments. If this is turned on, the kgdb test suite can + be disabled by specifying "kgdbts=" as a kernel boot argument. + + + + KGDB Internals + + Architecture Specifics + + Kgdb is organized into three basic components: + + kgdb core + + The kgdb core is found in kernel/kgdb.c. It contains: + + All the logic to implement the gdb serial protocol + A generic OS exception handler which includes sync'ing the processors into a stopped state on an multi cpu system. + The API to talk to the kgdb I/O drivers + The API to make calls to the arch specific kgdb implementation + The logic to perform safe memory reads and writes to memory while using the debugger + A full implementation for software breakpoints unless overridden by the arch + + + + kgdb arch specific implementation + + This implementation is generally found in arch/*/kernel/kgdb.c. + As an example, arch/x86/kernel/kgdb.c contains the specifics to + implement HW breakpoint as well as the initialization to + dynamically register and unregister for the trap handlers on + this architecture. The arch specific portion implements: + + contains an arch specific trap catcher which + invokes kgdb_handle_exception() to start kgdb about doing its + work + translation to and from gdb specific packet format to pt_regs + Registration and unregistration of architecture specific trap hooks + Any special exception handling and cleanup + NMI exception handling and cleanup + (optional)HW breakpoints + + + + kgdb I/O driver + + Each kgdb I/O driver has to provide an implemenation for the following: + + configuration via builtin or module + dynamic configuration and kgdb hook registration calls + read and write character interface + A cleanup handler for unconfiguring from the kgdb core + (optional) Early debug methodology + + Any given kgdb I/O driver has to operate very closely with the + hardware and must do it in such a way that does not enable + interrupts or change other parts of the system context without + completely restoring them. The kgdb core will repeatedly "poll" + a kgdb I/O driver for characters when it needs input. The I/O + driver is expected to return immediately if there is no data + available. Doing so allows for the future possibility to touch + watch dog hardware in such a way as to have a target system not + reset when these are enabled. + + + + + + If you are intent on adding kgdb architecture specific support + for a new architecture, the architecture should define + HAVE_ARCH_KGDB in the architecture specific + Kconfig file. This will enable kgdb for the architecture, and + at that point you must create an architecture specific kgdb + implementation. + + + There are a few flags which must be set on every architecture in + their <asm/kgdb.h> file. These are: + + + + NUMREGBYTES: The size in bytes of all of the registers, so + that we can ensure they will all fit into a packet. + + + BUFMAX: The size in bytes of the buffer GDB will read into. + This must be larger than NUMREGBYTES. + + + CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call + flush_cache_range or flush_icache_range. On some architectures, + these functions may not be safe to call on SMP since we keep other + CPUs in a holding pattern. + + + + + + There are also the following functions for the common backend, + found in kernel/kgdb.c, that must be supplied by the + architecture-specific backend unless marked as (optional), in + which case a default function maybe used if the architecture + does not need to provide a specific implementation. + +!Iinclude/linux/kgdb.h + + + kgdboc internals + + The kgdboc driver is actually a very thin driver that relies on the + underlying low level to the hardware driver having "polling hooks" + which the to which the tty driver is attached. In the initial + implementation of kgdboc it the serial_core was changed to expose a + low level uart hook for doing polled mode reading and writing of a + single character while in an atomic context. When kgdb makes an I/O + request to the debugger, kgdboc invokes a call back in the serial + core which in turn uses the call back in the uart driver. It is + certainly possible to extend kgdboc to work with non-uart based + consoles in the future. + + + When using kgdboc with a uart, the uart driver must implement two callbacks in the struct uart_ops. Example from drivers/8250.c: +#ifdef CONFIG_CONSOLE_POLL + .poll_get_char = serial8250_get_poll_char, + .poll_put_char = serial8250_put_poll_char, +#endif + + Any implementation specifics around creating a polling driver use the + #ifdef CONFIG_CONSOLE_POLL, as shown above. + Keep in mind that polling hooks have to be implemented in such a way + that they can be called from an atomic context and have to restore + the state of the uart chip on return such that the system can return + to normal when the debugger detaches. You need to be very careful + with any kind of lock you consider, because failing here is most + going to mean pressing the reset button. + + + + + Credits + + The following people have contributed to this document: + + Amit Kaleamitkale@linsyssoft.com + Tom Rinitrini@kernel.crashing.org + + In March 2008 this document was completely rewritten by: + + Jason Wesseljason.wessel@windriver.com + + + +
+ diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl new file mode 100644 index 000000000000..b651e0a4b1c0 --- /dev/null +++ b/Documentation/DocBook/mac80211.tmpl @@ -0,0 +1,335 @@ + + + + + + The mac80211 subsystem for kernel developers + + + + Johannes + Berg + +
johannes@sipsolutions.net
+
+
+
+ + + 2007 + 2008 + Johannes Berg + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This documentation is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this documentation; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + + + +!Pinclude/net/mac80211.h Introduction +!Pinclude/net/mac80211.h Warning + +
+ + + + + + + The basic mac80211 driver interface + + + You should read and understand the information contained + within this part of the book while implementing a driver. + In some chapters, advanced usage is noted, that may be + skipped at first. + + + This part of the book only covers station and monitor mode + functionality, additional information required to implement + the other modes is covered in the second part of the book. + + + + + Basic hardware handling + TBD + + This chapter shall contain information on getting a hw + struct allocated and registered with mac80211. + + + Since it is required to allocate rates/modes before registering + a hw struct, this chapter shall also contain information on setting + up the rate/mode structs. + + + Additionally, some discussion about the callbacks and + the general programming model should be in here, including + the definition of ieee80211_ops which will be referred to + a lot. + + + Finally, a discussion of hardware capabilities should be done + with references to other parts of the book. + + +!Finclude/net/mac80211.h ieee80211_hw +!Finclude/net/mac80211.h ieee80211_hw_flags +!Finclude/net/mac80211.h SET_IEEE80211_DEV +!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR +!Finclude/net/mac80211.h ieee80211_ops +!Finclude/net/mac80211.h ieee80211_alloc_hw +!Finclude/net/mac80211.h ieee80211_register_hw +!Finclude/net/mac80211.h ieee80211_get_tx_led_name +!Finclude/net/mac80211.h ieee80211_get_rx_led_name +!Finclude/net/mac80211.h ieee80211_get_assoc_led_name +!Finclude/net/mac80211.h ieee80211_get_radio_led_name +!Finclude/net/mac80211.h ieee80211_unregister_hw +!Finclude/net/mac80211.h ieee80211_free_hw + + + + PHY configuration + TBD + + This chapter should describe PHY handling including + start/stop callbacks and the various structures used. + +!Finclude/net/mac80211.h ieee80211_conf +!Finclude/net/mac80211.h ieee80211_conf_flags + + + + Virtual interfaces + TBD + + This chapter should describe virtual interface basics + that are relevant to the driver (VLANs, MGMT etc are not.) + It should explain the use of the add_iface/remove_iface + callbacks as well as the interface configuration callbacks. + + Things related to AP mode should be discussed there. + + Things related to supporting multiple interfaces should be + in the appropriate chapter, a BIG FAT note should be here about + this though and the recommendation to allow only a single + interface in STA mode at first! + +!Finclude/net/mac80211.h ieee80211_if_types +!Finclude/net/mac80211.h ieee80211_if_init_conf +!Finclude/net/mac80211.h ieee80211_if_conf + + + + Receive and transmit processing + + what should be here + TBD + + This should describe the receive and transmit + paths in mac80211/the drivers as well as + transmit status handling. + + + + Frame format +!Pinclude/net/mac80211.h Frame format + + + Alignment issues + TBD + + + Calling into mac80211 from interrupts +!Pinclude/net/mac80211.h Calling mac80211 from interrupts + + + functions/definitions +!Finclude/net/mac80211.h ieee80211_rx_status +!Finclude/net/mac80211.h mac80211_rx_flags +!Finclude/net/mac80211.h ieee80211_tx_control +!Finclude/net/mac80211.h ieee80211_tx_status_flags +!Finclude/net/mac80211.h ieee80211_rx +!Finclude/net/mac80211.h ieee80211_rx_irqsafe +!Finclude/net/mac80211.h ieee80211_tx_status +!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe +!Finclude/net/mac80211.h ieee80211_rts_get +!Finclude/net/mac80211.h ieee80211_rts_duration +!Finclude/net/mac80211.h ieee80211_ctstoself_get +!Finclude/net/mac80211.h ieee80211_ctstoself_duration +!Finclude/net/mac80211.h ieee80211_generic_frame_duration +!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb +!Finclude/net/mac80211.h ieee80211_get_hdrlen +!Finclude/net/mac80211.h ieee80211_wake_queue +!Finclude/net/mac80211.h ieee80211_stop_queue +!Finclude/net/mac80211.h ieee80211_start_queues +!Finclude/net/mac80211.h ieee80211_stop_queues +!Finclude/net/mac80211.h ieee80211_wake_queues + + + + + Frame filtering +!Pinclude/net/mac80211.h Frame filtering +!Finclude/net/mac80211.h ieee80211_filter_flags + + + + + Advanced driver interface + + + Information contained within this part of the book is + of interest only for advanced interaction of mac80211 + with drivers to exploit more hardware capabilities and + improve performance. + + + + + Hardware crypto acceleration +!Pinclude/net/mac80211.h Hardware crypto acceleration + +!Finclude/net/mac80211.h set_key_cmd +!Finclude/net/mac80211.h ieee80211_key_conf +!Finclude/net/mac80211.h ieee80211_key_alg +!Finclude/net/mac80211.h ieee80211_key_flags + + + + Multiple queues and QoS support + TBD +!Finclude/net/mac80211.h ieee80211_tx_queue_params +!Finclude/net/mac80211.h ieee80211_tx_queue_stats_data +!Finclude/net/mac80211.h ieee80211_tx_queue + + + + Access point mode support + TBD + Some parts of the if_conf should be discussed here instead + + Insert notes about VLAN interfaces with hw crypto here or + in the hw crypto chapter. + +!Finclude/net/mac80211.h ieee80211_get_buffered_bc +!Finclude/net/mac80211.h ieee80211_beacon_get + + + + Supporting multiple virtual interfaces + TBD + + Note: WDS with identical MAC address should almost always be OK + + + Insert notes about having multiple virtual interfaces with + different MAC addresses here, note which configurations are + supported by mac80211, add notes about supporting hw crypto + with it. + + + + + Hardware scan offload + TBD +!Finclude/net/mac80211.h ieee80211_scan_completed + + + + + Rate control interface + + TBD + + This part of the book describes the rate control algorithm + interface and how it relates to mac80211 and drivers. + + + + dummy chapter + TBD + + + + + Internals + + TBD + + This part of the book describes mac80211 internals. + + + + + Key handling + + Key handling basics +!Pnet/mac80211/key.c Key handling basics + + + MORE TBD + TBD + + + + + Receive processing + TBD + + + + Transmit processing + TBD + + + + Station info handling + + Programming information +!Fnet/mac80211/sta_info.h sta_info +!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags + + + STA information lifetime rules +!Pnet/mac80211/sta_info.c STA information lifetime rules + + + + + Synchronisation + TBD + Locking, lots of RCU + + +
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl index d4188d4ff535..eeff19ca831b 100644 --- a/Documentation/DocBook/writing_usb_driver.tmpl +++ b/Documentation/DocBook/writing_usb_driver.tmpl @@ -100,8 +100,8 @@ useful documents, at the USB home page (see Resources). An excellent introduction to the Linux USB subsystem can be found at the USB Working Devices List (see Resources). It explains how the Linux USB subsystem is - structured and introduces the reader to the concept of USB urbs, which - are essential to USB drivers. + structured and introduces the reader to the concept of USB urbs + (USB Request Blocks), which are essential to USB drivers.
The first thing a Linux USB driver needs to do is register itself with @@ -162,8 +162,8 @@ static int __init usb_skel_init(void) module_init(usb_skel_init);
- When the driver is unloaded from the system, it needs to unregister - itself with the USB subsystem. This is done with the usb_unregister + When the driver is unloaded from the system, it needs to deregister + itself with the USB subsystem. This is done with the usb_deregister function: @@ -232,7 +232,7 @@ static int skel_probe(struct usb_interface *interface, were passed to the USB subsystem will be called from a user program trying to talk to the device. The first function called will be open, as the program tries to open the device for I/O. We increment our private usage - count and save off a pointer to our internal structure in the file + count and save a pointer to our internal structure in the file structure. This is done so that future calls to file operations will enable the driver to determine which device the user is addressing. All of this is done with the following code: @@ -252,8 +252,8 @@ file->private_data = dev; send to the device based on the size of the write urb it has created (this size depends on the size of the bulk out end point that the device has). Then it copies the data from user space to kernel space, points the urb to - the data and submits the urb to the USB subsystem. This can be shown in - he following code: + the data and submits the urb to the USB subsystem. This can be seen in + the following code: /* we can only write as much as 1 urb will hold */ diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 54835610b3d6..0291ade44c17 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -249,9 +249,11 @@ process is as follows: release a new -rc kernel every week. - Process continues until the kernel is considered "ready", the process should last around 6 weeks. - - A list of known regressions present in each -rc release is - tracked at the following URI: - http://kernelnewbies.org/known_regressions + - Known regressions in each release are periodically posted to the + linux-kernel mailing list. The goal is to reduce the length of + that list to zero before declaring the kernel to be "ready," but, in + the real world, a small number of regressions often remain at + release time. It is worth mentioning what Andrew Morton wrote on the linux-kernel mailing list about kernel releases: @@ -261,7 +263,7 @@ mailing list about kernel releases: 2.6.x.y -stable kernel tree --------------------------- -Kernels with 4 digit versions are -stable kernels. They contain +Kernels with 4-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant regressions discovered in a given 2.6.x kernel. @@ -273,7 +275,10 @@ If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is the current stable kernel. 2.6.x.y are maintained by the "stable" team , and are -released almost every other week. +released as needs dictate. The normal release period is approximately +two weeks, but it can be longer if there are no pressing problems. A +security-related problem, instead, can cause a release to happen almost +instantly. The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and @@ -298,7 +303,9 @@ a while Andrew or the subsystem maintainer pushes it on to Linus for inclusion in mainline. It is heavily encouraged that all new patches get tested in the -mm tree -before they are sent to Linus for inclusion in the main kernel tree. +before they are sent to Linus for inclusion in the main kernel tree. Code +which does not make an appearance in -mm before the opening of the merge +window will prove hard to merge into the mainline. These kernels are not appropriate for use on systems that are supposed to be stable and they are more risky to run than any of the other @@ -354,11 +361,12 @@ Here is a list of some of the different kernel trees available: - SCSI, James Bottomley git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git + - x86, Ingo Molnar + git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git + quilt trees: - - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman + - USB, Driver Core, and I2C, Greg Kroah-Hartman kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ - - x86-64, partly i386, Andi Kleen - ftp.firstfloor.org:/pub/ak/x86_64/quilt/ Other kernel trees can be found listed at http://git.kernel.org/ and in the MAINTAINERS file. @@ -392,8 +400,8 @@ If you want to be advised of the future bug reports, you can subscribe to the bugme-new mailing list (only new bug reports are mailed here) or to the bugme-janitor mailing list (every change in the bugzilla is mailed here) - http://lists.osdl.org/mailman/listinfo/bugme-new - http://lists.osdl.org/mailman/listinfo/bugme-janitors + http://lists.linux-foundation.org/mailman/listinfo/bugme-new + http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX new file mode 100644 index 000000000000..49f43946c6b6 --- /dev/null +++ b/Documentation/PCI/00-INDEX @@ -0,0 +1,12 @@ +00-INDEX + - this file +PCI-DMA-mapping.txt + - info for PCI drivers using DMA portably across all platforms +PCIEBUS-HOWTO.txt + - a guide describing the PCI Express Port Bus driver +pci-error-recovery.txt + - info on PCI error recovery +pci.txt + - info on the PCI subsystem for device driver authors +pcieaer-howto.txt + - the PCI Express Advanced Error Reporting Driver Guide HOWTO diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt new file mode 100644 index 000000000000..9a07e38631b0 --- /dev/null +++ b/Documentation/PCI/PCIEBUS-HOWTO.txt @@ -0,0 +1,217 @@ + The PCI Express Port Bus Driver Guide HOWTO + Tom L Nguyen tom.l.nguyen@intel.com + 11/03/2004 + +1. About this guide + +This guide describes the basics of the PCI Express Port Bus driver +and provides information on how to enable the service drivers to +register/unregister with the PCI Express Port Bus Driver. + +2. Copyright 2004 Intel Corporation + +3. What is the PCI Express Port Bus Driver + +A PCI Express Port is a logical PCI-PCI Bridge structure. There +are two types of PCI Express Port: the Root Port and the Switch +Port. The Root Port originates a PCI Express link from a PCI Express +Root Complex and the Switch Port connects PCI Express links to +internal logical PCI buses. The Switch Port, which has its secondary +bus representing the switch's internal routing logic, is called the +switch's Upstream Port. The switch's Downstream Port is bridging from +switch's internal routing bus to a bus representing the downstream +PCI Express link from the PCI Express Switch. + +A PCI Express Port can provide up to four distinct functions, +referred to in this document as services, depending on its port type. +PCI Express Port's services include native hotplug support (HP), +power management event support (PME), advanced error reporting +support (AER), and virtual channel support (VC). These services may +be handled by a single complex driver or be individually distributed +and handled by corresponding service drivers. + +4. Why use the PCI Express Port Bus Driver? + +In existing Linux kernels, the Linux Device Driver Model allows a +physical device to be handled by only a single driver. The PCI +Express Port is a PCI-PCI Bridge device with multiple distinct +services. To maintain a clean and simple solution each service +may have its own software service driver. In this case several +service drivers will compete for a single PCI-PCI Bridge device. +For example, if the PCI Express Root Port native hotplug service +driver is loaded first, it claims a PCI-PCI Bridge Root Port. The +kernel therefore does not load other service drivers for that Root +Port. In other words, it is impossible to have multiple service +drivers load and run on a PCI-PCI Bridge device simultaneously +using the current driver model. + +To enable multiple service drivers running simultaneously requires +having a PCI Express Port Bus driver, which manages all populated +PCI Express Ports and distributes all provided service requests +to the corresponding service drivers as required. Some key +advantages of using the PCI Express Port Bus driver are listed below: + + - Allow multiple service drivers to run simultaneously on + a PCI-PCI Bridge Port device. + + - Allow service drivers implemented in an independent + staged approach. + + - Allow one service driver to run on multiple PCI-PCI Bridge + Port devices. + + - Manage and distribute resources of a PCI-PCI Bridge Port + device to requested service drivers. + +5. Configuring the PCI Express Port Bus Driver vs. Service Drivers + +5.1 Including the PCI Express Port Bus Driver Support into the Kernel + +Including the PCI Express Port Bus driver depends on whether the PCI +Express support is included in the kernel config. The kernel will +automatically include the PCI Express Port Bus driver as a kernel +driver when the PCI Express support is enabled in the kernel. + +5.2 Enabling Service Driver Support + +PCI device drivers are implemented based on Linux Device Driver Model. +All service drivers are PCI device drivers. As discussed above, it is +impossible to load any service driver once the kernel has loaded the +PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver +Model requires some minimal changes on existing service drivers that +imposes no impact on the functionality of existing service drivers. + +A service driver is required to use the two APIs shown below to +register its service with the PCI Express Port Bus driver (see +section 5.2.1 & 5.2.2). It is important that a service driver +initializes the pcie_port_service_driver data structure, included in +header file /include/linux/pcieport_if.h, before calling these APIs. +Failure to do so will result an identity mismatch, which prevents +the PCI Express Port Bus driver from loading a service driver. + +5.2.1 pcie_port_service_register + +int pcie_port_service_register(struct pcie_port_service_driver *new) + +This API replaces the Linux Driver Model's pci_module_init API. A +service driver should always calls pcie_port_service_register at +module init. Note that after service driver being loaded, calls +such as pci_enable_device(dev) and pci_set_master(dev) are no longer +necessary since these calls are executed by the PCI Port Bus driver. + +5.2.2 pcie_port_service_unregister + +void pcie_port_service_unregister(struct pcie_port_service_driver *new) + +pcie_port_service_unregister replaces the Linux Driver Model's +pci_unregister_driver. It's always called by service driver when a +module exits. + +5.2.3 Sample Code + +Below is sample service driver code to initialize the port service +driver data structure. + +static struct pcie_port_service_id service_id[] = { { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .port_type = PCIE_RC_PORT, + .service_type = PCIE_PORT_SERVICE_AER, + }, { /* end: all zeroes */ } +}; + +static struct pcie_port_service_driver root_aerdrv = { + .name = (char *)device_name, + .id_table = &service_id[0], + + .probe = aerdrv_load, + .remove = aerdrv_unload, + + .suspend = aerdrv_suspend, + .resume = aerdrv_resume, +}; + +Below is a sample code for registering/unregistering a service +driver. + +static int __init aerdrv_service_init(void) +{ + int retval = 0; + + retval = pcie_port_service_register(&root_aerdrv); + if (!retval) { + /* + * FIX ME + */ + } + return retval; +} + +static void __exit aerdrv_service_exit(void) +{ + pcie_port_service_unregister(&root_aerdrv); +} + +module_init(aerdrv_service_init); +module_exit(aerdrv_service_exit); + +6. Possible Resource Conflicts + +Since all service drivers of a PCI-PCI Bridge Port device are +allowed to run simultaneously, below lists a few of possible resource +conflicts with proposed solutions. + +6.1 MSI Vector Resource + +The MSI capability structure enables a device software driver to call +pci_enable_msi to request MSI based interrupts. Once MSI interrupts +are enabled on a device, it stays in this mode until a device driver +calls pci_disable_msi to disable MSI interrupts and revert back to +INTx emulation mode. Since service drivers of the same PCI-PCI Bridge +port share the same physical device, if an individual service driver +calls pci_enable_msi/pci_disable_msi it may result unpredictable +behavior. For example, two service drivers run simultaneously on the +same physical Root Port. Both service drivers call pci_enable_msi to +request MSI based interrupts. A service driver may not know whether +any other service drivers have run on this Root Port. If either one +of them calls pci_disable_msi, it puts the other service driver +in a wrong interrupt mode. + +To avoid this situation all service drivers are not permitted to +switch interrupt mode on its device. The PCI Express Port Bus driver +is responsible for determining the interrupt mode and this should be +transparent to service drivers. Service drivers need to know only +the vector IRQ assigned to the field irq of struct pcie_device, which +is passed in when the PCI Express Port Bus driver probes each service +driver. Service drivers should use (struct pcie_device*)dev->irq to +call request_irq/free_irq. In addition, the interrupt mode is stored +in the field interrupt_mode of struct pcie_device. + +6.2 MSI-X Vector Resources + +Similar to the MSI a device driver for an MSI-X capable device can +call pci_enable_msix to request MSI-X interrupts. All service drivers +are not permitted to switch interrupt mode on its device. The PCI +Express Port Bus driver is responsible for determining the interrupt +mode and this should be transparent to service drivers. Any attempt +by service driver to call pci_enable_msix/pci_disable_msix may +result unpredictable behavior. Service drivers should use +(struct pcie_device*)dev->irq and call request_irq/free_irq. + +6.3 PCI Memory/IO Mapped Regions + +Service drivers for PCI Express Power Management (PME), Advanced +Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access +PCI configuration space on the PCI Express port. In all cases the +registers accessed are independent of each other. This patch assumes +that all service drivers will be well behaved and not overwrite +other service driver's configuration settings. + +6.4 PCI Config Registers + +Each service driver runs its PCI config operations on its own +capability structure except the PCI Express capability structure, in +which Root Control register and Device Control register are shared +between PME and AER. This patch assumes that all service drivers +will be well behaved and not overwrite other service driver's +configuration settings. diff --git a/Documentation/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt similarity index 100% rename from Documentation/pci-error-recovery.txt rename to Documentation/PCI/pci-error-recovery.txt diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt new file mode 100644 index 000000000000..8d4dc6250c58 --- /dev/null +++ b/Documentation/PCI/pci.txt @@ -0,0 +1,646 @@ + + How To Write Linux PCI Drivers + + by Martin Mares on 07-Feb-2000 + updated by Grant Grundler on 23-Dec-2006 + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The world of PCI is vast and full of (mostly unpleasant) surprises. +Since each CPU architecture implements different chip-sets and PCI devices +have different requirements (erm, "features"), the result is the PCI support +in the Linux kernel is not as trivial as one would wish. This short paper +tries to introduce all potential driver authors to Linux APIs for +PCI device drivers. + +A more complete resource is the third edition of "Linux Device Drivers" +by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman. +LDD3 is available for free (under Creative Commons License) from: + + http://lwn.net/Kernel/LDD3/ + +However, keep in mind that all documents are subject to "bit rot". +Refer to the source code if things are not working as described here. + +Please send questions/comments/patches about Linux PCI API to the +"Linux PCI" mailing list. + + + +0. Structure of PCI drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +PCI drivers "discover" PCI devices in a system via pci_register_driver(). +Actually, it's the other way around. When the PCI generic code discovers +a new device, the driver with a matching "description" will be notified. +Details on this below. + +pci_register_driver() leaves most of the probing for devices to +the PCI layer and supports online insertion/removal of devices [thus +supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver]. +pci_register_driver() call requires passing in a table of function +pointers and thus dictates the high level structure of a driver. + +Once the driver knows about a PCI device and takes ownership, the +driver generally needs to perform the following initialization: + + Enable the device + Request MMIO/IOP resources + Set the DMA mask size (for both coherent and streaming DMA) + Allocate and initialize shared control data (pci_allocate_coherent()) + Access device configuration space (if needed) + Register IRQ handler (request_irq()) + Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) + Enable DMA/processing engines + +When done using the device, and perhaps the module needs to be unloaded, +the driver needs to take the follow steps: + Disable the device from generating IRQs + Release the IRQ (free_irq()) + Stop all DMA activity + Release DMA buffers (both streaming and coherent) + Unregister from other subsystems (e.g. scsi or netdev) + Release MMIO/IOP resources + Disable the device + +Most of these topics are covered in the following sections. +For the rest look at LDD3 or . + +If the PCI subsystem is not configured (CONFIG_PCI is not set), most of +the PCI functions described below are defined as inline functions either +completely empty or just returning an appropriate error codes to avoid +lots of ifdefs in the drivers. + + + +1. pci_register_driver() call +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PCI device drivers call pci_register_driver() during their +initialization with a pointer to a structure describing the driver +(struct pci_driver): + + field name Description + ---------- ------------------------------------------------------ + id_table Pointer to table of device ID's the driver is + interested in. Most drivers should export this + table using MODULE_DEVICE_TABLE(pci,...). + + probe This probing function gets called (during execution + of pci_register_driver() for already existing + devices or later if a new device gets inserted) for + all PCI devices which match the ID table and are not + "owned" by the other drivers yet. This function gets + passed a "struct pci_dev *" for each device whose + entry in the ID table matches the device. The probe + function returns zero when the driver chooses to + take "ownership" of the device or an error code + (negative number) otherwise. + The probe function always gets called from process + context, so it can sleep. + + remove The remove() function gets called whenever a device + being handled by this driver is removed (either during + deregistration of the driver or when it's manually + pulled out of a hot-pluggable slot). + The remove function always gets called from process + context, so it can sleep. + + suspend Put device into low power state. + suspend_late Put device into low power state. + + resume_early Wake device from low power state. + resume Wake device from low power state. + + (Please see Documentation/power/pci.txt for descriptions + of PCI Power Management and the related functions.) + + shutdown Hook into reboot_notifier_list (kernel/sys.c). + Intended to stop any idling DMA operations. + Useful for enabling wake-on-lan (NIC) or changing + the power state of a device before reboot. + e.g. drivers/net/e100.c. + + err_handler See Documentation/PCI/pci-error-recovery.txt + + +The ID table is an array of struct pci_device_id entries ending with an +all-zero entry; use of the macro DEFINE_PCI_DEVICE_TABLE is the preferred +method of declaring the table. Each entry consists of: + + vendor,device Vendor and device ID to match (or PCI_ANY_ID) + + subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID) + subdevice, + + class Device class, subclass, and "interface" to match. + See Appendix D of the PCI Local Bus Spec or + include/linux/pci_ids.h for a full list of classes. + Most drivers do not need to specify class/class_mask + as vendor/device is normally sufficient. + + class_mask limit which sub-fields of the class field are compared. + See drivers/scsi/sym53c8xx_2/ for example of usage. + + driver_data Data private to the driver. + Most drivers don't need to use driver_data field. + Best practice is to use driver_data as an index + into a static list of equivalent device types, + instead of using it as a pointer. + + +Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up +a pci_device_id table. + +New PCI IDs may be added to a device driver pci_ids table at runtime +as shown below: + +echo "vendor device subvendor subdevice class class_mask driver_data" > \ +/sys/bus/pci/drivers/{driver}/new_id + +All fields are passed in as hexadecimal values (no leading 0x). +The vendor and device fields are mandatory, the others are optional. Users +need pass only as many optional fields as necessary: + o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) + o class and classmask fields default to 0 + o driver_data defaults to 0UL. + +Once added, the driver probe routine will be invoked for any unclaimed +PCI devices listed in its (newly updated) pci_ids list. + +When the driver exits, it just calls pci_unregister_driver() and the PCI layer +automatically calls the remove hook for all devices handled by the driver. + + +1.1 "Attributes" for driver functions/data + +Please mark the initialization and cleanup functions where appropriate +(the corresponding macros are defined in ): + + __init Initialization code. Thrown away after the driver + initializes. + __exit Exit code. Ignored for non-modular drivers. + + + __devinit Device initialization code. + Identical to __init if the kernel is not compiled + with CONFIG_HOTPLUG, normal function otherwise. + __devexit The same for __exit. + +Tips on when/where to use the above attributes: + o The module_init()/module_exit() functions (and all + initialization functions called _only_ from these) + should be marked __init/__exit. + + o Do not mark the struct pci_driver. + + o The ID table array should be marked __devinitconst; this is done + automatically if the table is declared with DEFINE_PCI_DEVICE_TABLE(). + + o The probe() and remove() functions should be marked __devinit + and __devexit respectively. All initialization functions + exclusively called by the probe() routine, can be marked __devinit. + Ditto for remove() and __devexit. + + o If mydriver_remove() is marked with __devexit(), then all address + references to mydriver_remove must use __devexit_p(mydriver_remove) + (in the struct pci_driver declaration for example). + __devexit_p() will generate the function name _or_ NULL if the + function will be discarded. For an example, see drivers/net/tg3.c. + + o Do NOT mark a function if you are not sure which mark to use. + Better to not mark the function than mark the function wrong. + + + +2. How to find PCI devices manually +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PCI drivers should have a really good reason for not using the +pci_register_driver() interface to search for PCI devices. +The main reason PCI devices are controlled by multiple drivers +is because one PCI device implements several different HW services. +E.g. combined serial/parallel port/floppy controller. + +A manual search may be performed using the following constructs: + +Searching by vendor and device ID: + + struct pci_dev *dev = NULL; + while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev)) + configure_device(dev); + +Searching by class ID (iterate in a similar way): + + pci_get_class(CLASS_ID, dev) + +Searching by both vendor/device and subsystem vendor/device ID: + + pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). + +You can use the constant PCI_ANY_ID as a wildcard replacement for +VENDOR_ID or DEVICE_ID. This allows searching for any device from a +specific vendor, for example. + +These functions are hotplug-safe. They increment the reference count on +the pci_dev that they return. You must eventually (possibly at module unload) +decrement the reference count on these devices by calling pci_dev_put(). + + + +3. Device Initialization Steps +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As noted in the introduction, most PCI drivers need the following steps +for device initialization: + + Enable the device + Request MMIO/IOP resources + Set the DMA mask size (for both coherent and streaming DMA) + Allocate and initialize shared control data (pci_allocate_coherent()) + Access device configuration space (if needed) + Register IRQ handler (request_irq()) + Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip) + Enable DMA/processing engines. + +The driver can access PCI config space registers at any time. +(Well, almost. When running BIST, config space can go away...but +that will just result in a PCI Bus Master Abort and config reads +will return garbage). + + +3.1 Enable the PCI device +~~~~~~~~~~~~~~~~~~~~~~~~~ +Before touching any device registers, the driver needs to enable +the PCI device by calling pci_enable_device(). This will: + o wake up the device if it was in suspended state, + o allocate I/O and memory regions of the device (if BIOS did not), + o allocate an IRQ (if BIOS did not). + +NOTE: pci_enable_device() can fail! Check the return value. + +[ OS BUG: we don't check resource allocations before enabling those + resources. The sequence would make more sense if we called + pci_request_resources() before calling pci_enable_device(). + Currently, the device drivers can't detect the bug when when two + devices have been allocated the same range. This is not a common + problem and unlikely to get fixed soon. + + This has been discussed before but not changed as of 2.6.19: + http://lkml.org/lkml/2006/3/2/194 +] + +pci_set_master() will enable DMA by setting the bus master bit +in the PCI_COMMAND register. It also fixes the latency timer value if +it's set to something bogus by the BIOS. + +If the PCI device can use the PCI Memory-Write-Invalidate transaction, +call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval +and also ensures that the cache line size register is set correctly. +Check the return value of pci_set_mwi() as not all architectures +or chip-sets may support Memory-Write-Invalidate. Alternatively, +if Mem-Wr-Inval would be nice to have but is not required, call +pci_try_set_mwi() to have the system do its best effort at enabling +Mem-Wr-Inval. + + +3.2 Request MMIO/IOP resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Memory (MMIO), and I/O port addresses should NOT be read directly +from the PCI device config space. Use the values in the pci_dev structure +as the PCI "bus address" might have been remapped to a "host physical" +address by the arch/chip-set specific kernel support. + +See Documentation/IO-mapping.txt for how to access device registers +or device memory. + +The device driver needs to call pci_request_region() to verify +no other device is already using the same address resource. +Conversely, drivers should call pci_release_region() AFTER +calling pci_disable_device(). +The idea is to prevent two devices colliding on the same address range. + +[ See OS BUG comment above. Currently (2.6.19), The driver can only + determine MMIO and IO Port resource availability _after_ calling + pci_enable_device(). ] + +Generic flavors of pci_request_region() are request_mem_region() +(for MMIO ranges) and request_region() (for IO Port ranges). +Use these for address resources that are not described by "normal" PCI +BARs. + +Also see pci_request_selected_regions() below. + + +3.3 Set the DMA mask size +~~~~~~~~~~~~~~~~~~~~~~~~~ +[ If anything below doesn't make sense, please refer to + Documentation/DMA-API.txt. This section is just a reminder that + drivers need to indicate DMA capabilities of the device and is not + an authoritative source for DMA interfaces. ] + +While all drivers should explicitly indicate the DMA capability +(e.g. 32 or 64 bit) of the PCI bus master, devices with more than +32-bit bus master capability for streaming data need the driver +to "register" this capability by calling pci_set_dma_mask() with +appropriate parameters. In general this allows more efficient DMA +on systems where System RAM exists above 4G _physical_ address. + +Drivers for all PCI-X and PCIe compliant devices must call +pci_set_dma_mask() as they are 64-bit DMA devices. + +Similarly, drivers must also "register" this capability if the device +can directly address "consistent memory" in System RAM above 4G physical +address by calling pci_set_consistent_dma_mask(). +Again, this includes drivers for all PCI-X and PCIe compliant devices. +Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are +64-bit DMA capable for payload ("streaming") data but not control +("consistent") data. + + +3.4 Setup shared control data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared) +memory. See Documentation/DMA-API.txt for a full description of +the DMA APIs. This section is just a reminder that it needs to be done +before enabling DMA on the device. + + +3.5 Initialize device registers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Some drivers will need specific "capability" fields programmed +or other "vendor specific" register initialized or reset. +E.g. clearing pending interrupts. + + +3.6 Register IRQ handler +~~~~~~~~~~~~~~~~~~~~~~~~ +While calling request_irq() is the last step described here, +this is often just another intermediate step to initialize a device. +This step can often be deferred until the device is opened for use. + +All interrupt handlers for IRQ lines should be registered with IRQF_SHARED +and use the devid to map IRQs to devices (remember that all PCI IRQ lines +can be shared). + +request_irq() will associate an interrupt handler and device handle +with an interrupt number. Historically interrupt numbers represent +IRQ lines which run from the PCI device to the Interrupt controller. +With MSI and MSI-X (more below) the interrupt number is a CPU "vector". + +request_irq() also enables the interrupt. Make sure the device is +quiesced and does not have any interrupts pending before registering +the interrupt handler. + +MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts" +which deliver interrupts to the CPU via a DMA write to a Local APIC. +The fundamental difference between MSI and MSI-X is how multiple +"vectors" get allocated. MSI requires contiguous blocks of vectors +while MSI-X can allocate several individual ones. + +MSI capability can be enabled by calling pci_enable_msi() or +pci_enable_msix() before calling request_irq(). This causes +the PCI support to program CPU vector data into the PCI device +capability registers. + +If your PCI device supports both, try to enable MSI-X first. +Only one can be enabled at a time. Many architectures, chip-sets, +or BIOSes do NOT support MSI or MSI-X and the call to pci_enable_msi/msix +will fail. This is important to note since many drivers have +two (or more) interrupt handlers: one for MSI/MSI-X and another for IRQs. +They choose which handler to register with request_irq() based on the +return value from pci_enable_msi/msix(). + +There are (at least) two really good reasons for using MSI: +1) MSI is an exclusive interrupt vector by definition. + This means the interrupt handler doesn't have to verify + its device caused the interrupt. + +2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed + to be visible to the host CPU(s) when the MSI is delivered. This + is important for both data coherency and avoiding stale control data. + This guarantee allows the driver to omit MMIO reads to flush + the DMA stream. + +See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples +of MSI/MSI-X usage. + + + +4. PCI device shutdown +~~~~~~~~~~~~~~~~~~~~~~~ + +When a PCI device driver is being unloaded, most of the following +steps need to be performed: + + Disable the device from generating IRQs + Release the IRQ (free_irq()) + Stop all DMA activity + Release DMA buffers (both streaming and consistent) + Unregister from other subsystems (e.g. scsi or netdev) + Disable device from responding to MMIO/IO Port addresses + Release MMIO/IO Port resource(s) + + +4.1 Stop IRQs on the device +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +How to do this is chip/device specific. If it's not done, it opens +the possibility of a "screaming interrupt" if (and only if) +the IRQ is shared with another device. + +When the shared IRQ handler is "unhooked", the remaining devices +using the same IRQ line will still need the IRQ enabled. Thus if the +"unhooked" device asserts IRQ line, the system will respond assuming +it was one of the remaining devices asserted the IRQ line. Since none +of the other devices will handle the IRQ, the system will "hang" until +it decides the IRQ isn't going to get handled and masks the IRQ (100,000 +iterations later). Once the shared IRQ is masked, the remaining devices +will stop functioning properly. Not a nice situation. + +This is another reason to use MSI or MSI-X if it's available. +MSI and MSI-X are defined to be exclusive interrupts and thus +are not susceptible to the "screaming interrupt" problem. + + +4.2 Release the IRQ +~~~~~~~~~~~~~~~~~~~ +Once the device is quiesced (no more IRQs), one can call free_irq(). +This function will return control once any pending IRQs are handled, +"unhook" the drivers IRQ handler from that IRQ, and finally release +the IRQ if no one else is using it. + + +4.3 Stop all DMA activity +~~~~~~~~~~~~~~~~~~~~~~~~~ +It's extremely important to stop all DMA operations BEFORE attempting +to deallocate DMA control data. Failure to do so can result in memory +corruption, hangs, and on some chip-sets a hard crash. + +Stopping DMA after stopping the IRQs can avoid races where the +IRQ handler might restart DMA engines. + +While this step sounds obvious and trivial, several "mature" drivers +didn't get this step right in the past. + + +4.4 Release DMA buffers +~~~~~~~~~~~~~~~~~~~~~~~ +Once DMA is stopped, clean up streaming DMA first. +I.e. unmap data buffers and return buffers to "upstream" +owners if there is one. + +Then clean up "consistent" buffers which contain the control data. + +See Documentation/DMA-API.txt for details on unmapping interfaces. + + +4.5 Unregister from other subsystems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Most low level PCI device drivers support some other subsystem +like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your +driver isn't losing resources from that other subsystem. +If this happens, typically the symptom is an Oops (panic) when +the subsystem attempts to call into a driver that has been unloaded. + + +4.6 Disable Device from responding to MMIO/IO Port addresses +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +io_unmap() MMIO or IO Port resources and then call pci_disable_device(). +This is the symmetric opposite of pci_enable_device(). +Do not access device registers after calling pci_disable_device(). + + +4.7 Release MMIO/IO Port Resource(s) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Call pci_release_region() to mark the MMIO or IO Port range as available. +Failure to do so usually results in the inability to reload the driver. + + + +5. How to access PCI config space +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can use pci_(read|write)_config_(byte|word|dword) to access the config +space of a device represented by struct pci_dev *. All these functions return 0 +when successful or an error code (PCIBIOS_...) which can be translated to a text +string by pcibios_strerror. Most drivers expect that accesses to valid PCI +devices don't fail. + +If you don't have a struct pci_dev available, you can call +pci_bus_(read|write)_config_(byte|word|dword) to access a given device +and function on that bus. + +If you access fields in the standard portion of the config header, please +use symbolic names of locations and bits declared in . + +If you need to access Extended PCI Capability registers, just call +pci_find_capability() for the particular capability and it will find the +corresponding register block for you. + + + +6. Other interesting functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pci_find_slot() Find pci_dev corresponding to given bus and + slot numbers. +pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) +pci_find_capability() Find specified capability in device's capability + list. +pci_resource_start() Returns bus start address for a given PCI region +pci_resource_end() Returns bus end address for a given PCI region +pci_resource_len() Returns the byte length of a PCI region +pci_set_drvdata() Set private driver data pointer for a pci_dev +pci_get_drvdata() Return private driver data pointer for a pci_dev +pci_set_mwi() Enable Memory-Write-Invalidate transactions. +pci_clear_mwi() Disable Memory-Write-Invalidate transactions. + + + +7. Miscellaneous hints +~~~~~~~~~~~~~~~~~~~~~~ + +When displaying PCI device names to the user (for example when a driver wants +to tell the user what card has it found), please use pci_name(pci_dev). + +Always refer to the PCI devices by a pointer to the pci_dev structure. +All PCI layer functions use this identification and it's the only +reasonable one. Don't use bus/slot/function numbers except for very +special purposes -- on systems with multiple primary buses their semantics +can be pretty complex. + +Don't try to turn on Fast Back to Back writes in your driver. All devices +on the bus need to be capable of doing it, so this is something which needs +to be handled by platform and generic code, not individual drivers. + + + +8. Vendor and device identifications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One is not not required to add new device ids to include/linux/pci_ids.h. +Please add PCI_VENDOR_ID_xxx for vendors and a hex constant for device ids. + +PCI_VENDOR_ID_xxx constants are re-used. The device ids are arbitrary +hex numbers (vendor controlled) and normally used only in a single +location, the pci_device_id table. + +Please DO submit new vendor/device ids to pciids.sourceforge.net project. + + + +9. Obsolete functions +~~~~~~~~~~~~~~~~~~~~~ + +There are several functions which you might come across when trying to +port an old driver to the new PCI interface. They are no longer present +in the kernel as they aren't compatible with hotplug or PCI domains or +having sane locking. + +pci_find_device() Superseded by pci_get_device() +pci_find_subsys() Superseded by pci_get_subsys() +pci_find_slot() Superseded by pci_get_slot() + + +The alternative is the traditional PCI device driver that walks PCI +device lists. This is still possible but discouraged. + + + +10. MMIO Space and "Write Posting" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Converting a driver from using I/O Port space to using MMIO space +often requires some additional changes. Specifically, "write posting" +needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2) +already do this. I/O Port space guarantees write transactions reach the PCI +device before the CPU can continue. Writes to MMIO space allow the CPU +to continue before the transaction reaches the PCI device. HW weenies +call this "Write Posting" because the write completion is "posted" to +the CPU before the transaction has reached its destination. + +Thus, timing sensitive code should add readl() where the CPU is +expected to wait before doing other work. The classic "bit banging" +sequence works fine for I/O Port space: + + for (i = 8; --i; val >>= 1) { + outb(val & 1, ioport_reg); /* write bit */ + udelay(10); + } + +The same sequence for MMIO space should be: + + for (i = 8; --i; val >>= 1) { + writeb(val & 1, mmio_reg); /* write bit */ + readb(safe_mmio_reg); /* flush posted write */ + udelay(10); + } + +It is important that "safe_mmio_reg" not have any side effects that +interferes with the correct operation of the device. + +Another case to watch out for is when resetting a PCI device. Use PCI +Configuration space reads to flush the writel(). This will gracefully +handle the PCI master abort on all platforms if the PCI device is +expected to not respond to a readl(). Most x86 platforms will allow +MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage +(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail"). + diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt new file mode 100644 index 000000000000..16c251230c82 --- /dev/null +++ b/Documentation/PCI/pcieaer-howto.txt @@ -0,0 +1,253 @@ + The PCI Express Advanced Error Reporting Driver Guide HOWTO + T. Long Nguyen + Yanmin Zhang + 07/29/2006 + + +1. Overview + +1.1 About this guide + +This guide describes the basics of the PCI Express Advanced Error +Reporting (AER) driver and provides information on how to use it, as +well as how to enable the drivers of endpoint devices to conform with +PCI Express AER driver. + +1.2 Copyright © Intel Corporation 2006. + +1.3 What is the PCI Express AER Driver? + +PCI Express error signaling can occur on the PCI Express link itself +or on behalf of transactions initiated on the link. PCI Express +defines two error reporting paradigms: the baseline capability and +the Advanced Error Reporting capability. The baseline capability is +required of all PCI Express components providing a minimum defined +set of error reporting requirements. Advanced Error Reporting +capability is implemented with a PCI Express advanced error reporting +extended capability structure providing more robust error reporting. + +The PCI Express AER driver provides the infrastructure to support PCI +Express Advanced Error Reporting capability. The PCI Express AER +driver provides three basic functions: + +- Gathers the comprehensive error information if errors occurred. +- Reports error to the users. +- Performs error recovery actions. + +AER driver only attaches root ports which support PCI-Express AER +capability. + + +2. User Guide + +2.1 Include the PCI Express AER Root Driver into the Linux Kernel + +The PCI Express AER Root driver is a Root Port service driver attached +to the PCI Express Port Bus driver. If a user wants to use it, the driver +has to be compiled. Option CONFIG_PCIEAER supports this capability. It +depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and +CONFIG_PCIEAER = y. + +2.2 Load PCI Express AER Root Driver +There is a case where a system has AER support in BIOS. Enabling the AER +Root driver and having AER support in BIOS may result unpredictable +behavior. To avoid this conflict, a successful load of the AER Root driver +requires ACPI _OSC support in the BIOS to allow the AER Root driver to +request for native control of AER. See the PCI FW 3.0 Specification for +details regarding OSC usage. Currently, lots of firmwares don't provide +_OSC support while they use PCI Express. To support such firmwares, +forceload, a parameter of type bool, could enable AER to continue to +be initiated although firmwares have no _OSC support. To enable the +walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line +when booting kernel. Note that forceload=n by default. + +2.3 AER error output +When a PCI-E AER error is captured, an error message will be outputed to +console. If it's a correctable error, it is outputed as a warning. +Otherwise, it is printed as an error. So users could choose different +log level to filter out correctable error messages. + +Below shows an example. ++------ PCI-Express Device Error -----+ +Error Severity : Uncorrected (Fatal) +PCIE Bus Error type : Transaction Layer +Unsupported Request : First +Requester ID : 0500 +VendorID=8086h, DeviceID=0329h, Bus=05h, Device=00h, Function=00h +TLB Header: +04000001 00200a03 05010000 00050100 + +In the example, 'Requester ID' means the ID of the device who sends +the error message to root port. Pls. refer to pci express specs for +other fields. + + +3. Developer Guide + +To enable AER aware support requires a software driver to configure +the AER capability structure within its device and to provide callbacks. + +To support AER better, developers need understand how AER does work +firstly. + +PCI Express errors are classified into two types: correctable errors +and uncorrectable errors. This classification is based on the impacts +of those errors, which may result in degraded performance or function +failure. + +Correctable errors pose no impacts on the functionality of the +interface. The PCI Express protocol can recover without any software +intervention or any loss of data. These errors are detected and +corrected by hardware. Unlike correctable errors, uncorrectable +errors impact functionality of the interface. Uncorrectable errors +can cause a particular transaction or a particular PCI Express link +to be unreliable. Depending on those error conditions, uncorrectable +errors are further classified into non-fatal errors and fatal errors. +Non-fatal errors cause the particular transaction to be unreliable, +but the PCI Express link itself is fully functional. Fatal errors, on +the other hand, cause the link to be unreliable. + +When AER is enabled, a PCI Express device will automatically send an +error message to the PCIE root port above it when the device captures +an error. The Root Port, upon receiving an error reporting message, +internally processes and logs the error message in its PCI Express +capability structure. Error information being logged includes storing +the error reporting agent's requestor ID into the Error Source +Identification Registers and setting the error bits of the Root Error +Status Register accordingly. If AER error reporting is enabled in Root +Error Command Register, the Root Port generates an interrupt if an +error is detected. + +Note that the errors as described above are related to the PCI Express +hierarchy and links. These errors do not include any device specific +errors because device specific errors will still get sent directly to +the device driver. + +3.1 Configure the AER capability structure + +AER aware drivers of PCI Express component need change the device +control registers to enable AER. They also could change AER registers, +including mask and severity registers. Helper function +pci_enable_pcie_error_reporting could be used to enable AER. See +section 3.3. + +3.2. Provide callbacks + +3.2.1 callback reset_link to reset pci express link + +This callback is used to reset the pci express physical link when a +fatal error happens. The root port aer service driver provides a +default reset_link function, but different upstream ports might +have different specifications to reset pci express link, so all +upstream ports should provide their own reset_link functions. + +In struct pcie_port_service_driver, a new pointer, reset_link, is +added. + +pci_ers_result_t (*reset_link) (struct pci_dev *dev); + +Section 3.2.2.2 provides more detailed info on when to call +reset_link. + +3.2.2 PCI error-recovery callbacks + +The PCI Express AER Root driver uses error callbacks to coordinate +with downstream device drivers associated with a hierarchy in question +when performing error recovery actions. + +Data struct pci_driver has a pointer, err_handler, to point to +pci_error_handlers who consists of a couple of callback function +pointers. AER driver follows the rules defined in +pci-error-recovery.txt except pci express specific parts (e.g. +reset_link). Pls. refer to pci-error-recovery.txt for detailed +definitions of the callbacks. + +Below sections specify when to call the error callback functions. + +3.2.2.1 Correctable errors + +Correctable errors pose no impacts on the functionality of +the interface. The PCI Express protocol can recover without any +software intervention or any loss of data. These errors do not +require any recovery actions. The AER driver clears the device's +correctable error status register accordingly and logs these errors. + +3.2.2.2 Non-correctable (non-fatal and fatal) errors + +If an error message indicates a non-fatal error, performing link reset +at upstream is not required. The AER driver calls error_detected(dev, +pci_channel_io_normal) to all drivers associated within a hierarchy in +question. for example, +EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort. +If Upstream port A captures an AER error, the hierarchy consists of +Downstream port B and EndPoint. + +A driver may return PCI_ERS_RESULT_CAN_RECOVER, +PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on +whether it can recover or the AER driver calls mmio_enabled as next. + +If an error message indicates a fatal error, kernel will broadcast +error_detected(dev, pci_channel_io_frozen) to all drivers within +a hierarchy in question. Then, performing link reset at upstream is +necessary. As different kinds of devices might use different approaches +to reset link, AER port service driver is required to provide the +function to reset link. Firstly, kernel looks for if the upstream +component has an aer driver. If it has, kernel uses the reset_link +callback of the aer driver. If the upstream component has no aer driver +and the port is downstream port, we will use the aer driver of the +root port who reports the AER error. As for upstream ports, +they should provide their own aer service drivers with reset_link +function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and +reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes +to mmio_enabled. + +3.3 helper functions + +3.3.1 int pci_find_aer_capability(struct pci_dev *dev); +pci_find_aer_capability locates the PCI Express AER capability +in the device configuration space. If the device doesn't support +PCI-Express AER, the function returns 0. + +3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev); +pci_enable_pcie_error_reporting enables the device to send error +messages to root port when an error is detected. Note that devices +don't enable the error reporting by default, so device drivers need +call this function to enable it. + +3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev); +pci_disable_pcie_error_reporting disables the device to send error +messages to root port when an error is detected. + +3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable +error status register. + +3.4 Frequent Asked Questions + +Q: What happens if a PCI Express device driver does not provide an +error recovery handler (pci_driver->err_handler is equal to NULL)? + +A: The devices attached with the driver won't be recovered. If the +error is fatal, kernel will print out warning messages. Please refer +to section 3 for more information. + +Q: What happens if an upstream port service driver does not provide +callback reset_link? + +A: Fatal error recovery will fail if the errors are reported by the +upstream ports who are attached by the service driver. + +Q: How does this infrastructure deal with driver that is not PCI +Express aware? + +A: This infrastructure calls the error callback functions of the +driver when an error happens. But if the driver is not aware of +PCI Express, the device might not report its own errors to root +port. + +Q: What modifications will that driver need to make it compatible +with the PCI Express AER Root driver? + +A: It could call the helper functions to enable AER in devices and +cleanup uncorrectable status register. Pls. refer to section 3.3. + diff --git a/Documentation/PCIEBUS-HOWTO.txt b/Documentation/PCIEBUS-HOWTO.txt deleted file mode 100644 index c93f42a74d7e..000000000000 --- a/Documentation/PCIEBUS-HOWTO.txt +++ /dev/null @@ -1,217 +0,0 @@ - The PCI Express Port Bus Driver Guide HOWTO - Tom L Nguyen tom.l.nguyen@intel.com - 11/03/2004 - -1. About this guide - -This guide describes the basics of the PCI Express Port Bus driver -and provides information on how to enable the service drivers to -register/unregister with the PCI Express Port Bus Driver. - -2. Copyright 2004 Intel Corporation - -3. What is the PCI Express Port Bus Driver - -A PCI Express Port is a logical PCI-PCI Bridge structure. There -are two types of PCI Express Port: the Root Port and the Switch -Port. The Root Port originates a PCI Express link from a PCI Express -Root Complex and the Switch Port connects PCI Express links to -internal logical PCI buses. The Switch Port, which has its secondary -bus representing the switch's internal routing logic, is called the -switch's Upstream Port. The switch's Downstream Port is bridging from -switch's internal routing bus to a bus representing the downstream -PCI Express link from the PCI Express Switch. - -A PCI Express Port can provide up to four distinct functions, -referred to in this document as services, depending on its port type. -PCI Express Port's services include native hotplug support (HP), -power management event support (PME), advanced error reporting -support (AER), and virtual channel support (VC). These services may -be handled by a single complex driver or be individually distributed -and handled by corresponding service drivers. - -4. Why use the PCI Express Port Bus Driver? - -In existing Linux kernels, the Linux Device Driver Model allows a -physical device to be handled by only a single driver. The PCI -Express Port is a PCI-PCI Bridge device with multiple distinct -services. To maintain a clean and simple solution each service -may have its own software service driver. In this case several -service drivers will compete for a single PCI-PCI Bridge device. -For example, if the PCI Express Root Port native hotplug service -driver is loaded first, it claims a PCI-PCI Bridge Root Port. The -kernel therefore does not load other service drivers for that Root -Port. In other words, it is impossible to have multiple service -drivers load and run on a PCI-PCI Bridge device simultaneously -using the current driver model. - -To enable multiple service drivers running simultaneously requires -having a PCI Express Port Bus driver, which manages all populated -PCI Express Ports and distributes all provided service requests -to the corresponding service drivers as required. Some key -advantages of using the PCI Express Port Bus driver are listed below: - - - Allow multiple service drivers to run simultaneously on - a PCI-PCI Bridge Port device. - - - Allow service drivers implemented in an independent - staged approach. - - - Allow one service driver to run on multiple PCI-PCI Bridge - Port devices. - - - Manage and distribute resources of a PCI-PCI Bridge Port - device to requested service drivers. - -5. Configuring the PCI Express Port Bus Driver vs. Service Drivers - -5.1 Including the PCI Express Port Bus Driver Support into the Kernel - -Including the PCI Express Port Bus driver depends on whether the PCI -Express support is included in the kernel config. The kernel will -automatically include the PCI Express Port Bus driver as a kernel -driver when the PCI Express support is enabled in the kernel. - -5.2 Enabling Service Driver Support - -PCI device drivers are implemented based on Linux Device Driver Model. -All service drivers are PCI device drivers. As discussed above, it is -impossible to load any service driver once the kernel has loaded the -PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver -Model requires some minimal changes on existing service drivers that -imposes no impact on the functionality of existing service drivers. - -A service driver is required to use the two APIs shown below to -register its service with the PCI Express Port Bus driver (see -section 5.2.1 & 5.2.2). It is important that a service driver -initializes the pcie_port_service_driver data structure, included in -header file /include/linux/pcieport_if.h, before calling these APIs. -Failure to do so will result an identity mismatch, which prevents -the PCI Express Port Bus driver from loading a service driver. - -5.2.1 pcie_port_service_register - -int pcie_port_service_register(struct pcie_port_service_driver *new) - -This API replaces the Linux Driver Model's pci_module_init API. A -service driver should always calls pcie_port_service_register at -module init. Note that after service driver being loaded, calls -such as pci_enable_device(dev) and pci_set_master(dev) are no longer -necessary since these calls are executed by the PCI Port Bus driver. - -5.2.2 pcie_port_service_unregister - -void pcie_port_service_unregister(struct pcie_port_service_driver *new) - -pcie_port_service_unregister replaces the Linux Driver Model's -pci_unregister_driver. It's always called by service driver when a -module exits. - -5.2.3 Sample Code - -Below is sample service driver code to initialize the port service -driver data structure. - -static struct pcie_port_service_id service_id[] = { { - .vendor = PCI_ANY_ID, - .device = PCI_ANY_ID, - .port_type = PCIE_RC_PORT, - .service_type = PCIE_PORT_SERVICE_AER, - }, { /* end: all zeroes */ } -}; - -static struct pcie_port_service_driver root_aerdrv = { - .name = (char *)device_name, - .id_table = &service_id[0], - - .probe = aerdrv_load, - .remove = aerdrv_unload, - - .suspend = aerdrv_suspend, - .resume = aerdrv_resume, -}; - -Below is a sample code for registering/unregistering a service -driver. - -static int __init aerdrv_service_init(void) -{ - int retval = 0; - - retval = pcie_port_service_register(&root_aerdrv); - if (!retval) { - /* - * FIX ME - */ - } - return retval; -} - -static void __exit aerdrv_service_exit(void) -{ - pcie_port_service_unregister(&root_aerdrv); -} - -module_init(aerdrv_service_init); -module_exit(aerdrv_service_exit); - -6. Possible Resource Conflicts - -Since all service drivers of a PCI-PCI Bridge Port device are -allowed to run simultaneously, below lists a few of possible resource -conflicts with proposed solutions. - -6.1 MSI Vector Resource - -The MSI capability structure enables a device software driver to call -pci_enable_msi to request MSI based interrupts. Once MSI interrupts -are enabled on a device, it stays in this mode until a device driver -calls pci_disable_msi to disable MSI interrupts and revert back to -INTx emulation mode. Since service drivers of the same PCI-PCI Bridge -port share the same physical device, if an individual service driver -calls pci_enable_msi/pci_disable_msi it may result unpredictable -behavior. For example, two service drivers run simultaneously on the -same physical Root Port. Both service drivers call pci_enable_msi to -request MSI based interrupts. A service driver may not know whether -any other service drivers have run on this Root Port. If either one -of them calls pci_disable_msi, it puts the other service driver -in a wrong interrupt mode. - -To avoid this situation all service drivers are not permitted to -switch interrupt mode on its device. The PCI Express Port Bus driver -is responsible for determining the interrupt mode and this should be -transparent to service drivers. Service drivers need to know only -the vector IRQ assigned to the field irq of struct pcie_device, which -is passed in when the PCI Express Port Bus driver probes each service -driver. Service drivers should use (struct pcie_device*)dev->irq to -call request_irq/free_irq. In addition, the interrupt mode is stored -in the field interrupt_mode of struct pcie_device. - -6.2 MSI-X Vector Resources - -Similar to the MSI a device driver for an MSI-X capable device can -call pci_enable_msix to request MSI-X interrupts. All service drivers -are not permitted to switch interrupt mode on its device. The PCI -Express Port Bus driver is responsible for determining the interrupt -mode and this should be transparent to service drivers. Any attempt -by service driver to call pci_enable_msix/pci_disable_msix may -result unpredictable behavior. Service drivers should use -(struct pcie_device*)dev->irq and call request_irq/free_irq. - -6.3 PCI Memory/IO Mapped Regions - -Service drivers for PCI Express Power Management (PME), Advanced -Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access -PCI configuration space on the PCI Express port. In all cases the -registers accessed are independent of each other. This patch assumes -that all service drivers will be well behaved and not overwrite -other service driver's configuration settings. - -6.4 PCI Config Registers - -Each service driver runs its PCI config operations on its own -capability structure except the PCI Express capability structure, in -which Root Control register and Device Control register are shared -between PME and AER. This patch assumes that all service drivers -will be well behaved and not overwrite other service driver's -configuration settings. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 1fc4e7144dce..9c93a03ea33b 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -183,7 +183,7 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS copy the maintainer when you change their code. For small patches you may want to CC the Trivial Patch Monkey -trivial@kernel.org managed by Adrian Bunk; which collects "trivial" +trivial@kernel.org managed by Jesper Juhl; which collects "trivial" patches. Trivial patches must qualify for one of the following rules: Spelling fixes in documentation Spelling fixes which could break grep(1) @@ -196,7 +196,7 @@ patches. Trivial patches must qualify for one of the following rules: since people copy, as long as it's trivial) Any fix by the author/maintainer of the file (ie. patch monkey in re-transmission mode) -URL: +URL: diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt new file mode 100644 index 000000000000..bc478a3409b8 --- /dev/null +++ b/Documentation/arm/Samsung-S3C24XX/NAND.txt @@ -0,0 +1,30 @@ + S3C24XX NAND Support + ==================== + +Introduction +------------ + +Small Page NAND +--------------- + +The driver uses a 512 byte (1 page) ECC code for this setup. The +ECC code is not directly compatible with the default kernel ECC +code, so the driver enforces its own OOB layout and ECC parameters + +Large Page NAND +--------------- + +The driver is capable of handling NAND flash with a 2KiB page +size, with support for hardware ECC generation and correction. + +Unlike the 512byte page mode, the driver generates ECC data for +each 256 byte block in an 2KiB page. This means that more than +one error in a page can be rectified. It also means that the +OOB layout remains the default kernel layout for these flashes. + + +Document Author +--------------- + +Ben Dooks, Copyright 2007 Simtec Electronics + diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt index c31b76fa66c4..d04e1e30c47f 100644 --- a/Documentation/arm/Samsung-S3C24XX/Overview.txt +++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt @@ -156,6 +156,8 @@ NAND controller. If there are any problems the latest linux-mtd code can be found from http://www.linux-mtd.infradead.org/ + For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt + Serial ------ diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 93f223b9723f..4dbb8be1c991 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -1097,7 +1097,7 @@ lock themselves, if required. Drivers that explicitly used the io_request_lock for serialization need to be modified accordingly. Usually it's as easy as adding a global lock: - static spinlock_t my_driver_lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(my_driver_lock); and passing the address to that lock to blk_init_queue(). diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex index c713aeb020c4..c06233fe52ac 100644 --- a/Documentation/cdrom/cdrom-standard.tex +++ b/Documentation/cdrom/cdrom-standard.tex @@ -777,7 +777,7 @@ Note that a driver must have one static structure, $_dops$, while it may have as many structures $_info$ as there are minor devices active. $Register_cdrom()$ builds a linked list from these. -\subsection{$Int\ unregister_cdrom(struct\ cdrom_device_info * cdi)$} +\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$} Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes the minor device from the list. If it was the last registered minor for diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt index 0223c9d20331..60932b02fcb3 100644 --- a/Documentation/cli-sti-removal.txt +++ b/Documentation/cli-sti-removal.txt @@ -43,7 +43,7 @@ would execute while the cli()-ed section is executing. but from now on a more direct method of locking has to be used: - spinlock_t driver_lock = SPIN_LOCK_UNLOCKED; + DEFINE_SPINLOCK(driver_lock); struct driver_data; irq_handler (...) diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index ad2bb3b3acc1..aa854b9b18cd 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. Modified by Paul Jackson Modified by Christoph Lameter Modified by Paul Menage +Modified by Hidetoshi Seto CONTENTS: ========= @@ -20,7 +21,8 @@ CONTENTS: 1.5 What is memory_pressure ? 1.6 What is memory spread ? 1.7 What is sched_load_balance ? - 1.8 How do I use cpusets ? + 1.8 What is sched_relax_domain_level ? + 1.9 How do I use cpusets ? 2. Usage Examples and Syntax 2.1 Basic Usage 2.2 Adding/removing cpus @@ -497,7 +499,73 @@ the cpuset code to update these sched domains, it compares the new partition requested with the current, and updates its sched domains, removing the old and adding the new, for each change. -1.8 How do I use cpusets ? + +1.8 What is sched_relax_domain_level ? +-------------------------------------- + +In sched domain, the scheduler migrates tasks in 2 ways; periodic load +balance on tick, and at time of some schedule events. + +When a task is woken up, scheduler try to move the task on idle CPU. +For example, if a task A running on CPU X activates another task B +on the same CPU X, and if CPU Y is X's sibling and performing idle, +then scheduler migrate task B to CPU Y so that task B can start on +CPU Y without waiting task A on CPU X. + +And if a CPU run out of tasks in its runqueue, the CPU try to pull +extra tasks from other busy CPUs to help them before it is going to +be idle. + +Of course it takes some searching cost to find movable tasks and/or +idle CPUs, the scheduler might not search all CPUs in the domain +everytime. In fact, in some architectures, the searching ranges on +events are limited in the same socket or node where the CPU locates, +while the load balance on tick searchs all. + +For example, assume CPU Z is relatively far from CPU X. Even if CPU Z +is idle while CPU X and the siblings are busy, scheduler can't migrate +woken task B from X to Z since it is out of its searching range. +As the result, task B on CPU X need to wait task A or wait load balance +on the next tick. For some applications in special situation, waiting +1 tick may be too long. + +The 'sched_relax_domain_level' file allows you to request changing +this searching range as you like. This file takes int value which +indicates size of searching range in levels ideally as follows, +otherwise initial value -1 that indicates the cpuset has no request. + + -1 : no request. use system default or follow request of others. + 0 : no search. + 1 : search siblings (hyperthreads in a core). + 2 : search cores in a package. + 3 : search cpus in a node [= system wide on non-NUMA system] + ( 4 : search nodes in a chunk of node [on NUMA system] ) + ( 5~ : search system wide [on NUMA system]) + +This file is per-cpuset and affect the sched domain where the cpuset +belongs to. Therefore if the flag 'sched_load_balance' of a cpuset +is disabled, then 'sched_relax_domain_level' have no effect since +there is no sched domain belonging the cpuset. + +If multiple cpusets are overlapping and hence they form a single sched +domain, the largest value among those is used. Be careful, if one +requests 0 and others are -1 then 0 is used. + +Note that modifying this file will have both good and bad effects, +and whether it is acceptable or not will be depend on your situation. +Don't modify this file if you are not sure. + +If your situation is: + - The migration costs between each cpu can be assumed considerably + small(for you) due to your special application's behavior or + special hardware support for CPU cache etc. + - The searching cost doesn't have impact(for you) or you can make + the searching cost enough small by managing cpuset to compact etc. + - The latency is required even it sacrifices cache hit rate etc. +then increasing 'sched_relax_domain_level' would benefit you. + + +1.9 How do I use cpusets ? -------------------------- In order to minimize the impact of cpusets on critical kernel diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt index c360d4e91b48..59a91e5c6909 100644 --- a/Documentation/debugging-via-ohci1394.txt +++ b/Documentation/debugging-via-ohci1394.txt @@ -41,15 +41,19 @@ to a working state and enables physical DMA by default for all remote nodes. This can be turned off by ohci1394's module parameter phys_dma=0. The alternative firewire-ohci driver in drivers/firewire uses filtered physical -DMA, hence is not yet suitable for remote debugging. +DMA by default, which is more secure but not suitable for remote debugging. +Compile the driver with CONFIG_FIREWIRE_OHCI_REMOTE_DMA (Kernel hacking menu: +Remote debugging over FireWire with firewire-ohci) to get unfiltered physical +DMA. -Because ohci1394 depends on the PCI enumeration to be completed, an -initialization routine which runs pretty early (long before console_init() -which makes the printk buffer appear on the console can be called) was written. +Because ohci1394 and firewire-ohci depend on the PCI enumeration to be +completed, an initialization routine which runs pretty early has been +implemented for x86. This routine runs long before console_init() can be +called, i.e. before the printk buffer appears on the console. To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu: -Provide code for enabling DMA over FireWire early on boot) and pass the -parameter "ohci1394_dma=early" to the recompiled kernel on boot. +Remote debugging over FireWire early on boot) and pass the parameter +"ohci1394_dma=early" to the recompiled kernel on boot. Tools ----- diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt new file mode 100644 index 000000000000..6680cab2c705 --- /dev/null +++ b/Documentation/device-mapper/dm-crypt.txt @@ -0,0 +1,52 @@ +dm-crypt +========= + +Device-Mapper's "crypt" target provides transparent encryption of block devices +using the kernel crypto API. + +Parameters: + + + Encryption cipher and an optional IV generation mode. + (In format cipher-chainmode-ivopts:ivmode). + Examples: + des + aes-cbc-essiv:sha256 + twofish-ecb + + /proc/crypto contains supported crypto modes + + + Key used for encryption. It is encoded as a hexadecimal number. + You can only use key sizes that are valid for the selected cipher. + + + The IV offset is a sector count that is added to the sector number + before creating the IV. + + + This is the device that is going to be used as backend and contains the + encrypted data. You can specify it as a path like /dev/xxx or a device + number :. + + + Starting sector within the device where the encrypted data begins. + +Example scripts +=============== +LUKS (Linux Unified Key Setup) is now the preferred way to set up disk +encryption with dm-crypt using the 'cryptsetup' utility, see +http://luks.endorphin.org/ + +[[ +#!/bin/sh +# Create a crypt device using dmsetup +dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0" +]] + +[[ +#!/bin/sh +# Create a crypt device using cryptsetup and LUKS header with default cipher +cryptsetup luksFormat $1 +cryptsetup luksOpen $1 crypt1 +]] diff --git a/Documentation/dontdiff b/Documentation/dontdiff index c09a96b99354..881e6dd03aea 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -47,7 +47,6 @@ .mm 53c700_d.h 53c8xx_d.h* -BitKeeper COPYING CREDITS CVS @@ -142,6 +141,7 @@ mkprep mktables mktree modpost +modules.order modversions.h* offset.h offsets.h @@ -172,6 +172,7 @@ sm_tbl* split-include tags tftpboot.img +timeconst.h times.h* tkparse trix_boot.h diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README index 766d320c8eb6..e35d83052192 100644 --- a/Documentation/early-userspace/README +++ b/Documentation/early-userspace/README @@ -89,8 +89,8 @@ the 2.7 era (it missed the boat for 2.5). You can obtain somewhat infrequent snapshots of klibc from ftp://ftp.kernel.org/pub/linux/libs/klibc/ -For active users, you are better off using the klibc BitKeeper -repositories, at http://klibc.bkbits.net/ +For active users, you are better off using the klibc git +repository, at http://git.kernel.org/?p=libs/klibc/klibc.git The standalone klibc distribution currently provides three components, in addition to the klibc library: diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt new file mode 100644 index 000000000000..2f640903bbb2 --- /dev/null +++ b/Documentation/fb/gxfb.txt @@ -0,0 +1,52 @@ +[This file is cloned from VesaFB/aty128fb] + +What is gxfb? +================= + +This is a graphics framebuffer driver for AMD Geode GX2 based processors. + +Advantages: + + * No need to use AMD's VSA code (or other VESA emulation layer) in the + BIOS. + * It provides a nice large console (128 cols + 48 lines with 1024x768) + without using tiny, unreadable fonts. + * You can run XF68_FBDev on top of /dev/fb0 + * Most important: boot logo :-) + +Disadvantages: + + * graphic mode is slower than text mode... + + +How to use it? +============== + +Switching modes is done using gxfb.mode_option=... boot +parameter or using `fbset' program. + +See Documentation/fb/modedb.txt for more information on modedb +resolutions. + + +X11 +=== + +XF68_FBDev should generally work fine, but it is non-accelerated. + + +Configuration +============= + +You can pass kernel command line options to gxfb with gxfb.