diff --git a/Documentation/devicetree/bindings/powerpc/fsl/board.txt b/Documentation/devicetree/bindings/powerpc/fsl/board.txt
index 700dec4774fa..cff38bdbc0e4 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/board.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/board.txt
@@ -84,3 +84,19 @@ Example:
 		compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
 		reg = <0x66>;
 	};
+
+* Freescale on-board CPLD
+
+Some Freescale boards like T1040RDB have an on board CPLD connected.
+
+Required properties:
+- compatible: Should be a board-specific string like "fsl,<board>-cpld"
+  Example:
+	"fsl,t1040rdb-cpld", "fsl,t1042rdb-cpld", "fsl,t1042rdb_pi-cpld"
+- reg: should describe CPLD registers
+
+Example:
+	cpld@3,0 {
+		compatible = "fsl,t1040rdb-cpld";
+		reg = <3 0 0x300>;
+	};
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
 an excellent performance which has limitations such as inability to do
 locked pages accounting in real time.
 
-So 3 additional ioctls have been added:
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
 
 	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
 		of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
 
 	VFIO_IOMMU_DISABLE - disables the container.
 
+	VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
 
 The code flow from the example above should be slightly changed:
 
+	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
 	.....
 	/* Add the group to the container */
 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
 	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-
 	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-	.....
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	....
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+	/* Make sure EEH is supported */
+	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+	/* Enable the EEH functionality on the device */
+	pe_op.op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* You're suggested to create additional data struct to represent
+	 * PE, and put child devices belonging to same IOMMU group to the
+	 * PE instance for later reference.
+	 */
+
+	/* Check the PE's state and make sure it's in functional state */
+	pe_op.op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Save device state using pci_save_state().
+	 * EEH should be enabled on the specified device.
+	 */
+
+	....
+
+	/* When 0xFF's returned from reading PCI config space or IO BARs
+	 * of the PCI device. Check the PE's state to see if that has been
+	 * frozen.
+	 */
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Waiting for pending PCI transactions to be completed and don't
+	 * produce any more PCI traffic from/to the affected PE until
+	 * recovery is finished.
+	 */
+
+	/* Enable IO for the affected PE and collect logs. Usually, the
+	 * standard part of PCI config space, AER registers are dumped
+	 * as logs for further analysis.
+	 */
+	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/*
+	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
+	 * is enough. However, the firmware of some PCI adapters would
+	 * require fundamental reset.
+	 */
+	pe_op.op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Configure the PCI bridges for the affected PE */
+	pe_op.op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Restored state we saved at initialization time. pci_restore_state()
+	 * is good enough as an example.
+	 */
+
+	/* Hopefully, error is recovered successfully. Now, you can resume to
+	 * start PCI traffic to/from the affected PE.
+	 */
+
+	....
 
 -------------------------------------------------------------------------------
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 05e545096100..946a67e6c4ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5424,6 +5424,7 @@ F:	arch/powerpc/boot/rs6000.h
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 M:	Paul Mackerras <paulus@samba.org>
+M:	Michael Ellerman <mpe@ellerman.id.au>
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
 Q:	http://patchwork.ozlabs.org/project/linuxppc-dev/list/
@@ -5465,16 +5466,17 @@ F:	arch/powerpc/*/*/*virtex*
 
 LINUX FOR POWERPC EMBEDDED PPC8XX
 M:	Vitaly Bordug <vitb@kernel.crashing.org>
-M:	Marcelo Tosatti <marcelo@kvack.org>
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	arch/powerpc/platforms/8xx/
 
 LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX
+M:	Scott Wood <scottwood@freescale.com>
 M:	Kumar Gala <galak@kernel.crashing.org>
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git
 S:	Maintained
 F:	arch/powerpc/platforms/83xx/
 F:	arch/powerpc/platforms/85xx/
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index 5290df83ff30..69ce1026c948 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -359,6 +359,7 @@
 			compatible = "fsl,qoriq-core-mux-1.0";
 			clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
 			clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+			clock-output-names = "cmux2";
 		};
 
 		mux3: mux3@60 {
diff --git a/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
new file mode 100644
index 000000000000..082ec2044060
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
@@ -0,0 +1,69 @@
+/*
+ * T2080 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t2081si-post.dtsi"
+
+&soc {
+/include/ "qoriq-sata2-0.dtsi"
+	sata@220000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+	};
+
+/include/ "qoriq-sata2-1.dtsi"
+	sata@221000 {
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
+	};
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges;
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
new file mode 100644
index 000000000000..97479f0ce630
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -0,0 +1,435 @@
+/*
+ * T2081 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+/* controller at 0x240000 */
+&pci0 {
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+		>;
+	};
+};
+
+/* controller at 0x250000 */
+&pci1 {
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+		>;
+	};
+};
+
+/* controller at 0x260000 */
+&pci2 {
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+		>;
+	};
+};
+
+/* controller at 0x270000 */
+&pci3 {
+	compatible = "fsl,t2080-pcie", "fsl,qoriq-pcie-v3.0", "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	fsl,iommu-parent = <&pamu0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <23 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+		>;
+	};
+};
+
+&dcsr {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,t2080-dcsr-epu", "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0
+			      94 2 0 0
+			      95 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {
+		compatible = "fsl,t2080-dcsr-cnpc", "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,t2080-dcsr-ocn", "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,t2080-dcsr-nal", "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,t2080-dcsr-rcpm", "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-snpc@32000 {
+		compatible = "fsl,t2080-dcsr-snpc", "fsl,dcsr-snpc";
+		reg = <0x32000 0x1000 0x1062000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "fsl,t2080-l3-cache-controller", "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000
+		       0x12000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26
+			      16 2 1 25>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x3000>;
+		fsl,portid-mapping = <0x8000>;
+		ranges = <0 0x20000 0x3000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+
+		pamu0: pamu@0 {
+			reg = <0 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu1: pamu@1000 {
+			reg = <0x1000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+
+		pamu2: pamu@2000 {
+			reg = <0x2000 0x1000>;
+			fsl,primary-cache-geometry = <32 1>;
+			fsl,secondary-cache-geometry = <128 2>;
+		};
+	};
+
+/include/ "qoriq-mpic4.3.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,t2080-device-config", "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0";
+		ranges = <0x0 0xe1000 0x1000>;
+		reg = <0xe1000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		sysclk: sysclk {
+			#clock-cells = <0>;
+			compatible = "fsl,qoriq-sysclk-2.0";
+			clock-output-names = "sysclk", "fixed-clock";
+		};
+
+		pll0: pll0@800 {
+			#clock-cells = <1>;
+			reg = <0x800 4>;
+			compatible = "fsl,qoriq-core-pll-2.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll0", "pll0-div2", "pll0-div4";
+		};
+
+		pll1: pll1@820 {
+			#clock-cells = <1>;
+			reg = <0x820 4>;
+			compatible = "fsl,qoriq-core-pll-2.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll1", "pll1-div2", "pll1-div4";
+		};
+
+		mux0: mux0@0 {
+			#clock-cells = <0>;
+			reg = <0x0 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll1-div4",
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux0";
+		};
+
+		mux1: mux1@20 {
+			#clock-cells = <0>;
+			reg = <0x20 4>;
+			compatible = "fsl,qoriq-core-mux-2.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0", "pll0-div2", "pll1-div4",
+				"pll1", "pll1-div2", "pll1-div4";
+			clock-output-names = "cmux1";
+		};
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,t2080-rcpm", "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t2080-sfp";
+		reg = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t2080-serdes";
+		reg = <0xea000 0x4000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+	dma@100300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
+	};
+/include/ "elo3-dma-1.dtsi"
+	dma@101300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+	};
+/include/ "elo3-dma-2.dtsi"
+	dma@102300 {
+		fsl,iommu-parent = <&pamu0>;
+		fsl,liodn-reg = <&guts 0x588>; /* DMA3LIODNR */
+	};
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,t2080-esdhc", "fsl,esdhc";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x530>; /* SDMMCLIODNR */
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {
+		compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+		fsl,iommu-parent = <&pamu1>;
+		fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+/include/ "qoriq-sec5.2-0.dtsi"
+
+	L2_1: l2-cache-controller@c20000 {
+		/* Cluster 0 L2 cache */
+		compatible = "fsl,t2080-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
new file mode 100644
index 000000000000..e71ceb0e1100
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
@@ -0,0 +1,99 @@
+/*
+ * T2080/T2081 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+
+		crypto = &crypto;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		dma2 = &dma2;
+		sdhc = &sdhc;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+			fsl,portid-mapping = <0x80000000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index 793669baa13e..a3d582e0361a 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -476,6 +476,7 @@
 
 /include/ "elo3-dma-0.dtsi"
 /include/ "elo3-dma-1.dtsi"
+/include/ "elo3-dma-2.dtsi"
 
 /include/ "qoriq-espi-0.dtsi"
 	spi@110000 {
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
index d2f157edbe81..261a3abb1a55 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
@@ -57,6 +57,7 @@
 		pci3 = &pci3;
 		dma0 = &dma0;
 		dma1 = &dma1;
+		dma2 = &dma2;
 		sdhc = &sdhc;
 	};
 
diff --git a/arch/powerpc/boot/dts/t2080qds.dts b/arch/powerpc/boot/dts/t2080qds.dts
new file mode 100644
index 000000000000..aa1d6d8c169b
--- /dev/null
+++ b/arch/powerpc/boot/dts/t2080qds.dts
@@ -0,0 +1,57 @@
+/*
+ * T2080QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ {
+	model = "fsl,T2080QDS";
+	compatible = "fsl,T2080QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+		};
+	};
+};
+
+/include/ "fsl/t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t2080rdb.dts b/arch/powerpc/boot/dts/t2080rdb.dts
new file mode 100644
index 000000000000..e8891047600c
--- /dev/null
+++ b/arch/powerpc/boot/dts/t2080rdb.dts
@@ -0,0 +1,57 @@
+/*
+ * T2080PCIe-RDB Board Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t208xsi-pre.dtsi"
+/include/ "t208xrdb.dtsi"
+
+/ {
+	model = "fsl,T2080RDB";
+	compatible = "fsl,T2080RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+		};
+	};
+};
+
+/include/ "fsl/t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t2081qds.dts b/arch/powerpc/boot/dts/t2081qds.dts
new file mode 100644
index 000000000000..8ec80a71e102
--- /dev/null
+++ b/arch/powerpc/boot/dts/t2081qds.dts
@@ -0,0 +1,46 @@
+/*
+ * T2081QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ {
+	model = "fsl,T2081QDS";
+	compatible = "fsl,T2081QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+};
+
+/include/ "fsl/t2081si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi
new file mode 100644
index 000000000000..555dc6e03d89
--- /dev/null
+++ b/arch/powerpc/boot/dts/t208xqds.dtsi
@@ -0,0 +1,239 @@
+/*
+ * T2080/T2081 QDS Device Tree Source
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	model = "fsl,T2080QDS";
+	compatible = "fsl,T2080QDS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		boardctrl: board-control@3,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,fpga-qixis";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q128a11"; /* 16MB */
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+
+			flash@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040";
+				reg = <1>;
+				spi-max-frequency = <35000000>;
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "eon,en25s64";
+				reg = <2>;
+				spi-max-frequency = <35000000>;
+			};
+		};
+
+		i2c@118000 {
+			pca9547@77 {
+				compatible = "nxp,pca9547";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				i2c@0 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x0>;
+
+					eeprom@50 {
+						compatible = "at24,24c512";
+						reg = <0x50>;
+					};
+
+					eeprom@51 {
+						compatible = "at24,24c02";
+						reg = <0x51>;
+					};
+
+					eeprom@57 {
+						compatible = "at24,24c02";
+						reg = <0x57>;
+					};
+
+					rtc@68 {
+						compatible = "dallas,ds3232";
+						reg = <0x68>;
+						interrupts = <0x1 0x1 0 0>;
+					};
+				};
+
+				i2c@1 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x1>;
+
+					eeprom@55 {
+						compatible = "at24,24c02";
+						reg = <0x55>;
+					};
+				};
+
+				i2c@2 {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					reg = <0x2>;
+
+					ina220@40 {
+						compatible = "ti,ina220";
+						reg = <0x40>;
+						shunt-resistor = <1000>;
+					};
+
+					ina220@41 {
+						compatible = "ti,ina220";
+						reg = <0x41>;
+						shunt-resistor = <1000>;
+					};
+				};
+			};
+		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi
new file mode 100644
index 000000000000..1481e192e783
--- /dev/null
+++ b/arch/powerpc/boot/dts/t208xrdb.dtsi
@@ -0,0 +1,184 @@
+/*
+ * T2080PCIe-RDB Board Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	model = "fsl,T2080RDB";
+	compatible = "fsl,T2080RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		boardctrl: board-control@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,t2080-cpld";
+			reg = <3 0 0x300>;
+			ranges = <0 3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512a";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 {
+			adt7481@4c {
+				compatible = "adi,adt7481";
+				reg = <0x4c>;
+			};
+
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+
+			eeprom@50 {
+				compatible = "atmel,24c256";
+				reg = <0x50>;
+			};
+		};
+
+		i2c@118100 {
+			pca9546@77 {
+				compatible = "nxp,pca9546";
+				reg = <0x77>;
+			};
+		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts
new file mode 100644
index 000000000000..53761d4e8c51
--- /dev/null
+++ b/arch/powerpc/boot/dts/t4240rdb.dts
@@ -0,0 +1,186 @@
+/*
+ * T4240RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t4240si-pre.dtsi"
+
+/ {
+	model = "fsl,T4240RDB";
+	compatible = "fsl,T4240RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "sst,sst25wf040";
+				reg = <0>;
+				spi-max-frequency = <40000000>; /* input clock */
+			};
+		};
+
+		i2c@118000 {
+			eeprom@52 {
+				compatible = "at24,24c256";
+				reg = <0x52>;
+			};
+			eeprom@54 {
+				compatible = "at24,24c256";
+				reg = <0x54>;
+			};
+			eeprom@56 {
+				compatible = "at24,24c256";
+				reg = <0x56>;
+			};
+			rtc@68 {
+				compatible = "dallas,ds1374";
+				reg = <0x68>;
+				interrupts = <0x1 0x1 0 0>;
+			};
+		};
+
+		sdhc@114000 {
+			voltage-ranges = <1800 1800 3300 3300>;
+		};
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		reg = <0xf 0xfe0c0000 0 0x11000>;
+
+		port1 {
+			ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+		};
+		port2 {
+			ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+		};
+	};
+};
+
+/include/ "fsl/t4240si-post.dtsi"
diff --git a/arch/powerpc/boot/io.h b/arch/powerpc/boot/io.h
index 7c09f4861fe1..394da5500466 100644
--- a/arch/powerpc/boot/io.h
+++ b/arch/powerpc/boot/io.h
@@ -1,5 +1,5 @@
 #ifndef _IO_H
-#define __IO_H
+#define _IO_H
 
 #include "types.h"
 
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index c19ff057d0f9..6a3c58adf253 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -139,8 +139,9 @@ CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_EDAC_MPC85XX=y
 CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
 CONFIG_RTC_DRV_DS3232=y
-CONFIG_RTC_DRV_CMOS=y
 CONFIG_UIO=y
 CONFIG_STAGING=y
 CONFIG_VIRT_DRIVERS=y
@@ -179,3 +180,4 @@ CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_FSL_CAAM=y
+CONFIG_FSL_CORENET_CF=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 5c7fa19ae4ef..4b07bade1ba9 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -123,6 +123,10 @@ CONFIG_USB_EHCI_FSL=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_DS3232=y
 CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_DMADEVICES=y
@@ -175,3 +179,4 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_FSL_CAAM=y
+CONFIG_FSL_CORENET_CF=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index 55765c8cb08f..fa1bfd37f1ec 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -209,6 +209,9 @@ CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_CMOS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_DMADEVICES=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 5c6ecdc0f70e..0b452ebd8b3d 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -210,6 +210,9 @@ CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_CMOS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_DMADEVICES=y
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 0fdd7eece6d9..642e436d4595 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -195,8 +195,7 @@ extern const char *powerpc_base_platform;
 
 #define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
 
-#define MMU_FTR_PPCAS_ARCH_V2 	(MMU_FTR_SLB | MMU_FTR_TLBIEL | \
-				 MMU_FTR_16M_PAGE)
+#define MMU_FTR_PPCAS_ARCH_V2 	(MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE)
 
 /* We only set the altivec features if the kernel was compiled with altivec
  * support
@@ -268,10 +267,6 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_MAYBE_CAN_NAP	0
 #endif
 
-#define CLASSIC_PPC (!defined(CONFIG_8xx) && !defined(CONFIG_4xx) && \
-		     !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \
-		     !defined(CONFIG_BOOKE))
-
 #define CPU_FTRS_PPC601	(CPU_FTR_COMMON | CPU_FTR_601 | \
 	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
 #define CPU_FTRS_603	(CPU_FTR_COMMON | \
@@ -396,15 +391,10 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
-	    CPU_FTR_CELL_TB_BUG)
+	    CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
-#define CPU_FTRS_POWER3	(CPU_FTR_USE_TB | \
-	    CPU_FTR_IABR | CPU_FTR_PPC_LE)
-#define CPU_FTRS_RS64	(CPU_FTR_USE_TB | \
-	    CPU_FTR_IABR | \
-	    CPU_FTR_MMCRA | CPU_FTR_CTRL)
 #define CPU_FTRS_POWER4	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
@@ -467,15 +457,14 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_POSSIBLE	(CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2)
 #else
 #define CPU_FTRS_POSSIBLE	\
-	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
-	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 |	\
-	    CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX)
+	    (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+	     CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
+	     CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX)
 #endif
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
-#if CLASSIC_PPC
+#ifdef CONFIG_PPC_BOOK3S_32
 	    CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
 	    CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
 	    CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
@@ -518,14 +507,14 @@ enum {
 #define CPU_FTRS_ALWAYS		(CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2)
 #else
 #define CPU_FTRS_ALWAYS		\
-	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
-	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
-	    CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+	    (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+	     CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
+	     CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
 #endif
 #else
 enum {
 	CPU_FTRS_ALWAYS =
-#if CLASSIC_PPC
+#ifdef CONFIG_PPC_BOOK3S_32
 	    CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
 	    CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
 	    CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fab7743c2640..9983c3d26bca 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/time.h>
+#include <linux/atomic.h>
 
 struct pci_dev;
 struct pci_bus;
@@ -33,10 +34,11 @@ struct device_node;
 #ifdef CONFIG_EEH
 
 /* EEH subsystem flags */
-#define EEH_ENABLED		0x1	/* EEH enabled		*/
-#define EEH_FORCE_DISABLED	0x2	/* EEH disabled		*/
-#define EEH_PROBE_MODE_DEV	0x4	/* From PCI device	*/
-#define EEH_PROBE_MODE_DEVTREE	0x8	/* From device tree	*/
+#define EEH_ENABLED		0x01	/* EEH enabled		*/
+#define EEH_FORCE_DISABLED	0x02	/* EEH disabled		*/
+#define EEH_PROBE_MODE_DEV	0x04	/* From PCI device	*/
+#define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree	*/
+#define EEH_ENABLE_IO_FOR_LOG	0x10	/* Enable IO for log	*/
 
 /*
  * Delay for PE reset, all in ms
@@ -84,7 +86,9 @@ struct eeh_pe {
 	int freeze_count;		/* Times of froze up		*/
 	struct timeval tstamp;		/* Time on first-time freeze	*/
 	int false_positives;		/* Times of reported #ff's	*/
+	atomic_t pass_dev_cnt;		/* Count of passed through devs	*/
 	struct eeh_pe *parent;		/* Parent PE			*/
+	void *data;			/* PE auxillary data		*/
 	struct list_head child_list;	/* Link PE to the child list	*/
 	struct list_head edevs;		/* Link list of EEH devices	*/
 	struct list_head child;		/* Child PEs			*/
@@ -93,6 +97,11 @@ struct eeh_pe {
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
 		list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+	return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
+}
+
 /*
  * The struct is used to trace EEH state for the associated
  * PCI device node or PCI device. In future, it might
@@ -165,6 +174,11 @@ enum {
 #define EEH_STATE_DMA_ACTIVE	(1 << 4)	/* Active DMA		*/
 #define EEH_STATE_MMIO_ENABLED	(1 << 5)	/* MMIO enabled		*/
 #define EEH_STATE_DMA_ENABLED	(1 << 6)	/* DMA enabled		*/
+#define EEH_PE_STATE_NORMAL		0	/* Normal state		*/
+#define EEH_PE_STATE_RESET		1	/* PE reset asserted	*/
+#define EEH_PE_STATE_STOPPED_IO_DMA	2	/* Frozen PE		*/
+#define EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA, Enabled IO */
+#define EEH_PE_STATE_UNAVAIL		5	/* Unavailable		*/
 #define EEH_RESET_DEACTIVATE	0	/* Deactivate the PE reset	*/
 #define EEH_RESET_HOT		1	/* Hot reset			*/
 #define EEH_RESET_FUNDAMENTAL	3	/* Fundamental reset		*/
@@ -194,36 +208,28 @@ extern int eeh_subsystem_flags;
 extern struct eeh_ops *eeh_ops;
 extern raw_spinlock_t confirm_error_lock;
 
-static inline bool eeh_enabled(void)
-{
-	if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) ||
-	    !(eeh_subsystem_flags & EEH_ENABLED))
-		return false;
-
-	return true;
-}
-
-static inline void eeh_set_enable(bool mode)
-{
-	if (mode)
-		eeh_subsystem_flags |= EEH_ENABLED;
-	else
-		eeh_subsystem_flags &= ~EEH_ENABLED;
-}
-
-static inline void eeh_probe_mode_set(int flag)
+static inline void eeh_add_flag(int flag)
 {
 	eeh_subsystem_flags |= flag;
 }
 
-static inline int eeh_probe_mode_devtree(void)
+static inline void eeh_clear_flag(int flag)
 {
-	return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE);
+	eeh_subsystem_flags &= ~flag;
 }
 
-static inline int eeh_probe_mode_dev(void)
+static inline bool eeh_has_flag(int flag)
 {
-	return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV);
+        return !!(eeh_subsystem_flags & flag);
+}
+
+static inline bool eeh_enabled(void)
+{
+	if (eeh_has_flag(EEH_FORCE_DISABLED) ||
+	    !eeh_has_flag(EEH_ENABLED))
+		return false;
+
+	return true;
 }
 
 static inline void eeh_serialize_lock(unsigned long *flags)
@@ -243,6 +249,7 @@ static inline void eeh_serialize_unlock(unsigned long flags)
 #define EEH_MAX_ALLOWED_FREEZES 5
 
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
+void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
@@ -272,6 +279,13 @@ void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
+int eeh_dev_open(struct pci_dev *pdev);
+void eeh_dev_release(struct pci_dev *pdev);
+struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
+int eeh_pe_set_option(struct eeh_pe *pe, int option);
+int eeh_pe_get_state(struct eeh_pe *pe);
+int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_configure(struct eeh_pe *pe);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -295,8 +309,6 @@ static inline bool eeh_enabled(void)
         return false;
 }
 
-static inline void eeh_set_enable(bool mode) { }
-
 static inline int eeh_init(void)
 {
 	return 0;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 8f35cd7d59cc..77f52b26dad6 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -425,6 +425,8 @@ label##_relon_hv:						\
 #define SOFTEN_VALUE_0xa00	PACA_IRQ_DBELL
 #define SOFTEN_VALUE_0xe80	PACA_IRQ_DBELL
 #define SOFTEN_VALUE_0xe82	PACA_IRQ_DBELL
+#define SOFTEN_VALUE_0xe60	PACA_IRQ_HMI
+#define SOFTEN_VALUE_0xe62	PACA_IRQ_HMI
 
 #define __SOFTEN_TEST(h, vec)						\
 	lbz	r10,PACASOFTIRQEN(r13);					\
@@ -513,8 +515,11 @@ label##_relon_hv:							\
  * runlatch, etc...
  */
 
-/* Exception addition: Hard disable interrupts */
-#define DISABLE_INTS	RECONCILE_IRQ_STATE(r10,r11)
+/*
+ * This addition reconciles our actual IRQ state with the various software
+ * flags that track it. This may call C code.
+ */
+#define ADD_RECONCILE	RECONCILE_IRQ_STATE(r10,r11)
 
 #define ADD_NVGPRS				\
 	bl	save_nvgprs
@@ -532,6 +537,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
 	.globl label##_common;					\
 label##_common:							\
 	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);		\
+	/* Volatile regs are potentially clobbered here */	\
 	additions;						\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
 	bl	hdlr;						\
@@ -539,7 +545,7 @@ label##_common:							\
 
 #define STD_EXCEPTION_COMMON(trap, label, hdlr)			\
 	EXCEPTION_COMMON(trap, label, hdlr, ret_from_except,	\
-			 ADD_NVGPRS;DISABLE_INTS)
+			 ADD_NVGPRS;ADD_RECONCILE)
 
 /*
  * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
@@ -548,7 +554,7 @@ label##_common:							\
  */
 #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)		  \
 	EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \
-			 FINISH_NAP;DISABLE_INTS;RUNLATCH_ON)
+			 FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON)
 
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
index 9361cd5342cc..f79d6c74eb2a 100644
--- a/arch/powerpc/include/asm/fs_pd.h
+++ b/arch/powerpc/include/asm/fs_pd.h
@@ -28,7 +28,6 @@
 
 #ifdef CONFIG_8xx
 #include <asm/8xx_immap.h>
-#include <asm/mpc8xx.h>
 
 extern immap_t __iomem *mpc8xx_immr;
 
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 418fb654370d..1bbb3013d6aa 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -11,6 +11,7 @@ typedef struct {
 	unsigned int pmu_irqs;
 	unsigned int mce_exceptions;
 	unsigned int spurious_irqs;
+	unsigned int hmi_exceptions;
 #ifdef CONFIG_PPC_DOORBELL
 	unsigned int doorbell_irqs;
 #endif
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 10be1dd01c6b..b59ac27a6b7d 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -25,6 +25,7 @@
 #define PACA_IRQ_EE		0x04
 #define PACA_IRQ_DEC		0x08 /* Or FIT */
 #define PACA_IRQ_EE_EDGE	0x10 /* BookE only */
+#define PACA_IRQ_HMI		0x20
 
 #endif /* CONFIG_PPC64 */
 
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index e20eb95429a8..f2149066fe5d 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -32,9 +32,8 @@
 #endif
 
 /*
- * Most of the CPU's IRQ-state tracing is done from assembly code; we
- * have to call a C function so call a wrapper that saves all the
- * C-clobbered registers.
+ * These are calls to C code, so the caller must be prepared for volatiles to
+ * be clobbered.
  */
 #define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on)
 #define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
@@ -42,6 +41,9 @@
 /*
  * This is used by assembly code to soft-disable interrupts first and
  * reconcile irq state.
+ *
+ * NB: This may call C code, so the caller must be prepared for volatiles to
+ * be clobbered.
  */
 #define RECONCILE_IRQ_STATE(__rA, __rB)		\
 	lbz	__rA,PACASOFTIRQEN(r13);	\
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index f016bb699b5f..efbf9a322a23 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -10,6 +10,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
 #include <linux/types.h>
 
 #include <asm/feature-fixups.h>
@@ -42,4 +43,12 @@ struct jump_entry {
 	jump_label_t key;
 };
 
+#else
+#define ARCH_STATIC_BRANCH(LABEL, KEY)		\
+1098:	nop;					\
+	.pushsection __jump_table, "aw";	\
+	FTR_ENTRY_LONG 1098b, LABEL, KEY;	\
+	.popsection
+#endif
+
 #endif /* _ASM_POWERPC_JUMP_LABEL_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 9601741080e5..ecf7e133a4f2 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -98,6 +98,7 @@
 #define BOOK3S_INTERRUPT_H_DATA_STORAGE	0xe00
 #define BOOK3S_INTERRUPT_H_INST_STORAGE	0xe20
 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST	0xe40
+#define BOOK3S_INTERRUPT_HMI		0xe60
 #define BOOK3S_INTERRUPT_H_DOORBELL	0xe80
 #define BOOK3S_INTERRUPT_PERFMON	0xf00
 #define BOOK3S_INTERRUPT_ALTIVEC	0xf20
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index f92b0b54e921..44e90516519b 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -174,6 +174,10 @@ struct machdep_calls {
 	/* Exception handlers */
 	int		(*system_reset_exception)(struct pt_regs *regs);
 	int 		(*machine_check_exception)(struct pt_regs *regs);
+	int		(*handle_hmi_exception)(struct pt_regs *regs);
+
+	/* Early exception handlers called in realmode */
+	int		(*hmi_exception_early)(struct pt_regs *regs);
 
 	/* Called during machine check exception to retrive fixup address. */
 	bool		(*mce_check_early_recovery)(struct pt_regs *regs);
@@ -366,6 +370,7 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal)
 	} \
 	__define_initcall(__machine_initcall_##mach##_##fn, id);
 
+#define machine_early_initcall(mach, fn)	__define_machine_initcall(mach, fn, early)
 #define machine_core_initcall(mach, fn)		__define_machine_initcall(mach, fn, 1)
 #define machine_core_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 1s)
 #define machine_postcore_initcall(mach, fn)	__define_machine_initcall(mach, fn, 2)
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index c2b4dcf23d03..d76514487d6f 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -24,26 +24,6 @@
 #include <asm/bug.h>
 #include <asm/processor.h>
 
-/*
- * Segment table
- */
-
-#define STE_ESID_V	0x80
-#define STE_ESID_KS	0x20
-#define STE_ESID_KP	0x10
-#define STE_ESID_N	0x08
-
-#define STE_VSID_SHIFT	12
-
-/* Location of cpu0's segment table */
-#define STAB0_PAGE	0x8
-#define STAB0_OFFSET	(STAB0_PAGE << 12)
-#define STAB0_PHYS_ADDR	(STAB0_OFFSET + PHYSICAL_START)
-
-#ifndef __ASSEMBLY__
-extern char initial_stab[];
-#endif /* ! __ASSEMBLY */
-
 /*
  * SLB
  */
@@ -370,10 +350,8 @@ extern void hpte_init_lpar(void);
 extern void hpte_init_beat(void);
 extern void hpte_init_beat_v3(void);
 
-extern void stabs_alloc(void);
 extern void slb_initialize(void);
 extern void slb_flush_and_rebolt(void);
-extern void stab_initialize(unsigned long stab);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e61f24ed4e65..3d5abfe6ba67 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -64,9 +64,9 @@
  */
 #define MMU_FTR_USE_PAIRED_MAS		ASM_CONST(0x01000000)
 
-/* MMU is SLB-based
+/* Doesn't support the B bit (1T segment) in SLBIE
  */
-#define MMU_FTR_SLB			ASM_CONST(0x02000000)
+#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x02000000)
 
 /* Support 16M large pages
  */
@@ -88,10 +88,6 @@
  */
 #define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
 
-/* Doesn't support the B bit (1T segment) in SLBIE
- */
-#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x80000000)
-
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
 	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b467530e2485..73382eba02dc 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -18,7 +18,6 @@ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
 
 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
-extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm);
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);
 
@@ -77,10 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures.
 	 */
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (mmu_has_feature(MMU_FTR_SLB))
-		switch_slb(tsk, next);
-	else
-		switch_stab(tsk, next);
+	switch_slb(tsk, next);
 #else
 	/* Out of line for now */
 	switch_mmu_context(prev, next);
diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h
index 736d4acc05a8..3bef74a9914b 100644
--- a/arch/powerpc/include/asm/mpc85xx.h
+++ b/arch/powerpc/include/asm/mpc85xx.h
@@ -77,6 +77,8 @@
 #define SVR_T1020	0x852100
 #define SVR_T1021	0x852101
 #define SVR_T1022	0x852102
+#define SVR_T2080	0x853000
+#define SVR_T2081	0x853100
 
 #define SVR_8610	0x80A000
 #define SVR_8641	0x809000
diff --git a/arch/powerpc/include/asm/mpc8xx.h b/arch/powerpc/include/asm/mpc8xx.h
deleted file mode 100644
index 98f3c4f17328..000000000000
--- a/arch/powerpc/include/asm/mpc8xx.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* This is the single file included by all MPC8xx build options.
- * Since there are many different boards and no standard configuration,
- * we have a unique include file for each.  Rather than change every
- * file that has to include MPC8xx configuration, they all include
- * this one and the configuration switching is done here.
- */
-#ifndef __CONFIG_8xx_DEFS
-#define __CONFIG_8xx_DEFS
-
-extern struct mpc8xx_pcmcia_ops m8xx_pcmcia_ops;
-
-#endif /* __CONFIG_8xx_DEFS */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0da1dbd42e02..b2f8ce1fd0d7 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -147,6 +147,8 @@ struct opal_sg_list {
 #define OPAL_SET_PARAM				90
 #define OPAL_DUMP_RESEND			91
 #define OPAL_DUMP_INFO2				94
+#define OPAL_PCI_EEH_FREEZE_SET			97
+#define OPAL_HANDLE_HMI				98
 
 #ifndef __ASSEMBLY__
 
@@ -170,7 +172,11 @@ enum OpalFreezeState {
 enum OpalEehFreezeActionToken {
 	OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
 	OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
-	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3
+	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
+
+	OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
+	OPAL_EEH_ACTION_SET_FREEZE_DMA  = 2,
+	OPAL_EEH_ACTION_SET_FREEZE_ALL  = 3
 };
 
 enum OpalPciStatusToken {
@@ -240,6 +246,7 @@ enum OpalMessageType {
 	OPAL_MSG_MEM_ERR,
 	OPAL_MSG_EPOW,
 	OPAL_MSG_SHUTDOWN,
+	OPAL_MSG_HMI_EVT,
 	OPAL_MSG_TYPE_MAX,
 };
 
@@ -340,6 +347,12 @@ enum OpalMveEnableAction {
 	OPAL_ENABLE_MVE = 1
 };
 
+enum OpalM64EnableAction {
+	OPAL_DISABLE_M64 = 0,
+	OPAL_ENABLE_M64_SPLIT = 1,
+	OPAL_ENABLE_M64_NON_SPLIT = 2
+};
+
 enum OpalPciResetScope {
 	OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
 	OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
@@ -502,6 +515,50 @@ struct OpalMemoryErrorData {
 	} u;
 };
 
+/* HMI interrupt event */
+enum OpalHMI_Version {
+	OpalHMIEvt_V1 = 1,
+};
+
+enum OpalHMI_Severity {
+	OpalHMI_SEV_NO_ERROR = 0,
+	OpalHMI_SEV_WARNING = 1,
+	OpalHMI_SEV_ERROR_SYNC = 2,
+	OpalHMI_SEV_FATAL = 3,
+};
+
+enum OpalHMI_Disposition {
+	OpalHMI_DISPOSITION_RECOVERED = 0,
+	OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum OpalHMI_ErrType {
+	OpalHMI_ERROR_MALFUNC_ALERT	= 0,
+	OpalHMI_ERROR_PROC_RECOV_DONE,
+	OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
+	OpalHMI_ERROR_PROC_RECOV_MASKED,
+	OpalHMI_ERROR_TFAC,
+	OpalHMI_ERROR_TFMR_PARITY,
+	OpalHMI_ERROR_HA_OVERFLOW_WARN,
+	OpalHMI_ERROR_XSCOM_FAIL,
+	OpalHMI_ERROR_XSCOM_DONE,
+	OpalHMI_ERROR_SCOM_FIR,
+	OpalHMI_ERROR_DEBUG_TRIG_FIR,
+	OpalHMI_ERROR_HYP_RESOURCE,
+};
+
+struct OpalHMIEvent {
+	uint8_t		version;	/* 0x00 */
+	uint8_t		severity;	/* 0x01 */
+	uint8_t		type;		/* 0x02 */
+	uint8_t		disposition;	/* 0x03 */
+	uint8_t		reserved_1[4];	/* 0x04 */
+
+	__be64		hmer;
+	/* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
+	__be64		tfmr;
+};
+
 enum {
 	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
 	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
@@ -513,40 +570,40 @@ enum {
 };
 
 struct OpalIoP7IOCErrorData {
-	uint16_t type;
+	__be16 type;
 
 	/* GEM */
-	uint64_t gemXfir;
-	uint64_t gemRfir;
-	uint64_t gemRirqfir;
-	uint64_t gemMask;
-	uint64_t gemRwof;
+	__be64 gemXfir;
+	__be64 gemRfir;
+	__be64 gemRirqfir;
+	__be64 gemMask;
+	__be64 gemRwof;
 
 	/* LEM */
-	uint64_t lemFir;
-	uint64_t lemErrMask;
-	uint64_t lemAction0;
-	uint64_t lemAction1;
-	uint64_t lemWof;
+	__be64 lemFir;
+	__be64 lemErrMask;
+	__be64 lemAction0;
+	__be64 lemAction1;
+	__be64 lemWof;
 
 	union {
 		struct OpalIoP7IOCRgcErrorData {
-			uint64_t rgcStatus;		/* 3E1C10 */
-			uint64_t rgcLdcp;		/* 3E1C18 */
+			__be64 rgcStatus;	/* 3E1C10 */
+			__be64 rgcLdcp;		/* 3E1C18 */
 		}rgc;
 		struct OpalIoP7IOCBiErrorData {
-			uint64_t biLdcp0;		/* 3C0100, 3C0118 */
-			uint64_t biLdcp1;		/* 3C0108, 3C0120 */
-			uint64_t biLdcp2;		/* 3C0110, 3C0128 */
-			uint64_t biFenceStatus;		/* 3C0130, 3C0130 */
+			__be64 biLdcp0;		/* 3C0100, 3C0118 */
+			__be64 biLdcp1;		/* 3C0108, 3C0120 */
+			__be64 biLdcp2;		/* 3C0110, 3C0128 */
+			__be64 biFenceStatus;	/* 3C0130, 3C0130 */
 
-			uint8_t  biDownbound;		/* BI Downbound or Upbound */
+			    u8 biDownbound;	/* BI Downbound or Upbound */
 		}bi;
 		struct OpalIoP7IOCCiErrorData {
-			uint64_t ciPortStatus;		/* 3Dn008 */
-			uint64_t ciPortLdcp;		/* 3Dn010 */
+			__be64 ciPortStatus;	/* 3Dn008 */
+			__be64 ciPortLdcp;	/* 3Dn010 */
 
-			uint8_t	 ciPort;		/* Index of CI port: 0/1 */
+			    u8 ciPort;		/* Index of CI port: 0/1 */
 		}ci;
 	};
 };
@@ -578,60 +635,60 @@ struct OpalIoPhbErrorCommon {
 struct OpalIoP7IOCPhbErrorData {
 	struct OpalIoPhbErrorCommon common;
 
-	uint32_t brdgCtl;
+	__be32 brdgCtl;
 
 	// P7IOC utl regs
-	uint32_t portStatusReg;
-	uint32_t rootCmplxStatus;
-	uint32_t busAgentStatus;
+	__be32 portStatusReg;
+	__be32 rootCmplxStatus;
+	__be32 busAgentStatus;
 
 	// P7IOC cfg regs
-	uint32_t deviceStatus;
-	uint32_t slotStatus;
-	uint32_t linkStatus;
-	uint32_t devCmdStatus;
-	uint32_t devSecStatus;
+	__be32 deviceStatus;
+	__be32 slotStatus;
+	__be32 linkStatus;
+	__be32 devCmdStatus;
+	__be32 devSecStatus;
 
 	// cfg AER regs
-	uint32_t rootErrorStatus;
-	uint32_t uncorrErrorStatus;
-	uint32_t corrErrorStatus;
-	uint32_t tlpHdr1;
-	uint32_t tlpHdr2;
-	uint32_t tlpHdr3;
-	uint32_t tlpHdr4;
-	uint32_t sourceId;
+	__be32 rootErrorStatus;
+	__be32 uncorrErrorStatus;
+	__be32 corrErrorStatus;
+	__be32 tlpHdr1;
+	__be32 tlpHdr2;
+	__be32 tlpHdr3;
+	__be32 tlpHdr4;
+	__be32 sourceId;
 
-	uint32_t rsv3;
+	__be32 rsv3;
 
 	// Record data about the call to allocate a buffer.
-	uint64_t errorClass;
-	uint64_t correlator;
+	__be64 errorClass;
+	__be64 correlator;
 
 	//P7IOC MMIO Error Regs
-	uint64_t p7iocPlssr;                // n120
-	uint64_t p7iocCsr;                  // n110
-	uint64_t lemFir;                    // nC00
-	uint64_t lemErrorMask;              // nC18
-	uint64_t lemWOF;                    // nC40
-	uint64_t phbErrorStatus;            // nC80
-	uint64_t phbFirstErrorStatus;       // nC88
-	uint64_t phbErrorLog0;              // nCC0
-	uint64_t phbErrorLog1;              // nCC8
-	uint64_t mmioErrorStatus;           // nD00
-	uint64_t mmioFirstErrorStatus;      // nD08
-	uint64_t mmioErrorLog0;             // nD40
-	uint64_t mmioErrorLog1;             // nD48
-	uint64_t dma0ErrorStatus;           // nD80
-	uint64_t dma0FirstErrorStatus;      // nD88
-	uint64_t dma0ErrorLog0;             // nDC0
-	uint64_t dma0ErrorLog1;             // nDC8
-	uint64_t dma1ErrorStatus;           // nE00
-	uint64_t dma1FirstErrorStatus;      // nE08
-	uint64_t dma1ErrorLog0;             // nE40
-	uint64_t dma1ErrorLog1;             // nE48
-	uint64_t pestA[OPAL_P7IOC_NUM_PEST_REGS];
-	uint64_t pestB[OPAL_P7IOC_NUM_PEST_REGS];
+	__be64 p7iocPlssr;                // n120
+	__be64 p7iocCsr;                  // n110
+	__be64 lemFir;                    // nC00
+	__be64 lemErrorMask;              // nC18
+	__be64 lemWOF;                    // nC40
+	__be64 phbErrorStatus;            // nC80
+	__be64 phbFirstErrorStatus;       // nC88
+	__be64 phbErrorLog0;              // nCC0
+	__be64 phbErrorLog1;              // nCC8
+	__be64 mmioErrorStatus;           // nD00
+	__be64 mmioFirstErrorStatus;      // nD08
+	__be64 mmioErrorLog0;             // nD40
+	__be64 mmioErrorLog1;             // nD48
+	__be64 dma0ErrorStatus;           // nD80
+	__be64 dma0FirstErrorStatus;      // nD88
+	__be64 dma0ErrorLog0;             // nDC0
+	__be64 dma0ErrorLog1;             // nDC8
+	__be64 dma1ErrorStatus;           // nE00
+	__be64 dma1FirstErrorStatus;      // nE08
+	__be64 dma1ErrorLog0;             // nE40
+	__be64 dma1ErrorLog1;             // nE48
+	__be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
+	__be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
 };
 
 struct OpalIoPhb3ErrorData {
@@ -758,6 +815,8 @@ int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
 				   __be64 *phb_status);
 int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
 				  uint64_t eeh_action_token);
+int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
+				uint64_t eeh_action_token);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
 
 
@@ -768,7 +827,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type,
 				    uint16_t window_num,
 				    uint64_t starting_real_address,
 				    uint64_t starting_pci_address,
-				    uint16_t segment_size);
+				    uint64_t size);
 int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
 				    uint16_t window_type, uint16_t window_num,
 				    uint16_t segment_num);
@@ -860,6 +919,7 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 		uint64_t length);
 int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
+int64_t opal_handle_hmi(void);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -902,6 +962,8 @@ extern void opal_msglog_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
+extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_handle_hmi_exception(struct pt_regs *regs);
 
 extern void opal_shutdown(void);
 extern int opal_resync_timebase(void);
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index d697b08994c9..61fe5d6f18e1 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -61,7 +61,6 @@ struct op_powerpc_model {
 };
 
 extern struct op_powerpc_model op_model_fsl_emb;
-extern struct op_powerpc_model op_model_rs64;
 extern struct op_powerpc_model op_model_power4;
 extern struct op_powerpc_model op_model_7450;
 extern struct op_powerpc_model op_model_cell;
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index bb0bd25f20d0..a5139ea6910b 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -78,10 +78,6 @@ struct paca_struct {
 	u64 kernel_toc;			/* Kernel TOC address */
 	u64 kernelbase;			/* Base address of kernel */
 	u64 kernel_msr;			/* MSR while running in kernel */
-#ifdef CONFIG_PPC_STD_MMU_64
-	u64 stab_real;			/* Absolute address of segment table */
-	u64 stab_addr;			/* Virtual address of segment table */
-#endif /* CONFIG_PPC_STD_MMU_64 */
 	void *emergency_sp;		/* pointer to emergency stack */
 	u64 data_offset;		/* per cpu data offset */
 	s16 hw_cpu_id;			/* Physical processor number */
@@ -171,6 +167,7 @@ struct paca_struct {
 	 * and already using emergency stack.
 	 */
 	u16 in_mce;
+	u8 hmi_event_available;		 /* HMI event is available */
 #endif
 
 	/* Stuff for accurate time accounting */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index b3e936027b26..814622146d5a 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -19,6 +19,8 @@
 #define MAX_EVENT_ALTERNATIVES	8
 #define MAX_LIMITED_HWCOUNTERS	2
 
+struct perf_event;
+
 /*
  * This struct provides the constants and functions needed to
  * describe the PMU on a particular POWER-family CPU.
@@ -30,7 +32,8 @@ struct power_pmu {
 	unsigned long	add_fields;
 	unsigned long	test_adder;
 	int		(*compute_mmcr)(u64 events[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[]);
+				unsigned int hwc[], unsigned long mmcr[],
+				struct perf_event *pevents[]);
 	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
 				unsigned long *valp);
 	int		(*get_alternatives)(u64 event_id, unsigned int flags,
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 3132bb9365f3..e316dad6ba76 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -150,8 +150,10 @@
 #define PPC_INST_MCRXR_MASK		0xfc0007fe
 #define PPC_INST_MFSPR_PVR		0x7c1f42a6
 #define PPC_INST_MFSPR_PVR_MASK		0xfc1fffff
+#define PPC_INST_MFTMR			0x7c0002dc
 #define PPC_INST_MSGSND			0x7c00019c
 #define PPC_INST_MSGSNDP		0x7c00011c
+#define PPC_INST_MTTMR			0x7c0003dc
 #define PPC_INST_NOP			0x60000000
 #define PPC_INST_POPCNTB		0x7c0000f4
 #define PPC_INST_POPCNTB_MASK		0xfc0007fe
@@ -369,4 +371,11 @@
 #define TABORT(r)		stringify_in_c(.long PPC_INST_TABORT \
 					       | __PPC_RA(r))
 
+/* book3e thread control instructions */
+#define TMRN(x)			((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
+#define MTTMR(tmr, r)		stringify_in_c(.long PPC_INST_MTTMR | \
+					       TMRN(tmr) | ___PPC_RS(r))
+#define MFTMR(tmr, r)		stringify_in_c(.long PPC_INST_MFTMR | \
+					       TMRN(tmr) | ___PPC_RT(r))
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h
index 2c12be5f677a..e84dd7ed505e 100644
--- a/arch/powerpc/include/asm/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/pte-fsl-booke.h
@@ -37,5 +37,7 @@
 #define _PMD_PRESENT_MASK (PAGE_MASK)
 #define _PMD_BAD	(~PAGE_MASK)
 
+#define PTE_WIMGE_SHIFT (6)
+
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index d836d945068d..b6d2d42f84b5 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -75,7 +75,8 @@
 	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
 
 #define remap_4k_pfn(vma, addr, pfn, prot)				\
-	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,		\
-			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))
+	(WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL :	\
+		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
+			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bffd89d27301..f7b97b895708 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -254,7 +254,7 @@
 #define   DSISR_PROTFAULT	0x08000000	/* protection fault */
 #define   DSISR_ISSTORE		0x02000000	/* access was a store */
 #define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
-#define   DSISR_NOSEGMENT	0x00200000	/* STAB/SLB miss */
+#define   DSISR_NOSEGMENT	0x00200000	/* SLB miss */
 #define   DSISR_KEYFAULT	0x00200000	/* Key fault */
 #define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
 #define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 464f1089b532..1d653308a33c 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -15,16 +15,28 @@
 #ifndef __ASM_POWERPC_REG_BOOKE_H__
 #define __ASM_POWERPC_REG_BOOKE_H__
 
+#include <asm/ppc-opcode.h>
+
 /* Machine State Register (MSR) Fields */
-#define MSR_GS		(1<<28) /* Guest state */
-#define MSR_UCLE	(1<<26)	/* User-mode cache lock enable */
-#define MSR_SPE		(1<<25)	/* Enable SPE */
-#define MSR_DWE		(1<<10)	/* Debug Wait Enable */
-#define MSR_UBLE	(1<<10)	/* BTB lock enable (e500) */
-#define MSR_IS		MSR_IR	/* Instruction Space */
-#define MSR_DS		MSR_DR	/* Data Space */
-#define MSR_PMM		(1<<2)	/* Performance monitor mark bit */
-#define MSR_CM		(1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
+#define MSR_GS_LG	28	/* Guest state */
+#define MSR_UCLE_LG	26	/* User-mode cache lock enable */
+#define MSR_SPE_LG	25	/* Enable SPE */
+#define MSR_DWE_LG	10	/* Debug Wait Enable */
+#define MSR_UBLE_LG	10	/* BTB lock enable (e500) */
+#define MSR_IS_LG	MSR_IR_LG /* Instruction Space */
+#define MSR_DS_LG	MSR_DR_LG /* Data Space */
+#define MSR_PMM_LG	2	/* Performance monitor mark bit */
+#define MSR_CM_LG	31	/* Computation Mode (0=32-bit, 1=64-bit) */
+
+#define MSR_GS		__MASK(MSR_GS_LG)
+#define MSR_UCLE	__MASK(MSR_UCLE_LG)
+#define MSR_SPE		__MASK(MSR_SPE_LG)
+#define MSR_DWE		__MASK(MSR_DWE_LG)
+#define MSR_UBLE	__MASK(MSR_UBLE_LG)
+#define MSR_IS		__MASK(MSR_IS_LG)
+#define MSR_DS		__MASK(MSR_DS_LG)
+#define MSR_PMM		__MASK(MSR_PMM_LG)
+#define MSR_CM		__MASK(MSR_CM_LG)
 
 #if defined(CONFIG_PPC_BOOK3E_64)
 #define MSR_64BIT	MSR_CM
@@ -260,7 +272,7 @@
 
 /* e500mc */
 #define MCSR_DCPERR_MC	0x20000000UL /* D-Cache Parity Error */
-#define MCSR_L2MMU_MHIT	0x04000000UL /* Hit on multiple TLB entries */
+#define MCSR_L2MMU_MHIT	0x08000000UL /* Hit on multiple TLB entries */
 #define MCSR_NMI	0x00100000UL /* Non-Maskable Interrupt */
 #define MCSR_MAV	0x00080000UL /* MCAR address valid */
 #define MCSR_MEA	0x00040000UL /* MCAR is effective address */
@@ -598,6 +610,13 @@
 /* Bit definitions for L1CSR2. */
 #define L1CSR2_DCWS	0x40000000	/* Data Cache write shadow */
 
+/* Bit definitions for BUCSR. */
+#define BUCSR_STAC_EN	0x01000000	/* Segment Target Address Cache */
+#define BUCSR_LS_EN	0x00400000	/* Link Stack */
+#define BUCSR_BBFI	0x00000200	/* Branch Buffer flash invalidate */
+#define BUCSR_BPEN	0x00000001	/* Branch prediction enable */
+#define BUCSR_INIT	(BUCSR_STAC_EN | BUCSR_LS_EN | BUCSR_BBFI | BUCSR_BPEN)
+
 /* Bit definitions for L2CSR0. */
 #define L2CSR0_L2E	0x80000000	/* L2 Cache Enable */
 #define L2CSR0_L2PE	0x40000000	/* L2 Cache Parity/ECC Enable */
@@ -721,5 +740,23 @@
 #define MMUBE1_VBE4		0x00000002
 #define MMUBE1_VBE5		0x00000001
 
+#define TMRN_IMSR0	0x120	/* Initial MSR Register 0 (e6500) */
+#define TMRN_IMSR1	0x121	/* Initial MSR Register 1 (e6500) */
+#define TMRN_INIA0	0x140	/* Next Instruction Address Register 0 */
+#define TMRN_INIA1	0x141	/* Next Instruction Address Register 1 */
+#define SPRN_TENSR	0x1b5	/* Thread Enable Status Register */
+#define SPRN_TENS	0x1b6	/* Thread Enable Set Register */
+#define SPRN_TENC	0x1b7	/* Thread Enable Clear Register */
+
+#define TEN_THREAD(x)	(1 << (x))
+
+#ifndef __ASSEMBLY__
+#define mftmr(rn)	({unsigned long rval; \
+			asm volatile(MFTMR(rn, %0) : "=r" (rval)); rval;})
+#define mttmr(rn, v)	asm volatile(MTTMR(rn, %0) : \
+				     : "r" ((unsigned long)(v)) \
+				     : "memory")
+#endif /* !__ASSEMBLY__ */
+
 #endif /* __ASM_POWERPC_REG_BOOKE_H__ */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index babbeca6850f..542bc0f0673f 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -77,10 +77,10 @@ SYSCALL_SPU(setreuid)
 SYSCALL_SPU(setregid)
 #define compat_sys_sigsuspend sys_sigsuspend
 SYS32ONLY(sigsuspend)
-COMPAT_SYS(sigpending)
+SYSX(sys_ni_syscall,compat_sys_sigpending,sys_sigpending)
 SYSCALL_SPU(sethostname)
 COMPAT_SYS_SPU(setrlimit)
-COMPAT_SYS(old_getrlimit)
+SYSX(sys_ni_syscall,compat_sys_old_getrlimit,sys_old_getrlimit)
 COMPAT_SYS_SPU(getrusage)
 COMPAT_SYS_SPU(gettimeofday)
 COMPAT_SYS_SPU(settimeofday)
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 5712f06905a9..c15da6073cb8 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit,
 );
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+extern void opal_tracepoint_regfunc(void);
+extern void opal_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(opal_entry,
+
+	TP_PROTO(unsigned long opcode, unsigned long *args),
+
+	TP_ARGS(opcode, args),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(opal_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval),
+
+	TP_ARGS(opcode, retval),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	opal_tracepoint_regfunc, opal_tracepoint_unregfunc
+);
+#endif
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f5995a912213..e35054054c32 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -216,8 +216,6 @@ int main(void)
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_STD_MMU_64
-	DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
-	DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 0c157642c2a1..9b6dcaaec1a3 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -123,96 +123,6 @@ extern void __restore_cpu_e6500(void);
 
 static struct cpu_spec __initdata cpu_specs[] = {
 #ifdef CONFIG_PPC_BOOK3S_64
-	{	/* Power3 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00400000,
-		.cpu_name		= "POWER3 (630)",
-		.cpu_features		= CPU_FTRS_POWER3,
-		.cpu_user_features	= COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power3",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "power3",
-	},
-	{	/* Power3+ */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00410000,
-		.cpu_name		= "POWER3 (630+)",
-		.cpu_features		= CPU_FTRS_POWER3,
-		.cpu_user_features	= COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power3",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "power3",
-	},
-	{	/* Northstar */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00330000,
-		.cpu_name		= "RS64-II (northstar)",
-		.cpu_features		= CPU_FTRS_RS64,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/rs64",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "rs64",
-	},
-	{	/* Pulsar */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00340000,
-		.cpu_name		= "RS64-III (pulsar)",
-		.cpu_features		= CPU_FTRS_RS64,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/rs64",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "rs64",
-	},
-	{	/* I-star */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00360000,
-		.cpu_name		= "RS64-III (icestar)",
-		.cpu_features		= CPU_FTRS_RS64,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/rs64",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "rs64",
-	},
-	{	/* S-star */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00370000,
-		.cpu_name		= "RS64-IV (sstar)",
-		.cpu_features		= CPU_FTRS_RS64,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/rs64",
-		.oprofile_type		= PPC_OPROFILE_RS64,
-		.platform		= "rs64",
-	},
 	{	/* Power4 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00350000,
@@ -617,7 +527,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif	/* CONFIG_PPC_BOOK3S_64 */
 
 #ifdef CONFIG_PPC32
-#if CLASSIC_PPC
+#ifdef CONFIG_PPC_BOOK3S_32
 	{	/* 601 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00010000,
@@ -1257,7 +1167,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_generic,
 		.platform		= "ppc603",
 	},
-#endif /* CLASSIC_PPC */
+#endif /* CONFIG_PPC_BOOK3S_32 */
 #ifdef CONFIG_8xx
 	{	/* 8xx */
 		.pvr_mask		= 0xffff0000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 86e25702aaca..59a64f8dc85f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -27,6 +27,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/pci.h>
+#include <linux/iommu.h>
 #include <linux/proc_fs.h>
 #include <linux/rbtree.h>
 #include <linux/reboot.h>
@@ -40,6 +41,7 @@
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
 #include <asm/io.h>
+#include <asm/iommu.h>
 #include <asm/machdep.h>
 #include <asm/ppc-pci.h>
 #include <asm/rtas.h>
@@ -108,6 +110,9 @@ struct eeh_ops *eeh_ops = NULL;
 /* Lock to avoid races due to multiple reports of an error */
 DEFINE_RAW_SPINLOCK(confirm_error_lock);
 
+/* Lock to protect passed flags */
+static DEFINE_MUTEX(eeh_dev_mutex);
+
 /* Buffer for reporting pci register dumps. Its here in BSS, and
  * not dynamically alloced, so that it ends up in RMO where RTAS
  * can access it.
@@ -137,7 +142,7 @@ static struct eeh_stats eeh_stats;
 static int __init eeh_setup(char *str)
 {
 	if (!strcmp(str, "off"))
-		eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+		eeh_add_flag(EEH_FORCE_DISABLED);
 
 	return 1;
 }
@@ -152,12 +157,13 @@ __setup("eeh=", eeh_setup);
  * This routine captures assorted PCI configuration space data,
  * and puts them into a buffer for RTAS error logging.
  */
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
 {
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 	u32 cfg;
 	int cap, i;
-	int n = 0;
+	int n = 0, l = 0;
+	char buffer[128];
 
 	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
 	pr_warn("EEH: of node=%s\n", dn->full_name);
@@ -202,8 +208,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 		for (i=0; i<=8; i++) {
 			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-			pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);
+
+			if ((i % 4) == 0) {
+				if (i != 0)
+					pr_warn("%s\n", buffer);
+
+				l = scnprintf(buffer, sizeof(buffer),
+					      "EEH: PCI-E %02x: %08x ",
+					      4*i, cfg);
+			} else {
+				l += scnprintf(buffer+l, sizeof(buffer)-l,
+					       "%08x ", cfg);
+			}
+
 		}
+
+		pr_warn("%s\n", buffer);
 	}
 
 	/* If AER capable, dump it */
@@ -212,11 +232,24 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
 		pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
-		for (i=0; i<14; i++) {
+		for (i=0; i<=13; i++) {
 			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-			pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);
+
+			if ((i % 4) == 0) {
+				if (i != 0)
+					pr_warn("%s\n", buffer);
+
+				l = scnprintf(buffer, sizeof(buffer),
+					      "EEH: PCI-E AER %02x: %08x ",
+					      4*i, cfg);
+			} else {
+				l += scnprintf(buffer+l, sizeof(buffer)-l,
+					       "%08x ", cfg);
+			}
 		}
+
+		pr_warn("%s\n", buffer);
 	}
 
 	return n;
@@ -247,7 +280,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 	 * 0xFF's is always returned from PCI config space.
 	 */
 	if (!(pe->type & EEH_PE_PHB)) {
-		if (eeh_probe_mode_devtree())
+		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
 			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 		eeh_ops->configure_bridge(pe);
 		eeh_pe_restore_bars(pe);
@@ -298,14 +331,14 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 	unsigned long flags;
 	int ret;
 
-	if (!eeh_probe_mode_dev())
+	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
 		return -EPERM;
 
 	/* Find the PHB PE */
 	phb_pe = eeh_phb_pe_get(pe->phb);
 	if (!phb_pe) {
-		pr_warning("%s Can't find PE for PHB#%d\n",
-			   __func__, pe->phb->global_number);
+		pr_warn("%s Can't find PE for PHB#%d\n",
+			__func__, pe->phb->global_number);
 		return -EEXIST;
 	}
 
@@ -400,6 +433,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	if (ret > 0)
 		return ret;
 
+	/*
+	 * If the PE isn't owned by us, we shouldn't check the
+	 * state. Instead, let the owner handle it if the PE has
+	 * been frozen.
+	 */
+	if (eeh_pe_passed(pe))
+		return 0;
+
 	/* If we already have a pending isolation event for this
 	 * slot, we know it's bad already, we don't need to check.
 	 * Do this checking under a lock; as multiple PCI devices
@@ -746,13 +787,13 @@ void eeh_save_bars(struct eeh_dev *edev)
 int __init eeh_ops_register(struct eeh_ops *ops)
 {
 	if (!ops->name) {
-		pr_warning("%s: Invalid EEH ops name for %p\n",
+		pr_warn("%s: Invalid EEH ops name for %p\n",
 			__func__, ops);
 		return -EINVAL;
 	}
 
 	if (eeh_ops && eeh_ops != ops) {
-		pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
 			__func__, eeh_ops->name, ops->name);
 		return -EEXIST;
 	}
@@ -772,7 +813,7 @@ int __init eeh_ops_register(struct eeh_ops *ops)
 int __exit eeh_ops_unregister(const char *name)
 {
 	if (!name || !strlen(name)) {
-		pr_warning("%s: Invalid EEH ops name\n",
+		pr_warn("%s: Invalid EEH ops name\n",
 			__func__);
 		return -EINVAL;
 	}
@@ -788,7 +829,7 @@ int __exit eeh_ops_unregister(const char *name)
 static int eeh_reboot_notifier(struct notifier_block *nb,
 			       unsigned long action, void *unused)
 {
-	eeh_set_enable(false);
+	eeh_clear_flag(EEH_ENABLED);
 	return NOTIFY_DONE;
 }
 
@@ -837,11 +878,11 @@ int eeh_init(void)
 
 	/* call platform initialization function */
 	if (!eeh_ops) {
-		pr_warning("%s: Platform EEH operation not found\n",
+		pr_warn("%s: Platform EEH operation not found\n",
 			__func__);
 		return -EEXIST;
 	} else if ((ret = eeh_ops->init())) {
-		pr_warning("%s: Failed to call platform init function (%d)\n",
+		pr_warn("%s: Failed to call platform init function (%d)\n",
 			__func__, ret);
 		return ret;
 	}
@@ -852,13 +893,13 @@ int eeh_init(void)
 		return ret;
 
 	/* Enable EEH for all adapters */
-	if (eeh_probe_mode_devtree()) {
+	if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) {
 		list_for_each_entry_safe(hose, tmp,
 			&hose_list, list_node) {
 			phb = hose->dn;
 			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
 		}
-	} else if (eeh_probe_mode_dev()) {
+	} else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) {
 		list_for_each_entry_safe(hose, tmp,
 			&hose_list, list_node)
 			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
@@ -882,7 +923,7 @@ int eeh_init(void)
 	if (eeh_enabled())
 		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
-		pr_warning("EEH: No capable adapters found\n");
+		pr_warn("EEH: No capable adapters found\n");
 
 	return ret;
 }
@@ -910,7 +951,7 @@ void eeh_add_device_early(struct device_node *dn)
 	 * would delay the probe until late stage because
 	 * the PCI device isn't available this moment.
 	 */
-	if (!eeh_probe_mode_devtree())
+	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
 		return;
 
 	if (!of_node_to_eeh_dev(dn))
@@ -996,7 +1037,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	 * We have to do the EEH probe here because the PCI device
 	 * hasn't been created yet in the early stage.
 	 */
-	if (eeh_probe_mode_dev())
+	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
 		eeh_ops->dev_probe(dev, NULL);
 
 	eeh_addr_cache_insert_dev(dev);
@@ -1100,6 +1141,285 @@ void eeh_remove_device(struct pci_dev *dev)
 	edev->mode &= ~EEH_DEV_SYSFS;
 }
 
+/**
+ * eeh_dev_open - Increase count of pass through devices for PE
+ * @pdev: PCI device
+ *
+ * Increase count of passed through devices for the indicated
+ * PE. In the result, the EEH errors detected on the PE won't be
+ * reported. The PE owner will be responsible for detection
+ * and recovery.
+ */
+int eeh_dev_open(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev;
+
+	mutex_lock(&eeh_dev_mutex);
+
+	/* No PCI device ? */
+	if (!pdev)
+		goto out;
+
+	/* No EEH device or PE ? */
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (!edev || !edev->pe)
+		goto out;
+
+	/* Increase PE's pass through count */
+	atomic_inc(&edev->pe->pass_dev_cnt);
+	mutex_unlock(&eeh_dev_mutex);
+
+	return 0;
+out:
+	mutex_unlock(&eeh_dev_mutex);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(eeh_dev_open);
+
+/**
+ * eeh_dev_release - Decrease count of pass through devices for PE
+ * @pdev: PCI device
+ *
+ * Decrease count of pass through devices for the indicated PE. If
+ * there is no passed through device in PE, the EEH errors detected
+ * on the PE will be reported and handled as usual.
+ */
+void eeh_dev_release(struct pci_dev *pdev)
+{
+	struct eeh_dev *edev;
+
+	mutex_lock(&eeh_dev_mutex);
+
+	/* No PCI device ? */
+	if (!pdev)
+		goto out;
+
+	/* No EEH device ? */
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
+		goto out;
+
+	/* Decrease PE's pass through count */
+	atomic_dec(&edev->pe->pass_dev_cnt);
+	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
+out:
+	mutex_unlock(&eeh_dev_mutex);
+}
+EXPORT_SYMBOL(eeh_dev_release);
+
+#ifdef CONFIG_IOMMU_API
+
+static int dev_has_iommu_table(struct device *dev, void *data)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev **ppdev = data;
+	struct iommu_table *tbl;
+
+	if (!dev)
+		return 0;
+
+	tbl = get_iommu_table_base(dev);
+	if (tbl && tbl->it_group) {
+		*ppdev = pdev;
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
+ * @group: IOMMU group
+ *
+ * The routine is called to convert IOMMU group to EEH PE.
+ */
+struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
+{
+	struct pci_dev *pdev = NULL;
+	struct eeh_dev *edev;
+	int ret;
+
+	/* No IOMMU group ? */
+	if (!group)
+		return NULL;
+
+	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
+	if (!ret || !pdev)
+		return NULL;
+
+	/* No EEH device or PE ? */
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (!edev || !edev->pe)
+		return NULL;
+
+	return edev->pe;
+}
+EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
+
+#endif /* CONFIG_IOMMU_API */
+
+/**
+ * eeh_pe_set_option - Set options for the indicated PE
+ * @pe: EEH PE
+ * @option: requested option
+ *
+ * The routine is called to enable or disable EEH functionality
+ * on the indicated PE, to enable IO or DMA for the frozen PE.
+ */
+int eeh_pe_set_option(struct eeh_pe *pe, int option)
+{
+	int ret = 0;
+
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	/*
+	 * EEH functionality could possibly be disabled, just
+	 * return error for the case. And the EEH functinality
+	 * isn't expected to be disabled on one specific PE.
+	 */
+	switch (option) {
+	case EEH_OPT_ENABLE:
+		if (eeh_enabled())
+			break;
+		ret = -EIO;
+		break;
+	case EEH_OPT_DISABLE:
+		break;
+	case EEH_OPT_THAW_MMIO:
+	case EEH_OPT_THAW_DMA:
+		if (!eeh_ops || !eeh_ops->set_option) {
+			ret = -ENOENT;
+			break;
+		}
+
+		ret = eeh_ops->set_option(pe, option);
+		break;
+	default:
+		pr_debug("%s: Option %d out of range (%d, %d)\n",
+			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_set_option);
+
+/**
+ * eeh_pe_get_state - Retrieve PE's state
+ * @pe: EEH PE
+ *
+ * Retrieve the PE's state, which includes 3 aspects: enabled
+ * DMA, enabled IO and asserted reset.
+ */
+int eeh_pe_get_state(struct eeh_pe *pe)
+{
+	int result, ret = 0;
+	bool rst_active, dma_en, mmio_en;
+
+	/* Existing PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	if (!eeh_ops || !eeh_ops->get_state)
+		return -ENOENT;
+
+	result = eeh_ops->get_state(pe, NULL);
+	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
+	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
+	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);
+
+	if (rst_active)
+		ret = EEH_PE_STATE_RESET;
+	else if (dma_en && mmio_en)
+		ret = EEH_PE_STATE_NORMAL;
+	else if (!dma_en && !mmio_en)
+		ret = EEH_PE_STATE_STOPPED_IO_DMA;
+	else if (!dma_en && mmio_en)
+		ret = EEH_PE_STATE_STOPPED_DMA;
+	else
+		ret = EEH_PE_STATE_UNAVAIL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_get_state);
+
+/**
+ * eeh_pe_reset - Issue PE reset according to specified type
+ * @pe: EEH PE
+ * @option: reset type
+ *
+ * The routine is called to reset the specified PE with the
+ * indicated type, either fundamental reset or hot reset.
+ * PE reset is the most important part for error recovery.
+ */
+int eeh_pe_reset(struct eeh_pe *pe, int option)
+{
+	int ret = 0;
+
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
+		return -ENOENT;
+
+	switch (option) {
+	case EEH_RESET_DEACTIVATE:
+		ret = eeh_ops->reset(pe, option);
+		if (ret)
+			break;
+
+		/*
+		 * The PE is still in frozen state and we need to clear
+		 * that. It's good to clear frozen state after deassert
+		 * to avoid messy IO access during reset, which might
+		 * cause recursive frozen PE.
+		 */
+		ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
+		if (!ret)
+			ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
+		if (!ret)
+			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+		break;
+	case EEH_RESET_HOT:
+	case EEH_RESET_FUNDAMENTAL:
+		ret = eeh_ops->reset(pe, option);
+		break;
+	default:
+		pr_debug("%s: Unsupported option %d\n",
+			__func__, option);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_reset);
+
+/**
+ * eeh_pe_configure - Configure PCI bridges after PE reset
+ * @pe: EEH PE
+ *
+ * The routine is called to restore the PCI config space for
+ * those PCI devices, especially PCI bridges affected by PE
+ * reset issued previously.
+ */
+int eeh_pe_configure(struct eeh_pe *pe)
+{
+	int ret = 0;
+
+	/* Invalid PE ? */
+	if (!pe)
+		return -ENODEV;
+
+	/* Restore config space for the affected devices */
+	eeh_pe_restore_bars(pe);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_pe_configure);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (!eeh_enabled()) {
@@ -1143,9 +1463,9 @@ static const struct file_operations proc_eeh_operations = {
 static int eeh_enable_dbgfs_set(void *data, u64 val)
 {
 	if (val)
-		eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+		eeh_clear_flag(EEH_FORCE_DISABLED);
 	else
-		eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+		eeh_add_flag(EEH_FORCE_DISABLED);
 
 	/* Notify the backend */
 	if (eeh_ops->post_init)
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index e8c9fd546a5c..07d8a2423a61 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -143,7 +143,7 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 		} else {
 			if (dev != piar->pcidev ||
 			    alo != piar->addr_lo || ahi != piar->addr_hi) {
-				pr_warning("PIAR: overlapping address range\n");
+				pr_warn("PIAR: overlapping address range\n");
 			}
 			return piar;
 		}
@@ -177,19 +177,20 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
 
 	dn = pci_device_to_OF_node(dev);
 	if (!dn) {
-		pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
+		pr_warn("PCI: no pci dn found for dev=%s\n",
+			pci_name(dev));
 		return;
 	}
 
 	edev = of_node_to_eeh_dev(dn);
 	if (!edev) {
-		pr_warning("PCI: no EEH dev found for dn=%s\n",
+		pr_warn("PCI: no EEH dev found for dn=%s\n",
 			dn->full_name);
 		return;
 	}
 
 	/* Skip any devices for which EEH is not enabled. */
-	if (!eeh_probe_mode_dev() && !edev->pe) {
+	if (!edev->pe) {
 #ifdef DEBUG
 		pr_info("PCI: skip building address cache for=%s - %s\n",
 			pci_name(dev), dn->full_name);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index 1efa28f5fc54..e5274ee9a75f 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -57,7 +57,8 @@ void *eeh_dev_init(struct device_node *dn, void *data)
 	/* Allocate EEH device */
 	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
 	if (!edev) {
-		pr_warning("%s: out of memory\n", __func__);
+		pr_warn("%s: out of memory\n",
+			__func__);
 		return NULL;
 	}
 
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 420da61d4ce0..6a0dcee8e931 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -599,7 +599,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	pe->freeze_count++;
 	if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
-	pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+	pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
 		pe->freeze_count);
 
 	/* Walk the various device drivers attached to this slot through
@@ -616,7 +616,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	 */
 	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
 	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
-		pr_warning("EEH: Permanent failure\n");
+		pr_warn("EEH: Permanent failure\n");
 		goto hard_fail;
 	}
 
@@ -635,8 +635,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Reset with hotplug activity\n");
 		rc = eeh_reset_device(pe, frozen_bus);
 		if (rc) {
-			pr_warning("%s: Unable to reset, err=%d\n",
-				   __func__, rc);
+			pr_warn("%s: Unable to reset, err=%d\n",
+				__func__, rc);
 			goto hard_fail;
 		}
 	}
@@ -678,7 +678,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 
 	/* If any device has a hard failure, then shut off everything. */
 	if (result == PCI_ERS_RESULT_DISCONNECT) {
-		pr_warning("EEH: Device driver gave up\n");
+		pr_warn("EEH: Device driver gave up\n");
 		goto hard_fail;
 	}
 
@@ -687,8 +687,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 		pr_info("EEH: Reset without hotplug activity\n");
 		rc = eeh_reset_device(pe, NULL);
 		if (rc) {
-			pr_warning("%s: Cannot reset, err=%d\n",
-				   __func__, rc);
+			pr_warn("%s: Cannot reset, err=%d\n",
+				__func__, rc);
 			goto hard_fail;
 		}
 
@@ -701,7 +701,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* All devices should claim they have recovered by now. */
 	if ((result != PCI_ERS_RESULT_RECOVERED) &&
 	    (result != PCI_ERS_RESULT_NONE)) {
-		pr_warning("EEH: Not recovered\n");
+		pr_warn("EEH: Not recovered\n");
 		goto hard_fail;
 	}
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index fbd01eba4473..00e3844525a6 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -32,8 +32,23 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
+static int eeh_pe_aux_size = 0;
 static LIST_HEAD(eeh_phb_pe);
 
+/**
+ * eeh_set_pe_aux_size - Set PE auxillary data size
+ * @size: PE auxillary data size
+ *
+ * Set PE auxillary data size
+ */
+void eeh_set_pe_aux_size(int size)
+{
+	if (size < 0)
+		return;
+
+	eeh_pe_aux_size = size;
+}
+
 /**
  * eeh_pe_alloc - Allocate PE
  * @phb: PCI controller
@@ -44,9 +59,16 @@ static LIST_HEAD(eeh_phb_pe);
 static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
 {
 	struct eeh_pe *pe;
+	size_t alloc_size;
+
+	alloc_size = sizeof(struct eeh_pe);
+	if (eeh_pe_aux_size) {
+		alloc_size = ALIGN(alloc_size, cache_line_size());
+		alloc_size += eeh_pe_aux_size;
+	}
 
 	/* Allocate PHB PE */
-	pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
+	pe = kzalloc(alloc_size, GFP_KERNEL);
 	if (!pe) return NULL;
 
 	/* Initialize PHB PE */
@@ -56,6 +78,8 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
 	INIT_LIST_HEAD(&pe->child);
 	INIT_LIST_HEAD(&pe->edevs);
 
+	pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
+				      cache_line_size());
 	return pe;
 }
 
@@ -179,7 +203,8 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 	void *ret;
 
 	if (!root) {
-		pr_warning("%s: Invalid PE %p\n", __func__, root);
+		pr_warn("%s: Invalid PE %p\n",
+			__func__, root);
 		return NULL;
 	}
 
@@ -350,17 +375,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 	pe->addr	= edev->pe_config_addr;
 	pe->config_addr	= edev->config_addr;
 
-	/*
-	 * While doing PE reset, we probably hot-reset the
-	 * upstream bridge. However, the PCI devices including
-	 * the associated EEH devices might be removed when EEH
-	 * core is doing recovery. So that won't safe to retrieve
-	 * the bridge through downstream EEH device. We have to
-	 * trace the parent PCI bus, then the upstream bridge.
-	 */
-	if (eeh_probe_mode_dev())
-		pe->bus = eeh_dev_to_pci_dev(edev)->bus;
-
 	/*
 	 * Put the new EEH PE into hierarchy tree. If the parent
 	 * can't be found, the newly created PE will be attached
@@ -802,53 +816,33 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
  */
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
-	struct pci_controller *hose;
 	struct pci_bus *bus = eeh_pe_bus_get(pe);
-	struct pci_dev *pdev;
-	struct device_node *dn;
-	const char *loc;
+	struct device_node *dn = pci_bus_to_OF_node(bus);
+	const char *loc = NULL;
 
-	if (!bus)
-		return "N/A";
+	if (!dn)
+		goto out;
 
 	/* PHB PE or root PE ? */
 	if (pci_is_root_bus(bus)) {
-		hose = pci_bus_to_host(bus);
-		loc = of_get_property(hose->dn,
-				"ibm,loc-code", NULL);
+		loc = of_get_property(dn, "ibm,loc-code", NULL);
+		if (!loc)
+			loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
 		if (loc)
-			return loc;
-		loc = of_get_property(hose->dn,
-				"ibm,io-base-loc-code", NULL);
-		if (loc)
-			return loc;
+			goto out;
 
-		pdev = pci_get_slot(bus, 0x0);
-	} else {
-		pdev = bus->self;
-	}
-
-	if (!pdev) {
-		loc = "N/A";
-		goto out;
-	}
-
-	dn = pci_device_to_OF_node(pdev);
-	if (!dn) {
-		loc = "N/A";
-		goto out;
+		/* Check the root port */
+		dn = dn->child;
+		if (!dn)
+			goto out;
 	}
 
 	loc = of_get_property(dn, "ibm,loc-code", NULL);
 	if (!loc)
 		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-	if (!loc)
-		loc = "N/A";
 
 out:
-	if (pci_is_root_bus(bus) && pdev)
-		pci_dev_put(pdev);
-	return loc;
+	return loc ? loc : "N/A";
 }
 
 /**
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6528c5e2cc44..5bbd1bc8c3b0 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -482,16 +482,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
 	ld	r8,KSP(r4)	/* new stack pointer */
 #ifdef CONFIG_PPC_BOOK3S
 BEGIN_FTR_SECTION
-  BEGIN_FTR_SECTION_NESTED(95)
 	clrrdi	r6,r8,28	/* get its ESID */
 	clrrdi	r9,r1,28	/* get current sp ESID */
-  FTR_SECTION_ELSE_NESTED(95)
+FTR_SECTION_ELSE
 	clrrdi	r6,r8,40	/* get its 1T ESID */
 	clrrdi	r9,r1,40	/* get current sp 1T ESID */
-  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
-FTR_SECTION_ELSE
-	b	2f
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
 	clrldi.	r0,r6,2		/* is new ESID c00000000? */
 	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
 	cror	eq,4*cr1+eq,eq
@@ -919,6 +915,11 @@ restore_check_irq_replay:
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
  	bl	do_IRQ
 	b	ret_from_except
+1:	cmpwi	cr0,r3,0xe60
+	bne	1f
+	addi	r3,r1,STACK_FRAME_OVERHEAD;
+	bl	handle_hmi_exception
+	b	ret_from_except
 1:	cmpwi	cr0,r3,0x900
 	bne	1f
 	addi	r3,r1,STACK_FRAME_OVERHEAD;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index a7d36b19221d..6144d5a6bfe7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -188,10 +188,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 data_access_pSeries:
 	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
-BEGIN_FTR_SECTION
-	b	data_access_check_stab
-data_access_not_stab:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
 				 KVMTEST, 0x300)
 
@@ -339,7 +335,7 @@ emulation_assist_trampoline:
 hv_exception_trampoline:
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXGEN)
-	b	hmi_exception_hv
+	b	hmi_exception_early
 
 	. = 0xe80
 hv_doorbell_trampoline:
@@ -514,34 +510,6 @@ machine_check_pSeries_0:
 	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
 	EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
 	KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
-
-	/* moved from 0x300 */
-data_access_check_stab:
-	GET_PACA(r13)
-	std	r9,PACA_EXSLB+EX_R9(r13)
-	std	r10,PACA_EXSLB+EX_R10(r13)
-	mfspr	r10,SPRN_DAR
-	mfspr	r9,SPRN_DSISR
-	srdi	r10,r10,60
-	rlwimi	r10,r9,16,0x20
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	lbz	r9,HSTATE_IN_GUEST(r13)
-	rlwimi	r10,r9,8,0x300
-#endif
-	mfcr	r9
-	cmpwi	r10,0x2c
-	beq	do_stab_bolted_pSeries
-	mtcrf	0x80,r9
-	ld	r9,PACA_EXSLB+EX_R9(r13)
-	ld	r10,PACA_EXSLB+EX_R10(r13)
-	b	data_access_not_stab
-do_stab_bolted_pSeries:
-	std	r11,PACA_EXSLB+EX_R11(r13)
-	std	r12,PACA_EXSLB+EX_R12(r13)
-	GET_SCRATCH0(r10)
-	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD)
-
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
 	KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
 	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
@@ -621,8 +589,64 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
 	STD_EXCEPTION_HV_OOL(0xe42, emulation_assist)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
-	STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */
+	MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
+
+	.globl hmi_exception_early
+hmi_exception_early:
+	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+	mr	r10,r1			/* Save r1			*/
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack		*/
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame		*/
+	std	r9,_CCR(r1)		/* save CR in stackframe	*/
+	mfspr	r11,SPRN_HSRR0		/* Save HSRR0 */
+	std	r11,_NIP(r1)		/* save HSRR0 in stackframe	*/
+	mfspr	r12,SPRN_HSRR1		/* Save SRR1 */
+	std	r12,_MSR(r1)		/* save SRR1 in stackframe	*/
+	std	r10,0(r1)		/* make stack chain pointer	*/
+	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
+	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
+	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
+	EXCEPTION_PROLOG_COMMON_3(0xe60)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	hmi_exception_realmode
+	/* Windup the stack. */
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r0,MSR_RI
+	mfmsr	r9			/* get MSR value */
+	andc	r9,r9,r0
+	mtmsrd	r9,1			/* Clear MSR_RI */
+	/* Move original HSRR0 and HSRR1 into the respective regs */
+	ld	r9,_MSR(r1)
+	mtspr	SPRN_HSRR1,r9
+	ld	r3,_NIP(r1)
+	mtspr	SPRN_HSRR0,r3
+	ld	r9,_CTR(r1)
+	mtctr	r9
+	ld	r9,_XER(r1)
+	mtxer	r9
+	ld	r9,_LINK(r1)
+	mtlr	r9
+	REST_GPR(0, r1)
+	REST_8GPRS(2, r1)
+	REST_GPR(10, r1)
+	ld	r11,_CCR(r1)
+	mtcr	r11
+	REST_GPR(11, r1)
+	REST_2GPRS(12, r1)
+	/* restore original r1. */
+	ld	r1,GPR1(r1)
+
+	/*
+	 * Go to virtual mode and pull the HMI event information from
+	 * firmware.
+	 */
+	.globl hmi_exception_after_realmode
+hmi_exception_after_realmode:
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_0(PACA_EXGEN)
+	b	hmi_exception_hv
+
 	MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
 
@@ -643,6 +667,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
  * - If it was a decrementer interrupt, we bump the dec to max and and return.
  * - If it was a doorbell we return immediately since doorbells are edge
  *   triggered and won't automatically refire.
+ * - If it was a HMI we return immediately since we handled it in realmode
+ *   and it won't refire.
  * - else we hard disable and return.
  * This is called with r10 containing the value to OR to the paca field.
  */
@@ -659,6 +685,8 @@ masked_##_H##interrupt:					\
 	mtspr	SPRN_DEC,r10;				\
 	b	2f;					\
 1:	cmpwi	r10,PACA_IRQ_DBELL;			\
+	beq	2f;					\
+	cmpwi	r10,PACA_IRQ_HMI;			\
 	beq	2f;					\
 	mfspr	r10,SPRN_##_H##SRR1;			\
 	rldicl	r10,r10,48,1; /* clear MSR_EE */	\
@@ -799,7 +827,7 @@ kvmppc_skip_Hinterrupt:
 	STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
 	STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
-	STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception)
+	STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception)
 #ifdef CONFIG_PPC_DOORBELL
 	STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
 #else
@@ -984,66 +1012,6 @@ system_call_entry:
 ppc64_runlatch_on_trampoline:
 	b	__ppc64_runlatch_on
 
-/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
- */
-bad_stack:
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,64+INT_FRAME_SIZE
-	std	r9,_CCR(r1)
-	std	r10,GPR1(r1)
-	std	r11,_NIP(r1)
-	std	r12,_MSR(r1)
-	mfspr	r11,SPRN_DAR
-	mfspr	r12,SPRN_DSISR
-	std	r11,_DAR(r1)
-	std	r12,_DSISR(r1)
-	mflr	r10
-	mfctr	r11
-	mfxer	r12
-	std	r10,_LINK(r1)
-	std	r11,_CTR(r1)
-	std	r12,_XER(r1)
-	SAVE_GPR(0,r1)
-	SAVE_GPR(2,r1)
-	ld	r10,EX_R3(r3)
-	std	r10,GPR3(r1)
-	SAVE_GPR(4,r1)
-	SAVE_4GPRS(5,r1)
-	ld	r9,EX_R9(r3)
-	ld	r10,EX_R10(r3)
-	SAVE_2GPRS(9,r1)
-	ld	r9,EX_R11(r3)
-	ld	r10,EX_R12(r3)
-	ld	r11,EX_R13(r3)
-	std	r9,GPR11(r1)
-	std	r10,GPR12(r1)
-	std	r11,GPR13(r1)
-BEGIN_FTR_SECTION
-	ld	r10,EX_CFAR(r3)
-	std	r10,ORIG_GPR3(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-	SAVE_8GPRS(14,r1)
-	SAVE_10GPRS(22,r1)
-	lhz	r12,PACA_TRAP_SAVE(r13)
-	std	r12,_TRAP(r1)
-	addi	r11,r1,INT_FRAME_SIZE
-	std	r11,0(r1)
-	li	r12,0
-	std	r12,0(r11)
-	ld	r2,PACATOC(r13)
-	ld	r11,exception_marker@toc(r2)
-	std	r12,RESULT(r1)
-	std	r11,STACK_FRAME_OVERHEAD-16(r1)
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	kernel_bad_stack
-	b	1b
-
 /*
  * Here r13 points to the paca, r9 contains the saved CR,
  * SRR0 and SRR1 are saved in r11 and r12,
@@ -1057,7 +1025,7 @@ data_access_common:
 	mfspr	r10,SPRN_DSISR
 	stw	r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	ld	r12,_MSR(r1)
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
@@ -1073,7 +1041,7 @@ h_data_storage_common:
 	stw     r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
 	bl      save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi    r3,r1,STACK_FRAME_OVERHEAD
 	bl      unknown_exception
 	b       ret_from_except
@@ -1082,7 +1050,7 @@ h_data_storage_common:
 	.globl instruction_access_common
 instruction_access_common:
 	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	ld	r12,_MSR(r1)
 	ld	r3,_NIP(r1)
 	andis.	r4,r12,0x5820
@@ -1146,7 +1114,7 @@ slb_miss_fault:
 
 unrecov_user_slb:
 	EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	bl	save_nvgprs
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	unrecoverable_exception
@@ -1169,7 +1137,7 @@ machine_check_common:
 	stw	r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
 	FINISH_NAP
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	ld	r3,PACA_EXGEN+EX_DAR(r13)
 	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
 	std	r3,_DAR(r1)
@@ -1192,7 +1160,7 @@ alignment_common:
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	alignment_exception
 	b	ret_from_except
@@ -1202,7 +1170,7 @@ alignment_common:
 program_check_common:
 	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	program_check_exception
 	b	ret_from_except
@@ -1213,7 +1181,7 @@ fp_unavailable_common:
 	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
 	bne	1f			/* if from user, just load it up */
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	kernel_fp_unavailable_exception
 	BUG_OPCODE
@@ -1232,7 +1200,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	fp_unavailable_tm
 	b	ret_from_except
@@ -1258,7 +1226,7 @@ BEGIN_FTR_SECTION
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	altivec_unavailable_tm
 	b	ret_from_except
@@ -1267,7 +1235,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	altivec_unavailable_exception
 	b	ret_from_except
@@ -1292,7 +1260,7 @@ BEGIN_FTR_SECTION
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	vsx_unavailable_tm
 	b	ret_from_except
@@ -1301,7 +1269,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	bl	save_nvgprs
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	vsx_unavailable_exception
 	b	ret_from_except
@@ -1338,12 +1306,6 @@ fwnmi_data_area:
 	. = 0x8000
 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
 
-/* Space for CPU0's segment table */
-	.balign 4096
-	.globl initial_stab
-initial_stab:
-	.space	4096
-
 #ifdef CONFIG_PPC_POWERNV
 _GLOBAL(opal_mc_secondary_handler)
 	HMT_MEDIUM_PPR_DISCARD
@@ -1566,7 +1528,7 @@ slb_miss_realmode:
 
 unrecov_slb:
 	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
-	DISABLE_INTS
+	RECONCILE_IRQ_STATE(r10, r11)
 	bl	save_nvgprs
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	unrecoverable_exception
@@ -1594,12 +1556,6 @@ do_hash_page:
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
 	andis.  r0,r4,DSISR_DABRMATCH@h
 	bne-    handle_dabr_fault
-
-BEGIN_FTR_SECTION
-	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
-	bne-	do_ste_alloc		/* If so handle it */
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
-
 	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
@@ -1681,113 +1637,62 @@ handle_dabr_fault:
 	bl	bad_page_fault
 	b	ret_from_except
 
-	/* here we have a segment miss */
-do_ste_alloc:
-	bl	ste_allocate		/* try to insert stab entry */
-	cmpdi	r3,0
-	bne-	handle_page_fault
-	b	fast_exception_return
-
 /*
- * r13 points to the PACA, r9 contains the saved CR,
+ * Here we have detected that the kernel stack pointer is bad.
+ * R9 contains the saved CR, r13 points to the paca,
+ * r10 contains the (bad) kernel stack pointer,
  * r11 and r12 contain the saved SRR0 and SRR1.
- * r9 - r13 are saved in paca->exslb.
- * We assume we aren't going to take any exceptions during this procedure.
- * We assume (DAR >> 60) == 0xc.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
  */
-	.align	7
-do_stab_bolted:
-	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
-	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */
-	mfspr	r11,SPRN_DAR			/* ea */
-
-	/*
-	 * check for bad kernel/user address
-	 * (ea & ~REGION_MASK) >= PGTABLE_RANGE
-	 */
-	rldicr. r9,r11,4,(63 - 46 - 4)
-	li	r9,0	/* VSID = 0 for bad address */
-	bne-	0f
-
-	/*
-	 * Calculate VSID:
-	 * This is the kernel vsid, we take the top for context from
-	 * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
-	 * Here we know that (ea >> 60) == 0xc
-	 */
-	lis	r9,(MAX_USER_CONTEXT + 1)@ha
-	addi	r9,r9,(MAX_USER_CONTEXT + 1)@l
-
-	srdi	r10,r11,SID_SHIFT
-	rldimi  r10,r9,ESID_BITS,0 /* proto vsid */
-	ASM_VSID_SCRAMBLE(r10, r9, 256M)
-	rldic	r9,r10,12,16	/* r9 = vsid << 12 */
-
-0:
-	/* Hash to the primary group */
-	ld	r10,PACASTABVIRT(r13)
-	srdi	r11,r11,SID_SHIFT
-	rldimi	r10,r11,7,52	/* r10 = first ste of the group */
-
-	/* Search the primary group for a free entry */
-1:	ld	r11,0(r10)	/* Test valid bit of the current ste	*/
-	andi.	r11,r11,0x80
-	beq	2f
-	addi	r10,r10,16
-	andi.	r11,r10,0x70
-	bne	1b
-
-	/* Stick for only searching the primary group for now.		*/
-	/* At least for now, we use a very simple random castout scheme */
-	/* Use the TB as a random number ;  OR in 1 to avoid entry 0	*/
-	mftb	r11
-	rldic	r11,r11,4,57	/* r11 = (r11 << 4) & 0x70 */
-	ori	r11,r11,0x10
-
-	/* r10 currently points to an ste one past the group of interest */
-	/* make it point to the randomly selected entry			*/
-	subi	r10,r10,128
-	or 	r10,r10,r11	/* r10 is the entry to invalidate	*/
-
-	isync			/* mark the entry invalid		*/
-	ld	r11,0(r10)
-	rldicl	r11,r11,56,1	/* clear the valid bit */
-	rotldi	r11,r11,8
-	std	r11,0(r10)
-	sync
-
-	clrrdi	r11,r11,28	/* Get the esid part of the ste		*/
-	slbie	r11
-
-2:	std	r9,8(r10)	/* Store the vsid part of the ste	*/
-	eieio
-
-	mfspr	r11,SPRN_DAR		/* Get the new esid			*/
-	clrrdi	r11,r11,28	/* Permits a full 32b of ESID		*/
-	ori	r11,r11,0x90	/* Turn on valid and kp			*/
-	std	r11,0(r10)	/* Put new entry back into the stab	*/
-
-	sync
-
-	/* All done -- return from exception. */
-	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
-	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */
-
-	andi.	r10,r12,MSR_RI
-	beq-	unrecov_slb
-
-	mtcrf	0x80,r9			/* restore CR */
-
-	mfmsr	r10
-	clrrdi	r10,r10,2
-	mtmsrd	r10,1
-
-	mtspr	SPRN_SRR0,r11
-	mtspr	SPRN_SRR1,r12
-	ld	r9,PACA_EXSLB+EX_R9(r13)
-	ld	r10,PACA_EXSLB+EX_R10(r13)
-	ld	r11,PACA_EXSLB+EX_R11(r13)
-	ld	r12,PACA_EXSLB+EX_R12(r13)
-	ld	r13,PACA_EXSLB+EX_R13(r13)
-	rfid
-	b	.	/* prevent speculative execution */
+bad_stack:
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,64+INT_FRAME_SIZE
+	std	r9,_CCR(r1)
+	std	r10,GPR1(r1)
+	std	r11,_NIP(r1)
+	std	r12,_MSR(r1)
+	mfspr	r11,SPRN_DAR
+	mfspr	r12,SPRN_DSISR
+	std	r11,_DAR(r1)
+	std	r12,_DSISR(r1)
+	mflr	r10
+	mfctr	r11
+	mfxer	r12
+	std	r10,_LINK(r1)
+	std	r11,_CTR(r1)
+	std	r12,_XER(r1)
+	SAVE_GPR(0,r1)
+	SAVE_GPR(2,r1)
+	ld	r10,EX_R3(r3)
+	std	r10,GPR3(r1)
+	SAVE_GPR(4,r1)
+	SAVE_4GPRS(5,r1)
+	ld	r9,EX_R9(r3)
+	ld	r10,EX_R10(r3)
+	SAVE_2GPRS(9,r1)
+	ld	r9,EX_R11(r3)
+	ld	r10,EX_R12(r3)
+	ld	r11,EX_R13(r3)
+	std	r9,GPR11(r1)
+	std	r10,GPR12(r1)
+	std	r11,GPR13(r1)
+BEGIN_FTR_SECTION
+	ld	r10,EX_CFAR(r3)
+	std	r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	SAVE_8GPRS(14,r1)
+	SAVE_10GPRS(22,r1)
+	lhz	r12,PACA_TRAP_SAVE(r13)
+	std	r12,_TRAP(r1)
+	addi	r11,r1,INT_FRAME_SIZE
+	std	r11,0(r1)
+	li	r12,0
+	std	r12,0(r11)
+	ld	r2,PACATOC(r13)
+	ld	r11,exception_marker@toc(r2)
+	std	r12,RESULT(r1)
+	std	r11,STACK_FRAME_OVERHEAD-16(r1)
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	kernel_bad_stack
+	b	1b
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index a95145d7f61b..d48125d0c048 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -180,6 +180,28 @@ exception_marker:
 #include "exceptions-64s.S"
 #endif
 
+#ifdef CONFIG_PPC_BOOK3E
+_GLOBAL(fsl_secondary_thread_init)
+	/* Enable branch prediction */
+	lis     r3,BUCSR_INIT@h
+	ori     r3,r3,BUCSR_INIT@l
+	mtspr   SPRN_BUCSR,r3
+	isync
+
+	/*
+	 * Fix PIR to match the linear numbering in the device tree.
+	 *
+	 * On e6500, the reset value of PIR uses the low three bits for
+	 * the thread within a core, and the upper bits for the core
+	 * number.  There are two threads per core, so shift everything
+	 * but the low bit right by two bits so that the cpu numbering is
+	 * continuous.
+	 */
+	mfspr	r3, SPRN_PIR
+	rlwimi	r3, r3, 30, 2, 30
+	mtspr	SPRN_PIR, r3
+#endif
+
 _GLOBAL(generic_secondary_thread_init)
 	mr	r24,r3
 
@@ -618,7 +640,7 @@ __secondary_start:
 	addi	r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
 	std	r14,PACAKSAVE(r13)
 
-	/* Do early setup for that CPU (stab, slb, hash table pointer) */
+	/* Do early setup for that CPU (SLB and hash table pointer) */
 	bl	early_setup_secondary
 
 	/*
@@ -771,8 +793,10 @@ start_here_multiplatform:
 	li	r0,0
 	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
 
-	/* Do very early kernel initializations, including initial hash table,
-	 * stab and slb setup before we turn on relocation.	*/
+	/*
+	 * Do very early kernel initializations, including initial hash table
+	 * and SLB setup before we turn on relocation.
+	 */
 
 	/* Restore parameters passed from prom_init/kexec */
 	mr	r3,r31
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 5cf3d367190d..be05841396cf 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -135,17 +135,68 @@ _GLOBAL(power7_sleep)
 	b	power7_powersave_common
 	/* No return */
 
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode from reset vector. It handles endianess.
+ *
+ * r13 - paca pointer
+ * r1  - stack pointer
+ * r3  - opal token
+ */
+opal_call_realmode:
+	mflr	r12
+	std	r12,_LINK(r1)
+	ld	r2,PACATOC(r13)
+	/* Set opal return address */
+	LOAD_REG_ADDR(r0,return_from_opal_call)
+	mtlr	r0
+	/* Handle endian-ness */
+	li	r0,MSR_LE
+	mfmsr	r12
+	andc	r12,r12,r0
+	mtspr	SPRN_HSRR1,r12
+	mr	r0,r3			/* Move opal token to r0 */
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+return_from_opal_call:
+	FIXUP_ENDIAN
+	ld	r0,_LINK(r1)
+	mtlr	r0
+	blr
+
+#define CHECK_HMI_INTERRUPT						\
+	mfspr	r0,SPRN_SRR1;						\
+BEGIN_FTR_SECTION_NESTED(66);						\
+	rlwinm	r0,r0,45-31,0xf;  /* extract wake reason field (P8) */	\
+FTR_SECTION_ELSE_NESTED(66);						\
+	rlwinm	r0,r0,45-31,0xe;  /* P7 wake reason field is 3 bits */	\
+ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
+	cmpwi	r0,0xa;			/* Hypervisor maintenance ? */	\
+	bne	20f;							\
+	/* Invoke opal call to handle hmi */				\
+	ld	r2,PACATOC(r13);					\
+	ld	r1,PACAR1(r13);						\
+	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
+	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	bl	opal_call_realmode;					\
+	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
+20:	nop;
+
+
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
 
+BEGIN_FTR_SECTION
+	CHECK_HMI_INTERRUPT
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	/* Time base re-sync */
-	li	r0,OPAL_RESYNC_TIMEBASE
-	LOAD_REG_ADDR(r11,opal);
-	ld	r12,8(r11);
-	ld	r2,0(r11);
-	mtctr	r12
-	bctrl
+	li	r3,OPAL_RESYNC_TIMEBASE
+	bl	opal_call_realmode;
 
 	/* TODO: Check r3 for failure */
 
@@ -163,6 +214,9 @@ _GLOBAL(power7_wakeup_tb_loss)
 
 _GLOBAL(power7_wakeup_loss)
 	ld	r1,PACAR1(r13)
+BEGIN_FTR_SECTION
+	CHECK_HMI_INTERRUPT
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -178,6 +232,9 @@ _GLOBAL(power7_wakeup_noloss)
 	lbz	r0,PACA_NAPSTATELOST(r13)
 	cmpwi	r0,0
 	bne	power7_wakeup_loss
+BEGIN_FTR_SECTION
+	CHECK_HMI_INTERRUPT
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	ld	r1,PACAR1(r13)
 	ld	r4,_MSR(r1)
 	ld	r5,_NIP(r1)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 88e3ec6e1d96..f84f799babb1 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
 
 	/* if (unlikely(ret))
 		pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
-			__func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
+			__func__, hwaddr, entry << tbl->it_page_shift,
 				hwaddr, ret); */
 
 	return ret;
@@ -1056,7 +1056,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
 			direction != DMA_TO_DEVICE, &page);
 	if (unlikely(ret != 1)) {
 		/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
-				tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
+				tce, entry << tbl->it_page_shift, ret); */
 		return -EFAULT;
 	}
 	hwaddr = (unsigned long) page_address(page) + offset;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 248ee7e5bebd..4c5891de162e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -189,6 +189,11 @@ notrace unsigned int __check_irq_replay(void)
 	}
 #endif /* CONFIG_PPC_BOOK3E */
 
+	/* Check if an hypervisor Maintenance interrupt happened */
+	local_paca->irq_happened &= ~PACA_IRQ_HMI;
+	if (happened & PACA_IRQ_HMI)
+		return 0xe60;
+
 	/* There should be nothing left ! */
 	BUG_ON(local_paca->irq_happened != 0);
 
@@ -377,6 +382,14 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
 	seq_printf(p, "  Machine check exceptions\n");
 
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		seq_printf(p, "%*s: ", prec, "HMI");
+		for_each_online_cpu(j)
+			seq_printf(p, "%10u ",
+					per_cpu(irq_stat, j).hmi_exceptions);
+		seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
+	}
+
 #ifdef CONFIG_PPC_DOORBELL
 	if (cpu_has_feature(CPU_FTR_DBELL)) {
 		seq_printf(p, "%*s: ", prec, "DBL");
@@ -400,6 +413,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += per_cpu(irq_stat, cpu).mce_exceptions;
 	sum += per_cpu(irq_stat, cpu).spurious_irqs;
 	sum += per_cpu(irq_stat, cpu).timer_irqs_others;
+	sum += per_cpu(irq_stat, cpu).hmi_exceptions;
 #ifdef CONFIG_PPC_DOORBELL
 	sum += per_cpu(irq_stat, cpu).doorbell_irqs;
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index be99774d3f44..bf44ae962ab8 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1095,6 +1095,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+	unsigned long sp_vsid;
+	unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
+			<< SLB_VSID_SHIFT_1T;
+	else
+		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
+			<< SLB_VSID_SHIFT;
+	sp_vsid |= SLB_VSID_KERNEL | llp;
+	p->thread.ksp_vsid = sp_vsid;
+#endif
+}
+
 /*
  * Copy a thread..
  */
@@ -1174,21 +1191,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	p->thread.vr_save_area = NULL;
 #endif
 
-#ifdef CONFIG_PPC_STD_MMU_64
-	if (mmu_has_feature(MMU_FTR_SLB)) {
-		unsigned long sp_vsid;
-		unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+	setup_ksp_vsid(p, sp);
 
-		if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
-			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
-				<< SLB_VSID_SHIFT_1T;
-		else
-			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
-				<< SLB_VSID_SHIFT;
-		sp_vsid |= SLB_VSID_KERNEL | llp;
-		p->thread.ksp_vsid = sp_vsid;
-	}
-#endif /* CONFIG_PPC_STD_MMU_64 */
 #ifdef CONFIG_PPC64 
 	if (cpu_has_feature(CPU_FTR_DSCR)) {
 		p->thread.dscr_inherit = current->thread.dscr_inherit;
@@ -1577,7 +1581,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
 			struct pt_regs *regs = (struct pt_regs *)
 				(sp + STACK_FRAME_OVERHEAD);
 			lr = regs->link;
-			printk("--- Exception: %lx at %pS\n    LR = %pS\n",
+			printk("--- interrupt: %lx at %pS\n    LR = %pS\n",
 			       regs->trap, (void *)regs->nip, (void *)lr);
 			firstframe = 1;
 		}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b694b0730971..1a3b1055f5eb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -155,7 +155,6 @@ static struct ibm_pa_feature {
 } ibm_pa_features[] __initdata = {
 	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
 	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{0, MMU_FTR_SLB, 0,		0, 2, 0},
 	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
 	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
 	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
@@ -309,12 +308,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
 	/* Get physical cpuid */
 	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
-	if (intserv) {
-		nthreads = len / sizeof(int);
-	} else {
-		intserv = of_get_flat_dt_prop(node, "reg", NULL);
-		nthreads = 1;
-	}
+	if (!intserv)
+		intserv = of_get_flat_dt_prop(node, "reg", &len);
+
+	nthreads = len / sizeof(int);
 
 	/*
 	 * Now see if any of these threads match our boot cpu.
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e5b022c55ccd..1b0e26013a62 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -456,18 +456,20 @@ void __init smp_setup_cpu_maps(void)
 		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
 				&len);
 		if (intserv) {
-			nthreads = len / sizeof(int);
 			DBG("    ibm,ppc-interrupt-server#s -> %d threads\n",
 			    nthreads);
 		} else {
 			DBG("    no ibm,ppc-interrupt-server#s -> 1 thread\n");
-			intserv = of_get_property(dn, "reg", NULL);
+			intserv = of_get_property(dn, "reg", &len);
 			if (!intserv) {
 				cpu_be = cpu_to_be32(cpu);
 				intserv = &cpu_be;	/* assume logical == phys */
+				len = 4;
 			}
 		}
 
+		nthreads = len / sizeof(int);
+
 		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			bool avail;
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ee082d771178..d0225572faa1 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -201,7 +201,11 @@ static void cpu_ready_for_interrupts(void)
 	/* Set IR and DR in PACA MSR */
 	get_paca()->kernel_msr = MSR_KERNEL;
 
-	/* Enable AIL if supported */
+	/*
+	 * Enable AIL if supported, and we are in hypervisor mode. If we are
+	 * not in hypervisor mode, we enable relocation-on interrupts later
+	 * in pSeries_setup_arch() using the H_SET_MODE hcall.
+	 */
 	if (cpu_has_feature(CPU_FTR_HVMODE) &&
 	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
 		unsigned long lpcr = mfspr(SPRN_LPCR);
@@ -507,7 +511,11 @@ void __init setup_system(void)
 	check_smt_enabled();
 	setup_tlb_core_data();
 
-#ifdef CONFIG_SMP
+	/*
+	 * Freescale Book3e parts spin in a loop provided by firmware,
+	 * so smp_release_cpus() does nothing for them
+	 */
+#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_FSL_BOOK3E)
 	/* Release secondary cpus out of their spinloops at 0x60 now that
 	 * we can map physical -> logical CPU ids
 	 */
@@ -673,9 +681,6 @@ void __init setup_arch(char **cmdline_p)
 	exc_lvl_early_init();
 	emergency_stack_init();
 
-#ifdef CONFIG_PPC_STD_MMU_64
-	stabs_alloc();
-#endif
 	/* set up the bootmem stuff with available memory */
 	do_init_bootmem();
 	sparse_init();
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 895c50ca943c..7ab5d434e2ee 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -39,9 +39,6 @@
 .section .rodata,"a"
 
 #ifdef CONFIG_PPC64
-#define sys_sigpending	sys_ni_syscall
-#define sys_old_getrlimit sys_ni_syscall
-
 	.p2align	3
 #endif
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 239f1cde3fff..0dc43f9932cf 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -302,6 +302,16 @@ long machine_check_early(struct pt_regs *regs)
 	return handled;
 }
 
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+	__get_cpu_var(irq_stat).hmi_exceptions++;
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(regs);
+
+	return 0;
+}
+
 #endif
 
 /*
@@ -609,7 +619,7 @@ int machine_check_e500(struct pt_regs *regs)
 	if (reason & MCSR_BUS_RBERR)
 		printk("Bus - Read Data Bus Error\n");
 	if (reason & MCSR_BUS_WBERR)
-		printk("Bus - Read Data Bus Error\n");
+		printk("Bus - Write Data Bus Error\n");
 	if (reason & MCSR_BUS_IPERR)
 		printk("Bus - Instruction Parity Error\n");
 	if (reason & MCSR_BUS_RPERR)
@@ -738,6 +748,20 @@ void SMIException(struct pt_regs *regs)
 	die("System Management Interrupt", regs, SIGABRT);
 }
 
+void handle_hmi_exception(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+
+	if (ppc_md.handle_hmi_exception)
+		ppc_md.handle_hmi_exception(regs);
+
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
 void unknown_exception(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 558a67df8126..7faf8fd05738 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -159,6 +159,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 BEGIN_FTR_SECTION
 	beq	11f
+	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
+	beq	cr2, 14f			/* HMI check */
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* RFI into the highmem handler, or branch to interrupt handler */
@@ -179,6 +181,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 13:	b	machine_check_fwnmi
 
+14:	mtspr	SPRN_HSRR0, r8
+	mtspr	SPRN_HSRR1, r7
+	b	hmi_exception_after_realmode
+
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* set our bit in napping_threads */
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 0860ee46013c..f09899e35991 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -461,8 +461,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 /*
  * Routine to copy a whole page of data, optimized for POWER4.
  * On POWER4 it is more than 50% faster than the simple loop
- * above (following the .Ldst_aligned label) but it runs slightly
- * slower on POWER3.
+ * above (following the .Ldst_aligned label).
  */
 .Lcopy_page_4K:
 	std	r31,-32(1)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 51230ee6a407..d0130fff20e5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 				   tlb_nohash_low.o
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
-obj-$(CONFIG_PPC_STD_MMU_64)	+= hash_utils_64.o \
-				   slb_low.o slb.o stab.o \
-				   $(hash64-y)
+obj-$(CONFIG_PPC_STD_MMU_64)	+= hash_utils_64.o slb_low.o slb.o $(hash64-y)
 obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   tlb_hash$(CONFIG_WORD_SIZE).o \
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88fdd9d25077..daee7f4e5a14 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -243,7 +243,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 		      int psize, int ssize)
 {
 	unsigned long vaddr;
@@ -821,21 +821,14 @@ static void __init htab_initialize(void)
 
 void __init early_init_mmu(void)
 {
-	/* Setup initial STAB address in the PACA */
-	get_paca()->stab_real = __pa((u64)&initial_stab);
-	get_paca()->stab_addr = (u64)&initial_stab;
-
 	/* Initialize the MMU Hash table and create the linear mapping
-	 * of memory. Has to be done before stab/slb initialization as
-	 * this is currently where the page size encoding is obtained
+	 * of memory. Has to be done before SLB initialization as this is
+	 * currently where the page size encoding is obtained.
 	 */
 	htab_initialize();
 
-	/* Initialize stab / SLB management */
-	if (mmu_has_feature(MMU_FTR_SLB))
-		slb_initialize();
-	else
-		stab_initialize(get_paca()->stab_real);
+	/* Initialize SLB management */
+	slb_initialize();
 }
 
 #ifdef CONFIG_SMP
@@ -845,13 +838,8 @@ void early_init_mmu_secondary(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		mtspr(SPRN_SDR1, _SDR1);
 
-	/* Initialize STAB/SLB. We use a virtual address as it works
-	 * in real mode on pSeries.
-	 */
-	if (mmu_has_feature(MMU_FTR_SLB))
-		slb_initialize();
-	else
-		stab_initialize(get_paca()->stab_addr);
+	/* Initialize SLB */
+	slb_initialize();
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e3734edffa69..253b4b971c8a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page)
 static int __meminit vmemmap_populated(unsigned long start, int page_size)
 {
 	unsigned long end = start + page_size;
+	start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
 
 	for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
-		if (pfn_valid(vmemmap_section_start(start)))
+		if (pfn_valid(page_to_pfn((struct page *)start)))
 			return 1;
 
 	return 0;
@@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
 	for (i = 0; i < page_size; i += PAGE_SIZE)
 		BUG_ON(map_kernel_page(start + i, phys, flags));
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void vmemmap_remove_mapping(unsigned long start,
+				   unsigned long page_size)
+{
+}
+#endif
 #else /* CONFIG_PPC_BOOK3E */
 static void __meminit vmemmap_create_mapping(unsigned long start,
 					     unsigned long page_size,
@@ -223,17 +231,42 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
 					mmu_kernel_ssize);
 	BUG_ON(mapped < 0);
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+			int psize, int ssize);
+
+static void vmemmap_remove_mapping(unsigned long start,
+				   unsigned long page_size)
+{
+	int mapped = htab_remove_mapping(start, start + page_size,
+					 mmu_vmemmap_psize,
+					 mmu_kernel_ssize);
+	BUG_ON(mapped < 0);
+}
+#endif
+
 #endif /* CONFIG_PPC_BOOK3E */
 
 struct vmemmap_backing *vmemmap_list;
+static struct vmemmap_backing *next;
+static int num_left;
+static int num_freed;
 
 static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
 {
-	static struct vmemmap_backing *next;
-	static int num_left;
+	struct vmemmap_backing *vmem_back;
+	/* get from freed entries first */
+	if (num_freed) {
+		num_freed--;
+		vmem_back = next;
+		next = next->list;
+
+		return vmem_back;
+	}
 
 	/* allocate a page when required and hand out chunks */
-	if (!next || !num_left) {
+	if (!num_left) {
 		next = vmemmap_alloc_block(PAGE_SIZE, node);
 		if (unlikely(!next)) {
 			WARN_ON(1);
@@ -296,10 +329,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	return 0;
 }
 
-void vmemmap_free(unsigned long start, unsigned long end)
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long vmemmap_list_free(unsigned long start)
 {
+	struct vmemmap_backing *vmem_back, *vmem_back_prev;
+
+	vmem_back_prev = vmem_back = vmemmap_list;
+
+	/* look for it with prev pointer recorded */
+	for (; vmem_back; vmem_back = vmem_back->list) {
+		if (vmem_back->virt_addr == start)
+			break;
+		vmem_back_prev = vmem_back;
+	}
+
+	if (unlikely(!vmem_back)) {
+		WARN_ON(1);
+		return 0;
+	}
+
+	/* remove it from vmemmap_list */
+	if (vmem_back == vmemmap_list) /* remove head */
+		vmemmap_list = vmem_back->list;
+	else
+		vmem_back_prev->list = vmem_back->list;
+
+	/* next point to this freed entry */
+	vmem_back->list = next;
+	next = vmem_back;
+	num_freed++;
+
+	return vmem_back->phys;
 }
 
+void __ref vmemmap_free(unsigned long start, unsigned long end)
+{
+	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+
+	start = _ALIGN_DOWN(start, page_size);
+
+	pr_debug("vmemmap_free %lx...%lx\n", start, end);
+
+	for (; start < end; start += page_size) {
+		unsigned long addr;
+
+		/*
+		 * the section has already be marked as invalid, so
+		 * vmemmap_populated() true means some other sections still
+		 * in this page, so skip it.
+		 */
+		if (vmemmap_populated(start, page_size))
+			continue;
+
+		addr = vmemmap_list_free(start);
+		if (addr) {
+			struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+
+			if (PageReserved(page)) {
+				/* allocated from bootmem */
+				if (page_size < PAGE_SIZE) {
+					/*
+					 * this shouldn't happen, but if it is
+					 * the case, leave the memory there
+					 */
+					WARN_ON_ONCE(1);
+				} else {
+					unsigned int nr_pages =
+						1 << get_order(page_size);
+					while (nr_pages--)
+						free_reserved_page(page++);
+				}
+			} else
+				free_pages((unsigned long)(__va(addr)),
+							get_order(page_size));
+
+			vmemmap_remove_mapping(start, page_size);
+		}
+	}
+}
+#endif
 void register_page_bootmem_memmap(unsigned long section_nr,
 				  struct page *start_page, unsigned long size)
 {
@@ -331,16 +439,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
 		if (pg_va < vmem_back->virt_addr)
 			continue;
 
-		/* Check that page struct is not split between real pages */
-		if ((pg_va + sizeof(struct page)) >
-				(vmem_back->virt_addr + page_size))
-			return NULL;
-
-		page = (struct page *) (vmem_back->phys + pg_va -
+		/* After vmemmap_list entry free is possible, need check all */
+		if ((pg_va + sizeof(struct page)) <=
+				(vmem_back->virt_addr + page_size)) {
+			page = (struct page *) (vmem_back->phys + pg_va -
 				vmem_back->virt_addr);
-		return page;
+			return page;
+		}
 	}
 
+	/* Probably that page struct is split between real pages */
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index 78fef6726e10..aa5a7fd89461 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -2,7 +2,7 @@
  * This file contains the routines for handling the MMU on those
  * PowerPC implementations where the MMU substantially follows the
  * architecture specification.  This includes the 6xx, 7xx, 7xxx,
- * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ * and 8260 implementations but excludes the 8xx and 4xx.
  *  -- paulus
  *
  *  Derived from arch/ppc/mm/init.c:
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3b181b22cd46..d3e9a78eaed3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -611,8 +611,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 		unmap_cpu_from_node(lcpu);
-		break;
 		ret = NOTIFY_OK;
+		break;
 #endif
 	}
 	return ret;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 343a87fa78b5..cf11342bf519 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -41,7 +41,7 @@ unsigned long ioremap_base;
 unsigned long ioremap_bot;
 EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
 
-#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
+#ifdef CONFIG_6xx
 #define HAVE_BATS	1
 #endif
 
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f6ce1f111f5b..3b3c4d34c7a0 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -68,7 +68,7 @@
 unsigned long ioremap_bot = IOREMAP_BASE;
 
 #ifdef CONFIG_PPC_MMU_NOHASH
-static void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size)
 {
 	void *pt;
 
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 11571e118831..5029dc19b517 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -2,7 +2,7 @@
  * This file contains the routines for handling the MMU on those
  * PowerPC implementations where the MMU substantially follows the
  * architecture specification.  This includes the 6xx, 7xx, 7xxx,
- * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ * and 8260 implementations but excludes the 8xx and 4xx.
  *  -- paulus
  *
  *  Derived from arch/ppc/mm/init.c:
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
deleted file mode 100644
index 3f8efa6f2997..000000000000
--- a/arch/powerpc/mm/stab.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * PowerPC64 Segment Translation Support.
- *
- * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
- *    Copyright (c) 2001 Dave Engebretsen
- *
- * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/memblock.h>
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/paca.h>
-#include <asm/cputable.h>
-#include <asm/prom.h>
-
-struct stab_entry {
-	unsigned long esid_data;
-	unsigned long vsid_data;
-};
-
-#define NR_STAB_CACHE_ENTRIES 8
-static DEFINE_PER_CPU(long, stab_cache_ptr);
-static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache);
-
-/*
- * Create a segment table entry for the given esid/vsid pair.
- */
-static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid)
-{
-	unsigned long esid_data, vsid_data;
-	unsigned long entry, group, old_esid, castout_entry, i;
-	unsigned int global_entry;
-	struct stab_entry *ste, *castout_ste;
-	unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET;
-
-	vsid_data = vsid << STE_VSID_SHIFT;
-	esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V;
-	if (! kernel_segment)
-		esid_data |= STE_ESID_KS;
-
-	/* Search the primary group first. */
-	global_entry = (esid & 0x1f) << 3;
-	ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
-
-	/* Find an empty entry, if one exists. */
-	for (group = 0; group < 2; group++) {
-		for (entry = 0; entry < 8; entry++, ste++) {
-			if (!(ste->esid_data & STE_ESID_V)) {
-				ste->vsid_data = vsid_data;
-				eieio();
-				ste->esid_data = esid_data;
-				return (global_entry | entry);
-			}
-		}
-		/* Now search the secondary group. */
-		global_entry = ((~esid) & 0x1f) << 3;
-		ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
-	}
-
-	/*
-	 * Could not find empty entry, pick one with a round robin selection.
-	 * Search all entries in the two groups.
-	 */
-	castout_entry = get_paca()->stab_rr;
-	for (i = 0; i < 16; i++) {
-		if (castout_entry < 8) {
-			global_entry = (esid & 0x1f) << 3;
-			ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7));
-			castout_ste = ste + castout_entry;
-		} else {
-			global_entry = ((~esid) & 0x1f) << 3;
-			ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7));
-			castout_ste = ste + (castout_entry - 8);
-		}
-
-		/* Dont cast out the first kernel segment */
-		if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET)
-			break;
-
-		castout_entry = (castout_entry + 1) & 0xf;
-	}
-
-	get_paca()->stab_rr = (castout_entry + 1) & 0xf;
-
-	/* Modify the old entry to the new value. */
-
-	/* Force previous translations to complete. DRENG */
-	asm volatile("isync" : : : "memory");
-
-	old_esid = castout_ste->esid_data >> SID_SHIFT;
-	castout_ste->esid_data = 0;		/* Invalidate old entry */
-
-	asm volatile("sync" : : : "memory");    /* Order update */
-
-	castout_ste->vsid_data = vsid_data;
-	eieio();				/* Order update */
-	castout_ste->esid_data = esid_data;
-
-	asm volatile("slbie  %0" : : "r" (old_esid << SID_SHIFT));
-	/* Ensure completion of slbie */
-	asm volatile("sync" : : : "memory");
-
-	return (global_entry | (castout_entry & 0x7));
-}
-
-/*
- * Allocate a segment table entry for the given ea and mm
- */
-static int __ste_allocate(unsigned long ea, struct mm_struct *mm)
-{
-	unsigned long vsid;
-	unsigned char stab_entry;
-	unsigned long offset;
-
-	/* Kernel or user address? */
-	if (is_kernel_addr(ea)) {
-		vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	} else {
-		if ((ea >= TASK_SIZE_USER64) || (! mm))
-			return 1;
-
-		vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M);
-	}
-
-	stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid);
-
-	if (!is_kernel_addr(ea)) {
-		offset = __get_cpu_var(stab_cache_ptr);
-		if (offset < NR_STAB_CACHE_ENTRIES)
-			__get_cpu_var(stab_cache[offset++]) = stab_entry;
-		else
-			offset = NR_STAB_CACHE_ENTRIES+1;
-		__get_cpu_var(stab_cache_ptr) = offset;
-
-		/* Order update */
-		asm volatile("sync":::"memory");
-	}
-
-	return 0;
-}
-
-int ste_allocate(unsigned long ea)
-{
-	return __ste_allocate(ea, current->mm);
-}
-
-/*
- * Do the segment table work for a context switch: flush all user
- * entries from the table, then preload some probably useful entries
- * for the new task
- */
-void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
-{
-	struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr;
-	struct stab_entry *ste;
-	unsigned long offset;
-	unsigned long pc = KSTK_EIP(tsk);
-	unsigned long stack = KSTK_ESP(tsk);
-	unsigned long unmapped_base;
-
-	/* Force previous translations to complete. DRENG */
-	asm volatile("isync" : : : "memory");
-
-	/*
-	 * We need interrupts hard-disabled here, not just soft-disabled,
-	 * so that a PMU interrupt can't occur, which might try to access
-	 * user memory (to get a stack trace) and possible cause an STAB miss
-	 * which would update the stab_cache/stab_cache_ptr per-cpu variables.
-	 */
-	hard_irq_disable();
-
-	offset = __get_cpu_var(stab_cache_ptr);
-	if (offset <= NR_STAB_CACHE_ENTRIES) {
-		int i;
-
-		for (i = 0; i < offset; i++) {
-			ste = stab + __get_cpu_var(stab_cache[i]);
-			ste->esid_data = 0; /* invalidate entry */
-		}
-	} else {
-		unsigned long entry;
-
-		/* Invalidate all entries. */
-		ste = stab;
-
-		/* Never flush the first entry. */
-		ste += 1;
-		for (entry = 1;
-		     entry < (HW_PAGE_SIZE / sizeof(struct stab_entry));
-		     entry++, ste++) {
-			unsigned long ea;
-			ea = ste->esid_data & ESID_MASK;
-			if (!is_kernel_addr(ea)) {
-				ste->esid_data = 0;
-			}
-		}
-	}
-
-	asm volatile("sync; slbia; sync":::"memory");
-
-	__get_cpu_var(stab_cache_ptr) = 0;
-
-	/* Now preload some entries for the new task */
-	if (test_tsk_thread_flag(tsk, TIF_32BIT))
-		unmapped_base = TASK_UNMAPPED_BASE_USER32;
-	else
-		unmapped_base = TASK_UNMAPPED_BASE_USER64;
-
-	__ste_allocate(pc, mm);
-
-	if (GET_ESID(pc) == GET_ESID(stack))
-		return;
-
-	__ste_allocate(stack, mm);
-
-	if ((GET_ESID(pc) == GET_ESID(unmapped_base))
-	    || (GET_ESID(stack) == GET_ESID(unmapped_base)))
-		return;
-
-	__ste_allocate(unmapped_base, mm);
-
-	/* Order update */
-	asm volatile("sync" : : : "memory");
-}
-
-/*
- * Allocate segment tables for secondary CPUs.  These must all go in
- * the first (bolted) segment, so that do_stab_bolted won't get a
- * recursive segment miss on the segment table itself.
- */
-void __init stabs_alloc(void)
-{
-	int cpu;
-
-	if (mmu_has_feature(MMU_FTR_SLB))
-		return;
-
-	for_each_possible_cpu(cpu) {
-		unsigned long newstab;
-
-		if (cpu == 0)
-			continue; /* stab for CPU 0 is statically allocated */
-
-		newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE,
-					 1<<SID_SHIFT);
-		newstab = (unsigned long)__va(newstab);
-
-		memset((void *)newstab, 0, HW_PAGE_SIZE);
-
-		paca[cpu].stab_addr = newstab;
-		paca[cpu].stab_real = __pa(newstab);
-		printk(KERN_INFO "Segment table for CPU %d at 0x%llx "
-		       "virtual, 0x%llx absolute\n",
-		       cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
-	}
-}
-
-/*
- * Build an entry for the base kernel segment and put it into
- * the segment table or SLB.  All other segment table or SLB
- * entries are faulted in.
- */
-void stab_initialize(unsigned long stab)
-{
-	unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M);
-	unsigned long stabreal;
-
-	asm volatile("isync; slbia; isync":::"memory");
-	make_ste(stab, GET_ESID(PAGE_OFFSET), vsid);
-
-	/* Order update */
-	asm volatile("sync":::"memory");
-
-	/* Set ASR */
-	stabreal = get_paca()->stab_real | 0x1ul;
-
-	mtspr(SPRN_ASR, stabreal);
-}
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 356e8b41fb09..89bf95bd63b1 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -296,9 +296,12 @@ itlb_miss_fault_bolted:
  * r14 = page table base
  * r13 = PACA
  * r11 = tlb_per_core ptr
- * r10 = cpu number
+ * r10 = crap (free to use)
  */
 tlb_miss_common_e6500:
+	crmove	cr2*4+2,cr0*4+2		/* cr2.eq != 0 if kernel address */
+
+BEGIN_FTR_SECTION		/* CPU_FTR_SMT */
 	/*
 	 * Search if we already have an indirect entry for that virtual
 	 * address, and if we do, bail out.
@@ -309,6 +312,7 @@ tlb_miss_common_e6500:
 	lhz	r10,PACAPACAINDEX(r13)
 	cmpdi	r15,0
 	cmpdi	cr1,r15,1	/* set cr1.eq = 0 for non-recursive */
+	addi	r10,r10,1
 	bne	2f
 	stbcx.	r10,0,r11
 	bne	1b
@@ -322,18 +326,62 @@ tlb_miss_common_e6500:
 	b	1b
 	.previous
 
+	/*
+	 * Erratum A-008139 says that we can't use tlbwe to change
+	 * an indirect entry in any way (including replacing or
+	 * invalidating) if the other thread could be in the process
+	 * of a lookup.  The workaround is to invalidate the entry
+	 * with tlbilx before overwriting.
+	 */
+
+	lbz	r15,TCD_ESEL_NEXT(r11)
+	rlwinm	r10,r15,16,0xff0000
+	oris	r10,r10,MAS0_TLBSEL(1)@h
+	mtspr	SPRN_MAS0,r10
+	isync
+	tlbre
+	mfspr	r15,SPRN_MAS1
+	andis.	r15,r15,MAS1_VALID@h
+	beq	5f
+
+BEGIN_FTR_SECTION_NESTED(532)
+	mfspr	r10,SPRN_MAS8
+	rlwinm	r10,r10,0,0x80000fff  /* tgs,tlpid -> sgs,slpid */
+	mtspr	SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
+
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r15,r10,0,0x3fff0000  /* tid -> spid */
+	rlwimi	r15,r10,20,0x00000003 /* ind,ts -> sind,sas */
+	mfspr	r10,SPRN_MAS6
+	mtspr	SPRN_MAS6,r15
+
 	mfspr	r15,SPRN_MAS2
+	isync
+	tlbilxva 0,r15
+	isync
+
+	mtspr	SPRN_MAS6,r10
+
+5:
+BEGIN_FTR_SECTION_NESTED(532)
+	li	r10,0
+	mtspr	SPRN_MAS8,r10
+	mtspr	SPRN_MAS5,r10
+END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
 
 	tlbsx	0,r16
 	mfspr	r10,SPRN_MAS1
-	andis.	r10,r10,MAS1_VALID@h
+	andis.	r15,r10,MAS1_VALID@h
 	bne	tlb_miss_done_e6500
-
-	/* Undo MAS-damage from the tlbsx */
+FTR_SECTION_ELSE
 	mfspr	r10,SPRN_MAS1
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
+
 	oris	r10,r10,MAS1_VALID@h
-	mtspr	SPRN_MAS1,r10
-	mtspr	SPRN_MAS2,r15
+	beq	cr2,4f
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+4:	mtspr	SPRN_MAS1,r10
 
 	/* Now, we need to walk the page tables. First check if we are in
 	 * range.
@@ -394,11 +442,13 @@ tlb_miss_common_e6500:
 
 tlb_miss_done_e6500:
 	.macro	tlb_unlock_e6500
+BEGIN_FTR_SECTION
 	beq	cr1,1f		/* no unlock if lock was recursively grabbed */
 	li	r15,0
 	isync
 	stb	r15,0(r11)
 1:
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
 	.endm
 
 	tlb_unlock_e6500
@@ -407,12 +457,9 @@ tlb_miss_done_e6500:
 	rfi
 
 tlb_miss_kernel_e6500:
-	mfspr	r10,SPRN_MAS1
 	ld	r14,PACA_KERNELPGD(r13)
-	cmpldi	cr0,r15,8		/* Check for vmalloc region */
-	rlwinm	r10,r10,0,16,1		/* Clear TID */
-	mtspr	SPRN_MAS1,r10
-	beq+	tlb_miss_common_e6500
+	cmpldi	cr1,r15,8		/* Check for vmalloc region */
+	beq+	cr1,tlb_miss_common_e6500
 
 tlb_miss_fault_e6500:
 	tlb_unlock_e6500
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 751ec7bd5018..cedbbeced632 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -14,6 +14,6 @@ oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
 oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
 		cell/spu_profiler.o cell/vma_map.o \
 		cell/spu_task_sync.o
-oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
+oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index c77348c5d463..bf094c5a4bd9 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -205,9 +205,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 			ops->sync_stop = model->sync_stop;
 			break;
 #endif
-		case PPC_OPROFILE_RS64:
-			model = &op_model_rs64;
-			break;
 		case PPC_OPROFILE_POWER4:
 			model = &op_model_power4;
 			break;
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
deleted file mode 100644
index 7e5b8ed3a1b7..000000000000
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/cputable.h>
-#include <asm/oprofile_impl.h>
-
-#define dbg(args...)
-
-static void ctrl_write(unsigned int i, unsigned int val)
-{
-	unsigned int tmp = 0;
-	unsigned long shift = 0, mask = 0;
-
-	dbg("ctrl_write %d %x\n", i, val);
-
-	switch(i) {
-	case 0:
-		tmp = mfspr(SPRN_MMCR0);
-		shift = 6;
-		mask = 0x7F;
-		break;
-	case 1:
-		tmp = mfspr(SPRN_MMCR0);
-		shift = 0;
-		mask = 0x3F;
-		break;
-	case 2:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 4;
-		mask = 0x1F;
-		break;
-	case 3:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 9;
-		mask = 0x1F;
-		break;
-	case 4:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 14;
-		mask = 0x1F;
-		break;
-	case 5:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 19;
-		mask = 0x1F;
-		break;
-	case 6:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 24;
-		mask = 0x1F;
-		break;
-	case 7:
-		tmp = mfspr(SPRN_MMCR1);
-		shift = 31 - 28;
-		mask = 0xF;
-		break;
-	}
-
-	tmp = tmp & ~(mask << shift);
-	tmp |= val << shift;
-
-	switch(i) {
-		case 0:
-		case 1:
-			mtspr(SPRN_MMCR0, tmp);
-			break;
-		default:
-			mtspr(SPRN_MMCR1, tmp);
-	}
-
-	dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0),
-	       mfspr(SPRN_MMCR1));
-}
-
-static unsigned long reset_value[OP_MAX_COUNTER];
-
-static int num_counters;
-
-static int rs64_reg_setup(struct op_counter_config *ctr,
-			   struct op_system_config *sys,
-			   int num_ctrs)
-{
-	int i;
-
-	num_counters = num_ctrs;
-
-	for (i = 0; i < num_counters; ++i)
-		reset_value[i] = 0x80000000UL - ctr[i].count;
-
-	/* XXX setup user and kernel profiling */
-	return 0;
-}
-
-static int rs64_cpu_setup(struct op_counter_config *ctr)
-{
-	unsigned int mmcr0;
-
-	/* reset MMCR0 and set the freeze bit */
-	mmcr0 = MMCR0_FC;
-	mtspr(SPRN_MMCR0, mmcr0);
-
-	/* reset MMCR1, MMCRA */
-	mtspr(SPRN_MMCR1, 0);
-
-	if (cpu_has_feature(CPU_FTR_MMCRA))
-		mtspr(SPRN_MMCRA, 0);
-
-	mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
-	/* Only applies to POWER3, but should be safe on RS64 */
-	mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
-	mtspr(SPRN_MMCR0, mmcr0);
-
-	dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
-	    mfspr(SPRN_MMCR0));
-	dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
-	    mfspr(SPRN_MMCR1));
-
-	return 0;
-}
-
-static int rs64_start(struct op_counter_config *ctr)
-{
-	int i;
-	unsigned int mmcr0;
-
-	/* set the PMM bit (see comment below) */
-	mtmsrd(mfmsr() | MSR_PMM);
-
-	for (i = 0; i < num_counters; ++i) {
-		if (ctr[i].enabled) {
-			classic_ctr_write(i, reset_value[i]);
-			ctrl_write(i, ctr[i].event);
-		} else {
-			classic_ctr_write(i, 0);
-		}
-	}
-
-	mmcr0 = mfspr(SPRN_MMCR0);
-
-	/*
-	 * now clear the freeze bit, counting will not start until we
-	 * rfid from this excetion, because only at that point will
-	 * the PMM bit be cleared
-	 */
-	mmcr0 &= ~MMCR0_FC;
-	mtspr(SPRN_MMCR0, mmcr0);
-
-	dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
-	return 0;
-}
-
-static void rs64_stop(void)
-{
-	unsigned int mmcr0;
-
-	/* freeze counters */
-	mmcr0 = mfspr(SPRN_MMCR0);
-	mmcr0 |= MMCR0_FC;
-	mtspr(SPRN_MMCR0, mmcr0);
-
-	dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
-
-	mb();
-}
-
-static void rs64_handle_interrupt(struct pt_regs *regs,
-				  struct op_counter_config *ctr)
-{
-	unsigned int mmcr0;
-	int is_kernel;
-	int val;
-	int i;
-	unsigned long pc = mfspr(SPRN_SIAR);
-
-	is_kernel = is_kernel_addr(pc);
-
-	/* set the PMM bit (see comment below) */
-	mtmsrd(mfmsr() | MSR_PMM);
-
-	for (i = 0; i < num_counters; ++i) {
-		val = classic_ctr_read(i);
-		if (val < 0) {
-			if (ctr[i].enabled) {
-				oprofile_add_ext_sample(pc, regs, i, is_kernel);
-				classic_ctr_write(i, reset_value[i]);
-			} else {
-				classic_ctr_write(i, 0);
-			}
-		}
-	}
-
-	mmcr0 = mfspr(SPRN_MMCR0);
-
-	/* reset the perfmon trigger */
-	mmcr0 |= MMCR0_PMXE;
-
-	/*
-	 * now clear the freeze bit, counting will not start until we
-	 * rfid from this exception, because only at that point will
-	 * the PMM bit be cleared
-	 */
-	mmcr0 &= ~MMCR0_FC;
-	mtspr(SPRN_MMCR0, mmcr0);
-}
-
-struct op_powerpc_model op_model_rs64 = {
-	.reg_setup		= rs64_reg_setup,
-	.cpu_setup		= rs64_cpu_setup,
-	.start			= rs64_start,
-	.stop			= rs64_stop,
-	.handle_interrupt	= rs64_handle_interrupt,
-};
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index fe52db2eea6a..b7cd00b0171e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -36,7 +36,12 @@ struct cpu_hw_events {
 	struct perf_event *event[MAX_HWEVENTS];
 	u64 events[MAX_HWEVENTS];
 	unsigned int flags[MAX_HWEVENTS];
-	unsigned long mmcr[3];
+	/*
+	 * The order of the MMCR array is:
+	 *  - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
+	 *  - 32-bit, MMCR0, MMCR1, MMCR2
+	 */
+	unsigned long mmcr[4];
 	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
 	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -112,9 +117,9 @@ static bool is_ebb_event(struct perf_event *event) { return false; }
 static int ebb_event_check(struct perf_event *event) { return 0; }
 static void ebb_event_add(struct perf_event *event) { }
 static void ebb_switch_out(unsigned long mmcr0) { }
-static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
-	return mmcr0;
+	return cpuhw->mmcr[0];
 }
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
@@ -542,8 +547,10 @@ static void ebb_switch_out(unsigned long mmcr0)
 	current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
 }
 
-static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
+	unsigned long mmcr0 = cpuhw->mmcr[0];
+
 	if (!ebb)
 		goto out;
 
@@ -568,7 +575,15 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
 	mtspr(SPRN_SIAR, current->thread.siar);
 	mtspr(SPRN_SIER, current->thread.sier);
 	mtspr(SPRN_SDAR, current->thread.sdar);
-	mtspr(SPRN_MMCR2, current->thread.mmcr2);
+
+	/*
+	 * Merge the kernel & user values of MMCR2. The semantics we implement
+	 * are that the user MMCR2 can set bits, ie. cause counters to freeze,
+	 * but not clear bits. If a task wants to be able to clear bits, ie.
+	 * unfreeze counters, it should not set exclude_xxx in its events and
+	 * instead manage the MMCR2 entirely by itself.
+	 */
+	mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
 out:
 	return mmcr0;
 }
@@ -915,6 +930,14 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 	int i, n, first;
 	struct perf_event *event;
 
+	/*
+	 * If the PMU we're on supports per event exclude settings then we
+	 * don't need to do any of this logic. NB. This assumes no PMU has both
+	 * per event exclude and limited PMCs.
+	 */
+	if (ppmu->flags & PPMU_ARCH_207S)
+		return 0;
+
 	n = n_prev + n_new;
 	if (n <= 1)
 		return 0;
@@ -1219,28 +1242,31 @@ static void power_pmu_enable(struct pmu *pmu)
 	}
 
 	/*
-	 * Compute MMCR* values for the new set of events
+	 * Clear all MMCR settings and recompute them for the new set of events.
 	 */
+	memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+
 	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-			       cpuhw->mmcr)) {
+			       cpuhw->mmcr, cpuhw->event)) {
 		/* shouldn't ever get here */
 		printk(KERN_ERR "oops compute_mmcr failed\n");
 		goto out;
 	}
 
-	/*
-	 * Add in MMCR0 freeze bits corresponding to the
-	 * attr.exclude_* bits for the first event.
-	 * We have already checked that all events have the
-	 * same values for these bits as the first event.
-	 */
-	event = cpuhw->event[0];
-	if (event->attr.exclude_user)
-		cpuhw->mmcr[0] |= MMCR0_FCP;
-	if (event->attr.exclude_kernel)
-		cpuhw->mmcr[0] |= freeze_events_kernel;
-	if (event->attr.exclude_hv)
-		cpuhw->mmcr[0] |= MMCR0_FCHV;
+	if (!(ppmu->flags & PPMU_ARCH_207S)) {
+		/*
+		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
+		 * bits for the first event. We have already checked that all
+		 * events have the same value for these bits as the first event.
+		 */
+		event = cpuhw->event[0];
+		if (event->attr.exclude_user)
+			cpuhw->mmcr[0] |= MMCR0_FCP;
+		if (event->attr.exclude_kernel)
+			cpuhw->mmcr[0] |= freeze_events_kernel;
+		if (event->attr.exclude_hv)
+			cpuhw->mmcr[0] |= MMCR0_FCHV;
+	}
 
 	/*
 	 * Write the new configuration to MMCR* with the freeze
@@ -1252,6 +1278,8 @@ static void power_pmu_enable(struct pmu *pmu)
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
 				| MMCR0_FC);
+	if (ppmu->flags & PPMU_ARCH_207S)
+		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
 	/*
 	 * Read off any pre-existing events that need to move
@@ -1307,10 +1335,7 @@ static void power_pmu_enable(struct pmu *pmu)
  out_enable:
 	pmao_restore_workaround(ebb);
 
-	if (ppmu->flags & PPMU_ARCH_207S)
-		mtspr(SPRN_MMCR2, 0);
-
-	mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);
+	mmcr0 = ebb_switch_in(ebb, cpuhw);
 
 	mb();
 	if (cpuhw->bhrb_users)
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index fe21b515ca44..d115c5635bf3 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -260,8 +260,9 @@ static const u32 pmcsel_mask[N_COUNTER] = {
 /*
  * Compute MMCR0/1/2 values for a set of events.
  */
-static int mpc7450_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[])
+static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
+				unsigned long mmcr[],
+				struct perf_event *pevents[])
 {
 	u8 event_index[N_CLASSES][N_COUNTER];
 	int n_classevent[N_CLASSES];
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
index 9103a1de864d..ce6072fa481b 100644
--- a/arch/powerpc/perf/power4-pmu.c
+++ b/arch/powerpc/perf/power4-pmu.c
@@ -356,7 +356,7 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p4_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel, lower;
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index b03b6dc0172d..0526dac66007 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -452,7 +452,7 @@ static int power5p_marked_instr_event(u64 event)
 }
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[])
+				unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = 0;
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 1e8ce423c3af..4dc99f9f7962 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -383,7 +383,7 @@ static int power5_marked_instr_event(u64 event)
 }
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 31128e086fed..9c9d646b68a1 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -175,7 +175,7 @@ static int power6_marked_instr_event(u64 event)
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 56c67bca2f75..5b62f2389290 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -245,7 +245,7 @@ static int power7_marked_instr_event(u64 event)
 }
 
 static int power7_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 639cd9156585..396351db601b 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
 #include <asm/firmware.h>
+#include <asm/cputable.h>
 
 
 /*
@@ -266,6 +267,11 @@
 #define MMCRA_SDAR_MODE_TLB		(1ull << 42)
 #define MMCRA_IFM_SHIFT			30
 
+/* Bits in MMCR2 for POWER8 */
+#define MMCR2_FCS(pmc)			(1ull << (63 - (((pmc) - 1) * 9)))
+#define MMCR2_FCP(pmc)			(1ull << (62 - (((pmc) - 1) * 9)))
+#define MMCR2_FCH(pmc)			(1ull << (57 - (((pmc) - 1) * 9)))
+
 
 static inline bool event_is_fab_match(u64 event)
 {
@@ -393,9 +399,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
 }
 
 static int power8_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[],
+			       struct perf_event *pevents[])
 {
-	unsigned long mmcra, mmcr1, unit, combine, psel, cache, val;
+	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
 	unsigned int pmc, pmc_inuse;
 	int i;
 
@@ -410,7 +417,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 
 	/* In continous sampling mode, update SDAR on TLB miss */
 	mmcra = MMCRA_SDAR_MODE_TLB;
-	mmcr1 = 0;
+	mmcr1 = mmcr2 = 0;
 
 	/* Second pass: assign PMCs, set all MMCR1 fields */
 	for (i = 0; i < n_ev; ++i) {
@@ -472,6 +479,19 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 			mmcra |= val << MMCRA_IFM_SHIFT;
 		}
 
+		if (pevents[i]->attr.exclude_user)
+			mmcr2 |= MMCR2_FCP(pmc);
+
+		if (pevents[i]->attr.exclude_hv)
+			mmcr2 |= MMCR2_FCH(pmc);
+
+		if (pevents[i]->attr.exclude_kernel) {
+			if (cpu_has_feature(CPU_FTR_HVMODE))
+				mmcr2 |= MMCR2_FCH(pmc);
+			else
+				mmcr2 |= MMCR2_FCS(pmc);
+		}
+
 		hwc[i] = pmc - 1;
 	}
 
@@ -491,6 +511,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 
 	mmcr[1] = mmcr1;
 	mmcr[2] = mmcra;
+	mmcr[3] = mmcr2;
 
 	return 0;
 }
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 20139ceeacf6..8b6a8a36fa38 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -257,7 +257,7 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p970_compute_mmcr(u64 event[], int n_ev,
-			     unsigned int hwc[], unsigned long mmcr[])
+			     unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index f442120e0033..0c1e6903597e 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -274,7 +274,7 @@ config CORENET_GENERIC
 	  For 32bit kernel, the following boards are supported:
 	    P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080
 	  For 64bit kernel, the following boards are supported:
-	    T4240 QDS and B4 QDS
+	    T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
 	  The following boards are supported for both 32bit and 64bit kernel:
 	    P5020 DS, P5040 DS and T104xQDS
 
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 5db1e117fdde..d22dd85e50bf 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -119,7 +119,11 @@ static const char * const boards[] __initconst = {
 	"fsl,P4080DS",
 	"fsl,P5020DS",
 	"fsl,P5040DS",
+	"fsl,T2080QDS",
+	"fsl,T2080RDB",
+	"fsl,T2081QDS",
 	"fsl,T4240QDS",
+	"fsl,T4240RDB",
 	"fsl,B4860QDS",
 	"fsl,B4420QDS",
 	"fsl,B4220QDS",
@@ -129,28 +133,14 @@ static const char * const boards[] __initconst = {
 	NULL
 };
 
-static const char * const hv_boards[] __initconst = {
-	"fsl,P2041RDB-hv",
-	"fsl,P3041DS-hv",
-	"fsl,OCA4080-hv",
-	"fsl,P4080DS-hv",
-	"fsl,P5020DS-hv",
-	"fsl,P5040DS-hv",
-	"fsl,T4240QDS-hv",
-	"fsl,B4860QDS-hv",
-	"fsl,B4420QDS-hv",
-	"fsl,B4220QDS-hv",
-	"fsl,T1040QDS-hv",
-	"fsl,T1042QDS-hv",
-	NULL
-};
-
 /*
  * Called very early, device-tree isn't unflattened
  */
 static int __init corenet_generic_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
+	char hv_compat[24];
+	int i;
 #ifdef CONFIG_SMP
 	extern struct smp_ops_t smp_85xx_ops;
 #endif
@@ -159,21 +149,26 @@ static int __init corenet_generic_probe(void)
 		return 1;
 
 	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_match(root, hv_boards)) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
+	for (i = 0; boards[i]; i++) {
+		snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]);
+		if (of_flat_dt_is_compatible(root, hv_compat)) {
+			ppc_md.init_IRQ = ehv_pic_init;
+
+			ppc_md.get_irq = ehv_pic_get_irq;
+			ppc_md.restart = fsl_hv_restart;
+			ppc_md.power_off = fsl_hv_halt;
+			ppc_md.halt = fsl_hv_halt;
 #ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
+			/*
+			 * Disable the timebase sync operations because we
+			 * can't write to the timebase registers under the
+			 * hypervisor.
+			 */
+			smp_85xx_ops.give_timebase = NULL;
+			smp_85xx_ops.take_timebase = NULL;
 #endif
-		return 1;
+			return 1;
+		}
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index ba093f553678..d7c1e69f3070 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -28,6 +28,7 @@
 #include <asm/dbell.h>
 #include <asm/fsl_guts.h>
 #include <asm/code-patching.h>
+#include <asm/cputhreads.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/mpic.h>
@@ -168,6 +169,24 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
 	return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
 }
 
+#ifdef CONFIG_PPC64
+static void wake_hw_thread(void *info)
+{
+	void fsl_secondary_thread_init(void);
+	unsigned long imsr1, inia1;
+	int nr = *(const int *)info;
+
+	imsr1 = MSR_KERNEL;
+	inia1 = *(unsigned long *)fsl_secondary_thread_init;
+
+	mttmr(TMRN_IMSR1, imsr1);
+	mttmr(TMRN_INIA1, inia1);
+	mtspr(SPRN_TENS, TEN_THREAD(1));
+
+	smp_generic_kick_cpu(nr);
+}
+#endif
+
 static int smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
@@ -183,6 +202,31 @@ static int smp_85xx_kick_cpu(int nr)
 
 	pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr);
 
+#ifdef CONFIG_PPC64
+	/* Threads don't use the spin table */
+	if (cpu_thread_in_core(nr) != 0) {
+		int primary = cpu_first_thread_sibling(nr);
+
+		if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
+			return -ENOENT;
+
+		if (cpu_thread_in_core(nr) != 1) {
+			pr_err("%s: cpu %d: invalid hw thread %d\n",
+			       __func__, nr, cpu_thread_in_core(nr));
+			return -ENOENT;
+		}
+
+		if (!cpu_online(primary)) {
+			pr_err("%s: cpu %d: primary %d not online\n",
+			       __func__, nr, primary);
+			return -ENOENT;
+		}
+
+		smp_call_function_single(primary, wake_hw_thread, &nr, 0);
+		return 0;
+	}
+#endif
+
 	np = of_get_cpu_node(nr, NULL);
 	cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL);
 
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 587a2828b06c..d3037747031d 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -18,7 +18,6 @@
 #include <linux/fsl_devices.h>
 
 #include <asm/io.h>
-#include <asm/mpc8xx.h>
 #include <asm/8xx_immap.h>
 #include <asm/prom.h>
 #include <asm/fs_pd.h>
@@ -28,8 +27,6 @@
 
 #include "mpc8xx.h"
 
-struct mpc8xx_pcmcia_ops m8xx_pcmcia_ops;
-
 extern int cpm_pic_init(void);
 extern int cpm_get_irq(void);
 
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index c1262581b63c..5921dcb498fd 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -35,7 +35,6 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/time.h>
-#include <asm/mpc8xx.h>
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
 #include <asm/fs_pd.h>
@@ -46,61 +45,6 @@
 
 static u32 __iomem *bcsr, *bcsr5;
 
-#ifdef CONFIG_PCMCIA_M8XX
-static void pcmcia_hw_setup(int slot, int enable)
-{
-	if (enable)
-		clrbits32(&bcsr[1], BCSR1_PCCEN);
-	else
-		setbits32(&bcsr[1], BCSR1_PCCEN);
-}
-
-static int pcmcia_set_voltage(int slot, int vcc, int vpp)
-{
-	u32 reg = 0;
-
-	switch (vcc) {
-	case 0:
-		break;
-	case 33:
-		reg |= BCSR1_PCCVCC0;
-		break;
-	case 50:
-		reg |= BCSR1_PCCVCC1;
-		break;
-	default:
-		return 1;
-	}
-
-	switch (vpp) {
-	case 0:
-		break;
-	case 33:
-	case 50:
-		if (vcc == vpp)
-			reg |= BCSR1_PCCVPP1;
-		else
-			return 1;
-		break;
-	case 120:
-		if ((vcc == 33) || (vcc == 50))
-			reg |= BCSR1_PCCVPP0;
-		else
-			return 1;
-	default:
-		return 1;
-	}
-
-	/* first, turn off all power */
-	clrbits32(&bcsr[1], 0x00610000);
-
-	/* enable new powersettings */
-	setbits32(&bcsr[1], reg);
-
-	return 0;
-}
-#endif
-
 struct cpm_pin {
 	int port, pin, flags;
 };
@@ -245,12 +189,6 @@ static void __init mpc885ads_setup_arch(void)
 		of_detach_node(np);
 		of_node_put(np);
 	}
-
-#ifdef CONFIG_PCMCIA_M8XX
-	/* Set up board specific hook-ups.*/
-	m8xx_pcmcia_ops.hw_ctrl = pcmcia_hw_setup;
-	m8xx_pcmcia_ops.voltage_set = pcmcia_set_voltage;
-#endif
 }
 
 static int __init mpc885ads_probe(void)
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 251aba8759e4..dda607807def 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -37,7 +37,6 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/time.h>
-#include <asm/mpc8xx.h>
 #include <asm/8xx_immap.h>
 #include <asm/cpm1.h>
 #include <asm/fs_pd.h>
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a41bd023647a..e8bc40869cbd 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,7 +61,7 @@ choice
 	help
 	  There are two families of 64 bit PowerPC chips supported.
 	  The most common ones are the desktop and server CPUs
-	  (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...)
+	  (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
 
 	  The other are the "embedded" processors compliant with the
 	  "Book 3E" variant of the architecture
@@ -140,14 +140,6 @@ config 6xx
 	depends on PPC32 && PPC_BOOK3S
 	select PPC_HAVE_PMU_SUPPORT
 
-config POWER3
-	depends on PPC64 && PPC_BOOK3S
-	def_bool y
-
-config POWER4
-	depends on PPC64 && PPC_BOOK3S
-	def_bool y
-
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -244,7 +236,7 @@ config PHYS_64BIT
 
 config ALTIVEC
 	bool "AltiVec Support"
-	depends on 6xx || POWER4 || (PPC_E500MC && PPC64)
+	depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64)
 	---help---
 	  This option enables kernel support for the Altivec extensions to the
 	  PowerPC processor. The kernel currently supports saving and restoring
@@ -260,7 +252,7 @@ config ALTIVEC
 
 config VSX
 	bool "VSX Support"
-	depends on POWER4 && ALTIVEC && PPC_FPU
+	depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU
 	---help---
 
 	  This option enables kernel support for the Vector Scaler extensions
@@ -276,7 +268,7 @@ config VSX
 
 config PPC_ICSWX
 	bool "Support for PowerPC icswx coprocessor instruction"
-	depends on POWER4
+	depends on PPC_BOOK3S_64
 	default n
 	---help---
 
@@ -294,7 +286,7 @@ config PPC_ICSWX
 
 config PPC_ICSWX_PID
 	bool "icswx requires direct PID management"
-	depends on PPC_ICSWX && POWER4
+	depends on PPC_ICSWX
 	default y
 	---help---
 	  The PID register in server is used explicitly for ICSWX.  In
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 1afd10f67858..607124bae2e7 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -10,7 +10,7 @@ config PPC_PMAC
 
 config PPC_PMAC64
 	bool
-	depends on PPC_PMAC && POWER4
+	depends on PPC_PMAC && PPC64
 	select MPIC
 	select U3_DART
 	select MPIC_U3_HT_IRQS
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 63d82bbc05e9..1413e72bc2e1 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -158,7 +158,7 @@ static inline int simple_feature_tweak(struct device_node *node, int type,
 	return 0;
 }
 
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 
 static long ohare_htw_scc_enable(struct device_node *node, long param,
 				 long value)
@@ -1318,7 +1318,7 @@ intrepid_aack_delay_enable(struct device_node *node, long param, long value)
 }
 
 
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 
 static long
 core99_read_gpio(struct device_node *node, long param, long value)
@@ -1338,7 +1338,7 @@ core99_write_gpio(struct device_node *node, long param, long value)
 	return 0;
 }
 
-#ifdef CONFIG_POWER4
+#ifdef CONFIG_PPC64
 static long g5_gmac_enable(struct device_node *node, long param, long value)
 {
 	struct macio_chip *macio = &macio_chips[0];
@@ -1550,9 +1550,9 @@ void g5_phy_disable_cpu1(void)
 	if (uninorth_maj == 3)
 		UN_OUT(U3_API_PHY_CONFIG_1, 0);
 }
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 
 
 #ifdef CONFIG_PM
@@ -1864,7 +1864,7 @@ core99_sleep_state(struct device_node *node, long param, long value)
 	return 0;
 }
 
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 
 static long
 generic_dev_can_wake(struct device_node *node, long param, long value)
@@ -1906,7 +1906,7 @@ static struct feature_table_entry any_features[] = {
 	{ 0, NULL }
 };
 
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 
 /* OHare based motherboards. Currently, we only use these on the
  * 2400,3400 and 3500 series powerbooks. Some older desktops seem
@@ -2056,7 +2056,7 @@ static struct feature_table_entry intrepid_features[] = {
 	{ 0, NULL }
 };
 
-#else /* CONFIG_POWER4 */
+#else /* CONFIG_PPC64 */
 
 /* G5 features
  */
@@ -2074,10 +2074,10 @@ static struct feature_table_entry g5_features[] = {
 	{ 0, NULL }
 };
 
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 
 static struct pmac_mb_def pmac_mb_defs[] = {
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 	/*
 	 * Desktops
 	 */
@@ -2342,7 +2342,7 @@ static struct pmac_mb_def pmac_mb_defs[] = {
 		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
 		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
 	},
-#else /* CONFIG_POWER4 */
+#else /* CONFIG_PPC64 */
 	{	"PowerMac7,2",			"PowerMac G5",
 		PMAC_TYPE_POWERMAC_G5,		g5_features,
 		0,
@@ -2373,7 +2373,7 @@ static struct pmac_mb_def pmac_mb_defs[] = {
 		0,
 	},
 #endif /* CONFIG_PPC64 */
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 };
 
 /*
@@ -2441,7 +2441,7 @@ static int __init probe_motherboard(void)
 
 	/* Fallback to selection depending on mac-io chip type */
 	switch(macio->type) {
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 	    case macio_grand_central:
 		pmac_mb.model_id = PMAC_TYPE_PSURGE;
 		pmac_mb.model_name = "Unknown PowerSurge";
@@ -2475,7 +2475,7 @@ static int __init probe_motherboard(void)
 		pmac_mb.model_name = "Unknown Intrepid-based";
 		pmac_mb.features = intrepid_features;
 		break;
-#else /* CONFIG_POWER4 */
+#else /* CONFIG_PPC64 */
 	case macio_keylargo2:
 		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2;
 		pmac_mb.model_name = "Unknown K2-based";
@@ -2486,13 +2486,13 @@ static int __init probe_motherboard(void)
 		pmac_mb.model_name = "Unknown Shasta-based";
 		pmac_mb.features = g5_features;
 		break;
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 	default:
 		ret = -ENODEV;
 		goto done;
 	}
 found:
-#ifndef CONFIG_POWER4
+#ifndef CONFIG_PPC64
 	/* Fixup Hooper vs. Comet */
 	if (pmac_mb.model_id == PMAC_TYPE_HOOPER) {
 		u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4);
@@ -2546,9 +2546,9 @@ found:
 	 */
 	powersave_lowspeed = 1;
 
-#else /* CONFIG_POWER4 */
+#else /* CONFIG_PPC64 */
 	powersave_nap = 1;
-#endif  /* CONFIG_POWER4 */
+#endif  /* CONFIG_PPC64 */
 
 	/* Check for "mobile" machine */
 	if (model && (strncmp(model, "PowerBook", 9) == 0
@@ -2786,7 +2786,7 @@ set_initial_features(void)
 		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
 	}
 
-#ifdef CONFIG_POWER4
+#ifdef CONFIG_PPC64
 	if (macio_chips[0].type == macio_keylargo2 ||
 	    macio_chips[0].type == macio_shasta) {
 #ifndef CONFIG_SMP
@@ -2826,7 +2826,7 @@ set_initial_features(void)
 			np = of_find_node_by_name(np, "firewire");
 		}
 	}
-#else /* CONFIG_POWER4 */
+#else /* CONFIG_PPC64 */
 
 	if (macio_chips[0].type == macio_keylargo ||
 	    macio_chips[0].type == macio_pangea ||
@@ -2895,7 +2895,7 @@ set_initial_features(void)
 		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
 	}
 
-#endif /* CONFIG_POWER4 */
+#endif /* CONFIG_PPC64 */
 
 	/* On all machines, switch modem & serial ports off */
 	for_each_node_by_name(np, "ch-a")
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 4ad227d04c1a..f241accc053d 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,10 +1,11 @@
 obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
-obj-y			+= opal-msglog.o
+obj-y			+= opal-msglog.o opal-hmi.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
+obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 8ad0c5b891f4..c945bed4dc9e 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -187,10 +187,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose)
  */
 static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 {
-	s64 ret;
-	u32 pe_no;
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
+	int enable, ret = 0;
+	s64 rc;
 
 	/* Check on PE number */
 	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
@@ -201,58 +201,185 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 		return -EINVAL;
 	}
 
-	pe_no = pe->addr;
 	switch (option) {
 	case EEH_OPT_DISABLE:
-		ret = -EEXIST;
-		break;
+		return -EPERM;
 	case EEH_OPT_ENABLE:
-		ret = 0;
-		break;
+		return 0;
 	case EEH_OPT_THAW_MMIO:
-		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
-				OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
-		if (ret) {
-			pr_warning("%s: Failed to enable MMIO for "
-				   "PHB#%x-PE#%x, err=%lld\n",
-				__func__, hose->global_number, pe_no, ret);
-			return -EIO;
-		}
-
+		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
 		break;
 	case EEH_OPT_THAW_DMA:
-		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
-				OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
-		if (ret) {
-			pr_warning("%s: Failed to enable DMA for "
-				   "PHB#%x-PE#%x, err=%lld\n",
-				__func__, hose->global_number, pe_no, ret);
-			return -EIO;
-		}
-
+		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
 		break;
 	default:
-		pr_warning("%s: Invalid option %d\n", __func__, option);
+		pr_warn("%s: Invalid option %d\n",
+			__func__, option);
 		return -EINVAL;
 	}
 
+	/* If PHB supports compound PE, to handle it */
+	if (phb->unfreeze_pe) {
+		ret = phb->unfreeze_pe(phb, pe->addr, enable);
+	} else {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					       pe->addr,
+					       enable);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+				__func__, rc, option, phb->hose->global_number,
+				pe->addr);
+			ret = -EIO;
+		}
+	}
+
 	return ret;
 }
 
-static void ioda_eeh_phb_diag(struct pci_controller *hose)
+static void ioda_eeh_phb_diag(struct eeh_pe *pe)
 {
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pe->phb->private_data;
 	long rc;
 
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
 					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS)
+		pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			__func__, pe->phb->global_number, rc);
+}
+
+static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate;
+	__be16 pcierr;
+	s64 rc;
+	int result = 0;
+
+	rc = opal_pci_eeh_freeze_status(phb->opal_id,
+					pe->addr,
+					&fstate,
+					&pcierr,
+					NULL);
 	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			    __func__, hose->global_number, rc);
-		return;
+		pr_warn("%s: Failure %lld getting PHB#%x state\n",
+			__func__, rc, phb->hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
 	}
 
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+	/*
+	 * Check PHB state. If the PHB is frozen for the
+	 * first time, to dump the PHB diag-data.
+	 */
+	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+	} else if (!(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(pe);
+	}
+
+	return result;
+}
+
+static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate;
+	__be16 pcierr;
+	s64 rc;
+	int result;
+
+	/*
+	 * We don't clobber hardware frozen state until PE
+	 * reset is completed. In order to keep EEH core
+	 * moving forward, we have to return operational
+	 * state during PE reset.
+	 */
+	if (pe->state & EEH_PE_RESET) {
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		return result;
+	}
+
+	/*
+	 * Fetch PE state from hardware. If the PHB
+	 * supports compound PE, let it handle that.
+	 */
+	if (phb->get_pe_state) {
+		fstate = phb->get_pe_state(phb, pe->addr);
+	} else {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						pe->addr,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+				__func__, rc, phb->hose->global_number, pe->addr);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+	}
+
+	/* Figure out state */
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result = (EEH_STATE_MMIO_ACTIVE  |
+			  EEH_STATE_DMA_ACTIVE   |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result = (EEH_STATE_DMA_ACTIVE |
+			  EEH_STATE_DMA_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result = (EEH_STATE_MMIO_ACTIVE |
+			  EEH_STATE_MMIO_ENABLED);
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result = 0;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result = EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result = EEH_STATE_UNAVAILABLE;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result = EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		result = EEH_STATE_NOT_SUPPORT;
+		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
+			__func__, phb->hose->global_number,
+			pe->addr, fstate);
+	}
+
+	/*
+	 * If PHB supports compound PE, to freeze all
+	 * slave PEs for consistency.
+	 *
+	 * If the PE is switching to frozen state for the
+	 * first time, to dump the PHB diag-data.
+	 */
+	if (!(result & EEH_STATE_NOT_SUPPORT) &&
+	    !(result & EEH_STATE_UNAVAILABLE) &&
+	    !(result & EEH_STATE_MMIO_ACTIVE) &&
+	    !(result & EEH_STATE_DMA_ACTIVE)  &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe->addr);
+
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(pe);
+	}
+
+	return result;
 }
 
 /**
@@ -265,118 +392,21 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
  */
 static int ioda_eeh_get_state(struct eeh_pe *pe)
 {
-	s64 ret = 0;
-	u8 fstate;
-	__be16 pcierr;
-	u32 pe_no;
-	int result;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pe->phb->private_data;
 
-	/*
-	 * Sanity check on PE address. The PHB PE address should
-	 * be zero.
-	 */
-	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
-		pr_err("%s: PE address %x out of range [0, %x] "
-		       "on PHB#%x\n",
-		       __func__, pe->addr, phb->ioda.total_pe,
-		       hose->global_number);
+	/* Sanity check on PE number. PHB PE should have 0 */
+	if (pe->addr < 0 ||
+	    pe->addr >= phb->ioda.total_pe) {
+		pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
+			__func__, phb->hose->global_number,
+			pe->addr, phb->ioda.total_pe);
 		return EEH_STATE_NOT_SUPPORT;
 	}
 
-	/*
-	 * If we're in middle of PE reset, return normal
-	 * state to keep EEH core going. For PHB reset, we
-	 * still expect to have fenced PHB cleared with
-	 * PHB reset.
-	 */
-	if (!(pe->type & EEH_PE_PHB) &&
-	    (pe->state & EEH_PE_RESET)) {
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-		return result;
-	}
+	if (pe->type & EEH_PE_PHB)
+		return ioda_eeh_get_phb_state(pe);
 
-	/* Retrieve PE status through OPAL */
-	pe_no = pe->addr;
-	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
-			&fstate, &pcierr, NULL);
-	if (ret) {
-		pr_err("%s: Failed to get EEH status on "
-		       "PHB#%x-PE#%x\n, err=%lld\n",
-		       __func__, hose->global_number, pe_no, ret);
-		return EEH_STATE_NOT_SUPPORT;
-	}
-
-	/* Check PHB status */
-	if (pe->type & EEH_PE_PHB) {
-		result = 0;
-		result &= ~EEH_STATE_RESET_ACTIVE;
-
-		if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
-			result |= EEH_STATE_MMIO_ACTIVE;
-			result |= EEH_STATE_DMA_ACTIVE;
-			result |= EEH_STATE_MMIO_ENABLED;
-			result |= EEH_STATE_DMA_ENABLED;
-		} else if (!(pe->state & EEH_PE_ISOLATED)) {
-			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-			ioda_eeh_phb_diag(hose);
-		}
-
-		return result;
-	}
-
-	/* Parse result out */
-	result = 0;
-	switch (fstate) {
-	case OPAL_EEH_STOPPED_NOT_FROZEN:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_MMIO_ACTIVE;
-		result |= EEH_STATE_DMA_ACTIVE;
-		result |= EEH_STATE_MMIO_ENABLED;
-		result |= EEH_STATE_DMA_ENABLED;
-		break;
-	case OPAL_EEH_STOPPED_MMIO_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_DMA_ACTIVE;
-		result |= EEH_STATE_DMA_ENABLED;
-		break;
-	case OPAL_EEH_STOPPED_DMA_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		result |= EEH_STATE_MMIO_ACTIVE;
-		result |= EEH_STATE_MMIO_ENABLED;
-		break;
-	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
-		result &= ~EEH_STATE_RESET_ACTIVE;
-		break;
-	case OPAL_EEH_STOPPED_RESET:
-		result |= EEH_STATE_RESET_ACTIVE;
-		break;
-	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
-		result |= EEH_STATE_UNAVAILABLE;
-		break;
-	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
-		result |= EEH_STATE_NOT_SUPPORT;
-		break;
-	default:
-		pr_warning("%s: Unexpected EEH status 0x%x "
-			   "on PHB#%x-PE#%x\n",
-			   __func__, fstate, hose->global_number, pe_no);
-	}
-
-	/* Dump PHB diag-data for frozen PE */
-	if (result != EEH_STATE_NOT_SUPPORT &&
-	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
-	    !(pe->state & EEH_PE_ISOLATED)) {
-		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(hose);
-	}
-
-	return result;
+	return ioda_eeh_get_pe_state(pe);
 }
 
 static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
@@ -588,6 +618,24 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	return ret;
 }
 
+/**
+ * ioda_eeh_get_log - Retrieve error log
+ * @pe: frozen PE
+ * @severity: permanent or temporary error
+ * @drv_log: device driver log
+ * @len: length of device driver log
+ *
+ * Retrieve error log, which contains log from device driver
+ * and firmware.
+ */
+int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
+		     char *drv_log, unsigned long len)
+{
+	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+
+	return 0;
+}
+
 /**
  * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
  * @pe: EEH PE
@@ -605,18 +653,24 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
 static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
 {
 	/* GEM */
-	pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
-	pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
-	pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
-	pr_info("  GEM Mask:        %016llx\n", data->gemMask);
-	pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+	if (data->gemXfir || data->gemRfir ||
+	    data->gemRirqfir || data->gemMask || data->gemRwof)
+		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->gemXfir),
+			be64_to_cpu(data->gemRfir),
+			be64_to_cpu(data->gemRirqfir),
+			be64_to_cpu(data->gemMask),
+			be64_to_cpu(data->gemRwof));
 
 	/* LEM */
-	pr_info("  LEM FIR:         %016llx\n", data->lemFir);
-	pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
-	pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
-	pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
-	pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+	if (data->lemFir || data->lemErrMask ||
+	    data->lemAction0 || data->lemAction1 || data->lemWof)
+		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrMask),
+			be64_to_cpu(data->lemAction0),
+			be64_to_cpu(data->lemAction1),
+			be64_to_cpu(data->lemWof));
 }
 
 static void ioda_eeh_hub_diag(struct pci_controller *hose)
@@ -627,8 +681,8 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 
 	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
 	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
-			   __func__, phb->hub_id, rc);
+		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			__func__, phb->hub_id, rc);
 		return;
 	}
 
@@ -636,24 +690,31 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 	case OPAL_P7IOC_DIAG_TYPE_RGC:
 		pr_info("P7IOC diag-data for RGC\n\n");
 		ioda_eeh_hub_diag_common(data);
-		pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
-		pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
+			pr_info("  RGC: %016llx %016llx\n",
+				be64_to_cpu(data->rgc.rgcStatus),
+				be64_to_cpu(data->rgc.rgcLdcp));
 		break;
 	case OPAL_P7IOC_DIAG_TYPE_BI:
 		pr_info("P7IOC diag-data for BI %s\n\n",
 			data->bi.biDownbound ? "Downbound" : "Upbound");
 		ioda_eeh_hub_diag_common(data);
-		pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
-		pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
-		pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
-		pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
+		    data->bi.biLdcp2 || data->bi.biFenceStatus)
+			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
+				be64_to_cpu(data->bi.biLdcp0),
+				be64_to_cpu(data->bi.biLdcp1),
+				be64_to_cpu(data->bi.biLdcp2),
+				be64_to_cpu(data->bi.biFenceStatus));
 		break;
 	case OPAL_P7IOC_DIAG_TYPE_CI:
-		pr_info("P7IOC diag-data for CI Port %d\\nn",
+		pr_info("P7IOC diag-data for CI Port %d\n\n",
 			data->ci.ciPort);
 		ioda_eeh_hub_diag_common(data);
-		pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
-		pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
+			pr_info("  CI:  %016llx %016llx\n",
+				be64_to_cpu(data->ci.ciPortStatus),
+				be64_to_cpu(data->ci.ciPortLdcp));
 		break;
 	case OPAL_P7IOC_DIAG_TYPE_MISC:
 		pr_info("P7IOC diag-data for MISC\n\n");
@@ -664,30 +725,51 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 		ioda_eeh_hub_diag_common(data);
 		break;
 	default:
-		pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
-			   __func__, phb->hub_id, data->type);
+		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			__func__, phb->hub_id, data->type);
 	}
 }
 
 static int ioda_eeh_get_pe(struct pci_controller *hose,
 			   u16 pe_no, struct eeh_pe **pe)
 {
-	struct eeh_pe *phb_pe, *dev_pe;
-	struct eeh_dev dev;
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pnv_pe;
+	struct eeh_pe *dev_pe;
+	struct eeh_dev edev;
 
-	/* Find the PHB PE */
-	phb_pe = eeh_phb_pe_get(hose);
-	if (!phb_pe)
-		return -EEXIST;
+	/*
+	 * If PHB supports compound PE, to fetch
+	 * the master PE because slave PE is invisible
+	 * to EEH core.
+	 */
+	if (phb->get_pe_state) {
+		pnv_pe = &phb->ioda.pe_array[pe_no];
+		if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+			pnv_pe = pnv_pe->master;
+			WARN_ON(!pnv_pe ||
+				!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+			pe_no = pnv_pe->pe_number;
+		}
+	}
 
 	/* Find the PE according to PE# */
-	memset(&dev, 0, sizeof(struct eeh_dev));
-	dev.phb = hose;
-	dev.pe_config_addr = pe_no;
-	dev_pe = eeh_pe_get(&dev);
-	if (!dev_pe) return -EEXIST;
+	memset(&edev, 0, sizeof(struct eeh_dev));
+	edev.phb = hose;
+	edev.pe_config_addr = pe_no;
+	dev_pe = eeh_pe_get(&edev);
+	if (!dev_pe)
+		return -EEXIST;
 
+	/*
+	 * At this point, we're sure the compound PE should
+	 * be put into frozen state.
+	 */
 	*pe = dev_pe;
+	if (phb->freeze_pe &&
+	    !(dev_pe->state & EEH_PE_ISOLATED))
+		phb->freeze_pe(phb, pe_no);
+
 	return 0;
 }
 
@@ -792,7 +874,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 					"detected, location: %s\n",
 					hose->global_number,
 					eeh_pe_loc_get(phb_pe));
-				ioda_eeh_phb_diag(hose);
+				ioda_eeh_phb_diag(phb_pe);
+				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
 				ret = EEH_NEXT_ERR_NONE;
 			}
 
@@ -812,7 +895,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
 					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 				ret = EEH_NEXT_ERR_NONE;
-			} else if ((*pe)->state & EEH_PE_ISOLATED) {
+			} else if ((*pe)->state & EEH_PE_ISOLATED ||
+				   eeh_pe_passed(*pe)) {
 				ret = EEH_NEXT_ERR_NONE;
 			} else {
 				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
@@ -839,7 +923,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
 		    !((*pe)->state & EEH_PE_ISOLATED)) {
 			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
-			ioda_eeh_phb_diag(hose);
+			ioda_eeh_phb_diag(*pe);
 		}
 
 		/*
@@ -885,6 +969,7 @@ struct pnv_eeh_ops ioda_eeh_ops = {
 	.set_option		= ioda_eeh_set_option,
 	.get_state		= ioda_eeh_get_state,
 	.reset			= ioda_eeh_reset,
+	.get_log		= ioda_eeh_get_log,
 	.configure_bridge	= ioda_eeh_configure_bridge,
 	.next_error		= ioda_eeh_next_error
 };
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 56a206f32f77..fd7a16f855ed 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -45,14 +45,31 @@
  */
 static int powernv_eeh_init(void)
 {
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+
 	/* We require OPALv3 */
 	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
-		pr_warning("%s: OPALv3 is required !\n", __func__);
+		pr_warn("%s: OPALv3 is required !\n",
+			__func__);
 		return -EINVAL;
 	}
 
-	/* Set EEH probe mode */
-	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+	/* Set probe mode */
+	eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+	/*
+	 * P7IOC blocks PCI config access to frozen PE, but PHB3
+	 * doesn't do that. So we have to selectively enable I/O
+	 * prior to collecting error log.
+	 */
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		if (phb->model == PNV_PHB_MODEL_P7IOC)
+			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+		break;
+	}
 
 	return 0;
 }
@@ -107,6 +124,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	struct pnv_phb *phb = hose->private_data;
 	struct device_node *dn = pci_device_to_OF_node(dev);
 	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	int ret;
 
 	/*
 	 * When probing the root bridge, which doesn't have any
@@ -143,13 +161,27 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
 
 	/* Create PE */
-	eeh_add_to_parent_pe(edev);
+	ret = eeh_add_to_parent_pe(edev);
+	if (ret) {
+		pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n",
+			__func__, pci_name(dev), ret);
+		return ret;
+	}
+
+	/*
+	 * Cache the PE primary bus, which can't be fetched when
+	 * full hotplug is in progress. In that case, all child
+	 * PCI devices of the PE are expected to be removed prior
+	 * to PE reset.
+	 */
+	if (!edev->pe->bus)
+		edev->pe->bus = dev->bus;
 
 	/*
 	 * Enable EEH explicitly so that we will do EEH check
 	 * while accessing I/O stuff
 	 */
-	eeh_set_enable(true);
+	eeh_add_flag(EEH_ENABLED);
 
 	/* Save memory bars */
 	eeh_save_bars(edev);
@@ -273,8 +305,8 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 
 		max_wait -= mwait;
 		if (max_wait <= 0) {
-			pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
-				   __func__, pe->addr, max_wait);
+			pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
+				__func__, pe->addr, max_wait);
 			return EEH_STATE_NOT_SUPPORT;
 		}
 
@@ -294,7 +326,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
  * Retrieve the temporary or permanent error from the PE.
  */
 static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
-			char *drv_log, unsigned long len)
+			       char *drv_log, unsigned long len)
 {
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
@@ -398,9 +430,7 @@ static int __init eeh_powernv_init(void)
 {
 	int ret = -EINVAL;
 
-	if (!machine_is(powernv))
-		return ret;
-
+	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
 	ret = eeh_ops_register(&powernv_eeh_ops);
 	if (!ret)
 		pr_info("EEH: PowerNV platform initialized\n");
@@ -409,5 +439,4 @@ static int __init eeh_powernv_init(void)
 
 	return ret;
 }
-
-early_initcall(eeh_powernv_init);
+machine_early_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 32e2adfa5320..e462ab947d16 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -20,6 +20,7 @@
 #include <linux/wait.h>
 #include <linux/gfp.h>
 #include <linux/of.h>
+#include <asm/machdep.h>
 #include <asm/opal.h>
 
 #define N_ASYNC_COMPLETIONS	64
@@ -201,4 +202,4 @@ out_opal_node:
 out:
 	return err;
 }
-subsys_initcall(opal_async_comp_init);
+machine_subsys_initcall(powernv, opal_async_comp_init);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
new file mode 100644
index 000000000000..97ac8dc33667
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -0,0 +1,188 @@
+/*
+ * OPAL hypervisor Maintenance interrupt handling support in PowreNV.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2014 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+static int opal_hmi_handler_nb_init;
+struct OpalHmiEvtNode {
+	struct list_head list;
+	struct OpalHMIEvent hmi_evt;
+};
+static LIST_HEAD(opal_hmi_evt_list);
+static DEFINE_SPINLOCK(opal_hmi_evt_lock);
+
+static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
+{
+	const char *level, *sevstr, *error_info;
+	static const char *hmi_error_types[] = {
+		"Malfunction Alert",
+		"Processor Recovery done",
+		"Processor recovery occurred again",
+		"Processor recovery occurred for masked error",
+		"Timer facility experienced an error",
+		"TFMR SPR is corrupted",
+		"UPS (Uniterrupted Power System) Overflow indication",
+		"An XSCOM operation failure",
+		"An XSCOM operation completed",
+		"SCOM has set a reserved FIR bit to cause recovery",
+		"Debug trigger has set a reserved FIR bit to cause recovery",
+		"A hypervisor resource error occurred"
+	};
+
+	/* Print things out */
+	if (hmi_evt->version != OpalHMIEvt_V1) {
+		pr_err("HMI Interrupt, Unknown event version %d !\n",
+			hmi_evt->version);
+		return;
+	}
+	switch (hmi_evt->severity) {
+	case OpalHMI_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case OpalHMI_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "";
+		break;
+	case OpalHMI_SEV_ERROR_SYNC:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case OpalHMI_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
+		level, sevstr,
+		hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
+		"Recovered" : "Not recovered");
+	error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
+			hmi_error_types[hmi_evt->type]
+			: "Unknown";
+	printk("%s Error detail: %s\n", level, error_info);
+	printk("%s	HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
+	if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
+		(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
+		printk("%s	TFMR: %016llx\n", level,
+						be64_to_cpu(hmi_evt->tfmr));
+}
+
+static void hmi_event_handler(struct work_struct *work)
+{
+	unsigned long flags;
+	struct OpalHMIEvent *hmi_evt;
+	struct OpalHmiEvtNode *msg_node;
+	uint8_t disposition;
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	while (!list_empty(&opal_hmi_evt_list)) {
+		msg_node = list_entry(opal_hmi_evt_list.next,
+					   struct OpalHmiEvtNode, list);
+		list_del(&msg_node->list);
+		spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+		hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
+		print_hmi_event_info(hmi_evt);
+		disposition = hmi_evt->disposition;
+		kfree(msg_node);
+
+		/*
+		 * Check if HMI event has been recovered or not. If not
+		 * then we can't continue, invoke panic.
+		 */
+		if (disposition != OpalHMI_DISPOSITION_RECOVERED)
+			panic("Unrecoverable HMI exception");
+
+		spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	}
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+}
+
+static DECLARE_WORK(hmi_event_work, hmi_event_handler);
+/*
+ * opal_handle_hmi_event - notifier handler that queues up HMI events
+ * to be preocessed later.
+ */
+static int opal_handle_hmi_event(struct notifier_block *nb,
+			  unsigned long msg_type, void *msg)
+{
+	unsigned long flags;
+	struct OpalHMIEvent *hmi_evt;
+	struct opal_msg *hmi_msg = msg;
+	struct OpalHmiEvtNode *msg_node;
+
+	/* Sanity Checks */
+	if (msg_type != OPAL_MSG_HMI_EVT)
+		return 0;
+
+	/* HMI event info starts from param[0] */
+	hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
+
+	/* Delay the logging of HMI events to workqueue. */
+	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+	if (!msg_node) {
+		pr_err("HMI: out of memory, Opal message event not handled\n");
+		return -ENOMEM;
+	}
+	memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	list_add(&msg_node->list, &opal_hmi_evt_list);
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	schedule_work(&hmi_event_work);
+	return 0;
+}
+
+static struct notifier_block opal_hmi_handler_nb = {
+	.notifier_call	= opal_handle_hmi_event,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+static int __init opal_hmi_handler_init(void)
+{
+	int ret;
+
+	if (!opal_hmi_handler_nb_init) {
+		ret = opal_message_notifier_register(
+				OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb);
+		if (ret) {
+			pr_err("%s: Can't register OPAL event notifier (%d)\n",
+			       __func__, ret);
+			return ret;
+		}
+		opal_hmi_handler_nb_init = 1;
+	}
+	return 0;
+}
+subsys_initcall(opal_hmi_handler_init);
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
index f04b4d8aca5a..ad4b31df779a 100644
--- a/arch/powerpc/platforms/powernv/opal-lpc.c
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -324,7 +324,7 @@ static int opal_lpc_init_debugfs(void)
 	rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
 	return rc;
 }
-device_initcall(opal_lpc_init_debugfs);
+machine_device_initcall(powernv, opal_lpc_init_debugfs);
 #endif  /* CONFIG_DEBUG_FS */
 
 void opal_lpc_init(void)
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index b17a34b695ef..43db2136dbff 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -27,6 +27,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 
+#include <asm/machdep.h>
 #include <asm/opal.h>
 #include <asm/cputable.h>
 
@@ -143,4 +144,4 @@ static int __init opal_mem_err_init(void)
 	}
 	return 0;
 }
-subsys_initcall(opal_mem_err_init);
+machine_subsys_initcall(powernv, opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
new file mode 100644
index 000000000000..d8a000a9988b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -0,0 +1,84 @@
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/trace.h>
+
+#ifdef CONFIG_JUMP_LABEL
+struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
+
+void opal_tracepoint_regfunc(void)
+{
+	static_key_slow_inc(&opal_tracepoint_key);
+}
+
+void opal_tracepoint_unregfunc(void)
+{
+	static_key_slow_dec(&opal_tracepoint_key);
+}
+#else
+/*
+ * We optimise OPAL calls by placing opal_tracepoint_refcount
+ * directly in the TOC so we can check if the opal tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long opal_tracepoint_refcount;
+
+void opal_tracepoint_regfunc(void)
+{
+	opal_tracepoint_refcount++;
+}
+
+void opal_tracepoint_unregfunc(void)
+{
+	opal_tracepoint_refcount--;
+}
+#endif
+
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+void __trace_opal_entry(unsigned long opcode, unsigned long *args)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+
+	depth = &__get_cpu_var(opal_trace_depth);
+
+	if (*depth)
+		goto out;
+
+	(*depth)++;
+	preempt_disable();
+	trace_opal_entry(opcode, args);
+	(*depth)--;
+
+out:
+	local_irq_restore(flags);
+}
+
+void __trace_opal_exit(long opcode, unsigned long retval)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+
+	depth = &__get_cpu_var(opal_trace_depth);
+
+	if (*depth)
+		goto out;
+
+	(*depth)++;
+	trace_opal_exit(opcode, retval);
+	preempt_enable();
+	(*depth)--;
+
+out:
+	local_irq_restore(flags);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4abbff22a61f..a328be44880f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -13,30 +13,69 @@
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/opal.h>
+#include <asm/jump_label.h>
+
+	.section	".text"
+
+#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_JUMP_LABEL
+#define OPAL_BRANCH(LABEL)					\
+	ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
+#else
+
+	.section	".toc","aw"
+
+	.globl opal_tracepoint_refcount
+opal_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
+/*
+ * We branch around this in early init by using an unconditional cpu
+ * feature.
+ */
+#define OPAL_BRANCH(LABEL)					\
+BEGIN_FTR_SECTION;						\
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,opal_tracepoint_refcount@toc(r2);		\
+	std	r12,32(r1);					\
+	cmpdi	r12,0;						\
+	bne-	LABEL;						\
+1:
+
+#endif
+
+#else
+#define OPAL_BRANCH(LABEL)
+#endif
 
 /* TODO:
  *
  * - Trace irqs in/off (needs saving/restoring all args, argh...)
  * - Get r11 feed up by Dave so I can have better register usage
  */
+
 #define OPAL_CALL(name, token)		\
  _GLOBAL(name);				\
 	mflr	r0;			\
-	mfcr	r12;			\
 	std	r0,16(r1);		\
+	li	r0,token;		\
+	OPAL_BRANCH(opal_tracepoint_entry) \
+	mfcr	r12;			\
 	stw	r12,8(r1);		\
 	std	r1,PACAR1(r13);		\
-	li	r0,0;			\
+	li	r11,0;			\
 	mfmsr	r12;			\
-	ori	r0,r0,MSR_EE;		\
+	ori	r11,r11,MSR_EE;		\
 	std	r12,PACASAVEDMSR(r13);	\
-	andc	r12,r12,r0;		\
+	andc	r12,r12,r11;		\
 	mtmsrd	r12,1;			\
-	LOAD_REG_ADDR(r0,opal_return);	\
-	mtlr	r0;			\
-	li	r0,MSR_DR|MSR_IR|MSR_LE;\
-	andc	r12,r12,r0;		\
-	li	r0,token;		\
+	LOAD_REG_ADDR(r11,opal_return);	\
+	mtlr	r11;			\
+	li	r11,MSR_DR|MSR_IR|MSR_LE;\
+	andc	r12,r12,r11;		\
 	mtspr	SPRN_HSRR1,r12;		\
 	LOAD_REG_ADDR(r11,opal);	\
 	ld	r12,8(r11);		\
@@ -61,6 +100,64 @@ opal_return:
 	mtcr	r4;
 	rfid
 
+#ifdef CONFIG_TRACEPOINTS
+opal_tracepoint_entry:
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r0,STK_REG(R23)(r1)
+	std	r3,STK_REG(R24)(r1)
+	std	r4,STK_REG(R25)(r1)
+	std	r5,STK_REG(R26)(r1)
+	std	r6,STK_REG(R27)(r1)
+	std	r7,STK_REG(R28)(r1)
+	std	r8,STK_REG(R29)(r1)
+	std	r9,STK_REG(R30)(r1)
+	std	r10,STK_REG(R31)(r1)
+	mr	r3,r0
+	addi	r4,r1,STK_REG(R24)
+	bl	__trace_opal_entry
+	ld	r0,STK_REG(R23)(r1)
+	ld	r3,STK_REG(R24)(r1)
+	ld	r4,STK_REG(R25)(r1)
+	ld	r5,STK_REG(R26)(r1)
+	ld	r6,STK_REG(R27)(r1)
+	ld	r7,STK_REG(R28)(r1)
+	ld	r8,STK_REG(R29)(r1)
+	ld	r9,STK_REG(R30)(r1)
+	ld	r10,STK_REG(R31)(r1)
+	LOAD_REG_ADDR(r11,opal_tracepoint_return)
+	mfcr	r12
+	std	r11,16(r1)
+	stw	r12,8(r1)
+	std	r1,PACAR1(r13)
+	li	r11,0
+	mfmsr	r12
+	ori	r11,r11,MSR_EE
+	std	r12,PACASAVEDMSR(r13)
+	andc	r12,r12,r11
+	mtmsrd	r12,1
+	LOAD_REG_ADDR(r11,opal_return)
+	mtlr	r11
+	li	r11,MSR_DR|MSR_IR|MSR_LE
+	andc	r12,r12,r11
+	mtspr	SPRN_HSRR1,r12
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+opal_tracepoint_return:
+	std	r3,STK_REG(R31)(r1)
+	mr	r4,r3
+	ld	r0,STK_REG(R23)(r1)
+	bl	__trace_opal_exit
+	ld	r3,STK_REG(R31)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+	ld	r0,16(r1)
+	mtlr	r0
+	blr
+#endif
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
@@ -86,6 +183,7 @@ OPAL_CALL(opal_get_xive,			OPAL_GET_XIVE);
 OPAL_CALL(opal_register_exception_handler,	OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
 OPAL_CALL(opal_pci_eeh_freeze_status,		OPAL_PCI_EEH_FREEZE_STATUS);
 OPAL_CALL(opal_pci_eeh_freeze_clear,		OPAL_PCI_EEH_FREEZE_CLEAR);
+OPAL_CALL(opal_pci_eeh_freeze_set,		OPAL_PCI_EEH_FREEZE_SET);
 OPAL_CALL(opal_pci_shpc,			OPAL_PCI_SHPC);
 OPAL_CALL(opal_pci_phb_mmio_enable,		OPAL_PCI_PHB_MMIO_ENABLE);
 OPAL_CALL(opal_pci_set_phb_mem_window,		OPAL_PCI_SET_PHB_MEM_WINDOW);
@@ -146,3 +244,4 @@ OPAL_CALL(opal_sync_host_reboot,		OPAL_SYNC_HOST_REBOOT);
 OPAL_CALL(opal_sensor_read,			OPAL_SENSOR_READ);
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
+OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 4cd2ea6c0dbe..7634d1c62299 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -130,4 +130,4 @@ static int opal_xscom_init(void)
 		scom_init(&opal_scom_controller);
 	return 0;
 }
-arch_initcall(opal_xscom_init);
+machine_arch_initcall(powernv, opal_xscom_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 199975613fe9..f0a01a46a57d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -22,6 +22,8 @@
 #include <linux/kobject.h>
 #include <linux/delay.h>
 #include <linux/memblock.h>
+
+#include <asm/machdep.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
 #include <asm/mce.h>
@@ -192,16 +194,12 @@ static int __init opal_register_exception_handlers(void)
 	 * fwnmi area at 0x7000 to provide the glue space to OPAL
 	 */
 	glue = 0x7000;
-	opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
-					0, glue);
-	glue += 128;
 	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
 #endif
 
 	return 0;
 }
-
-early_initcall(opal_register_exception_handlers);
+machine_early_initcall(powernv, opal_register_exception_handlers);
 
 int opal_notifier_register(struct notifier_block *nb)
 {
@@ -368,7 +366,7 @@ static int __init opal_message_init(void)
 	}
 	return 0;
 }
-early_initcall(opal_message_init);
+machine_early_initcall(powernv, opal_message_init);
 
 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 {
@@ -513,6 +511,46 @@ int opal_machine_check(struct pt_regs *regs)
 	return 0;
 }
 
+/* Early hmi handler called in real mode. */
+int opal_hmi_exception_early(struct pt_regs *regs)
+{
+	s64 rc;
+
+	/*
+	 * call opal hmi handler. Pass paca address as token.
+	 * The return value OPAL_SUCCESS is an indication that there is
+	 * an HMI event generated waiting to pull by Linux.
+	 */
+	rc = opal_handle_hmi();
+	if (rc == OPAL_SUCCESS) {
+		local_paca->hmi_event_available = 1;
+		return 1;
+	}
+	return 0;
+}
+
+/* HMI exception handler called in virtual mode during check_irq_replay. */
+int opal_handle_hmi_exception(struct pt_regs *regs)
+{
+	s64 rc;
+	__be64 evt = 0;
+
+	/*
+	 * Check if HMI event is available.
+	 * if Yes, then call opal_poll_events to pull opal messages and
+	 * process them.
+	 */
+	if (!local_paca->hmi_event_available)
+		return 0;
+
+	local_paca->hmi_event_available = 0;
+	rc = opal_poll_events(&evt);
+	if (rc == OPAL_SUCCESS && evt)
+		opal_do_notifier(be64_to_cpu(evt));
+
+	return 1;
+}
+
 static uint64_t find_recovery_address(uint64_t nip)
 {
 	int i;
@@ -630,7 +668,7 @@ static int __init opal_init(void)
 
 	return 0;
 }
-subsys_initcall(opal_init);
+machine_subsys_initcall(powernv, opal_init);
 
 void opal_shutdown(void)
 {
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index de19edeaa7a7..b136108ddc99 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -36,6 +36,7 @@
 #include <asm/tce.h>
 #include <asm/xics.h>
 #include <asm/debug.h>
+#include <asm/firmware.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -82,6 +83,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
 		: : "r" (val), "r" (paddr) : "memory");
 }
 
+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+{
+	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
+		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+}
+
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	unsigned long pe;
@@ -106,6 +113,380 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
 	clear_bit(pe, phb->ioda.pe_alloc);
 }
 
+/* The default M64 BAR is shared by all PEs */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+	const char *desc;
+	struct resource *r;
+	s64 rc;
+
+	/* Configure the default M64 BAR */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 phb->ioda.m64_bar_idx,
+					 phb->ioda.m64_base,
+					 0, /* unused */
+					 phb->ioda.m64_size);
+	if (rc != OPAL_SUCCESS) {
+		desc = "configuring";
+		goto fail;
+	}
+
+	/* Enable the default M64 BAR */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      phb->ioda.m64_bar_idx,
+				      OPAL_ENABLE_M64_SPLIT);
+	if (rc != OPAL_SUCCESS) {
+		desc = "enabling";
+		goto fail;
+	}
+
+	/* Mark the M64 BAR assigned */
+	set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
+
+	/*
+	 * Strip off the segment used by the reserved PE, which is
+	 * expected to be 0 or last one of PE capabicity.
+	 */
+	r = &phb->hose->mem_resources[1];
+	if (phb->ioda.reserved_pe == 0)
+		r->start += phb->ioda.m64_segsize;
+	else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
+		r->end -= phb->ioda.m64_segsize;
+	else
+		pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
+			phb->ioda.reserved_pe);
+
+	return 0;
+
+fail:
+	pr_warn("  Failure %lld %s M64 BAR#%d\n",
+		rc, desc, phb->ioda.m64_bar_idx);
+	opal_pci_phb_mmio_enable(phb->opal_id,
+				 OPAL_M64_WINDOW_TYPE,
+				 phb->ioda.m64_bar_idx,
+				 OPAL_DISABLE_M64);
+	return -EIO;
+}
+
+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
+{
+	resource_size_t sgsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	int base, step, i;
+
+	/*
+	 * Root bus always has full M64 range and root port has
+	 * M64 range used in reality. So we're checking root port
+	 * instead of root bus.
+	 */
+	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
+		for (i = PCI_BRIDGE_RESOURCES;
+		     i <= PCI_BRIDGE_RESOURCE_END; i++) {
+			r = &pdev->resource[i];
+			if (!r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			base = (r->start - phb->ioda.m64_base) / sgsz;
+			for (step = 0; step < resource_size(r) / sgsz; step++)
+				set_bit(base + step, phb->ioda.pe_alloc);
+		}
+	}
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
+				 struct pci_bus *bus, int all)
+{
+	resource_size_t segsz = phb->ioda.m64_segsize;
+	struct pci_dev *pdev;
+	struct resource *r;
+	struct pnv_ioda_pe *master_pe, *pe;
+	unsigned long size, *pe_alloc;
+	bool found;
+	int start, i, j;
+
+	/* Root bus shouldn't use M64 */
+	if (pci_is_root_bus(bus))
+		return IODA_INVALID_PE;
+
+	/* We support only one M64 window on each bus */
+	found = false;
+	pci_bus_for_each_resource(bus, r, i) {
+		if (r && r->parent &&
+		    pnv_pci_is_mem_pref_64(r->flags)) {
+			found = true;
+			break;
+		}
+	}
+
+	/* No M64 window found ? */
+	if (!found)
+		return IODA_INVALID_PE;
+
+	/* Allocate bitmap */
+	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
+	pe_alloc = kzalloc(size, GFP_KERNEL);
+	if (!pe_alloc) {
+		pr_warn("%s: Out of memory !\n",
+			__func__);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out reserved PE numbers by the PE
+	 * the its child PEs.
+	 */
+	start = (r->start - phb->ioda.m64_base) / segsz;
+	for (i = 0; i < resource_size(r) / segsz; i++)
+		set_bit(start + i, pe_alloc);
+
+	if (all)
+		goto done;
+
+	/*
+	 * If the PE doesn't cover all subordinate buses,
+	 * we need subtract from reserved PEs for children.
+	 */
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		if (!pdev->subordinate)
+			continue;
+
+		pci_bus_for_each_resource(pdev->subordinate, r, i) {
+			if (!r || !r->parent ||
+			    !pnv_pci_is_mem_pref_64(r->flags))
+				continue;
+
+			start = (r->start - phb->ioda.m64_base) / segsz;
+			for (j = 0; j < resource_size(r) / segsz ; j++)
+				clear_bit(start + j, pe_alloc);
+                }
+        }
+
+	/*
+	 * the current bus might not own M64 window and that's all
+	 * contributed by its child buses. For the case, we needn't
+	 * pick M64 dependent PE#.
+	 */
+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
+		kfree(pe_alloc);
+		return IODA_INVALID_PE;
+	}
+
+	/*
+	 * Figure out the master PE and put all slave PEs to master
+	 * PE's list to form compound PE.
+	 */
+done:
+	master_pe = NULL;
+	i = -1;
+	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
+		phb->ioda.total_pe) {
+		pe = &phb->ioda.pe_array[i];
+		pe->phb = phb;
+		pe->pe_number = i;
+
+		if (!master_pe) {
+			pe->flags |= PNV_IODA_PE_MASTER;
+			INIT_LIST_HEAD(&pe->slaves);
+			master_pe = pe;
+		} else {
+			pe->flags |= PNV_IODA_PE_SLAVE;
+			pe->master = master_pe;
+			list_add_tail(&pe->list, &master_pe->slaves);
+		}
+	}
+
+	kfree(pe_alloc);
+	return master_pe->pe_number;
+}
+
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	struct resource *res;
+	const u32 *r;
+	u64 pci_addr;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+		pr_info("  Firmware too old to support M64 window\n");
+		return;
+	}
+
+	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+	if (!r) {
+		pr_info("  No <ibm,opal-m64-window> on %s\n",
+			dn->full_name);
+		return;
+	}
+
+	/* FIXME: Support M64 for P7IOC */
+	if (phb->type != PNV_PHB_IODA2) {
+		pr_info("  Not support M64 window\n");
+		return;
+	}
+
+	res = &hose->mem_resources[1];
+	res->start = of_translate_address(dn, r + 2);
+	res->end = res->start + of_read_number(r + 4, 2) - 1;
+	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+	pci_addr = of_read_number(r, 2);
+	hose->mem_offset[1] = res->start - pci_addr;
+
+	phb->ioda.m64_size = resource_size(res);
+	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
+	phb->ioda.m64_base = pci_addr;
+
+	/* Use last M64 BAR to cover M64 window */
+	phb->ioda.m64_bar_idx = 15;
+	phb->init_m64 = pnv_ioda2_init_m64;
+	phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
+	phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
+}
+
+static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+	struct pnv_ioda_pe *slave;
+	s64 rc;
+
+	/* Fetch master PE */
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Freeze master PE */
+	rc = opal_pci_eeh_freeze_set(phb->opal_id,
+				     pe_no,
+				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+		return;
+	}
+
+	/* Freeze slave PEs */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_set(phb->opal_id,
+					     slave->pe_number,
+					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+		if (rc != OPAL_SUCCESS)
+			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				slave->pe_number);
+	}
+}
+
+int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
+{
+	struct pnv_ioda_pe *pe, *slave;
+	s64 rc;
+
+	/* Find master PE */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Clear frozen state for master PE */
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+			__func__, rc, opt, phb->hose->global_number, pe_no);
+		return -EIO;
+	}
+
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return 0;
+
+	/* Clear frozen state for slave PEs */
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					     slave->pe_number,
+					     opt);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+				__func__, rc, opt, phb->hose->global_number,
+				slave->pe_number);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *slave, *pe;
+	u8 fstate, state;
+	__be16 pcierr;
+	s64 rc;
+
+	/* Sanity check on PE number */
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe)
+		return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+	/*
+	 * Fetch the master PE and the PE instance might be
+	 * not initialized yet.
+	 */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
+		pe_no = pe->pe_number;
+	}
+
+	/* Check the master PE */
+	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+					&state, &pcierr, NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting "
+			"PHB#%x-PE#%x state\n",
+			__func__, rc,
+			phb->hose->global_number, pe_no);
+		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+	}
+
+	/* Check the slave PE */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return state;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						slave->pe_number,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting "
+				"PHB#%x-PE#%x state\n",
+				__func__, rc,
+				phb->hose->global_number, slave->pe_number);
+			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+		}
+
+		/*
+		 * Override the result based on the ascending
+		 * priority.
+		 */
+		if (fstate > state)
+			state = fstate;
+	}
+
+	return state;
+}
+
 /* Currently those 2 are only used when MSIs are enabled, this will change
  * but in the meantime, we need to protect them to avoid warnings
  */
@@ -363,9 +744,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pnv_phb *phb = hose->private_data;
 	struct pnv_ioda_pe *pe;
-	int pe_num;
+	int pe_num = IODA_INVALID_PE;
+
+	/* Check if PE is determined by M64 */
+	if (phb->pick_m64_pe)
+		pe_num = phb->pick_m64_pe(phb, bus, all);
+
+	/* The PE number isn't pinned by M64 */
+	if (pe_num == IODA_INVALID_PE)
+		pe_num = pnv_ioda_alloc_pe(phb);
 
-	pe_num = pnv_ioda_alloc_pe(phb);
 	if (pe_num == IODA_INVALID_PE) {
 		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
 			__func__, pci_domain_nr(bus), bus->number);
@@ -373,7 +761,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
 	}
 
 	pe = &phb->ioda.pe_array[pe_num];
-	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
 	pe->pbus = bus;
 	pe->pdev = NULL;
 	pe->tce32_seg = -1;
@@ -441,8 +829,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
 static void pnv_pci_ioda_setup_PEs(void)
 {
 	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
 
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		/* M64 layout might affect PE allocation */
+		if (phb->alloc_m64_pe)
+			phb->alloc_m64_pe(phb);
+
 		pnv_ioda_setup_PEs(hose->bus);
 	}
 }
@@ -491,17 +886,26 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
 		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
 	}
+	*pdev->dev.dma_mask = dma_mask;
 	return 0;
 }
 
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
+				   struct pci_bus *bus,
+				   bool add_to_iommu_group)
 {
 	struct pci_dev *dev;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
-		set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
+		if (add_to_iommu_group)
+			set_iommu_table_base_and_group(&dev->dev,
+						       &pe->tce32_table);
+		else
+			set_iommu_table_base(&dev->dev, &pe->tce32_table);
+
 		if (dev->subordinate)
-			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+					       add_to_iommu_group);
 	}
 }
 
@@ -513,15 +917,16 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
 		(__be64 __iomem *)pe->tce_inval_reg_phys :
 		(__be64 __iomem *)tbl->it_index;
 	unsigned long start, end, inc;
+	const unsigned shift = tbl->it_page_shift;
 
 	start = __pa(startp);
 	end = __pa(endp);
 
 	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
 	if (tbl->it_busno) {
-		start <<= 12;
-		end <<= 12;
-		inc = 128 << 12;
+		start <<= shift;
+		end <<= shift;
+		inc = 128ull << shift;
 		start |= tbl->it_busno;
 		end |= tbl->it_busno;
 	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
@@ -559,18 +964,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 	__be64 __iomem *invalidate = rm ?
 		(__be64 __iomem *)pe->tce_inval_reg_phys :
 		(__be64 __iomem *)tbl->it_index;
+	const unsigned shift = tbl->it_page_shift;
 
 	/* We'll invalidate DMA address in PE scope */
-	start = 0x2ul << 60;
+	start = 0x2ull << 60;
 	start |= (pe->pe_number & 0xFF);
 	end = start;
 
 	/* Figure out the start, end and step */
 	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
-	start |= (inc << 12);
+	start |= (inc << shift);
 	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
-	end |= (inc << 12);
-	inc = (0x1ul << 12);
+	end |= (inc << shift);
+	inc = (0x1ull << shift);
 	mb();
 
 	while (start <= end) {
@@ -654,7 +1060,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	/* Setup linux iommu table */
 	tbl = &pe->tce32_table;
 	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
-				  base << 28);
+				  base << 28, IOMMU_PAGE_SHIFT_4K);
 
 	/* OPAL variant of P7IOC SW invalidated TCEs */
 	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
@@ -677,7 +1083,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	if (pe->pdev)
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
 	else
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 
 	return;
  fail:
@@ -713,11 +1119,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
 						     0);
 
 		/*
-		 * We might want to reset the DMA ops of all devices on
-		 * this PE. However in theory, that shouldn't be necessary
-		 * as this is used for VFIO/KVM pass-through and the device
-		 * hasn't yet been returned to its kernel driver
+		 * EEH needs the mapping between IOMMU table and group
+		 * of those VFIO/KVM pass-through devices. We can postpone
+		 * resetting DMA ops until the DMA mask is configured in
+		 * host side.
 		 */
+		if (pe->pdev)
+			set_iommu_table_base(&pe->pdev->dev, tbl);
+		else
+			pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 	}
 	if (rc)
 		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
@@ -784,7 +1194,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 
 	/* Setup linux iommu table */
 	tbl = &pe->tce32_table;
-	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
+	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
+			IOMMU_PAGE_SHIFT_4K);
 
 	/* OPAL variant of PHB3 invalidated TCEs */
 	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
@@ -805,7 +1216,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	if (pe->pdev)
 		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
 	else
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 
 	/* Also create a bypass window */
 	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
@@ -1055,9 +1466,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
 				index++;
 			}
 		} else if (res->flags & IORESOURCE_MEM) {
-			/* WARNING: Assumes M32 is mem region 0 in PHB. We need to
-			 * harden that algorithm when we start supporting M64
-			 */
 			region.start = res->start -
 				       hose->mem_offset[0] -
 				       phb->ioda.m32_pci_base;
@@ -1141,9 +1549,8 @@ static void pnv_pci_ioda_fixup(void)
 	pnv_pci_ioda_create_dbgfs();
 
 #ifdef CONFIG_EEH
-	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
-	eeh_addr_cache_build();
 	eeh_init();
+	eeh_addr_cache_build();
 #endif
 }
 
@@ -1178,7 +1585,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 		bridge = bridge->bus->self;
 	}
 
-	/* We need support prefetchable memory window later */
+	/* We fail back to M32 if M64 isn't supported */
+	if (phb->ioda.m64_segsize &&
+	    pnv_pci_is_mem_pref_64(type))
+		return phb->ioda.m64_segsize;
 	if (type & IORESOURCE_MEM)
 		return phb->ioda.m32_segsize;
 
@@ -1299,6 +1709,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
 	if (prop32)
 		phb->ioda.reserved_pe = be32_to_cpup(prop32);
+
+	/* Parse 64-bit MMIO range */
+	pnv_ioda_parse_m64_window(phb);
+
 	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
 	/* FW Has already off top 64k of M32 space (MSI space) */
 	phb->ioda.m32_size += 0x10000;
@@ -1334,14 +1748,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	/* Calculate how many 32-bit TCE segments we have */
 	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
 
-	/* Clear unusable m64 */
-	hose->mem_resources[1].flags = 0;
-	hose->mem_resources[1].start = 0;
-	hose->mem_resources[1].end = 0;
-	hose->mem_resources[2].flags = 0;
-	hose->mem_resources[2].start = 0;
-	hose->mem_resources[2].end = 0;
-
 #if 0 /* We should really do that ... */
 	rc = opal_pci_set_phb_mem_window(opal->phb_id,
 					 window_type,
@@ -1351,14 +1757,21 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 					 segment_size);
 #endif
 
-	pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
-		" IO: 0x%x [segment=0x%x]\n",
-		phb->ioda.total_pe,
-		phb->ioda.reserved_pe,
-		phb->ioda.m32_size, phb->ioda.m32_segsize,
-		phb->ioda.io_size, phb->ioda.io_segsize);
+	pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+		phb->ioda.total_pe, phb->ioda.reserved_pe,
+		phb->ioda.m32_size, phb->ioda.m32_segsize);
+	if (phb->ioda.m64_size)
+		pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
+			phb->ioda.m64_size, phb->ioda.m64_segsize);
+	if (phb->ioda.io_size)
+		pr_info("                  IO: 0x%x [segment=0x%x]\n",
+			phb->ioda.io_size, phb->ioda.io_segsize);
+
 
 	phb->hose->ops = &pnv_pci_ops;
+	phb->get_pe_state = pnv_ioda_get_pe_state;
+	phb->freeze_pe = pnv_ioda_freeze_pe;
+	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
 #ifdef CONFIG_EEH
 	phb->eeh_ops = &ioda_eeh_ops;
 #endif
@@ -1404,6 +1817,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
 		ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
 	}
+
+	/* Configure M64 window */
+	if (phb->init_m64 && phb->init_m64(phb))
+		hose->mem_resources[1].flags = 0;
 }
 
 void __init pnv_pci_init_ioda2_phb(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index e3807d69393e..94ce3481490b 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -172,7 +172,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
 	pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
-				  tce_mem, tce_size, 0);
+				  tce_mem, tce_size, 0,
+				  IOMMU_PAGE_SHIFT_4K);
 }
 
 void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f91a4e5d872e..b854b57ed5e1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -132,61 +132,78 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 
 	data = (struct OpalIoP7IOCPhbErrorData *)common;
 	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
-		hose->global_number, common->version);
+		hose->global_number, be32_to_cpu(common->version));
 
 	if (data->brdgCtl)
 		pr_info("brdgCtl:     %08x\n",
-			data->brdgCtl);
+			be32_to_cpu(data->brdgCtl));
 	if (data->portStatusReg || data->rootCmplxStatus ||
 	    data->busAgentStatus)
 		pr_info("UtlSts:      %08x %08x %08x\n",
-			data->portStatusReg, data->rootCmplxStatus,
-			data->busAgentStatus);
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
 	if (data->deviceStatus || data->slotStatus   ||
 	    data->linkStatus   || data->devCmdStatus ||
 	    data->devSecStatus)
 		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
-			data->deviceStatus, data->slotStatus,
-			data->linkStatus, data->devCmdStatus,
-			data->devSecStatus);
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
 	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
 	    data->corrErrorStatus)
 		pr_info("RootErrSts:  %08x %08x %08x\n",
-			data->rootErrorStatus, data->uncorrErrorStatus,
-			data->corrErrorStatus);
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
 	if (data->tlpHdr1 || data->tlpHdr2 ||
 	    data->tlpHdr3 || data->tlpHdr4)
 		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
-			data->tlpHdr1, data->tlpHdr2,
-			data->tlpHdr3, data->tlpHdr4);
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
 	if (data->sourceId || data->errorClass ||
 	    data->correlator)
 		pr_info("RootErrLog1: %08x %016llx %016llx\n",
-			data->sourceId, data->errorClass,
-			data->correlator);
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
 	if (data->p7iocPlssr || data->p7iocCsr)
 		pr_info("PhbSts:      %016llx %016llx\n",
-			data->p7iocPlssr, data->p7iocCsr);
+			be64_to_cpu(data->p7iocPlssr),
+			be64_to_cpu(data->p7iocCsr));
 	if (data->lemFir)
 		pr_info("Lem:         %016llx %016llx %016llx\n",
-			data->lemFir, data->lemErrorMask,
-			data->lemWOF);
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
 	if (data->phbErrorStatus)
 		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
-			data->phbErrorStatus, data->phbFirstErrorStatus,
-			data->phbErrorLog0, data->phbErrorLog1);
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
 	if (data->mmioErrorStatus)
 		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
-			data->mmioErrorStatus, data->mmioFirstErrorStatus,
-			data->mmioErrorLog0, data->mmioErrorLog1);
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
 	if (data->dma0ErrorStatus)
 		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
-			data->dma0ErrorLog0, data->dma0ErrorLog1);
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
 	if (data->dma1ErrorStatus)
 		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
-			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
-			data->dma1ErrorLog0, data->dma1ErrorLog1);
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
@@ -194,7 +211,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 			continue;
 
 		pr_info("PE[%3d] A/B: %016llx %016llx\n",
-			i, data->pestA[i], data->pestB[i]);
+			i, be64_to_cpu(data->pestA[i]),
+			be64_to_cpu(data->pestB[i]));
 	}
 }
 
@@ -319,43 +337,52 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
 static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 {
 	unsigned long flags, rc;
-	int has_diag;
+	int has_diag, ret = 0;
 
 	spin_lock_irqsave(&phb->lock, flags);
 
+	/* Fetch PHB diag-data */
 	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
 					 PNV_PCI_DIAG_BUF_SIZE);
 	has_diag = (rc == OPAL_SUCCESS);
 
-	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+	/* If PHB supports compound PE, to handle it */
+	if (phb->unfreeze_pe) {
+		ret = phb->unfreeze_pe(phb,
+				       pe_no,
 				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-	if (rc) {
-		pr_warning("PCI %d: Failed to clear EEH freeze state"
-			   " for PE#%d, err %ld\n",
-			   phb->hose->global_number, pe_no, rc);
-
-		/* For now, let's only display the diag buffer when we fail to clear
-		 * the EEH status. We'll do more sensible things later when we have
-		 * proper EEH support. We need to make sure we don't pollute ourselves
-		 * with the normal errors generated when probing empty slots
-		 */
-		if (has_diag)
-			pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
-		else
-			pr_warning("PCI %d: No diag data available\n",
-				   phb->hose->global_number);
+	} else {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					     pe_no,
+					     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		if (rc) {
+			pr_warn("%s: Failure %ld clearing frozen "
+				"PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				pe_no);
+			ret = -EIO;
+		}
 	}
 
+	/*
+	 * For now, let's only display the diag buffer when we fail to clear
+	 * the EEH status. We'll do more sensible things later when we have
+	 * proper EEH support. We need to make sure we don't pollute ourselves
+	 * with the normal errors generated when probing empty slots
+	 */
+	if (has_diag && ret)
+		pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+
 	spin_unlock_irqrestore(&phb->lock, flags);
 }
 
 static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 				     struct device_node *dn)
 {
-	s64	rc;
 	u8	fstate;
 	__be16	pcierr;
-	u32	pe_no;
+	int	pe_no;
+	s64	rc;
 
 	/*
 	 * Get the PE#. During the PCI probe stage, we might not
@@ -370,20 +397,42 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
 			pe_no = phb->ioda.reserved_pe;
 	}
 
-	/* Read freeze status */
-	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
-					NULL);
-	if (rc) {
-		pr_warning("%s: Can't read EEH status (PE#%d) for "
-			   "%s, err %lld\n",
-			   __func__, pe_no, dn->full_name, rc);
-		return;
+	/*
+	 * Fetch frozen state. If the PHB support compound PE,
+	 * we need handle that case.
+	 */
+	if (phb->get_pe_state) {
+		fstate = phb->get_pe_state(phb, pe_no);
+	} else {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						pe_no,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
+				__func__, rc, phb->hose->global_number, pe_no);
+			return;
+		}
 	}
+
 	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
 		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
 		pe_no, fstate);
-	if (fstate != 0)
+
+	/* Clear the frozen state if applicable */
+	if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
+	    fstate == OPAL_EEH_STOPPED_DMA_FREEZE  ||
+	    fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) {
+		/*
+		 * If PHB supports compound PE, freeze it for
+		 * consistency.
+		 */
+		if (phb->freeze_pe)
+			phb->freeze_pe(phb, pe_no);
+
 		pnv_pci_handle_eeh_config(phb, pe_no);
+	}
 }
 
 int pnv_pci_cfg_read(struct device_node *dn,
@@ -564,10 +613,11 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 		proto_tce |= TCE_PCI_WRITE;
 
 	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
-	rpn = __pa(uaddr) >> TCE_SHIFT;
+	rpn = __pa(uaddr) >> tbl->it_page_shift;
 
 	while (npages--)
-		*(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT));
+		*(tcep++) = cpu_to_be64(proto_tce |
+				(rpn++ << tbl->it_page_shift));
 
 	/* Some implementations won't cache invalid TCEs and thus may not
 	 * need that flush. We'll probably turn it_type into a bit mask
@@ -627,11 +677,11 @@ static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
 
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 			       void *tce_mem, u64 tce_size,
-			       u64 dma_offset)
+			       u64 dma_offset, unsigned page_shift)
 {
 	tbl->it_blocksize = 16;
 	tbl->it_base = (unsigned long)tce_mem;
-	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_page_shift = page_shift;
 	tbl->it_offset = dma_offset >> tbl->it_page_shift;
 	tbl->it_index = 0;
 	tbl->it_size = tce_size >> 3;
@@ -656,7 +706,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
 	if (WARN_ON(!tbl))
 		return NULL;
 	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
-				  be32_to_cpup(sizep), 0);
+				  be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K);
 	iommu_init_table(tbl, hose->node);
 	iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
 
@@ -842,5 +892,4 @@ static int __init tce_iommu_bus_notifier_init(void)
 	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
 	return 0;
 }
-
-subsys_initcall_sync(tce_iommu_bus_notifier_init);
+machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 676232c34328..48494d4b6058 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -21,6 +21,8 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
 #define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
 #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
+#define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
+#define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -64,6 +66,10 @@ struct pnv_ioda_pe {
 	 */
 	int			mve_number;
 
+	/* PEs in compound case */
+	struct pnv_ioda_pe	*master;
+	struct list_head	slaves;
+
 	/* Link in list of PE#s */
 	struct list_head	dma_link;
 	struct list_head	list;
@@ -119,6 +125,12 @@ struct pnv_phb {
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
+	int (*init_m64)(struct pnv_phb *phb);
+	void (*alloc_m64_pe)(struct pnv_phb *phb);
+	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
+	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
+	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
+	int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
 
 	union {
 		struct {
@@ -129,9 +141,20 @@ struct pnv_phb {
 			/* Global bridge info */
 			unsigned int		total_pe;
 			unsigned int		reserved_pe;
+
+			/* 32-bit MMIO window */
 			unsigned int		m32_size;
 			unsigned int		m32_segsize;
 			unsigned int		m32_pci_base;
+
+			/* 64-bit MMIO window */
+			unsigned int		m64_bar_idx;
+			unsigned long		m64_size;
+			unsigned long		m64_segsize;
+			unsigned long		m64_base;
+			unsigned long		m64_bar_alloc;
+
+			/* IO ports */
 			unsigned int		io_size;
 			unsigned int		io_segsize;
 			unsigned int		io_pci_base;
@@ -198,7 +221,7 @@ int pnv_pci_cfg_write(struct device_node *dn,
 		      int where, int size, u32 val);
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      void *tce_mem, u64 tce_size,
-				      u64 dma_offset);
+				      u64 dma_offset, unsigned page_shift);
 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 1cb160dc1609..80db43944afe 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -123,4 +123,4 @@ static __init int rng_init(void)
 
 	return 0;
 }
-subsys_initcall(rng_init);
+machine_subsys_initcall(powernv, rng_init);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d9b88fa7c5a3..5a0e2dc6de5f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -264,6 +264,8 @@ static void __init pnv_setup_machdep_opal(void)
 	ppc_md.halt = pnv_halt;
 	ppc_md.machine_check_exception = opal_machine_check;
 	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
+	ppc_md.hmi_exception_early = opal_hmi_exception_early;
+	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 }
 
 #ifdef CONFIG_PPC_POWERNV_RTAS
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 7d61498e45c0..1062f71f5a85 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -29,6 +29,7 @@
 #include <asm/lppaca.h>
 #include <asm/debug.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/machdep.h>
 
 struct dtl {
 	struct dtl_entry	*buf;
@@ -391,4 +392,4 @@ err_remove_dir:
 err:
 	return rc;
 }
-arch_initcall(dtl_init);
+machine_arch_initcall(pseries, dtl_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 0bec0c02c5e7..b08053819d99 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -89,26 +89,26 @@ static int pseries_eeh_init(void)
 	 * of domain/bus/slot/function for EEH RTAS operations.
 	 */
 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n",
+		pr_warn("%s: RTAS service <ibm,set-eeh-option> invalid\n",
 			__func__);
 		return -EINVAL;
 	} else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: RTAS service <ibm,set-slot-reset> invalid\n",
+		pr_warn("%s: RTAS service <ibm,set-slot-reset> invalid\n",
 			__func__);
 		return -EINVAL;
 	} else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
 		   ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and "
+		pr_warn("%s: RTAS service <ibm,read-slot-reset-state2> and "
 			"<ibm,read-slot-reset-state> invalid\n",
 			__func__);
 		return -EINVAL;
 	} else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n",
+		pr_warn("%s: RTAS service <ibm,slot-error-detail> invalid\n",
 			__func__);
 		return -EINVAL;
 	} else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE &&
 		   ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: RTAS service <ibm,configure-pe> and "
+		pr_warn("%s: RTAS service <ibm,configure-pe> and "
 			"<ibm,configure-bridge> invalid\n",
 			__func__);
 		return -EINVAL;
@@ -118,17 +118,17 @@ static int pseries_eeh_init(void)
 	spin_lock_init(&slot_errbuf_lock);
 	eeh_error_buf_size = rtas_token("rtas-error-log-max");
 	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
-		pr_warning("%s: unknown EEH error log size\n",
+		pr_warn("%s: unknown EEH error log size\n",
 			__func__);
 		eeh_error_buf_size = 1024;
 	} else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
-		pr_warning("%s: EEH error log size %d exceeds the maximal %d\n",
+		pr_warn("%s: EEH error log size %d exceeds the maximal %d\n",
 			__func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
 		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
 	}
 
 	/* Set EEH probe mode */
-	eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE);
+	eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
 
 	return 0;
 }
@@ -270,7 +270,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 	/* Retrieve the device address */
 	regs = of_get_property(dn, "reg", NULL);
 	if (!regs) {
-		pr_warning("%s: OF node property %s::reg not found\n",
+		pr_warn("%s: OF node property %s::reg not found\n",
 			__func__, dn->full_name);
 		return NULL;
 	}
@@ -297,7 +297,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 			enable = 1;
 
 		if (enable) {
-			eeh_set_enable(true);
+			eeh_add_flag(EEH_ENABLED);
 			eeh_add_to_parent_pe(edev);
 
 			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
@@ -398,7 +398,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
 				pe->config_addr, BUID_HI(pe->phb->buid),
 				BUID_LO(pe->phb->buid), 0);
 		if (ret) {
-			pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+			pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
 				__func__, pe->phb->global_number, pe->config_addr);
 			return 0;
 		}
@@ -411,7 +411,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
 				pe->config_addr, BUID_HI(pe->phb->buid),
 				BUID_LO(pe->phb->buid), 0);
 		if (ret) {
-			pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+			pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n",
 				__func__, pe->phb->global_number, pe->config_addr);
 			return 0;
 		}
@@ -584,17 +584,17 @@ static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 			return ret;
 
 		if (max_wait <= 0) {
-			pr_warning("%s: Timeout when getting PE's state (%d)\n",
+			pr_warn("%s: Timeout when getting PE's state (%d)\n",
 				__func__, max_wait);
 			return EEH_STATE_NOT_SUPPORT;
 		}
 
 		if (mwait <= 0) {
-			pr_warning("%s: Firmware returned bad wait value %d\n",
+			pr_warn("%s: Firmware returned bad wait value %d\n",
 				__func__, mwait);
 			mwait = EEH_STATE_MIN_WAIT_TIME;
 		} else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
-			pr_warning("%s: Firmware returned too long wait value %d\n",
+			pr_warn("%s: Firmware returned too long wait value %d\n",
 				__func__, mwait);
 			mwait = EEH_STATE_MAX_WAIT_TIME;
 		}
@@ -675,7 +675,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 	}
 
 	if (ret)
-		pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+		pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
 			__func__, pe->phb->global_number, pe->addr, ret);
 
 	return ret;
@@ -743,10 +743,7 @@ static struct eeh_ops pseries_eeh_ops = {
  */
 static int __init eeh_pseries_init(void)
 {
-	int ret = -EINVAL;
-
-	if (!machine_is(pseries))
-		return ret;
+	int ret;
 
 	ret = eeh_ops_register(&pseries_eeh_ops);
 	if (!ret)
@@ -757,5 +754,4 @@ static int __init eeh_pseries_init(void)
 
 	return ret;
 }
-
-early_initcall(eeh_pseries_init);
+machine_early_initcall(pseries, eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 99ecf0a5a929..3fda3f17b84e 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -12,9 +12,13 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
+#include <asm/jump_label.h>
+
+	.section	".text"
 	
 #ifdef CONFIG_TRACEPOINTS
 
+#ifndef CONFIG_JUMP_LABEL
 	.section	".toc","aw"
 
 	.globl hcall_tracepoint_refcount
@@ -22,21 +26,13 @@ hcall_tracepoint_refcount:
 	.llong	0
 
 	.section	".text"
+#endif
 
 /*
  * precall must preserve all registers.  use unused STK_PARAM()
- * areas to save snapshots and opcode. We branch around this
- * in early init (eg when populating the MMU hashtable) by using an
- * unconditional cpu feature.
+ * areas to save snapshots and opcode.
  */
 #define HCALL_INST_PRECALL(FIRST_REG)				\
-BEGIN_FTR_SECTION;						\
-	b	1f;						\
-END_FTR_SECTION(0, 1);						\
-	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
-	std	r12,32(r1);					\
-	cmpdi	r12,0;						\
-	beq+	1f;						\
 	mflr	r0;						\
 	std	r3,STK_PARAM(R3)(r1);				\
 	std	r4,STK_PARAM(R4)(r1);				\
@@ -50,45 +46,29 @@ END_FTR_SECTION(0, 1);						\
 	addi	r4,r1,STK_PARAM(FIRST_REG);			\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
 	bl	__trace_hcall_entry;				\
-	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
-	ld	r0,16(r1);					\
-	ld	r3,STK_PARAM(R3)(r1);				\
-	ld	r4,STK_PARAM(R4)(r1);				\
-	ld	r5,STK_PARAM(R5)(r1);				\
-	ld	r6,STK_PARAM(R6)(r1);				\
-	ld	r7,STK_PARAM(R7)(r1);				\
-	ld	r8,STK_PARAM(R8)(r1);				\
-	ld	r9,STK_PARAM(R9)(r1);				\
-	ld	r10,STK_PARAM(R10)(r1);				\
-	mtlr	r0;						\
-1:
+	ld	r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
+	ld	r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1);	\
+	ld	r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1);	\
+	ld	r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1);	\
+	ld	r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1);	\
+	ld	r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1);	\
+	ld	r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1);	\
+	ld	r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1)
 
 /*
  * postcall is performed immediately before function return which
- * allows liberal use of volatile registers.  We branch around this
- * in early init (eg when populating the MMU hashtable) by using an
- * unconditional cpu feature.
+ * allows liberal use of volatile registers.
  */
 #define __HCALL_INST_POSTCALL					\
-BEGIN_FTR_SECTION;						\
-	b	1f;						\
-END_FTR_SECTION(0, 1);						\
-	ld      r12,32(r1);					\
-	cmpdi	r12,0;						\
-	beq+	1f;						\
-	mflr	r0;						\
-	ld	r6,STK_PARAM(R3)(r1);				\
-	std	r3,STK_PARAM(R3)(r1);				\
+	ld	r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
+	std	r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
 	mr	r4,r3;						\
-	mr	r3,r6;						\
-	std	r0,16(r1);					\
-	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	mr	r3,r0;						\
 	bl	__trace_hcall_exit;				\
+	ld	r0,STACK_FRAME_OVERHEAD+16(r1);			\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
-	ld	r0,16(r1);					\
 	ld	r3,STK_PARAM(R3)(r1);				\
-	mtlr	r0;						\
-1:
+	mtlr	r0
 
 #define HCALL_INST_POSTCALL_NORETS				\
 	li	r5,0;						\
@@ -98,37 +78,62 @@ END_FTR_SECTION(0, 1);						\
 	mr	r5,BUFREG;					\
 	__HCALL_INST_POSTCALL
 
+#ifdef CONFIG_JUMP_LABEL
+#define HCALL_BRANCH(LABEL)					\
+	ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
+#else
+
+/*
+ * We branch around this in early init (eg when populating the MMU
+ * hashtable) by using an unconditional cpu feature.
+ */
+#define HCALL_BRANCH(LABEL)					\
+BEGIN_FTR_SECTION;						\
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	std	r12,32(r1);					\
+	cmpdi	r12,0;						\
+	bne-	LABEL;						\
+1:
+#endif
+
 #else
 #define HCALL_INST_PRECALL(FIRST_ARG)
 #define HCALL_INST_POSTCALL_NORETS
 #define HCALL_INST_POSTCALL(BUFREG)
+#define HCALL_BRANCH(LABEL)
 #endif
 
-	.text
-
 _GLOBAL_TOC(plpar_hcall_norets)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
-
-	HCALL_INST_PRECALL(R4)
-
+	HCALL_BRANCH(plpar_hcall_norets_trace)
 	HVSC				/* invoke the hypervisor */
 
-	HCALL_INST_POSTCALL_NORETS
-
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
 
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_norets_trace:
+	HCALL_INST_PRECALL(R4)
+	HVSC
+	HCALL_INST_POSTCALL_NORETS
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+	blr
+#endif
+
 _GLOBAL_TOC(plpar_hcall)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL(R5)
+	HCALL_BRANCH(plpar_hcall_trace)
 
 	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
 
@@ -147,12 +152,40 @@ _GLOBAL_TOC(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall_trace:
+	HCALL_INST_PRECALL(R5)
+
+	std	r4,STK_PARAM(R4)(r1)
+	mr	r0,r4
+
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+
+	HVSC
+
+	ld	r12,STK_PARAM(R4)(r1)
+	std	r4,0(r12)
+	std	r5,8(r12)
+	std	r6,16(r12)
+	std	r7,24(r12)
+
 	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
-	blr				/* return r3 = status */
+	blr
+#endif
 
 /*
  * plpar_hcall_raw can be called in real mode. kexec/kdump need some
@@ -194,7 +227,7 @@ _GLOBAL_TOC(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL(R5)
+	HCALL_BRANCH(plpar_hcall9_trace)
 
 	std     r4,STK_PARAM(R4)(r1)     /* Save ret buffer */
 
@@ -222,12 +255,49 @@ _GLOBAL_TOC(plpar_hcall9)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+
+	blr				/* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+plpar_hcall9_trace:
+	HCALL_INST_PRECALL(R5)
+
+	std	r4,STK_PARAM(R4)(r1)
+	mr	r0,r4
+
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1)
+	ld	r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1)
+	ld	r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1)
+
+	HVSC
+
+	mr	r0,r12
+	ld	r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1)
+	std	r4,0(r12)
+	std	r5,8(r12)
+	std	r6,16(r12)
+	std	r7,24(r12)
+	std	r8,32(r12)
+	std	r9,40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r0,64(r12)
+
 	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
-	blr				/* return r3 = status */
+	blr
+#endif
 
 /* See plpar_hcall_raw to see why this is needed */
 _GLOBAL(plpar_hcall9_raw)
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index cf4e7736e4f1..4575f0c9e521 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -27,6 +27,7 @@
 #include <asm/firmware.h>
 #include <asm/cputable.h>
 #include <asm/trace.h>
+#include <asm/machdep.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -162,4 +163,4 @@ static int __init hcall_inst_init(void)
 
 	return 0;
 }
-__initcall(hcall_inst_init);
+machine_device_initcall(pseries, hcall_inst_init);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b02af9ef3ff6..fbfcef514aa7 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/console.h>
 #include <linux/export.h>
+#include <linux/static_key.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page);
 #endif
 
 #ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_JUMP_LABEL
+struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
+
+void hcall_tracepoint_regfunc(void)
+{
+	static_key_slow_inc(&hcall_tracepoint_key);
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	static_key_slow_dec(&hcall_tracepoint_key);
+}
+#else
 /*
  * We optimise our hcall path by placing hcall_tracepoint_refcount
  * directly in the TOC so we can check if the hcall tracepoints are
@@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page);
 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 extern long hcall_tracepoint_refcount;
 
-/* 
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
- */
-static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
-
 void hcall_tracepoint_regfunc(void)
 {
 	hcall_tracepoint_refcount++;
@@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void)
 {
 	hcall_tracepoint_refcount--;
 }
+#endif
+
+/*
+ * Since the tracing code might execute hcalls we need to guard against
+ * recursion. One example of this are spinlocks calling H_YIELD on
+ * shared processor partitions.
+ */
+static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+
 
 void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index bde7ebad3949..d146fef038b8 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 
+#include <asm/machdep.h>
 #include <asm/rtas.h>
 #include "pseries.h"
 
@@ -362,4 +363,4 @@ static int __init mobility_sysfs_init(void)
 
 	return rc;
 }
-device_initcall(mobility_sysfs_init);
+machine_device_initcall(pseries, mobility_sysfs_init);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 0c882e83c4ce..18ff4626d74e 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -16,6 +16,7 @@
 #include <asm/rtas.h>
 #include <asm/hw_irq.h>
 #include <asm/ppc-pci.h>
+#include <asm/machdep.h>
 
 static int query_token, change_token;
 
@@ -532,5 +533,4 @@ static int rtas_msi_init(void)
 
 	return 0;
 }
-arch_initcall(rtas_msi_init);
-
+machine_arch_initcall(pseries, rtas_msi_init);
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 203cbf0dc101..89e23811199c 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -118,10 +118,10 @@ int remove_phb_dynamic(struct pci_controller *phb)
 		}
 	}
 
-	/* Unregister the bridge device from sysfs and remove the PCI bus */
-	device_unregister(b->bridge);
+	/* Remove the PCI bus and unregister the bridge device from sysfs */
 	phb->bus = NULL;
 	pci_remove_bus(b);
+	device_unregister(b->bridge);
 
 	/* Now release the IO resource */
 	if (res->flags & IORESOURCE_IO)
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
index 6d6266236446..c26eadde434c 100644
--- a/arch/powerpc/platforms/pseries/power.c
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -25,6 +25,7 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <asm/machdep.h>
 
 unsigned long rtas_poweron_auto; /* default and normal state is 0 */
 
@@ -71,11 +72,11 @@ static int __init pm_init(void)
 		return -ENOMEM;
 	return sysfs_create_group(power_kobj, &attr_group);
 }
-core_initcall(pm_init);
+machine_core_initcall(pseries, pm_init);
 #else
 static int __init apo_pm_init(void)
 {
 	return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr));
 }
-__initcall(apo_pm_init);
+machine_device_initcall(pseries, apo_pm_init);
 #endif
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9c5778e6ed4b..dff05b9eb946 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -71,7 +71,7 @@ static int __init init_ras_IRQ(void)
 
 	return 0;
 }
-subsys_initcall(init_ras_IRQ);
+machine_subsys_initcall(pseries, init_ras_IRQ);
 
 #define EPOW_SHUTDOWN_NORMAL				1
 #define EPOW_SHUTDOWN_ON_UPS				2
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 1c0a60d98867..0f319521e002 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -446,13 +446,10 @@ static int proc_ppc64_create_ofdt(void)
 {
 	struct proc_dir_entry *ent;
 
-	if (!machine_is(pseries))
-		return 0;
-
 	ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
 	if (ent)
 		proc_set_size(ent, 0);
 
 	return 0;
 }
-__initcall(proc_ppc64_create_ofdt);
+machine_device_initcall(pseries, proc_ppc64_create_ofdt);
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
index 72a102758d4e..e09608770909 100644
--- a/arch/powerpc/platforms/pseries/rng.c
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -42,4 +42,4 @@ static __init int rng_init(void)
 
 	return 0;
 }
-subsys_initcall(rng_init);
+machine_subsys_initcall(pseries, rng_init);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f2f40e64658f..cfe8a6389a51 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -351,7 +351,7 @@ static int alloc_dispatch_log_kmem_cache(void)
 
 	return alloc_dispatch_logs();
 }
-early_initcall(alloc_dispatch_log_kmem_cache);
+machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
 
 static void pseries_lpar_idle(void)
 {
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index b87b97849d4c..e76aefae2aa2 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -265,7 +265,7 @@ static int __init pseries_suspend_init(void)
 {
 	int rc;
 
-	if (!machine_is(pseries) || !firmware_has_feature(FW_FEATURE_LPAR))
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
 	suspend_data.token = rtas_token("ibm,suspend-me");
@@ -280,5 +280,4 @@ static int __init pseries_suspend_init(void)
 	suspend_set_ops(&pseries_suspend_ops);
 	return 0;
 }
-
-__initcall(pseries_suspend_init);
+machine_device_initcall(pseries, pseries_suspend_init);
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 4bd091a05583..c5077673bd94 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -853,8 +853,8 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose)
 		in = pcie->cfg_type0 + PEX_RC_INWIN_BASE;
 		for (i = 0; i < 4; i++) {
 			/* not enabled, skip */
-			if (!in_le32(&in[i].ar) & PEX_RCIWARn_EN)
-				 continue;
+			if (!(in_le32(&in[i].ar) & PEX_RCIWARn_EN))
+				continue;
 
 			if (get_immrbase() == in_le32(&in[i].tar))
 				return (u64)in_le32(&in[i].barh) << 32 |
diff --git a/arch/powerpc/sysdev/micropatch.c b/arch/powerpc/sysdev/micropatch.c
index c0bb76ef7242..6727dc54d549 100644
--- a/arch/powerpc/sysdev/micropatch.c
+++ b/arch/powerpc/sysdev/micropatch.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <asm/irq.h>
-#include <asm/mpc8xx.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/8xx_immap.h>
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 2c9b52aa266c..7bdf3cc741e4 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -184,7 +184,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
 		dev_info(&dev->dev, "Found %d message registers\n",
 				mpic_msgr_count);
 
-		mpic_msgrs = kzalloc(sizeof(struct mpic_msgr) * mpic_msgr_count,
+		mpic_msgrs = kcalloc(mpic_msgr_count, sizeof(*mpic_msgrs),
 							 GFP_KERNEL);
 		if (!mpic_msgrs) {
 			dev_err(&dev->dev,
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d199bfa2f1fa..8d198b5e9e0a 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2058,10 +2058,6 @@ static void dump_one_paca(int cpu)
 	DUMP(p, kernel_toc, "lx");
 	DUMP(p, kernelbase, "lx");
 	DUMP(p, kernel_msr, "lx");
-#ifdef CONFIG_PPC_STD_MMU_64
-	DUMP(p, stab_real, "lx");
-	DUMP(p, stab_addr, "lx");
-#endif
 	DUMP(p, emergency_sp, "p");
 #ifdef CONFIG_PPC_BOOK3S_64
 	DUMP(p, mc_emergency_sp, "p");
@@ -2694,7 +2690,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid,
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
-static void dump_slb(void)
+void dump_segments(void)
 {
 	int i;
 	unsigned long esid,vsid,valid;
@@ -2726,34 +2722,6 @@ static void dump_slb(void)
 		}
 	}
 }
-
-static void dump_stab(void)
-{
-	int i;
-	unsigned long *tmp = (unsigned long *)local_paca->stab_addr;
-
-	printf("Segment table contents of cpu 0x%x\n", smp_processor_id());
-
-	for (i = 0; i < PAGE_SIZE/16; i++) {
-		unsigned long a, b;
-
-		a = *tmp++;
-		b = *tmp++;
-
-		if (a || b) {
-			printf("%03d %016lx ", i, a);
-			printf("%016lx\n", b);
-		}
-	}
-}
-
-void dump_segments(void)
-{
-	if (mmu_has_feature(MMU_FTR_SLB))
-		dump_slb();
-	else
-		dump_stab();
-}
 #endif
 
 #ifdef CONFIG_PPC_STD_MMU_32
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index bb1d08dc8cc8..379c0837f5a9 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -28,6 +28,7 @@
 #include <linux/of.h>
 
 #include <asm/cputhreads.h>
+#include <asm/firmware.h>
 #include <asm/reg.h>
 #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
 
@@ -98,7 +99,11 @@ static int init_powernv_pstates(void)
 		return -ENODEV;
 	}
 
-	WARN_ON(len_ids != len_freqs);
+	if (len_ids != len_freqs) {
+		pr_warn("Entries in ibm,pstate-ids and "
+			"ibm,pstate-frequencies-mhz does not match\n");
+	}
+
 	nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
 	if (!nr_pstates) {
 		pr_warn("No PStates found\n");
@@ -131,7 +136,12 @@ static unsigned int pstate_id_to_freq(int pstate_id)
 	int i;
 
 	i = powernv_pstate_info.max - pstate_id;
-	BUG_ON(i >= powernv_pstate_info.nr_pstates || i < 0);
+	if (i >= powernv_pstate_info.nr_pstates || i < 0) {
+		pr_warn("PState id %d outside of PState table, "
+			"reporting nominal id %d instead\n",
+			pstate_id, powernv_pstate_info.nominal);
+		i = powernv_pstate_info.max - powernv_pstate_info.nominal;
+	}
 
 	return powernv_freqs[i].frequency;
 }
@@ -321,6 +331,10 @@ static int __init powernv_cpufreq_init(void)
 {
 	int rc = 0;
 
+	/* Don't probe on pseries (guest) platforms */
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return -ENODEV;
+
 	/* Discover pstates from device tree and init */
 	rc = init_powernv_pstates();
 	if (rc) {
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 74f5788d50b1..a64be578dab2 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -160,10 +160,10 @@ static int powernv_cpuidle_driver_init(void)
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
-	struct property *prop;
 	int nr_idle_states = 1; /* Snooze */
 	int dt_idle_states;
-	u32 *flags;
+	const __be32 *idle_state_flags;
+	u32 len_flags, flags;
 	int i;
 
 	/* Currently we have snooze statically defined */
@@ -174,18 +174,18 @@ static int powernv_add_idle_states(void)
 		return nr_idle_states;
 	}
 
-	prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL);
-	if (!prop) {
+	idle_state_flags = of_get_property(power_mgt, "ibm,cpu-idle-state-flags", &len_flags);
+	if (!idle_state_flags) {
 		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
 		return nr_idle_states;
 	}
 
-	dt_idle_states = prop->length / sizeof(u32);
-	flags = (u32 *) prop->value;
+	dt_idle_states = len_flags / sizeof(u32);
 
 	for (i = 0; i < dt_idle_states; i++) {
 
-		if (flags[i] & IDLE_USE_INST_NAP) {
+		flags = be32_to_cpu(idle_state_flags[i]);
+		if (flags & IDLE_USE_INST_NAP) {
 			/* Add NAP state */
 			strcpy(powernv_states[nr_idle_states].name, "Nap");
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
@@ -196,7 +196,7 @@ static int powernv_add_idle_states(void)
 			nr_idle_states++;
 		}
 
-		if (flags[i] & IDLE_USE_INST_SLEEP) {
+		if (flags & IDLE_USE_INST_SLEEP) {
 			/* Add FASTSLEEP state */
 			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index c59e9c96e86d..fab81a143bd7 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -61,6 +61,16 @@ config TEGRA30_MC
 	  analysis, especially for IOMMU/SMMU(System Memory Management
 	  Unit) module.
 
+config FSL_CORENET_CF
+	tristate "Freescale CoreNet Error Reporting"
+	depends on FSL_SOC_BOOKE
+	help
+	  Say Y for reporting of errors from the Freescale CoreNet
+	  Coherency Fabric.  Errors reported include accesses to
+	  physical addresses that mapped by no local access window
+	  (LAW) or an invalid LAW, as well as bad cache state that
+	  represents a coherency violation.
+
 config FSL_IFC
 	bool
 	depends on FSL_SOC
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 71160a2b7313..4055c47f45ab 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_OF)		+= of_memory.o
 endif
 obj-$(CONFIG_TI_AEMIF)		+= ti-aemif.o
 obj-$(CONFIG_TI_EMIF)		+= emif.o
+obj-$(CONFIG_FSL_CORENET_CF)	+= fsl-corenet-cf.o
 obj-$(CONFIG_FSL_IFC)		+= fsl_ifc.o
 obj-$(CONFIG_MVEBU_DEVBUS)	+= mvebu-devbus.o
 obj-$(CONFIG_TEGRA20_MC)	+= tegra20-mc.o
diff --git a/drivers/memory/fsl-corenet-cf.c b/drivers/memory/fsl-corenet-cf.c
new file mode 100644
index 000000000000..c9443fc136db
--- /dev/null
+++ b/drivers/memory/fsl-corenet-cf.c
@@ -0,0 +1,251 @@
+/*
+ * CoreNet Coherency Fabric error reporting
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+enum ccf_version {
+	CCF1,
+	CCF2,
+};
+
+struct ccf_info {
+	enum ccf_version version;
+	int err_reg_offs;
+};
+
+static const struct ccf_info ccf1_info = {
+	.version = CCF1,
+	.err_reg_offs = 0xa00,
+};
+
+static const struct ccf_info ccf2_info = {
+	.version = CCF2,
+	.err_reg_offs = 0xe40,
+};
+
+static const struct of_device_id ccf_matches[] = {
+	{
+		.compatible = "fsl,corenet1-cf",
+		.data = &ccf1_info,
+	},
+	{
+		.compatible = "fsl,corenet2-cf",
+		.data = &ccf2_info,
+	},
+	{}
+};
+
+struct ccf_err_regs {
+	u32 errdet;		/* 0x00 Error Detect Register */
+	/* 0x04 Error Enable (ccf1)/Disable (ccf2) Register */
+	u32 errdis;
+	/* 0x08 Error Interrupt Enable Register (ccf2 only) */
+	u32 errinten;
+	u32 cecar;		/* 0x0c Error Capture Attribute Register */
+	u32 cecaddrh;		/* 0x10 Error Capture Address High */
+	u32 cecaddrl;		/* 0x14 Error Capture Address Low */
+	u32 cecar2;		/* 0x18 Error Capture Attribute Register 2 */
+};
+
+/* LAE/CV also valid for errdis and errinten */
+#define ERRDET_LAE		(1 << 0)  /* Local Access Error */
+#define ERRDET_CV		(1 << 1)  /* Coherency Violation */
+#define ERRDET_CTYPE_SHIFT	26	  /* Capture Type (ccf2 only) */
+#define ERRDET_CTYPE_MASK	(0x1f << ERRDET_CTYPE_SHIFT)
+#define ERRDET_CAP		(1 << 31) /* Capture Valid (ccf2 only) */
+
+#define CECAR_VAL		(1 << 0)  /* Valid (ccf1 only) */
+#define CECAR_UVT		(1 << 15) /* Unavailable target ID (ccf1) */
+#define CECAR_SRCID_SHIFT_CCF1	24
+#define CECAR_SRCID_MASK_CCF1	(0xff << CECAR_SRCID_SHIFT_CCF1)
+#define CECAR_SRCID_SHIFT_CCF2	18
+#define CECAR_SRCID_MASK_CCF2	(0xff << CECAR_SRCID_SHIFT_CCF2)
+
+#define CECADDRH_ADDRH		0xff
+
+struct ccf_private {
+	const struct ccf_info *info;
+	struct device *dev;
+	void __iomem *regs;
+	struct ccf_err_regs __iomem *err_regs;
+};
+
+static irqreturn_t ccf_irq(int irq, void *dev_id)
+{
+	struct ccf_private *ccf = dev_id;
+	static DEFINE_RATELIMIT_STATE(ratelimit, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	u32 errdet, cecar, cecar2;
+	u64 addr;
+	u32 src_id;
+	bool uvt = false;
+	bool cap_valid = false;
+
+	errdet = ioread32be(&ccf->err_regs->errdet);
+	cecar = ioread32be(&ccf->err_regs->cecar);
+	cecar2 = ioread32be(&ccf->err_regs->cecar2);
+	addr = ioread32be(&ccf->err_regs->cecaddrl);
+	addr |= ((u64)(ioread32be(&ccf->err_regs->cecaddrh) &
+		       CECADDRH_ADDRH)) << 32;
+
+	if (!__ratelimit(&ratelimit))
+		goto out;
+
+	switch (ccf->info->version) {
+	case CCF1:
+		if (cecar & CECAR_VAL) {
+			if (cecar & CECAR_UVT)
+				uvt = true;
+
+			src_id = (cecar & CECAR_SRCID_MASK_CCF1) >>
+				 CECAR_SRCID_SHIFT_CCF1;
+			cap_valid = true;
+		}
+
+		break;
+	case CCF2:
+		if (errdet & ERRDET_CAP) {
+			src_id = (cecar & CECAR_SRCID_MASK_CCF2) >>
+				 CECAR_SRCID_SHIFT_CCF2;
+			cap_valid = true;
+		}
+
+		break;
+	}
+
+	dev_crit(ccf->dev, "errdet 0x%08x cecar 0x%08x cecar2 0x%08x\n",
+		 errdet, cecar, cecar2);
+
+	if (errdet & ERRDET_LAE) {
+		if (uvt)
+			dev_crit(ccf->dev, "LAW Unavailable Target ID\n");
+		else
+			dev_crit(ccf->dev, "Local Access Window Error\n");
+	}
+
+	if (errdet & ERRDET_CV)
+		dev_crit(ccf->dev, "Coherency Violation\n");
+
+	if (cap_valid) {
+		dev_crit(ccf->dev, "address 0x%09llx, src id 0x%x\n",
+			 addr, src_id);
+	}
+
+out:
+	iowrite32be(errdet, &ccf->err_regs->errdet);
+	return errdet ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int ccf_probe(struct platform_device *pdev)
+{
+	struct ccf_private *ccf;
+	struct resource *r;
+	const struct of_device_id *match;
+	int ret, irq;
+
+	match = of_match_device(ccf_matches, &pdev->dev);
+	if (WARN_ON(!match))
+		return -ENODEV;
+
+	ccf = devm_kzalloc(&pdev->dev, sizeof(*ccf), GFP_KERNEL);
+	if (!ccf)
+		return -ENOMEM;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "%s: no mem resource\n", __func__);
+		return -ENXIO;
+	}
+
+	ccf->regs = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(ccf->regs)) {
+		dev_err(&pdev->dev, "%s: can't map mem resource\n", __func__);
+		return PTR_ERR(ccf->regs);
+	}
+
+	ccf->dev = &pdev->dev;
+	ccf->info = match->data;
+	ccf->err_regs = ccf->regs + ccf->info->err_reg_offs;
+
+	dev_set_drvdata(&pdev->dev, ccf);
+
+	irq = platform_get_irq(pdev, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "%s: no irq\n", __func__);
+		return -ENXIO;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, ccf_irq, 0, pdev->name, ccf);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: can't request irq\n", __func__);
+		return ret;
+	}
+
+	switch (ccf->info->version) {
+	case CCF1:
+		/* On CCF1 this register enables rather than disables. */
+		iowrite32be(ERRDET_LAE | ERRDET_CV, &ccf->err_regs->errdis);
+		break;
+
+	case CCF2:
+		iowrite32be(0, &ccf->err_regs->errdis);
+		iowrite32be(ERRDET_LAE | ERRDET_CV, &ccf->err_regs->errinten);
+		break;
+	}
+
+	return 0;
+}
+
+static int ccf_remove(struct platform_device *pdev)
+{
+	struct ccf_private *ccf = dev_get_drvdata(&pdev->dev);
+
+	switch (ccf->info->version) {
+	case CCF1:
+		iowrite32be(0, &ccf->err_regs->errdis);
+		break;
+
+	case CCF2:
+		/*
+		 * We clear errdis on ccf1 because that's the only way to
+		 * disable interrupts, but on ccf2 there's no need to disable
+		 * detection.
+		 */
+		iowrite32be(0, &ccf->err_regs->errinten);
+		break;
+	}
+
+	return 0;
+}
+
+static struct platform_driver ccf_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+		.of_match_table = ccf_matches,
+	},
+	.probe = ccf_probe,
+	.remove = ccf_remove,
+};
+
+module_platform_driver(ccf_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Freescale Semiconductor");
+MODULE_DESCRIPTION("Freescale CoreNet Coherency Fabric error reporting");
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index fc5413488496..1eedfba2ad3c 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -41,7 +41,6 @@
 #ifdef CONFIG_8xx
 #include <asm/8xx_immap.h>
 #include <asm/pgtable.h>
-#include <asm/mpc8xx.h>
 #include <asm/cpm1.h>
 #endif
 
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index b4bf02f57d43..90b3b19b7cd3 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -40,7 +40,6 @@
 #ifdef CONFIG_8xx
 #include <asm/8xx_immap.h>
 #include <asm/pgtable.h>
-#include <asm/mpc8xx.h>
 #include <asm/cpm1.h>
 #endif
 
diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
index 51cf8083b299..b0ce7cdee0c2 100644
--- a/drivers/pcmcia/Kconfig
+++ b/drivers/pcmcia/Kconfig
@@ -144,16 +144,6 @@ config TCIC
 	  "Bridge" is the name used for the hardware inside your computer that
 	  PCMCIA cards are plugged into. If unsure, say N.
 
-config PCMCIA_M8XX
-	tristate "MPC8xx PCMCIA support"
-	depends on PCCARD && PPC && 8xx
-	select PCCARD_IODYN if PCMCIA != n
-	help
-	  Say Y here to include support for PowerPC 8xx series PCMCIA
-	  controller.
-
-	  This driver is also available as a module called m8xx_pcmcia.
-
 config PCMCIA_ALCHEMY_DEVBOARD
 	tristate "Alchemy Db/Pb1xxx PCMCIA socket services"
 	depends on MIPS_ALCHEMY && PCMCIA
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index fd55a6951402..27e94b30cf96 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_PD6729)				+= pd6729.o
 obj-$(CONFIG_I82365)				+= i82365.o
 obj-$(CONFIG_I82092)				+= i82092.o
 obj-$(CONFIG_TCIC)				+= tcic.o
-obj-$(CONFIG_PCMCIA_M8XX)			+= m8xx_pcmcia.o
 obj-$(CONFIG_PCMCIA_SOC_COMMON)			+= soc_common.o
 obj-$(CONFIG_PCMCIA_SA11XX_BASE)		+= sa11xx_base.o
 obj-$(CONFIG_PCMCIA_SA1100)			+= sa1100_cs.o
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
deleted file mode 100644
index 182034d2ef58..000000000000
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*
- * m8xx_pcmcia.c - Linux PCMCIA socket driver for the mpc8xx series.
- *
- * (C) 1999-2000 Magnus Damm <damm@opensource.se>
- * (C) 2001-2002 Montavista Software, Inc.
- *     <mlocke@mvista.com>
- *
- * Support for two slots by Cyclades Corporation
- *     <oliver.kurth@cyclades.de>
- * Further fixes, v2.6 kernel port
- *     <marcelo.tosatti@cyclades.com>
- * 
- * Some fixes, additions (C) 2005-2007 Montavista Software, Inc.
- *     <vbordug@ru.mvista.com>
- *
- * "The ExCA standard specifies that socket controllers should provide
- * two IO and five memory windows per socket, which can be independently
- * configured and positioned in the host address space and mapped to
- * arbitrary segments of card address space. " - David A Hinds. 1999
- *
- * This controller does _not_ meet the ExCA standard.
- *
- * m8xx pcmcia controller brief info:
- * + 8 windows (attrib, mem, i/o)
- * + up to two slots (SLOT_A and SLOT_B)
- * + inputpins, outputpins, event and mask registers.
- * - no offset register. sigh.
- *
- * Because of the lacking offset register we must map the whole card.
- * We assign each memory window PCMCIA_MEM_WIN_SIZE address space.
- * Make sure there is (PCMCIA_MEM_WIN_SIZE * PCMCIA_MEM_WIN_NO
- * * PCMCIA_SOCKETS_NO) bytes at PCMCIA_MEM_WIN_BASE.
- * The i/o windows are dynamically allocated at PCMCIA_IO_WIN_BASE.
- * They are maximum 64KByte each...
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
-#include <linux/string.h>
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/bitops.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-
-#include <asm/io.h>
-#include <asm/time.h>
-#include <asm/mpc8xx.h>
-#include <asm/8xx_immap.h>
-#include <asm/irq.h>
-#include <asm/fs_pd.h>
-
-#include <pcmcia/ss.h>
-
-#define pcmcia_info(args...) printk(KERN_INFO "m8xx_pcmcia: "args)
-#define pcmcia_error(args...) printk(KERN_ERR "m8xx_pcmcia: "args)
-
-static const char *version = "Version 0.06, Aug 2005";
-MODULE_LICENSE("Dual MPL/GPL");
-
-#if !defined(CONFIG_PCMCIA_SLOT_A) && !defined(CONFIG_PCMCIA_SLOT_B)
-
-/* The ADS board use SLOT_A */
-#ifdef CONFIG_ADS
-#define CONFIG_PCMCIA_SLOT_A
-#define CONFIG_BD_IS_MHZ
-#endif
-
-/* The FADS series are a mess */
-#ifdef CONFIG_FADS
-#if defined(CONFIG_MPC860T) || defined(CONFIG_MPC860) || defined(CONFIG_MPC821)
-#define CONFIG_PCMCIA_SLOT_A
-#else
-#define CONFIG_PCMCIA_SLOT_B
-#endif
-#endif
-
-#if defined(CONFIG_MPC885ADS)
-#define CONFIG_PCMCIA_SLOT_A
-#define PCMCIA_GLITCHY_CD
-#endif
-
-/* Cyclades ACS uses both slots */
-#ifdef CONFIG_PRxK
-#define CONFIG_PCMCIA_SLOT_A
-#define CONFIG_PCMCIA_SLOT_B
-#endif
-
-#endif				/* !defined(CONFIG_PCMCIA_SLOT_A) && !defined(CONFIG_PCMCIA_SLOT_B) */
-
-#if defined(CONFIG_PCMCIA_SLOT_A) && defined(CONFIG_PCMCIA_SLOT_B)
-
-#define PCMCIA_SOCKETS_NO 2
-/* We have only 8 windows, dualsocket support will be limited. */
-#define PCMCIA_MEM_WIN_NO 2
-#define PCMCIA_IO_WIN_NO  2
-#define PCMCIA_SLOT_MSG "SLOT_A and SLOT_B"
-
-#elif defined(CONFIG_PCMCIA_SLOT_A) || defined(CONFIG_PCMCIA_SLOT_B)
-
-#define PCMCIA_SOCKETS_NO 1
-/* full support for one slot */
-#define PCMCIA_MEM_WIN_NO 5
-#define PCMCIA_IO_WIN_NO  2
-
-/* define _slot_ to be able to optimize macros */
-
-#ifdef CONFIG_PCMCIA_SLOT_A
-#define _slot_ 0
-#define PCMCIA_SLOT_MSG "SLOT_A"
-#else
-#define _slot_ 1
-#define PCMCIA_SLOT_MSG "SLOT_B"
-#endif
-
-#else
-#error m8xx_pcmcia: Bad configuration!
-#endif
-
-/* ------------------------------------------------------------------------- */
-
-#define PCMCIA_MEM_WIN_BASE 0xe0000000	/* base address for memory window 0   */
-#define PCMCIA_MEM_WIN_SIZE 0x04000000	/* each memory window is 64 MByte     */
-#define PCMCIA_IO_WIN_BASE  _IO_BASE	/* base address for io window 0       */
-/* ------------------------------------------------------------------------- */
-
-static int pcmcia_schlvl;
-
-static DEFINE_SPINLOCK(events_lock);
-
-#define PCMCIA_SOCKET_KEY_5V 1
-#define PCMCIA_SOCKET_KEY_LV 2
-
-/* look up table for pgcrx registers */
-static u32 *m8xx_pgcrx[2];
-
-/*
- * This structure is used to address each window in the PCMCIA controller.
- *
- * Keep in mind that we assume that pcmcia_win[n+1] is mapped directly
- * after pcmcia_win[n]...
- */
-
-struct pcmcia_win {
-	u32 br;
-	u32 or;
-};
-
-/*
- * For some reason the hardware guys decided to make both slots share
- * some registers.
- *
- * Could someone invent object oriented hardware ?
- *
- * The macros are used to get the right bit from the registers.
- * SLOT_A : slot = 0
- * SLOT_B : slot = 1
- */
-
-#define M8XX_PCMCIA_VS1(slot)      (0x80000000 >> (slot << 4))
-#define M8XX_PCMCIA_VS2(slot)      (0x40000000 >> (slot << 4))
-#define M8XX_PCMCIA_VS_MASK(slot)  (0xc0000000 >> (slot << 4))
-#define M8XX_PCMCIA_VS_SHIFT(slot) (30 - (slot << 4))
-
-#define M8XX_PCMCIA_WP(slot)       (0x20000000 >> (slot << 4))
-#define M8XX_PCMCIA_CD2(slot)      (0x10000000 >> (slot << 4))
-#define M8XX_PCMCIA_CD1(slot)      (0x08000000 >> (slot << 4))
-#define M8XX_PCMCIA_BVD2(slot)     (0x04000000 >> (slot << 4))
-#define M8XX_PCMCIA_BVD1(slot)     (0x02000000 >> (slot << 4))
-#define M8XX_PCMCIA_RDY(slot)      (0x01000000 >> (slot << 4))
-#define M8XX_PCMCIA_RDY_L(slot)    (0x00800000 >> (slot << 4))
-#define M8XX_PCMCIA_RDY_H(slot)    (0x00400000 >> (slot << 4))
-#define M8XX_PCMCIA_RDY_R(slot)    (0x00200000 >> (slot << 4))
-#define M8XX_PCMCIA_RDY_F(slot)    (0x00100000 >> (slot << 4))
-#define M8XX_PCMCIA_MASK(slot)     (0xFFFF0000 >> (slot << 4))
-
-#define M8XX_PCMCIA_POR_VALID    0x00000001
-#define M8XX_PCMCIA_POR_WRPROT   0x00000002
-#define M8XX_PCMCIA_POR_ATTRMEM  0x00000010
-#define M8XX_PCMCIA_POR_IO       0x00000018
-#define M8XX_PCMCIA_POR_16BIT    0x00000040
-
-#define M8XX_PGCRX(slot)  m8xx_pgcrx[slot]
-
-#define M8XX_PGCRX_CXOE    0x00000080
-#define M8XX_PGCRX_CXRESET 0x00000040
-
-/* we keep one lookup table per socket to check flags */
-
-#define PCMCIA_EVENTS_MAX 5	/* 4 max at a time + termination */
-
-struct event_table {
-	u32 regbit;
-	u32 eventbit;
-};
-
-static const char driver_name[] = "m8xx-pcmcia";
-
-struct socket_info {
-	void (*handler) (void *info, u32 events);
-	void *info;
-
-	u32 slot;
-	pcmconf8xx_t *pcmcia;
-	u32 bus_freq;
-	int hwirq;
-
-	socket_state_t state;
-	struct pccard_mem_map mem_win[PCMCIA_MEM_WIN_NO];
-	struct pccard_io_map io_win[PCMCIA_IO_WIN_NO];
-	struct event_table events[PCMCIA_EVENTS_MAX];
-	struct pcmcia_socket socket;
-};
-
-static struct socket_info socket[PCMCIA_SOCKETS_NO];
-
-/*
- * Search this table to see if the windowsize is
- * supported...
- */
-
-#define M8XX_SIZES_NO 32
-
-static const u32 m8xx_size_to_gray[M8XX_SIZES_NO] = {
-	0x00000001, 0x00000002, 0x00000008, 0x00000004,
-	0x00000080, 0x00000040, 0x00000010, 0x00000020,
-	0x00008000, 0x00004000, 0x00001000, 0x00002000,
-	0x00000100, 0x00000200, 0x00000800, 0x00000400,
-
-	0x0fffffff, 0xffffffff, 0xffffffff, 0xffffffff,
-	0x01000000, 0x02000000, 0xffffffff, 0x04000000,
-	0x00010000, 0x00020000, 0x00080000, 0x00040000,
-	0x00800000, 0x00400000, 0x00100000, 0x00200000
-};
-
-/* ------------------------------------------------------------------------- */
-
-static irqreturn_t m8xx_interrupt(int irq, void *dev);
-
-#define PCMCIA_BMT_LIMIT (15*4)	/* Bus Monitor Timeout value */
-
-/* FADS Boards from Motorola                                               */
-
-#if defined(CONFIG_FADS)
-
-#define PCMCIA_BOARD_MSG "FADS"
-
-static int voltage_set(int slot, int vcc, int vpp)
-{
-	u32 reg = 0;
-
-	switch (vcc) {
-	case 0:
-		break;
-	case 33:
-		reg |= BCSR1_PCCVCC0;
-		break;
-	case 50:
-		reg |= BCSR1_PCCVCC1;
-		break;
-	default:
-		return 1;
-	}
-
-	switch (vpp) {
-	case 0:
-		break;
-	case 33:
-	case 50:
-		if (vcc == vpp)
-			reg |= BCSR1_PCCVPP1;
-		else
-			return 1;
-		break;
-	case 120:
-		if ((vcc == 33) || (vcc == 50))
-			reg |= BCSR1_PCCVPP0;
-		else
-			return 1;
-	default:
-		return 1;
-	}
-
-	/* first, turn off all power */
-	out_be32((u32 *) BCSR1,
-		 in_be32((u32 *) BCSR1) & ~(BCSR1_PCCVCC_MASK |
-					    BCSR1_PCCVPP_MASK));
-
-	/* enable new powersettings */
-	out_be32((u32 *) BCSR1, in_be32((u32 *) BCSR1) | reg);
-
-	return 0;
-}
-
-#define socket_get(_slot_) PCMCIA_SOCKET_KEY_5V
-
-static void hardware_enable(int slot)
-{
-	out_be32((u32 *) BCSR1, in_be32((u32 *) BCSR1) & ~BCSR1_PCCEN);
-}
-
-static void hardware_disable(int slot)
-{
-	out_be32((u32 *) BCSR1, in_be32((u32 *) BCSR1) | BCSR1_PCCEN);
-}
-
-#endif
-
-/* MPC885ADS Boards */
-
-#if defined(CONFIG_MPC885ADS)
-
-#define PCMCIA_BOARD_MSG "MPC885ADS"
-#define socket_get(_slot_) PCMCIA_SOCKET_KEY_5V
-
-static inline void hardware_enable(int slot)
-{
-	m8xx_pcmcia_ops.hw_ctrl(slot, 1);
-}
-
-static inline void hardware_disable(int slot)
-{
-	m8xx_pcmcia_ops.hw_ctrl(slot, 0);
-}
-
-static inline int voltage_set(int slot, int vcc, int vpp)
-{
-	return m8xx_pcmcia_ops.voltage_set(slot, vcc, vpp);
-}
-
-#endif
-
-#if defined(CONFIG_PRxK)
-#include <asm/cpld.h>
-extern volatile fpga_pc_regs *fpga_pc;
-
-#define PCMCIA_BOARD_MSG "MPC855T"
-
-static int voltage_set(int slot, int vcc, int vpp)
-{
-	u8 reg = 0;
-	u8 regread;
-	cpld_regs *ccpld = get_cpld();
-
-	switch (vcc) {
-	case 0:
-		break;
-	case 33:
-		reg |= PCMCIA_VCC_33;
-		break;
-	case 50:
-		reg |= PCMCIA_VCC_50;
-		break;
-	default:
-		return 1;
-	}
-
-	switch (vpp) {
-	case 0:
-		break;
-	case 33:
-	case 50:
-		if (vcc == vpp)
-			reg |= PCMCIA_VPP_VCC;
-		else
-			return 1;
-		break;
-	case 120:
-		if ((vcc == 33) || (vcc == 50))
-			reg |= PCMCIA_VPP_12;
-		else
-			return 1;
-	default:
-		return 1;
-	}
-
-	reg = reg >> (slot << 2);
-	regread = in_8(&ccpld->fpga_pc_ctl);
-	if (reg !=
-	    (regread & ((PCMCIA_VCC_MASK | PCMCIA_VPP_MASK) >> (slot << 2)))) {
-		/* enable new powersettings */
-		regread =
-		    regread & ~((PCMCIA_VCC_MASK | PCMCIA_VPP_MASK) >>
-				(slot << 2));
-		out_8(&ccpld->fpga_pc_ctl, reg | regread);
-		msleep(100);
-	}
-
-	return 0;
-}
-
-#define socket_get(_slot_) PCMCIA_SOCKET_KEY_LV
-#define hardware_enable(_slot_)	/* No hardware to enable */
-#define hardware_disable(_slot_)	/* No hardware to disable */
-
-#endif				/* CONFIG_PRxK */
-
-static u32 pending_events[PCMCIA_SOCKETS_NO];
-static DEFINE_SPINLOCK(pending_event_lock);
-
-static irqreturn_t m8xx_interrupt(int irq, void *dev)
-{
-	struct socket_info *s;
-	struct event_table *e;
-	unsigned int i, events, pscr, pipr, per;
-	pcmconf8xx_t *pcmcia = socket[0].pcmcia;
-
-	pr_debug("m8xx_pcmcia: Interrupt!\n");
-	/* get interrupt sources */
-
-	pscr = in_be32(&pcmcia->pcmc_pscr);
-	pipr = in_be32(&pcmcia->pcmc_pipr);
-	per = in_be32(&pcmcia->pcmc_per);
-
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++) {
-		s = &socket[i];
-		e = &s->events[0];
-		events = 0;
-
-		while (e->regbit) {
-			if (pscr & e->regbit)
-				events |= e->eventbit;
-
-			e++;
-		}
-
-		/*
-		 * report only if both card detect signals are the same
-		 * not too nice done,
-		 * we depend on that CD2 is the bit to the left of CD1...
-		 */
-		if (events & SS_DETECT)
-			if (((pipr & M8XX_PCMCIA_CD2(i)) >> 1) ^
-			    (pipr & M8XX_PCMCIA_CD1(i))) {
-				events &= ~SS_DETECT;
-			}
-#ifdef PCMCIA_GLITCHY_CD
-		/*
-		 * I've experienced CD problems with my ADS board.
-		 * We make an extra check to see if there was a
-		 * real change of Card detection.
-		 */
-
-		if ((events & SS_DETECT) &&
-		    ((pipr &
-		      (M8XX_PCMCIA_CD2(i) | M8XX_PCMCIA_CD1(i))) == 0) &&
-		    (s->state.Vcc | s->state.Vpp)) {
-			events &= ~SS_DETECT;
-			/*printk( "CD glitch workaround - CD = 0x%08x!\n",
-			   (pipr & (M8XX_PCMCIA_CD2(i)
-			   | M8XX_PCMCIA_CD1(i)))); */
-		}
-#endif
-
-		/* call the handler */
-
-		pr_debug("m8xx_pcmcia: slot %u: events = 0x%02x, pscr = 0x%08x, "
-			"pipr = 0x%08x\n", i, events, pscr, pipr);
-
-		if (events) {
-			spin_lock(&pending_event_lock);
-			pending_events[i] |= events;
-			spin_unlock(&pending_event_lock);
-			/*
-			 * Turn off RDY_L bits in the PER mask on
-			 * CD interrupt receival.
-			 *
-			 * They can generate bad interrupts on the
-			 * ACS4,8,16,32.   - marcelo
-			 */
-			per &= ~M8XX_PCMCIA_RDY_L(0);
-			per &= ~M8XX_PCMCIA_RDY_L(1);
-
-			out_be32(&pcmcia->pcmc_per, per);
-
-			if (events)
-				pcmcia_parse_events(&socket[i].socket, events);
-		}
-	}
-
-	/* clear the interrupt sources */
-	out_be32(&pcmcia->pcmc_pscr, pscr);
-
-	pr_debug("m8xx_pcmcia: Interrupt done.\n");
-
-	return IRQ_HANDLED;
-}
-
-static u32 m8xx_get_graycode(u32 size)
-{
-	u32 k;
-
-	for (k = 0; k < M8XX_SIZES_NO; k++)
-		if (m8xx_size_to_gray[k] == size)
-			break;
-
-	if ((k == M8XX_SIZES_NO) || (m8xx_size_to_gray[k] == -1))
-		k = -1;
-
-	return k;
-}
-
-static u32 m8xx_get_speed(u32 ns, u32 is_io, u32 bus_freq)
-{
-	u32 reg, clocks, psst, psl, psht;
-
-	if (!ns) {
-
-		/*
-		 * We get called with IO maps setup to 0ns
-		 * if not specified by the user.
-		 * They should be 255ns.
-		 */
-
-		if (is_io)
-			ns = 255;
-		else
-			ns = 100;	/* fast memory if 0 */
-	}
-
-	/*
-	 * In PSST, PSL, PSHT fields we tell the controller
-	 * timing parameters in CLKOUT clock cycles.
-	 * CLKOUT is the same as GCLK2_50.
-	 */
-
-/* how we want to adjust the timing - in percent */
-
-#define ADJ 180			/* 80 % longer accesstime - to be sure */
-
-	clocks = ((bus_freq / 1000) * ns) / 1000;
-	clocks = (clocks * ADJ) / (100 * 1000);
-	if (clocks >= PCMCIA_BMT_LIMIT) {
-		printk("Max access time limit reached\n");
-		clocks = PCMCIA_BMT_LIMIT - 1;
-	}
-
-	psst = clocks / 7;	/* setup time */
-	psht = clocks / 7;	/* hold time */
-	psl = (clocks * 5) / 7;	/* strobe length */
-
-	psst += clocks - (psst + psht + psl);
-
-	reg = psst << 12;
-	reg |= psl << 7;
-	reg |= psht << 16;
-
-	return reg;
-}
-
-static int m8xx_get_status(struct pcmcia_socket *sock, unsigned int *value)
-{
-	int lsock = container_of(sock, struct socket_info, socket)->slot;
-	struct socket_info *s = &socket[lsock];
-	unsigned int pipr, reg;
-	pcmconf8xx_t *pcmcia = s->pcmcia;
-
-	pipr = in_be32(&pcmcia->pcmc_pipr);
-
-	*value = ((pipr & (M8XX_PCMCIA_CD1(lsock)
-			   | M8XX_PCMCIA_CD2(lsock))) == 0) ? SS_DETECT : 0;
-	*value |= (pipr & M8XX_PCMCIA_WP(lsock)) ? SS_WRPROT : 0;
-
-	if (s->state.flags & SS_IOCARD)
-		*value |= (pipr & M8XX_PCMCIA_BVD1(lsock)) ? SS_STSCHG : 0;
-	else {
-		*value |= (pipr & M8XX_PCMCIA_RDY(lsock)) ? SS_READY : 0;
-		*value |= (pipr & M8XX_PCMCIA_BVD1(lsock)) ? SS_BATDEAD : 0;
-		*value |= (pipr & M8XX_PCMCIA_BVD2(lsock)) ? SS_BATWARN : 0;
-	}
-
-	if (s->state.Vcc | s->state.Vpp)
-		*value |= SS_POWERON;
-
-	/*
-	 * Voltage detection:
-	 * This driver only supports 16-Bit pc-cards.
-	 * Cardbus is not handled here.
-	 *
-	 * To determine what voltage to use we must read the VS1 and VS2 pin.
-	 * Depending on what socket type is present,
-	 * different combinations mean different things.
-	 *
-	 * Card Key  Socket Key   VS1   VS2   Card         Vcc for CIS parse
-	 *
-	 * 5V        5V, LV*      NC    NC    5V only       5V (if available)
-	 *
-	 * 5V        5V, LV*      GND   NC    5 or 3.3V     as low as possible
-	 *
-	 * 5V        5V, LV*      GND   GND   5, 3.3, x.xV  as low as possible
-	 *
-	 * LV*       5V            -     -    shall not fit into socket
-	 *
-	 * LV*       LV*          GND   NC    3.3V only     3.3V
-	 *
-	 * LV*       LV*          NC    GND   x.xV          x.xV (if avail.)
-	 *
-	 * LV*       LV*          GND   GND   3.3 or x.xV   as low as possible
-	 *
-	 * *LV means Low Voltage
-	 *
-	 *
-	 * That gives us the following table:
-	 *
-	 * Socket    VS1  VS2   Voltage
-	 *
-	 * 5V        NC   NC    5V
-	 * 5V        NC   GND   none (should not be possible)
-	 * 5V        GND  NC    >= 3.3V
-	 * 5V        GND  GND   >= x.xV
-	 *
-	 * LV        NC   NC    5V   (if available)
-	 * LV        NC   GND   x.xV (if available)
-	 * LV        GND  NC    3.3V
-	 * LV        GND  GND   >= x.xV
-	 *
-	 * So, how do I determine if I have a 5V or a LV
-	 * socket on my board?  Look at the socket!
-	 *
-	 *
-	 * Socket with 5V key:
-	 * ++--------------------------------------------+
-	 * ||                                            |
-	 * ||                                           ||
-	 * ||                                           ||
-	 * |                                             |
-	 * +---------------------------------------------+
-	 *
-	 * Socket with LV key:
-	 * ++--------------------------------------------+
-	 * ||                                            |
-	 * |                                            ||
-	 * |                                            ||
-	 * |                                             |
-	 * +---------------------------------------------+
-	 *
-	 *
-	 * With other words - LV only cards does not fit
-	 * into the 5V socket!
-	 */
-
-	/* read out VS1 and VS2 */
-
-	reg = (pipr & M8XX_PCMCIA_VS_MASK(lsock))
-	    >> M8XX_PCMCIA_VS_SHIFT(lsock);
-
-	if (socket_get(lsock) == PCMCIA_SOCKET_KEY_LV) {
-		switch (reg) {
-		case 1:
-			*value |= SS_3VCARD;
-			break;	/* GND, NC - 3.3V only */
-		case 2:
-			*value |= SS_XVCARD;
-			break;	/* NC. GND - x.xV only */
-		};
-	}
-
-	pr_debug("m8xx_pcmcia: GetStatus(%d) = %#2.2x\n", lsock, *value);
-	return 0;
-}
-
-static int m8xx_set_socket(struct pcmcia_socket *sock, socket_state_t * state)
-{
-	int lsock = container_of(sock, struct socket_info, socket)->slot;
-	struct socket_info *s = &socket[lsock];
-	struct event_table *e;
-	unsigned int reg;
-	unsigned long flags;
-	pcmconf8xx_t *pcmcia = socket[0].pcmcia;
-
-	pr_debug("m8xx_pcmcia: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, "
-		"io_irq %d, csc_mask %#2.2x)\n", lsock, state->flags,
-		state->Vcc, state->Vpp, state->io_irq, state->csc_mask);
-
-	/* First, set voltage - bail out if invalid */
-	if (voltage_set(lsock, state->Vcc, state->Vpp))
-		return -EINVAL;
-
-	/* Take care of reset... */
-	if (state->flags & SS_RESET)
-		out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) | M8XX_PGCRX_CXRESET);	/* active high */
-	else
-		out_be32(M8XX_PGCRX(lsock),
-			 in_be32(M8XX_PGCRX(lsock)) & ~M8XX_PGCRX_CXRESET);
-
-	/* ... and output enable. */
-
-	/* The CxOE signal is connected to a 74541 on the ADS.
-	   I guess most other boards used the ADS as a reference.
-	   I tried to control the CxOE signal with SS_OUTPUT_ENA,
-	   but the reset signal seems connected via the 541.
-	   If the CxOE is left high are some signals tristated and
-	   no pullups are present -> the cards act weird.
-	   So right now the buffers are enabled if the power is on. */
-
-	if (state->Vcc || state->Vpp)
-		out_be32(M8XX_PGCRX(lsock), in_be32(M8XX_PGCRX(lsock)) & ~M8XX_PGCRX_CXOE);	/* active low */
-	else
-		out_be32(M8XX_PGCRX(lsock),
-			 in_be32(M8XX_PGCRX(lsock)) | M8XX_PGCRX_CXOE);
-
-	/*
-	 * We'd better turn off interrupts before
-	 * we mess with the events-table..
-	 */
-
-	spin_lock_irqsave(&events_lock, flags);
-
-	/*
-	 * Play around with the interrupt mask to be able to
-	 * give the events the generic pcmcia driver wants us to.
-	 */
-
-	e = &s->events[0];
-	reg = 0;
-
-	if (state->csc_mask & SS_DETECT) {
-		e->eventbit = SS_DETECT;
-		reg |= e->regbit = (M8XX_PCMCIA_CD2(lsock)
-				    | M8XX_PCMCIA_CD1(lsock));
-		e++;
-	}
-	if (state->flags & SS_IOCARD) {
-		/*
-		 * I/O card
-		 */
-		if (state->csc_mask & SS_STSCHG) {
-			e->eventbit = SS_STSCHG;
-			reg |= e->regbit = M8XX_PCMCIA_BVD1(lsock);
-			e++;
-		}
-		/*
-		 * If io_irq is non-zero we should enable irq.
-		 */
-		if (state->io_irq) {
-			out_be32(M8XX_PGCRX(lsock),
-				 in_be32(M8XX_PGCRX(lsock)) |
-				 mk_int_int_mask(s->hwirq) << 24);
-			/*
-			 * Strange thing here:
-			 * The manual does not tell us which interrupt
-			 * the sources generate.
-			 * Anyhow, I found out that RDY_L generates IREQLVL.
-			 *
-			 * We use level triggerd interrupts, and they don't
-			 * have to be cleared in PSCR in the interrupt handler.
-			 */
-			reg |= M8XX_PCMCIA_RDY_L(lsock);
-		} else
-			out_be32(M8XX_PGCRX(lsock),
-				 in_be32(M8XX_PGCRX(lsock)) & 0x00ffffff);
-	} else {
-		/*
-		 * Memory card
-		 */
-		if (state->csc_mask & SS_BATDEAD) {
-			e->eventbit = SS_BATDEAD;
-			reg |= e->regbit = M8XX_PCMCIA_BVD1(lsock);
-			e++;
-		}
-		if (state->csc_mask & SS_BATWARN) {
-			e->eventbit = SS_BATWARN;
-			reg |= e->regbit = M8XX_PCMCIA_BVD2(lsock);
-			e++;
-		}
-		/* What should I trigger on - low/high,raise,fall? */
-		if (state->csc_mask & SS_READY) {
-			e->eventbit = SS_READY;
-			reg |= e->regbit = 0;	//??
-			e++;
-		}
-	}
-
-	e->regbit = 0;		/* terminate list */
-
-	/*
-	 * Clear the status changed .
-	 * Port A and Port B share the same port.
-	 * Writing ones will clear the bits.
-	 */
-
-	out_be32(&pcmcia->pcmc_pscr, reg);
-
-	/*
-	 * Write the mask.
-	 * Port A and Port B share the same port.
-	 * Need for read-modify-write.
-	 * Ones will enable the interrupt.
-	 */
-
-	reg |=
-	    in_be32(&pcmcia->
-		    pcmc_per) & (M8XX_PCMCIA_MASK(0) | M8XX_PCMCIA_MASK(1));
-	out_be32(&pcmcia->pcmc_per, reg);
-
-	spin_unlock_irqrestore(&events_lock, flags);
-
-	/* copy the struct and modify the copy */
-
-	s->state = *state;
-
-	return 0;
-}
-
-static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io)
-{
-	int lsock = container_of(sock, struct socket_info, socket)->slot;
-
-	struct socket_info *s = &socket[lsock];
-	struct pcmcia_win *w;
-	unsigned int reg, winnr;
-	pcmconf8xx_t *pcmcia = s->pcmcia;
-
-#define M8XX_SIZE (io->stop - io->start + 1)
-#define M8XX_BASE (PCMCIA_IO_WIN_BASE + io->start)
-
-	pr_debug("m8xx_pcmcia: SetIOMap(%d, %d, %#2.2x, %d ns, "
-		"%#4.4llx-%#4.4llx)\n", lsock, io->map, io->flags,
-		io->speed, (unsigned long long)io->start,
-		(unsigned long long)io->stop);
-
-	if ((io->map >= PCMCIA_IO_WIN_NO) || (io->start > 0xffff)
-	    || (io->stop > 0xffff) || (io->stop < io->start))
-		return -EINVAL;
-
-	if ((reg = m8xx_get_graycode(M8XX_SIZE)) == -1)
-		return -EINVAL;
-
-	if (io->flags & MAP_ACTIVE) {
-
-		pr_debug("m8xx_pcmcia: io->flags & MAP_ACTIVE\n");
-
-		winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO)
-		    + (lsock * PCMCIA_IO_WIN_NO) + io->map;
-
-		/* setup registers */
-
-		w = (void *)&pcmcia->pcmc_pbr0;
-		w += winnr;
-
-		out_be32(&w->or, 0);	/* turn off window first */
-		out_be32(&w->br, M8XX_BASE);
-
-		reg <<= 27;
-		reg |= M8XX_PCMCIA_POR_IO | (lsock << 2);
-
-		reg |= m8xx_get_speed(io->speed, 1, s->bus_freq);
-
-		if (io->flags & MAP_WRPROT)
-			reg |= M8XX_PCMCIA_POR_WRPROT;
-
-		/*if(io->flags & (MAP_16BIT | MAP_AUTOSZ)) */
-		if (io->flags & MAP_16BIT)
-			reg |= M8XX_PCMCIA_POR_16BIT;
-
-		if (io->flags & MAP_ACTIVE)
-			reg |= M8XX_PCMCIA_POR_VALID;
-
-		out_be32(&w->or, reg);
-
-		pr_debug("m8xx_pcmcia: Socket %u: Mapped io window %u at "
-			"%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or);
-	} else {
-		/* shutdown IO window */
-		winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO)
-		    + (lsock * PCMCIA_IO_WIN_NO) + io->map;
-
-		/* setup registers */
-
-		w = (void *)&pcmcia->pcmc_pbr0;
-		w += winnr;
-
-		out_be32(&w->or, 0);	/* turn off window */
-		out_be32(&w->br, 0);	/* turn off base address */
-
-		pr_debug("m8xx_pcmcia: Socket %u: Unmapped io window %u at "
-			"%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or);
-	}
-
-	/* copy the struct and modify the copy */
-	s->io_win[io->map] = *io;
-	s->io_win[io->map].flags &= (MAP_WRPROT | MAP_16BIT | MAP_ACTIVE);
-	pr_debug("m8xx_pcmcia: SetIOMap exit\n");
-
-	return 0;
-}
-
-static int m8xx_set_mem_map(struct pcmcia_socket *sock,
-			    struct pccard_mem_map *mem)
-{
-	int lsock = container_of(sock, struct socket_info, socket)->slot;
-	struct socket_info *s = &socket[lsock];
-	struct pcmcia_win *w;
-	struct pccard_mem_map *old;
-	unsigned int reg, winnr;
-	pcmconf8xx_t *pcmcia = s->pcmcia;
-
-	pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, "
-		"%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags,
-		mem->speed, (unsigned long long)mem->static_start,
-		mem->card_start);
-
-	if ((mem->map >= PCMCIA_MEM_WIN_NO)
-//          || ((mem->s) >= PCMCIA_MEM_WIN_SIZE)
-	    || (mem->card_start >= 0x04000000)
-	    || (mem->static_start & 0xfff)	/* 4KByte resolution */
-	    ||(mem->card_start & 0xfff))
-		return -EINVAL;
-
-	if ((reg = m8xx_get_graycode(PCMCIA_MEM_WIN_SIZE)) == -1) {
-		printk("Cannot set size to 0x%08x.\n", PCMCIA_MEM_WIN_SIZE);
-		return -EINVAL;
-	}
-	reg <<= 27;
-
-	winnr = (lsock * PCMCIA_MEM_WIN_NO) + mem->map;
-
-	/* Setup the window in the pcmcia controller */
-
-	w = (void *)&pcmcia->pcmc_pbr0;
-	w += winnr;
-
-	reg |= lsock << 2;
-
-	reg |= m8xx_get_speed(mem->speed, 0, s->bus_freq);
-
-	if (mem->flags & MAP_ATTRIB)
-		reg |= M8XX_PCMCIA_POR_ATTRMEM;
-
-	if (mem->flags & MAP_WRPROT)
-		reg |= M8XX_PCMCIA_POR_WRPROT;
-
-	if (mem->flags & MAP_16BIT)
-		reg |= M8XX_PCMCIA_POR_16BIT;
-
-	if (mem->flags & MAP_ACTIVE)
-		reg |= M8XX_PCMCIA_POR_VALID;
-
-	out_be32(&w->or, reg);
-
-	pr_debug("m8xx_pcmcia: Socket %u: Mapped memory window %u at %#8.8x, "
-		"OR = %#8.8x.\n", lsock, mem->map, w->br, w->or);
-
-	if (mem->flags & MAP_ACTIVE) {
-		/* get the new base address */
-		mem->static_start = PCMCIA_MEM_WIN_BASE +
-		    (PCMCIA_MEM_WIN_SIZE * winnr)
-		    + mem->card_start;
-	}
-
-	pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, "
-		"%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags,
-		mem->speed, (unsigned long long)mem->static_start,
-		mem->card_start);
-
-	/* copy the struct and modify the copy */
-
-	old = &s->mem_win[mem->map];
-
-	*old = *mem;
-	old->flags &= (MAP_ATTRIB | MAP_WRPROT | MAP_16BIT | MAP_ACTIVE);
-
-	return 0;
-}
-
-static int m8xx_sock_init(struct pcmcia_socket *sock)
-{
-	int i;
-	pccard_io_map io = { 0, 0, 0, 0, 1 };
-	pccard_mem_map mem = { 0, 0, 0, 0, 0, 0 };
-
-	pr_debug("m8xx_pcmcia: sock_init(%d)\n", s);
-
-	m8xx_set_socket(sock, &dead_socket);
-	for (i = 0; i < PCMCIA_IO_WIN_NO; i++) {
-		io.map = i;
-		m8xx_set_io_map(sock, &io);
-	}
-	for (i = 0; i < PCMCIA_MEM_WIN_NO; i++) {
-		mem.map = i;
-		m8xx_set_mem_map(sock, &mem);
-	}
-
-	return 0;
-
-}
-
-static int m8xx_sock_suspend(struct pcmcia_socket *sock)
-{
-	return m8xx_set_socket(sock, &dead_socket);
-}
-
-static struct pccard_operations m8xx_services = {
-	.init = m8xx_sock_init,
-	.suspend = m8xx_sock_suspend,
-	.get_status = m8xx_get_status,
-	.set_socket = m8xx_set_socket,
-	.set_io_map = m8xx_set_io_map,
-	.set_mem_map = m8xx_set_mem_map,
-};
-
-static int __init m8xx_probe(struct platform_device *ofdev)
-{
-	struct pcmcia_win *w;
-	unsigned int i, m, hwirq;
-	pcmconf8xx_t *pcmcia;
-	int status;
-	struct device_node *np = ofdev->dev.of_node;
-
-	pcmcia_info("%s\n", version);
-
-	pcmcia = of_iomap(np, 0);
-	if (pcmcia == NULL)
-		return -EINVAL;
-
-	pcmcia_schlvl = irq_of_parse_and_map(np, 0);
-	hwirq = irq_map[pcmcia_schlvl].hwirq;
-	if (pcmcia_schlvl < 0) {
-		iounmap(pcmcia);
-		return -EINVAL;
-	}
-
-	m8xx_pgcrx[0] = &pcmcia->pcmc_pgcra;
-	m8xx_pgcrx[1] = &pcmcia->pcmc_pgcrb;
-
-	pcmcia_info(PCMCIA_BOARD_MSG " using " PCMCIA_SLOT_MSG
-		    " with IRQ %u  (%d). \n", pcmcia_schlvl, hwirq);
-
-	/* Configure Status change interrupt */
-
-	if (request_irq(pcmcia_schlvl, m8xx_interrupt, IRQF_SHARED,
-			driver_name, socket)) {
-		pcmcia_error("Cannot allocate IRQ %u for SCHLVL!\n",
-			     pcmcia_schlvl);
-		iounmap(pcmcia);
-		return -1;
-	}
-
-	w = (void *)&pcmcia->pcmc_pbr0;
-
-	out_be32(&pcmcia->pcmc_pscr, M8XX_PCMCIA_MASK(0) | M8XX_PCMCIA_MASK(1));
-	clrbits32(&pcmcia->pcmc_per, M8XX_PCMCIA_MASK(0) | M8XX_PCMCIA_MASK(1));
-
-	/* connect interrupt and disable CxOE */
-
-	out_be32(M8XX_PGCRX(0),
-		 M8XX_PGCRX_CXOE | (mk_int_int_mask(hwirq) << 16));
-	out_be32(M8XX_PGCRX(1),
-		 M8XX_PGCRX_CXOE | (mk_int_int_mask(hwirq) << 16));
-
-	/* initialize the fixed memory windows */
-
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++) {
-		for (m = 0; m < PCMCIA_MEM_WIN_NO; m++) {
-			out_be32(&w->br, PCMCIA_MEM_WIN_BASE +
-				 (PCMCIA_MEM_WIN_SIZE
-				  * (m + i * PCMCIA_MEM_WIN_NO)));
-
-			out_be32(&w->or, 0);	/* set to not valid */
-
-			w++;
-		}
-	}
-
-	/* turn off voltage */
-	voltage_set(0, 0, 0);
-	voltage_set(1, 0, 0);
-
-	/* Enable external hardware */
-	hardware_enable(0);
-	hardware_enable(1);
-
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++) {
-		socket[i].slot = i;
-		socket[i].socket.owner = THIS_MODULE;
-		socket[i].socket.features =
-		    SS_CAP_PCCARD | SS_CAP_MEM_ALIGN | SS_CAP_STATIC_MAP;
-		socket[i].socket.irq_mask = 0x000;
-		socket[i].socket.map_size = 0x1000;
-		socket[i].socket.io_offset = 0;
-		socket[i].socket.pci_irq = pcmcia_schlvl;
-		socket[i].socket.ops = &m8xx_services;
-		socket[i].socket.resource_ops = &pccard_iodyn_ops;
-		socket[i].socket.cb_dev = NULL;
-		socket[i].socket.dev.parent = &ofdev->dev;
-		socket[i].pcmcia = pcmcia;
-		socket[i].bus_freq = ppc_proc_freq;
-		socket[i].hwirq = hwirq;
-
-	}
-
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++) {
-		status = pcmcia_register_socket(&socket[i].socket);
-		if (status < 0)
-			pcmcia_error("Socket register failed\n");
-	}
-
-	return 0;
-}
-
-static int m8xx_remove(struct platform_device *ofdev)
-{
-	u32 m, i;
-	struct pcmcia_win *w;
-	pcmconf8xx_t *pcmcia = socket[0].pcmcia;
-
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++) {
-		w = (void *)&pcmcia->pcmc_pbr0;
-
-		out_be32(&pcmcia->pcmc_pscr, M8XX_PCMCIA_MASK(i));
-		out_be32(&pcmcia->pcmc_per,
-			 in_be32(&pcmcia->pcmc_per) & ~M8XX_PCMCIA_MASK(i));
-
-		/* turn off interrupt and disable CxOE */
-		out_be32(M8XX_PGCRX(i), M8XX_PGCRX_CXOE);
-
-		/* turn off memory windows */
-		for (m = 0; m < PCMCIA_MEM_WIN_NO; m++) {
-			out_be32(&w->or, 0);	/* set to not valid */
-			w++;
-		}
-
-		/* turn off voltage */
-		voltage_set(i, 0, 0);
-
-		/* disable external hardware */
-		hardware_disable(i);
-	}
-	for (i = 0; i < PCMCIA_SOCKETS_NO; i++)
-		pcmcia_unregister_socket(&socket[i].socket);
-	iounmap(pcmcia);
-
-	free_irq(pcmcia_schlvl, NULL);
-
-	return 0;
-}
-
-static const struct of_device_id m8xx_pcmcia_match[] = {
-	{
-	 .type = "pcmcia",
-	 .compatible = "fsl,pq-pcmcia",
-	 },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, m8xx_pcmcia_match);
-
-static struct platform_driver m8xx_pcmcia_driver = {
-	.driver = {
-		.name = driver_name,
-		.owner = THIS_MODULE,
-		.of_match_table = m8xx_pcmcia_match,
-	},
-	.probe = m8xx_probe,
-	.remove = m8xx_remove,
-};
-
-module_platform_driver(m8xx_pcmcia_driver);
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
 
-	if (atomic_dec_and_test(&vdev->refcnt))
+	if (atomic_dec_and_test(&vdev->refcnt)) {
+		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
+	}
 
 	module_put(THIS_MODULE);
 }
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
 static int vfio_pci_open(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
+	int ret;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
 	if (atomic_inc_return(&vdev->refcnt) == 1) {
-		int ret = vfio_pci_enable(vdev);
+		ret = vfio_pci_enable(vdev);
+		if (ret)
+			goto error;
+
+		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
 		if (ret) {
-			module_put(THIS_MODULE);
-			return ret;
+			vfio_pci_disable(vdev);
+			goto error;
 		}
 	}
 
 	return 0;
+error:
+	module_put(THIS_MODULE);
+	return ret;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	switch (cmd) {
 	case VFIO_CHECK_EXTENSION:
-		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+		switch (arg) {
+		case VFIO_SPAPR_TCE_IOMMU:
+			ret = 1;
+			break;
+		default:
+			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+			break;
+		}
+
+		return (ret < 0) ? 0 : ret;
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
+	case VFIO_EEH_PE_OP:
+		if (!container->tbl || !container->tbl->it_group)
+			return -ENODEV;
+
+		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
+						  cmd, arg);
 	}
 
 	return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
+/*
+ * EEH functionality support for VFIO devices. The feature is only
+ * available on sPAPR compatible platforms.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <asm/eeh.h>
+
+/* We might build address mapping here for "fast" path later */
+int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return eeh_dev_open(pdev);
+}
+
+void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+	eeh_dev_release(pdev);
+}
+
+long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				unsigned int cmd, unsigned long arg)
+{
+	struct eeh_pe *pe;
+	struct vfio_eeh_pe_op op;
+	unsigned long minsz;
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION:
+		if (arg == VFIO_EEH)
+			ret = eeh_enabled() ? 1 : 0;
+		else
+			ret = 0;
+		break;
+	case VFIO_EEH_PE_OP:
+		pe = eeh_iommu_group_to_pe(group);
+		if (!pe)
+			return -ENODEV;
+
+		minsz = offsetofend(struct vfio_eeh_pe_op, op);
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+		if (op.argsz < minsz || op.flags)
+			return -EINVAL;
+
+		switch (op.op) {
+		case VFIO_EEH_PE_DISABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+			break;
+		case VFIO_EEH_PE_ENABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_IO:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_DMA:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+			break;
+		case VFIO_EEH_PE_GET_STATE:
+			ret = eeh_pe_get_state(pe);
+			break;
+		case VFIO_EEH_PE_RESET_DEACTIVATE:
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+			break;
+		case VFIO_EEH_PE_RESET_HOT:
+			ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+			break;
+		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+			break;
+		case VFIO_EEH_PE_CONFIGURE:
+			ret = eeh_pe_configure(pe);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+#ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
+extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				       unsigned int cmd,
+				       unsigned long arg);
+#else
+static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return 0;
+}
+
+static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+}
+
+static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+					      unsigned int cmd,
+					      unsigned long arg)
+{
+	return -ENOTTY;
+}
+#endif /* CONFIG_EEH */
 #endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
  */
 #define VFIO_DMA_CC_IOMMU		4
 
+/* Check if EEH is supported */
+#define VFIO_EEH			5
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
 
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE.
+ */
+struct vfio_eeh_pe_op {
+	__u32 argsz;
+	__u32 flags;
+	__u32 op;
+};
+
+#define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE		1	/* Enable EEH functionality  */
+#define VFIO_EEH_PE_UNFREEZE_IO		2	/* Enable IO for frozen PE   */
+#define VFIO_EEH_PE_UNFREEZE_DMA	3	/* Enable DMA for frozen PE  */
+#define VFIO_EEH_PE_GET_STATE		4	/* PE state retrieval        */
+#define  VFIO_EEH_PE_STATE_NORMAL	0	/* PE in functional state    */
+#define  VFIO_EEH_PE_STATE_RESET	1	/* PE reset in progress      */
+#define  VFIO_EEH_PE_STATE_STOPPED	2	/* Stopped DMA and IO        */
+#define  VFIO_EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only          */
+#define  VFIO_EEH_PE_STATE_UNAVAIL	5	/* State unavailable         */
+#define VFIO_EEH_PE_RESET_DEACTIVATE	5	/* Deassert PE reset         */
+#define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
+#define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+
+#define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 54833a791a44..74a78cedce37 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,10 +17,10 @@ TARGETS = pmu copyloops mm tm
 
 endif
 
-all:
-	@for TARGET in $(TARGETS); do \
-		$(MAKE) -C $$TARGET all; \
-	done;
+all: $(TARGETS)
+
+$(TARGETS):
+	$(MAKE) -k -C $@ all
 
 run_tests: all
 	@for TARGET in $(TARGETS); do \
@@ -36,4 +36,4 @@ clean:
 tags:
 	find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: all run_tests clean tags
+.PHONY: all run_tests clean tags $(TARGETS)
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index b9ff0db42c79..c9f4263906a5 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -1,10 +1,12 @@
 noarg:
 	$(MAKE) -C ../
 
-PROGS := count_instructions
-EXTRA_SOURCES := ../harness.c event.c
+PROGS := count_instructions l3_bank_test per_event_excludes
+EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(PROGS) sub_all
+SUB_TARGETS = ebb
+
+all: $(PROGS) $(SUB_TARGETS)
 
 $(PROGS): $(EXTRA_SOURCES)
 
@@ -20,13 +22,8 @@ run_tests: all sub_run_tests
 clean: sub_clean
 	rm -f $(PROGS) loop.o
 
-
-SUB_TARGETS = ebb
-
-sub_all:
-	@for TARGET in $(SUB_TARGETS); do \
-		$(MAKE) -C $$TARGET all; \
-	done;
+$(SUB_TARGETS):
+	$(MAKE) -k -C $@ all
 
 sub_run_tests: all
 	@for TARGET in $(SUB_TARGETS); do \
@@ -38,4 +35,4 @@ sub_clean:
 		$(MAKE) -C $$TARGET clean; \
 	done;
 
-.PHONY: all run_tests clean sub_all sub_run_tests sub_clean
+.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
diff --git a/tools/testing/selftests/powerpc/pmu/count_instructions.c b/tools/testing/selftests/powerpc/pmu/count_instructions.c
index 312b4f0fd27c..4622117b24c0 100644
--- a/tools/testing/selftests/powerpc/pmu/count_instructions.c
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@@ -12,6 +12,7 @@
 
 #include "event.h"
 #include "utils.h"
+#include "lib.h"
 
 extern void thirty_two_instruction_loop(u64 loops);
 
@@ -90,7 +91,7 @@ static u64 determine_overhead(struct event *events)
 	return overhead;
 }
 
-static int count_instructions(void)
+static int test_body(void)
 {
 	struct event events[2];
 	u64 overhead;
@@ -111,17 +112,23 @@ static int count_instructions(void)
 	overhead = determine_overhead(events);
 	printf("Overhead of null loop: %llu instructions\n", overhead);
 
-	/* Run for 1M instructions */
-	FAIL_IF(do_count_loop(events, 0x100000, overhead, true));
+	/* Run for 1Mi instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
 
-	/* Run for 10M instructions */
-	FAIL_IF(do_count_loop(events, 0xa00000, overhead, true));
+	/* Run for 10Mi instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
 
-	/* Run for 100M instructions */
-	FAIL_IF(do_count_loop(events, 0x6400000, overhead, true));
+	/* Run for 100Mi instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
 
-	/* Run for 1G instructions */
-	FAIL_IF(do_count_loop(events, 0x40000000, overhead, true));
+	/* Run for 1Bi instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+	/* Run for 16Bi instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+	/* Run for 64Bi instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
 
 	event_close(&events[0]);
 	event_close(&events[1]);
@@ -129,6 +136,11 @@ static int count_instructions(void)
 	return 0;
 }
 
+static int count_instructions(void)
+{
+	return eat_cpu(test_body);
+}
+
 int main(void)
 {
 	return test_harness(count_instructions, "count_instructions");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index edbba2affc2c..3dc4332698cb 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -13,11 +13,12 @@ PROGS := reg_access_test event_attributes_test cycles_test	\
 	 close_clears_pmcc_test instruction_count_test		\
 	 fork_cleanup_test ebb_on_child_test			\
 	 ebb_on_willing_child_test back_to_back_ebbs_test	\
-	 lost_exception_test no_handler_test
+	 lost_exception_test no_handler_test			\
+	 cycles_with_mmcr2_test
 
 all: $(PROGS)
 
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c
+$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
new file mode 100644
index 000000000000..c7e4093f1cd3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(core_busy_loop)
+	stdu	%r1, -168(%r1)
+	std	r14, 160(%r1)
+	std	r15, 152(%r1)
+	std	r16, 144(%r1)
+	std	r17, 136(%r1)
+	std	r18, 128(%r1)
+	std	r19, 120(%r1)
+	std	r20, 112(%r1)
+	std	r21, 104(%r1)
+	std	r22, 96(%r1)
+	std	r23, 88(%r1)
+	std	r24, 80(%r1)
+	std	r25, 72(%r1)
+	std	r26, 64(%r1)
+	std	r27, 56(%r1)
+	std	r28, 48(%r1)
+	std	r29, 40(%r1)
+	std	r30, 32(%r1)
+	std	r31, 24(%r1)
+
+	li	 r3, 0x3030
+	std	 r3, -96(%r1)
+	li	 r4, 0x4040
+	std	 r4, -104(%r1)
+	li	 r5, 0x5050
+	std	 r5, -112(%r1)
+	li	 r6, 0x6060
+	std	 r6, -120(%r1)
+	li	 r7, 0x7070
+	std	 r7, -128(%r1)
+	li	 r8, 0x0808
+	std	 r8, -136(%r1)
+	li	 r9, 0x0909
+	std	 r9, -144(%r1)
+	li	r10, 0x1010
+	std	r10, -152(%r1)
+	li	r11, 0x1111
+	std	r11, -160(%r1)
+	li	r14, 0x1414
+	std	r14, -168(%r1)
+	li	r15, 0x1515
+	std	r15, -176(%r1)
+	li	r16, 0x1616
+	std	r16, -184(%r1)
+	li	r17, 0x1717
+	std	r17, -192(%r1)
+	li	r18, 0x1818
+	std	r18, -200(%r1)
+	li	r19, 0x1919
+	std	r19, -208(%r1)
+	li	r20, 0x2020
+	std	r20, -216(%r1)
+	li	r21, 0x2121
+	std	r21, -224(%r1)
+	li	r22, 0x2222
+	std	r22, -232(%r1)
+	li	r23, 0x2323
+	std	r23, -240(%r1)
+	li	r24, 0x2424
+	std	r24, -248(%r1)
+	li	r25, 0x2525
+	std	r25, -256(%r1)
+	li	r26, 0x2626
+	std	r26, -264(%r1)
+	li	r27, 0x2727
+	std	r27, -272(%r1)
+	li	r28, 0x2828
+	std	r28, -280(%r1)
+	li	r29, 0x2929
+	std	r29, -288(%r1)
+	li	r30, 0x3030
+	li	r31, 0x3131
+
+	li	r3, 0
+0:	addi	r3, r3, 1
+	cmpwi	r3, 100
+	blt	0b
+
+	/* Return 1 (fail) unless we get through all the checks */
+	li	r3, 1
+
+	/* Check none of our registers have been corrupted */
+	cmpwi	r4,  0x4040
+	bne	1f
+	cmpwi	r5,  0x5050
+	bne	1f
+	cmpwi	r6,  0x6060
+	bne	1f
+	cmpwi	r7,  0x7070
+	bne	1f
+	cmpwi	r8,  0x0808
+	bne	1f
+	cmpwi	r9,  0x0909
+	bne	1f
+	cmpwi	r10, 0x1010
+	bne	1f
+	cmpwi	r11, 0x1111
+	bne	1f
+	cmpwi	r14, 0x1414
+	bne	1f
+	cmpwi	r15, 0x1515
+	bne	1f
+	cmpwi	r16, 0x1616
+	bne	1f
+	cmpwi	r17, 0x1717
+	bne	1f
+	cmpwi	r18, 0x1818
+	bne	1f
+	cmpwi	r19, 0x1919
+	bne	1f
+	cmpwi	r20, 0x2020
+	bne	1f
+	cmpwi	r21, 0x2121
+	bne	1f
+	cmpwi	r22, 0x2222
+	bne	1f
+	cmpwi	r23, 0x2323
+	bne	1f
+	cmpwi	r24, 0x2424
+	bne	1f
+	cmpwi	r25, 0x2525
+	bne	1f
+	cmpwi	r26, 0x2626
+	bne	1f
+	cmpwi	r27, 0x2727
+	bne	1f
+	cmpwi	r28, 0x2828
+	bne	1f
+	cmpwi	r29, 0x2929
+	bne	1f
+	cmpwi	r30, 0x3030
+	bne	1f
+	cmpwi	r31, 0x3131
+	bne	1f
+
+	/* Load junk into all our registers before we reload them from the stack. */
+	li	r3,  0xde
+	li	r4,  0xad
+	li	r5,  0xbe
+	li	r6,  0xef
+	li	r7,  0xde
+	li	r8,  0xad
+	li	r9,  0xbe
+	li	r10, 0xef
+	li	r11, 0xde
+	li	r14, 0xad
+	li	r15, 0xbe
+	li	r16, 0xef
+	li	r17, 0xde
+	li	r18, 0xad
+	li	r19, 0xbe
+	li	r20, 0xef
+	li	r21, 0xde
+	li	r22, 0xad
+	li	r23, 0xbe
+	li	r24, 0xef
+	li	r25, 0xde
+	li	r26, 0xad
+	li	r27, 0xbe
+	li	r28, 0xef
+	li	r29, 0xdd
+
+	ld	r3,	-96(%r1)
+	cmpwi	r3,  0x3030
+	bne	1f
+	ld	r4,	-104(%r1)
+	cmpwi	r4,  0x4040
+	bne	1f
+	ld	r5,	-112(%r1)
+	cmpwi	r5,  0x5050
+	bne	1f
+	ld	r6,	-120(%r1)
+	cmpwi	r6,  0x6060
+	bne	1f
+	ld	r7,	-128(%r1)
+	cmpwi	r7,  0x7070
+	bne	1f
+	ld	r8,	-136(%r1)
+	cmpwi	r8,  0x0808
+	bne	1f
+	ld	r9,	-144(%r1)
+	cmpwi	r9,  0x0909
+	bne	1f
+	ld	r10, -152(%r1)
+	cmpwi	r10, 0x1010
+	bne	1f
+	ld	r11, -160(%r1)
+	cmpwi	r11, 0x1111
+	bne	1f
+	ld	r14, -168(%r1)
+	cmpwi	r14, 0x1414
+	bne	1f
+	ld	r15, -176(%r1)
+	cmpwi	r15, 0x1515
+	bne	1f
+	ld	r16, -184(%r1)
+	cmpwi	r16, 0x1616
+	bne	1f
+	ld	r17, -192(%r1)
+	cmpwi	r17, 0x1717
+	bne	1f
+	ld	r18, -200(%r1)
+	cmpwi	r18, 0x1818
+	bne	1f
+	ld	r19, -208(%r1)
+	cmpwi	r19, 0x1919
+	bne	1f
+	ld	r20, -216(%r1)
+	cmpwi	r20, 0x2020
+	bne	1f
+	ld	r21, -224(%r1)
+	cmpwi	r21, 0x2121
+	bne	1f
+	ld	r22, -232(%r1)
+	cmpwi	r22, 0x2222
+	bne	1f
+	ld	r23, -240(%r1)
+	cmpwi	r23, 0x2323
+	bne	1f
+	ld	r24, -248(%r1)
+	cmpwi	r24, 0x2424
+	bne	1f
+	ld	r25, -256(%r1)
+	cmpwi	r25, 0x2525
+	bne	1f
+	ld	r26, -264(%r1)
+	cmpwi	r26, 0x2626
+	bne	1f
+	ld	r27, -272(%r1)
+	cmpwi	r27, 0x2727
+	bne	1f
+	ld	r28, -280(%r1)
+	cmpwi	r28, 0x2828
+	bne	1f
+	ld	r29, -288(%r1)
+	cmpwi	r29, 0x2929
+	bne	1f
+
+	/* Load 0 (success) to return */
+	li	r3, 0
+
+1:	ld	r14, 160(%r1)
+	ld	r15, 152(%r1)
+	ld	r16, 144(%r1)
+	ld	r17, 136(%r1)
+	ld	r18, 128(%r1)
+	ld	r19, 120(%r1)
+	ld	r20, 112(%r1)
+	ld	r21, 104(%r1)
+	ld	r22, 96(%r1)
+	ld	r23, 88(%r1)
+	ld	r24, 80(%r1)
+	ld	r25, 72(%r1)
+	ld	r26, 64(%r1)
+	ld	r27, 56(%r1)
+	ld	r28, 48(%r1)
+	ld	r29, 40(%r1)
+	ld	r30, 32(%r1)
+	ld	r31, 24(%r1)
+	addi	%r1, %r1, 168
+	blr
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
new file mode 100644
index 000000000000..d43029b0800c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first freezes PMC1 and so we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* (     FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+int cycles_with_mmcr2(void)
+{
+	struct event event;
+	uint64_t val, expected[2], actual;
+	int i;
+	bool bad_mmcr2;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* XXX Set of MMCR2 must be after enable */
+	expected[0] = MMCR2_EXPECTED_1;
+	expected[1] = MMCR2_EXPECTED_2;
+	i = 0;
+	bad_mmcr2 = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		mtspr(SPRN_MMCR2, expected[i % 2]);
+
+		FAIL_IF(core_busy_loop());
+
+		val = mfspr(SPRN_MMCR2);
+		if (val != expected[i % 2]) {
+			bad_mmcr2 = true;
+			actual = val;
+		}
+
+		i++;
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	if (bad_mmcr2)
+		printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+	FAIL_IF(bad_mmcr2);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index 1b46be94b64c..d7a72ce696b5 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -224,6 +224,7 @@ void dump_ebb_hw_state(void)
 
 	printf("HW state:\n"		\
 	       "MMCR0 0x%016x %s\n"	\
+	       "MMCR2 0x%016lx\n"	\
 	       "EBBHR 0x%016lx\n"	\
 	       "BESCR 0x%016llx %s\n"	\
 	       "PMC1  0x%016lx\n"	\
@@ -233,10 +234,11 @@ void dump_ebb_hw_state(void)
 	       "PMC5  0x%016lx\n"	\
 	       "PMC6  0x%016lx\n"	\
 	       "SIAR  0x%016lx\n",
-	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_EBBHR), bescr,
-	       decode_bescr(bescr), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
-	       mfspr(SPRN_PMC3), mfspr(SPRN_PMC4), mfspr(SPRN_PMC5),
-	       mfspr(SPRN_PMC6), mfspr(SPRN_SIAR));
+	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_MMCR2),
+	       mfspr(SPRN_EBBHR), bescr, decode_bescr(bescr),
+	       mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+	       mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+	       mfspr(SPRN_SIAR));
 }
 
 void dump_ebb_state(void)
@@ -335,257 +337,6 @@ void event_leader_ebb_init(struct event *e)
 	e->attr.pinned = 1;
 }
 
-int core_busy_loop(void)
-{
-	int rc;
-
-	asm volatile (
-		"li  3,  0x3030\n"
-		"std 3,  -96(1)\n"
-		"li  4,  0x4040\n"
-		"std 4,  -104(1)\n"
-		"li  5,  0x5050\n"
-		"std 5,  -112(1)\n"
-		"li  6,  0x6060\n"
-		"std 6,  -120(1)\n"
-		"li  7,  0x7070\n"
-		"std 7,  -128(1)\n"
-		"li  8,  0x0808\n"
-		"std 8,  -136(1)\n"
-		"li  9,  0x0909\n"
-		"std 9,  -144(1)\n"
-		"li  10, 0x1010\n"
-		"std 10, -152(1)\n"
-		"li  11, 0x1111\n"
-		"std 11, -160(1)\n"
-		"li  14, 0x1414\n"
-		"std 14, -168(1)\n"
-		"li  15, 0x1515\n"
-		"std 15, -176(1)\n"
-		"li  16, 0x1616\n"
-		"std 16, -184(1)\n"
-		"li  17, 0x1717\n"
-		"std 17, -192(1)\n"
-		"li  18, 0x1818\n"
-		"std 18, -200(1)\n"
-		"li  19, 0x1919\n"
-		"std 19, -208(1)\n"
-		"li  20, 0x2020\n"
-		"std 20, -216(1)\n"
-		"li  21, 0x2121\n"
-		"std 21, -224(1)\n"
-		"li  22, 0x2222\n"
-		"std 22, -232(1)\n"
-		"li  23, 0x2323\n"
-		"std 23, -240(1)\n"
-		"li  24, 0x2424\n"
-		"std 24, -248(1)\n"
-		"li  25, 0x2525\n"
-		"std 25, -256(1)\n"
-		"li  26, 0x2626\n"
-		"std 26, -264(1)\n"
-		"li  27, 0x2727\n"
-		"std 27, -272(1)\n"
-		"li  28, 0x2828\n"
-		"std 28, -280(1)\n"
-		"li  29, 0x2929\n"
-		"std 29, -288(1)\n"
-		"li  30, 0x3030\n"
-		"li  31, 0x3131\n"
-
-		"li    3,  0\n"
-		"0: "
-		"addi  3, 3, 1\n"
-		"cmpwi 3, 100\n"
-		"blt   0b\n"
-
-		/* Return 1 (fail) unless we get through all the checks */
-		"li	0, 1\n"
-
-		/* Check none of our registers have been corrupted */
-		"cmpwi  4,  0x4040\n"
-		"bne	1f\n"
-		"cmpwi  5,  0x5050\n"
-		"bne	1f\n"
-		"cmpwi  6,  0x6060\n"
-		"bne	1f\n"
-		"cmpwi  7,  0x7070\n"
-		"bne	1f\n"
-		"cmpwi  8,  0x0808\n"
-		"bne	1f\n"
-		"cmpwi  9,  0x0909\n"
-		"bne	1f\n"
-		"cmpwi  10, 0x1010\n"
-		"bne	1f\n"
-		"cmpwi  11, 0x1111\n"
-		"bne	1f\n"
-		"cmpwi  14, 0x1414\n"
-		"bne	1f\n"
-		"cmpwi  15, 0x1515\n"
-		"bne	1f\n"
-		"cmpwi  16, 0x1616\n"
-		"bne	1f\n"
-		"cmpwi  17, 0x1717\n"
-		"bne	1f\n"
-		"cmpwi  18, 0x1818\n"
-		"bne	1f\n"
-		"cmpwi  19, 0x1919\n"
-		"bne	1f\n"
-		"cmpwi  20, 0x2020\n"
-		"bne	1f\n"
-		"cmpwi  21, 0x2121\n"
-		"bne	1f\n"
-		"cmpwi  22, 0x2222\n"
-		"bne	1f\n"
-		"cmpwi  23, 0x2323\n"
-		"bne	1f\n"
-		"cmpwi  24, 0x2424\n"
-		"bne	1f\n"
-		"cmpwi  25, 0x2525\n"
-		"bne	1f\n"
-		"cmpwi  26, 0x2626\n"
-		"bne	1f\n"
-		"cmpwi  27, 0x2727\n"
-		"bne	1f\n"
-		"cmpwi  28, 0x2828\n"
-		"bne	1f\n"
-		"cmpwi  29, 0x2929\n"
-		"bne	1f\n"
-		"cmpwi  30, 0x3030\n"
-		"bne	1f\n"
-		"cmpwi  31, 0x3131\n"
-		"bne	1f\n"
-
-		/* Load junk into all our registers before we reload them from the stack. */
-		"li  3,  0xde\n"
-		"li  4,  0xad\n"
-		"li  5,  0xbe\n"
-		"li  6,  0xef\n"
-		"li  7,  0xde\n"
-		"li  8,  0xad\n"
-		"li  9,  0xbe\n"
-		"li  10, 0xef\n"
-		"li  11, 0xde\n"
-		"li  14, 0xad\n"
-		"li  15, 0xbe\n"
-		"li  16, 0xef\n"
-		"li  17, 0xde\n"
-		"li  18, 0xad\n"
-		"li  19, 0xbe\n"
-		"li  20, 0xef\n"
-		"li  21, 0xde\n"
-		"li  22, 0xad\n"
-		"li  23, 0xbe\n"
-		"li  24, 0xef\n"
-		"li  25, 0xde\n"
-		"li  26, 0xad\n"
-		"li  27, 0xbe\n"
-		"li  28, 0xef\n"
-		"li  29, 0xdd\n"
-
-		"ld     3,  -96(1)\n"
-		"cmpwi  3,  0x3030\n"
-		"bne	1f\n"
-		"ld     4,  -104(1)\n"
-		"cmpwi  4,  0x4040\n"
-		"bne	1f\n"
-		"ld     5,  -112(1)\n"
-		"cmpwi  5,  0x5050\n"
-		"bne	1f\n"
-		"ld     6,  -120(1)\n"
-		"cmpwi  6,  0x6060\n"
-		"bne	1f\n"
-		"ld     7,  -128(1)\n"
-		"cmpwi  7,  0x7070\n"
-		"bne	1f\n"
-		"ld     8,  -136(1)\n"
-		"cmpwi  8,  0x0808\n"
-		"bne	1f\n"
-		"ld     9,  -144(1)\n"
-		"cmpwi  9,  0x0909\n"
-		"bne	1f\n"
-		"ld     10, -152(1)\n"
-		"cmpwi  10, 0x1010\n"
-		"bne	1f\n"
-		"ld     11, -160(1)\n"
-		"cmpwi  11, 0x1111\n"
-		"bne	1f\n"
-		"ld     14, -168(1)\n"
-		"cmpwi  14, 0x1414\n"
-		"bne	1f\n"
-		"ld     15, -176(1)\n"
-		"cmpwi  15, 0x1515\n"
-		"bne	1f\n"
-		"ld     16, -184(1)\n"
-		"cmpwi  16, 0x1616\n"
-		"bne	1f\n"
-		"ld     17, -192(1)\n"
-		"cmpwi  17, 0x1717\n"
-		"bne	1f\n"
-		"ld     18, -200(1)\n"
-		"cmpwi  18, 0x1818\n"
-		"bne	1f\n"
-		"ld     19, -208(1)\n"
-		"cmpwi  19, 0x1919\n"
-		"bne	1f\n"
-		"ld     20, -216(1)\n"
-		"cmpwi  20, 0x2020\n"
-		"bne	1f\n"
-		"ld     21, -224(1)\n"
-		"cmpwi  21, 0x2121\n"
-		"bne	1f\n"
-		"ld     22, -232(1)\n"
-		"cmpwi  22, 0x2222\n"
-		"bne	1f\n"
-		"ld     23, -240(1)\n"
-		"cmpwi  23, 0x2323\n"
-		"bne	1f\n"
-		"ld     24, -248(1)\n"
-		"cmpwi  24, 0x2424\n"
-		"bne	1f\n"
-		"ld     25, -256(1)\n"
-		"cmpwi  25, 0x2525\n"
-		"bne	1f\n"
-		"ld     26, -264(1)\n"
-		"cmpwi  26, 0x2626\n"
-		"bne	1f\n"
-		"ld     27, -272(1)\n"
-		"cmpwi  27, 0x2727\n"
-		"bne	1f\n"
-		"ld     28, -280(1)\n"
-		"cmpwi  28, 0x2828\n"
-		"bne	1f\n"
-		"ld     29, -288(1)\n"
-		"cmpwi  29, 0x2929\n"
-		"bne	1f\n"
-
-		/* Load 0 (success) to return */
-		"li	0, 0\n"
-
-		"1: 	mr %0, 0\n"
-
-		: "=r" (rc)
-		: /* no inputs */
-		: "3", "4", "5", "6", "7", "8", "9", "10", "11", "14",
-		  "15", "16", "17", "18", "19", "20", "21", "22", "23",
-		   "24", "25", "26", "27", "28", "29", "30", "31",
-		   "memory"
-	);
-
-	return rc;
-}
-
-int core_busy_loop_with_freeze(void)
-{
-	int rc;
-
-	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
-	rc = core_busy_loop();
-	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
-
-	return rc;
-}
-
 int ebb_child(union pipe read_pipe, union pipe write_pipe)
 {
 	struct event event;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
index e62bde05bf78..e44eee5d97ca 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -70,7 +70,6 @@ int ebb_check_mmcr0(void);
 extern u64 sample_period;
 
 int core_busy_loop(void);
-int core_busy_loop_with_freeze(void);
 int ebb_child(union pipe read_pipe, union pipe write_pipe);
 int catch_sigill(void (*func)(void));
 void write_pmc1(void);
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
new file mode 100644
index 000000000000..77472f31441e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "event.h"
+#include "utils.h"
+
+#define MALLOC_SIZE     (0x10000 * 10)  /* Ought to be enough .. */
+
+/*
+ * Tests that the L3 bank handling is correct. We fixed it in commit e9aaac1.
+ */
+static int l3_bank_test(void)
+{
+	struct event event;
+	char *p;
+	int i;
+
+	p = malloc(MALLOC_SIZE);
+	FAIL_IF(!p);
+
+	event_init(&event, 0x84918F);
+
+	FAIL_IF(event_open(&event));
+
+	for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+		p[i] = i;
+
+	event_read(&event);
+	event_report(&event);
+
+	FAIL_IF(event.result.running == 0);
+	FAIL_IF(event.result.enabled == 0);
+
+	event_close(&event);
+	free(p);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(l3_bank_test, "l3_bank_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
index 0f6a4731d546..9768dea37bf3 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -5,10 +5,15 @@
 
 #define _GNU_SOURCE	/* For CPU_ZERO etc. */
 
+#include <elf.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <link.h>
 #include <sched.h>
 #include <setjmp.h>
 #include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <sys/wait.h>
 
 #include "utils.h"
@@ -177,8 +182,8 @@ struct addr_range libc, vdso;
 
 int parse_proc_maps(void)
 {
+	unsigned long start, end;
 	char execute, name[128];
-	uint64_t start, end;
 	FILE *f;
 	int rc;
 
@@ -250,3 +255,46 @@ out_close:
 out:
 	return rc;
 }
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+	ElfW(auxv_t) *p;
+	void *result;
+	ssize_t num;
+	int fd;
+
+	fd = open("/proc/self/auxv", O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		return NULL;
+	}
+
+	result = NULL;
+
+	num = read(fd, auxv, sizeof(auxv));
+	if (num < 0) {
+		perror("read");
+		goto out;
+	}
+
+	if (num > sizeof(auxv)) {
+		printf("Overflowed auxv buffer\n");
+		goto out;
+	}
+
+	p = (ElfW(auxv_t) *)auxv;
+
+	while (p->a_type != AT_NULL) {
+		if (p->a_type == type) {
+			result = (void *)p->a_un.a_val;
+			break;
+		}
+
+		p++;
+	}
+out:
+	close(fd);
+	return result;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index ca5d72ae3be6..0f0339c8a6f6 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -29,6 +29,7 @@ extern int notify_parent(union pipe write_pipe);
 extern int notify_parent_of_error(union pipe write_pipe);
 extern pid_t eat_cpu(int (test_function)(void));
 extern bool require_paranoia_below(int level);
+extern void *get_auxv_entry(int type);
 
 struct addr_range {
 	uint64_t first, last;
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
new file mode 100644
index 000000000000..fddbbc9cae2f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "lib.h"
+#include "utils.h"
+
+/*
+ * Test that per-event excludes work.
+ */
+
+static int per_event_excludes(void)
+{
+	struct event *e, events[4];
+	char *platform;
+	int i;
+
+	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
+	FAIL_IF(!platform);
+	SKIP_IF(strcmp(platform, "power8") != 0);
+
+	/*
+	 * We need to create the events disabled, otherwise the running/enabled
+	 * counts don't match up.
+	 */
+	e = &events[0];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions");
+	e->attr.disabled = 1;
+
+	e = &events[1];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(k)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_hv = 1;
+
+	e = &events[2];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(h)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_kernel = 1;
+
+	e = &events[3];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(u)");
+	e->attr.disabled = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_kernel = 1;
+
+	FAIL_IF(event_open(&events[0]));
+
+	/*
+	 * The open here will fail if we don't have per event exclude support,
+	 * because the second event has an incompatible set of exclude settings
+	 * and we're asking for the events to be in a group.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+	/*
+	 * Even though the above will fail without per-event excludes we keep
+	 * testing in order to be thorough.
+	 */
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* Spin for a while */
+	for (i = 0; i < INT_MAX; i++)
+		asm volatile("" : : : "memory");
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	for (i = 0; i < 4; i++) {
+		FAIL_IF(event_read(&events[i]));
+		event_report(&events[i]);
+	}
+
+	/*
+	 * We should see that all events have enabled == running. That
+	 * shows that they were all on the PMU at once.
+	 */
+	for (i = 0; i < 4; i++)
+		FAIL_IF(events[i].result.running != events[i].result.enabled);
+
+	/*
+	 * We can also check that the result for instructions is >= all the
+	 * other counts. That's because it is counting all instructions while
+	 * the others are counting a subset.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(events[0].result.value < events[i].result.value);
+
+	for (i = 0; i < 4; i++)
+		event_close(&events[i]);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(per_event_excludes, "per_event_excludes");
+}