From a23c279059723aa7d01a35139f3f3cc941d73f30 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Wed, 2 Sep 2020 08:29:02 +0200 Subject: [PATCH 01/27] mips: octeon: dts: mrvl, cn73xx.dtsi: Add memory controller DT node This patch adds the memory controller (LMC) DT node to the Octeon 3 dtsi file. It also adds the L2C DT node, as this is referenced by the DDR driver. Signed-off-by: Stefan Roese --- arch/mips/dts/mrvl,cn73xx.dtsi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/mips/dts/mrvl,cn73xx.dtsi b/arch/mips/dts/mrvl,cn73xx.dtsi index f5ad4a6213..44a5a03014 100644 --- a/arch/mips/dts/mrvl,cn73xx.dtsi +++ b/arch/mips/dts/mrvl,cn73xx.dtsi @@ -72,6 +72,23 @@ <0x0300e 4>, <0x0300f 4>; }; + l2c: l2c@1180080000000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "cavium,octeon-7xxx-l2c"; + reg = <0x11800 0x80000000 0x0 0x01000000>; + u-boot,dm-pre-reloc; + }; + + lmc: lmc@1180088000000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "cavium,octeon-7xxx-ddr4"; + reg = <0x11800 0x88000000 0x0 0x02000000>; // 2 IFs + u-boot,dm-pre-reloc; + l2c-handle = <&l2c>; + }; + reset: reset@1180006001600 { compatible = "mrvl,cn7xxx-rst"; reg = <0x11800 0x06001600 0x0 0x200>; From 75168b4aa775aac5a8bbbf578cdd1da3ce395044 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:03 +0200 Subject: [PATCH 02/27] mips: octeon: Add octeon-model.h header This header is used by the upcoming DDR driver and potentially by other drivers ported from the 2013 Cavium / Marvell U-Boot repository. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mach-octeon/include/mach/octeon-model.h | 313 ++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/octeon-model.h diff --git a/arch/mips/mach-octeon/include/mach/octeon-model.h b/arch/mips/mach-octeon/include/mach/octeon-model.h new file mode 100644 index 0000000000..a346b3472b --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon-model.h @@ -0,0 +1,313 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_MODEL_H__ +#define __OCTEON_MODEL_H__ + +/* + * NOTE: These must match what is checked in common-config.mk + * Defines to represent the different versions of Octeon. + * + * IMPORTANT: When the default pass is updated for an Octeon Model, + * the corresponding change must also be made in the oct-sim script. + * + * The defines below should be used with the OCTEON_IS_MODEL() macro to + * determine what model of chip the software is running on. Models ending + * in 'XX' match multiple models (families), while specific models match only + * that model. If a pass (revision) is specified, then only that revision + * will be matched. Care should be taken when checking for both specific + * models and families that the specific models are checked for first. + * While these defines are similar to the processor ID, they are not intended + * to be used by anything other that the OCTEON_IS_MODEL framework, and + * the values are subject to change at anytime without notice. + * + * NOTE: only the OCTEON_IS_MODEL() macro/function and the OCTEON_CN* macros + * should be used outside of this file. All other macros are for internal + * use only, and may change without notice. 
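To illustrate the ordering rule above (a pass-specific define must be tested before its family-wide define, or the family match shadows it), a minimal sketch; apply_pass1_0_workaround() and setup_cn73xx() are hypothetical helpers, not part of this header:

static void configure_for_chip(void)
{
	/*
	 * OCTEON_CN73XX_PASS1_0 names exactly one revision, while
	 * OCTEON_CN73XX (built with OM_IGNORE_REVISION) matches every
	 * CN73xx pass, so the specific check has to come first.
	 */
	if (OCTEON_IS_MODEL(OCTEON_CN73XX_PASS1_0))
		apply_pass1_0_workaround();	/* hypothetical helper */
	else if (OCTEON_IS_MODEL(OCTEON_CN73XX))
		setup_cn73xx();			/* hypothetical helper */
}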
+ */ + +#define OCTEON_FAMILY_MASK 0x00ffff00 +#define OCTEON_PRID_MASK 0x00ffffff + +/* Flag bits in top byte */ +/* Ignores revision in model checks */ +#define OM_IGNORE_REVISION 0x01000000 +/* Check submodels */ +#define OM_CHECK_SUBMODEL 0x02000000 +/* Match all models previous than the one specified */ +#define OM_MATCH_PREVIOUS_MODELS 0x04000000 +/* Ignores the minor revison on newer parts */ +#define OM_IGNORE_MINOR_REVISION 0x08000000 +#define OM_FLAG_MASK 0xff000000 + +/* Match all cn5XXX Octeon models. */ +#define OM_MATCH_5XXX_FAMILY_MODELS 0x20000000 +/* Match all cn6XXX Octeon models. */ +#define OM_MATCH_6XXX_FAMILY_MODELS 0x40000000 +/* Match all cnf7XXX Octeon models. */ +#define OM_MATCH_F7XXX_FAMILY_MODELS 0x80000000 +/* Match all cn7XXX Octeon models. */ +#define OM_MATCH_7XXX_FAMILY_MODELS 0x10000000 +#define OM_MATCH_FAMILY_MODELS (OM_MATCH_5XXX_FAMILY_MODELS | \ + OM_MATCH_6XXX_FAMILY_MODELS | \ + OM_MATCH_F7XXX_FAMILY_MODELS | \ + OM_MATCH_7XXX_FAMILY_MODELS) + +/* + * CN7XXX models with new revision encoding + */ + +#define OCTEON_CNF75XX_PASS1_0 0x000d9800 +#define OCTEON_CNF75XX_PASS1_2 0x000d9802 +#define OCTEON_CNF75XX_PASS1_3 0x000d9803 +#define OCTEON_CNF75XX (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CNF75XX_PASS1_X \ + (OCTEON_CNF75XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN73XX_PASS1_0 0x000d9700 +#define OCTEON_CN73XX_PASS1_1 0x000d9701 +#define OCTEON_CN73XX_PASS1_2 0x000d9702 +#define OCTEON_CN73XX_PASS1_3 0x000d9703 +#define OCTEON_CN73XX (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN73XX_PASS1_X \ + (OCTEON_CN73XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN72XX OCTEON_CN73XX + +#define OCTEON_CN23XX OCTEON_CN73XX +#define OCTEON_CN23XX_PASS1_2 OCTEON_CN73XX_PASS1_2 +#define OCTEON_CN23XX_PASS1_3 OCTEON_CN73XX_PASS1_3 + +#define OCTEON_CN70XX_PASS1_0 0x000d9600 +#define OCTEON_CN70XX_PASS1_1 0x000d9601 +#define OCTEON_CN70XX_PASS1_2 0x000d9602 + +#define OCTEON_CN70XX_PASS2_0 0x000d9608 + +#define OCTEON_CN70XX (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN70XX_PASS1_X \ + (OCTEON_CN70XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN70XX_PASS2_X \ + (OCTEON_CN70XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN71XX OCTEON_CN70XX + +#define OCTEON_CN78XX_PASS1_0 0x000d9500 +#define OCTEON_CN78XX_PASS1_1 0x000d9501 +#define OCTEON_CN78XX_PASS2_0 0x000d9508 + +#define OCTEON_CN78XX (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN78XX_PASS1_X \ + (OCTEON_CN78XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN78XX_PASS2_X \ + (OCTEON_CN78XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN76XX (0x000d9540 | OM_CHECK_SUBMODEL) + +/* + * CNF7XXX models with new revision encoding + */ +#define OCTEON_CNF71XX_PASS1_0 0x000d9400 +#define OCTEON_CNF71XX_PASS1_1 0x000d9401 + +#define OCTEON_CNF71XX (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CNF71XX_PASS1_X \ + (OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +/* + * CN6XXX models with new revision encoding + */ +#define OCTEON_CN68XX_PASS1_0 0x000d9100 +#define OCTEON_CN68XX_PASS1_1 0x000d9101 +#define OCTEON_CN68XX_PASS2_0 0x000d9108 +#define OCTEON_CN68XX_PASS2_1 0x000d9109 +#define OCTEON_CN68XX_PASS2_2 0x000d910a + +#define OCTEON_CN68XX (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN68XX_PASS1_X \ + (OCTEON_CN68XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN68XX_PASS2_X \ + (OCTEON_CN68XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +#define 
OCTEON_CN68XX_PASS1 OCTEON_CN68XX_PASS1_X +#define OCTEON_CN68XX_PASS2 OCTEON_CN68XX_PASS2_X + +#define OCTEON_CN66XX_PASS1_0 0x000d9200 +#define OCTEON_CN66XX_PASS1_2 0x000d9202 + +#define OCTEON_CN66XX (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN66XX_PASS1_X \ + (OCTEON_CN66XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +#define OCTEON_CN63XX_PASS1_0 0x000d9000 +#define OCTEON_CN63XX_PASS1_1 0x000d9001 +#define OCTEON_CN63XX_PASS1_2 0x000d9002 +#define OCTEON_CN63XX_PASS2_0 0x000d9008 +#define OCTEON_CN63XX_PASS2_1 0x000d9009 +#define OCTEON_CN63XX_PASS2_2 0x000d900a + +#define OCTEON_CN63XX (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_REVISION) +#define OCTEON_CN63XX_PASS1_X \ + (OCTEON_CN63XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) +#define OCTEON_CN63XX_PASS2_X \ + (OCTEON_CN63XX_PASS2_0 | OM_IGNORE_MINOR_REVISION) + +/* CN62XX is same as CN63XX with 1 MB cache */ +#define OCTEON_CN62XX OCTEON_CN63XX + +#define OCTEON_CN61XX_PASS1_0 0x000d9300 +#define OCTEON_CN61XX_PASS1_1 0x000d9301 + +#define OCTEON_CN61XX (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_REVISION) +#define OCTEON_CN61XX_PASS1_X \ + (OCTEON_CN61XX_PASS1_0 | OM_IGNORE_MINOR_REVISION) + +/* CN60XX is same as CN61XX with 512 KB cache */ +#define OCTEON_CN60XX OCTEON_CN61XX + +/* This matches the complete family of CN3xxx CPUs, and not subsequent models */ +#define OCTEON_CN6XXX \ + (OCTEON_CN63XX_PASS1_0 | OM_MATCH_6XXX_FAMILY_MODELS) +#define OCTEON_CNF7XXX \ + (OCTEON_CNF71XX_PASS1_0 | OM_MATCH_F7XXX_FAMILY_MODELS) +#define OCTEON_CN7XXX \ + (OCTEON_CN78XX_PASS1_0 | OM_MATCH_7XXX_FAMILY_MODELS) + +/* + * The revision byte (low byte) has two different encodings. + * CN3XXX: + * + * bits + * <7:5>: reserved (0) + * <4>: alternate package + * <3:0>: revision + * + * CN5XXX and older models: + * + * bits + * <7>: reserved (0) + * <6>: alternate package + * <5:3>: major revision + * <2:0>: minor revision + */ + +/* Masks used for the various types of model/family/revision matching */ +#define OCTEON_38XX_FAMILY_MASK 0x00ffff00 +#define OCTEON_38XX_FAMILY_REV_MASK 0x00ffff0f +#define OCTEON_38XX_MODEL_MASK 0x00ffff10 +#define OCTEON_38XX_MODEL_REV_MASK \ + (OCTEON_38XX_FAMILY_REV_MASK | OCTEON_38XX_MODEL_MASK) + +/* CN5XXX and later use different layout of bits in the revision ID field */ +#define OCTEON_58XX_FAMILY_MASK OCTEON_38XX_FAMILY_MASK +#define OCTEON_58XX_FAMILY_REV_MASK 0x00ffff3f +#define OCTEON_58XX_MODEL_MASK 0x00ffff40 +#define OCTEON_58XX_MODEL_REV_MASK \ + (OCTEON_58XX_FAMILY_REV_MASK | OCTEON_58XX_MODEL_MASK) +#define OCTEON_58XX_MODEL_MINOR_REV_MASK \ + (OCTEON_58XX_MODEL_REV_MASK & 0x00ffff38) +#define OCTEON_5XXX_MODEL_MASK 0x00ff0fc0 + +#define __OCTEON_MATCH_MASK__(X, Y, Z) \ + ({ \ + typeof(X) x = (X); \ + typeof(Y) y = (Y); \ + typeof(Z) z = (Z); \ + (x & z) == (y & z); \ + }) + +/* + * __OCTEON_IS_MODEL_COMPILE__(arg_model, chip_model) + * returns true if chip_model is identical or belong to the OCTEON + * model group specified in arg_model. 
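As a worked illustration of the mask-based comparison used below (values taken from the defines above; the wrapper function exists only for the example):

static int example_prid_match(void)
{
	u32 prid = 0x000d9702;	/* i.e. OCTEON_CN73XX_PASS1_2 */

	/* Family-wide compare ignores the revision bits: matches (1). */
	int family = __OCTEON_MATCH_MASK__(prid, OCTEON_CN73XX_PASS1_0,
					   OCTEON_58XX_FAMILY_MASK);

	/* Revision-exact compare keeps the low bits: no match (0). */
	int exact = __OCTEON_MATCH_MASK__(prid, OCTEON_CN73XX_PASS1_0,
					  OCTEON_58XX_FAMILY_REV_MASK);

	return family && !exact;
}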
+ */ + +/* Helper macros to make the following macro more compact */ +#define OM_MASK OM_FLAG_MASK +#define OM_MATCH_MASK __OCTEON_MATCH_MASK__ +#define OM_MATCH_PREVIOUS OM_MATCH_PREVIOUS_MODELS + +#define __OCTEON_IS_MODEL_COMPILE__(A, B) \ + ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + (((((((a) & OM_MASK) == (OM_IGNORE_REVISION | OM_CHECK_SUBMODEL)) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) || \ + ((((a) & OM_MASK) == 0) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_REV_MASK)) || \ + ((((a) & OM_MASK) == OM_IGNORE_MINOR_REVISION) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MINOR_REV_MASK)) || \ + ((((a) & OM_MASK) == OM_CHECK_SUBMODEL) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_MODEL_MASK)) || \ + ((((a) & OM_MASK) == OM_IGNORE_REVISION) && \ + OM_MATCH_MASK((b), (a), OCTEON_58XX_FAMILY_MASK)) || \ + ((((a) & (OM_MATCH_5XXX_FAMILY_MODELS)) == \ + OM_MATCH_5XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CN63XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_6XXX_FAMILY_MODELS)) == \ + OM_MATCH_6XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) >= OCTEON_CN63XX_PASS1_0) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CNF71XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_F7XXX_FAMILY_MODELS)) == \ + OM_MATCH_F7XXX_FAMILY_MODELS) && \ + ((b & OCTEON_PRID_MASK) >= OCTEON_CNF71XX_PASS1_0) && \ + ((b & OCTEON_PRID_MASK) < OCTEON_CN78XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_7XXX_FAMILY_MODELS)) == \ + OM_MATCH_7XXX_FAMILY_MODELS) && ((b & OCTEON_PRID_MASK) >= \ + OCTEON_CN78XX_PASS1_0)) || \ + ((((a) & (OM_MATCH_PREVIOUS)) == OM_MATCH_PREVIOUS) && \ + (((b) & OCTEON_58XX_MODEL_MASK) < ((a) & OCTEON_58XX_MODEL_MASK))) \ + ))); \ + }) + +#ifndef OCTEON_IS_MODEL + +static inline int __octeon_is_model_runtime_internal__(u32 model) +{ + u32 cpuid = read_c0_prid(); + + return __OCTEON_IS_MODEL_COMPILE__(model, cpuid); +} + +static inline int __octeon_is_model_runtime__(u32 model) +{ + return __octeon_is_model_runtime_internal__(model); +} + +/* + * The OCTEON_IS_MODEL macro should be used for all Octeon model checking done + * in a program. + * This should be kept runtime if at all possible and must be conditionalized + * with OCTEON_IS_COMMON_BINARY() if runtime checking support is required. + * + * Use of the macro in preprocessor directives ( #if OCTEON_IS_MODEL(...) ) + * is NOT SUPPORTED, and should be replaced with CVMX_COMPILED_FOR() + * I.e.: + * #if OCTEON_IS_MODEL(OCTEON_CN56XX) -> #if CVMX_COMPILED_FOR(OCTEON_CN56XX) + */ +#define OCTEON_IS_MODEL(x) __octeon_is_model_runtime__(x) +#define OCTEON_IS_COMMON_BINARY() 1 +#undef OCTEON_MODEL +#endif + +#define OCTEON_IS_OCTEON2() \ + (OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX)) + +#define OCTEON_IS_OCTEON3() OCTEON_IS_MODEL(OCTEON_CN7XXX) + +const char *octeon_model_get_string(u32 chip_id); +const char *octeon_model_get_string_buffer(u32 chip_id, char *buffer); + +/** + * Return the octeon family, i.e., ProcessorID of the PrID register. + * + * @return the octeon family on success, ((u32)-1) on error. + */ +static inline u32 cvmx_get_octeon_family(void) +{ + return (read_c0_prid() & OCTEON_FAMILY_MASK); +} + +#endif /* __OCTEON_MODEL_H__ */ From 91e34fcb41c4ca17f717372ff6ca57c256e95c62 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:04 +0200 Subject: [PATCH 03/27] mips: octeon: Add cvmx/cvmx-lmcx-defs.h header This header will be used by the DDR driver (lmc). It is ported from the 2013 Cavium / Marvell U-Boot repository.
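As context, the DDR driver referenced here selects its per-generation code paths with the model macros added in the previous patch; a brief hedged sketch (ddr_setup_octeon2() and ddr_setup_octeon3() are hypothetical names):

static int ddr_select_init_path(void)
{
	if (OCTEON_IS_OCTEON3())		/* CN7XXX family */
		return ddr_setup_octeon3();	/* hypothetical */
	if (OCTEON_IS_OCTEON2())		/* CN6XXX / CNF71XX */
		return ddr_setup_octeon2();	/* hypothetical */
	return -1;				/* unsupported model */
}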
Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../include/mach/cvmx/cvmx-lmcx-defs.h | 4574 +++++++++++++++++ 1 file changed, 4574 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h diff --git a/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h b/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h new file mode 100644 index 0000000000..3b4cba9241 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx/cvmx-lmcx-defs.h @@ -0,0 +1,4574 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __CVMX_LMCX_DEFS_H__ +#define __CVMX_LMCX_DEFS_H__ + +#define CVMX_LMCX_BANK_CONFLICT1(offs) \ + ((0x000360ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_BANK_CONFLICT2(offs) \ + ((0x000368ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_BIST_RESULT(offs) \ + ((0x0000F8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_CHAR_CTL(offs) \ + ((0x000220ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_DQ_ERR_COUNT(offs) \ + ((0x000040ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK0(offs) \ + ((0x000228ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK1(offs) \ + ((0x000230ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK2(offs) \ + ((0x000238ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK3(offs) \ + ((0x000240ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CHAR_MASK4(offs) \ + ((0x000318ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_COMP_CTL(offs) \ + ((0x000028ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_COMP_CTL2(offs) \ + ((0x0001B8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CONFIG(offs) \ + ((0x000188ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CONTROL(offs) \ + ((0x000190ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_CTL(offs) \ + ((0x000010ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_CTL1(offs) \ + ((0x000090ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DBTRAIN_CTL(offs) \ + ((0x0003F8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DCLK_CNT(offs) \ + ((0x0001E0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DCLK_CNT_HI(offs) \ + ((0x000070ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DCLK_CNT_LO(offs) \ + ((0x000068ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DCLK_CTL(offs) \ + ((0x0000B8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DDR2_CTL(offs) \ + ((0x000018ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DDR4_DIMM_CTL(offs) \ + ((0x0003F0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DDR_PLL_CTL(offs) \ + ((0x000258ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DELAY_CFG(offs) \ + ((0x000088ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DIMMX_DDR4_PARAMS0(offs, id) \ + ((0x0000D0ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMMX_DDR4_PARAMS1(offs, id) \ + ((0x000140ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMMX_PARAMS(offs, id) \ + ((0x000270ull) + (((offs) & 1) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_DIMM_CTL(offs) \ + ((0x000310ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DLL_CTL(offs) \ + ((0x0000C0ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_DLL_CTL2(offs) \ + ((0x0001C8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_DLL_CTL3(offs) \ + ((0x000218ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_ECC_PARITY_TEST(offs) \ + 
((0x000108ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_EXT_CONFIG(offs) \ + ((0x000030ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_EXT_CONFIG2(offs) \ + ((0x000090ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE0(offs) \ + ((0x000340ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE1(offs) \ + ((0x000348ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_GENERAL_PURPOSE2(offs) \ + ((0x000350ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_IFB_CNT(offs) \ + ((0x0001D0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_IFB_CNT_HI(offs) \ + ((0x000050ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_IFB_CNT_LO(offs) \ + ((0x000048ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_INT(offs) \ + ((0x0001F0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_INT_EN(offs) \ + ((0x0001E8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_LANEX_CRC_SWIZ(x, id) \ + ((0x000380ull) + (((offs) & 15) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_MEM_CFG0(offs) \ + ((0x000000ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_MEM_CFG1(offs) \ + ((0x000008ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_MODEREG_PARAMS0(offs) \ + ((0x0001A8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS1(offs) \ + ((0x000260ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS2(offs) \ + ((0x000050ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MODEREG_PARAMS3(offs) \ + ((0x000058ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA0(offs) \ + ((0x000070ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA1(offs) \ + ((0x000078ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MPR_DATA2(offs) \ + ((0x000080ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_MR_MPR_CTL(offs) \ + ((0x000068ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_NS_CTL(offs) \ + ((0x000178ull) + ((offs) & 3) * 0x1000000ull) + +static inline uint64_t CVMX_LMCX_NXM(unsigned long offs) +{ + switch (cvmx_get_octeon_family()) { + case OCTEON_CNF71XX & OCTEON_FAMILY_MASK: + case OCTEON_CN61XX & OCTEON_FAMILY_MASK: + case OCTEON_CN70XX & OCTEON_FAMILY_MASK: + case OCTEON_CN66XX & OCTEON_FAMILY_MASK: + case OCTEON_CN63XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x60000000ull; + case OCTEON_CNF75XX & OCTEON_FAMILY_MASK: + case OCTEON_CN73XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x1000000ull; + case OCTEON_CN78XX & OCTEON_FAMILY_MASK: + if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + return (0x0000C8ull) + (offs) * 0x1000000ull; + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) + return (0x0000C8ull) + (offs) * 0x1000000ull; + case OCTEON_CN68XX & OCTEON_FAMILY_MASK: + return (0x0000C8ull) + (offs) * 0x1000000ull; + } + return (0x0000C8ull) + (offs) * 0x1000000ull; +} + +#define CVMX_LMCX_NXM_FADR(offs) \ + ((0x000028ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_OPS_CNT(offs) \ + ((0x0001D8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_OPS_CNT_HI(offs) \ + ((0x000060ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_OPS_CNT_LO(offs) \ + ((0x000058ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PHY_CTL(offs) \ + ((0x000210ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_PHY_CTL2(offs) \ + ((0x000250ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_PLL_BWCTL(offs) \ + ((0x000040ull)) +#define CVMX_LMCX_PLL_CTL(offs) \ + ((0x0000A8ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PLL_STATUS(offs) \ + ((0x0000B0ull) 
+ ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_PPR_CTL(offs) \ + ((0x0003E0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_READ_LEVEL_CTL(offs) \ + ((0x000140ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_READ_LEVEL_DBG(offs) \ + ((0x000148ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_READ_LEVEL_RANKX(offs, id) \ + ((0x000100ull) + (((offs) & 3) + ((id) & 1) * 0xC000000ull) * 8) +#define CVMX_LMCX_REF_STATUS(offs) \ + ((0x0000A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RESET_CTL(offs) \ + ((0x000180ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RETRY_CONFIG(offs) \ + ((0x000110ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RETRY_STATUS(offs) \ + ((0x000118ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_CTL(offs) \ + ((0x0002A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_DBG(offs) \ + ((0x0002A8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_RLEVEL_RANKX(offs, id) \ + ((0x000280ull) + (((offs) & 3) + ((id) & 3) * 0x200000ull) * 8) +#define CVMX_LMCX_RODT_COMP_CTL(offs) \ + ((0x0000A0ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_RODT_CTL(offs) \ + ((0x000078ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_RODT_MASK(offs) \ + ((0x000268ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLED_FADR(offs) \ + ((0x000330ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG0(offs) \ + ((0x000320ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG1(offs) \ + ((0x000328ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SCRAMBLE_CFG2(offs) \ + ((0x000338ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SEQ_CTL(offs) \ + ((0x000048ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL0(offs) \ + ((0x0001F8ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL1(offs) \ + ((0x000200ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL2(offs) \ + ((0x000208ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_SLOT_CTL3(offs) \ + ((0x000248ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS0(offs) \ + ((0x000198ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS1(offs) \ + ((0x0001A0ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TIMING_PARAMS2(offs) \ + ((0x000060ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TRO_CTL(offs) \ + ((0x000248ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_TRO_STAT(offs) \ + ((0x000250ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_WLEVEL_CTL(offs) \ + ((0x000300ull) + ((offs) & 3) * 0x1000000ull) +#define CVMX_LMCX_WLEVEL_DBG(offs) \ + ((0x000308ull) + ((offs) & 3) * 0x1000000ull) + +static inline uint64_t CVMX_LMCX_WLEVEL_RANKX(unsigned long offs, + unsigned long id) +{ + switch (cvmx_get_octeon_family()) { + case OCTEON_CN70XX & OCTEON_FAMILY_MASK: + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CNF75XX & OCTEON_FAMILY_MASK: + case OCTEON_CN73XX & OCTEON_FAMILY_MASK: + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CN78XX & OCTEON_FAMILY_MASK: + if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + return (0x0002C0ull) + ((offs) + + (id) * 0x200000ull) * 8; + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) + return (0x0002C0ull) + ((offs) + + (id) * 0x200000ull) * 8; + + case OCTEON_CN66XX & OCTEON_FAMILY_MASK: + case OCTEON_CN63XX & OCTEON_FAMILY_MASK: + return (0x0002B0ull) + ((offs) + (id) * 0x0ull) * 8; + case OCTEON_CNF71XX & OCTEON_FAMILY_MASK: + case OCTEON_CN61XX & OCTEON_FAMILY_MASK: + 
return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8; + case OCTEON_CN68XX & OCTEON_FAMILY_MASK: + return (0x0002B0ull) + ((offs) + (id) * 0x200000ull) * 8; + } + return (0x0002C0ull) + ((offs) + (id) * 0x200000ull) * 8; +} + +#define CVMX_LMCX_WODT_CTL0(offs) \ + ((0x000030ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_WODT_CTL1(offs) \ + ((0x000080ull) + ((offs) & 1) * 0x60000000ull) +#define CVMX_LMCX_WODT_MASK(offs) \ + ((0x0001B0ull) + ((offs) & 3) * 0x1000000ull) + +/** + * cvmx_lmc#_char_ctl + * + * This register provides an assortment of various control fields needed + * to characterize the DDR3 interface. + */ +union cvmx_lmcx_char_ctl { + u64 u64; + struct cvmx_lmcx_char_ctl_s { + uint64_t reserved_54_63:10; + uint64_t dq_char_byte_check:1; + uint64_t dq_char_check_lock:1; + uint64_t dq_char_check_enable:1; + uint64_t dq_char_bit_sel:3; + uint64_t dq_char_byte_sel:4; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } s; + struct cvmx_lmcx_char_ctl_cn61xx { + uint64_t reserved_44_63:20; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn61xx; + struct cvmx_lmcx_char_ctl_cn63xx { + uint64_t reserved_42_63:22; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn63xx; + struct cvmx_lmcx_char_ctl_cn63xx cn63xxp1; + struct cvmx_lmcx_char_ctl_cn61xx cn66xx; + struct cvmx_lmcx_char_ctl_cn61xx cn68xx; + struct cvmx_lmcx_char_ctl_cn63xx cn68xxp1; + struct cvmx_lmcx_char_ctl_cn70xx { + uint64_t reserved_53_63:11; + uint64_t dq_char_check_lock:1; + uint64_t dq_char_check_enable:1; + uint64_t dq_char_bit_sel:3; + uint64_t dq_char_byte_sel:4; + uint64_t dr:1; + uint64_t skew_on:1; + uint64_t en:1; + uint64_t sel:1; + uint64_t prog:8; + uint64_t prbs:32; + } cn70xx; + struct cvmx_lmcx_char_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_char_ctl_s cn73xx; + struct cvmx_lmcx_char_ctl_s cn78xx; + struct cvmx_lmcx_char_ctl_s cn78xxp1; + struct cvmx_lmcx_char_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_char_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_comp_ctl2 + * + * LMC_COMP_CTL2 = LMC Compensation control + * + */ +union cvmx_lmcx_comp_ctl2 { + u64 u64; + struct cvmx_lmcx_comp_ctl2_s { + uint64_t reserved_51_63:13; + uint64_t rclk_char_mode:1; + uint64_t reserved_40_49:10; + uint64_t ptune_offset:4; + uint64_t reserved_12_35:24; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } s; + struct cvmx_lmcx_comp_ctl2_cn61xx { + uint64_t reserved_34_63:30; + uint64_t ddr__ptune:4; + uint64_t ddr__ntune:4; + uint64_t m180:1; + uint64_t byp:1; + uint64_t ptune:4; + uint64_t ntune:4; + uint64_t rodt_ctl:4; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } cn61xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn63xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn63xxp1; + struct cvmx_lmcx_comp_ctl2_cn61xx cn66xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn68xx; + struct cvmx_lmcx_comp_ctl2_cn61xx cn68xxp1; + struct cvmx_lmcx_comp_ctl2_cn70xx { + uint64_t reserved_51_63:13; + uint64_t rclk_char_mode:1; + uint64_t ddr__ptune:5; + uint64_t ddr__ntune:5; + uint64_t ptune_offset:4; + uint64_t ntune_offset:4; + uint64_t m180:1; + uint64_t byp:1; + uint64_t ptune:5; + uint64_t ntune:5; + uint64_t rodt_ctl:4; + uint64_t control_ctl:4; + uint64_t cmd_ctl:4; + uint64_t ck_ctl:4; + uint64_t dqx_ctl:4; + } cn70xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cn70xxp1; + struct cvmx_lmcx_comp_ctl2_cn70xx cn73xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cn78xx; + 
struct cvmx_lmcx_comp_ctl2_cn70xx cn78xxp1; + struct cvmx_lmcx_comp_ctl2_cn61xx cnf71xx; + struct cvmx_lmcx_comp_ctl2_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_config + * + * This register controls certain parameters required for memory configuration. + * Note the following: + * * Priority order for hardware write operations to + * LMC()_CONFIG/LMC()_FADR/LMC()_ECC_SYND: DED error > SEC error. + * * The self-refresh entry sequence(s) power the DLL up/down (depending on + * LMC()_MODEREG_PARAMS0[DLL]) when LMC()_CONFIG[SREF_WITH_DLL] is set. + * * Prior to the self-refresh exit sequence, LMC()_MODEREG_PARAMS0 should + * be reprogrammed + * (if needed) to the appropriate values. + * + * See LMC initialization sequence for the LMC bringup sequence. + */ +union cvmx_lmcx_config { + u64 u64; + struct cvmx_lmcx_config_s { + uint64_t lrdimm_ena:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t reserved_18_39:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_config_cn61xx { + uint64_t reserved_61_63:3; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn61xx; + struct cvmx_lmcx_config_cn63xx { + uint64_t reserved_59_63:5; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn63xx; + struct cvmx_lmcx_config_cn63xxp1 { + uint64_t reserved_55_63:9; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn63xxp1; + struct cvmx_lmcx_config_cn66xx { + uint64_t reserved_60_63:4; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t sequence:3; + uint64_t ref_zqcs_int:19; + uint64_t 
reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } cn66xx; + struct cvmx_lmcx_config_cn63xx cn68xx; + struct cvmx_lmcx_config_cn63xx cn68xxp1; + struct cvmx_lmcx_config_cn70xx { + uint64_t reserved_63_63:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t ref_zqcs_int:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t reserved_0_0:1; + } cn70xx; + struct cvmx_lmcx_config_cn70xx cn70xxp1; + struct cvmx_lmcx_config_cn73xx { + uint64_t lrdimm_ena:1; + uint64_t bg2_enable:1; + uint64_t mode_x4dev:1; + uint64_t mode32b:1; + uint64_t scrz:1; + uint64_t early_unload_d1_r1:1; + uint64_t early_unload_d1_r0:1; + uint64_t early_unload_d0_r1:1; + uint64_t early_unload_d0_r0:1; + uint64_t init_status:4; + uint64_t mirrmask:4; + uint64_t rankmask:4; + uint64_t rank_ena:1; + uint64_t sref_with_dll:1; + uint64_t early_dqx:1; + uint64_t ref_zqcs_int:22; + uint64_t reset:1; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t reserved_0_0:1; + } cn73xx; + struct cvmx_lmcx_config_cn73xx cn78xx; + struct cvmx_lmcx_config_cn73xx cn78xxp1; + struct cvmx_lmcx_config_cn61xx cnf71xx; + struct cvmx_lmcx_config_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_control + * + * LMC_CONTROL = LMC Control + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_control { + u64 u64; + struct cvmx_lmcx_control_s { + uint64_t scramble_ena:1; + uint64_t thrcnt:12; + uint64_t persub:8; + uint64_t thrmax:4; + uint64_t crm_cnt:5; + uint64_t crm_thr:5; + uint64_t crm_max:5; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } s; + struct cvmx_lmcx_control_s cn61xx; + struct cvmx_lmcx_control_cn63xx { + uint64_t reserved_24_63:40; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn63xx; + struct cvmx_lmcx_control_cn63xx cn63xxp1; + struct cvmx_lmcx_control_cn66xx { + uint64_t scramble_ena:1; + uint64_t reserved_24_62:39; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t 
xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn66xx; + struct cvmx_lmcx_control_cn68xx { + uint64_t reserved_63_63:1; + uint64_t thrcnt:12; + uint64_t persub:8; + uint64_t thrmax:4; + uint64_t crm_cnt:5; + uint64_t crm_thr:5; + uint64_t crm_max:5; + uint64_t rodt_bprch:1; + uint64_t wodt_bprch:1; + uint64_t bprch:2; + uint64_t ext_zqcs_dis:1; + uint64_t int_zqcs_dis:1; + uint64_t auto_dclkdis:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t nxm_write_en:1; + uint64_t elev_prio_dis:1; + uint64_t inorder_wr:1; + uint64_t inorder_rd:1; + uint64_t throttle_wr:1; + uint64_t throttle_rd:1; + uint64_t fprch2:2; + uint64_t pocas:1; + uint64_t ddr2t:1; + uint64_t bwcnt:1; + uint64_t rdimm_ena:1; + } cn68xx; + struct cvmx_lmcx_control_cn68xx cn68xxp1; + struct cvmx_lmcx_control_s cn70xx; + struct cvmx_lmcx_control_s cn70xxp1; + struct cvmx_lmcx_control_s cn73xx; + struct cvmx_lmcx_control_s cn78xx; + struct cvmx_lmcx_control_s cn78xxp1; + struct cvmx_lmcx_control_cn66xx cnf71xx; + struct cvmx_lmcx_control_s cnf75xx; +}; + +/** + * cvmx_lmc#_ctl + * + * LMC_CTL = LMC Control + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_ctl { + u64 u64; + struct cvmx_lmcx_ctl_s { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t pll_div2:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t reserved_10_11:2; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } s; + struct cvmx_lmcx_ctl_cn30xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t pll_div2:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn30xx; + struct cvmx_lmcx_ctl_cn30xx cn31xx; + struct cvmx_lmcx_ctl_cn38xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t set_zero:1; + uint64_t mode128b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn38xx; + struct cvmx_lmcx_ctl_cn38xx cn38xxp2; + struct cvmx_lmcx_ctl_cn50xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_17_17:1; + uint64_t pll_bypass:1; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + 
uint64_t dic:2; + } cn50xx; + struct cvmx_lmcx_ctl_cn52xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode32b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn52xx; + struct cvmx_lmcx_ctl_cn52xx cn52xxp1; + struct cvmx_lmcx_ctl_cn52xx cn56xx; + struct cvmx_lmcx_ctl_cn52xx cn56xxp1; + struct cvmx_lmcx_ctl_cn58xx { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:4; + uint64_t ddr__pctl:4; + uint64_t slow_scf:1; + uint64_t xor_bank:1; + uint64_t max_write_batch:4; + uint64_t reserved_16_17:2; + uint64_t rdimm_ena:1; + uint64_t r2r_slot:1; + uint64_t inorder_mwf:1; + uint64_t inorder_mrf:1; + uint64_t dreset:1; + uint64_t mode128b:1; + uint64_t fprch2:1; + uint64_t bprch:1; + uint64_t sil_lat:2; + uint64_t tskw:2; + uint64_t qs_dic:2; + uint64_t dic:2; + } cn58xx; + struct cvmx_lmcx_ctl_cn58xx cn58xxp1; +}; + +/** + * cvmx_lmc#_ctl1 + * + * LMC_CTL1 = LMC Control1 + * This register is an assortment of various control fields needed by the + * memory controller + */ +union cvmx_lmcx_ctl1 { + u64 u64; + struct cvmx_lmcx_ctl1_s { + uint64_t reserved_21_63:43; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t sequence:3; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_2_7:6; + uint64_t data_layout:2; + } s; + struct cvmx_lmcx_ctl1_cn30xx { + uint64_t reserved_2_63:62; + uint64_t data_layout:2; + } cn30xx; + struct cvmx_lmcx_ctl1_cn50xx { + uint64_t reserved_10_63:54; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_2_7:6; + uint64_t data_layout:2; + } cn50xx; + struct cvmx_lmcx_ctl1_cn52xx { + uint64_t reserved_21_63:43; + uint64_t ecc_adr:1; + uint64_t forcewrite:4; + uint64_t idlepower:3; + uint64_t sequence:3; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_0_7:8; + } cn52xx; + struct cvmx_lmcx_ctl1_cn52xx cn52xxp1; + struct cvmx_lmcx_ctl1_cn52xx cn56xx; + struct cvmx_lmcx_ctl1_cn52xx cn56xxp1; + struct cvmx_lmcx_ctl1_cn58xx { + uint64_t reserved_10_63:54; + uint64_t sil_mode:1; + uint64_t dcc_enable:1; + uint64_t reserved_0_7:8; + } cn58xx; + struct cvmx_lmcx_ctl1_cn58xx cn58xxp1; +}; + +/** + * cvmx_lmc#_dbtrain_ctl + * + * Reserved. 
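All of the register layouts in this file are consumed the same way: read the 64-bit CSR into the union, modify the named fields, write it back. A minimal sketch for LMC()_CONTROL; csr_rd()/csr_wr() stand in for whatever MMIO accessors the driver provides and are not defined in this header:

static void lmc_enable_rdimm_sketch(int if_num)
{
	union cvmx_lmcx_control ctl;

	ctl.u64 = csr_rd(CVMX_LMCX_CONTROL(if_num));	/* assumed accessor */
	ctl.s.rdimm_ena = 1;	/* fields from the common layout above */
	ctl.s.ddr2t = 0;
	csr_wr(CVMX_LMCX_CONTROL(if_num), ctl.u64);	/* assumed accessor */
}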
+ * + */ +union cvmx_lmcx_dbtrain_ctl { + u64 u64; + struct cvmx_lmcx_dbtrain_ctl_s { + uint64_t reserved_63_63:1; + uint64_t lfsr_pattern_sel:1; + uint64_t cmd_count_ext:2; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } s; + struct cvmx_lmcx_dbtrain_ctl_cn73xx { + uint64_t reserved_60_63:4; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } cn73xx; + struct cvmx_lmcx_dbtrain_ctl_s cn78xx; + struct cvmx_lmcx_dbtrain_ctl_cnf75xx { + uint64_t reserved_62_63:2; + uint64_t cmd_count_ext:2; + uint64_t db_output_impedance:3; + uint64_t db_sel:1; + uint64_t tccd_sel:1; + uint64_t rw_train:1; + uint64_t read_dq_count:7; + uint64_t read_cmd_count:5; + uint64_t write_ena:1; + uint64_t activate:1; + uint64_t prank:2; + uint64_t lrank:3; + uint64_t row_a:18; + uint64_t bg:2; + uint64_t ba:2; + uint64_t column_a:13; + } cnf75xx; +}; + +/** + * cvmx_lmc#_dclk_cnt + * + * LMC_DCLK_CNT = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt { + u64 u64; + struct cvmx_lmcx_dclk_cnt_s { + uint64_t dclkcnt:64; + } s; + struct cvmx_lmcx_dclk_cnt_s cn61xx; + struct cvmx_lmcx_dclk_cnt_s cn63xx; + struct cvmx_lmcx_dclk_cnt_s cn63xxp1; + struct cvmx_lmcx_dclk_cnt_s cn66xx; + struct cvmx_lmcx_dclk_cnt_s cn68xx; + struct cvmx_lmcx_dclk_cnt_s cn68xxp1; + struct cvmx_lmcx_dclk_cnt_s cn70xx; + struct cvmx_lmcx_dclk_cnt_s cn70xxp1; + struct cvmx_lmcx_dclk_cnt_s cn73xx; + struct cvmx_lmcx_dclk_cnt_s cn78xx; + struct cvmx_lmcx_dclk_cnt_s cn78xxp1; + struct cvmx_lmcx_dclk_cnt_s cnf71xx; + struct cvmx_lmcx_dclk_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_dclk_cnt_hi + * + * LMC_DCLK_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt_hi { + u64 u64; + struct cvmx_lmcx_dclk_cnt_hi_s { + uint64_t reserved_32_63:32; + uint64_t dclkcnt_hi:32; + } s; + struct cvmx_lmcx_dclk_cnt_hi_s cn30xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn31xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn38xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_dclk_cnt_hi_s cn50xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn52xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_dclk_cnt_hi_s cn56xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_dclk_cnt_hi_s cn58xx; + struct cvmx_lmcx_dclk_cnt_hi_s cn58xxp1; +}; + +/** + * cvmx_lmc#_dclk_cnt_lo + * + * LMC_DCLK_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_dclk_cnt_lo { + u64 u64; + struct cvmx_lmcx_dclk_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t dclkcnt_lo:32; + } s; + struct cvmx_lmcx_dclk_cnt_lo_s cn30xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn31xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn38xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_dclk_cnt_lo_s cn50xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn52xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_dclk_cnt_lo_s cn56xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_dclk_cnt_lo_s cn58xx; + struct cvmx_lmcx_dclk_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_dclk_ctl + * + * LMC_DCLK_CTL = LMC DCLK generation control + * + * 
+ * Notes: + * This CSR is only relevant for LMC1. LMC0_DCLK_CTL is not used. + * + */ +union cvmx_lmcx_dclk_ctl { + u64 u64; + struct cvmx_lmcx_dclk_ctl_s { + uint64_t reserved_8_63:56; + uint64_t off90_ena:1; + uint64_t dclk90_byp:1; + uint64_t dclk90_ld:1; + uint64_t dclk90_vlu:5; + } s; + struct cvmx_lmcx_dclk_ctl_s cn56xx; + struct cvmx_lmcx_dclk_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_ddr2_ctl + * + * LMC_DDR2_CTL = LMC DDR2 & DLL Control Register + * + */ +union cvmx_lmcx_ddr2_ctl { + u64 u64; + struct cvmx_lmcx_ddr2_ctl_s { + uint64_t reserved_32_63:32; + uint64_t bank8:1; + uint64_t burst8:1; + uint64_t addlat:3; + uint64_t pocas:1; + uint64_t bwcnt:1; + uint64_t twr:3; + uint64_t silo_hc:1; + uint64_t ddr_eof:4; + uint64_t tfaw:5; + uint64_t crip_mode:1; + uint64_t ddr2t:1; + uint64_t odt_ena:1; + uint64_t qdll_ena:1; + uint64_t dll90_vlu:5; + uint64_t dll90_byp:1; + uint64_t rdqs:1; + uint64_t ddr2:1; + } s; + struct cvmx_lmcx_ddr2_ctl_cn30xx { + uint64_t reserved_32_63:32; + uint64_t bank8:1; + uint64_t burst8:1; + uint64_t addlat:3; + uint64_t pocas:1; + uint64_t bwcnt:1; + uint64_t twr:3; + uint64_t silo_hc:1; + uint64_t ddr_eof:4; + uint64_t tfaw:5; + uint64_t crip_mode:1; + uint64_t ddr2t:1; + uint64_t odt_ena:1; + uint64_t qdll_ena:1; + uint64_t dll90_vlu:5; + uint64_t dll90_byp:1; + uint64_t reserved_1_1:1; + uint64_t ddr2:1; + } cn30xx; + struct cvmx_lmcx_ddr2_ctl_cn30xx cn31xx; + struct cvmx_lmcx_ddr2_ctl_s cn38xx; + struct cvmx_lmcx_ddr2_ctl_s cn38xxp2; + struct cvmx_lmcx_ddr2_ctl_s cn50xx; + struct cvmx_lmcx_ddr2_ctl_s cn52xx; + struct cvmx_lmcx_ddr2_ctl_s cn52xxp1; + struct cvmx_lmcx_ddr2_ctl_s cn56xx; + struct cvmx_lmcx_ddr2_ctl_s cn56xxp1; + struct cvmx_lmcx_ddr2_ctl_s cn58xx; + struct cvmx_lmcx_ddr2_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_ddr4_dimm_ctl + * + * Bits 0-21 of this register are used only when LMC()_CONTROL[RDIMM_ENA] = 1. + * + * During an RCW initialization sequence, bits 0-21 control LMC's write + * operations to the extended DDR4 control words in the JEDEC standard + * registering clock driver on an RDIMM. + */ +union cvmx_lmcx_ddr4_dimm_ctl { + u64 u64; + struct cvmx_lmcx_ddr4_dimm_ctl_s { + uint64_t reserved_28_63:36; + uint64_t rank_timing_enable:1; + uint64_t bodt_trans_mode:1; + uint64_t trans_mode_ena:1; + uint64_t read_preamble_mode:1; + uint64_t buff_config_da3:1; + uint64_t mpr_over_ena:1; + uint64_t ddr4_dimm1_wmask:11; + uint64_t ddr4_dimm0_wmask:11; + } s; + struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx { + uint64_t reserved_22_63:42; + uint64_t ddr4_dimm1_wmask:11; + uint64_t ddr4_dimm0_wmask:11; + } cn70xx; + struct cvmx_lmcx_ddr4_dimm_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn73xx; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xx; + struct cvmx_lmcx_ddr4_dimm_ctl_s cn78xxp1; + struct cvmx_lmcx_ddr4_dimm_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_ddr_pll_ctl + * + * This register controls the DDR_CK frequency. For details, refer to CK + * speed programming. See LMC initialization sequence for the initialization + * sequence. + * DDR PLL bringup sequence: + * + * 1. Write [CLKF], [CLKR], [DDR_PS_EN]. + * + * 2. Wait 128 ref clock cycles (7680 core-clock cycles). + * + * 3. Write 1 to [RESET_N]. + * + * 4. Wait 1152 ref clocks (1152*16 core-clock cycles). + * + * 5. Write 0 to [DDR_DIV_RESET]. + * + * 6. Wait 10 ref clock cycles (160 core-clock cycles) before bringing up + * the DDR interface. 
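The numbered sequence above can be sketched as follows for one interface, using the CN73XX field layout defined just below; csr_rd()/csr_wr() and wait_ref_clks() are assumed helpers, and the parameter values are placeholders:

static void ddr_pll_bringup_sketch(int if_num, int clkf, int clkr, int ps_en)
{
	union cvmx_lmcx_ddr_pll_ctl pll;

	/* Step 1: program CLKF, CLKR, DDR_PS_EN. */
	pll.u64 = csr_rd(CVMX_LMCX_DDR_PLL_CTL(if_num));
	pll.cn73xx.clkf = clkf;
	pll.cn73xx.clkr = clkr;
	pll.cn73xx.ddr_ps_en = ps_en;
	csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);

	wait_ref_clks(128);			/* Step 2 */

	pll.cn73xx.reset_n = 1;			/* Step 3 */
	csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);

	wait_ref_clks(1152);			/* Step 4 */

	pll.cn73xx.ddr_div_reset = 0;		/* Step 5 */
	csr_wr(CVMX_LMCX_DDR_PLL_CTL(if_num), pll.u64);

	wait_ref_clks(10);			/* Step 6, then bring up the interface */
}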
+ */ +union cvmx_lmcx_ddr_pll_ctl { + u64 u64; + struct cvmx_lmcx_ddr_pll_ctl_s { + uint64_t reserved_45_63:19; + uint64_t dclk_alt_refclk_sel:1; + uint64_t bwadj:12; + uint64_t dclk_invert:1; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t reserved_18_26:9; + uint64_t diffamp:4; + uint64_t cps:3; + uint64_t reserved_8_10:3; + uint64_t reset_n:1; + uint64_t clkf:7; + } s; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx { + uint64_t reserved_27_63:37; + uint64_t jtg_test_mode:1; + uint64_t dfm_div_reset:1; + uint64_t dfm_ps_en:3; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:3; + uint64_t diffamp:4; + uint64_t cps:3; + uint64_t cpb:3; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn61xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn63xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn66xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xx; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn70xx { + uint64_t reserved_31_63:33; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t pll_rfslip:1; + uint64_t clkr:2; + uint64_t jtg_test_mode:1; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:4; + uint64_t reserved_8_17:10; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn70xx; + struct cvmx_lmcx_ddr_pll_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx { + uint64_t reserved_45_63:19; + uint64_t dclk_alt_refclk_sel:1; + uint64_t bwadj:12; + uint64_t dclk_invert:1; + uint64_t phy_dcok:1; + uint64_t ddr4_mode:1; + uint64_t pll_fbslip:1; + uint64_t pll_lock:1; + uint64_t pll_rfslip:1; + uint64_t clkr:2; + uint64_t jtg_test_mode:1; + uint64_t ddr_div_reset:1; + uint64_t ddr_ps_en:4; + uint64_t reserved_9_17:9; + uint64_t clkf_ext:1; + uint64_t reset_n:1; + uint64_t clkf:7; + } cn73xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cn78xxp1; + struct cvmx_lmcx_ddr_pll_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_ddr_pll_ctl_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_delay_cfg + * + * LMC_DELAY_CFG = Open-loop delay line settings + * + * + * Notes: + * The DQ bits add OUTGOING delay only to dq, dqs_[p,n], cb, cbs_[p,n], dqm. + * Delay is approximately 50-80ps per setting depending on process/voltage. + * There is no need to add incoming delay since by default all strobe bits + * are delayed internally by 90 degrees (as was always the case in previous + * passes and past chips. + * + * The CMD add delay to all command bits DDR_RAS, DDR_CAS, DDR_A<15:0>, + * DDR_BA<2:0>, DDR_n_CS<1:0>_L, DDR_WE, DDR_CKE and DDR_ODT_<7:0>. + * Again, delay is 50-80ps per tap. + * + * The CLK bits add delay to all clock signals DDR_CK_<5:0>_P and + * DDR_CK_<5:0>_N. Again, delay is 50-80ps per tap. + * + * The usage scenario is the following: There is too much delay on command + * signals and setup on command is not met. The user can then delay the + * clock until setup is met. + * + * At the same time though, dq/dqs should be delayed because there is also + * a DDR spec tying dqs with clock. If clock is too much delayed with + * respect to dqs, writes will start to fail. + * + * This scheme should eliminate the board need of adding routing delay to + * clock signals to make high frequencies work. 
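A minimal sketch of the scenario described above, using the layout defined just below: the clock is delayed until command setup is met, and DQ/DQS receive a comparable delay so they stay within spec relative to the delayed clock. Each tap is roughly 50-80 ps; csr_wr() is an assumed accessor and the tap counts are placeholders:

static void lmc_delay_cfg_sketch(int if_num)
{
	union cvmx_lmcx_delay_cfg dly;

	dly.u64 = 0;
	dly.s.clk = 4;	/* delay clocks so command setup is met */
	dly.s.dq = 4;	/* keep DQ/DQS aligned with the delayed clock */
	dly.s.cmd = 0;	/* command signals are already late in this scenario */
	csr_wr(CVMX_LMCX_DELAY_CFG(if_num), dly.u64);
}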
+ */ +union cvmx_lmcx_delay_cfg { + u64 u64; + struct cvmx_lmcx_delay_cfg_s { + uint64_t reserved_15_63:49; + uint64_t dq:5; + uint64_t cmd:5; + uint64_t clk:5; + } s; + struct cvmx_lmcx_delay_cfg_s cn30xx; + struct cvmx_lmcx_delay_cfg_cn38xx { + uint64_t reserved_14_63:50; + uint64_t dq:4; + uint64_t reserved_9_9:1; + uint64_t cmd:4; + uint64_t reserved_4_4:1; + uint64_t clk:4; + } cn38xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn50xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn52xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn52xxp1; + struct cvmx_lmcx_delay_cfg_cn38xx cn56xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn56xxp1; + struct cvmx_lmcx_delay_cfg_cn38xx cn58xx; + struct cvmx_lmcx_delay_cfg_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_dimm#_ddr4_params0 + * + * This register contains values to be programmed into the extra DDR4 control + * words in the corresponding (registered) DIMM. These are control words + * RC1x through RC8x. + */ +union cvmx_lmcx_dimmx_ddr4_params0 { + u64 u64; + struct cvmx_lmcx_dimmx_ddr4_params0_s { + uint64_t rc8x:8; + uint64_t rc7x:8; + uint64_t rc6x:8; + uint64_t rc5x:8; + uint64_t rc4x:8; + uint64_t rc3x:8; + uint64_t rc2x:8; + uint64_t rc1x:8; + } s; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn70xxp1; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn73xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xx; + struct cvmx_lmcx_dimmx_ddr4_params0_s cn78xxp1; + struct cvmx_lmcx_dimmx_ddr4_params0_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm#_ddr4_params1 + * + * This register contains values to be programmed into the extra DDR4 control + * words in the corresponding (registered) DIMM. These are control words + * RC9x through RCBx. + */ +union cvmx_lmcx_dimmx_ddr4_params1 { + u64 u64; + struct cvmx_lmcx_dimmx_ddr4_params1_s { + uint64_t reserved_24_63:40; + uint64_t rcbx:8; + uint64_t rcax:8; + uint64_t rc9x:8; + } s; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn70xxp1; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn73xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xx; + struct cvmx_lmcx_dimmx_ddr4_params1_s cn78xxp1; + struct cvmx_lmcx_dimmx_ddr4_params1_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm#_params + * + * This register contains values to be programmed into each control word in + * the corresponding (registered) DIMM. The control words allow optimization + * of the device properties for different raw card designs. Note that LMC + * only uses this CSR when LMC()_CONTROL[RDIMM_ENA]=1. During a power-up/init + * sequence, LMC writes these fields into the control words in the JEDEC + * standard DDR3 SSTE32882 registering clock driver or DDR4 Register + * DDR4RCD01 on an RDIMM when corresponding LMC()_DIMM_CTL[DIMM*_WMASK] + * bits are set. 
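A hedged sketch of how these control words might be programmed for DIMM 0 on one interface, using the layouts defined below; csr_rd()/csr_wr() are assumed accessors, the RC values are placeholders, the (dimm, interface) argument order is inferred from the address macro above, and it is assumed that bit n of [DIMM0_WMASK] corresponds to control word RCn:

static void lmc_program_rc_words_sketch(int if_num)
{
	union cvmx_lmcx_dimmx_params p;
	union cvmx_lmcx_dimm_ctl dctl;

	p.u64 = csr_rd(CVMX_LMCX_DIMMX_PARAMS(0, if_num));
	p.s.rc2 = 0x0;			/* placeholder control-word values */
	p.s.rc3 = 0x5;
	csr_wr(CVMX_LMCX_DIMMX_PARAMS(0, if_num), p.u64);

	dctl.u64 = csr_rd(CVMX_LMCX_DIMM_CTL(if_num));
	dctl.s.dimm0_wmask |= (1 << 2) | (1 << 3);	/* write RC2 and RC3 */
	csr_wr(CVMX_LMCX_DIMM_CTL(if_num), dctl.u64);
}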
+ */ +union cvmx_lmcx_dimmx_params { + u64 u64; + struct cvmx_lmcx_dimmx_params_s { + uint64_t rc15:4; + uint64_t rc14:4; + uint64_t rc13:4; + uint64_t rc12:4; + uint64_t rc11:4; + uint64_t rc10:4; + uint64_t rc9:4; + uint64_t rc8:4; + uint64_t rc7:4; + uint64_t rc6:4; + uint64_t rc5:4; + uint64_t rc4:4; + uint64_t rc3:4; + uint64_t rc2:4; + uint64_t rc1:4; + uint64_t rc0:4; + } s; + struct cvmx_lmcx_dimmx_params_s cn61xx; + struct cvmx_lmcx_dimmx_params_s cn63xx; + struct cvmx_lmcx_dimmx_params_s cn63xxp1; + struct cvmx_lmcx_dimmx_params_s cn66xx; + struct cvmx_lmcx_dimmx_params_s cn68xx; + struct cvmx_lmcx_dimmx_params_s cn68xxp1; + struct cvmx_lmcx_dimmx_params_s cn70xx; + struct cvmx_lmcx_dimmx_params_s cn70xxp1; + struct cvmx_lmcx_dimmx_params_s cn73xx; + struct cvmx_lmcx_dimmx_params_s cn78xx; + struct cvmx_lmcx_dimmx_params_s cn78xxp1; + struct cvmx_lmcx_dimmx_params_s cnf71xx; + struct cvmx_lmcx_dimmx_params_s cnf75xx; +}; + +/** + * cvmx_lmc#_dimm_ctl + * + * Note that this CSR is only used when LMC()_CONTROL[RDIMM_ENA] = 1. During + * a power-up/init sequence, this CSR controls LMC's write operations to the + * control words in the JEDEC standard DDR3 SSTE32882 registering clock + * driver or DDR4 Register DDR4RCD01 on an RDIMM. + */ +union cvmx_lmcx_dimm_ctl { + u64 u64; + struct cvmx_lmcx_dimm_ctl_s { + uint64_t reserved_46_63:18; + uint64_t parity:1; + uint64_t tcws:13; + uint64_t dimm1_wmask:16; + uint64_t dimm0_wmask:16; + } s; + struct cvmx_lmcx_dimm_ctl_s cn61xx; + struct cvmx_lmcx_dimm_ctl_s cn63xx; + struct cvmx_lmcx_dimm_ctl_s cn63xxp1; + struct cvmx_lmcx_dimm_ctl_s cn66xx; + struct cvmx_lmcx_dimm_ctl_s cn68xx; + struct cvmx_lmcx_dimm_ctl_s cn68xxp1; + struct cvmx_lmcx_dimm_ctl_s cn70xx; + struct cvmx_lmcx_dimm_ctl_s cn70xxp1; + struct cvmx_lmcx_dimm_ctl_s cn73xx; + struct cvmx_lmcx_dimm_ctl_s cn78xx; + struct cvmx_lmcx_dimm_ctl_s cn78xxp1; + struct cvmx_lmcx_dimm_ctl_s cnf71xx; + struct cvmx_lmcx_dimm_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_dll_ctl + * + * LMC_DLL_CTL = LMC DLL control and DCLK reset + * + */ +union cvmx_lmcx_dll_ctl { + u64 u64; + struct cvmx_lmcx_dll_ctl_s { + uint64_t reserved_8_63:56; + uint64_t dreset:1; + uint64_t dll90_byp:1; + uint64_t dll90_ena:1; + uint64_t dll90_vlu:5; + } s; + struct cvmx_lmcx_dll_ctl_s cn52xx; + struct cvmx_lmcx_dll_ctl_s cn52xxp1; + struct cvmx_lmcx_dll_ctl_s cn56xx; + struct cvmx_lmcx_dll_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_dll_ctl2 + * + * See LMC initialization sequence for the initialization sequence. 
+ * + */ +union cvmx_lmcx_dll_ctl2 { + u64 u64; + struct cvmx_lmcx_dll_ctl2_s { + uint64_t reserved_0_63:64; + } s; + struct cvmx_lmcx_dll_ctl2_cn61xx { + uint64_t reserved_16_63:48; + uint64_t intf_en:1; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:8; + } cn61xx; + struct cvmx_lmcx_dll_ctl2_cn63xx { + uint64_t reserved_15_63:49; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:8; + } cn63xx; + struct cvmx_lmcx_dll_ctl2_cn63xx cn63xxp1; + struct cvmx_lmcx_dll_ctl2_cn63xx cn66xx; + struct cvmx_lmcx_dll_ctl2_cn61xx cn68xx; + struct cvmx_lmcx_dll_ctl2_cn61xx cn68xxp1; + struct cvmx_lmcx_dll_ctl2_cn70xx { + uint64_t reserved_17_63:47; + uint64_t intf_en:1; + uint64_t dll_bringup:1; + uint64_t dreset:1; + uint64_t quad_dll_ena:1; + uint64_t byp_sel:4; + uint64_t byp_setting:9; + } cn70xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn70xxp1; + struct cvmx_lmcx_dll_ctl2_cn70xx cn73xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn78xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cn78xxp1; + struct cvmx_lmcx_dll_ctl2_cn61xx cnf71xx; + struct cvmx_lmcx_dll_ctl2_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_dll_ctl3 + * + * LMC_DLL_CTL3 = LMC DLL control and DCLK reset + * + */ +union cvmx_lmcx_dll_ctl3 { + u64 u64; + struct cvmx_lmcx_dll_ctl3_s { + uint64_t reserved_50_63:14; + uint64_t wr_deskew_ena:1; + uint64_t wr_deskew_ld:1; + uint64_t bit_select:4; + uint64_t reserved_0_43:44; + } s; + struct cvmx_lmcx_dll_ctl3_cn61xx { + uint64_t reserved_41_63:23; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:8; + uint64_t dll_fast:1; + uint64_t dll90_setting:8; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:6; + } cn61xx; + struct cvmx_lmcx_dll_ctl3_cn63xx { + uint64_t reserved_29_63:35; + uint64_t dll_fast:1; + uint64_t dll90_setting:8; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:6; + } cn63xx; + struct cvmx_lmcx_dll_ctl3_cn63xx cn63xxp1; + struct cvmx_lmcx_dll_ctl3_cn63xx cn66xx; + struct cvmx_lmcx_dll_ctl3_cn61xx cn68xx; + struct cvmx_lmcx_dll_ctl3_cn61xx cn68xxp1; + struct cvmx_lmcx_dll_ctl3_cn70xx { + uint64_t reserved_44_63:20; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:9; + uint64_t dll_fast:1; + uint64_t dll90_setting:9; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t offset:7; + } cn70xx; + struct cvmx_lmcx_dll_ctl3_cn70xx cn70xxp1; + struct cvmx_lmcx_dll_ctl3_cn73xx { + uint64_t reserved_50_63:14; + uint64_t wr_deskew_ena:1; + uint64_t wr_deskew_ld:1; + uint64_t bit_select:4; + uint64_t dclk90_fwd:1; + uint64_t ddr_90_dly_byp:1; + uint64_t dclk90_recal_dis:1; + uint64_t dclk90_byp_sel:1; + uint64_t dclk90_byp_setting:9; + uint64_t dll_fast:1; + uint64_t dll90_setting:9; + uint64_t fine_tune_mode:1; + uint64_t dll_mode:1; + uint64_t dll90_byte_sel:4; + uint64_t offset_ena:1; + uint64_t load_offset:1; + uint64_t mode_sel:2; + uint64_t byte_sel:4; + uint64_t 
offset:7; + } cn73xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cn78xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cn78xxp1; + struct cvmx_lmcx_dll_ctl3_cn61xx cnf71xx; + struct cvmx_lmcx_dll_ctl3_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_dual_memcfg + * + * This register controls certain parameters of dual-memory configuration. + * + * This register enables the design to have two separate memory + * configurations, selected dynamically by the reference address. Note + * however, that both configurations share LMC()_CONTROL[XOR_BANK], + * LMC()_CONFIG [PBANK_LSB], LMC()_CONFIG[RANK_ENA], and all timing parameters. + * + * In this description: + * * config0 refers to the normal memory configuration that is defined by the + * LMC()_CONFIG[ROW_LSB] parameter + * * config1 refers to the dual (or second) memory configuration that is + * defined by this register. + */ +union cvmx_lmcx_dual_memcfg { + u64 u64; + struct cvmx_lmcx_dual_memcfg_s { + uint64_t reserved_20_63:44; + uint64_t bank8:1; + uint64_t row_lsb:3; + uint64_t reserved_8_15:8; + uint64_t cs_mask:8; + } s; + struct cvmx_lmcx_dual_memcfg_s cn50xx; + struct cvmx_lmcx_dual_memcfg_s cn52xx; + struct cvmx_lmcx_dual_memcfg_s cn52xxp1; + struct cvmx_lmcx_dual_memcfg_s cn56xx; + struct cvmx_lmcx_dual_memcfg_s cn56xxp1; + struct cvmx_lmcx_dual_memcfg_s cn58xx; + struct cvmx_lmcx_dual_memcfg_s cn58xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx { + uint64_t reserved_19_63:45; + uint64_t row_lsb:3; + uint64_t reserved_8_15:8; + uint64_t cs_mask:8; + } cn61xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn63xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn63xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx cn66xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn68xx; + struct cvmx_lmcx_dual_memcfg_cn61xx cn68xxp1; + struct cvmx_lmcx_dual_memcfg_cn70xx { + uint64_t reserved_19_63:45; + uint64_t row_lsb:3; + uint64_t reserved_4_15:12; + uint64_t cs_mask:4; + } cn70xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn70xxp1; + struct cvmx_lmcx_dual_memcfg_cn70xx cn73xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn78xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cn78xxp1; + struct cvmx_lmcx_dual_memcfg_cn61xx cnf71xx; + struct cvmx_lmcx_dual_memcfg_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_ecc_parity_test + * + * This register has bits to control the generation of ECC and command + * address parity errors. ECC error is generated by enabling + * [CA_PARITY_CORRUPT_ENA] and selecting any of the [ECC_CORRUPT_IDX] + * index of the dataword from the cacheline to be corrupted. + * User needs to select which bit of the 128-bit dataword to corrupt by + * asserting any of the CHAR_MASK0 and CHAR_MASK2 bits. (CHAR_MASK0 and + * CHAR_MASK2 corresponds to the lower and upper 64-bit signal that can + * corrupt any individual bit of the data). + * + * Command address parity error is generated by enabling + * [CA_PARITY_CORRUPT_ENA] and selecting the DDR command that the parity + * is to be corrupted with through [CA_PARITY_SEL]. 
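+ *
+ * Illustrative sketch only (field names are those of the union below;
+ * the CSR write accessor is platform specific and not assumed here).
+ * A single-bit ECC corruption could be staged as:
+ *
+ *   union cvmx_lmcx_ecc_parity_test test;
+ *   test.u64 = 0;
+ *   test.s.ecc_corrupt_ena = 1;  // arm ECC corruption
+ *   test.s.ecc_corrupt_idx = 2;  // corrupt dataword 2 of the cacheline
+ *   // write test.u64 to LMC()_ECC_PARITY_TEST, then pick the data bit
+ *   // to flip via the CHAR_MASK0/CHAR_MASK2 registers.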
+ */ +union cvmx_lmcx_ecc_parity_test { + u64 u64; + struct cvmx_lmcx_ecc_parity_test_s { + uint64_t reserved_12_63:52; + uint64_t ecc_corrupt_ena:1; + uint64_t ecc_corrupt_idx:3; + uint64_t reserved_6_7:2; + uint64_t ca_parity_corrupt_ena:1; + uint64_t ca_parity_sel:5; + } s; + struct cvmx_lmcx_ecc_parity_test_s cn73xx; + struct cvmx_lmcx_ecc_parity_test_s cn78xx; + struct cvmx_lmcx_ecc_parity_test_s cn78xxp1; + struct cvmx_lmcx_ecc_parity_test_s cnf75xx; +}; + +/** + * cvmx_lmc#_ecc_synd + * + * LMC_ECC_SYND = MRD ECC Syndromes + * + */ +union cvmx_lmcx_ecc_synd { + u64 u64; + struct cvmx_lmcx_ecc_synd_s { + uint64_t reserved_32_63:32; + uint64_t mrdsyn3:8; + uint64_t mrdsyn2:8; + uint64_t mrdsyn1:8; + uint64_t mrdsyn0:8; + } s; + struct cvmx_lmcx_ecc_synd_s cn30xx; + struct cvmx_lmcx_ecc_synd_s cn31xx; + struct cvmx_lmcx_ecc_synd_s cn38xx; + struct cvmx_lmcx_ecc_synd_s cn38xxp2; + struct cvmx_lmcx_ecc_synd_s cn50xx; + struct cvmx_lmcx_ecc_synd_s cn52xx; + struct cvmx_lmcx_ecc_synd_s cn52xxp1; + struct cvmx_lmcx_ecc_synd_s cn56xx; + struct cvmx_lmcx_ecc_synd_s cn56xxp1; + struct cvmx_lmcx_ecc_synd_s cn58xx; + struct cvmx_lmcx_ecc_synd_s cn58xxp1; + struct cvmx_lmcx_ecc_synd_s cn61xx; + struct cvmx_lmcx_ecc_synd_s cn63xx; + struct cvmx_lmcx_ecc_synd_s cn63xxp1; + struct cvmx_lmcx_ecc_synd_s cn66xx; + struct cvmx_lmcx_ecc_synd_s cn68xx; + struct cvmx_lmcx_ecc_synd_s cn68xxp1; + struct cvmx_lmcx_ecc_synd_s cn70xx; + struct cvmx_lmcx_ecc_synd_s cn70xxp1; + struct cvmx_lmcx_ecc_synd_s cn73xx; + struct cvmx_lmcx_ecc_synd_s cn78xx; + struct cvmx_lmcx_ecc_synd_s cn78xxp1; + struct cvmx_lmcx_ecc_synd_s cnf71xx; + struct cvmx_lmcx_ecc_synd_s cnf75xx; +}; + +/** + * cvmx_lmc#_ext_config + * + * This register has additional configuration and control bits for the LMC. 
+ * + */ +union cvmx_lmcx_ext_config { + u64 u64; + struct cvmx_lmcx_ext_config_s { + uint64_t reserved_61_63:3; + uint64_t bc4_dqs_ena:1; + uint64_t ref_block:1; + uint64_t mrs_side:1; + uint64_t mrs_one_side:1; + uint64_t mrs_bside_invert_disable:1; + uint64_t dimm_sel_invert_off:1; + uint64_t dimm_sel_force_invert:1; + uint64_t coalesce_address_mode:1; + uint64_t dimm1_cid:2; + uint64_t dimm0_cid:2; + uint64_t rcd_parity_check:1; + uint64_t reserved_46_47:2; + uint64_t error_alert_n_sample:1; + uint64_t ea_int_polarity:1; + uint64_t reserved_43_43:1; + uint64_t par_addr_mask:3; + uint64_t reserved_38_39:2; + uint64_t mrs_cmd_override:1; + uint64_t mrs_cmd_select:1; + uint64_t reserved_33_35:3; + uint64_t invert_data:1; + uint64_t reserved_30_31:2; + uint64_t cmd_rti:1; + uint64_t cal_ena:1; + uint64_t reserved_27_27:1; + uint64_t par_include_a17:1; + uint64_t par_include_bg1:1; + uint64_t gen_par:1; + uint64_t reserved_21_23:3; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } s; + struct cvmx_lmcx_ext_config_cn70xx { + uint64_t reserved_21_63:43; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } cn70xx; + struct cvmx_lmcx_ext_config_cn70xx cn70xxp1; + struct cvmx_lmcx_ext_config_cn73xx { + uint64_t reserved_60_63:4; + uint64_t ref_block:1; + uint64_t mrs_side:1; + uint64_t mrs_one_side:1; + uint64_t mrs_bside_invert_disable:1; + uint64_t dimm_sel_invert_off:1; + uint64_t dimm_sel_force_invert:1; + uint64_t coalesce_address_mode:1; + uint64_t dimm1_cid:2; + uint64_t dimm0_cid:2; + uint64_t rcd_parity_check:1; + uint64_t reserved_46_47:2; + uint64_t error_alert_n_sample:1; + uint64_t ea_int_polarity:1; + uint64_t reserved_43_43:1; + uint64_t par_addr_mask:3; + uint64_t reserved_38_39:2; + uint64_t mrs_cmd_override:1; + uint64_t mrs_cmd_select:1; + uint64_t reserved_33_35:3; + uint64_t invert_data:1; + uint64_t reserved_30_31:2; + uint64_t cmd_rti:1; + uint64_t cal_ena:1; + uint64_t reserved_27_27:1; + uint64_t par_include_a17:1; + uint64_t par_include_bg1:1; + uint64_t gen_par:1; + uint64_t reserved_21_23:3; + uint64_t vrefint_seq_deskew:1; + uint64_t read_ena_bprch:1; + uint64_t read_ena_fprch:1; + uint64_t slot_ctl_reset_force:1; + uint64_t ref_int_lsbs:9; + uint64_t drive_ena_bprch:1; + uint64_t drive_ena_fprch:1; + uint64_t dlcram_flip_synd:2; + uint64_t dlcram_cor_dis:1; + uint64_t dlc_nxm_rd:1; + uint64_t l2c_nxm_rd:1; + uint64_t l2c_nxm_wr:1; + } cn73xx; + struct cvmx_lmcx_ext_config_s cn78xx; + struct cvmx_lmcx_ext_config_s cn78xxp1; + struct cvmx_lmcx_ext_config_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_ext_config2 + * + * This register has additional configuration and control bits for the LMC. 
+ * + */ +union cvmx_lmcx_ext_config2 { + u64 u64; + struct cvmx_lmcx_ext_config2_s { + uint64_t reserved_27_63:37; + uint64_t sref_auto_idle_thres:5; + uint64_t sref_auto_enable:1; + uint64_t delay_unload_r3:1; + uint64_t delay_unload_r2:1; + uint64_t delay_unload_r1:1; + uint64_t delay_unload_r0:1; + uint64_t early_dqx2:1; + uint64_t xor_bank_sel:4; + uint64_t reserved_10_11:2; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } s; + struct cvmx_lmcx_ext_config2_cn73xx { + uint64_t reserved_10_63:54; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } cn73xx; + struct cvmx_lmcx_ext_config2_s cn78xx; + struct cvmx_lmcx_ext_config2_cnf75xx { + uint64_t reserved_21_63:43; + uint64_t delay_unload_r3:1; + uint64_t delay_unload_r2:1; + uint64_t delay_unload_r1:1; + uint64_t delay_unload_r0:1; + uint64_t early_dqx2:1; + uint64_t xor_bank_sel:4; + uint64_t reserved_10_11:2; + uint64_t row_col_switch:1; + uint64_t trr_on:1; + uint64_t mac:3; + uint64_t macram_scrub_done:1; + uint64_t macram_scrub:1; + uint64_t macram_flip_synd:2; + uint64_t macram_cor_dis:1; + } cnf75xx; +}; + +/** + * cvmx_lmc#_fadr + * + * This register only captures the first transaction with ECC errors. A DED + * error can over-write this register with its failing addresses if the + * first error was a SEC. If you write LMC()_INT -> SEC_ERR/DED_ERR, it + * clears the error bits and captures the next failing address. If FDIMM + * is 1, that means the error is in the high DIMM. LMC()_FADR captures the + * failing pre-scrambled address location (split into DIMM, bunk, bank, etc). + * If scrambling is off, then LMC()_FADR will also capture the failing + * physical location in the DRAM parts. LMC()_SCRAMBLED_FADR captures the + * actual failing address location in the physical DRAM parts, i.e., + * If scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical + * location in the DRAM parts (split into DIMM, bunk, bank, etc.) + * If scrambling is off, the pre-scramble and post-scramble addresses are + * the same; and so the contents of LMC()_SCRAMBLED_FADR match the contents + * of LMC()_FADR. 
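+ *
+ * Illustrative sketch only (the CSR read accessor is platform specific
+ * and not assumed here). After LMC()_INT[SEC_ERR] or [DED_ERR] fires,
+ * the captured location can be decoded through the CN73XX layout below:
+ *
+ *   union cvmx_lmcx_fadr fadr;
+ *   fadr.u64 = ...;              // value read from LMC()_FADR
+ *   // fadr.cn73xx.fdimm, .fbunk and .fbank give the DIMM, rank and
+ *   // bank; fadr.cn73xx.frow and .fcol give the failing row/column.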
+ */ +union cvmx_lmcx_fadr { + u64 u64; + struct cvmx_lmcx_fadr_s { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t reserved_0_37:38; + } s; + struct cvmx_lmcx_fadr_cn30xx { + uint64_t reserved_32_63:32; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:14; + uint64_t fcol:12; + } cn30xx; + struct cvmx_lmcx_fadr_cn30xx cn31xx; + struct cvmx_lmcx_fadr_cn30xx cn38xx; + struct cvmx_lmcx_fadr_cn30xx cn38xxp2; + struct cvmx_lmcx_fadr_cn30xx cn50xx; + struct cvmx_lmcx_fadr_cn30xx cn52xx; + struct cvmx_lmcx_fadr_cn30xx cn52xxp1; + struct cvmx_lmcx_fadr_cn30xx cn56xx; + struct cvmx_lmcx_fadr_cn30xx cn56xxp1; + struct cvmx_lmcx_fadr_cn30xx cn58xx; + struct cvmx_lmcx_fadr_cn30xx cn58xxp1; + struct cvmx_lmcx_fadr_cn61xx { + uint64_t reserved_36_63:28; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:16; + uint64_t fcol:14; + } cn61xx; + struct cvmx_lmcx_fadr_cn61xx cn63xx; + struct cvmx_lmcx_fadr_cn61xx cn63xxp1; + struct cvmx_lmcx_fadr_cn61xx cn66xx; + struct cvmx_lmcx_fadr_cn61xx cn68xx; + struct cvmx_lmcx_fadr_cn61xx cn68xxp1; + struct cvmx_lmcx_fadr_cn70xx { + uint64_t reserved_40_63:24; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn70xx; + struct cvmx_lmcx_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_fadr_cn73xx { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn73xx; + struct cvmx_lmcx_fadr_cn73xx cn78xx; + struct cvmx_lmcx_fadr_cn73xx cn78xxp1; + struct cvmx_lmcx_fadr_cn61xx cnf71xx; + struct cvmx_lmcx_fadr_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose0 + */ +union cvmx_lmcx_general_purpose0 { + u64 u64; + struct cvmx_lmcx_general_purpose0_s { + uint64_t data:64; + } s; + struct cvmx_lmcx_general_purpose0_s cn73xx; + struct cvmx_lmcx_general_purpose0_s cn78xx; + struct cvmx_lmcx_general_purpose0_s cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose1 + */ +union cvmx_lmcx_general_purpose1 { + u64 u64; + struct cvmx_lmcx_general_purpose1_s { + uint64_t data:64; + } s; + struct cvmx_lmcx_general_purpose1_s cn73xx; + struct cvmx_lmcx_general_purpose1_s cn78xx; + struct cvmx_lmcx_general_purpose1_s cnf75xx; +}; + +/** + * cvmx_lmc#_general_purpose2 + */ +union cvmx_lmcx_general_purpose2 { + u64 u64; + struct cvmx_lmcx_general_purpose2_s { + uint64_t reserved_16_63:48; + uint64_t data:16; + } s; + struct cvmx_lmcx_general_purpose2_s cn73xx; + struct cvmx_lmcx_general_purpose2_s cn78xx; + struct cvmx_lmcx_general_purpose2_s cnf75xx; +}; + +/** + * cvmx_lmc#_ifb_cnt + * + * LMC_IFB_CNT = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt { + u64 u64; + struct cvmx_lmcx_ifb_cnt_s { + uint64_t ifbcnt:64; + } s; + struct cvmx_lmcx_ifb_cnt_s cn61xx; + struct cvmx_lmcx_ifb_cnt_s cn63xx; + struct cvmx_lmcx_ifb_cnt_s cn63xxp1; + struct cvmx_lmcx_ifb_cnt_s cn66xx; + struct cvmx_lmcx_ifb_cnt_s cn68xx; + struct cvmx_lmcx_ifb_cnt_s cn68xxp1; + struct cvmx_lmcx_ifb_cnt_s cn70xx; + struct cvmx_lmcx_ifb_cnt_s cn70xxp1; + struct cvmx_lmcx_ifb_cnt_s cn73xx; + struct cvmx_lmcx_ifb_cnt_s cn78xx; + struct cvmx_lmcx_ifb_cnt_s cn78xxp1; + struct cvmx_lmcx_ifb_cnt_s cnf71xx; + struct cvmx_lmcx_ifb_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_ifb_cnt_hi + * + * LMC_IFB_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt_hi { + u64 u64; + struct cvmx_lmcx_ifb_cnt_hi_s { + uint64_t reserved_32_63:32; + 
uint64_t ifbcnt_hi:32; + } s; + struct cvmx_lmcx_ifb_cnt_hi_s cn30xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn31xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn38xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_ifb_cnt_hi_s cn50xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn52xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_ifb_cnt_hi_s cn56xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_ifb_cnt_hi_s cn58xx; + struct cvmx_lmcx_ifb_cnt_hi_s cn58xxp1; +}; + +/** + * cvmx_lmc#_ifb_cnt_lo + * + * LMC_IFB_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_ifb_cnt_lo { + u64 u64; + struct cvmx_lmcx_ifb_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t ifbcnt_lo:32; + } s; + struct cvmx_lmcx_ifb_cnt_lo_s cn30xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn31xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn38xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_ifb_cnt_lo_s cn50xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn52xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_ifb_cnt_lo_s cn56xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_ifb_cnt_lo_s cn58xx; + struct cvmx_lmcx_ifb_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_int + * + * This register contains the different interrupt-summary bits of the LMC. + * + */ +union cvmx_lmcx_int { + u64 u64; + struct cvmx_lmcx_int_s { + uint64_t reserved_14_63:50; + uint64_t macram_ded_err:1; + uint64_t macram_sec_err:1; + uint64_t ddr_err:1; + uint64_t dlcram_ded_err:1; + uint64_t dlcram_sec_err:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } s; + struct cvmx_lmcx_int_cn61xx { + uint64_t reserved_9_63:55; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } cn61xx; + struct cvmx_lmcx_int_cn61xx cn63xx; + struct cvmx_lmcx_int_cn61xx cn63xxp1; + struct cvmx_lmcx_int_cn61xx cn66xx; + struct cvmx_lmcx_int_cn61xx cn68xx; + struct cvmx_lmcx_int_cn61xx cn68xxp1; + struct cvmx_lmcx_int_cn70xx { + uint64_t reserved_12_63:52; + uint64_t ddr_err:1; + uint64_t dlcram_ded_err:1; + uint64_t dlcram_sec_err:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t nxm_wr_err:1; + } cn70xx; + struct cvmx_lmcx_int_cn70xx cn70xxp1; + struct cvmx_lmcx_int_s cn73xx; + struct cvmx_lmcx_int_s cn78xx; + struct cvmx_lmcx_int_s cn78xxp1; + struct cvmx_lmcx_int_cn61xx cnf71xx; + struct cvmx_lmcx_int_s cnf75xx; +}; + +/** + * cvmx_lmc#_int_en + * + * Unused CSR in O75. + * + */ +union cvmx_lmcx_int_en { + u64 u64; + struct cvmx_lmcx_int_en_s { + uint64_t reserved_6_63:58; + uint64_t ddr_error_alert_ena:1; + uint64_t dlcram_ded_ena:1; + uint64_t dlcram_sec_ena:1; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t intr_nxm_wr_ena:1; + } s; + struct cvmx_lmcx_int_en_cn61xx { + uint64_t reserved_3_63:61; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t intr_nxm_wr_ena:1; + } cn61xx; + struct cvmx_lmcx_int_en_cn61xx cn63xx; + struct cvmx_lmcx_int_en_cn61xx cn63xxp1; + struct cvmx_lmcx_int_en_cn61xx cn66xx; + struct cvmx_lmcx_int_en_cn61xx cn68xx; + struct cvmx_lmcx_int_en_cn61xx cn68xxp1; + struct cvmx_lmcx_int_en_s cn70xx; + struct cvmx_lmcx_int_en_s cn70xxp1; + struct cvmx_lmcx_int_en_s cn73xx; + struct cvmx_lmcx_int_en_s cn78xx; + struct cvmx_lmcx_int_en_s cn78xxp1; + struct cvmx_lmcx_int_en_cn61xx cnf71xx; + struct cvmx_lmcx_int_en_s cnf75xx; +}; + +/** + * cvmx_lmc#_lane#_crc_swiz + * + * This register contains the CRC bit swizzle for even and odd ranks. 
+ * + */ +union cvmx_lmcx_lanex_crc_swiz { + u64 u64; + struct cvmx_lmcx_lanex_crc_swiz_s { + uint64_t reserved_56_63:8; + uint64_t r1_swiz7:3; + uint64_t r1_swiz6:3; + uint64_t r1_swiz5:3; + uint64_t r1_swiz4:3; + uint64_t r1_swiz3:3; + uint64_t r1_swiz2:3; + uint64_t r1_swiz1:3; + uint64_t r1_swiz0:3; + uint64_t reserved_24_31:8; + uint64_t r0_swiz7:3; + uint64_t r0_swiz6:3; + uint64_t r0_swiz5:3; + uint64_t r0_swiz4:3; + uint64_t r0_swiz3:3; + uint64_t r0_swiz2:3; + uint64_t r0_swiz1:3; + uint64_t r0_swiz0:3; + } s; + struct cvmx_lmcx_lanex_crc_swiz_s cn73xx; + struct cvmx_lmcx_lanex_crc_swiz_s cn78xx; + struct cvmx_lmcx_lanex_crc_swiz_s cn78xxp1; + struct cvmx_lmcx_lanex_crc_swiz_s cnf75xx; +}; + +/** + * cvmx_lmc#_mem_cfg0 + * + * Specify the RSL base addresses for the block + * + * LMC_MEM_CFG0 = LMC Memory Configuration Register0 + * + * This register controls certain parameters of Memory Configuration + */ +union cvmx_lmcx_mem_cfg0 { + u64 u64; + struct cvmx_lmcx_mem_cfg0_s { + uint64_t reserved_32_63:32; + uint64_t reset:1; + uint64_t silo_qc:1; + uint64_t bunk_ena:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t tcl:4; + uint64_t ref_int:6; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_mem_cfg0_s cn30xx; + struct cvmx_lmcx_mem_cfg0_s cn31xx; + struct cvmx_lmcx_mem_cfg0_s cn38xx; + struct cvmx_lmcx_mem_cfg0_s cn38xxp2; + struct cvmx_lmcx_mem_cfg0_s cn50xx; + struct cvmx_lmcx_mem_cfg0_s cn52xx; + struct cvmx_lmcx_mem_cfg0_s cn52xxp1; + struct cvmx_lmcx_mem_cfg0_s cn56xx; + struct cvmx_lmcx_mem_cfg0_s cn56xxp1; + struct cvmx_lmcx_mem_cfg0_s cn58xx; + struct cvmx_lmcx_mem_cfg0_s cn58xxp1; +}; + +/** + * cvmx_lmc#_mem_cfg1 + * + * LMC_MEM_CFG1 = LMC Memory Configuration Register1 + * + * This register controls the External Memory Configuration Timing Parameters. + * Please refer to the appropriate DDR part spec from your memory vendor for + * the various values in this CSR. The details of each of these timing + * parameters can be found in the JEDEC spec or the vendor spec of the + * memory parts. + */ +union cvmx_lmcx_mem_cfg1 { + u64 u64; + struct cvmx_lmcx_mem_cfg1_s { + uint64_t reserved_32_63:32; + uint64_t comp_bypass:1; + uint64_t trrd:3; + uint64_t caslat:3; + uint64_t tmrd:3; + uint64_t trfc:5; + uint64_t trp:4; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + } s; + struct cvmx_lmcx_mem_cfg1_s cn30xx; + struct cvmx_lmcx_mem_cfg1_s cn31xx; + struct cvmx_lmcx_mem_cfg1_cn38xx { + uint64_t reserved_31_63:33; + uint64_t trrd:3; + uint64_t caslat:3; + uint64_t tmrd:3; + uint64_t trfc:5; + uint64_t trp:4; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + } cn38xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn38xxp2; + struct cvmx_lmcx_mem_cfg1_s cn50xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn52xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn52xxp1; + struct cvmx_lmcx_mem_cfg1_cn38xx cn56xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn56xxp1; + struct cvmx_lmcx_mem_cfg1_cn38xx cn58xx; + struct cvmx_lmcx_mem_cfg1_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_modereg_params0 + * + * These parameters are written into the DDR3/DDR4 MR0, MR1, MR2 and MR3 + * registers. 
+ * + */ +union cvmx_lmcx_modereg_params0 { + u64 u64; + struct cvmx_lmcx_modereg_params0_s { + uint64_t reserved_28_63:36; + uint64_t wrp_ext:1; + uint64_t cl_ext:1; + uint64_t al_ext:1; + uint64_t ppd:1; + uint64_t wrp:3; + uint64_t dllr:1; + uint64_t tm:1; + uint64_t rbt:1; + uint64_t cl:4; + uint64_t bl:2; + uint64_t qoff:1; + uint64_t tdqs:1; + uint64_t wlev:1; + uint64_t al:2; + uint64_t dll:1; + uint64_t mpr:1; + uint64_t mprloc:2; + uint64_t cwl:3; + } s; + struct cvmx_lmcx_modereg_params0_cn61xx { + uint64_t reserved_25_63:39; + uint64_t ppd:1; + uint64_t wrp:3; + uint64_t dllr:1; + uint64_t tm:1; + uint64_t rbt:1; + uint64_t cl:4; + uint64_t bl:2; + uint64_t qoff:1; + uint64_t tdqs:1; + uint64_t wlev:1; + uint64_t al:2; + uint64_t dll:1; + uint64_t mpr:1; + uint64_t mprloc:2; + uint64_t cwl:3; + } cn61xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn63xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn63xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cn66xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn68xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn68xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cn70xx; + struct cvmx_lmcx_modereg_params0_cn61xx cn70xxp1; + struct cvmx_lmcx_modereg_params0_s cn73xx; + struct cvmx_lmcx_modereg_params0_s cn78xx; + struct cvmx_lmcx_modereg_params0_s cn78xxp1; + struct cvmx_lmcx_modereg_params0_cn61xx cnf71xx; + struct cvmx_lmcx_modereg_params0_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params1 + * + * These parameters are written into the DDR3 MR0, MR1, MR2 and MR3 registers. + * + */ +union cvmx_lmcx_modereg_params1 { + u64 u64; + struct cvmx_lmcx_modereg_params1_s { + uint64_t reserved_55_63:9; + uint64_t rtt_wr_11_ext:1; + uint64_t rtt_wr_10_ext:1; + uint64_t rtt_wr_01_ext:1; + uint64_t rtt_wr_00_ext:1; + uint64_t db_output_impedance:3; + uint64_t rtt_nom_11:3; + uint64_t dic_11:2; + uint64_t rtt_wr_11:2; + uint64_t srt_11:1; + uint64_t asr_11:1; + uint64_t pasr_11:3; + uint64_t rtt_nom_10:3; + uint64_t dic_10:2; + uint64_t rtt_wr_10:2; + uint64_t srt_10:1; + uint64_t asr_10:1; + uint64_t pasr_10:3; + uint64_t rtt_nom_01:3; + uint64_t dic_01:2; + uint64_t rtt_wr_01:2; + uint64_t srt_01:1; + uint64_t asr_01:1; + uint64_t pasr_01:3; + uint64_t rtt_nom_00:3; + uint64_t dic_00:2; + uint64_t rtt_wr_00:2; + uint64_t srt_00:1; + uint64_t asr_00:1; + uint64_t pasr_00:3; + } s; + struct cvmx_lmcx_modereg_params1_cn61xx { + uint64_t reserved_48_63:16; + uint64_t rtt_nom_11:3; + uint64_t dic_11:2; + uint64_t rtt_wr_11:2; + uint64_t srt_11:1; + uint64_t asr_11:1; + uint64_t pasr_11:3; + uint64_t rtt_nom_10:3; + uint64_t dic_10:2; + uint64_t rtt_wr_10:2; + uint64_t srt_10:1; + uint64_t asr_10:1; + uint64_t pasr_10:3; + uint64_t rtt_nom_01:3; + uint64_t dic_01:2; + uint64_t rtt_wr_01:2; + uint64_t srt_01:1; + uint64_t asr_01:1; + uint64_t pasr_01:3; + uint64_t rtt_nom_00:3; + uint64_t dic_00:2; + uint64_t rtt_wr_00:2; + uint64_t srt_00:1; + uint64_t asr_00:1; + uint64_t pasr_00:3; + } cn61xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn63xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn63xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cn66xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn68xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn68xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cn70xx; + struct cvmx_lmcx_modereg_params1_cn61xx cn70xxp1; + struct cvmx_lmcx_modereg_params1_s cn73xx; + struct cvmx_lmcx_modereg_params1_s cn78xx; + struct cvmx_lmcx_modereg_params1_s cn78xxp1; + struct cvmx_lmcx_modereg_params1_cn61xx cnf71xx; + struct 
cvmx_lmcx_modereg_params1_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params2 + * + * These parameters are written into the DDR4 mode registers. + * + */ +union cvmx_lmcx_modereg_params2 { + u64 u64; + struct cvmx_lmcx_modereg_params2_s { + uint64_t reserved_41_63:23; + uint64_t vrefdq_train_en:1; + uint64_t vref_range_11:1; + uint64_t vref_value_11:6; + uint64_t rtt_park_11:3; + uint64_t vref_range_10:1; + uint64_t vref_value_10:6; + uint64_t rtt_park_10:3; + uint64_t vref_range_01:1; + uint64_t vref_value_01:6; + uint64_t rtt_park_01:3; + uint64_t vref_range_00:1; + uint64_t vref_value_00:6; + uint64_t rtt_park_00:3; + } s; + struct cvmx_lmcx_modereg_params2_s cn70xx; + struct cvmx_lmcx_modereg_params2_cn70xxp1 { + uint64_t reserved_40_63:24; + uint64_t vref_range_11:1; + uint64_t vref_value_11:6; + uint64_t rtt_park_11:3; + uint64_t vref_range_10:1; + uint64_t vref_value_10:6; + uint64_t rtt_park_10:3; + uint64_t vref_range_01:1; + uint64_t vref_value_01:6; + uint64_t rtt_park_01:3; + uint64_t vref_range_00:1; + uint64_t vref_value_00:6; + uint64_t rtt_park_00:3; + } cn70xxp1; + struct cvmx_lmcx_modereg_params2_s cn73xx; + struct cvmx_lmcx_modereg_params2_s cn78xx; + struct cvmx_lmcx_modereg_params2_s cn78xxp1; + struct cvmx_lmcx_modereg_params2_s cnf75xx; +}; + +/** + * cvmx_lmc#_modereg_params3 + * + * These parameters are written into the DDR4 mode registers. + * + */ +union cvmx_lmcx_modereg_params3 { + u64 u64; + struct cvmx_lmcx_modereg_params3_s { + uint64_t reserved_39_63:25; + uint64_t xrank_add_tccd_l:3; + uint64_t xrank_add_tccd_s:3; + uint64_t mpr_fmt:2; + uint64_t wr_cmd_lat:2; + uint64_t fgrm:3; + uint64_t temp_sense:1; + uint64_t pda:1; + uint64_t gd:1; + uint64_t crc:1; + uint64_t lpasr:2; + uint64_t tccd_l:3; + uint64_t rd_dbi:1; + uint64_t wr_dbi:1; + uint64_t dm:1; + uint64_t ca_par_pers:1; + uint64_t odt_pd:1; + uint64_t par_lat_mode:3; + uint64_t wr_preamble:1; + uint64_t rd_preamble:1; + uint64_t sre_abort:1; + uint64_t cal:3; + uint64_t vref_mon:1; + uint64_t tc_ref:1; + uint64_t max_pd:1; + } s; + struct cvmx_lmcx_modereg_params3_cn70xx { + uint64_t reserved_33_63:31; + uint64_t mpr_fmt:2; + uint64_t wr_cmd_lat:2; + uint64_t fgrm:3; + uint64_t temp_sense:1; + uint64_t pda:1; + uint64_t gd:1; + uint64_t crc:1; + uint64_t lpasr:2; + uint64_t tccd_l:3; + uint64_t rd_dbi:1; + uint64_t wr_dbi:1; + uint64_t dm:1; + uint64_t ca_par_pers:1; + uint64_t odt_pd:1; + uint64_t par_lat_mode:3; + uint64_t wr_preamble:1; + uint64_t rd_preamble:1; + uint64_t sre_abort:1; + uint64_t cal:3; + uint64_t vref_mon:1; + uint64_t tc_ref:1; + uint64_t max_pd:1; + } cn70xx; + struct cvmx_lmcx_modereg_params3_cn70xx cn70xxp1; + struct cvmx_lmcx_modereg_params3_s cn73xx; + struct cvmx_lmcx_modereg_params3_s cn78xx; + struct cvmx_lmcx_modereg_params3_s cn78xxp1; + struct cvmx_lmcx_modereg_params3_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data0 + * + * This register provides bits <63:0> of MPR data register. + * + */ +union cvmx_lmcx_mpr_data0 { + u64 u64; + struct cvmx_lmcx_mpr_data0_s { + uint64_t mpr_data:64; + } s; + struct cvmx_lmcx_mpr_data0_s cn70xx; + struct cvmx_lmcx_mpr_data0_s cn70xxp1; + struct cvmx_lmcx_mpr_data0_s cn73xx; + struct cvmx_lmcx_mpr_data0_s cn78xx; + struct cvmx_lmcx_mpr_data0_s cn78xxp1; + struct cvmx_lmcx_mpr_data0_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data1 + * + * This register provides bits <127:64> of MPR data register. 
+ * + */ +union cvmx_lmcx_mpr_data1 { + u64 u64; + struct cvmx_lmcx_mpr_data1_s { + uint64_t mpr_data:64; + } s; + struct cvmx_lmcx_mpr_data1_s cn70xx; + struct cvmx_lmcx_mpr_data1_s cn70xxp1; + struct cvmx_lmcx_mpr_data1_s cn73xx; + struct cvmx_lmcx_mpr_data1_s cn78xx; + struct cvmx_lmcx_mpr_data1_s cn78xxp1; + struct cvmx_lmcx_mpr_data1_s cnf75xx; +}; + +/** + * cvmx_lmc#_mpr_data2 + * + * This register provides bits <143:128> of MPR data register. + * + */ +union cvmx_lmcx_mpr_data2 { + u64 u64; + struct cvmx_lmcx_mpr_data2_s { + uint64_t reserved_16_63:48; + uint64_t mpr_data:16; + } s; + struct cvmx_lmcx_mpr_data2_s cn70xx; + struct cvmx_lmcx_mpr_data2_s cn70xxp1; + struct cvmx_lmcx_mpr_data2_s cn73xx; + struct cvmx_lmcx_mpr_data2_s cn78xx; + struct cvmx_lmcx_mpr_data2_s cn78xxp1; + struct cvmx_lmcx_mpr_data2_s cnf75xx; +}; + +/** + * cvmx_lmc#_mr_mpr_ctl + * + * This register provides the control functions when programming the MPR + * of DDR4 DRAMs. + * + */ +union cvmx_lmcx_mr_mpr_ctl { + u64 u64; + struct cvmx_lmcx_mr_mpr_ctl_s { + uint64_t reserved_61_63:3; + uint64_t mr_wr_secure_key_ena:1; + uint64_t pba_func_space:3; + uint64_t mr_wr_bg1:1; + uint64_t mpr_sample_dq_enable:1; + uint64_t pda_early_dqx:1; + uint64_t mr_wr_pba_enable:1; + uint64_t mr_wr_use_default_value:1; + uint64_t mpr_whole_byte_enable:1; + uint64_t mpr_byte_select:4; + uint64_t mpr_bit_select:2; + uint64_t mpr_wr:1; + uint64_t mpr_loc:2; + uint64_t mr_wr_pda_enable:1; + uint64_t mr_wr_pda_mask:18; + uint64_t mr_wr_rank:2; + uint64_t mr_wr_sel:3; + uint64_t mr_wr_addr:18; + } s; + struct cvmx_lmcx_mr_mpr_ctl_cn70xx { + uint64_t reserved_52_63:12; + uint64_t mpr_whole_byte_enable:1; + uint64_t mpr_byte_select:4; + uint64_t mpr_bit_select:2; + uint64_t mpr_wr:1; + uint64_t mpr_loc:2; + uint64_t mr_wr_pda_enable:1; + uint64_t mr_wr_pda_mask:18; + uint64_t mr_wr_rank:2; + uint64_t mr_wr_sel:3; + uint64_t mr_wr_addr:18; + } cn70xx; + struct cvmx_lmcx_mr_mpr_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_mr_mpr_ctl_s cn73xx; + struct cvmx_lmcx_mr_mpr_ctl_s cn78xx; + struct cvmx_lmcx_mr_mpr_ctl_s cn78xxp1; + struct cvmx_lmcx_mr_mpr_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_ns_ctl + * + * This register contains control parameters for handling nonsecure accesses. + * + */ +union cvmx_lmcx_ns_ctl { + u64 u64; + struct cvmx_lmcx_ns_ctl_s { + uint64_t reserved_26_63:38; + uint64_t ns_scramble_dis:1; + uint64_t reserved_18_24:7; + uint64_t adr_offset:18; + } s; + struct cvmx_lmcx_ns_ctl_s cn73xx; + struct cvmx_lmcx_ns_ctl_s cn78xx; + struct cvmx_lmcx_ns_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_nxm + * + * Following is the decoding for mem_msb/rank: + * 0x0: mem_msb = mem_adr[25]. + * 0x1: mem_msb = mem_adr[26]. + * 0x2: mem_msb = mem_adr[27]. + * 0x3: mem_msb = mem_adr[28]. + * 0x4: mem_msb = mem_adr[29]. + * 0x5: mem_msb = mem_adr[30]. + * 0x6: mem_msb = mem_adr[31]. + * 0x7: mem_msb = mem_adr[32]. + * 0x8: mem_msb = mem_adr[33]. + * 0x9: mem_msb = mem_adr[34]. + * 0xA: mem_msb = mem_adr[35]. + * 0xB: mem_msb = mem_adr[36]. + * 0xC-0xF = Reserved. + * + * For example, for a DIMM made of Samsung's K4B1G0846C-ZCF7 1Gb + * (16M * 8 bit * 8 bank) parts, the column address width = 10; so with + * 10b of col, 3b of bus, 3b of bank, row_lsb = 16. + * Therefore, row = mem_adr[29:16] and mem_msb = 4. + * + * Note also that addresses greater than the max defined space (pbank_msb) + * are also treated as NXM accesses. 
+ */ +union cvmx_lmcx_nxm { + u64 u64; + struct cvmx_lmcx_nxm_s { + uint64_t reserved_40_63:24; + uint64_t mem_msb_d3_r1:4; + uint64_t mem_msb_d3_r0:4; + uint64_t mem_msb_d2_r1:4; + uint64_t mem_msb_d2_r0:4; + uint64_t mem_msb_d1_r1:4; + uint64_t mem_msb_d1_r0:4; + uint64_t mem_msb_d0_r1:4; + uint64_t mem_msb_d0_r0:4; + uint64_t cs_mask:8; + } s; + struct cvmx_lmcx_nxm_cn52xx { + uint64_t reserved_8_63:56; + uint64_t cs_mask:8; + } cn52xx; + struct cvmx_lmcx_nxm_cn52xx cn56xx; + struct cvmx_lmcx_nxm_cn52xx cn58xx; + struct cvmx_lmcx_nxm_s cn61xx; + struct cvmx_lmcx_nxm_s cn63xx; + struct cvmx_lmcx_nxm_s cn63xxp1; + struct cvmx_lmcx_nxm_s cn66xx; + struct cvmx_lmcx_nxm_s cn68xx; + struct cvmx_lmcx_nxm_s cn68xxp1; + struct cvmx_lmcx_nxm_cn70xx { + uint64_t reserved_24_63:40; + uint64_t mem_msb_d1_r1:4; + uint64_t mem_msb_d1_r0:4; + uint64_t mem_msb_d0_r1:4; + uint64_t mem_msb_d0_r0:4; + uint64_t reserved_4_7:4; + uint64_t cs_mask:4; + } cn70xx; + struct cvmx_lmcx_nxm_cn70xx cn70xxp1; + struct cvmx_lmcx_nxm_cn70xx cn73xx; + struct cvmx_lmcx_nxm_cn70xx cn78xx; + struct cvmx_lmcx_nxm_cn70xx cn78xxp1; + struct cvmx_lmcx_nxm_s cnf71xx; + struct cvmx_lmcx_nxm_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_nxm_fadr + * + * This register captures only the first transaction with a NXM error while + * an interrupt is pending, and only captures a subsequent event once the + * interrupt is cleared by writing a one to LMC()_INT[NXM_ERR]. It captures + * the actual L2C-LMC address provided to the LMC that caused the NXM error. + * A read or write NXM error is captured only if enabled using the NXM + * event enables. + */ +union cvmx_lmcx_nxm_fadr { + u64 u64; + struct cvmx_lmcx_nxm_fadr_s { + uint64_t reserved_40_63:24; + uint64_t nxm_faddr_ext:1; + uint64_t nxm_src:1; + uint64_t nxm_type:1; + uint64_t nxm_faddr:37; + } s; + struct cvmx_lmcx_nxm_fadr_cn70xx { + uint64_t reserved_39_63:25; + uint64_t nxm_src:1; + uint64_t nxm_type:1; + uint64_t nxm_faddr:37; + } cn70xx; + struct cvmx_lmcx_nxm_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_nxm_fadr_s cn73xx; + struct cvmx_lmcx_nxm_fadr_s cn78xx; + struct cvmx_lmcx_nxm_fadr_s cn78xxp1; + struct cvmx_lmcx_nxm_fadr_s cnf75xx; +}; + +/** + * cvmx_lmc#_ops_cnt + * + * LMC_OPS_CNT = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt { + u64 u64; + struct cvmx_lmcx_ops_cnt_s { + uint64_t opscnt:64; + } s; + struct cvmx_lmcx_ops_cnt_s cn61xx; + struct cvmx_lmcx_ops_cnt_s cn63xx; + struct cvmx_lmcx_ops_cnt_s cn63xxp1; + struct cvmx_lmcx_ops_cnt_s cn66xx; + struct cvmx_lmcx_ops_cnt_s cn68xx; + struct cvmx_lmcx_ops_cnt_s cn68xxp1; + struct cvmx_lmcx_ops_cnt_s cn70xx; + struct cvmx_lmcx_ops_cnt_s cn70xxp1; + struct cvmx_lmcx_ops_cnt_s cn73xx; + struct cvmx_lmcx_ops_cnt_s cn78xx; + struct cvmx_lmcx_ops_cnt_s cn78xxp1; + struct cvmx_lmcx_ops_cnt_s cnf71xx; + struct cvmx_lmcx_ops_cnt_s cnf75xx; +}; + +/** + * cvmx_lmc#_ops_cnt_hi + * + * LMC_OPS_CNT_HI = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt_hi { + u64 u64; + struct cvmx_lmcx_ops_cnt_hi_s { + uint64_t reserved_32_63:32; + uint64_t opscnt_hi:32; + } s; + struct cvmx_lmcx_ops_cnt_hi_s cn30xx; + struct cvmx_lmcx_ops_cnt_hi_s cn31xx; + struct cvmx_lmcx_ops_cnt_hi_s cn38xx; + struct cvmx_lmcx_ops_cnt_hi_s cn38xxp2; + struct cvmx_lmcx_ops_cnt_hi_s cn50xx; + struct cvmx_lmcx_ops_cnt_hi_s cn52xx; + struct cvmx_lmcx_ops_cnt_hi_s cn52xxp1; + struct cvmx_lmcx_ops_cnt_hi_s cn56xx; + struct cvmx_lmcx_ops_cnt_hi_s cn56xxp1; + struct cvmx_lmcx_ops_cnt_hi_s cn58xx; + struct cvmx_lmcx_ops_cnt_hi_s cn58xxp1; +}; + +/** + * 
cvmx_lmc#_ops_cnt_lo + * + * LMC_OPS_CNT_LO = Performance Counters + * + */ +union cvmx_lmcx_ops_cnt_lo { + u64 u64; + struct cvmx_lmcx_ops_cnt_lo_s { + uint64_t reserved_32_63:32; + uint64_t opscnt_lo:32; + } s; + struct cvmx_lmcx_ops_cnt_lo_s cn30xx; + struct cvmx_lmcx_ops_cnt_lo_s cn31xx; + struct cvmx_lmcx_ops_cnt_lo_s cn38xx; + struct cvmx_lmcx_ops_cnt_lo_s cn38xxp2; + struct cvmx_lmcx_ops_cnt_lo_s cn50xx; + struct cvmx_lmcx_ops_cnt_lo_s cn52xx; + struct cvmx_lmcx_ops_cnt_lo_s cn52xxp1; + struct cvmx_lmcx_ops_cnt_lo_s cn56xx; + struct cvmx_lmcx_ops_cnt_lo_s cn56xxp1; + struct cvmx_lmcx_ops_cnt_lo_s cn58xx; + struct cvmx_lmcx_ops_cnt_lo_s cn58xxp1; +}; + +/** + * cvmx_lmc#_phy_ctl + * + * LMC_PHY_CTL = LMC PHY Control + * + */ +union cvmx_lmcx_phy_ctl { + u64 u64; + struct cvmx_lmcx_phy_ctl_s { + uint64_t reserved_61_63:3; + uint64_t dsk_dbg_load_dis:1; + uint64_t dsk_dbg_overwrt_ena:1; + uint64_t dsk_dbg_wr_mode:1; + uint64_t data_rate_loopback:1; + uint64_t dq_shallow_loopback:1; + uint64_t dm_disable:1; + uint64_t c1_sel:2; + uint64_t c0_sel:2; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } s; + struct cvmx_lmcx_phy_ctl_cn61xx { + uint64_t reserved_15_63:49; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn61xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn63xx; + struct cvmx_lmcx_phy_ctl_cn63xxp1 { + uint64_t reserved_14_63:50; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn63xxp1; + struct cvmx_lmcx_phy_ctl_cn61xx cn66xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn68xx; + struct cvmx_lmcx_phy_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_phy_ctl_cn70xx { + uint64_t reserved_51_63:13; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn70xx; + struct cvmx_lmcx_phy_ctl_cn70xx cn70xxp1; + struct cvmx_lmcx_phy_ctl_cn73xx { + uint64_t 
reserved_58_63:6; + uint64_t data_rate_loopback:1; + uint64_t dq_shallow_loopback:1; + uint64_t dm_disable:1; + uint64_t c1_sel:2; + uint64_t c0_sel:2; + uint64_t phy_reset:1; + uint64_t dsk_dbg_rd_complete:1; + uint64_t dsk_dbg_rd_data:10; + uint64_t dsk_dbg_rd_start:1; + uint64_t dsk_dbg_clk_scaler:2; + uint64_t dsk_dbg_offset:2; + uint64_t dsk_dbg_num_bits_sel:1; + uint64_t dsk_dbg_byte_sel:4; + uint64_t dsk_dbg_bit_sel:4; + uint64_t dbi_mode_ena:1; + uint64_t ddr_error_n_ena:1; + uint64_t ref_pin_on:1; + uint64_t dac_on:1; + uint64_t int_pad_loopback_ena:1; + uint64_t int_phy_loopback_ena:1; + uint64_t phy_dsk_reset:1; + uint64_t phy_dsk_byp:1; + uint64_t phy_pwr_save_disable:1; + uint64_t ten:1; + uint64_t rx_always_on:1; + uint64_t lv_mode:1; + uint64_t ck_tune1:1; + uint64_t ck_dlyout1:4; + uint64_t ck_tune0:1; + uint64_t ck_dlyout0:4; + uint64_t loopback:1; + uint64_t loopback_pos:1; + uint64_t ts_stagger:1; + } cn73xx; + struct cvmx_lmcx_phy_ctl_s cn78xx; + struct cvmx_lmcx_phy_ctl_s cn78xxp1; + struct cvmx_lmcx_phy_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_phy_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_phy_ctl2 + */ +union cvmx_lmcx_phy_ctl2 { + u64 u64; + struct cvmx_lmcx_phy_ctl2_s { + uint64_t reserved_27_63:37; + uint64_t dqs8_dsk_adj:3; + uint64_t dqs7_dsk_adj:3; + uint64_t dqs6_dsk_adj:3; + uint64_t dqs5_dsk_adj:3; + uint64_t dqs4_dsk_adj:3; + uint64_t dqs3_dsk_adj:3; + uint64_t dqs2_dsk_adj:3; + uint64_t dqs1_dsk_adj:3; + uint64_t dqs0_dsk_adj:3; + } s; + struct cvmx_lmcx_phy_ctl2_s cn78xx; + struct cvmx_lmcx_phy_ctl2_s cnf75xx; +}; + +/** + * cvmx_lmc#_pll_bwctl + * + * LMC_PLL_BWCTL = DDR PLL Bandwidth Control Register + * + */ +union cvmx_lmcx_pll_bwctl { + u64 u64; + struct cvmx_lmcx_pll_bwctl_s { + uint64_t reserved_5_63:59; + uint64_t bwupd:1; + uint64_t bwctl:4; + } s; + struct cvmx_lmcx_pll_bwctl_s cn30xx; + struct cvmx_lmcx_pll_bwctl_s cn31xx; + struct cvmx_lmcx_pll_bwctl_s cn38xx; + struct cvmx_lmcx_pll_bwctl_s cn38xxp2; +}; + +/** + * cvmx_lmc#_pll_ctl + * + * LMC_PLL_CTL = LMC pll control + * + * + * Notes: + * This CSR is only relevant for LMC0. LMC1_PLL_CTL is not used. + * + * Exactly one of EN2, EN4, EN6, EN8, EN12, EN16 must be set. + * + * The resultant DDR_CK frequency is the DDR2_REF_CLK + * frequency multiplied by: + * + * (CLKF + 1) / ((CLKR + 1) * EN(2,4,6,8,12,16)) + * + * The PLL frequency, which is: + * + * (DDR2_REF_CLK freq) * ((CLKF + 1) / (CLKR + 1)) + * + * must reside between 1.2 and 2.5 GHz. A faster PLL frequency is + * desirable if there is a choice. 
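+ *
+ * Worked example with hypothetical values (not taken from any board
+ * configuration): with a 50 MHz DDR2_REF_CLK, CLKF = 31, CLKR = 0 and
+ * EN4 = 1, the PLL runs at 50 MHz * (31 + 1) / (0 + 1) = 1.6 GHz,
+ * which is inside the required 1.2 to 2.5 GHz window, and DDR_CK is
+ * 1.6 GHz / 4 = 400 MHz.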
+ */ +union cvmx_lmcx_pll_ctl { + u64 u64; + struct cvmx_lmcx_pll_ctl_s { + uint64_t reserved_30_63:34; + uint64_t bypass:1; + uint64_t fasten_n:1; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } s; + struct cvmx_lmcx_pll_ctl_cn50xx { + uint64_t reserved_29_63:35; + uint64_t fasten_n:1; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } cn50xx; + struct cvmx_lmcx_pll_ctl_s cn52xx; + struct cvmx_lmcx_pll_ctl_s cn52xxp1; + struct cvmx_lmcx_pll_ctl_cn50xx cn56xx; + struct cvmx_lmcx_pll_ctl_cn56xxp1 { + uint64_t reserved_28_63:36; + uint64_t div_reset:1; + uint64_t reset_n:1; + uint64_t clkf:12; + uint64_t clkr:6; + uint64_t reserved_6_7:2; + uint64_t en16:1; + uint64_t en12:1; + uint64_t en8:1; + uint64_t en6:1; + uint64_t en4:1; + uint64_t en2:1; + } cn56xxp1; + struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xx; + struct cvmx_lmcx_pll_ctl_cn56xxp1 cn58xxp1; +}; + +/** + * cvmx_lmc#_pll_status + * + * LMC_PLL_STATUS = LMC pll status + * + */ +union cvmx_lmcx_pll_status { + u64 u64; + struct cvmx_lmcx_pll_status_s { + uint64_t reserved_32_63:32; + uint64_t ddr__nctl:5; + uint64_t ddr__pctl:5; + uint64_t reserved_2_21:20; + uint64_t rfslip:1; + uint64_t fbslip:1; + } s; + struct cvmx_lmcx_pll_status_s cn50xx; + struct cvmx_lmcx_pll_status_s cn52xx; + struct cvmx_lmcx_pll_status_s cn52xxp1; + struct cvmx_lmcx_pll_status_s cn56xx; + struct cvmx_lmcx_pll_status_s cn56xxp1; + struct cvmx_lmcx_pll_status_s cn58xx; + struct cvmx_lmcx_pll_status_cn58xxp1 { + uint64_t reserved_2_63:62; + uint64_t rfslip:1; + uint64_t fbslip:1; + } cn58xxp1; +}; + +/** + * cvmx_lmc#_ppr_ctl + * + * This register contains programmable timing and control parameters used + * when running the post package repair sequence. The timing fields + * PPR_CTL[TPGMPST], PPR_CTL[TPGM_EXIT] and PPR_CTL[TPGM] need to be set as + * to satisfy the minimum values mentioned in the JEDEC DDR4 spec before + * running the PPR sequence. See LMC()_SEQ_CTL[SEQ_SEL,INIT_START] to run + * the PPR sequence. + * + * Running hard PPR may require LMC to issue security key as four consecutive + * MR0 commands, each with a unique address field A[17:0]. Set the security + * key in the general purpose CSRs as follows: + * + * _ Security key 0 = LMC()_GENERAL_PURPOSE0[DATA]<17:0>. + * _ Security key 1 = LMC()_GENERAL_PURPOSE0[DATA]<35:18>. + * _ Security key 2 = LMC()_GENERAL_PURPOSE1[DATA]<17:0>. + * _ Security key 3 = LMC()_GENERAL_PURPOSE1[DATA]<35:18>. + */ +union cvmx_lmcx_ppr_ctl { + u64 u64; + struct cvmx_lmcx_ppr_ctl_s { + uint64_t reserved_27_63:37; + uint64_t lrank_sel:3; + uint64_t skip_issue_security:1; + uint64_t sppr:1; + uint64_t tpgm:10; + uint64_t tpgm_exit:5; + uint64_t tpgmpst:7; + } s; + struct cvmx_lmcx_ppr_ctl_cn73xx { + uint64_t reserved_24_63:40; + uint64_t skip_issue_security:1; + uint64_t sppr:1; + uint64_t tpgm:10; + uint64_t tpgm_exit:5; + uint64_t tpgmpst:7; + } cn73xx; + struct cvmx_lmcx_ppr_ctl_s cn78xx; + struct cvmx_lmcx_ppr_ctl_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_read_level_ctl + * + * Notes: + * The HW writes and reads the cache block selected by ROW, COL, BNK and + * the rank as part of a read-leveling sequence for a rank. + * A cache block write is 16 72-bit words. 
PATTERN selects the write value. + * For the first 8 words, the write value is the bit PATTERN duplicated + * into a 72-bit vector. The write value of the last 8 words is the inverse + * of the write value of the first 8 words. See LMC*_READ_LEVEL_RANK*. + */ +union cvmx_lmcx_read_level_ctl { + u64 u64; + struct cvmx_lmcx_read_level_ctl_s { + uint64_t reserved_44_63:20; + uint64_t rankmask:4; + uint64_t pattern:8; + uint64_t row:16; + uint64_t col:12; + uint64_t reserved_3_3:1; + uint64_t bnk:3; + } s; + struct cvmx_lmcx_read_level_ctl_s cn52xx; + struct cvmx_lmcx_read_level_ctl_s cn52xxp1; + struct cvmx_lmcx_read_level_ctl_s cn56xx; + struct cvmx_lmcx_read_level_ctl_s cn56xxp1; +}; + +/** + * cvmx_lmc#_read_level_dbg + * + * Notes: + * A given read of LMC*_READ_LEVEL_DBG returns the read-leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only one + * byte in the last rank that the HW read-leveled. + * LMC*_READ_LEVEL_DBG[BYTE] selects the particular byte. + * To get these pass/fail results for another different rank, you must run + * the hardware read-leveling again. For example, it is possible to get the + * BITMASK results for every byte of every rank if you run read-leveling + * separately for each rank, probing LMC*_READ_LEVEL_DBG between each + * read-leveling. + */ +union cvmx_lmcx_read_level_dbg { + u64 u64; + struct cvmx_lmcx_read_level_dbg_s { + uint64_t reserved_32_63:32; + uint64_t bitmask:16; + uint64_t reserved_4_15:12; + uint64_t byte:4; + } s; + struct cvmx_lmcx_read_level_dbg_s cn52xx; + struct cvmx_lmcx_read_level_dbg_s cn52xxp1; + struct cvmx_lmcx_read_level_dbg_s cn56xx; + struct cvmx_lmcx_read_level_dbg_s cn56xxp1; +}; + +/** + * cvmx_lmc#_read_level_rank# + * + * Notes: + * This is four CSRs per LMC, one per each rank. + * Each CSR is written by HW during a read-leveling sequence for the rank. + * (HW sets STATUS==3 after HW read-leveling completes for the rank.) + * Each CSR may also be written by SW, but not while a read-leveling sequence + * is in progress. (HW sets STATUS==1 after a CSR write.) + * Deskew setting is measured in units of 1/4 DCLK, so the above BYTE* + * values can range over 4 DCLKs. + * SW initiates a HW read-leveling sequence by programming + * LMC*_READ_LEVEL_CTL and writing INIT_START=1 with SEQUENCE=1. + * See LMC*_READ_LEVEL_CTL. + */ +union cvmx_lmcx_read_level_rankx { + u64 u64; + struct cvmx_lmcx_read_level_rankx_s { + uint64_t reserved_38_63:26; + uint64_t status:2; + uint64_t byte8:4; + uint64_t byte7:4; + uint64_t byte6:4; + uint64_t byte5:4; + uint64_t byte4:4; + uint64_t byte3:4; + uint64_t byte2:4; + uint64_t byte1:4; + uint64_t byte0:4; + } s; + struct cvmx_lmcx_read_level_rankx_s cn52xx; + struct cvmx_lmcx_read_level_rankx_s cn52xxp1; + struct cvmx_lmcx_read_level_rankx_s cn56xx; + struct cvmx_lmcx_read_level_rankx_s cn56xxp1; +}; + +/** + * cvmx_lmc#_ref_status + * + * This register contains the status of the refresh pending counter. + * + */ +union cvmx_lmcx_ref_status { + u64 u64; + struct cvmx_lmcx_ref_status_s { + uint64_t reserved_4_63:60; + uint64_t ref_pend_max_clr:1; + uint64_t ref_count:3; + } s; + struct cvmx_lmcx_ref_status_s cn73xx; + struct cvmx_lmcx_ref_status_s cn78xx; + struct cvmx_lmcx_ref_status_s cnf75xx; +}; + +/** + * cvmx_lmc#_reset_ctl + * + * Specify the RSL base addresses for the block. 
+ * + */ +union cvmx_lmcx_reset_ctl { + u64 u64; + struct cvmx_lmcx_reset_ctl_s { + uint64_t reserved_4_63:60; + uint64_t ddr3psv:1; + uint64_t ddr3psoft:1; + uint64_t ddr3pwarm:1; + uint64_t ddr3rst:1; + } s; + struct cvmx_lmcx_reset_ctl_s cn61xx; + struct cvmx_lmcx_reset_ctl_s cn63xx; + struct cvmx_lmcx_reset_ctl_s cn63xxp1; + struct cvmx_lmcx_reset_ctl_s cn66xx; + struct cvmx_lmcx_reset_ctl_s cn68xx; + struct cvmx_lmcx_reset_ctl_s cn68xxp1; + struct cvmx_lmcx_reset_ctl_s cn70xx; + struct cvmx_lmcx_reset_ctl_s cn70xxp1; + struct cvmx_lmcx_reset_ctl_s cn73xx; + struct cvmx_lmcx_reset_ctl_s cn78xx; + struct cvmx_lmcx_reset_ctl_s cn78xxp1; + struct cvmx_lmcx_reset_ctl_s cnf71xx; + struct cvmx_lmcx_reset_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_retry_config + * + * This register configures automatic retry operation. + * + */ +union cvmx_lmcx_retry_config { + u64 u64; + struct cvmx_lmcx_retry_config_s { + uint64_t reserved_56_63:8; + uint64_t max_errors:24; + uint64_t reserved_13_31:19; + uint64_t error_continue:1; + uint64_t reserved_9_11:3; + uint64_t auto_error_continue:1; + uint64_t reserved_5_7:3; + uint64_t pulse_count_auto_clr:1; + uint64_t reserved_1_3:3; + uint64_t retry_enable:1; + } s; + struct cvmx_lmcx_retry_config_s cn73xx; + struct cvmx_lmcx_retry_config_s cn78xx; + struct cvmx_lmcx_retry_config_s cnf75xx; +}; + +/** + * cvmx_lmc#_retry_status + * + * This register provides status on automatic retry operation. + * + */ +union cvmx_lmcx_retry_status { + u64 u64; + struct cvmx_lmcx_retry_status_s { + uint64_t clear_error_count:1; + uint64_t clear_error_pulse_count:1; + uint64_t reserved_57_61:5; + uint64_t error_pulse_count_valid:1; + uint64_t error_pulse_count_sat:1; + uint64_t reserved_52_54:3; + uint64_t error_pulse_count:4; + uint64_t reserved_45_47:3; + uint64_t error_sequence:5; + uint64_t reserved_33_39:7; + uint64_t error_type:1; + uint64_t reserved_24_31:8; + uint64_t error_count:24; + } s; + struct cvmx_lmcx_retry_status_s cn73xx; + struct cvmx_lmcx_retry_status_s cn78xx; + struct cvmx_lmcx_retry_status_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_ctl + */ +union cvmx_lmcx_rlevel_ctl { + u64 u64; + struct cvmx_lmcx_rlevel_ctl_s { + uint64_t reserved_33_63:31; + uint64_t tccd_sel:1; + uint64_t pattern:8; + uint64_t reserved_22_23:2; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } s; + struct cvmx_lmcx_rlevel_ctl_cn61xx { + uint64_t reserved_22_63:42; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn61xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn63xx; + struct cvmx_lmcx_rlevel_ctl_cn63xxp1 { + uint64_t reserved_9_63:55; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn63xxp1; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn66xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xx; + struct cvmx_lmcx_rlevel_ctl_cn61xx cn68xxp1; + struct cvmx_lmcx_rlevel_ctl_cn70xx { + uint64_t reserved_32_63:32; + uint64_t pattern:8; + uint64_t reserved_22_23:2; + uint64_t delay_unload_3:1; + uint64_t delay_unload_2:1; + uint64_t delay_unload_1:1; + uint64_t delay_unload_0:1; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t offset_en:1; + uint64_t offset:4; + uint64_t byte:4; + } cn70xx; + struct cvmx_lmcx_rlevel_ctl_cn70xx cn70xxp1; 
+ struct cvmx_lmcx_rlevel_ctl_cn70xx cn73xx; + struct cvmx_lmcx_rlevel_ctl_s cn78xx; + struct cvmx_lmcx_rlevel_ctl_s cn78xxp1; + struct cvmx_lmcx_rlevel_ctl_cn61xx cnf71xx; + struct cvmx_lmcx_rlevel_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_dbg + * + * A given read of LMC()_RLEVEL_DBG returns the read leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only + * one byte in the last rank that the hardware ran read leveling on. + * LMC()_RLEVEL_CTL[BYTE] selects the particular byte. To get these + * pass/fail results for a different rank, you must run the hardware + * read leveling again. For example, it is possible to get the [BITMASK] + * results for every byte of every rank if you run read leveling separately + * for each rank, probing LMC()_RLEVEL_DBG between each read- leveling. + */ +union cvmx_lmcx_rlevel_dbg { + u64 u64; + struct cvmx_lmcx_rlevel_dbg_s { + uint64_t bitmask:64; + } s; + struct cvmx_lmcx_rlevel_dbg_s cn61xx; + struct cvmx_lmcx_rlevel_dbg_s cn63xx; + struct cvmx_lmcx_rlevel_dbg_s cn63xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn66xx; + struct cvmx_lmcx_rlevel_dbg_s cn68xx; + struct cvmx_lmcx_rlevel_dbg_s cn68xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn70xx; + struct cvmx_lmcx_rlevel_dbg_s cn70xxp1; + struct cvmx_lmcx_rlevel_dbg_s cn73xx; + struct cvmx_lmcx_rlevel_dbg_s cn78xx; + struct cvmx_lmcx_rlevel_dbg_s cn78xxp1; + struct cvmx_lmcx_rlevel_dbg_s cnf71xx; + struct cvmx_lmcx_rlevel_dbg_s cnf75xx; +}; + +/** + * cvmx_lmc#_rlevel_rank# + * + * Four of these CSRs exist per LMC, one for each rank. Read level setting + * is measured in units of 1/4 CK, so the BYTEn values can range over 16 CK + * cycles. Each CSR is written by hardware during a read leveling sequence + * for the rank. (Hardware sets [STATUS] to 3 after hardware read leveling + * completes for the rank.) + * + * If hardware is unable to find a match per LMC()_RLEVEL_CTL[OFFSET_EN] and + * LMC()_RLEVEL_CTL[OFFSET], then hardware sets + * LMC()_RLEVEL_RANK()[BYTEn<5:0>] to 0x0. + * + * Each CSR may also be written by software, but not while a read leveling + * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.) + * Software initiates a hardware read leveling sequence by programming + * LMC()_RLEVEL_CTL and writing [INIT_START] = 1 with [SEQ_SEL]=1. + * See LMC()_RLEVEL_CTL. + * + * LMC()_RLEVEL_RANKi values for ranks i without attached DRAM should be set + * such that they do not increase the range of possible BYTE values for any + * byte lane. The easiest way to do this is to set LMC()_RLEVEL_RANKi = + * LMC()_RLEVEL_RANKj, where j is some rank with attached DRAM whose + * LMC()_RLEVEL_RANKj is already fully initialized. 
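+ *
+ * Illustrative sketch only (CSR accessors are platform specific and not
+ * assumed here). Mirroring an initialized rank j into an unused rank i
+ * keeps rank i from widening the possible BYTE range:
+ *
+ *   union cvmx_lmcx_rlevel_rankx lev;
+ *   lev.u64 = ...;               // value read from LMC()_RLEVEL_RANKj
+ *   // write lev.u64 back to LMC()_RLEVEL_RANKi unchanged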
+ */ +union cvmx_lmcx_rlevel_rankx { + u64 u64; + struct cvmx_lmcx_rlevel_rankx_s { + uint64_t reserved_56_63:8; + uint64_t status:2; + uint64_t byte8:6; + uint64_t byte7:6; + uint64_t byte6:6; + uint64_t byte5:6; + uint64_t byte4:6; + uint64_t byte3:6; + uint64_t byte2:6; + uint64_t byte1:6; + uint64_t byte0:6; + } s; + struct cvmx_lmcx_rlevel_rankx_s cn61xx; + struct cvmx_lmcx_rlevel_rankx_s cn63xx; + struct cvmx_lmcx_rlevel_rankx_s cn63xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn66xx; + struct cvmx_lmcx_rlevel_rankx_s cn68xx; + struct cvmx_lmcx_rlevel_rankx_s cn68xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn70xx; + struct cvmx_lmcx_rlevel_rankx_s cn70xxp1; + struct cvmx_lmcx_rlevel_rankx_s cn73xx; + struct cvmx_lmcx_rlevel_rankx_s cn78xx; + struct cvmx_lmcx_rlevel_rankx_s cn78xxp1; + struct cvmx_lmcx_rlevel_rankx_s cnf71xx; + struct cvmx_lmcx_rlevel_rankx_s cnf75xx; +}; + +/** + * cvmx_lmc#_rodt_comp_ctl + * + * LMC_RODT_COMP_CTL = LMC Compensation control + * + */ +union cvmx_lmcx_rodt_comp_ctl { + u64 u64; + struct cvmx_lmcx_rodt_comp_ctl_s { + uint64_t reserved_17_63:47; + uint64_t enable:1; + uint64_t reserved_12_15:4; + uint64_t nctl:4; + uint64_t reserved_5_7:3; + uint64_t pctl:5; + } s; + struct cvmx_lmcx_rodt_comp_ctl_s cn50xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn52xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn52xxp1; + struct cvmx_lmcx_rodt_comp_ctl_s cn56xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn56xxp1; + struct cvmx_lmcx_rodt_comp_ctl_s cn58xx; + struct cvmx_lmcx_rodt_comp_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_rodt_ctl + * + * LMC_RODT_CTL = Obsolete LMC Read OnDieTermination control + * See the description in LMC_WODT_CTL1. On Reads, Octeon only supports + * turning on ODT's in the lower 2 DIMM's with the masks as below. + * + * Notes: + * When a given RANK in position N is selected, the RODT _HI and _LO masks + * for that position are used. + * Mask[3:0] is used for RODT control of the RANKs in positions 3, 2, 1, + * and 0, respectively. + * In 64b mode, DIMMs are assumed to be ordered in the following order: + * position 3: [unused , DIMM1_RANK1_LO] + * position 2: [unused , DIMM1_RANK0_LO] + * position 1: [unused , DIMM0_RANK1_LO] + * position 0: [unused , DIMM0_RANK0_LO] + * In 128b mode, DIMMs are assumed to be ordered in the following order: + * position 3: [DIMM3_RANK1_HI, DIMM1_RANK1_LO] + * position 2: [DIMM3_RANK0_HI, DIMM1_RANK0_LO] + * position 1: [DIMM2_RANK1_HI, DIMM0_RANK1_LO] + * position 0: [DIMM2_RANK0_HI, DIMM0_RANK0_LO] + */ +union cvmx_lmcx_rodt_ctl { + u64 u64; + struct cvmx_lmcx_rodt_ctl_s { + uint64_t reserved_32_63:32; + uint64_t rodt_hi3:4; + uint64_t rodt_hi2:4; + uint64_t rodt_hi1:4; + uint64_t rodt_hi0:4; + uint64_t rodt_lo3:4; + uint64_t rodt_lo2:4; + uint64_t rodt_lo1:4; + uint64_t rodt_lo0:4; + } s; + struct cvmx_lmcx_rodt_ctl_s cn30xx; + struct cvmx_lmcx_rodt_ctl_s cn31xx; + struct cvmx_lmcx_rodt_ctl_s cn38xx; + struct cvmx_lmcx_rodt_ctl_s cn38xxp2; + struct cvmx_lmcx_rodt_ctl_s cn50xx; + struct cvmx_lmcx_rodt_ctl_s cn52xx; + struct cvmx_lmcx_rodt_ctl_s cn52xxp1; + struct cvmx_lmcx_rodt_ctl_s cn56xx; + struct cvmx_lmcx_rodt_ctl_s cn56xxp1; + struct cvmx_lmcx_rodt_ctl_s cn58xx; + struct cvmx_lmcx_rodt_ctl_s cn58xxp1; +}; + +/** + * cvmx_lmc#_rodt_mask + * + * System designers may desire to terminate DQ/DQS lines for higher frequency + * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have + * built-in termination resistors that can be turned on or off by the + * controller, after meeting TAOND and TAOF timing requirements. 
+ * + * Each rank has its own ODT pin that fans out to all the memory parts in + * that DIMM. System designers may prefer different combinations of ODT ONs + * for read operations into different ranks. CNXXXX supports full + * programmability by way of the mask register below. Each rank position has + * its own 4-bit programmable field. When the controller does a read to that + * rank, it sets the 4 ODT pins to the MASK pins below. For example, when + * doing a read from Rank0, a system designer may desire to terminate the + * lines with the resistor on DIMM0/Rank1. The mask [RODT_D0_R0] would then + * be [0010]. + * + * CNXXXX drives the appropriate mask values on the ODT pins by default. + * If this feature is not required, write 0x0 in this register. Note that, + * as per the JEDEC DDR3 specifications, the ODT pin for the rank that is + * being read should always be 0x0. When a given RANK is selected, the RODT + * mask for that rank is used. The resulting RODT mask is driven to the + * DIMMs in the following manner: + */ +union cvmx_lmcx_rodt_mask { + u64 u64; + struct cvmx_lmcx_rodt_mask_s { + uint64_t rodt_d3_r1:8; + uint64_t rodt_d3_r0:8; + uint64_t rodt_d2_r1:8; + uint64_t rodt_d2_r0:8; + uint64_t rodt_d1_r1:8; + uint64_t rodt_d1_r0:8; + uint64_t rodt_d0_r1:8; + uint64_t rodt_d0_r0:8; + } s; + struct cvmx_lmcx_rodt_mask_s cn61xx; + struct cvmx_lmcx_rodt_mask_s cn63xx; + struct cvmx_lmcx_rodt_mask_s cn63xxp1; + struct cvmx_lmcx_rodt_mask_s cn66xx; + struct cvmx_lmcx_rodt_mask_s cn68xx; + struct cvmx_lmcx_rodt_mask_s cn68xxp1; + struct cvmx_lmcx_rodt_mask_cn70xx { + uint64_t reserved_28_63:36; + uint64_t rodt_d1_r1:4; + uint64_t reserved_20_23:4; + uint64_t rodt_d1_r0:4; + uint64_t reserved_12_15:4; + uint64_t rodt_d0_r1:4; + uint64_t reserved_4_7:4; + uint64_t rodt_d0_r0:4; + } cn70xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn70xxp1; + struct cvmx_lmcx_rodt_mask_cn70xx cn73xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn78xx; + struct cvmx_lmcx_rodt_mask_cn70xx cn78xxp1; + struct cvmx_lmcx_rodt_mask_s cnf71xx; + struct cvmx_lmcx_rodt_mask_cn70xx cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg0 + * + * LMC_SCRAMBLE_CFG0 = LMC Scramble Config0 + * + */ +union cvmx_lmcx_scramble_cfg0 { + u64 u64; + struct cvmx_lmcx_scramble_cfg0_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg0_s cn61xx; + struct cvmx_lmcx_scramble_cfg0_s cn66xx; + struct cvmx_lmcx_scramble_cfg0_s cn70xx; + struct cvmx_lmcx_scramble_cfg0_s cn70xxp1; + struct cvmx_lmcx_scramble_cfg0_s cn73xx; + struct cvmx_lmcx_scramble_cfg0_s cn78xx; + struct cvmx_lmcx_scramble_cfg0_s cn78xxp1; + struct cvmx_lmcx_scramble_cfg0_s cnf71xx; + struct cvmx_lmcx_scramble_cfg0_s cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg1 + * + * These registers set the aliasing that uses the lowest, legal chip select(s). 
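+ *
+ * As a hedged example only (assuming the CVMX_LMCX_SCRAMBLE_CFG1()
+ * address macro and the lmc_wr() accessor added later in this series),
+ * a 64-bit scramble key could be loaded with:
+ *
+ *   lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), key);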
+ * + */ +union cvmx_lmcx_scramble_cfg1 { + u64 u64; + struct cvmx_lmcx_scramble_cfg1_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg1_s cn61xx; + struct cvmx_lmcx_scramble_cfg1_s cn66xx; + struct cvmx_lmcx_scramble_cfg1_s cn70xx; + struct cvmx_lmcx_scramble_cfg1_s cn70xxp1; + struct cvmx_lmcx_scramble_cfg1_s cn73xx; + struct cvmx_lmcx_scramble_cfg1_s cn78xx; + struct cvmx_lmcx_scramble_cfg1_s cn78xxp1; + struct cvmx_lmcx_scramble_cfg1_s cnf71xx; + struct cvmx_lmcx_scramble_cfg1_s cnf75xx; +}; + +/** + * cvmx_lmc#_scramble_cfg2 + */ +union cvmx_lmcx_scramble_cfg2 { + u64 u64; + struct cvmx_lmcx_scramble_cfg2_s { + uint64_t key:64; + } s; + struct cvmx_lmcx_scramble_cfg2_s cn73xx; + struct cvmx_lmcx_scramble_cfg2_s cn78xx; + struct cvmx_lmcx_scramble_cfg2_s cnf75xx; +}; + +/** + * cvmx_lmc#_scrambled_fadr + * + * LMC()_FADR captures the failing pre-scrambled address location (split into + * DIMM, bunk, bank, etc). If scrambling is off, LMC()_FADR also captures the + * failing physical location in the DRAM parts. LMC()_SCRAMBLED_FADR captures + * the actual failing address location in the physical DRAM parts, i.e.: + * + * * If scrambling is on, LMC()_SCRAMBLED_FADR contains the failing physical + * location in the + * DRAM parts (split into DIMM, bunk, bank, etc). + * + * * If scrambling is off, the pre-scramble and post-scramble addresses are + * the same, and so the + * contents of LMC()_SCRAMBLED_FADR match the contents of LMC()_FADR. + * + * This register only captures the first transaction with ECC errors. A DED + * error can over-write this register with its failing addresses if the first + * error was a SEC. If you write LMC()_CONFIG -> SEC_ERR/DED_ERR, it clears + * the error bits and captures the next failing address. If [FDIMM] is 1, + * that means the error is in the higher DIMM. + */ +union cvmx_lmcx_scrambled_fadr { + u64 u64; + struct cvmx_lmcx_scrambled_fadr_s { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t reserved_14_37:24; + uint64_t fcol:14; + } s; + struct cvmx_lmcx_scrambled_fadr_cn61xx { + uint64_t reserved_36_63:28; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:16; + uint64_t fcol:14; + } cn61xx; + struct cvmx_lmcx_scrambled_fadr_cn61xx cn66xx; + struct cvmx_lmcx_scrambled_fadr_cn70xx { + uint64_t reserved_40_63:24; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn70xx; + struct cvmx_lmcx_scrambled_fadr_cn70xx cn70xxp1; + struct cvmx_lmcx_scrambled_fadr_cn73xx { + uint64_t reserved_43_63:21; + uint64_t fcid:3; + uint64_t fill_order:2; + uint64_t fdimm:1; + uint64_t fbunk:1; + uint64_t fbank:4; + uint64_t frow:18; + uint64_t fcol:14; + } cn73xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cn78xxp1; + struct cvmx_lmcx_scrambled_fadr_cn61xx cnf71xx; + struct cvmx_lmcx_scrambled_fadr_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_seq_ctl + * + * This register is used to initiate the various control sequences in the LMC. 
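+ *
+ * A minimal, illustrative sketch (assuming the CVMX_LMCX_SEQ_CTL()
+ * address macro and the lmc_rd()/lmc_wr() accessors added later in this
+ * series) of kicking off a sequence and waiting for it to finish:
+ *
+ *   union cvmx_lmcx_seq_ctl seq_ctl;
+ *
+ *   seq_ctl.u64 = 0;
+ *   seq_ctl.s.seq_sel = sequence;
+ *   seq_ctl.s.init_start = 1;
+ *   lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
+ *   do {
+ *           seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
+ *   } while (seq_ctl.s.seq_complete == 0);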
+ * + */ +union cvmx_lmcx_seq_ctl { + u64 u64; + struct cvmx_lmcx_seq_ctl_s { + uint64_t reserved_6_63:58; + uint64_t seq_complete:1; + uint64_t seq_sel:4; + uint64_t init_start:1; + } s; + struct cvmx_lmcx_seq_ctl_s cn70xx; + struct cvmx_lmcx_seq_ctl_s cn70xxp1; + struct cvmx_lmcx_seq_ctl_s cn73xx; + struct cvmx_lmcx_seq_ctl_s cn78xx; + struct cvmx_lmcx_seq_ctl_s cn78xxp1; + struct cvmx_lmcx_seq_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl0 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register to + * the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL, and LMC()_MODEREG_PARAMS0 registers + * change. Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this register depends on + * LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * The hardware-calculated minimums for these fields are shown in + * LMC(0)_SLOT_CTL0 Hardware-Calculated Minimums. + */ +union cvmx_lmcx_slot_ctl0 { + u64 u64; + struct cvmx_lmcx_slot_ctl0_s { + uint64_t reserved_50_63:14; + uint64_t w2r_l_init_ext:1; + uint64_t w2r_init_ext:1; + uint64_t w2w_l_init:6; + uint64_t w2r_l_init:6; + uint64_t r2w_l_init:6; + uint64_t r2r_l_init:6; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } s; + struct cvmx_lmcx_slot_ctl0_cn61xx { + uint64_t reserved_24_63:40; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } cn61xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn63xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn63xxp1; + struct cvmx_lmcx_slot_ctl0_cn61xx cn66xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn68xx; + struct cvmx_lmcx_slot_ctl0_cn61xx cn68xxp1; + struct cvmx_lmcx_slot_ctl0_cn70xx { + uint64_t reserved_48_63:16; + uint64_t w2w_l_init:6; + uint64_t w2r_l_init:6; + uint64_t r2w_l_init:6; + uint64_t r2r_l_init:6; + uint64_t w2w_init:6; + uint64_t w2r_init:6; + uint64_t r2w_init:6; + uint64_t r2r_init:6; + } cn70xx; + struct cvmx_lmcx_slot_ctl0_cn70xx cn70xxp1; + struct cvmx_lmcx_slot_ctl0_s cn73xx; + struct cvmx_lmcx_slot_ctl0_s cn78xx; + struct cvmx_lmcx_slot_ctl0_s cn78xxp1; + struct cvmx_lmcx_slot_ctl0_cn61xx cnf71xx; + struct cvmx_lmcx_slot_ctl0_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl1 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register to + * the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. 
+ * + * The interpretation of the fields in this CSR depends on + * LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T]=1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T]=0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in + * LMC(0)_SLOT_CTL1 Hardware-Calculated Minimums. + */ +union cvmx_lmcx_slot_ctl1 { + u64 u64; + struct cvmx_lmcx_slot_ctl1_s { + uint64_t reserved_24_63:40; + uint64_t w2w_xrank_init:6; + uint64_t w2r_xrank_init:6; + uint64_t r2w_xrank_init:6; + uint64_t r2r_xrank_init:6; + } s; + struct cvmx_lmcx_slot_ctl1_s cn61xx; + struct cvmx_lmcx_slot_ctl1_s cn63xx; + struct cvmx_lmcx_slot_ctl1_s cn63xxp1; + struct cvmx_lmcx_slot_ctl1_s cn66xx; + struct cvmx_lmcx_slot_ctl1_s cn68xx; + struct cvmx_lmcx_slot_ctl1_s cn68xxp1; + struct cvmx_lmcx_slot_ctl1_s cn70xx; + struct cvmx_lmcx_slot_ctl1_s cn70xxp1; + struct cvmx_lmcx_slot_ctl1_s cn73xx; + struct cvmx_lmcx_slot_ctl1_s cn78xx; + struct cvmx_lmcx_slot_ctl1_s cn78xxp1; + struct cvmx_lmcx_slot_ctl1_s cnf71xx; + struct cvmx_lmcx_slot_ctl1_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl2 + * + * This register is an assortment of control fields needed by the memory + * controller. If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register + * to the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in LMC Registers. + */ +union cvmx_lmcx_slot_ctl2 { + u64 u64; + struct cvmx_lmcx_slot_ctl2_s { + uint64_t reserved_24_63:40; + uint64_t w2w_xdimm_init:6; + uint64_t w2r_xdimm_init:6; + uint64_t r2w_xdimm_init:6; + uint64_t r2r_xdimm_init:6; + } s; + struct cvmx_lmcx_slot_ctl2_s cn61xx; + struct cvmx_lmcx_slot_ctl2_s cn63xx; + struct cvmx_lmcx_slot_ctl2_s cn63xxp1; + struct cvmx_lmcx_slot_ctl2_s cn66xx; + struct cvmx_lmcx_slot_ctl2_s cn68xx; + struct cvmx_lmcx_slot_ctl2_s cn68xxp1; + struct cvmx_lmcx_slot_ctl2_s cn70xx; + struct cvmx_lmcx_slot_ctl2_s cn70xxp1; + struct cvmx_lmcx_slot_ctl2_s cn73xx; + struct cvmx_lmcx_slot_ctl2_s cn78xx; + struct cvmx_lmcx_slot_ctl2_s cn78xxp1; + struct cvmx_lmcx_slot_ctl2_s cnf71xx; + struct cvmx_lmcx_slot_ctl2_s cnf75xx; +}; + +/** + * cvmx_lmc#_slot_ctl3 + * + * This register is an assortment of control fields needed by the memory + * controller. 
If software has not previously written to this register + * (since the last DRESET), hardware updates the fields in this register + * to the minimum allowed value when any of LMC()_RLEVEL_RANK(), + * LMC()_WLEVEL_RANK(), LMC()_CONTROL and LMC()_MODEREG_PARAMS0 change. + * Ideally, only read this register after LMC has been initialized and + * LMC()_RLEVEL_RANK(), LMC()_WLEVEL_RANK() have valid data. + * + * The interpretation of the fields in this CSR depends on LMC(0)_CONFIG[DDR2T]: + * + * * If LMC()_CONFIG[DDR2T] = 1, (FieldValue + 4) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and + * second types from different cache blocks. + * + * * If LMC()_CONFIG[DDR2T] = 0, (FieldValue + 3) is the minimum CK cycles + * between when the DRAM part registers CAS commands of the first and second + * types from different cache blocks. + * FieldValue = 0 is always illegal in this case. + * + * The hardware-calculated minimums for these fields are shown in LMC Registers. + */ +union cvmx_lmcx_slot_ctl3 { + u64 u64; + struct cvmx_lmcx_slot_ctl3_s { + uint64_t reserved_50_63:14; + uint64_t w2r_l_xrank_init_ext:1; + uint64_t w2r_xrank_init_ext:1; + uint64_t w2w_l_xrank_init:6; + uint64_t w2r_l_xrank_init:6; + uint64_t r2w_l_xrank_init:6; + uint64_t r2r_l_xrank_init:6; + uint64_t w2w_xrank_init:6; + uint64_t w2r_xrank_init:6; + uint64_t r2w_xrank_init:6; + uint64_t r2r_xrank_init:6; + } s; + struct cvmx_lmcx_slot_ctl3_s cn73xx; + struct cvmx_lmcx_slot_ctl3_s cn78xx; + struct cvmx_lmcx_slot_ctl3_s cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params0 + */ +union cvmx_lmcx_timing_params0 { + u64 u64; + struct cvmx_lmcx_timing_params0_s { + uint64_t reserved_54_63:10; + uint64_t tbcw:6; + uint64_t reserved_26_47:22; + uint64_t tmrd:4; + uint64_t reserved_8_21:14; + uint64_t tckeon:8; + } s; + struct cvmx_lmcx_timing_params0_cn61xx { + uint64_t reserved_47_63:17; + uint64_t trp_ext:1; + uint64_t tcksre:4; + uint64_t trp:4; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:4; + uint64_t tmrd:4; + uint64_t txpr:4; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_9:10; + } cn61xx; + struct cvmx_lmcx_timing_params0_cn61xx cn63xx; + struct cvmx_lmcx_timing_params0_cn63xxp1 { + uint64_t reserved_46_63:18; + uint64_t tcksre:4; + uint64_t trp:4; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:4; + uint64_t tmrd:4; + uint64_t txpr:4; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t tckeon:10; + } cn63xxp1; + struct cvmx_lmcx_timing_params0_cn61xx cn66xx; + struct cvmx_lmcx_timing_params0_cn61xx cn68xx; + struct cvmx_lmcx_timing_params0_cn61xx cn68xxp1; + struct cvmx_lmcx_timing_params0_cn70xx { + uint64_t reserved_48_63:16; + uint64_t tcksre:4; + uint64_t trp:5; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:5; + uint64_t tmrd:4; + uint64_t txpr:6; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_7:8; + } cn70xx; + struct cvmx_lmcx_timing_params0_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params0_cn73xx { + uint64_t reserved_54_63:10; + uint64_t tbcw:6; + uint64_t tcksre:4; + uint64_t trp:5; + uint64_t tzqinit:4; + uint64_t tdllk:4; + uint64_t tmod:5; + uint64_t tmrd:4; + uint64_t txpr:6; + uint64_t tcke:4; + uint64_t tzqcs:4; + uint64_t reserved_0_7:8; + } cn73xx; + struct cvmx_lmcx_timing_params0_cn73xx cn78xx; + struct cvmx_lmcx_timing_params0_cn73xx cn78xxp1; + struct cvmx_lmcx_timing_params0_cn61xx cnf71xx; + struct cvmx_lmcx_timing_params0_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params1 + */ +union 
cvmx_lmcx_timing_params1 { + u64 u64; + struct cvmx_lmcx_timing_params1_s { + uint64_t reserved_59_63:5; + uint64_t txp_ext:1; + uint64_t trcd_ext:1; + uint64_t tpdm_full_cycle_ena:1; + uint64_t trfc_dlr:7; + uint64_t reserved_4_48:45; + uint64_t tmprr:4; + } s; + struct cvmx_lmcx_timing_params1_cn61xx { + uint64_t reserved_47_63:17; + uint64_t tras_ext:1; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:5; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + uint64_t tmprr:4; + } cn61xx; + struct cvmx_lmcx_timing_params1_cn61xx cn63xx; + struct cvmx_lmcx_timing_params1_cn63xxp1 { + uint64_t reserved_46_63:18; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:5; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:5; + uint64_t tmprr:4; + } cn63xxp1; + struct cvmx_lmcx_timing_params1_cn61xx cn66xx; + struct cvmx_lmcx_timing_params1_cn61xx cn68xx; + struct cvmx_lmcx_timing_params1_cn61xx cn68xxp1; + struct cvmx_lmcx_timing_params1_cn70xx { + uint64_t reserved_49_63:15; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:7; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:6; + uint64_t tmprr:4; + } cn70xx; + struct cvmx_lmcx_timing_params1_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params1_cn73xx { + uint64_t reserved_59_63:5; + uint64_t txp_ext:1; + uint64_t trcd_ext:1; + uint64_t tpdm_full_cycle_ena:1; + uint64_t trfc_dlr:7; + uint64_t txpdll:5; + uint64_t tfaw:5; + uint64_t twldqsen:4; + uint64_t twlmrd:4; + uint64_t txp:3; + uint64_t trrd:3; + uint64_t trfc:7; + uint64_t twtr:4; + uint64_t trcd:4; + uint64_t tras:6; + uint64_t tmprr:4; + } cn73xx; + struct cvmx_lmcx_timing_params1_cn73xx cn78xx; + struct cvmx_lmcx_timing_params1_cn73xx cn78xxp1; + struct cvmx_lmcx_timing_params1_cn61xx cnf71xx; + struct cvmx_lmcx_timing_params1_cn73xx cnf75xx; +}; + +/** + * cvmx_lmc#_timing_params2 + * + * This register sets timing parameters for DDR4. 
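+ *
+ * Timing fields like these generally derive from SPD values given in
+ * picoseconds, converted to CK cycles using the DDR clock period (the
+ * exact per-field encoding is given in the HRM). As an illustrative
+ * example with assumed numbers only: with a 937 ps clock period and a
+ * 5.3 ns tRRD_L, the divide_roundup() helper added later in this series
+ * gives divide_roundup(5300, 937) = 6 CK.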
+ * + */ +union cvmx_lmcx_timing_params2 { + u64 u64; + struct cvmx_lmcx_timing_params2_s { + uint64_t reserved_16_63:48; + uint64_t trrd_l_ext:1; + uint64_t trtp:4; + uint64_t t_rw_op_max:4; + uint64_t twtr_l:4; + uint64_t trrd_l:3; + } s; + struct cvmx_lmcx_timing_params2_cn70xx { + uint64_t reserved_15_63:49; + uint64_t trtp:4; + uint64_t t_rw_op_max:4; + uint64_t twtr_l:4; + uint64_t trrd_l:3; + } cn70xx; + struct cvmx_lmcx_timing_params2_cn70xx cn70xxp1; + struct cvmx_lmcx_timing_params2_s cn73xx; + struct cvmx_lmcx_timing_params2_s cn78xx; + struct cvmx_lmcx_timing_params2_s cn78xxp1; + struct cvmx_lmcx_timing_params2_s cnf75xx; +}; + +/** + * cvmx_lmc#_tro_ctl + * + * LMC_TRO_CTL = LMC Temperature Ring Osc Control + * This register is an assortment of various control fields needed to + * control the temperature ring oscillator + * + * Notes: + * To bring up the temperature ring oscillator, write TRESET to 0, and + * follow by initializing RCLK_CNT to desired value + */ +union cvmx_lmcx_tro_ctl { + u64 u64; + struct cvmx_lmcx_tro_ctl_s { + uint64_t reserved_33_63:31; + uint64_t rclk_cnt:32; + uint64_t treset:1; + } s; + struct cvmx_lmcx_tro_ctl_s cn61xx; + struct cvmx_lmcx_tro_ctl_s cn63xx; + struct cvmx_lmcx_tro_ctl_s cn63xxp1; + struct cvmx_lmcx_tro_ctl_s cn66xx; + struct cvmx_lmcx_tro_ctl_s cn68xx; + struct cvmx_lmcx_tro_ctl_s cn68xxp1; + struct cvmx_lmcx_tro_ctl_s cnf71xx; +}; + +/** + * cvmx_lmc#_tro_stat + * + * LMC_TRO_STAT = LMC Temperature Ring Osc Status + * This register is an assortment of various control fields needed to + * control the temperature ring oscillator + */ +union cvmx_lmcx_tro_stat { + u64 u64; + struct cvmx_lmcx_tro_stat_s { + uint64_t reserved_32_63:32; + uint64_t ring_cnt:32; + } s; + struct cvmx_lmcx_tro_stat_s cn61xx; + struct cvmx_lmcx_tro_stat_s cn63xx; + struct cvmx_lmcx_tro_stat_s cn63xxp1; + struct cvmx_lmcx_tro_stat_s cn66xx; + struct cvmx_lmcx_tro_stat_s cn68xx; + struct cvmx_lmcx_tro_stat_s cn68xxp1; + struct cvmx_lmcx_tro_stat_s cnf71xx; +}; + +/** + * cvmx_lmc#_wlevel_ctl + */ +union cvmx_lmcx_wlevel_ctl { + u64 u64; + struct cvmx_lmcx_wlevel_ctl_s { + uint64_t reserved_22_63:42; + uint64_t rtt_nom:3; + uint64_t bitmask:8; + uint64_t or_dis:1; + uint64_t sset:1; + uint64_t lanemask:9; + } s; + struct cvmx_lmcx_wlevel_ctl_s cn61xx; + struct cvmx_lmcx_wlevel_ctl_s cn63xx; + struct cvmx_lmcx_wlevel_ctl_cn63xxp1 { + uint64_t reserved_10_63:54; + uint64_t sset:1; + uint64_t lanemask:9; + } cn63xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn66xx; + struct cvmx_lmcx_wlevel_ctl_s cn68xx; + struct cvmx_lmcx_wlevel_ctl_s cn68xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn70xx; + struct cvmx_lmcx_wlevel_ctl_s cn70xxp1; + struct cvmx_lmcx_wlevel_ctl_s cn73xx; + struct cvmx_lmcx_wlevel_ctl_s cn78xx; + struct cvmx_lmcx_wlevel_ctl_s cn78xxp1; + struct cvmx_lmcx_wlevel_ctl_s cnf71xx; + struct cvmx_lmcx_wlevel_ctl_s cnf75xx; +}; + +/** + * cvmx_lmc#_wlevel_dbg + * + * A given write of LMC()_WLEVEL_DBG returns the write leveling pass/fail + * results for all possible delay settings (i.e. the BITMASK) for only one + * byte in the last rank that the hardware write leveled. + * LMC()_WLEVEL_DBG[BYTE] selects the particular byte. To get these + * pass/fail results for a different rank, you must run the hardware write + * leveling again. For example, it is possible to get the [BITMASK] results + * for every byte of every rank if you run write leveling separately for + * each rank, probing LMC()_WLEVEL_DBG between each write-leveling. 
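+ *
+ * A minimal, illustrative sketch (assuming the CVMX_LMCX_WLEVEL_DBG()
+ * address macro and the lmc_rd()/lmc_wr() accessors added later in this
+ * series) of probing the bitmask for one byte lane:
+ *
+ *   union cvmx_lmcx_wlevel_dbg dbg;
+ *
+ *   dbg.u64 = 0;
+ *   dbg.s.byte = byte;
+ *   lmc_wr(priv, CVMX_LMCX_WLEVEL_DBG(if_num), dbg.u64);
+ *   dbg.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num));
+ *   bitmask = dbg.s.bitmask;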
+ */ +union cvmx_lmcx_wlevel_dbg { + u64 u64; + struct cvmx_lmcx_wlevel_dbg_s { + uint64_t reserved_12_63:52; + uint64_t bitmask:8; + uint64_t byte:4; + } s; + struct cvmx_lmcx_wlevel_dbg_s cn61xx; + struct cvmx_lmcx_wlevel_dbg_s cn63xx; + struct cvmx_lmcx_wlevel_dbg_s cn63xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn66xx; + struct cvmx_lmcx_wlevel_dbg_s cn68xx; + struct cvmx_lmcx_wlevel_dbg_s cn68xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn70xx; + struct cvmx_lmcx_wlevel_dbg_s cn70xxp1; + struct cvmx_lmcx_wlevel_dbg_s cn73xx; + struct cvmx_lmcx_wlevel_dbg_s cn78xx; + struct cvmx_lmcx_wlevel_dbg_s cn78xxp1; + struct cvmx_lmcx_wlevel_dbg_s cnf71xx; + struct cvmx_lmcx_wlevel_dbg_s cnf75xx; +}; + +/** + * cvmx_lmc#_wlevel_rank# + * + * Four of these CSRs exist per LMC, one for each rank. Write level setting + * is measured in units of 1/8 CK, so the below BYTEn values can range over + * 4 CK cycles. Assuming LMC()_WLEVEL_CTL[SSET]=0, the BYTEn<2:0> values are + * not used during write leveling, and they are overwritten by the hardware + * as part of the write leveling sequence. (Hardware sets [STATUS] to 3 after + * hardware write leveling completes for the rank). Software needs to set + * BYTEn<4:3> bits. + * + * Each CSR may also be written by software, but not while a write leveling + * sequence is in progress. (Hardware sets [STATUS] to 1 after a CSR write.) + * Software initiates a hardware write-leveling sequence by programming + * LMC()_WLEVEL_CTL and writing RANKMASK and INIT_START=1 with SEQ_SEL=6 in + * LMC*0_CONFIG. + * + * LMC will then step through and accumulate write leveling results for 8 + * unique delay settings (twice), starting at a delay of LMC()_WLEVEL_RANK() + * [BYTEn<4:3>]* 8 CK increasing by 1/8 CK each setting. Hardware will then + * set LMC()_WLEVEL_RANK()[BYTEn<2:0>] to indicate the first write leveling + * result of 1 that followed a result of 0 during the sequence by searching + * for a '1100' pattern in the generated bitmask, except that LMC will always + * write LMC()_WLEVEL_RANK()[BYTEn<0>]=0. If hardware is unable to find a match + * for a '1100' pattern, then hardware sets LMC()_WLEVEL_RANK() [BYTEn<2:0>] + * to 0x4. See LMC()_WLEVEL_CTL. + * + * LMC()_WLEVEL_RANKi values for ranks i without attached DRAM should be set + * such that they do not increase the range of possible BYTE values for any + * byte lane. The easiest way to do this is to set LMC()_WLEVEL_RANKi = + * LMC()_WLEVEL_RANKj, where j is some rank with attached DRAM whose + * LMC()_WLEVEL_RANKj is already fully initialized. 
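+ *
+ * As a worked example of the units: since each BYTEn step is 1/8 CK, a
+ * BYTEn setting of 0x10 (decimal 16) corresponds to a write-leveling
+ * delay of 16/8 = 2 CK.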
+ */ +union cvmx_lmcx_wlevel_rankx { + u64 u64; + struct cvmx_lmcx_wlevel_rankx_s { + uint64_t reserved_47_63:17; + uint64_t status:2; + uint64_t byte8:5; + uint64_t byte7:5; + uint64_t byte6:5; + uint64_t byte5:5; + uint64_t byte4:5; + uint64_t byte3:5; + uint64_t byte2:5; + uint64_t byte1:5; + uint64_t byte0:5; + } s; + struct cvmx_lmcx_wlevel_rankx_s cn61xx; + struct cvmx_lmcx_wlevel_rankx_s cn63xx; + struct cvmx_lmcx_wlevel_rankx_s cn63xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn66xx; + struct cvmx_lmcx_wlevel_rankx_s cn68xx; + struct cvmx_lmcx_wlevel_rankx_s cn68xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn70xx; + struct cvmx_lmcx_wlevel_rankx_s cn70xxp1; + struct cvmx_lmcx_wlevel_rankx_s cn73xx; + struct cvmx_lmcx_wlevel_rankx_s cn78xx; + struct cvmx_lmcx_wlevel_rankx_s cn78xxp1; + struct cvmx_lmcx_wlevel_rankx_s cnf71xx; + struct cvmx_lmcx_wlevel_rankx_s cnf75xx; +}; + +/** + * cvmx_lmc#_wodt_ctl0 + * + * LMC_WODT_CTL0 = LMC Write OnDieTermination control + * See the description in LMC_WODT_CTL1. + * + * Notes: + * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write + * ODT mask. See LMC_WODT_CTL1. + * + */ +union cvmx_lmcx_wodt_ctl0 { + u64 u64; + struct cvmx_lmcx_wodt_ctl0_s { + uint64_t reserved_0_63:64; + } s; + struct cvmx_lmcx_wodt_ctl0_cn30xx { + uint64_t reserved_32_63:32; + uint64_t wodt_d1_r1:8; + uint64_t wodt_d1_r0:8; + uint64_t wodt_d0_r1:8; + uint64_t wodt_d0_r0:8; + } cn30xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn31xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx { + uint64_t reserved_32_63:32; + uint64_t wodt_hi3:4; + uint64_t wodt_hi2:4; + uint64_t wodt_hi1:4; + uint64_t wodt_hi0:4; + uint64_t wodt_lo3:4; + uint64_t wodt_lo2:4; + uint64_t wodt_lo1:4; + uint64_t wodt_lo0:4; + } cn38xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn38xxp2; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn50xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn52xxp1; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xx; + struct cvmx_lmcx_wodt_ctl0_cn30xx cn56xxp1; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xx; + struct cvmx_lmcx_wodt_ctl0_cn38xx cn58xxp1; +}; + +/** + * cvmx_lmc#_wodt_ctl1 + * + * LMC_WODT_CTL1 = LMC Write OnDieTermination control + * System designers may desire to terminate DQ/DQS/DM lines for higher + * frequency DDR operations (667MHz and faster), especially on a multi-rank + * system. DDR2 DQ/DM/DQS I/O's have built in Termination resistor that can + * be turned on or off by the controller, after meeting tAOND and tAOF + * timing requirements. Each Rank has its own ODT pin that fans out to all + * the memory parts in that DIMM. System designers may prefer different + * combinations of ODT ON's for read and write into different ranks. Octeon + * supports full programmability by way of the mask register below. + * Each Rank position has its own 8-bit programmable field. + * When the controller does a write to that rank, it sets the 8 ODT pins + * to the MASK pins below. For eg., When doing a write into Rank0, a system + * designer may desire to terminate the lines with the resistor on + * Dimm0/Rank1. The mask WODT_D0_R0 would then be [00000010]. If ODT feature + * is not desired, the DDR parts can be programmed to not look at these pins by + * writing 0 in QS_DIC. Octeon drives the appropriate mask values on the ODT + * pins by default. + * If this feature is not required, write 0 in this register. + * + * Notes: + * Together, the LMC_WODT_CTL1 and LMC_WODT_CTL0 CSRs control the write + * ODT mask. 
When a given RANK is selected, the WODT mask for that RANK + * is used. The resulting WODT mask is driven to the DIMMs in the following + * manner: + * BUNK_ENA=1 BUNK_ENA=0 + * Mask[7] -> DIMM3, RANK1 DIMM3 + * Mask[6] -> DIMM3, RANK0 + * Mask[5] -> DIMM2, RANK1 DIMM2 + * Mask[4] -> DIMM2, RANK0 + * Mask[3] -> DIMM1, RANK1 DIMM1 + * Mask[2] -> DIMM1, RANK0 + * Mask[1] -> DIMM0, RANK1 DIMM0 + * Mask[0] -> DIMM0, RANK0 + */ +union cvmx_lmcx_wodt_ctl1 { + u64 u64; + struct cvmx_lmcx_wodt_ctl1_s { + uint64_t reserved_32_63:32; + uint64_t wodt_d3_r1:8; + uint64_t wodt_d3_r0:8; + uint64_t wodt_d2_r1:8; + uint64_t wodt_d2_r0:8; + } s; + struct cvmx_lmcx_wodt_ctl1_s cn30xx; + struct cvmx_lmcx_wodt_ctl1_s cn31xx; + struct cvmx_lmcx_wodt_ctl1_s cn52xx; + struct cvmx_lmcx_wodt_ctl1_s cn52xxp1; + struct cvmx_lmcx_wodt_ctl1_s cn56xx; + struct cvmx_lmcx_wodt_ctl1_s cn56xxp1; +}; + +/** + * cvmx_lmc#_wodt_mask + * + * System designers may desire to terminate DQ/DQS lines for higher-frequency + * DDR operations, especially on a multirank system. DDR3 DQ/DQS I/Os have + * built-in termination resistors that can be turned on or off by the + * controller, after meeting TAOND and TAOF timing requirements. Each rank + * has its own ODT pin that fans out to all of the memory parts in that DIMM. + * System designers may prefer different combinations of ODT ONs for write + * operations into different ranks. CNXXXX supports full programmability by + * way of the mask register below. Each rank position has its own 8-bit + * programmable field. When the controller does a write to that rank, + * it sets the four ODT pins to the mask pins below. For example, when + * doing a write into Rank0, a system designer may desire to terminate the + * lines with the resistor on DIMM0/Rank1. The mask [WODT_D0_R0] would then + * be [00000010]. + * + * CNXXXX drives the appropriate mask values on the ODT pins by default. + * If this feature is not required, write 0x0 in this register. When a + * given RANK is selected, the WODT mask for that RANK is used. The + * resulting WODT mask is driven to the DIMMs in the following manner: + */ +union cvmx_lmcx_wodt_mask { + u64 u64; + struct cvmx_lmcx_wodt_mask_s { + uint64_t wodt_d3_r1:8; + uint64_t wodt_d3_r0:8; + uint64_t wodt_d2_r1:8; + uint64_t wodt_d2_r0:8; + uint64_t wodt_d1_r1:8; + uint64_t wodt_d1_r0:8; + uint64_t wodt_d0_r1:8; + uint64_t wodt_d0_r0:8; + } s; + struct cvmx_lmcx_wodt_mask_s cn61xx; + struct cvmx_lmcx_wodt_mask_s cn63xx; + struct cvmx_lmcx_wodt_mask_s cn63xxp1; + struct cvmx_lmcx_wodt_mask_s cn66xx; + struct cvmx_lmcx_wodt_mask_s cn68xx; + struct cvmx_lmcx_wodt_mask_s cn68xxp1; + struct cvmx_lmcx_wodt_mask_cn70xx { + uint64_t reserved_28_63:36; + uint64_t wodt_d1_r1:4; + uint64_t reserved_20_23:4; + uint64_t wodt_d1_r0:4; + uint64_t reserved_12_15:4; + uint64_t wodt_d0_r1:4; + uint64_t reserved_4_7:4; + uint64_t wodt_d0_r0:4; + } cn70xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn70xxp1; + struct cvmx_lmcx_wodt_mask_cn70xx cn73xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn78xx; + struct cvmx_lmcx_wodt_mask_cn70xx cn78xxp1; + struct cvmx_lmcx_wodt_mask_s cnf71xx; + struct cvmx_lmcx_wodt_mask_cn70xx cnf75xx; +}; + +#endif From 073e8ee5df4844f53b68ddf108c9ef43c7f18251 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:05 +0200 Subject: [PATCH 04/27] mips: octeon: Add octeon_ddr.h header This header will be used by the DDR driver (lmc). Its ported from the 2013 Cavium / Marvell U-Boot repository. 
Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mach-octeon/include/mach/octeon_ddr.h | 982 ++++++++++++++++++ 1 file changed, 982 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/octeon_ddr.h diff --git a/arch/mips/mach-octeon/include/mach/octeon_ddr.h b/arch/mips/mach-octeon/include/mach/octeon_ddr.h new file mode 100644 index 0000000000..4473be4d44 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon_ddr.h @@ -0,0 +1,982 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_DDR_H_ +#define __OCTEON_DDR_H_ + +#include +#include +#include +#include +#include +#include + +/* Mapping is done starting from 0x11800.80000000 */ +#define CVMX_L2C_CTL 0x00800000 +#define CVMX_L2C_BIG_CTL 0x00800030 +#define CVMX_L2C_TADX_INT(i) (0x00a00028 + (((i) & 7) * 0x40000)) +#define CVMX_L2C_MCIX_INT(i) (0x00c00028 + (((i) & 3) * 0x40000)) + +/* Some "external" (non-LMC) registers */ +#define CVMX_IPD_CLK_COUNT 0x00014F0000000338 +#define CVMX_FPA_CLK_COUNT 0x00012800000000F0 + +#define CVMX_NODE_MEM_SHIFT 40 + +#define DDR_INTERFACE_MAX 4 + +/* Private data struct */ +struct ddr_priv { + void __iomem *lmc_base; + void __iomem *l2c_base; + + bool ddr_clock_initialized[DDR_INTERFACE_MAX]; + bool ddr_memory_preserved; + u32 flags; + + struct ram_info info; +}; + +/* Short cut to convert a number to megabytes */ +#define MB(X) ((u64)(X) * (u64)(1024 * 1024)) + +#define octeon_is_cpuid(x) (__OCTEON_IS_MODEL_COMPILE__(x, read_c0_prid())) + +#define strtoull simple_strtoull + +/* Access LMC registers */ +static inline u64 lmc_rd(struct ddr_priv *priv, u64 addr) +{ + return ioread64(priv->lmc_base + addr); +} + +static inline void lmc_wr(struct ddr_priv *priv, u64 addr, u64 val) +{ + iowrite64(val, priv->lmc_base + addr); +} + +/* Access L2C registers */ +static inline u64 l2c_rd(struct ddr_priv *priv, u64 addr) +{ + return ioread64(priv->l2c_base + addr); +} + +static inline void l2c_wr(struct ddr_priv *priv, u64 addr, u64 val) +{ + iowrite64(val, priv->l2c_base + addr); +} + +/* Access other CSR registers not located inside the LMC address space */ +static inline u64 csr_rd(u64 addr) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return ioread64(base); +} + +static inline void csr_wr(u64 addr, u64 val) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return iowrite64(val, base); +} + +/* "Normal" access, without any offsets and/or mapping */ +static inline u64 cvmx_read64_uint64(u64 addr) +{ + return readq((void *)addr); +} + +static inline void cvmx_write64_uint64(u64 addr, u64 val) +{ + writeq(val, (void *)addr); +} + +/* Failsafe mode */ +#define FLAG_FAILSAFE_MODE 0x01000 +/* Note that the DDR clock initialized flags must be contiguous */ +/* Clock for DDR 0 initialized */ +#define FLAG_DDR0_CLK_INITIALIZED 0x02000 +/* Clock for DDR 1 initialized */ +#define FLAG_DDR1_CLK_INITIALIZED 0x04000 +/* Clock for DDR 2 initialized */ +#define FLAG_DDR2_CLK_INITIALIZED 0x08000 +/* Clock for DDR 3 initialized */ +#define FLAG_DDR3_CLK_INITIALIZED 0x10000 +/* Loaded into RAM externally */ +#define FLAG_RAM_RESIDENT 0x20000 +/* Verbose DDR information */ +#define FLAG_DDR_VERBOSE 0x40000 +/* Check env. 
for DDR variables */ +#define FLAG_DDR_DEBUG 0x80000 +#define FLAG_DDR_TRACE_INIT 0x100000 +#define FLAG_MEMORY_PRESERVED 0x200000 +#define FLAG_DFM_VERBOSE 0x400000 +#define FLAG_DFM_TRACE_INIT 0x800000 +/* DFM memory clock initialized */ +#define FLAG_DFM_CLK_INITIALIZED 0x1000000 +/* EEPROM clock descr. missing */ +#define FLAG_CLOCK_DESC_MISSING 0x2000000 +/* EEPROM board descr. missing */ +#define FLAG_BOARD_DESC_MISSING 0x4000000 +#define FLAG_DDR_PROMPT 0x8000000 + +#ifndef DDR_NO_DEBUG +static inline int ddr_verbose(struct ddr_priv *priv) +{ + return !!(priv->flags & FLAG_DDR_VERBOSE); +} + +static inline char *ddr_getenv_debug(struct ddr_priv *priv, char *name) +{ + if (priv->flags & FLAG_FAILSAFE_MODE) + return NULL; + + if (priv->flags & FLAG_DDR_DEBUG) + return env_get(name); + + return NULL; +} +#else +static inline int ddr_verbose(void) +{ + return 0; +} +#endif + +/* turn the variable name into a string */ +#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x) +#define CVMX_TMP_STR2(x) #x + +#define CVMX_SYNC asm volatile ("sync" : : : "memory") + +#define CVMX_CACHE(op, address, offset) \ + asm volatile ("cache " CVMX_TMP_STR(op) ", " \ + CVMX_TMP_STR(offset) "(%[rbase])" \ + : : [rbase] "d" (address)) + +/* unlock the state */ +#define CVMX_CACHE_WBIL2(address, offset) \ + CVMX_CACHE(23, address, offset) + +/* complete prefetches, invalidate entire dcache */ +#define CVMX_DCACHE_INVALIDATE \ + { CVMX_SYNC; asm volatile ("cache 9, 0($0)" : : ); } + +/** + * cvmx_l2c_cfg + * + * Specify the RSL base addresses for the block + * + * L2C_CFG = L2C Configuration + * + * Description: + */ +union cvmx_l2c_cfg { + u64 u64; + struct cvmx_l2c_cfg_s { + uint64_t reserved_20_63:44; + uint64_t bstrun:1; + uint64_t lbist:1; + uint64_t xor_bank:1; + uint64_t dpres1:1; + uint64_t dpres0:1; + uint64_t dfill_dis:1; + uint64_t fpexp:4; + uint64_t fpempty:1; + uint64_t fpen:1; + uint64_t idxalias:1; + uint64_t mwf_crd:4; + uint64_t rsp_arb_mode:1; + uint64_t rfb_arb_mode:1; + uint64_t lrf_arb_mode:1; + } s; +}; + +/** + * cvmx_l2c_ctl + * + * L2C_CTL = L2C Control + * + * + * Notes: + * (1) If MAXVAB is != 0, VAB_THRESH should be less than MAXVAB. + * + * (2) L2DFDBE and L2DFSBE allows software to generate L2DSBE, L2DDBE, VBFSBE, + * and VBFDBE errors for the purposes of testing error handling code. When + * one (or both) of these bits are set a PL2 which misses in the L2 will fill + * with the appropriate error in the first 2 OWs of the fill. Software can + * determine which OW pair gets the error by choosing the desired fill order + * (address<6:5>). A PL2 which hits in the L2 will not inject any errors. + * Therefore sending a WBIL2 prior to the PL2 is recommended to make a miss + * likely (if multiple processors are involved software must be careful to be + * sure no other processor or IO device can bring the block into the L2). + * + * To generate a VBFSBE or VBFDBE, software must first get the cache block + * into the cache with an error using a PL2 which misses the L2. Then a + * store partial to a portion of the cache block without the error must + * change the block to dirty. Then, a subsequent WBL2/WBIL2/victim will + * trigger the VBFSBE/VBFDBE error. 
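+ *
+ * A minimal, illustrative sketch (using the l2c_rd()/l2c_wr() accessors
+ * and the CVMX_L2C_CTL offset defined earlier in this header; the
+ * DISSBLKDTY field is only an example) of a read-modify-write of this
+ * register on CN73XX:
+ *
+ *   union cvmx_l2c_ctl l2c_ctl;
+ *
+ *   l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
+ *   l2c_ctl.cn73xx.dissblkdty = 0;
+ *   l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);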
+ */ +union cvmx_l2c_ctl { + u64 u64; + struct cvmx_l2c_ctl_s { + uint64_t reserved_29_63:35; + uint64_t rdf_fast:1; + uint64_t disstgl2i:1; + uint64_t l2dfsbe:1; + uint64_t l2dfdbe:1; + uint64_t discclk:1; + uint64_t maxvab:4; + uint64_t maxlfb:4; + uint64_t rsp_arb_mode:1; + uint64_t xmc_arb_mode:1; + uint64_t reserved_2_13:12; + uint64_t disecc:1; + uint64_t disidxalias:1; + } s; + + struct cvmx_l2c_ctl_cn73xx { + uint64_t reserved_32_63:32; + uint64_t ocla_qos:3; + uint64_t reserved_28_28:1; + uint64_t disstgl2i:1; + uint64_t reserved_25_26:2; + uint64_t discclk:1; + uint64_t reserved_16_23:8; + uint64_t rsp_arb_mode:1; + uint64_t xmc_arb_mode:1; + uint64_t rdf_cnt:8; + uint64_t reserved_4_5:2; + uint64_t disldwb:1; + uint64_t dissblkdty:1; + uint64_t disecc:1; + uint64_t disidxalias:1; + } cn73xx; + + struct cvmx_l2c_ctl_cn73xx cn78xx; +}; + +/** + * cvmx_l2c_big_ctl + * + * L2C_BIG_CTL = L2C Big memory control register + * + * + * Notes: + * (1) BIGRD interrupts can occur during normal operation as the PP's are + * allowed to prefetch to non-existent memory locations. Therefore, + * BIGRD is for informational purposes only. + * + * (2) When HOLEWR/BIGWR blocks a store L2C_VER_ID, L2C_VER_PP, L2C_VER_IOB, + * and L2C_VER_MSC will be loaded just like a store which is blocked by VRTWR. + * Additionally, L2C_ERR_XMC will be loaded. + */ +union cvmx_l2c_big_ctl { + u64 u64; + struct cvmx_l2c_big_ctl_s { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_0_3:4; + } s; + struct cvmx_l2c_big_ctl_cn61xx { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_1_3:3; + uint64_t disable:1; + } cn61xx; + struct cvmx_l2c_big_ctl_cn61xx cn63xx; + struct cvmx_l2c_big_ctl_cn61xx cn66xx; + struct cvmx_l2c_big_ctl_cn61xx cn68xx; + struct cvmx_l2c_big_ctl_cn61xx cn68xxp1; + struct cvmx_l2c_big_ctl_cn70xx { + uint64_t reserved_8_63:56; + uint64_t maxdram:4; + uint64_t reserved_1_3:3; + uint64_t disbig:1; + } cn70xx; + struct cvmx_l2c_big_ctl_cn70xx cn70xxp1; + struct cvmx_l2c_big_ctl_cn70xx cn73xx; + struct cvmx_l2c_big_ctl_cn70xx cn78xx; + struct cvmx_l2c_big_ctl_cn70xx cn78xxp1; + struct cvmx_l2c_big_ctl_cn61xx cnf71xx; + struct cvmx_l2c_big_ctl_cn70xx cnf75xx; +}; + +struct rlevel_byte_data { + int delay; + int loop_total; + int loop_count; + int best; + u64 bm; + int bmerrs; + int sqerrs; + int bestsq; +}; + +#define DEBUG_VALIDATE_BITMASK 0 +#if DEBUG_VALIDATE_BITMASK +#define debug_bitmask_print printf +#else +#define debug_bitmask_print(...) +#endif + +#define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5 +// FIXME? now less than TOOLONG +#define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 +#define RLEVEL_BITMASK_NARROW_ERROR 6 +#define RLEVEL_BITMASK_BLANK_ERROR 100 +#define RLEVEL_BITMASK_TOOLONG_ERROR 12 +#define RLEVEL_NONSEQUENTIAL_DELAY_ERROR 50 +#define RLEVEL_ADJACENT_DELAY_ERROR 30 + +/* + * Apply a filter to the BITMASK results returned from Octeon + * read-leveling to determine the most likely delay result. This + * computed delay may be used to qualify the delay result returned by + * Octeon. Accumulate an error penalty for invalid characteristics of + * the bitmask so that they can be used to select the most reliable + * results. + * + * The algorithm searches for the largest contiguous MASK within a + * maximum RANGE of bits beginning with the MSB. + * + * 1. a MASK with a WIDTH less than 4 will be penalized + * 2. Bubbles in the bitmask that occur before or after the MASK + * will be penalized + * 3. 
If there are no trailing bubbles then extra bits that occur + * beyond the maximum RANGE will be penalized. + * + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + * + + + * + e.g. bitmask = 27B00 + + * + + + * + 63 +--- mstart 0 + + * + | | | + + * + | +---------+ +--- fb | + + * + | | range | | | + + * + V V V V V + + * + + + * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 + + * + + + * + ^ ^ ^ + + * + | | mask| + + * + lb ---+ +-----+ + + * + width + + * + + + * +++++++++++++++++++++++++++++++++++++++++++++++++++ + */ + +struct rlevel_bitmask { + u64 bm; + u8 mstart; + u8 width; + int errs; +}; + +#define MASKRANGE_BITS 6 +#define MASKRANGE ((1 << MASKRANGE_BITS) - 1) + +/* data field addresses in the DDR2 SPD eeprom */ +enum ddr2_spd_addrs { + DDR2_SPD_BYTES_PROGRAMMED = 0, + DDR2_SPD_TOTAL_BYTES = 1, + DDR2_SPD_MEM_TYPE = 2, + DDR2_SPD_NUM_ROW_BITS = 3, + DDR2_SPD_NUM_COL_BITS = 4, + DDR2_SPD_NUM_RANKS = 5, + DDR2_SPD_CYCLE_CLX = 9, + DDR2_SPD_CONFIG_TYPE = 11, + DDR2_SPD_REFRESH = 12, + DDR2_SPD_SDRAM_WIDTH = 13, + DDR2_SPD_BURST_LENGTH = 16, + DDR2_SPD_NUM_BANKS = 17, + DDR2_SPD_CAS_LATENCY = 18, + DDR2_SPD_DIMM_TYPE = 20, + DDR2_SPD_CYCLE_CLX1 = 23, + DDR2_SPD_CYCLE_CLX2 = 25, + DDR2_SPD_TRP = 27, + DDR2_SPD_TRRD = 28, + DDR2_SPD_TRCD = 29, + DDR2_SPD_TRAS = 30, + DDR2_SPD_TWR = 36, + DDR2_SPD_TWTR = 37, + DDR2_SPD_TRFC_EXT = 40, + DDR2_SPD_TRFC = 42, + DDR2_SPD_CHECKSUM = 63, + DDR2_SPD_MFR_ID = 64 +}; + +/* data field addresses in the DDR2 SPD eeprom */ +enum ddr3_spd_addrs { + DDR3_SPD_BYTES_PROGRAMMED = 0, + DDR3_SPD_REVISION = 1, + DDR3_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR3_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR3_SPD_DENSITY_BANKS = 4, + DDR3_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR3_SPD_NOMINAL_VOLTAGE = 6, + DDR3_SPD_MODULE_ORGANIZATION = 7, + DDR3_SPD_MEMORY_BUS_WIDTH = 8, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR = 9, + DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND = 10, + DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR = 11, + DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN = 12, + DDR3_SPD_CAS_LATENCIES_LSB = 14, + DDR3_SPD_CAS_LATENCIES_MSB = 15, + DDR3_SPD_MIN_CAS_LATENCY_TAAMIN = 16, + DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN = 17, + DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 18, + DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN = 19, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 20, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC = 21, + DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 22, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 23, + DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN = 24, + DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN = 25, + DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN = 26, + DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN = 27, + DDR3_SPD_UPPER_NIBBLE_TFAW = 28, + DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN = 29, + DDR3_SPD_SDRAM_OPTIONAL_FEATURES = 30, + DDR3_SPD_SDRAM_THERMAL_REFRESH_OPTIONS = 31, + DDR3_SPD_MODULE_THERMAL_SENSOR = 32, + DDR3_SPD_SDRAM_DEVICE_TYPE = 33, + DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN = 34, + DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 35, + DDR3_SPD_MIN_RAS_CAS_DELAY_FINE_TRCDMIN = 36, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 37, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_FINE_TRCMIN = 38, + DDR3_SPD_REFERENCE_RAW_CARD = 62, + DDR3_SPD_ADDRESS_MAPPING = 63, + DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB = 65, + DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB = 66, + DDR3_SPD_REGISTER_REVISION_NUMBER = 67, + DDR3_SPD_MODULE_SERIAL_NUMBER = 122, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR3_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR3_SPD_MODULE_PART_NUMBER = 128 +}; + +/* data field addresses in the DDR4 
SPD eeprom */ +enum ddr4_spd_addrs { + DDR4_SPD_BYTES_PROGRAMMED = 0, + DDR4_SPD_REVISION = 1, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE = 2, + DDR4_SPD_KEY_BYTE_MODULE_TYPE = 3, + DDR4_SPD_DENSITY_BANKS = 4, + DDR4_SPD_ADDRESSING_ROW_COL_BITS = 5, + DDR4_SPD_PACKAGE_TYPE = 6, + DDR4_SPD_OPTIONAL_FEATURES = 7, + DDR4_SPD_THERMAL_REFRESH_OPTIONS = 8, + DDR4_SPD_OTHER_OPTIONAL_FEATURES = 9, + DDR4_SPD_SECONDARY_PACKAGE_TYPE = 10, + DDR4_SPD_MODULE_NOMINAL_VOLTAGE = 11, + DDR4_SPD_MODULE_ORGANIZATION = 12, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH = 13, + DDR4_SPD_MODULE_THERMAL_SENSOR = 14, + DDR4_SPD_RESERVED_BYTE15 = 15, + DDR4_SPD_RESERVED_BYTE16 = 16, + DDR4_SPD_TIMEBASES = 17, + DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN = 18, + DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX = 19, + DDR4_SPD_CAS_LATENCIES_BYTE0 = 20, + DDR4_SPD_CAS_LATENCIES_BYTE1 = 21, + DDR4_SPD_CAS_LATENCIES_BYTE2 = 22, + DDR4_SPD_CAS_LATENCIES_BYTE3 = 23, + DDR4_SPD_MIN_CAS_LATENCY_TAAMIN = 24, + DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN = 25, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN = 26, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC = 27, + DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN = 28, + DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN = 29, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN = 30, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN = 31, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN = 32, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN = 33, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN = 34, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN = 35, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN = 36, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN = 37, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN = 38, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN = 39, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN = 40, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN = 117, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN = 118, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN = 119, + DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN = 120, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN = 121, + DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN = 122, + DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN = 123, + DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX = 124, + DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN = 125, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_LOWER_NIBBLE = 126, + DDR4_SPD_CYCLICAL_REDUNDANCY_CODE_UPPER_NIBBLE = 127, + DDR4_SPD_REFERENCE_RAW_CARD = 130, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE = 131, + DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB = 133, + DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB = 134, + DDR4_SPD_REGISTER_REVISION_NUMBER = 135, + DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM = 136, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL = 137, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK = 138, +}; + +#define SPD_EEPROM_SIZE (DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK + 1) + +struct impedence_values { + unsigned char *rodt_ohms; + unsigned char *rtt_nom_ohms; + unsigned char *rtt_nom_table; + unsigned char *rtt_wr_ohms; + unsigned char *dic_ohms; + short *drive_strength; + short *dqx_strength; +}; + +#define RODT_OHMS_COUNT 8 +#define RTT_NOM_OHMS_COUNT 8 +#define RTT_NOM_TABLE_COUNT 8 +#define RTT_WR_OHMS_COUNT 8 +#define DIC_OHMS_COUNT 3 +#define DRIVE_STRENGTH_COUNT 15 + +/* + * Structure that provides DIMM information, either in the form of an SPD + * TWSI address, or a pointer to an array that contains SPD data. One of + * the two fields must be valid. 
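+ *
+ * A hedged, illustrative board-table entry (the TWSI addresses 0x50 and
+ * 0x51 are assumptions; DIMM_CONFIG_TERMINATOR is defined later in this
+ * header) using SPD EEPROMs rather than pre-cached SPD data:
+ *
+ *   .dimm_config_table = {
+ *           { .spd_addrs = {0x50, 0x51} },
+ *           DIMM_CONFIG_TERMINATOR
+ *   },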
+ */ +struct dimm_config { + u16 spd_addrs[2]; /* TWSI address of SPD, 0 if not used */ + u8 *spd_ptrs[2]; /* pointer to SPD data array, NULL if not used */ + int spd_cached[2]; + u8 spd_data[2][SPD_EEPROM_SIZE]; +}; + +struct dimm_odt_config { + u8 odt_ena; /* FIX: dqx_ctl for Octeon 3 DDR4 */ + u64 odt_mask; /* FIX: wodt_mask for Octeon 3 */ + union cvmx_lmcx_modereg_params1 modereg_params1; + union cvmx_lmcx_modereg_params2 modereg_params2; + u8 qs_dic; /* FIX: rodt_ctl for Octeon 3 */ + u64 rodt_ctl; /* FIX: rodt_mask for Octeon 3 */ + u8 dic; +}; + +struct ddr_delay_config { + u32 ddr_board_delay; + u8 lmc_delay_clk; + u8 lmc_delay_cmd; + u8 lmc_delay_dq; +}; + +/* + * The parameters below make up the custom_lmc_config data structure. + * This structure is used to customize the way that the LMC DRAM + * Controller is configured for a particular board design. + * + * The HRM describes LMC Read Leveling which supports automatic + * selection of per byte-lane delays. When measuring the read delays + * the LMC configuration software sweeps through a range of settings + * for LMC0_COMP_CTL2[RODT_CTL], the Octeon II on-die-termination + * resistance and LMC0_MODEREG_PARAMS1[RTT_NOM_XX], the DRAM + * on-die-termination resistance. The minimum and maximum parameters + * for rtt_nom_idx and rodt_ctl listed below determine the ranges of + * ODT settings used for the measurements. Note that for rtt_nom an + * index is used into a sorted table rather than the direct csr setting + * in order to optimize the sweep. + * + * .min_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms + * .max_rtt_nom_idx: 1=120ohms, 2=60ohms, 3=40ohms, 4=30ohms, 5=20ohms + * .min_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms + * .max_rodt_ctl: 1=20ohms, 2=30ohms, 3=40ohms, 4=60ohms, 5=120ohms + * + * The settings below control the Octeon II drive strength for the CK, + * ADD/CMD, and DQ/DQS signals. 1=24ohms, 2=26.67ohms, 3=30ohms, + * 4=34.3ohms, 5=40ohms, 6=48ohms, 6=60ohms. + * + * .dqx_ctl: Drive strength control for DDR_DQX/DDR_DQS_X_P/N drivers. + * .ck_ctl: Drive strength control for + * DDR_CK_X_P/DDR_DIMMX_CSX_L/DDR_DIMMX_ODT_X drivers. + * .cmd_ctl: Drive strength control for CMD/A/RESET_L/CKEX drivers. + * + * The LMC controller software selects the most optimal CAS Latency + * that complies with the appropriate SPD values and the frequency + * that the DRAMS are being operated. When operating the DRAMs at + * frequencies substantially lower than their rated frequencies it + * might be necessary to limit the minimum CAS Latency the LMC + * controller software is allowed to select in order to make the DRAM + * work reliably. + * + * .min_cas_latency: Minimum allowed CAS Latency + * + * The value used for LMC0_RLEVEL_CTL[OFFSET_EN] determine how the + * read-leveling information that the Octeon II gathers is interpreted + * to determine the per-byte read delays. + * + * .offset_en: Value used for LMC0_RLEVEL_CTL[OFFSET_EN]. + * .offset_udimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for UDIMMS. + * .offset_rdimm: Value used for LMC0_RLEVEL_CTL[OFFSET] for RDIMMS. + * + * The LMC configuration software sweeps through a range of ODT + * settings while measuring the per-byte read delays. During those + * measurements the software makes an assessment of the quality of the + * measurements in order to determine which measurements provide the + * most accurate delays. 
The automatic settings provide the option to + * allow that same assessment to determine the most optimal RODT_CTL + * and/or RTT_NOM settings. + * + * The automatic approach might provide the best means to determine + * the settings used for initial poweron of a new design. However, + * the final settings should be determined by board analysis, testing, + * and experience. + * + * .ddr_rtt_nom_auto: 1 means automatically set RTT_NOM value. + * .ddr_rodt_ctl_auto: 1 means automatically set RODT_CTL value. + * + * .rlevel_compute: Enables software interpretation of per-byte read + * delays using the measurements collected by the + * Octeon II rather than completely relying on the + * Octeon II to determine the delays. 1=software + * computation is recomended since a more complete + * analysis is implemented in software. + * + * .rlevel_comp_offset: Set to 2 unless instructed differently by Cavium. + * + * .rlevel_average_loops: Determines the number of times the read-leveling + * sequence is run for each rank. The results is + * then averaged across the number of loops. The + * default setting is 1. + * + * .ddr2t_udimm: + * .ddr2t_rdimm: Turn on the DDR 2T mode. 2-cycle window for CMD and + * address. This mode helps relieve setup time pressure + * on the address and command bus. Please refer to + * Micron's tech note tn_47_01 titled DDR2-533 Memory + * Design Guide for Two Dimm Unbuffered Systems for + * physical details. + * + * .disable_sequential_delay_check: As result of the flyby topology + * prescribed in the JEDEC specifications the byte delays should + * maintain a consistent increasing or decreasing trend across + * the bytes on standard dimms. This setting can be used disable + * that check for unusual circumstances where the check is not + * useful. + * + * .maximum_adjacent_rlevel_delay_increment: An additional sequential + * delay check for the delays that result from the flyby + * topology. This value specifies the maximum difference between + * the delays of adjacent bytes. A value of 0 disables this + * check. + * + * .fprch2 Front Porch Enable: When set, the turn-off + * time for the default DDR_DQ/DQS drivers is FPRCH2 CKs earlier. + * 00 = 0 CKs + * 01 = 1 CKs + * 10 = 2 CKs + * + * .parity: The parity input signal PAR_IN on each dimm must be + * strapped high or low on the board. This bit is programmed + * into LMC0_DIMM_CTL[PARITY] and it must be set to match the + * board strapping. This signal is typically strapped low. + * + * .mode32b: Enable 32-bit datapath mode. Set to 1 if only 32 DQ pins + * are used. (cn61xx, cn71xx) + * + * .measured_vref: Set to 1 to measure VREF; set to 0 to compute VREF. + * + * .dram_connection: Set to 1 if discrete DRAMs; set to 0 if using DIMMs. + * This changes the algorithms used to compute VREF. 
+ * + * .dll_write_offset: FIXME: Add description + * .dll_read_offset: FIXME: Add description + */ + +struct rlevel_table { + const char part[20]; + int speed; + u64 rl_rank[4][4]; +}; + +struct ddr3_custom_config { + u8 min_rtt_nom_idx; + u8 max_rtt_nom_idx; + u8 min_rodt_ctl; + u8 max_rodt_ctl; + u8 dqx_ctl; + u8 ck_ctl; + u8 cmd_ctl; + u8 ctl_ctl; + u8 min_cas_latency; + u8 offset_en; + u8 offset_udimm; + u8 offset_rdimm; + u8 rlevel_compute; + u8 ddr_rtt_nom_auto; + u8 ddr_rodt_ctl_auto; + u8 rlevel_comp_offset_udimm; + u8 rlevel_comp_offset_rdimm; + int8_t ptune_offset; + int8_t ntune_offset; + u8 rlevel_average_loops; + u8 ddr2t_udimm; + u8 ddr2t_rdimm; + u8 disable_sequential_delay_check; + u8 maximum_adjacent_rlevel_delay_increment; + u8 parity; + u8 fprch2; + u8 mode32b; + u8 measured_vref; + u8 dram_connection; + const int8_t *dll_write_offset; + const int8_t *dll_read_offset; + struct rlevel_table *rl_tbl; +}; + +#define DDR_CFG_T_MAX_DIMMS 5 + +struct ddr_conf { + struct dimm_config dimm_config_table[DDR_CFG_T_MAX_DIMMS]; + struct dimm_odt_config odt_1rank_config[4]; + struct dimm_odt_config odt_2rank_config[4]; + struct dimm_odt_config odt_4rank_config[4]; + struct ddr_delay_config unbuffered; + struct ddr_delay_config registered; + struct ddr3_custom_config custom_lmc_config; +}; + +/* Divide and round results to the nearest integer. */ +static inline u64 divide_nint(u64 dividend, u64 divisor) +{ + u64 quotent, remainder; + + quotent = dividend / divisor; + remainder = dividend % divisor; + return (quotent + ((remainder * 2) >= divisor)); +} + +/* Divide and round results up to the next higher integer. */ +static inline u64 divide_roundup(u64 dividend, u64 divisor) +{ + return ((dividend + divisor - 1) / divisor); +} + +enum ddr_type { + DDR3_DRAM = 3, + DDR4_DRAM = 4, +}; + +#define rttnom_none 0 /* Rtt_Nom disabled */ +#define rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define rttnom_20ohm 4 /* RZQ/12 = 240/12 = 20 ohms */ +#define rttnom_30ohm 5 /* RZQ/8 = 240/8 = 30 ohms */ +#define rttnom_rsrv1 6 /* Reserved */ +#define rttnom_rsrv2 7 /* Reserved */ + +#define rttwr_none 0 /* Dynamic ODT off */ +#define rttwr_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define rttwr_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define rttwr_rsrv1 3 /* Reserved */ + +#define dic_40ohm 0 /* RZQ/6 = 240/6 = 40 ohms */ +#define dic_34ohm 1 /* RZQ/7 = 240/7 = 34 ohms */ + +#define driver_24_ohm 1 +#define driver_27_ohm 2 +#define driver_30_ohm 3 +#define driver_34_ohm 4 +#define driver_40_ohm 5 +#define driver_48_ohm 6 +#define driver_60_ohm 7 + +#define rodt_ctl_none 0 +#define rodt_ctl_20_ohm 1 +#define rodt_ctl_30_ohm 2 +#define rodt_ctl_40_ohm 3 +#define rodt_ctl_60_ohm 4 +#define rodt_ctl_120_ohm 5 + +#define ddr4_rttnom_none 0 /* Rtt_Nom disabled */ +#define ddr4_rttnom_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttnom_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttnom_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttnom_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttnom_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttnom_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttnom_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_rttwr_none 0 /* Dynamic ODT off */ +#define ddr4_rttwr_120ohm 1 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttwr_240ohm 2 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttwr_hiz 3 /* HiZ */ +/* This setting is 
available for cn78xx pass 2, and cn73xx & cnf75xx pass 1 */ +#define ddr4_rttwr_80ohm 4 /* RZQ/3 = 240/3 = 80 ohms */ + +#define ddr4_dic_34ohm 0 /* RZQ/7 = 240/7 = 34 ohms */ +#define ddr4_dic_48ohm 1 /* RZQ/5 = 240/5 = 48 ohms */ + +#define ddr4_rttpark_none 0 /* Rtt_Park disabled */ +#define ddr4_rttpark_60ohm 1 /* RZQ/4 = 240/4 = 60 ohms */ +#define ddr4_rttpark_120ohm 2 /* RZQ/2 = 240/2 = 120 ohms */ +#define ddr4_rttpark_40ohm 3 /* RZQ/6 = 240/6 = 40 ohms */ +#define ddr4_rttpark_240ohm 4 /* RZQ/1 = 240/1 = 240 ohms */ +#define ddr4_rttpark_48ohm 5 /* RZQ/5 = 240/5 = 48 ohms */ +#define ddr4_rttpark_80ohm 6 /* RZQ/3 = 240/3 = 80 ohms */ +#define ddr4_rttpark_34ohm 7 /* RZQ/7 = 240/7 = 34 ohms */ + +#define ddr4_driver_26_ohm 2 +#define ddr4_driver_30_ohm 3 +#define ddr4_driver_34_ohm 4 +#define ddr4_driver_40_ohm 5 +#define ddr4_driver_48_ohm 6 + +#define ddr4_dqx_driver_24_ohm 1 +#define ddr4_dqx_driver_27_ohm 2 +#define ddr4_dqx_driver_30_ohm 3 +#define ddr4_dqx_driver_34_ohm 4 +#define ddr4_dqx_driver_40_ohm 5 +#define ddr4_dqx_driver_48_ohm 6 +#define ddr4_dqx_driver_60_ohm 7 + +#define ddr4_rodt_ctl_none 0 +#define ddr4_rodt_ctl_40_ohm 1 +#define ddr4_rodt_ctl_60_ohm 2 +#define ddr4_rodt_ctl_80_ohm 3 +#define ddr4_rodt_ctl_120_ohm 4 +#define ddr4_rodt_ctl_240_ohm 5 +#define ddr4_rodt_ctl_34_ohm 6 +#define ddr4_rodt_ctl_48_ohm 7 + +#define DIMM_CONFIG_TERMINATOR { {0, 0}, {NULL, NULL} } + +#define SET_DDR_DLL_CTL3(field, expr) \ + do { \ + if (octeon_is_cpuid(OCTEON_CN66XX) || \ + octeon_is_cpuid(OCTEON_CN63XX)) \ + ddr_dll_ctl3.cn63xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN68XX) || \ + octeon_is_cpuid(OCTEON_CN61XX) || \ + octeon_is_cpuid(OCTEON_CNF71XX)) \ + ddr_dll_ctl3.cn61xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN70XX) || \ + octeon_is_cpuid(OCTEON_CN78XX)) \ + ddr_dll_ctl3.cn70xx.field = (expr); \ + else if (octeon_is_cpuid(OCTEON_CN73XX) || \ + octeon_is_cpuid(OCTEON_CNF75XX)) \ + ddr_dll_ctl3.cn73xx.field = (expr); \ + else \ + debug("%s(): " #field \ + "not set for unknown chip\n", \ + __func__); \ + } while (0) + +#define ENCODE_DLL90_BYTE_SEL(byte_sel) \ + (octeon_is_cpuid(OCTEON_CN70XX) ? ((9 + 7 - (byte_sel)) % 9) : \ + ((byte_sel) + 1)) + +/** + * If debugging is disabled the ddr_print macro is not compatible + * with this macro. + */ +# define GET_DDR_DLL_CTL3(field) \ + ((octeon_is_cpuid(OCTEON_CN66XX) || \ + octeon_is_cpuid(OCTEON_CN63XX)) ? \ + ddr_dll_ctl3.cn63xx.field : \ + (octeon_is_cpuid(OCTEON_CN68XX) || \ + octeon_is_cpuid(OCTEON_CN61XX) || \ + octeon_is_cpuid(OCTEON_CNF71XX)) ? \ + ddr_dll_ctl3.cn61xx.field : \ + (octeon_is_cpuid(OCTEON_CN70XX) || \ + octeon_is_cpuid(OCTEON_CN78XX)) ? \ + ddr_dll_ctl3.cn70xx.field : \ + (octeon_is_cpuid(OCTEON_CN73XX) || \ + octeon_is_cpuid(OCTEON_CNF75XX)) ? 
\ + ddr_dll_ctl3.cn73xx.field : 0) + +extern const char *ddr3_dimm_types[]; +extern const char *ddr4_dimm_types[]; + +extern const struct dimm_odt_config disable_odt_config[]; + +#define RLEVEL_BYTE_BITS 6 +#define RLEVEL_BYTE_MSK ((1ULL << 6) - 1) + +/* Prototypes */ +int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm); +int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type); +int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field); +int read_spd_init(struct dimm_config *dimm_config, int dimm_index); +void report_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num); +int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config, + int dimm_index); +char *printable_rank_spec(char *buffer, int num_ranks, int dram_width, + int spd_package); + +bool ddr_memory_preserved(struct ddr_priv *priv); + +int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte); +int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte); +void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte, + int delay); +void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte, + int delay); + +int compute_ddr3_rlevel_delay(u8 mstart, u8 width, + union cvmx_lmcx_rlevel_ctl rlevel_ctl); + +int encode_row_lsb_ddr3(int row_lsb); +int encode_pbank_lsb_ddr3(int pbank_lsb); + +int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf, + u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask); + +void process_custom_dll_offsets(struct ddr_priv *priv, int if_num, + const char *enable_str, + const int8_t *offsets, const char *byte_str, + int mode); +int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end, + int max_adj_delay_inc); +int roundup_ddr3_wlevel_bitmask(int bitmask); + +void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence); +void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num); + +void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte); + +int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p, + int ddr_type); + +void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change); +unsigned short load_dll_offset(struct ddr_priv *priv, int if_num, + int dll_offset_mode, + int byte_offset, int byte); + +u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx); +u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx); + +void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed); +void cvmx_dbi_switchover(struct ddr_priv *priv); + +int init_octeon3_ddr3_interface(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, + u32 ddr_hertz, u32 cpu_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask); + +char *lookup_env(struct ddr_priv *priv, const char *format, ...); +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...); + +/* Each board provides a board-specific config table via this function */ +struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq); + +#endif /* __OCTEON_DDR_H_ */ From e13bb86588b19dde84b4b04d38076b374592a2f8 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:06 +0200 Subject: [PATCH 05/27] ram: octeon: Add MIPS Octeon3 DDR4 support (part 1/3) This Octeon 3 DDR driver is ported from the 2013 Cavium / Marvell U-Boot repository. It currently supports DDR4 on Octeon 3. 
It can be later extended to support also DDR3 and Octeon 2 platforms. Part 1 adds the base U-Boot RAM driver, which will be instantiated by the DT based probing. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- drivers/ram/octeon/octeon_ddr.c | 2728 +++++++++++++++++++++++++++++++ 1 file changed, 2728 insertions(+) create mode 100644 drivers/ram/octeon/octeon_ddr.c diff --git a/drivers/ram/octeon/octeon_ddr.c b/drivers/ram/octeon/octeon_ddr.c new file mode 100644 index 0000000000..757436b9d3 --- /dev/null +++ b/drivers/ram/octeon/octeon_ddr.c @@ -0,0 +1,2728 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define CONFIG_REF_HERTZ 50000000 + +DECLARE_GLOBAL_DATA_PTR; + +/* Sign of an integer */ +static s64 _sign(s64 v) +{ + return (v < 0); +} + +#ifndef DDR_NO_DEBUG +char *lookup_env(struct ddr_priv *priv, const char *format, ...) +{ + char *s; + unsigned long value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + + s = ddr_getenv_debug(priv, buffer); + if (s) { + value = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment %s=\"%s\" 0x%lx (%ld)\n", + buffer, s, value, value); + } + + return s; +} + +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...) +{ + char *s; + u64 value; + va_list args; + char buffer[64]; + + va_start(args, format); + vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + + s = ddr_getenv_debug(priv, buffer); + if (s) { + value = simple_strtoull(s, NULL, 0); + printf("Parameter found in environment. %s = 0x%016llx\n", + buffer, value); + } + + return s; +} +#else +char *lookup_env(struct ddr_priv *priv, const char *format, ...) +{ + return NULL; +} + +char *lookup_env_ull(struct ddr_priv *priv, const char *format, ...) +{ + return NULL; +} +#endif + +/* Number of L2C Tag-and-data sections (TADs) that are connected to LMC. */ +#define CVMX_L2C_TADS ((OCTEON_IS_MODEL(OCTEON_CN68XX) || \ + OCTEON_IS_MODEL(OCTEON_CN73XX) || \ + OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 4 : \ + (OCTEON_IS_MODEL(OCTEON_CN78XX)) ? 8 : 1) + +/* Number of L2C IOBs connected to LMC. */ +#define CVMX_L2C_IOBS ((OCTEON_IS_MODEL(OCTEON_CN68XX) || \ + OCTEON_IS_MODEL(OCTEON_CN78XX) || \ + OCTEON_IS_MODEL(OCTEON_CN73XX) || \ + OCTEON_IS_MODEL(OCTEON_CNF75XX)) ? 2 : 1) + +#define CVMX_L2C_MAX_MEMSZ_ALLOWED (OCTEON_IS_OCTEON2() ? \ + (32 * CVMX_L2C_TADS) : \ + (OCTEON_IS_MODEL(OCTEON_CN70XX) ? \ + 512 : (OCTEON_IS_OCTEON3() ? 1024 : 0))) + +/** + * Initialize the BIG address in L2C+DRAM to generate proper error + * on reading/writing to an non-existent memory location. + * + * @param node OCX CPU node number + * @param mem_size Amount of DRAM configured in MB. + * @param mode Allow/Disallow reporting errors L2C_INT_SUM[BIGRD,BIGWR]. 
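+ *
+ * mem_size must be a power of two and at least 512 MB;
+ * L2C_BIG_CTL[MAXDRAM] is then programmed as log2(mem_size) - 8.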
+ */ +static void cvmx_l2c_set_big_size(struct ddr_priv *priv, u64 mem_size, int mode) +{ + if ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) && + !OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) { + union cvmx_l2c_big_ctl big_ctl; + int bits = 0, zero_bits = 0; + u64 mem; + + if (mem_size > (CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024ull)) { + printf("WARNING: Invalid memory size(%lld) requested, should be <= %lld\n", + mem_size, + (u64)CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024); + mem_size = CVMX_L2C_MAX_MEMSZ_ALLOWED * 1024; + } + + mem = mem_size; + while (mem) { + if ((mem & 1) == 0) + zero_bits++; + bits++; + mem >>= 1; + } + + if ((bits - zero_bits) != 1 || (bits - 9) <= 0) { + printf("ERROR: Invalid DRAM size (%lld) requested, refer to L2C_BIG_CTL[maxdram] for valid options.\n", + mem_size); + return; + } + + /* + * The BIG/HOLE is logic is not supported in pass1 as per + * Errata L2C-17736 + */ + if (mode == 0 && OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) + mode = 1; + + big_ctl.u64 = 0; + big_ctl.s.maxdram = bits - 9; + big_ctl.cn61xx.disable = mode; + l2c_wr(priv, CVMX_L2C_BIG_CTL, big_ctl.u64); + } +} + +static u32 octeon3_refclock(u32 alt_refclk, u32 ddr_hertz, + struct dimm_config *dimm_config) +{ + u32 ddr_ref_hertz = CONFIG_REF_HERTZ; + int ddr_type; + int spd_dimm_type; + + debug("%s(%u, %u, %p)\n", __func__, alt_refclk, ddr_hertz, dimm_config); + + /* Octeon 3 case... */ + + /* we know whether alternate refclk is always wanted + * we also know already if we want 2133 MT/s + * if alt refclk not always wanted, then probe DDR and + * DIMM type if DDR4 and RDIMMs, then set desired refclk + * to 100MHz, otherwise to default (50MHz) + * depend on ddr_initialize() to do the refclk selection + * and validation/ + */ + if (alt_refclk) { + /* + * If alternate refclk was specified, let it override + * everything + */ + ddr_ref_hertz = alt_refclk * 1000000; + printf("%s: DRAM init: %d MHz refclk is REQUESTED ALWAYS\n", + __func__, alt_refclk); + } else if (ddr_hertz > 1000000000) { + ddr_type = get_ddr_type(dimm_config, 0); + spd_dimm_type = get_dimm_module_type(dimm_config, 0, ddr_type); + + debug("ddr type: 0x%x, dimm type: 0x%x\n", ddr_type, + spd_dimm_type); + /* Is DDR4 and RDIMM just to be sure. */ + if (ddr_type == DDR4_DRAM && + (spd_dimm_type == 1 || spd_dimm_type == 5 || + spd_dimm_type == 8)) { + /* Yes, we require 100MHz refclk, so set it. 
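+ * (DDR4 module type codes 1, 5 and 8 correspond to
+ * RDIMM, Mini-RDIMM and 72b-SO-RDIMM respectively.)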
*/ + ddr_ref_hertz = 100000000; + puts("DRAM init: 100 MHz refclk is REQUIRED\n"); + } + } + + debug("%s: speed: %u\n", __func__, ddr_ref_hertz); + return ddr_ref_hertz; +} + +int encode_row_lsb_ddr3(int row_lsb) +{ + int row_lsb_start = 14; + + /* Decoding for row_lsb */ + /* 000: row_lsb = mem_adr[14] */ + /* 001: row_lsb = mem_adr[15] */ + /* 010: row_lsb = mem_adr[16] */ + /* 011: row_lsb = mem_adr[17] */ + /* 100: row_lsb = mem_adr[18] */ + /* 101: row_lsb = mem_adr[19] */ + /* 110: row_lsb = mem_adr[20] */ + /* 111: RESERVED */ + + if (octeon_is_cpuid(OCTEON_CN6XXX) || + octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX)) + row_lsb_start = 14; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + return row_lsb - row_lsb_start; +} + +int encode_pbank_lsb_ddr3(int pbank_lsb) +{ + /* Decoding for pbank_lsb */ + /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */ + /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */ + /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */ + /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */ + /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */ + /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */ + /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */ + /* 0111:DIMM = 0 / rank = mem_adr[34] " */ + /* 1000-1111: RESERVED */ + + int pbank_lsb_start = 0; + + if (octeon_is_cpuid(OCTEON_CN6XXX) || + octeon_is_cpuid(OCTEON_CNF7XXX) || octeon_is_cpuid(OCTEON_CN7XXX)) + pbank_lsb_start = 28; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + return pbank_lsb - pbank_lsb_start; +} + +static void set_ddr_clock_initialized(struct ddr_priv *priv, int if_num, + bool inited_flag) +{ + priv->ddr_clock_initialized[if_num] = inited_flag; +} + +static int ddr_clock_initialized(struct ddr_priv *priv, int if_num) +{ + return priv->ddr_clock_initialized[if_num]; +} + +static void set_ddr_memory_preserved(struct ddr_priv *priv) +{ + priv->ddr_memory_preserved = true; +} + +bool ddr_memory_preserved(struct ddr_priv *priv) +{ + return priv->ddr_memory_preserved; +} + +static void cn78xx_lmc_dreset_init(struct ddr_priv *priv, int if_num) +{ + union cvmx_lmcx_dll_ctl2 dll_ctl2; + + /* + * The remainder of this section describes the sequence for LMCn. + * + * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value + * (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from + * that set in the prior Step 3), including + * LMC(0..3)_DLL_CTL2[DRESET] = 1. + * + * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dll_bringup = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 4. Wait for a minimum of 10 LMC CK cycles. + */ + + udelay(1); + + /* + * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1. + * LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.quad_dll_ena = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 7. Wait a minimum of 10 us. 
+ */ + + udelay(10); + + /* + * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0. + * LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point + * without restarting the LMCn DRESET initialization sequence. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dll_bringup = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); + + /* + * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + + /* + * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write + * LMC(0..3)_DLL_CTL2[DRESET] = 0. + * LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without + * restarting the LMCn DRESET initialization sequence. + * + * After completing LMCn DRESET initialization, all LMC CSRs may be + * accessed. Prior to completing LMC DRESET initialization, only + * LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and + * LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed. + */ + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(if_num)); + dll_ctl2.cn78xx.dreset = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(if_num), dll_ctl2.u64); +} + +int initialize_ddr_clock(struct ddr_priv *priv, struct ddr_conf *ddr_conf, + u32 cpu_hertz, u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask) +{ + char *s; + + if (ddr_clock_initialized(priv, if_num)) + return 0; + + if (!ddr_clock_initialized(priv, 0)) { /* Do this once */ + union cvmx_lmcx_reset_ctl reset_ctl; + int i; + + /* + * Check to see if memory is to be preserved and set global + * flag + */ + for (i = 3; i >= 0; --i) { + if ((if_mask & (1 << i)) == 0) + continue; + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + if (reset_ctl.s.ddr3psv == 1) { + debug("LMC%d Preserving memory\n", i); + set_ddr_memory_preserved(priv); + + /* Re-initialize flags */ + reset_ctl.s.ddr3pwarm = 0; + reset_ctl.s.ddr3psoft = 0; + reset_ctl.s.ddr3psv = 0; + lmc_wr(priv, CVMX_LMCX_RESET_CTL(i), + reset_ctl.u64); + } + } + } + + /* + * ToDo: Add support for these SoCs: + * + * if (octeon_is_cpuid(OCTEON_CN63XX) || + * octeon_is_cpuid(OCTEON_CN66XX) || + * octeon_is_cpuid(OCTEON_CN61XX) || octeon_is_cpuid(OCTEON_CNF71XX)) + * + * and + * + * if (octeon_is_cpuid(OCTEON_CN68XX)) + * + * and + * + * if (octeon_is_cpuid(OCTEON_CN70XX)) + * + */ + + if (octeon_is_cpuid(OCTEON_CN78XX) || octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + union cvmx_lmcx_dll_ctl2 dll_ctl2; + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; + struct dimm_config *dimm_config_table = + ddr_conf->dimm_config_table; + int en_idx, save_en_idx, best_en_idx = 0; + u64 clkf, clkr, max_clkf = 127; + u64 best_clkf = 0, best_clkr = 0; + u64 best_pll_MHz = 0; + u64 pll_MHz; + u64 min_pll_MHz = 800; + u64 max_pll_MHz = 5000; + u64 error; + u64 best_error; + u64 best_calculated_ddr_hertz = 0; + u64 calculated_ddr_hertz = 0; + u64 orig_ddr_hertz = ddr_hertz; + const int _en[] = { 1, 2, 3, 4, 5, 6, 7, 8, 10, 12 }; + int override_pll_settings; + int new_bwadj; + int ddr_type; + int i; + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == + 0x0C) ? DDR4_DRAM : DDR3_DRAM; + + /* + * 5.9 LMC Initialization Sequence + * + * There are 13 parts to the LMC initialization procedure: + * + * 1. DDR PLL initialization + * + * 2. LMC CK initialization + * + * 3. LMC interface enable initialization + * + * 4. 
LMC DRESET initialization + * + * 5. LMC CK local initialization + * + * 6. LMC RESET initialization + * + * 7. Early LMC initialization + * + * 8. LMC offset training + * + * 9. LMC internal Vref training + * + * 10. LMC deskew training + * + * 11. LMC write leveling + * + * 12. LMC read leveling + * + * 13. Final LMC initialization + * + * CN78XX supports two modes: + * + * - two-LMC mode: both LMCs 2/3 must not be enabled + * (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and + * LMC2/3_DLL_CTL2[INTF_EN] + * must be set to 0) and both LMCs 0/1 must be enabled). + * + * - four-LMC mode: all four LMCs 0..3 must be enabled. + * + * Steps 4 and 6..13 should each be performed for each + * enabled LMC (either twice or four times). Steps 1..3 and + * 5 are more global in nature and each must be executed + * exactly once (not once per LMC) each time the DDR PLL + * changes or is first brought up. Steps 1..3 and 5 need + * not be performed if the DDR PLL is stable. + * + * Generally, the steps are performed in order. The exception + * is that the CK local initialization (step 5) must be + * performed after some DRESET initializations (step 4) and + * before other DRESET initializations when the DDR PLL is + * brought up or changed. (The CK local initialization uses + * information from some LMCs to bring up the other local + * CKs.) The following text describes these ordering + * requirements in more detail. + * + * Following any chip reset, the DDR PLL must be brought up, + * and all 13 steps should be executed. Subsequently, it is + * possible to execute only steps 4 and 6..13, or to execute + * only steps 8..13. + * + * The remainder of this section covers these initialization + * steps in sequence. + */ + + /* Do the following init only once */ + if (if_num != 0) + goto not_if0; + + /* Only for interface #0 ... */ + + /* + * 5.9.3 LMC Interface-Enable Initialization + * + * LMC interface-enable initialization (Step 3) must be# + * performed after Step 2 for each chip reset and whenever + * the DDR clock speed changes. This step needs to be + * performed only once, not once per LMC. Perform the + * following three substeps for the LMC interface-enable + * initialization: + * + * 1. Without changing any other LMC2_DLL_CTL2 fields + * (LMC(0..3)_DLL_CTL2 should be at their reset values after + * Step 1), write LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC + * mode is desired. + * + * 2. Without changing any other LMC3_DLL_CTL2 fields, write + * LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired. + * + * 3. Read LMC2_DLL_CTL2 and wait for the result. + * + * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] + * values should not be changed by software from this point. + */ + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + + dll_ctl2.cn78xx.byp_setting = 0; + dll_ctl2.cn78xx.byp_sel = 0; + dll_ctl2.cn78xx.quad_dll_ena = 0; + dll_ctl2.cn78xx.dreset = 1; + dll_ctl2.cn78xx.dll_bringup = 0; + dll_ctl2.cn78xx.intf_en = 0; + + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64); + } + + /* + * ###### Interface enable (intf_en) deferred until after + * DDR_DIV_RESET=0 ####### + */ + + /* + * 5.9.1 DDR PLL Initialization + * + * DDR PLL initialization (Step 1) must be performed for each + * chip reset and whenever the DDR clock speed changes. This + * step needs to be performed only once, not once per LMC. + * + * Perform the following eight substeps to initialize the + * DDR PLL: + * + * 1. 
If not done already, write all fields in + * LMC(0..3)_DDR_PLL_CTL and + * LMC(0..1)_DLL_CTL2 to their reset values, including: + * + * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1 + * .. LMC0_DLL_CTL2[DRESET] = 1 + * + * This substep is not necessary after a chip reset. + * + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + ddr_pll_ctl.cn78xx.reset_n = 0; + ddr_pll_ctl.cn78xx.ddr_div_reset = 1; + ddr_pll_ctl.cn78xx.phy_dcok = 0; + + /* + * 73XX pass 1.3 has LMC0 DCLK_INVERT tied to 1; earlier + * 73xx passes are tied to 0 + * + * 75XX needs LMC0 DCLK_INVERT set to 1 to minimize duty + * cycle falling points + * + * and we default all other chips LMC0 to DCLK_INVERT=0 + */ + ddr_pll_ctl.cn78xx.dclk_invert = + !!(octeon_is_cpuid(OCTEON_CN73XX_PASS1_3) || + octeon_is_cpuid(OCTEON_CNF75XX)); + + /* + * allow override of LMC0 desired setting for DCLK_INVERT, + * but not on 73XX; + * we cannot change LMC0 DCLK_INVERT on 73XX any pass + */ + if (!(octeon_is_cpuid(OCTEON_CN73XX))) { + s = lookup_env(priv, "ddr0_set_dclk_invert"); + if (s) { + ddr_pll_ctl.cn78xx.dclk_invert = + !!simple_strtoul(s, NULL, 0); + debug("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n", + ddr_pll_ctl.cn78xx.dclk_invert); + } + } + + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u64); + debug("%-45s : 0x%016llx\n", "LMC0: DDR_PLL_CTL", + ddr_pll_ctl.u64); + + // only when LMC1 is active + if (if_mask & 0x2) { + /* + * For CNF75XX, both LMC0 and LMC1 use the same PLL, + * so we use the LMC0 setting of DCLK_INVERT for LMC1. + */ + if (!octeon_is_cpuid(OCTEON_CNF75XX)) { + int override = 0; + + /* + * by default, for non-CNF75XX, we want + * LMC1 toggled LMC0 + */ + int lmc0_dclk_invert = + ddr_pll_ctl.cn78xx.dclk_invert; + + /* + * FIXME: work-around for DDR3 UDIMM problems + * is to use LMC0 setting on LMC1 and if + * 73xx pass 1.3, we want to default LMC1 + * DCLK_INVERT to LMC0, not the invert of LMC0 + */ + int lmc1_dclk_invert; + + lmc1_dclk_invert = + ((ddr_type == DDR4_DRAM) && + !octeon_is_cpuid(OCTEON_CN73XX_PASS1_3)) + ? lmc0_dclk_invert ^ 1 : + lmc0_dclk_invert; + + /* + * allow override of LMC1 desired setting for + * DCLK_INVERT + */ + s = lookup_env(priv, "ddr1_set_dclk_invert"); + if (s) { + lmc1_dclk_invert = + !!simple_strtoul(s, NULL, 0); + override = 1; + } + debug("LMC1: %s DDR_PLL_CTL[dclk_invert] to %d (LMC0 %d)\n", + (override) ? "override" : + "default", lmc1_dclk_invert, + lmc0_dclk_invert); + + ddr_pll_ctl.cn78xx.dclk_invert = + lmc1_dclk_invert; + } + + // but always write LMC1 CSR if it is active + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u64); + debug("%-45s : 0x%016llx\n", + "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u64); + } + + /* + * 2. If the current DRAM contents are not preserved (see + * LMC(0..3)_RESET_ CTL[DDR3PSV]), this is also an appropriate + * time to assert the RESET# pin of the DDR3/DDR4 DRAM parts. + * If desired, write + * LMC0_RESET_ CTL[DDR3RST] = 0 without modifying any other + * LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. + * No action is required here to assert DDR_RESET_L + * following a chip reset. Refer to Section 5.9.6. Do this + * for all enabled LMCs. 
+ */ + + for (i = 0; (!ddr_memory_preserved(priv)) && i < 4; ++i) { + union cvmx_lmcx_reset_ctl reset_ctl; + + if ((if_mask & (1 << i)) == 0) + continue; + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + reset_ctl.cn78xx.ddr3rst = 0; /* Reset asserted */ + debug("LMC%d Asserting DDR_RESET_L\n", i); + lmc_wr(priv, CVMX_LMCX_RESET_CTL(i), reset_ctl.u64); + lmc_rd(priv, CVMX_LMCX_RESET_CTL(i)); + } + + /* + * 3. Without changing any other LMC0_DDR_PLL_CTL values, + * write LMC0_DDR_PLL_CTL[CLKF] with a value that gives a + * desired DDR PLL speed. The LMC0_DDR_PLL_CTL[CLKF] value + * should be selected in conjunction with the post-scalar + * divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN]) so + * that the desired LMC CK speeds are is produced (all + * enabled LMCs must run the same speed). Section 5.14 + * describes LMC0_DDR_PLL_CTL[CLKF] and + * LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings that produce + * the desired LMC CK speed. Section 5.9.2 describes LMC CK + * initialization, which can be done separately from the DDR + * PLL initialization described in this section. + * + * The LMC0_DDR_PLL_CTL[CLKF] value must not change after + * this point without restarting this SDRAM PLL + * initialization sequence. + */ + + /* Init to max error */ + error = ddr_hertz; + best_error = ddr_hertz; + + debug("DDR Reference Hertz = %d\n", ddr_ref_hertz); + + while (best_error == ddr_hertz) { + for (clkr = 0; clkr < 4; ++clkr) { + for (en_idx = + sizeof(_en) / sizeof(int) - + 1; en_idx >= 0; --en_idx) { + save_en_idx = en_idx; + clkf = + ((ddr_hertz) * + (clkr + 1) * (_en[save_en_idx])); + clkf = divide_nint(clkf, ddr_ref_hertz) + - 1; + pll_MHz = + ddr_ref_hertz * + (clkf + 1) / (clkr + 1) / 1000000; + calculated_ddr_hertz = + ddr_ref_hertz * + (clkf + + 1) / ((clkr + + 1) * (_en[save_en_idx])); + error = + ddr_hertz - calculated_ddr_hertz; + + if (pll_MHz < min_pll_MHz || + pll_MHz > max_pll_MHz) + continue; + if (clkf > max_clkf) { + /* + * PLL requires clkf to be + * limited + */ + continue; + } + if (abs(error) > abs(best_error)) + continue; + + debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld\n", + clkr, save_en_idx, + _en[save_en_idx], clkf, pll_MHz, + calculated_ddr_hertz, error); + + /* Favor the highest PLL frequency. 
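+ * For example (illustrative values): with the
+ * default 50 MHz refclk, clkr = 0, _en = 4 and
+ * clkf = 63 give a 3200 MHz PLL and an
+ * 800 MHz LMC CK.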
*/ + if (abs(error) < abs(best_error) || + pll_MHz > best_pll_MHz) { + best_pll_MHz = pll_MHz; + best_calculated_ddr_hertz = + calculated_ddr_hertz; + best_error = error; + best_clkr = clkr; + best_clkf = clkf; + best_en_idx = save_en_idx; + } + } + } + + override_pll_settings = 0; + + s = lookup_env(priv, "ddr_pll_clkr"); + if (s) { + best_clkr = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + s = lookup_env(priv, "ddr_pll_clkf"); + if (s) { + best_clkf = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + s = lookup_env(priv, "ddr_pll_en_idx"); + if (s) { + best_en_idx = simple_strtoul(s, NULL, 0); + override_pll_settings = 1; + } + + if (override_pll_settings) { + best_pll_MHz = + ddr_ref_hertz * (best_clkf + + 1) / + (best_clkr + 1) / 1000000; + best_calculated_ddr_hertz = + ddr_ref_hertz * (best_clkf + + 1) / + ((best_clkr + 1) * (_en[best_en_idx])); + best_error = + ddr_hertz - best_calculated_ddr_hertz; + } + + debug("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld <==\n", + best_clkr, best_en_idx, _en[best_en_idx], + best_clkf, best_pll_MHz, + best_calculated_ddr_hertz, best_error); + + /* + * Try lowering the frequency if we can't get a + * working configuration + */ + if (best_error == ddr_hertz) { + if (ddr_hertz < orig_ddr_hertz - 10000000) + break; + ddr_hertz -= 1000000; + best_error = ddr_hertz; + } + } + + if (best_error == ddr_hertz) { + printf("ERROR: Can not compute a legal DDR clock speed configuration.\n"); + return -1; + } + + new_bwadj = (best_clkf + 1) / 10; + debug("bwadj: %2d\n", new_bwadj); + + s = lookup_env(priv, "ddr_pll_bwadj"); + if (s) { + new_bwadj = strtoul(s, NULL, 0); + debug("bwadj: %2d\n", new_bwadj); + } + + for (i = 0; i < 2; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + debug("LMC%d: DDR_PLL_CTL : 0x%016llx\n", + i, ddr_pll_ctl.u64); + + ddr_pll_ctl.cn78xx.ddr_ps_en = best_en_idx; + ddr_pll_ctl.cn78xx.clkf = best_clkf; + ddr_pll_ctl.cn78xx.clkr = best_clkr; + ddr_pll_ctl.cn78xx.reset_n = 0; + ddr_pll_ctl.cn78xx.bwadj = new_bwadj; + + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + debug("LMC%d: DDR_PLL_CTL : 0x%016llx\n", + i, ddr_pll_ctl.u64); + + /* + * For cnf75xx LMC0 and LMC1 use the same PLL so + * only program LMC0 PLL. + */ + if (octeon_is_cpuid(OCTEON_CNF75XX)) + break; + } + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + /* + * 4. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 5. Wait a minimum of 3 us. + */ + + udelay(3); /* Wait 3 us */ + + /* + * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without + * changing any other LMC0_DDR_PLL_CTL values. + */ + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.reset_n = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 7. Read LMC0_DDR_PLL_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 8. Wait a minimum of 25 us. + */ + + udelay(25); /* Wait 25 us */ + + /* + * For cnf75xx LMC0 and LMC1 use the same PLL so + * only program LMC0 PLL. + */ + if (octeon_is_cpuid(OCTEON_CNF75XX)) + break; + } + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + /* + * 5.9.2 LMC CK Initialization + * + * DDR PLL initialization must be completed prior to + * starting LMC CK initialization. 
+ * + * Perform the following substeps to initialize the + * LMC CK: + * + * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL + * values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and + * LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the + * appropriate value to get the desired LMC CK speed. + * Section 5.14 discusses CLKF and DDR_PS_EN + * programmings. The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] + * must not change after this point without restarting + * this LMC CK initialization sequence. + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr_div_reset = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 2. Without changing any other fields in + * LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0. + */ + + ddr_pll_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr4_mode = + (ddr_type == DDR4_DRAM) ? 1 : 0; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 4. Wait a minimum of 1 us. + */ + + udelay(1); /* Wait 1 us */ + + /* + * ###### Steps 5 through 7 deferred until after + * DDR_DIV_RESET=0 ####### + */ + + /* + * 8. Without changing any other LMC(0..3)_COMP_CTL2 + * values, write + * LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] + * to the desired DDR*_CK_*_P control and command + * signals drive strength. + */ + + union cvmx_lmcx_comp_ctl2 comp_ctl2; + const struct ddr3_custom_config *custom_lmc_config = + &ddr_conf->custom_lmc_config; + + comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(i)); + + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.dqx_ctl = + (custom_lmc_config->dqx_ctl == + 0) ? 4 : custom_lmc_config->dqx_ctl; + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.ck_ctl = + (custom_lmc_config->ck_ctl == + 0) ? 4 : custom_lmc_config->ck_ctl; + /* Default 4=34.3 ohm */ + comp_ctl2.cn78xx.cmd_ctl = + (custom_lmc_config->cmd_ctl == + 0) ? 4 : custom_lmc_config->cmd_ctl; + + comp_ctl2.cn78xx.rodt_ctl = 0x4; /* 60 ohm */ + + comp_ctl2.cn70xx.ptune_offset = + (abs(custom_lmc_config->ptune_offset) & 0x7) + | (_sign(custom_lmc_config->ptune_offset) << 3); + comp_ctl2.cn70xx.ntune_offset = + (abs(custom_lmc_config->ntune_offset) & 0x7) + | (_sign(custom_lmc_config->ntune_offset) << 3); + + s = lookup_env(priv, "ddr_clk_ctl"); + if (s) { + comp_ctl2.cn78xx.ck_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ck_ctl"); + if (s) { + comp_ctl2.cn78xx.ck_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_cmd_ctl"); + if (s) { + comp_ctl2.cn78xx.cmd_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dqx_ctl"); + if (s) { + comp_ctl2.cn78xx.dqx_ctl = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ptune_offset"); + if (s) { + comp_ctl2.cn78xx.ptune_offset = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_ntune_offset"); + if (s) { + comp_ctl2.cn78xx.ntune_offset = + simple_strtoul(s, NULL, 0); + } + + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(i), comp_ctl2.u64); + + /* + * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 10. Wait a minimum of 200 ns. + */ + + udelay(1); /* Wait 1 us */ + + /* + * 11. Without changing any other + * LMC(0..3)_DDR_PLL_CTL values, write + * LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0. 
+ */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.ddr_div_reset = 0; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + + /* + * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the + * result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 13. Wait a minimum of 200 ns. + */ + + udelay(1); /* Wait 1 us */ + } + + /* + * Relocated Interface Enable (intf_en) Step + */ + for (i = (octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) ? 1 : 2; + i < 4; ++i) { + /* + * This step is only necessary for LMC 2 and 3 in + * 4-LMC mode. The mask will cause the unpopulated + * interfaces to be skipped. + */ + if ((if_mask & (1 << i)) == 0) + continue; + + dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + dll_ctl2.cn78xx.intf_en = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL2(i), dll_ctl2.u64); + lmc_rd(priv, CVMX_LMCX_DLL_CTL2(i)); + } + + /* + * Relocated PHY_DCOK Step + */ + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + /* + * 5. Without changing any other fields in + * LMC(0..3)_DDR_PLL_CTL, write + * LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1. + */ + + ddr_pll_ctl.u64 = lmc_rd(priv, + CVMX_LMCX_DDR_PLL_CTL(i)); + ddr_pll_ctl.cn78xx.phy_dcok = 1; + lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(i), ddr_pll_ctl.u64); + /* + * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for + * the result. + */ + + lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(i)); + + /* + * 7. Wait a minimum of 20 us. + */ + + udelay(20); /* Wait 20 us */ + } + + /* + * 5.9.4 LMC DRESET Initialization + * + * All of the DDR PLL, LMC global CK, and LMC interface + * enable initializations must be completed prior to starting + * this LMC DRESET initialization (Step 4). + * + * This LMC DRESET step is done for all enabled LMCs. + * + * There are special constraints on the ordering of DRESET + * initialization (Steps 4) and CK local initialization + * (Step 5) whenever CK local initialization must be executed. + * CK local initialization must be executed whenever the DDR + * PLL is being brought up (for each chip reset* and whenever + * the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + * - LMC0 DRESET initialization must occur before Step 5. + * - LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + * - LMC2 and LMC3 DRESET initialization must occur before + * Step 5. + * - LMC0 and LMC1 DRESET initialization must occur after + * Step 5. + */ + + if (octeon_is_cpuid(OCTEON_CN73XX)) { + /* ONE-LMC or TWO-LMC MODE BEFORE STEP 5 for cn73xx */ + cn78xx_lmc_dreset_init(priv, 0); + } else if (octeon_is_cpuid(OCTEON_CNF75XX)) { + if (if_mask == 0x3) { + /* + * 2-LMC Mode: LMC1 DRESET must occur + * before Step 5 + */ + cn78xx_lmc_dreset_init(priv, 1); + } + } else { + /* TWO-LMC MODE DRESET BEFORE STEP 5 */ + if (if_mask == 0x3) + cn78xx_lmc_dreset_init(priv, 0); + + /* FOUR-LMC MODE BEFORE STEP 5 */ + if (if_mask == 0xf) { + cn78xx_lmc_dreset_init(priv, 2); + cn78xx_lmc_dreset_init(priv, 3); + } + } + + /* + * 5.9.5 LMC CK Local Initialization + * + * All of DDR PLL, LMC global CK, and LMC interface-enable + * initializations must be completed prior to starting this + * LMC CK local initialization (Step 5). + * + * LMC CK Local initialization must be performed for each + * chip reset and whenever the DDR clock speed changes. This + * step needs to be performed only once, not once per LMC. 
+ * + * There are special constraints on the ordering of DRESET + * initialization (Steps 4) and CK local initialization + * (Step 5) whenever CK local initialization must be executed. + * CK local initialization must be executed whenever the + * DDR PLL is being brought up (for each chip reset and + * whenever the DDR clock speed changes). + * + * When Step 5 must be executed in the two-LMC mode case: + * - LMC0 DRESET initialization must occur before Step 5. + * - LMC1 DRESET initialization must occur after Step 5. + * + * When Step 5 must be executed in the four-LMC mode case: + * - LMC2 and LMC3 DRESET initialization must occur before + * Step 5. + * - LMC0 and LMC1 DRESET initialization must occur after + * Step 5. + * + * LMC CK local initialization is different depending on + * whether two-LMC or four-LMC modes are desired. + */ + + if (if_mask == 0x3) { + int temp_lmc_if_num = octeon_is_cpuid(OCTEON_CNF75XX) ? + 1 : 0; + + /* + * 5.9.5.1 LMC CK Local Initialization for Two-LMC + * Mode + * + * 1. Write LMC0_DLL_CTL3 to its reset value. (Note + * that LMC0_DLL_CTL3[DLL_90_BYTE_SEL] = 0x2 .. 0x8 + * should also work.) + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + + if (octeon_is_cpuid(OCTEON_CNF75XX)) + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7; + else + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 1; + + lmc_wr(priv, + CVMX_LMCX_DLL_CTL3(temp_lmc_if_num), + ddr_dll_ctl3.u64); + + /* + * 2. Read LMC0_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num)); + + /* + * 3. Without changing any other fields in + * LMC0_DLL_CTL3, write + * LMC0_DLL_CTL3[DCLK90_FWD] = 1. Writing + * LMC0_DLL_CTL3[DCLK90_FWD] = 1 + * causes clock-delay information to be forwarded + * from LMC0 to LMC1. + */ + + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + lmc_wr(priv, + CVMX_LMCX_DLL_CTL3(temp_lmc_if_num), + ddr_dll_ctl3.u64); + + /* + * 4. Read LMC0_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(temp_lmc_if_num)); + } + + if (if_mask == 0xf) { + /* + * 5.9.5.2 LMC CK Local Initialization for Four-LMC + * Mode + * + * 1. Write LMC2_DLL_CTL3 to its reset value except + * LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7. + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 7; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64); + + /* + * 2. Write LMC3_DLL_CTL3 to its reset value except + * LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x2. + */ + + ddr_dll_ctl3.u64 = 0; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + ddr_dll_ctl3.cn78xx.dll90_byte_sel = 2; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64); + + /* + * 3. Read LMC3_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + + /* + * 4. Without changing any other fields in + * LMC2_DLL_CTL3, write LMC2_DLL_CTL3[DCLK90_FWD] = 1 + * and LMC2_DLL_CTL3[DCLK90_RECAL_ DIS] = 1. + * Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 + * to forward clockdelay information to LMC0. Setting + * LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 + * from periodically recalibrating this delay + * information. + */ + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(2)); + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(2), ddr_dll_ctl3.u64); + + /* + * 5. Without changing any other fields in + * LMC3_DLL_CTL3, write LMC3_DLL_CTL3[DCLK90_FWD] = 1 + * and LMC3_DLL_CTL3[DCLK90_RECAL_ DIS] = 1. 
+ * Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 + * to forward clockdelay information to LMC1. Setting + * LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 + * from periodically recalibrating this delay + * information. + */ + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + ddr_dll_ctl3.cn78xx.dclk90_fwd = 1; + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(3), ddr_dll_ctl3.u64); + + /* + * 6. Read LMC3_DLL_CTL3 and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(3)); + } + + if (octeon_is_cpuid(OCTEON_CNF75XX)) { + /* + * cnf75xx 2-LMC Mode: LMC0 DRESET must occur after + * Step 5, Do LMC0 for 1-LMC Mode here too + */ + cn78xx_lmc_dreset_init(priv, 0); + } + + /* TWO-LMC MODE AFTER STEP 5 */ + if (if_mask == 0x3) { + if (octeon_is_cpuid(OCTEON_CNF75XX)) { + /* + * cnf75xx 2-LMC Mode: LMC0 DRESET must + * occur after Step 5 + */ + cn78xx_lmc_dreset_init(priv, 0); + } else { + cn78xx_lmc_dreset_init(priv, 1); + } + } + + /* FOUR-LMC MODE AFTER STEP 5 */ + if (if_mask == 0xf) { + cn78xx_lmc_dreset_init(priv, 0); + cn78xx_lmc_dreset_init(priv, 1); + + /* + * Enable periodic recalibration of DDR90 delay + * line in. + */ + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(0)); + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(0), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(1)); + ddr_dll_ctl3.cn78xx.dclk90_recal_dis = 0; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(1), ddr_dll_ctl3.u64); + } + + /* Enable fine tune mode for all LMCs */ + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(i)); + ddr_dll_ctl3.cn78xx.fine_tune_mode = 1; + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(i), ddr_dll_ctl3.u64); + } + + /* + * Enable the trim circuit on the appropriate channels to + * adjust the DDR clock duty cycle for chips that support + * it + */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + union cvmx_lmcx_phy_ctl lmc_phy_ctl; + int i; + + for (i = 0; i < 4; ++i) { + if ((if_mask & (1 << i)) == 0) + continue; + + lmc_phy_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_PHY_CTL(i)); + + if (octeon_is_cpuid(OCTEON_CNF75XX) || + octeon_is_cpuid(OCTEON_CN73XX_PASS1_3)) { + /* Both LMCs */ + lmc_phy_ctl.s.lv_mode = 0; + } else { + /* Odd LMCs = 0, Even LMCs = 1 */ + lmc_phy_ctl.s.lv_mode = (~i) & 1; + } + + debug("LMC%d: PHY_CTL : 0x%016llx\n", + i, lmc_phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(i), + lmc_phy_ctl.u64); + } + } + } + + /* + * 5.9.6 LMC RESET Initialization + * + * NOTE: this is now done as the first step in + * init_octeon3_ddr3_interface, rather than the last step in clock + * init. This reorg allows restarting per-LMC initialization should + * problems be encountered, rather than being forced to resort to + * resetting the chip and starting all over. + * + * Look for the code in octeon3_lmc.c: perform_lmc_reset(). + */ + + /* Fallthrough for all interfaces... */ +not_if0: + + /* + * Start the DDR clock so that its frequency can be measured. + * For some chips we must activate the memory controller with + * init_start to make the DDR clock start to run. 
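+ * (Only pre-Octeon-2 parts take this path; the
+ * CN6XXX/CNF7XXX/CN7XXX checks below exclude Octeon 2
+ * and Octeon 3.)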
+ */ + if ((!octeon_is_cpuid(OCTEON_CN6XXX)) && + (!octeon_is_cpuid(OCTEON_CNF7XXX)) && + (!octeon_is_cpuid(OCTEON_CN7XXX))) { + union cvmx_lmcx_mem_cfg0 mem_cfg0; + + mem_cfg0.u64 = 0; + mem_cfg0.s.init_start = 1; + lmc_wr(priv, CVMX_LMCX_MEM_CFG0(if_num), mem_cfg0.u64); + lmc_rd(priv, CVMX_LMCX_MEM_CFG0(if_num)); + } + + set_ddr_clock_initialized(priv, if_num, 1); + + return 0; +} + +static void octeon_ipd_delay_cycles(u64 cycles) +{ + u64 start = csr_rd(CVMX_IPD_CLK_COUNT); + + while (start + cycles > csr_rd(CVMX_IPD_CLK_COUNT)) + ; +} + +static void octeon_ipd_delay_cycles_o3(u64 cycles) +{ + u64 start = csr_rd(CVMX_FPA_CLK_COUNT); + + while (start + cycles > csr_rd(CVMX_FPA_CLK_COUNT)) + ; +} + +static u32 measure_octeon_ddr_clock(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, u32 cpu_hertz, + u32 ddr_hertz, u32 ddr_ref_hertz, + int if_num, u32 if_mask) +{ + u64 core_clocks; + u64 ddr_clocks; + u64 calc_ddr_hertz; + + if (ddr_conf) { + if (initialize_ddr_clock(priv, ddr_conf, cpu_hertz, + ddr_hertz, ddr_ref_hertz, if_num, + if_mask) != 0) + return 0; + } + + /* Dynamically determine the DDR clock speed */ + if (OCTEON_IS_OCTEON2() || octeon_is_cpuid(OCTEON_CN70XX)) { + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT); + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles(100000000); + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks; + } else if (octeon_is_cpuid(OCTEON_CN7XXX)) { + core_clocks = csr_rd(CVMX_FPA_CLK_COUNT); + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles_o3(100000000); + core_clocks = csr_rd(CVMX_FPA_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * gd->bus_clk / core_clocks; + } else { + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT); + /* + * ignore overflow, starts counting when we enable the + * controller + */ + ddr_clocks = lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num)); + /* How many cpu cycles to measure over */ + octeon_ipd_delay_cycles(100000000); + core_clocks = csr_rd(CVMX_IPD_CLK_COUNT) - core_clocks; + ddr_clocks = + lmc_rd(priv, CVMX_LMCX_DCLK_CNT_LO(if_num)) - ddr_clocks; + calc_ddr_hertz = ddr_clocks * cpu_hertz / core_clocks; + } + + debug("core clocks: %llu, ddr clocks: %llu, calc rate: %llu\n", + core_clocks, ddr_clocks, calc_ddr_hertz); + debug("LMC%d: Measured DDR clock: %lld, cpu clock: %u, ddr clocks: %llu\n", + if_num, calc_ddr_hertz, cpu_hertz, ddr_clocks); + + /* Check for unreasonable settings. */ + if (calc_ddr_hertz < 10000) { + udelay(8000000 * 100); + printf("DDR clock misconfigured on interface %d. 
Resetting...\n", + if_num); + do_reset(NULL, 0, 0, NULL); + } + + return calc_ddr_hertz; +} + +u64 lmc_ddr3_rl_dbg_read(struct ddr_priv *priv, int if_num, int idx) +{ + union cvmx_lmcx_rlevel_dbg rlevel_dbg; + union cvmx_lmcx_rlevel_ctl rlevel_ctl; + + rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + rlevel_ctl.s.byte = idx; + + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + + rlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_DBG(if_num)); + return rlevel_dbg.s.bitmask; +} + +u64 lmc_ddr3_wl_dbg_read(struct ddr_priv *priv, int if_num, int idx) +{ + union cvmx_lmcx_wlevel_dbg wlevel_dbg; + + wlevel_dbg.u64 = 0; + wlevel_dbg.s.byte = idx; + + lmc_wr(priv, CVMX_LMCX_WLEVEL_DBG(if_num), wlevel_dbg.u64); + lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num)); + + wlevel_dbg.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_DBG(if_num)); + return wlevel_dbg.s.bitmask; +} + +int validate_ddr3_rlevel_bitmask(struct rlevel_bitmask *rlevel_bitmask_p, + int ddr_type) +{ + int i; + int errors = 0; + u64 mask = 0; /* Used in 64-bit comparisons */ + u8 mstart = 0; + u8 width = 0; + u8 firstbit = 0; + u8 lastbit = 0; + u8 bubble = 0; + u8 tbubble = 0; + u8 blank = 0; + u8 narrow = 0; + u8 trailing = 0; + u64 bitmask = rlevel_bitmask_p->bm; + u8 extras = 0; + u8 toolong = 0; + u64 temp; + + if (bitmask == 0) { + blank += RLEVEL_BITMASK_BLANK_ERROR; + } else { + /* Look for fb, the first bit */ + temp = bitmask; + while (!(temp & 1)) { + firstbit++; + temp >>= 1; + } + + /* Look for lb, the last bit */ + lastbit = firstbit; + while ((temp >>= 1)) + lastbit++; + + /* + * Start with the max range to try to find the largest mask + * within the bitmask data + */ + width = MASKRANGE_BITS; + for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) { + for (mstart = lastbit - width + 1; mstart >= firstbit; + --mstart) { + temp = mask << mstart; + if ((bitmask & temp) == temp) + goto done_now; + } + } +done_now: + /* look for any more contiguous 1's to the right of mstart */ + if (width == MASKRANGE_BITS) { // only when maximum mask + while ((bitmask >> (mstart - 1)) & 1) { + // slide right over more 1's + --mstart; + // count the number of extra bits only for DDR4 + if (ddr_type == DDR4_DRAM) + extras++; + } + } + + /* Penalize any extra 1's beyond the maximum desired mask */ + if (extras > 0) + toolong = + RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1); + + /* Detect if bitmask is too narrow. */ + if (width < 4) + narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR; + + /* + * detect leading bubble bits, that is, any 0's between first + * and mstart + */ + temp = bitmask >> (firstbit + 1); + i = mstart - firstbit - 1; + while (--i >= 0) { + if ((temp & 1) == 0) + bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + temp >>= 1; + } + + temp = bitmask >> (mstart + width + extras); + i = lastbit - (mstart + width + extras - 1); + while (--i >= 0) { + if (temp & 1) { + /* + * Detect 1 bits after the trailing end of + * the mask, including last. 
+ */ + trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR; + } else { + /* + * Detect trailing bubble bits, that is, + * any 0's between end-of-mask and last + */ + tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR; + } + temp >>= 1; + } + } + + errors = bubble + tbubble + blank + narrow + trailing + toolong; + + /* Pass out useful statistics */ + rlevel_bitmask_p->mstart = mstart; + rlevel_bitmask_p->width = width; + + debug_bitmask_print("bm:%08lx mask:%02lx, width:%2u, mstart:%2d, fb:%2u, lb:%2u (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n", + (unsigned long)bitmask, mask, width, mstart, + firstbit, lastbit, bubble, tbubble, blank, + narrow, trailing, toolong, errors, + (errors) ? "=> invalid" : ""); + + return errors; +} + +int compute_ddr3_rlevel_delay(u8 mstart, u8 width, + union cvmx_lmcx_rlevel_ctl rlevel_ctl) +{ + int delay; + + debug_bitmask_print(" offset_en:%d", rlevel_ctl.s.offset_en); + + if (rlevel_ctl.s.offset_en) { + delay = max((int)mstart, + (int)(mstart + width - 1 - rlevel_ctl.s.offset)); + } else { + /* if (rlevel_ctl.s.offset) { *//* Experimental */ + if (0) { + delay = max(mstart + rlevel_ctl.s.offset, mstart + 1); + /* + * Insure that the offset delay falls within the + * bitmask + */ + delay = min(delay, mstart + width - 1); + } else { + /* Round down */ + delay = (width - 1) / 2 + mstart; + } + } + + return delay; +} + +/* Default ODT config must disable ODT */ +/* Must be const (read only) so that the structure is in flash */ +const struct dimm_odt_config disable_odt_config[] = { + /* 1 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 2 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 3 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, + /* 4 */ { 0, 0x0000, {.u64 = 0x0000}, {.u64 = 0x0000}, 0, 0x0000, 0 }, +}; + +/* Memory controller setup function */ +static int init_octeon_dram_interface(struct ddr_priv *priv, + struct ddr_conf *ddr_conf, + u32 ddr_hertz, u32 cpu_hertz, + u32 ddr_ref_hertz, int if_num, + u32 if_mask) +{ + u32 mem_size_mbytes = 0; + char *s; + + s = lookup_env(priv, "ddr_timing_hertz"); + if (s) + ddr_hertz = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) { + int lmc_restart_retries = 0; +#define DEFAULT_RESTART_RETRIES 3 + int lmc_restart_retries_limit = DEFAULT_RESTART_RETRIES; + + s = lookup_env(priv, "ddr_restart_retries_limit"); + if (s) + lmc_restart_retries_limit = simple_strtoul(s, NULL, 0); + +restart_lmc_init: + mem_size_mbytes = init_octeon3_ddr3_interface(priv, ddr_conf, + ddr_hertz, + cpu_hertz, + ddr_ref_hertz, + if_num, if_mask); + if (mem_size_mbytes == 0) { // 0 means restart is possible + if (lmc_restart_retries < lmc_restart_retries_limit) { + lmc_restart_retries++; + printf("N0.LMC%d Configuration problem: attempting LMC reset and init restart %d\n", + if_num, lmc_restart_retries); + goto restart_lmc_init; + } else { + if (lmc_restart_retries_limit > 0) { + printf("INFO: N0.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n", + if_num, lmc_restart_retries); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } else { + // return an error, no restart + mem_size_mbytes = -1; + } + } + } + } + + debug("N0.LMC%d Configuration Completed: %d MB\n", + if_num, mem_size_mbytes); + + return mem_size_mbytes; +} + +#define WLEVEL_BYTE_BITS 5 +#define WLEVEL_BYTE_MSK ((1ULL << 5) - 1) + +void upd_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, + int byte, int delay) +{ + union cvmx_lmcx_wlevel_rankx temp_wlevel_rank; + 
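+ /*
+  * Each byte lane's write-leveling setting occupies a 5-bit
+  * (WLEVEL_BYTE_BITS) field in WLEVEL_RANKx: clear this byte's
+  * field, then merge in the new delay.
+  */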
+ if (byte >= 0 && byte <= 8) { + temp_wlevel_rank.u64 = lmc_wlevel_rank->u64; + temp_wlevel_rank.u64 &= + ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte)); + temp_wlevel_rank.u64 |= + ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte)); + lmc_wlevel_rank->u64 = temp_wlevel_rank.u64; + } +} + +int get_wl_rank(union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte) +{ + int delay = 0; + + if (byte >= 0 && byte <= 8) + delay = + ((lmc_wlevel_rank->u64) >> (WLEVEL_BYTE_BITS * + byte)) & WLEVEL_BYTE_MSK; + + return delay; +} + +void upd_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + int byte, int delay) +{ + union cvmx_lmcx_rlevel_rankx temp_rlevel_rank; + + if (byte >= 0 && byte <= 8) { + temp_rlevel_rank.u64 = + lmc_rlevel_rank->u64 & ~(RLEVEL_BYTE_MSK << + (RLEVEL_BYTE_BITS * byte)); + temp_rlevel_rank.u64 |= + ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte)); + lmc_rlevel_rank->u64 = temp_rlevel_rank.u64; + } +} + +int get_rl_rank(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, int byte) +{ + int delay = 0; + + if (byte >= 0 && byte <= 8) + delay = + ((lmc_rlevel_rank->u64) >> (RLEVEL_BYTE_BITS * + byte)) & RLEVEL_BYTE_MSK; + + return delay; +} + +void rlevel_to_wlevel(union cvmx_lmcx_rlevel_rankx *lmc_rlevel_rank, + union cvmx_lmcx_wlevel_rankx *lmc_wlevel_rank, int byte) +{ + int byte_delay = get_rl_rank(lmc_rlevel_rank, byte); + + debug("Estimating Wlevel delay byte %d: ", byte); + debug("Rlevel=%d => ", byte_delay); + byte_delay = divide_roundup(byte_delay, 2) & 0x1e; + debug("Wlevel=%d\n", byte_delay); + upd_wl_rank(lmc_wlevel_rank, byte, byte_delay); +} + +/* Delay trend: constant=0, decreasing=-1, increasing=1 */ +static s64 calc_delay_trend(s64 v) +{ + if (v == 0) + return 0; + if (v < 0) + return -1; + + return 1; +} + +/* + * Evaluate delay sequence across the whole range of byte delays while + * keeping track of the overall delay trend, increasing or decreasing. + * If the trend changes charge an error amount to the score. + */ + +// NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4 + +int nonseq_del(struct rlevel_byte_data *rlevel_byte, int start, int end, + int max_adj_delay_inc) +{ + s64 error = 0; + s64 delay_trend, prev_trend = 0; + int byte_idx; + s64 seq_err; + s64 adj_err; + s64 delay_inc; + s64 delay_diff; + + for (byte_idx = start; byte_idx < end; ++byte_idx) { + delay_diff = rlevel_byte[byte_idx + 1].delay - + rlevel_byte[byte_idx].delay; + delay_trend = calc_delay_trend(delay_diff); + + /* + * Increment error each time the trend changes to the + * opposite direction. + */ + if (prev_trend != 0 && delay_trend != 0 && + prev_trend != delay_trend) { + seq_err = RLEVEL_NONSEQUENTIAL_DELAY_ERROR; + } else { + seq_err = 0; + } + + // how big was the delay change, if any + delay_inc = abs(delay_diff); + + /* + * Even if the trend did not change to the opposite direction, + * check for the magnitude of the change, and scale the + * penalty by the amount that the size is larger than the + * provided limit. + */ + if (max_adj_delay_inc != 0 && delay_inc > max_adj_delay_inc) { + adj_err = (delay_inc - max_adj_delay_inc) * + RLEVEL_ADJACENT_DELAY_ERROR; + } else { + adj_err = 0; + } + + rlevel_byte[byte_idx + 1].sqerrs = seq_err + adj_err; + error += seq_err + adj_err; + + debug_bitmask_print("Byte %d: %d, Byte %d: %d, delay_trend: %ld, prev_trend: %ld, [%ld/%ld]%s%s\n", + byte_idx + 0, + rlevel_byte[byte_idx + 0].delay, + byte_idx + 1, + rlevel_byte[byte_idx + 1].delay, + delay_trend, + prev_trend, seq_err, adj_err, + (seq_err) ? 
+ " => Nonsequential byte delay" : "", + (adj_err) ? + " => Adjacent delay error" : ""); + + if (delay_trend != 0) + prev_trend = delay_trend; + } + + return (int)error; +} + +int roundup_ddr3_wlevel_bitmask(int bitmask) +{ + int shifted_bitmask; + int leader; + int delay; + + for (leader = 0; leader < 8; ++leader) { + shifted_bitmask = (bitmask >> leader); + if ((shifted_bitmask & 1) == 0) + break; + } + + for (leader = leader; leader < 16; ++leader) { + shifted_bitmask = (bitmask >> (leader % 8)); + if (shifted_bitmask & 1) + break; + } + + delay = (leader & 1) ? leader + 1 : leader; + delay = delay % 8; + + return delay; +} + +/* Octeon 2 */ +static void oct2_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence) +{ + char *s; + +#ifdef DEBUG_PERFORM_DDR3_SEQUENCE + static const char * const sequence_str[] = { + "power-up/init", + "read-leveling", + "self-refresh entry", + "self-refresh exit", + "precharge power-down entry", + "precharge power-down exit", + "write-leveling", + "illegal" + }; +#endif + + union cvmx_lmcx_control lmc_control; + union cvmx_lmcx_config lmc_config; + int save_ddr2t; + + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_ddr2t = lmc_control.s.ddr2t; + + if (save_ddr2t == 0 && octeon_is_cpuid(OCTEON_CN63XX_PASS1_X)) { + /* Some register parts (IDT and TI included) do not like + * the sequence that LMC generates for an MRS register + * write in 1T mode. In this case, the register part does + * not properly forward the MRS register write to the DRAM + * parts. See errata (LMC-14548) Issues with registered + * DIMMs. + */ + debug("Forcing DDR 2T during init seq. Re: Pass 1 LMC-14548\n"); + lmc_control.s.ddr2t = 1; + } + + s = lookup_env(priv, "ddr_init_2t"); + if (s) + lmc_control.s.ddr2t = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + lmc_config.s.init_start = 1; + if (OCTEON_IS_OCTEON2()) + lmc_config.cn63xx.sequence = sequence; + lmc_config.s.rankmask = rank_mask; + +#ifdef DEBUG_PERFORM_DDR3_SEQUENCE + debug("Performing LMC sequence: rank_mask=0x%02x, sequence=%d, %s\n", + rank_mask, sequence, sequence_str[sequence]); +#endif + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + udelay(600); /* Wait a while */ + + lmc_control.s.ddr2t = save_ddr2t; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); +} + +/* Check to see if any custom offset values are used */ +static int is_dll_offset_provided(const int8_t *dll_offset_table) +{ + int i; + + if (!dll_offset_table) /* Check for pointer to table. */ + return 0; + + for (i = 0; i < 9; ++i) { + if (dll_offset_table[i] != 0) + return 1; + } + + return 0; +} + +void change_dll_offset_enable(struct ddr_priv *priv, int if_num, int change) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + SET_DDR_DLL_CTL3(offset_ena, !!change); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); +} + +unsigned short load_dll_offset(struct ddr_priv *priv, int if_num, + int dll_offset_mode, int byte_offset, int byte) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + int field_width = 6; + /* + * byte_sel: + * 0x1 = byte 0, ..., 0x9 = byte 8 + * 0xA = all bytes + */ + int byte_sel = (byte == 10) ? 
byte : byte + 1; + + if (octeon_is_cpuid(OCTEON_CN6XXX)) + field_width = 5; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + SET_DDR_DLL_CTL3(load_offset, 0); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode); + SET_DDR_DLL_CTL3(offset, + (abs(byte_offset) & (~(-1 << field_width))) | + (_sign(byte_offset) << field_width)); + SET_DDR_DLL_CTL3(byte_sel, byte_sel); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(load_offset, 1); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + return (unsigned short)GET_DDR_DLL_CTL3(offset); +} + +void process_custom_dll_offsets(struct ddr_priv *priv, int if_num, + const char *enable_str, + const int8_t *offsets, const char *byte_str, + int mode) +{ + const char *s; + int enabled; + int provided; + int byte_offset; + unsigned short offset[9] = { 0 }; + int byte; + + s = lookup_env(priv, enable_str); + if (s) + enabled = !!simple_strtol(s, NULL, 0); + else + enabled = -1; + + /* + * enabled == -1: no override, do only configured offsets if provided + * enabled == 0: override OFF, do NOT do it even if configured + * offsets provided + * enabled == 1: override ON, do it for overrides plus configured + * offsets + */ + + if (enabled == 0) + return; + + provided = is_dll_offset_provided(offsets); + + if (enabled < 0 && !provided) + return; + + change_dll_offset_enable(priv, if_num, 0); + + for (byte = 0; byte < 9; ++byte) { + // always take the provided, if available + byte_offset = (provided) ? offsets[byte] : 0; + + // then, if enabled, use any overrides present + if (enabled > 0) { + s = lookup_env(priv, byte_str, if_num, byte); + if (s) + byte_offset = simple_strtol(s, NULL, 0); + } + + offset[byte] = + load_dll_offset(priv, if_num, mode, byte_offset, byte); + } + + change_dll_offset_enable(priv, if_num, 1); + + debug("N0.LMC%d: DLL %s Offset 8:0 : 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n", + if_num, (mode == 2) ? "Read " : "Write", + offset[8], offset[7], offset[6], offset[5], offset[4], + offset[3], offset[2], offset[1], offset[0]); +} + +void ddr_init_seq(struct ddr_priv *priv, int rank_mask, int if_num) +{ + char *s; + int ddr_init_loops = 1; + int rankx; + + s = lookup_env(priv, "ddr%d_init_loops", if_num); + if (s) + ddr_init_loops = simple_strtoul(s, NULL, 0); + + while (ddr_init_loops--) { + for (rankx = 0; rankx < 8; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + if (OCTEON_IS_OCTEON3()) { + /* power-up/init */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 0); + } else { + /* power-up/init */ + oct2_ddr3_seq(priv, 1 << rankx, if_num, 0); + } + + udelay(1000); /* Wait a while. 
*/ + + s = lookup_env(priv, "ddr_sequence1"); + if (s) { + int sequence1; + + sequence1 = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) { + oct3_ddr3_seq(priv, 1 << rankx, + if_num, sequence1); + } else { + oct2_ddr3_seq(priv, 1 << rankx, + if_num, sequence1); + } + } + + s = lookup_env(priv, "ddr_sequence2"); + if (s) { + int sequence2; + + sequence2 = simple_strtoul(s, NULL, 0); + + if (OCTEON_IS_OCTEON3()) + oct3_ddr3_seq(priv, 1 << rankx, + if_num, sequence2); + else + oct2_ddr3_seq(priv, 1 << rankx, + if_num, sequence2); + } + } + } +} + +static int octeon_ddr_initialize(struct ddr_priv *priv, u32 cpu_hertz, + u32 ddr_hertz, u32 ddr_ref_hertz, + u32 if_mask, + struct ddr_conf *ddr_conf, + u32 *measured_ddr_hertz) +{ + u32 ddr_conf_valid_mask = 0; + int memsize_mbytes = 0; + char *eptr; + int if_idx; + u32 ddr_max_speed = 667000000; + u32 calc_ddr_hertz = -1; + int val; + int ret; + + if (env_get("ddr_verbose") || env_get("ddr_prompt")) + priv->flags |= FLAG_DDR_VERBOSE; + +#ifdef DDR_VERBOSE + priv->flags |= FLAG_DDR_VERBOSE; +#endif + + if (env_get("ddr_trace_init")) { + printf("Parameter ddr_trace_init found in environment.\n"); + priv->flags |= FLAG_DDR_TRACE_INIT; + priv->flags |= FLAG_DDR_VERBOSE; + } + + priv->flags |= FLAG_DDR_DEBUG; + + val = env_get_ulong("ddr_debug", 10, (u32)-1); + switch (val) { + case 0: + priv->flags &= ~FLAG_DDR_DEBUG; + printf("Parameter ddr_debug clear in environment\n"); + break; + case (u32)-1: + break; + default: + printf("Parameter ddr_debug set in environment\n"); + priv->flags |= FLAG_DDR_DEBUG; + priv->flags |= FLAG_DDR_VERBOSE; + break; + } + if (env_get("ddr_prompt")) + priv->flags |= FLAG_DDR_PROMPT; + + /* Force ddr_verbose for failsafe debugger */ + if (priv->flags & FLAG_FAILSAFE_MODE) + priv->flags |= FLAG_DDR_VERBOSE; + +#ifdef DDR_DEBUG + priv->flags |= FLAG_DDR_DEBUG; + /* Keep verbose on while we are still debugging. */ + priv->flags |= FLAG_DDR_VERBOSE; +#endif + + if ((octeon_is_cpuid(OCTEON_CN61XX) || + octeon_is_cpuid(OCTEON_CNF71XX)) && ddr_max_speed > 533333333) { + ddr_max_speed = 533333333; + } else if (octeon_is_cpuid(OCTEON_CN7XXX)) { + /* Override speed restrictions to support internal testing. 
*/ + ddr_max_speed = 1210000000; + } + + if (ddr_hertz > ddr_max_speed) { + printf("DDR clock speed %u exceeds maximum supported DDR speed, reducing to %uHz\n", + ddr_hertz, ddr_max_speed); + ddr_hertz = ddr_max_speed; + } + + if (OCTEON_IS_OCTEON3()) { // restrict check + if (ddr_hertz > cpu_hertz) { + printf("\nFATAL ERROR: DDR speed %u exceeds CPU speed %u, exiting...\n\n", + ddr_hertz, cpu_hertz); + return -1; + } + } + + /* Enable L2 ECC */ + eptr = env_get("disable_l2_ecc"); + if (eptr) { + printf("Disabling L2 ECC based on disable_l2_ecc environment variable\n"); + union cvmx_l2c_ctl l2c_val; + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disecc = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } else { + union cvmx_l2c_ctl l2c_val; + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disecc = 0; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } + + /* + * Init the L2C, must be done before DRAM access so that we + * know L2 is empty + */ + eptr = env_get("disable_l2_index_aliasing"); + if (eptr) { + union cvmx_l2c_ctl l2c_val; + + puts("L2 index aliasing disabled.\n"); + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disidxalias = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } else { + union cvmx_l2c_ctl l2c_val; + + /* Enable L2C index aliasing */ + + l2c_val.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_val.s.disidxalias = 0; + l2c_wr(priv, CVMX_L2C_CTL, l2c_val.u64); + } + + if (OCTEON_IS_OCTEON3()) { + /* + * rdf_cnt: Defines the sample point of the LMC response data in + * the DDR-clock/core-clock crossing. For optimal + * performance set to 10 * (DDR-clock period/core-clock + * period) - 1. To disable set to 0. All other values + * are reserved. + */ + + union cvmx_l2c_ctl l2c_ctl; + u64 rdf_cnt; + char *s; + + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + + /* + * It is more convenient to compute the ratio using clock + * frequencies rather than clock periods. + */ + rdf_cnt = (((u64)10 * cpu_hertz) / ddr_hertz) - 1; + rdf_cnt = rdf_cnt < 256 ? rdf_cnt : 255; + l2c_ctl.cn78xx.rdf_cnt = rdf_cnt; + + s = lookup_env(priv, "early_fill_count"); + if (s) + l2c_ctl.cn78xx.rdf_cnt = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, cpu_hertz:%d, ddr_hertz:%d\n", + "EARLY FILL COUNT ", l2c_ctl.cn78xx.rdf_cnt, cpu_hertz, + ddr_hertz); + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + } + + /* Check for lower DIMM socket populated */ + for (if_idx = 0; if_idx < 4; ++if_idx) { + if ((if_mask & (1 << if_idx)) && + validate_dimm(priv, + &ddr_conf[(int)if_idx].dimm_config_table[0], + 0)) + ddr_conf_valid_mask |= (1 << if_idx); + } + + if (octeon_is_cpuid(OCTEON_CN68XX) || octeon_is_cpuid(OCTEON_CN78XX)) { + int four_lmc_mode = 1; + char *s; + + if (priv->flags & FLAG_FAILSAFE_MODE) + four_lmc_mode = 0; + + /* Pass 1.0 disable four LMC mode. + * See errata (LMC-15811) + */ + if (octeon_is_cpuid(OCTEON_CN68XX_PASS1_0)) + four_lmc_mode = 0; + + s = env_get("ddr_four_lmc"); + if (s) { + four_lmc_mode = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_four_lmc = %d\n", + four_lmc_mode); + } + + if (!four_lmc_mode) { + puts("Forcing two-LMC Mode.\n"); + /* Invalidate LMC[2:3] */ + ddr_conf_valid_mask &= ~(3 << 2); + } + } else if (octeon_is_cpuid(OCTEON_CN73XX)) { + int one_lmc_mode = 0; + char *s; + + s = env_get("ddr_one_lmc"); + if (s) { + one_lmc_mode = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. 
ddr_one_lmc = %d\n",
+ one_lmc_mode);
+ }
+
+ if (one_lmc_mode) {
+ puts("Forcing one-LMC Mode.\n");
+ /* Invalidate LMC[1:3] */
+ ddr_conf_valid_mask &= ~(1 << 1);
+ }
+ }
+
+ if (!ddr_conf_valid_mask) {
+ printf
+ ("ERROR: No valid DIMMs detected on any DDR interface.\n");
+ hang();
+ return -1;
+ }
+
+ /*
+ * We measure the DDR frequency by counting DDR clocks. We can
+ * confirm or adjust the expected frequency as necessary. We use
+ * the measured frequency to make accurate timing calculations
+ * used to configure the controller.
+ */
+ for (if_idx = 0; if_idx < 4; ++if_idx) {
+ u32 tmp_hertz;
+
+ if (!(ddr_conf_valid_mask & (1 << if_idx)))
+ continue;
+
+try_again:
+ /*
+ * only check for alternate refclk wanted on chips that
+ * support it
+ */
+ if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+ (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+ (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) {
+ // only need do this if we are LMC0
+ if (if_idx == 0) {
+ union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
+
+ ddr_pll_ctl.u64 =
+ lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
+
+ /*
+ * If we are asking for 100 MHz refclk, we can
+ * only get it via alternate, so switch to it
+ */
+ if (ddr_ref_hertz == 100000000) {
+ ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel =
+ 1;
+ lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0),
+ ddr_pll_ctl.u64);
+ udelay(1000); // wait 1 msec
+ } else {
+ /*
+ * If we are NOT asking for 100MHz,
+ * then reset to (assumed) 50MHz and go
+ * on
+ */
+ ddr_pll_ctl.cn78xx.dclk_alt_refclk_sel =
+ 0;
+ lmc_wr(priv, CVMX_LMCX_DDR_PLL_CTL(0),
+ ddr_pll_ctl.u64);
+ udelay(1000); // wait 1 msec
+ }
+ }
+ } else {
+ if (ddr_ref_hertz == 100000000) {
+ debug("N0: DRAM init: requested 100 MHz refclk NOT SUPPORTED\n");
+ ddr_ref_hertz = CONFIG_REF_HERTZ;
+ }
+ }
+
+ tmp_hertz = measure_octeon_ddr_clock(priv, &ddr_conf[if_idx],
+ cpu_hertz, ddr_hertz,
+ ddr_ref_hertz, if_idx,
+ ddr_conf_valid_mask);
+
+ /*
+ * only check for alternate refclk acquired on chips that
+ * support it
+ */
+ if ((octeon_is_cpuid(OCTEON_CN73XX)) ||
+ (octeon_is_cpuid(OCTEON_CNF75XX)) ||
+ (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) {
+ /*
+ * if we are LMC0 and we are asked for 100 MHz refclk,
+ * we must be sure it is available
+ * If not, we print an error message, set to 50MHz,
+ * and go on...
+ */
+ if (if_idx == 0 && ddr_ref_hertz == 100000000) {
+ /*
+ * Validate that the clock returned is close
+ * enough to the clock desired
+ */
+ // FIXME: is 5% close enough?
+ int hertz_diff =
+ abs((int)tmp_hertz - (int)ddr_hertz);
+ if (hertz_diff > ((int)ddr_hertz * 5 / 100)) {
+ // nope, diff is greater than 5%
+ debug("N0: DRAM init: requested 100 MHz refclk NOT FOUND\n");
+ ddr_ref_hertz = CONFIG_REF_HERTZ;
+ // clear the flag before trying again!!
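+ // (presumably so that the retry below re-runs the
+ // DDR PLL setup and clock measurement from scratch)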
+ set_ddr_clock_initialized(priv, 0, 0); + goto try_again; + } else { + debug("N0: DRAM Init: requested 100 MHz refclk FOUND and SELECTED\n"); + } + } + } + + if (tmp_hertz > 0) + calc_ddr_hertz = tmp_hertz; + debug("LMC%d: measured speed: %u hz\n", if_idx, tmp_hertz); + } + + if (measured_ddr_hertz) + *measured_ddr_hertz = calc_ddr_hertz; + + memsize_mbytes = 0; + for (if_idx = 0; if_idx < 4; ++if_idx) { + if (!(ddr_conf_valid_mask & (1 << if_idx))) + continue; + + ret = init_octeon_dram_interface(priv, &ddr_conf[if_idx], + calc_ddr_hertz, + cpu_hertz, ddr_ref_hertz, + if_idx, ddr_conf_valid_mask); + if (ret > 0) + memsize_mbytes += ret; + } + + if (memsize_mbytes == 0) + /* All interfaces failed to initialize, so return error */ + return -1; + + /* + * switch over to DBI mode only for chips that support it, and + * enabled by envvar + */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) { + eptr = env_get("ddr_dbi_switchover"); + if (eptr) { + printf("DBI Switchover starting...\n"); + cvmx_dbi_switchover(priv); + printf("DBI Switchover finished.\n"); + } + } + + /* call HW-assist tuning here on chips that support it */ + if ((octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX)) || + (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X))) + cvmx_maybe_tune_node(priv, calc_ddr_hertz); + + eptr = env_get("limit_dram_mbytes"); + if (eptr) { + unsigned int mbytes = simple_strtoul(eptr, NULL, 10); + + if (mbytes > 0) { + memsize_mbytes = mbytes; + printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n", + mbytes); + } + } + + debug("LMC Initialization complete. Total DRAM %d MB\n", + memsize_mbytes); + + return memsize_mbytes; +} + +static int octeon_ddr_probe(struct udevice *dev) +{ + struct ddr_priv *priv = dev_get_priv(dev); + struct ofnode_phandle_args l2c_node; + struct ddr_conf *ddr_conf_ptr; + u32 ddr_conf_valid_mask = 0; + u32 measured_ddr_hertz = 0; + int conf_table_count; + int def_ddr_freq; + u32 mem_mbytes = 0; + u32 ddr_hertz; + u32 ddr_ref_hertz; + int alt_refclk; + const char *eptr; + fdt_addr_t addr; + u64 *ptr; + u64 val; + int ret; + int i; + + /* Don't try to re-init the DDR controller after relocation */ + if (gd->flags & GD_FLG_RELOC) + return 0; + + /* + * Dummy read all local variables into cache, so that they are + * locked in cache when the DDR code runs with flushes etc enabled + */ + ptr = (u64 *)_end; + for (i = 0; i < (0x100000 / sizeof(u64)); i++) + val = readq(ptr++); + + /* + * The base addresses of LMC and L2C are read from the DT. This + * makes it possible to use the DDR init code without the need + * of the "node" variable, describing on which node to access. The + * node number is already included implicitly in the base addresses + * read from the DT this way. 
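+ * (The "l2c-handle" phandle in the LMC node is what is used below to
+ * look up the L2C node and map its registers.)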
+ */ + + /* Get LMC base address */ + priv->lmc_base = dev_remap_addr(dev); + debug("%s: lmc_base=%p\n", __func__, priv->lmc_base); + + /* Get L2C base address */ + ret = dev_read_phandle_with_args(dev, "l2c-handle", NULL, 0, 0, + &l2c_node); + if (ret) { + printf("Can't access L2C node!\n"); + return -ENODEV; + } + + addr = ofnode_get_addr(l2c_node.node); + if (addr == FDT_ADDR_T_NONE) { + printf("Can't access L2C node!\n"); + return -ENODEV; + } + + priv->l2c_base = map_physmem(addr, 0, MAP_NOCACHE); + debug("%s: l2c_base=%p\n", __func__, priv->l2c_base); + + ddr_conf_ptr = octeon_ddr_conf_table_get(&conf_table_count, + &def_ddr_freq); + if (!ddr_conf_ptr) { + printf("ERROR: unable to determine DDR configuration\n"); + return -ENODEV; + } + + for (i = 0; i < conf_table_count; i++) { + if (ddr_conf_ptr[i].dimm_config_table[0].spd_addrs[0] || + ddr_conf_ptr[i].dimm_config_table[0].spd_ptrs[0]) + ddr_conf_valid_mask |= 1 << i; + } + + /* + * Check for special case of mismarked 3005 samples, + * and adjust cpuid + */ + alt_refclk = 0; + ddr_hertz = def_ddr_freq * 1000000; + + eptr = env_get("ddr_clock_hertz"); + if (eptr) { + ddr_hertz = simple_strtoul(eptr, NULL, 0); + gd->mem_clk = divide_nint(ddr_hertz, 1000000); + printf("Parameter found in environment. ddr_clock_hertz = %d\n", + ddr_hertz); + } + + ddr_ref_hertz = octeon3_refclock(alt_refclk, + ddr_hertz, + &ddr_conf_ptr[0].dimm_config_table[0]); + + debug("Initializing DDR, clock = %uhz, reference = %uhz\n", + ddr_hertz, ddr_ref_hertz); + + mem_mbytes = octeon_ddr_initialize(priv, gd->cpu_clk, + ddr_hertz, ddr_ref_hertz, + ddr_conf_valid_mask, + ddr_conf_ptr, &measured_ddr_hertz); + debug("Mem size in MBYTES: %u\n", mem_mbytes); + + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + + debug("Measured DDR clock %d Hz\n", measured_ddr_hertz); + + if (measured_ddr_hertz != 0) { + if (!gd->mem_clk) { + /* + * If ddr_clock not set, use measured clock + * and don't warn + */ + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + } else if ((measured_ddr_hertz > ddr_hertz + 3000000) || + (measured_ddr_hertz < ddr_hertz - 3000000)) { + printf("\nWARNING:\n"); + printf("WARNING: Measured DDR clock mismatch! expected: %lld MHz, measured: %lldMHz, cpu clock: %lu MHz\n", + divide_nint(ddr_hertz, 1000000), + divide_nint(measured_ddr_hertz, 1000000), + gd->cpu_clk); + printf("WARNING:\n\n"); + gd->mem_clk = divide_nint(measured_ddr_hertz, 1000000); + } + } + + if (!mem_mbytes) + return -ENODEV; + + priv->info.base = CONFIG_SYS_SDRAM_BASE; + priv->info.size = MB(mem_mbytes); + + /* + * For 6XXX generate a proper error when reading/writing + * non-existent memory locations. 
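+ * (cvmx_l2c_set_big_size() is assumed here to program the L2C "big
+ * memory" limit to the detected DRAM size, so that accesses beyond
+ * the end of DRAM fault instead of silently aliasing.)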
+ */ + cvmx_l2c_set_big_size(priv, mem_mbytes, 0); + + debug("Ram size %uMiB\n", mem_mbytes); + + return 0; +} + +static int octeon_get_info(struct udevice *dev, struct ram_info *info) +{ + struct ddr_priv *priv = dev_get_priv(dev); + + *info = priv->info; + + return 0; +} + +static struct ram_ops octeon_ops = { + .get_info = octeon_get_info, +}; + +static const struct udevice_id octeon_ids[] = { + {.compatible = "cavium,octeon-7xxx-ddr4" }, + { } +}; + +U_BOOT_DRIVER(octeon_ddr) = { + .name = "octeon_ddr", + .id = UCLASS_RAM, + .of_match = octeon_ids, + .ops = &octeon_ops, + .probe = octeon_ddr_probe, + .platdata_auto_alloc_size = sizeof(struct ddr_priv), +}; From 61674a17bcff855770ac91dbc67d5f1cfb56f39f Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:07 +0200 Subject: [PATCH 06/27] ram: octeon: Add MIPS Octeon3 DDR4 support (part 2/3) This Octeon 3 DDR driver is ported from the 2013 Cavium / Marvell U-Boot repository. It currently supports DDR4 on Octeon 3. It can be later extended to support also DDR3 and Octeon 2 platforms. Part 2 includes the very complex Octeon 3 DDR4 configuration Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- drivers/ram/octeon/octeon3_lmc.c | 11030 +++++++++++++++++++++++++++++ 1 file changed, 11030 insertions(+) create mode 100644 drivers/ram/octeon/octeon3_lmc.c diff --git a/drivers/ram/octeon/octeon3_lmc.c b/drivers/ram/octeon/octeon3_lmc.c new file mode 100644 index 0000000000..327cdc5873 --- /dev/null +++ b/drivers/ram/octeon/octeon3_lmc.c @@ -0,0 +1,11030 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* Random number generator stuff */ + +#define CVMX_RNM_CTL_STATUS 0x0001180040000000 +#define CVMX_OCT_DID_RNG 8ULL + +static u64 cvmx_build_io_address(u64 major_did, u64 sub_did) +{ + return ((0x1ull << 48) | (major_did << 43) | (sub_did << 40)); +} + +static u64 cvmx_rng_get_random64(void) +{ + return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0)); +} + +static void cvmx_rng_enable(void) +{ + u64 val; + + val = csr_rd(CVMX_RNM_CTL_STATUS); + val |= BIT(0) | BIT(1); + csr_wr(CVMX_RNM_CTL_STATUS, val); +} + +#define RLEVEL_PRINTALL_DEFAULT 1 +#define WLEVEL_PRINTALL_DEFAULT 1 + +/* + * Define how many HW WL samples to take for majority voting. + * MUST BE odd!! + * Assume there should only be 2 possible values that will show up, + * so treat ties as a problem!!! + * NOTE: Do not change this without checking the code!!! + */ +#define WLEVEL_LOOPS_DEFAULT 5 + +#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1 +#define SW_WLEVEL_HW_DEFAULT 1 +#define DEFAULT_BEST_RANK_SCORE 9999999 +#define MAX_RANK_SCORE_LIMIT 99 + +/* + * Define how many HW RL samples per rank to take multiple samples will + * allow looking for the best sample score + */ +#define RLEVEL_SAMPLES_DEFAULT 3 + +#define ddr_seq_print(format, ...) 
do {} while (0) + +struct wlevel_bitcnt { + int bitcnt[4]; +}; + +static void display_dac_dbi_settings(int lmc, int dac_or_dbi, + int ecc_ena, int *settings, char *title); + +static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, + int dac_value, int byte); + +/* "mode" arg */ +#define DBTRAIN_TEST 0 +#define DBTRAIN_DBI 1 +#define DBTRAIN_LFSR 2 + +static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, + int mode, u64 *xor_data); + +#define LMC_DDR3_RESET_ASSERT 0 +#define LMC_DDR3_RESET_DEASSERT 1 + +static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset) +{ + union cvmx_lmcx_reset_ctl reset_ctl; + + /* + * 4. Deassert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_CTL[DDR3RST] = 1 + * without modifying any other LMC(0..3)_RESET_CTL fields. + * 5. Read LMC(0..3)_RESET_CTL and wait for the result. + * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us + * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* + * assertion. + */ + debug("LMC%d %s DDR_RESET_L\n", if_num, + (reset == + LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting"); + + reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + reset_ctl.cn78xx.ddr3rst = reset; + lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64); + + lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + + udelay(500); +} + +static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num) +{ + /* + * 5.9.6 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + * + * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts + * are in self refresh and are currently preserving their + * contents. (Software can determine this via + * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of + * this section assumes that the DRAM contents need not be preserved. + * + * The remainder of this section assumes that the CN78XX DDRn_RESET_L + * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts, + * as will be appropriate in many systems. + * + * (In other systems, such as ones that can preserve DDR3/DDR4 part + * contents while CN78XX is powered down, it will not be appropriate to + * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the + * DDR3/DDR4 parts, and this section may not apply.) + * + * The remainder of this section describes the sequence for LMCn. + * + * Perform the following six substeps for LMC reset initialization: + * + * 1. If not done already, assert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other + * LMC(0..3)_RESET_CTL fields. + */ + + if (!ddr_memory_preserved(priv)) { + /* + * 2. Read LMC(0..3)_RESET_CTL and wait for the result. + */ + + lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); + + /* + * 3. Wait until RESET# assertion-time requirement from JEDEC + * DDR3/DDR4 specification is satisfied (200 us during a + * power-on ramp, 100ns when power is already stable). + */ + + udelay(200); + + /* + * 4. Deassert DDRn_RESET_L pin by writing + * LMC(0..3)_RESET_CTL[DDR3RST] = 1 + * without modifying any other LMC(0..3)_RESET_CTL fields. + * 5. Read LMC(0..3)_RESET_CTL and wait for the result. + * 6. Wait a minimum of 500us. This guarantees the necessary + * T = 500us delay between DDRn_RESET_L deassertion and + * DDRn_DIMM*_CKE* assertion. 
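+ *
+ *    (The calls below first deassert DDRn_RESET_L, then toggle it
+ *    once more, assert followed by deassert; each
+ *    cn7xxx_lmc_ddr3_reset() call already includes the 500us wait.)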
+ */ + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); + + /* Toggle Reset Again */ + /* That is, assert, then de-assert, one more time */ + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT); + cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); + } +} + +void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, + int sequence) +{ + /* + * 3. Without changing any other fields in LMC(0)_CONFIG, write + * LMC(0)_CONFIG[RANKMASK] then write both + * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write + * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate + * the ranks that will participate in the sequence. + * + * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or + * selfrefresh exit, depending on whether the DRAM parts are in + * self-refresh and whether their contents should be preserved. While + * LMC performs these sequences, it will not perform any other DDR3 + * transactions. When the sequence is complete, hardware sets the + * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been + * initialized. + * + * If power-up/init is selected immediately following a DRESET + * assertion, LMC executes the sequence described in the "Reset and + * Initialization Procedure" section of the JEDEC DDR3 + * specification. This includes activating CKE, writing all four DDR3 + * mode registers on all selected ranks, and issuing the required + * ZQCL + * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks + * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1, + * LMC writes the JEDEC standard SSTE32882 control words selected by + * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and + * the first DDR3 mode register write operation. + * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the + * corresponding DIMM is not present. + * + * If self-refresh exit is selected, LMC executes the required SRX + * command followed by a refresh and ZQ calibration. Section 4.5 + * describes behavior of a REF + ZQCS. LMC does not write the DDR3 + * mode registers as part of this sequence, and the mode register + * parameters must match at self-refresh entry and exit times. + * + * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] + * to be set. + * + * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have + * been initialized. 
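+ *
+ *    (The body below carries out steps 3 and 4 only: it programs
+ *    RANKMASK and SEQ_CTL, then polls SEQ_COMPLETE with a timeout.
+ *    INIT_STATUS is not checked here.)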
+ */ + + union cvmx_lmcx_seq_ctl seq_ctl; + union cvmx_lmcx_config lmc_config; + int timeout; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_config.s.rankmask = rank_mask; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + + seq_ctl.u64 = 0; + + seq_ctl.s.init_start = 1; + seq_ctl.s.seq_sel = sequence; + + ddr_seq_print + ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n", + rank_mask, sequence, sequence_str[sequence]); + + if (seq_ctl.s.seq_sel == 3) + debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num, + rank_mask); + + lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64); + lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); + + timeout = 100; + do { + udelay(100); /* Wait a while */ + seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); + if (--timeout == 0) { + printf("Sequence %d timed out\n", sequence); + break; + } + } while (seq_ctl.s.seq_complete != 1); + + ddr_seq_print(" LMC sequence=%x: Completed.\n", sequence); +} + +#define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT) +#define AREA_BASE_OFFSET BIT_ULL(26) + +static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p, + u64 bitmask, u64 *xor_data) +{ + u64 p1, p2, d1, d2; + u64 v, v1; + u64 p2offset = (1ULL << 26); // offset to area 2 + u64 datamask; + u64 xor; + u64 i, j, k; + u64 ii; + int errors = 0; + //u64 index; + u64 pattern1 = cvmx_rng_get_random64(); + u64 pattern2 = 0; + u64 bad_bits[2] = { 0, 0 }; + int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; + union cvmx_l2c_ctl l2c_ctl; + int burst; + int saved_dissblkdty; + int node = 0; + + // Force full cacheline write-backs to boost traffic + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty; + l2c_ctl.cn78xx.dissblkdty = 1; + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) + kbitno = 18; + + // Byte lanes may be clear in the mask to indicate no testing on that + //lane. + datamask = bitmask; + + /* + * Add offset to both test regions to not clobber boot stuff + * when running from L2 for NAND boot. + */ + p += AREA_BASE_OFFSET; // make sure base is out of the way of boot + + // final address must include LMC and node + p |= (lmc << 7); /* Map address into proper interface */ + p = bdk_numa_get_address(node, p); /* Map to node */ + p |= 1ull << 63; + +#define II_INC BIT_ULL(22) +#define II_MAX BIT_ULL(22) +#define K_INC BIT_ULL(14) +#define K_MAX BIT_ULL(kbitno) +#define J_INC BIT_ULL(9) +#define J_MAX BIT_ULL(12) +#define I_INC BIT_ULL(3) +#define I_MAX BIT_ULL(7) + + debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n", + node, lmc, __func__, p, p + p2offset, 1ULL << kbitno); + + // loops are ordered so that only a single 64-bit slot is written to + // each cacheline at one time, then the cachelines are forced out; + // this should maximize read/write traffic + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + for (i = 0; i < I_MAX; i += I_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + v = pattern1 * (p1 + i); + // write the same thing to both areas + v1 = v; + + cvmx_write64_uint64(p1 + i, v); + cvmx_write64_uint64(p2 + i, v1); + + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + } + + CVMX_DCACHE_INVALIDATE; + + debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc); + + /* Make a series of passes over the memory areas. 
*/ + + for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) { + u64 this_pattern = cvmx_rng_get_random64(); + + pattern2 ^= this_pattern; + + /* + * XOR the data with a random value, applying the change to both + * memory areas. + */ + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + // FIXME: rearranged, did not make much difference? + for (i = 0; i < I_MAX; i += I_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + v = cvmx_read64_uint64(p1 + + i) ^ + this_pattern; + v1 = cvmx_read64_uint64(p2 + + i) ^ + this_pattern; + + cvmx_write64_uint64(p1 + i, v); + cvmx_write64_uint64(p2 + i, v1); + + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + } + + CVMX_DCACHE_INVALIDATE; + + debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n", + node, lmc); + + /* + * Look for differences in the areas. If there is a mismatch, + * reset both memory locations with the same pattern. Failing + * to do so means that on all subsequent passes the pair of + * locations remain out of sync giving spurious errors. + */ + + // FIXME: Change the loop order so that an entire cache line + // is compared at one time. This is so that a read + // error that occurs *anywhere* on the cacheline will + // be caught, rather than comparing only 1 cacheline + // slot at a time, where an error on a different + // slot will be missed that time around + // Does the above make sense? + + // FIXME? extend the range of memory tested!! + for (ii = 0; ii < II_MAX; ii += II_INC) { + for (k = 0; k < K_MAX; k += K_INC) { + for (j = 0; j < J_MAX; j += J_INC) { + p1 = p + ii + k + j; + p2 = p1 + p2offset; + + // process entire cachelines in the + //innermost loop + for (i = 0; i < I_MAX; i += I_INC) { + int bybit = 1; + // start in byte lane 0 + u64 bymsk = 0xffULL; + + // FIXME: this should predict + // what we find...??? + v = ((p1 + i) * pattern1) ^ + pattern2; + d1 = cvmx_read64_uint64(p1 + i); + d2 = cvmx_read64_uint64(p2 + i); + + // union of error bits only in + // active byte lanes + xor = ((d1 ^ v) | (d2 ^ v)) & + datamask; + + if (!xor) + continue; + + // accumulate bad bits + bad_bits[0] |= xor; + + while (xor != 0) { + debug("ERROR(%03d): [0x%016llX] [0x%016llX] expected 0x%016llX d1 %016llX d2 %016llX\n", + burst, p1, p2, v, + d1, d2); + // error(s) in this lane + if (xor & bymsk) { + // set the byte + // error bit + errors |= bybit; + // clear byte + // lane in + // error bits + xor &= ~bymsk; + // clear the + // byte lane in + // the mask + datamask &= ~bymsk; +#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS + // nothing + // left to do + if (datamask == 0) { + return errors; + } +#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ + } + // move mask into + // next byte lane + bymsk <<= 8; + // move bit into next + // byte position + bybit <<= 1; + } + } + CVMX_CACHE_WBIL2(p1, 0); + CVMX_CACHE_WBIL2(p2, 0); + } + } + } + + debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n", + node, lmc); + } + + if (xor_data) { // send the bad bits back... 
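+ // note: only bad_bits[0] is accumulated by the test loop above,
+ // so bad_bits[1] is returned still zeroed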
+ xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; // let it be zeroed + } + + // Restore original setting that could enable partial cacheline writes + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty; + l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64); + + return errors; +} + +static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank, + int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr; + lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel; + lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value = + (mr_wr_addr == -1) ? 1 : 0; + lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* Mode Register Write */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); +} + +#define INV_A0_17(x) ((x) ^ 0x22bf8) + +static void set_mpr_mode(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count, int mpr, int bg1) +{ + int rankx; + + debug("All Ranks: Set mpr mode = %x %c-side\n", + mpr, (bg1 == 0) ? 'A' : 'B'); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + if (bg1 == 0) { + /* MR3 A-side */ + ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1); + } else { + /* MR3 B-side */ + ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3, + bg1); + } + } +} + +static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num, + int rank, int page, int location) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num)); + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; + lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0; /* Read=0, Write=1 */ + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* MPR register access sequence */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); + + debug("LMC_MR_MPR_CTL : 0x%016llx\n", + lmc_mr_mpr_ctl.u64); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_loc); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_wr); +} + +static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable) +{ + union cvmx_lmcx_control lmc_control; + int save_rdimm_mode; + + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_rdimm_mode = lmc_control.s.rdimm_ena; + lmc_control.s.rdimm_ena = enable; + debug("Setting RDIMM_ENA = %x\n", enable); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); + + return save_rdimm_mode; +} + +static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank, + int page, int location, u64 *mpr_data) +{ + do_ddr4_mpr_read(priv, if_num, rank, page, location); + + mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); +} + +/* Display MPR values for Page */ +static void display_mpr_page(struct ddr_priv *priv, int rank_mask, + int if_num, int page) +{ + int rankx, location; + u64 mpr_data[3]; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ", + if_num, rankx, page); + for (location = 0; location < 4; 
location++) { + ddr4_mpr_read(priv, if_num, rankx, page, location, + mpr_data); + debug("0x%02llx ", mpr_data[0] & 0xFF); + } + debug("\n"); + + } /* for (rankx = 0; rankx < 4; rankx++) */ +} + +static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank, + int page, int location, u8 mpr_data) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; + lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1; /* Read=0, Write=1 */ + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* MPR register access sequence */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); + + debug("LMC_MR_MPR_CTL : 0x%016llx\n", + lmc_mr_mpr_ctl.u64); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); + debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_loc); + debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", + lmc_mr_mpr_ctl.cn70xx.mpr_wr); +} + +static void set_vref(struct ddr_priv *priv, int if_num, int rank, + int range, int value) +{ + union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; + union cvmx_lmcx_modereg_params3 lmc_modereg_params3; + int mr_wr_addr = 0; + + lmc_mr_mpr_ctl.u64 = 0; + lmc_modereg_params3.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS3(if_num)); + + /* A12:A10 tCCD_L */ + mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10; + mr_wr_addr |= 1 << 7; /* A7 1 = Enable(Training Mode) */ + mr_wr_addr |= range << 6; /* A6 vrefDQ Training Range */ + mr_wr_addr |= value << 0; /* A5:A0 vrefDQ Training Value */ + + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; + lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6; /* Write MR6 */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); + + /* 0x8 = Mode Register Write */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); + + /* + * It is vendor specific whether vref_value is captured with A7=1. + * A subsequent MRS might be necessary. + */ + oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); + + mr_wr_addr &= ~(1 << 7); /* A7 0 = Disable(Training Mode) */ + lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; + lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); +} + +static void set_dram_output_inversion(struct ddr_priv *priv, int if_num, + int dimm_count, int rank_mask, + int inversion) +{ + union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl; + union cvmx_lmcx_dimmx_params lmc_dimmx_params; + union cvmx_lmcx_dimm_ctl lmc_dimm_ctl; + int dimm_no; + + /* Don't touch extenced register control words */ + lmc_ddr4_dimm_ctl.u64 = 0; + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64); + + debug("All DIMMs: Register Control Word RC0 : %x\n", + (inversion & 1)); + + for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) { + lmc_dimmx_params.u64 = + lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num)); + lmc_dimmx_params.s.rc0 = + (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1); + + lmc_wr(priv, + CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num), + lmc_dimmx_params.u64); + } + + /* LMC0_DIMM_CTL */ + lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + lmc_dimm_ctl.s.dimm0_wmask = 0x1; + lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 
0x0001 : 0x0000; + + debug("LMC DIMM_CTL : 0x%016llx\n", + lmc_dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64); + + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); /* Init RCW */ +} + +static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count, int pattern, + int location_mask) +{ + int rankx; + int location; + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + for (location = 0; location < 4; ++location) { + if (!(location_mask & (1 << location))) + continue; + + ddr4_mpr_write(priv, if_num, rankx, + /* page */ 0, /* location */ location, + pattern); + } + } +} + +static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask, + int if_num, int dimm_count) +{ + int save_ref_zqcs_int; + union cvmx_lmcx_config lmc_config; + + /* + * Okay, here is the latest sequence. This should work for all + * chips and passes (78,88,73,etc). This sequence should be run + * immediately after DRAM INIT. The basic idea is to write the + * same pattern into each of the 4 MPR locations in the DRAM, so + * that the same value is returned when doing MPR reads regardless + * of the inversion state. My advice is to put this into a + * function, change_rdimm_mpr_pattern or something like that, so + * that it can be called multiple times, as I think David wants a + * clock-like pattern for OFFSET training, but does not want a + * clock pattern for Bit-Deskew. You should then be able to call + * this at any point in the init sequence (after DRAM init) to + * change the pattern to a new value. + * Mike + * + * A correction: PHY doesn't need any pattern during offset + * training, but needs clock like pattern for internal vref and + * bit-dskew training. So for that reason, these steps below have + * to be conducted before those trainings to pre-condition + * the pattern. David + * + * Note: Step 3, 4, 8 and 9 have to be done through RDIMM + * sequence. If you issue MRW sequence to do RCW write (in o78 pass + * 1 at least), LMC will still do two commands because + * CONTROL[RDIMM_ENA] is still set high. We don't want it to have + * any unintentional mode register write so it's best to do what + * Mike is doing here. + * Andrew + */ + + /* 1) Disable refresh (REF_ZQCS_INT = 0) */ + + debug("1) Disable refresh (REF_ZQCS_INT = 0)\n"); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; + lmc_config.cn78xx.ref_zqcs_int = 0; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + + /* + * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8) + * with MODEREG_PARAMS0[MPRLOC]=0, + * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and + * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) + */ + + debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n"); + + /* A-side */ + set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0); + /* B-side */ + set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1); + + /* + * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set + * the value you would like directly into + * MR_MPR_CTL[MR_WR_ADDR] + */ + + /* + * 3) Disable RCD Parity (if previously enabled) - parity does not + * work if inversion disabled + */ + + debug("3) Disable RCD Parity\n"); + + /* + * 4) Disable Inversion in the RCD. + * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it + * may be easier to use the MRW sequence (seq_sel=8). 
Just set + * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data, + * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg + */ + + debug("4) Disable Inversion in the RCD.\n"); + + set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1); + + /* + * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out + * non-inverted. + */ + + debug("5) Disable CONTROL[RDIMM_ENA]\n"); + + set_rdimm_mode(priv, if_num, 0); + + /* + * 6) Write all 4 MPR registers with the desired pattern (have to + * do this for all enabled ranks) + * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3, + * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern + */ + + debug("6) Write all 4 MPR page 0 Training Patterns\n"); + + write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8); + + /* 7) Re-enable RDIMM_ENA */ + + debug("7) Re-enable RDIMM_ENA\n"); + + set_rdimm_mode(priv, if_num, 1); + + /* 8) Re-enable RDIMM inversion */ + + debug("8) Re-enable RDIMM inversion\n"); + + set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0); + + /* 9) Re-enable RDIMM parity (if desired) */ + + debug("9) Re-enable RDIMM parity (if desired)\n"); + + /* + * 10)Take B-side devices out of MPR mode (Run MRW sequence + * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0, + * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and + * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) + */ + + debug("10)Take B-side devices out of MPR mode\n"); + + set_mpr_mode(priv, rank_mask, if_num, dimm_count, + /* mpr */ 0, /* bg1 */ 1); + + /* + * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and + * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR] + */ + + /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */ + + debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n"); + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); +} + +static int validate_hwl_seq(int *wl, int *seq) +{ + // sequence index, step through the sequence array + int seqx; + int bitnum; + + seqx = 0; + + while (seq[seqx + 1] >= 0) { // stop on next seq entry == -1 + // but now, check current versus next + bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]]; + // magic validity number (see matrix above) + if (!((1 << bitnum) & 0xBDE7)) + return 1; + seqx++; + } + + return 0; +} + +static int validate_hw_wl_settings(int if_num, + union cvmx_lmcx_wlevel_rankx + *lmc_wlevel_rank, int is_rdimm, int ecc_ena) +{ + int wl[9], byte, errors; + + // arrange the sequences so + // index 0 has byte 0, etc, ECC in middle + int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 }; + // index 0 is ECC, then go down + int rseq1[] = { 8, 3, 2, 1, 0, -1 }; + // index 0 has byte 4, then go up + int rseq2[] = { 4, 5, 6, 7, -1 }; + // index 0 has byte 0, etc, no ECC + int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 }; + // index 0 is byte 3, then go down, no ECC + int rseq1no[] = { 3, 2, 1, 0, -1 }; + + // in the CSR, bytes 0-7 are always data, byte 8 is ECC + for (byte = 0; byte < (8 + ecc_ena); byte++) { + // preprocess :-) + wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >> + 1) & 3; + } + + errors = 0; + if (is_rdimm) { // RDIMM order + errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no); + errors += validate_hwl_seq(wl, rseq2); + } else { // UDIMM order + errors = validate_hwl_seq(wl, (ecc_ena) ? 
useq : useqno); + } + + return errors; +} + +static unsigned int extr_wr(u64 u, int x) +{ + return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) | + ((u >> (51 + x - 2)) & 0x4ULL)); +} + +static void insrt_wr(u64 *up, int x, int v) +{ + u64 u = *up; + + u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x))); + *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) | + ((v & 0x4ULL) << (51 + x - 2))); +} + +/* Read out Deskew Settings for DDR */ + +struct deskew_bytes { + u16 bits[8]; +}; + +struct deskew_data { + struct deskew_bytes bytes[9]; +}; + +struct dac_data { + int bytes[9]; +}; + +// T88 pass 1, skip 4=DAC +static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 }; +// T88 Pass 2, skip 4=DAC and 5=DBI +static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 }; + +static void get_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmc_config; + int bit_index; + int byte_lane, byte_limit; + // NOTE: these are for pass 2.x + int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X); + const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; + + memset(dskdat, 0, sizeof(*dskdat)); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_clk_scaler = 3; + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane + + for (bit_index = 0; bit_index < 8; ++bit_index) { + // set bit number and start read sequence + phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index]; + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // poll for read sequence to complete + do { + phy_ctl.u64 = + lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + // record the data + dskdat->bytes[byte_lane].bits[bit_index] = + phy_ctl.s.dsk_dbg_rd_data & 0x3ff; + } + } +} + +static void display_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat, + int print_enable) +{ + int byte_lane; + int bit_num; + u16 flags, deskew; + union cvmx_lmcx_config lmc_config; + int byte_limit; + const char *fc = " ?-=+*#&"; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + if (print_enable) { + debug("N0.LMC%d: Deskew Data: Bit => :", + if_num); + for (bit_num = 7; bit_num >= 0; --bit_num) + debug(" %3d ", bit_num); + debug("\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + debug("N0.LMC%d: Bit Deskew Byte %d %s :", + if_num, byte_lane, + (print_enable >= 3) ? "FINAL" : " "); + + for (bit_num = 7; bit_num >= 0; --bit_num) { + flags = dskdat->bytes[byte_lane].bits[bit_num] & 7; + deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3; + + if (print_enable) + debug(" %3d %c", deskew, fc[flags ^ 1]); + + } /* for (bit_num = 7; bit_num >= 0; --bit_num) */ + + if (print_enable) + debug("\n"); + } +} + +static void override_deskew_settings(struct ddr_priv *priv, int if_num, + struct deskew_data *dskdat) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmc_config; + + int bit, byte_lane, byte_limit; + u64 csr_data; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 
4 : 8) + lmc_config.s.ecc_ena; + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + + phy_ctl.s.phy_reset = 0; + phy_ctl.s.dsk_dbg_num_bits_sel = 1; + phy_ctl.s.dsk_dbg_offset = 0; + phy_ctl.s.dsk_dbg_clk_scaler = 3; + + phy_ctl.s.dsk_dbg_wr_mode = 1; + phy_ctl.s.dsk_dbg_load_dis = 0; + phy_ctl.s.dsk_dbg_overwrt_ena = 0; + + phy_ctl.s.phy_dsk_reset = 0; + + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + csr_data = 0; + // FIXME: can we ignore DBI? + for (bit = 0; bit < 8; ++bit) { + // fetch input and adjust + u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) & + 0x7F; + + /* + * lmc_general_purpose0.data[6:0] // DQ0 + * lmc_general_purpose0.data[13:7] // DQ1 + * lmc_general_purpose0.data[20:14] // DQ2 + * lmc_general_purpose0.data[27:21] // DQ3 + * lmc_general_purpose0.data[34:28] // DQ4 + * lmc_general_purpose0.data[41:35] // DQ5 + * lmc_general_purpose0.data[48:42] // DQ6 + * lmc_general_purpose0.data[55:49] // DQ7 + * lmc_general_purpose0.data[62:56] // DBI + */ + csr_data |= (bits << (7 * bit)); + + } /* for (bit = 0; bit < 8; ++bit) */ + + // update GP0 with the bit data for this byte lane + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data); + lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num)); + + // start the deskew load sequence + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // poll for read sequence to complete + do { + udelay(100); + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + } + + // tell phy to use the new settings + phy_ctl.s.dsk_dbg_overwrt_ena = 1; + phy_ctl.s.dsk_dbg_rd_start = 0; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + phy_ctl.s.dsk_dbg_wr_mode = 0; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); +} + +static void process_by_rank_dac(struct ddr_priv *priv, int if_num, + int rank_mask, struct dac_data *dacdat) +{ + union cvmx_lmcx_config lmc_config; + int rankx, byte_lane; + int byte_limit; + int rank_count; + struct dac_data dacsum; + int lane_probs; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + memset((void *)&dacsum, 0, sizeof(dacsum)); + rank_count = 0; + lane_probs = 0; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + rank_count++; + + display_dac_dbi_settings(if_num, /*dac */ 1, + lmc_config.s.ecc_ena, + &dacdat[rankx].bytes[0], + "By-Ranks VREF"); + // sum + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (rank_count == 2) { + int ranks_diff = + abs((dacsum.bytes[byte_lane] - + dacdat[rankx].bytes[byte_lane])); + + // FIXME: is 19 a good number? + if (ranks_diff > 19) + lane_probs |= (1 << byte_lane); + } + dacsum.bytes[byte_lane] += + dacdat[rankx].bytes[byte_lane]; + } + } + + // average + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) + dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint? 
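+ // (plain integer division truncates here; see the FIXME above about
+ // possibly rounding to nearest instead)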
+ + display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena, + &dacsum.bytes[0], "All-Rank VREF"); + + if (lane_probs) { + debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n", + if_num, lane_probs); + } + + // finally, write the averaged DAC values + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + load_dac_override(priv, if_num, dacsum.bytes[byte_lane], + byte_lane); + } +} + +static void process_by_rank_dsk(struct ddr_priv *priv, int if_num, + int rank_mask, struct deskew_data *dskdat) +{ + union cvmx_lmcx_config lmc_config; + int rankx, lane, bit; + int byte_limit; + struct deskew_data dsksum, dskcnt; + u16 deskew; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; + + memset((void *)&dsksum, 0, sizeof(dsksum)); + memset((void *)&dskcnt, 0, sizeof(dskcnt)); + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + // sum ranks + for (lane = 0; lane < byte_limit; lane++) { + for (bit = 0; bit < 8; ++bit) { + deskew = dskdat[rankx].bytes[lane].bits[bit]; + // if flags indicate sat hi or lo, skip it + if (deskew & 6) + continue; + + // clear flags + dsksum.bytes[lane].bits[bit] += + deskew & ~7; + // count entries + dskcnt.bytes[lane].bits[bit] += 1; + } + } + } + + // average ranks + for (lane = 0; lane < byte_limit; lane++) { + for (bit = 0; bit < 8; ++bit) { + int div = dskcnt.bytes[lane].bits[bit]; + + if (div > 0) { + dsksum.bytes[lane].bits[bit] /= div; + // clear flags + dsksum.bytes[lane].bits[bit] &= ~7; + // set LOCK + dsksum.bytes[lane].bits[bit] |= 1; + } else { + // FIXME? use reset value? + dsksum.bytes[lane].bits[bit] = + (64 << 3) | 1; + } + } + } + + // TME for FINAL version + display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3); + + // finally, write the averaged DESKEW values + override_deskew_settings(priv, if_num, &dsksum); +} + +struct deskew_counts { + int saturated; // number saturated + int unlocked; // number unlocked + int nibrng_errs; // nibble range errors + int nibunl_errs; // nibble unlocked errors + int bitval_errs; // bit value errors +}; + +#define MIN_BITVAL 17 +#define MAX_BITVAL 110 + +static void validate_deskew_training(struct ddr_priv *priv, int rank_mask, + int if_num, struct deskew_counts *counts, + int print_flags) +{ + int byte_lane, bit_index, nib_num; + int nibrng_errs, nibunl_errs, bitval_errs; + union cvmx_lmcx_config lmc_config; + s16 nib_min[2], nib_max[2], nib_unl[2]; + int byte_limit; + int print_enable = print_flags & 1; + struct deskew_data dskdat; + s16 flags, deskew; + const char *fc = " ?-=+*#&"; + int bit_last; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; + + memset(counts, 0, sizeof(struct deskew_counts)); + + get_deskew_settings(priv, if_num, &dskdat); + + if (print_enable) { + debug("N0.LMC%d: Deskew Settings: Bit => :", + if_num); + for (bit_index = 7; bit_index >= 0; --bit_index) + debug(" %3d ", bit_index); + debug("\n"); + } + + for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { + if (print_enable) + debug("N0.LMC%d: Bit Deskew Byte %d %s :", + if_num, byte_lane, + (print_flags & 2) ? 
"FINAL" : " "); + + nib_min[0] = 127; + nib_min[1] = 127; + nib_max[0] = 0; + nib_max[1] = 0; + nib_unl[0] = 0; + nib_unl[1] = 0; + + if (lmc_config.s.mode32b == 1 && byte_lane == 4) { + bit_last = 3; + if (print_enable) + debug(" "); + } else { + bit_last = 7; + } + + for (bit_index = bit_last; bit_index >= 0; --bit_index) { + nib_num = (bit_index > 3) ? 1 : 0; + + flags = dskdat.bytes[byte_lane].bits[bit_index] & 7; + deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3; + + counts->saturated += !!(flags & 6); + + // Do range calc even when locked; it could happen + // that a bit is still unlocked after final retry, + // and we want to have an external retry if a RANGE + // error is present at exit... + nib_min[nib_num] = min(nib_min[nib_num], deskew); + nib_max[nib_num] = max(nib_max[nib_num], deskew); + + if (!(flags & 1)) { // only when not locked + counts->unlocked += 1; + nib_unl[nib_num] += 1; + } + + if (print_enable) + debug(" %3d %c", deskew, fc[flags ^ 1]); + } + + /* + * Now look for nibble errors + * + * For bit 55, it looks like a bit deskew problem. When the + * upper nibble of byte 6 needs to go to saturation, bit 7 + * of byte 6 locks prematurely at 64. For DIMMs with raw + * card A and B, can we reset the deskew training when we + * encounter this case? The reset criteria should be looking + * at one nibble at a time for raw card A and B; if the + * bit-deskew setting within a nibble is different by > 33, + * we'll issue a reset to the bit deskew training. + * + * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64 + */ + // upper nibble range, then lower nibble range + nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0; + nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0; + + // check for nibble all unlocked + nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0; + + // check for bit value errors, ie < 17 or > 110 + // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL + bitval_errs = ((nib_max[1] > MAX_BITVAL) || + (nib_max[0] > MAX_BITVAL)) ? 1 : 0; + bitval_errs |= ((nib_min[1] < MIN_BITVAL) || + (nib_min[0] < MIN_BITVAL)) ? 1 : 0; + + if ((nibrng_errs != 0 || nibunl_errs != 0 || + bitval_errs != 0) && print_enable) { + debug(" %c%c%c", + (nibrng_errs) ? 'R' : ' ', + (nibunl_errs) ? 'U' : ' ', + (bitval_errs) ? 'V' : ' '); + } + + if (print_enable) + debug("\n"); + + counts->nibrng_errs |= (nibrng_errs << byte_lane); + counts->nibunl_errs |= (nibunl_errs << byte_lane); + counts->bitval_errs |= (bitval_errs << byte_lane); + } +} + +static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, + int dac_value, int byte) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + // single bytelanes incr by 1; A is for ALL + int bytex = (byte == 0x0A) ? byte : byte + 1; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + SET_DDR_DLL_CTL3(byte_sel, bytex); + SET_DDR_DLL_CTL3(offset, dac_value >> 1); + + ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. 
*/ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes + + return (unsigned short)GET_DDR_DLL_CTL3(offset); +} + +// arg dac_or_dbi is 1 for DAC, 0 for DBI +// returns 9 entries (bytelanes 0 through 8) in settings[] +// returns 0 if OK, -1 if a problem +static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num, + int dac_or_dbi, int *settings) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + int byte_lane, bit_num; + int deskew; + int dac_value; + int new_deskew_layout = 0; + + new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX); + new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) && + !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_clk_scaler = 3; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + bit_num = (dac_or_dbi) ? 4 : 5; + // DBI not available + if (bit_num == 5 && !new_deskew_layout) + return -1; + + // FIXME: always assume ECC is available + for (byte_lane = 8; byte_lane >= 0; --byte_lane) { + //set byte lane and bit to read + phy_ctl.s.dsk_dbg_bit_sel = bit_num; + phy_ctl.s.dsk_dbg_byte_sel = byte_lane; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + //start read sequence + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.dsk_dbg_rd_start = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + //poll for read sequence to complete + do { + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + } while (phy_ctl.s.dsk_dbg_rd_complete != 1); + + // keep the flag bits where they are for DBI + deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */ + dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff; + + settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew; + } + + return 0; +} + +// print out the DBI settings array +// arg dac_or_dbi is 1 for DAC, 0 for DBI +static void display_dac_dbi_settings(int lmc, int dac_or_dbi, + int ecc_ena, int *settings, char *title) +{ + int byte; + int flags; + int deskew; + const char *fc = " ?-=+*#&"; + + debug("N0.LMC%d: %s %s Settings %d:0 :", + lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena); + // FIXME: what about 32-bit mode? + for (byte = (7 + ecc_ena); byte >= 0; --byte) { + if (dac_or_dbi) { // DAC + flags = 1; // say its locked to get blank + deskew = settings[byte] & 0xff; + } else { // DBI + flags = settings[byte] & 7; + deskew = (settings[byte] >> 3) & 0x7f; + } + debug(" %3d %c", deskew, fc[flags ^ 1]); + } + debug("\n"); +} + +// Find a HWL majority +static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc, + int *xc, int *cc) +{ + int ix, ic; + + *mx = -1; + *mc = 0; + *xc = 0; + *cc = 0; + + for (ix = 0; ix < 4; ix++) { + ic = bc->bitcnt[ix]; + + // make a bitmask of the ones with a count + if (ic > 0) { + *mc |= (1 << ix); + *cc += 1; // count how many had non-zero counts + } + + // find the majority + if (ic > *xc) { // new max? + *xc = ic; // yes + *mx = ix; // set its index + } + } + + return (*mx << 1); +} + +// Evaluate the DAC settings array +static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings) +{ + int byte, lane, dac, comp; + int last = (if_64b) ? 7 : 3; + + // FIXME: change the check...??? + // this looks only for sets of DAC values whose max/min differ by a lot + // let any EVEN go so long as it is within range... 
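+	/*
+	 * Illustration with made-up values (not from any real
+	 * configuration): lane settings of 0x30, 0x32 and 0x4C give a
+	 * worst-case pair spread of |0x30 - 0x4C| = 28, which exceeds the
+	 * 25 threshold below, so the function returns 1 to flag a suspect
+	 * spread; a set like 0x30/0x32/0x38 (maximum spread 8) returns 0.
+	 */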
+ for (byte = (last + ecc_ena); byte >= 0; --byte) { + dac = settings[byte] & 0xff; + + for (lane = (last + ecc_ena); lane >= 0; --lane) { + comp = settings[lane] & 0xff; + if (abs((dac - comp)) > 25) + return 1; + } + } + + return 0; +} + +static void perform_offset_training(struct ddr_priv *priv, int rank_mask, + int if_num) +{ + union cvmx_lmcx_phy_ctl lmc_phy_ctl; + u64 orig_phy_ctl; + const char *s; + + /* + * 4.8.6 LMC Offset Training + * + * LMC requires input-receiver offset training. + * + * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1 + */ + lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + orig_phy_ctl = lmc_phy_ctl.u64; + lmc_phy_ctl.s.dac_on = 1; + + // allow full CSR override + s = lookup_env_ull(priv, "ddr_phy_ctl"); + if (s) + lmc_phy_ctl.u64 = strtoull(s, NULL, 0); + + // do not print or write if CSR does not change... + if (lmc_phy_ctl.u64 != orig_phy_ctl) { + debug("PHY_CTL : 0x%016llx\n", + lmc_phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64); + } + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + /* Start Offset training sequence */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B); +} + +static void perform_internal_vref_training(struct ddr_priv *priv, + int rank_mask, int if_num) +{ + union cvmx_lmcx_ext_config ext_config; + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + + // First, make sure all byte-lanes are out of VREF bypass mode + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */ + ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + + /* + * 4.8.7 LMC Internal vref Training + * + * LMC requires input-reference-voltage training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0. + */ + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_config.s.vrefint_seq_deskew = 0; + + ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n", + ext_config.s.vrefint_seq_deskew); + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. + */ + /* Start LMC Internal vref Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); +} + +#define dbg_avg(format, ...) 
// debug(format, ##__VA_ARGS__) + +static int process_samples_average(s16 *bytes, int num_samples, + int lmc, int lane_no) +{ + int i, sadj, sum = 0, ret, asum, trunc; + s16 smin = 32767, smax = -32768; + int nmin, nmax; + //int rng; + + dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no); + + for (i = 0; i < num_samples; i++) { + sum += bytes[i]; + if (bytes[i] < smin) + smin = bytes[i]; + if (bytes[i] > smax) + smax = bytes[i]; + dbg_avg(" %3d", bytes[i]); + } + + nmin = 0; + nmax = 0; + for (i = 0; i < num_samples; i++) { + if (bytes[i] == smin) + nmin += 1; + if (bytes[i] == smax) + nmax += 1; + } + dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)", + smin, nmin, smax, nmax, rng, num_samples); + + asum = sum - smin - smax; + + sadj = divide_nint(asum * 10, (num_samples - 2)); + + trunc = asum / (num_samples - 2); + + dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc); + + sadj = divide_nint(sadj, 10); + if (trunc & 1) + ret = trunc; + else if (sadj & 1) + ret = sadj; + else + ret = trunc + 1; + + dbg_avg(" -> %3d\n", ret); + + return ret; +} + +#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries + +#define default_lock_retry_limit 20 // 20 retries +#define deskew_validation_delay 10000 // 10 millisecs + +static int perform_deskew_training(struct ddr_priv *priv, int rank_mask, + int if_num, int spd_rawcard_aorb) +{ + int unsaturated, locked; + int sat_retries, sat_retries_limit; + int lock_retries, lock_retries_total, lock_retries_limit; + int print_first; + int print_them_all; + struct deskew_counts dsk_counts; + union cvmx_lmcx_phy_ctl phy_ctl; + char *s; + int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX); + int disable_bitval_retries = 1; // default to disabled + + debug("N0.LMC%d: Performing Deskew Training.\n", if_num); + + sat_retries = 0; + sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT; + + lock_retries_total = 0; + unsaturated = 0; + print_first = 1; // print the first one + // set to true for printing all normal deskew attempts + print_them_all = 0; + + // provide override for bitval_errs causing internal VREF retries + s = env_get("ddr_disable_bitval_retries"); + if (s) + disable_bitval_retries = !!simple_strtoul(s, NULL, 0); + + lock_retries_limit = default_lock_retry_limit; + if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) || + (octeon_is_cpuid(OCTEON_CN73XX)) || + (octeon_is_cpuid(OCTEON_CNF75XX))) + lock_retries_limit *= 2; // give new chips twice as many + + do { /* while (sat_retries < sat_retry_limit) */ + /* + * 4.8.8 LMC Deskew Training + * + * LMC requires input-read-data deskew training. + * + * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1. + */ + + union cvmx_lmcx_ext_config ext_config; + + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_config.s.vrefint_seq_deskew = 1; + + ddr_seq_print + ("Performing LMC sequence: vrefint_seq_deskew = %d\n", + ext_config.s.vrefint_seq_deskew); + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); + + /* + * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and + * LMC(0)_SEQ_CTL[INIT_START] = 1. + * + * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. 
+ */ + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */ + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* LMC Deskew Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); + + lock_retries = 0; + +perform_deskew_training: + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */ + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* LMC Deskew Training */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); + + // Moved this from validate_deskew_training + /* Allow deskew results to stabilize before evaluating them. */ + udelay(deskew_validation_delay); + + // Now go look at lock and saturation status... + validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, + print_first); + // after printing the first and not doing them all, no more + if (print_first && !print_them_all) + print_first = 0; + + unsaturated = (dsk_counts.saturated == 0); + locked = (dsk_counts.unlocked == 0); + + // only do locking retries if unsaturated or rawcard A or B, + // otherwise full SAT retry + if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) { + if (!locked) { // and not locked + lock_retries++; + lock_retries_total++; + if (lock_retries <= lock_retries_limit) { + goto perform_deskew_training; + } else { + debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n", + if_num, lock_retries_limit); + } + } else { + // only print if we did try + if (lock_retries_total > 0) + debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n", + if_num, lock_retries); + } + } /* if (unsaturated || spd_rawcard_aorb) */ + + ++sat_retries; + + /* + * At this point, check for a DDR4 RDIMM that will not + * benefit from SAT retries; if so, exit + */ + if (spd_rawcard_aorb && !has_no_sat) { + debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n", + if_num); + break; // no sat or lock retries + } + + } while (!unsaturated && (sat_retries < sat_retries_limit)); + + debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n", + if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? + "Timed Out" : "Completed", sat_retries - 1, lock_retries_total); + + // FIXME? add saturation to reasons for fault return - give it a + // chance via Internal VREF + // FIXME? add OPTIONAL bit value to reasons for fault return - + // give it a chance via Internal VREF + if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 || + (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) || + !unsaturated) { + debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n", + if_num); + // FIXME: do we want this output always for errors? + validate_deskew_training(priv, rank_mask, if_num, + &dsk_counts, 1); + return -1; // we did retry locally, they did not help + } + + // NOTE: we (currently) always print one last training validation + // before starting Read Leveling... + + return 0; +} + +#define SCALING_FACTOR (1000) + +// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config +static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, + int rank_count, int dram_connection) +{ + u64 reff_s; + u64 rser_s = (dram_connection) ? 0 : 15; + u64 vdd = 1200; + u64 vref; + // 99 == HiZ + u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? + 1 * 1024 * 1024 : rtt_wr); + u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && + (rtt_wr != 0))) ? + 1 * 1024 * 1024 : rtt_park); + u64 dqx_ctl_s = (dqx_ctl == 0 ? 
1 * 1024 * 1024 : dqx_ctl); + int vref_value; + u64 rangepc = 6000; // range1 base + u64 vrefpc; + int vref_range = 0; + + reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s)); + + vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) / + (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR; + + vref = (vref * vdd) / 2 / SCALING_FACTOR; + + vrefpc = (vref * 100 * 100) / vdd; + + if (vrefpc < rangepc) { // < range1 base, use range2 + vref_range = 1 << 6; // set bit A6 for range2 + rangepc = 4500; // range2 base is 45% + } + + vref_value = divide_nint(vrefpc - rangepc, 65); + if (vref_value < 0) + vref_value = vref_range; // set to base of range + else + vref_value |= vref_range; + + debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n", + rtt_wr, rtt_park, dqx_ctl, rank_count); + debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n", + rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range, + vref_range ? 2 : 1); + + return vref_value; +} + +// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs +static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00, + int rtt_park_01, + int dqx_ctl, int rtt_nom, + int dram_connection) +{ + u64 rser = (dram_connection) ? 0 : 15; + u64 vdd = 1200; + u64 vl, vlp, vcm; + u64 rd0, rd1, rpullup; + // 99 == HiZ + u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? + 1 * 1024 * 1024 : rtt_wr); + u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00); + u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01); + u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl); + u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom); + int vref_value; + u64 rangepc = 6000; // range1 base + u64 vrefpc; + int vref_range = 0; + + // rd0 = (RTT_NOM (parallel) RTT_WR) + = + // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER + rd0 = divide_nint((rtt_nom_s * rtt_wr_s), + (rtt_nom_s + rtt_wr_s)) + rser; + + // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER = + // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER + rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), + (rtt_park_00_s + rtt_park_01_s)) + rser; + + // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1) + rpullup = divide_nint((rd0 * rd1), (rd0 + rd1)); + + // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2 + vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup)); + + // vlp = ((RSER / rd0) * (1.2 - vl)) + vl + vlp = divide_nint((rser * (vdd - vl)), rd0) + vl; + + // vcm = (vlp + 1.2) / 2 + vcm = divide_nint((vlp + vdd), 2); + + // vrefpc = (vcm / 1.2) * 100 + vrefpc = divide_nint((vcm * 100 * 100), vdd); + + if (vrefpc < rangepc) { // < range1 base, use range2 + vref_range = 1 << 6; // set bit A6 for range2 + rangepc = 4500; // range2 base is 45% + } + + vref_value = divide_nint(vrefpc - rangepc, 65); + if (vref_value < 0) + vref_value = vref_range; // set to base of range + else + vref_value |= vref_range; + + debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n", + rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value, + vref_value); + + return vref_value; +} + +// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4. 
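+// Worked example for the two-slot helper above, using made-up termination
+// values (purely illustrative, not from any qualified configuration): with
+// dram_connection = 1 (so rser = 0), RTT_WR = 240 ohms, RTT_NOM = 60 ohms,
+// both RTT_PARK = 120 ohms and DQX_CTL = 34 ohms, the divider chain gives
+// rd0 = 48, rd1 = 60, rpullup ~= 27, vl ~= 669 mV and vcm ~= 935 mV, i.e.
+// about 78% of VDDQ; that falls in VREF range 1 and encodes to roughly 28
+// (0x1C). The function below chooses between the 1-slot and 2-slot
+// calculations based on dimm_count.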
+static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx, + int dimm_count, int rank_count, + struct impedence_values *imp_values, + int is_stacked_die, int dram_connection) +{ + int computed_final_vref_value = 0; + int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT; + const char *s; + int rtt_wr, dqx_ctl, rtt_nom, index; + union cvmx_lmcx_modereg_params1 lmc_modereg_params1; + union cvmx_lmcx_modereg_params2 lmc_modereg_params2; + union cvmx_lmcx_comp_ctl2 comp_ctl2; + int rtt_park; + int rtt_park_00; + int rtt_park_01; + + debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n", + if_num, rankx, __func__, dram_connection); + + // allow some overrides... + s = env_get("ddr_adjust_computed_vref"); + if (s) { + enable_adjust = !!simple_strtoul(s, NULL, 0); + if (!enable_adjust) { + debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n", + if_num, rankx); + } + } + + s = env_get("ddr_set_computed_vref"); + if (s) { + int new_vref = simple_strtoul(s, NULL, 0); + + debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n", + if_num, rankx, new_vref, new_vref); + return new_vref; + } + + /* + * Calculate an alternative to the measured vref value + * but only for configurations we know how to... + */ + // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs, + // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot + // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs + // in 2-slot configs. + + lmc_modereg_params1.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + lmc_modereg_params2.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num)); + comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl]; + + // WR always comes from the current rank + index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03; + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04; + rtt_wr = imp_values->rtt_wr_ohms[index]; + + // separate calculations for 1 vs 2 DIMMs per LMC + if (dimm_count == 1) { + // PARK comes from this rank if 1-rank, otherwise other rank + index = + (lmc_modereg_params2.u64 >> + ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07; + rtt_park = imp_values->rtt_nom_ohms[index]; + computed_final_vref_value = + compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, + rank_count, dram_connection); + } else { + // get both PARK values from the other DIMM + index = + (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) & + 0x07; + rtt_park_00 = imp_values->rtt_nom_ohms[index]; + index = + (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) & + 0x07; + rtt_park_01 = imp_values->rtt_nom_ohms[index]; + // NOM comes from this rank if 1-rank, otherwise other rank + index = + (lmc_modereg_params1.u64 >> + ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07; + rtt_nom = imp_values->rtt_nom_ohms[index]; + computed_final_vref_value = + compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, + dqx_ctl, rtt_nom, dram_connection); + } + + if (enable_adjust) { + union cvmx_lmcx_config lmc_config; + union cvmx_lmcx_control lmc_control; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + + /* + * New computed vref = existing computed vref – X + * + * The value of X is depending on different conditions. + * Both #122 and #139 are 2Rx4 RDIMM, while #124 is stacked + * die 2Rx4, so I conclude the results into two conditions: + * + * 1. Stacked Die: 2Rx4 + * 1-slot: offset = 7. 
i, e New computed vref = existing + * computed vref – 7 + * 2-slot: offset = 6 + * + * 2. Regular: 2Rx4 + * 1-slot: offset = 3 + * 2-slot: offset = 2 + */ + // we know we never get called unless DDR4, so test just + // the other conditions + if (lmc_control.s.rdimm_ena == 1 && + rank_count == 2 && lmc_config.s.mode_x4dev) { + // it must first be RDIMM and 2-rank and x4 + int adj; + + // now do according to stacked die or not... + if (is_stacked_die) + adj = (dimm_count == 1) ? -7 : -6; + else + adj = (dimm_count == 1) ? -3 : -2; + + // we must have adjusted it, so print it out if + // verbosity is right + debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n", + if_num, rankx, computed_final_vref_value, + computed_final_vref_value, + computed_final_vref_value + adj, + computed_final_vref_value + adj); + computed_final_vref_value += adj; + } + } + + return computed_final_vref_value; +} + +static void unpack_rlevel_settings(int if_bytemask, int ecc_ena, + struct rlevel_byte_data *rlevel_byte, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank) +{ + if ((if_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7; + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6; + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5; + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4; + /* ECC */ + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8; + } else { + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; + } + } else { + rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */ + rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */ + rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */ + rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */ + rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */ + } + + rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3; + rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2; + rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1; + rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0; +} + +static void pack_rlevel_settings(int if_bytemask, int ecc_ena, + struct rlevel_byte_data *rlevel_byte, + union cvmx_lmcx_rlevel_rankx + *final_rlevel_rank) +{ + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank; + + if ((if_bytemask & 0xff) == 0xff) { + if (ecc_ena) { + lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay; + /* ECC */ + lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay; + } else { + lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; + } + } else { + lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay; + lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; + lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; + lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; + lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; + } + + lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay; + lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay; + lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay; + lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay; + + *final_rlevel_rank = lmc_rlevel_rank; +} + +/////////////////// These are the RLEVEL settings display routines + +// flags +#define WITH_NOTHING 0 +#define WITH_SCORE 1 
+#define WITH_AVERAGE 2 +#define WITH_FINAL 4 +#define WITH_COMPUTE 8 + +static void do_display_rl(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int flags, int score) +{ + char score_buf[16]; + char *msg_buf; + char hex_buf[20]; + + if (flags & WITH_SCORE) { + snprintf(score_buf, sizeof(score_buf), "(%d)", score); + } else { + score_buf[0] = ' '; + score_buf[1] = 0; + } + + if (flags & WITH_AVERAGE) { + msg_buf = " DELAY AVERAGES "; + } else if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + } else if (flags & WITH_COMPUTE) { + msg_buf = " COMPUTED DELAYS "; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", + (unsigned long long)lmc_rlevel_rank.u64); + msg_buf = hex_buf; + } + + debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n", + if_num, rank, lmc_rlevel_rank.s.status, msg_buf, + lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7, + lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5, + lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3, + lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1, + lmc_rlevel_rank.s.byte0, score_buf); +} + +static void display_rl(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0); +} + +static void display_rl_with_score(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int score) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score); +} + +static void display_rl_with_final(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0); +} + +static void display_rl_with_computed(int if_num, + union cvmx_lmcx_rlevel_rankx + lmc_rlevel_rank, int rank, int score) +{ + do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score); +} + +// flag values +#define WITH_RODT_BLANK 0 +#define WITH_RODT_SKIPPING 1 +#define WITH_RODT_BESTROW 2 +#define WITH_RODT_BESTSCORE 3 +// control +#define SKIP_SKIPPING 1 + +static const char *with_rodt_canned_msgs[4] = { + " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" +}; + +static void display_rl_with_rodt(int if_num, + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, + int rank, int score, + int nom_ohms, int rodt_ohms, int flag) +{ + const char *msg_buf; + char set_buf[20]; + +#if SKIP_SKIPPING + if (flag == WITH_RODT_SKIPPING) + return; +#endif + + msg_buf = with_rodt_canned_msgs[flag]; + if (nom_ohms < 0) { + snprintf(set_buf, sizeof(set_buf), " RODT %3d ", + rodt_ohms); + } else { + snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, + rodt_ohms); + } + + debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n", + if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8, + lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6, + lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4, + lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2, + lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score); +} + +static void do_display_wl(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, + int rank, int flags) +{ + char *msg_buf; + char hex_buf[20]; + + if (flags & WITH_FINAL) { + msg_buf = " FINAL SETTINGS "; + } else { + snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", + (unsigned long long)lmc_wlevel_rank.u64); + msg_buf = hex_buf; + } + + debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, lmc_wlevel_rank.s.status, msg_buf, + lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7, + lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5, + 
lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3, + lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1, + lmc_wlevel_rank.s.byte0); +} + +static void display_wl(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank) +{ + do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING); +} + +static void display_wl_with_final(int if_num, + union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, + int rank) +{ + do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL); +} + +// pretty-print bitmask adjuster +static u64 ppbm(u64 bm) +{ + if (bm != 0ul) { + while ((bm & 0x0fful) == 0ul) + bm >>= 4; + } + + return bm; +} + +// xlate PACKED index to UNPACKED index to use with rlevel_byte +#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4)) +// xlate UNPACKED index to PACKED index to use with rlevel_bitmask +#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i)) + +// flag values +#define WITH_WL_BITMASKS 0 +#define WITH_RL_BITMASKS 1 +#define WITH_RL_MASK_SCORES 2 +#define WITH_RL_SEQ_SCORES 3 + +static void do_display_bm(int if_num, int rank, void *bm, + int flags, int ecc) +{ + if (flags == WITH_WL_BITMASKS) { + // wlevel_bitmask array in PACKED index order, so just + // print them + int *bitmasks = (int *)bm; + + debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n", + if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6], + bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2], + bitmasks[1], bitmasks[0] + ); + } else if (flags == WITH_RL_BITMASKS) { + // rlevel_bitmask array in PACKED index order, so just + // print them + struct rlevel_bitmask *rlevel_bitmask = + (struct rlevel_bitmask *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n", + if_num, rank, ppbm(rlevel_bitmask[8].bm), + ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm), + ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm), + ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm), + ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm) + ); + } else if (flags == WITH_RL_MASK_SCORES) { + // rlevel_bitmask array in PACKED index order, so just + // print them + struct rlevel_bitmask *rlevel_bitmask = + (struct rlevel_bitmask *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, rlevel_bitmask[8].errs, + rlevel_bitmask[7].errs, rlevel_bitmask[6].errs, + rlevel_bitmask[5].errs, rlevel_bitmask[4].errs, + rlevel_bitmask[3].errs, rlevel_bitmask[2].errs, + rlevel_bitmask[1].errs, rlevel_bitmask[0].errs); + } else if (flags == WITH_RL_SEQ_SCORES) { + // rlevel_byte array in UNPACKED index order, so xlate + // and print them + struct rlevel_byte_data *rlevel_byte = + (struct rlevel_byte_data *)bm; + + debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs, + rlevel_byte[XPU(7, ecc)].sqerrs, + rlevel_byte[XPU(6, ecc)].sqerrs, + rlevel_byte[XPU(5, ecc)].sqerrs, + rlevel_byte[XPU(4, ecc)].sqerrs, + rlevel_byte[XPU(3, ecc)].sqerrs, + rlevel_byte[XPU(2, ecc)].sqerrs, + rlevel_byte[XPU(1, ecc)].sqerrs, + rlevel_byte[XPU(0, ecc)].sqerrs); + } +} + +static void display_wl_bm(int if_num, int rank, int *bitmasks) +{ + do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0); +} + +static void display_rl_bm(int if_num, int rank, + struct rlevel_bitmask *bitmasks, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bitmasks, 
WITH_RL_BITMASKS, + ecc_ena); +} + +static void display_rl_bm_scores(int if_num, int rank, + struct rlevel_bitmask *bitmasks, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, + ecc_ena); +} + +static void display_rl_seq_scores(int if_num, int rank, + struct rlevel_byte_data *bytes, int ecc_ena) +{ + do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena); +} + +#define RODT_OHMS_COUNT 8 +#define RTT_NOM_OHMS_COUNT 8 +#define RTT_NOM_TABLE_COUNT 8 +#define RTT_WR_OHMS_COUNT 8 +#define DIC_OHMS_COUNT 3 +#define DRIVE_STRENGTH_COUNT 15 + +static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = { + 0, 40, 60, 80, 120, 240, 34, 48 }; +static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { + 0, 60, 120, 40, 240, 48, 80, 34 }; +static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { + 0, 4, 2, 6, 1, 5, 3, 7 }; +// setting HiZ ohms to 99 for computed vref +static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { + 0, 120, 240, 99, 80 }; +static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 }; +static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { + 0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 }; +static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = { + 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; +struct impedence_values ddr4_impedence_val = { + .rodt_ohms = ddr4_rodt_ohms, + .rtt_nom_ohms = ddr4_rtt_nom_ohms, + .rtt_nom_table = ddr4_rtt_nom_table, + .rtt_wr_ohms = ddr4_rtt_wr_ohms, + .dic_ohms = ddr4_dic_ohms, + .drive_strength = ddr4_drive_strength, + .dqx_strength = ddr4_dqx_strength, +}; + +static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = { + 0, 20, 30, 40, 60, 120, 0, 0 }; +static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { + 0, 60, 120, 40, 20, 30, 0, 0 }; +static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { + 0, 2, 1, 3, 5, 4, 0, 0 }; +static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 }; +static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 }; +static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { + 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; +static struct impedence_values ddr3_impedence_val = { + .rodt_ohms = ddr3_rodt_ohms, + .rtt_nom_ohms = ddr3_rtt_nom_ohms, + .rtt_nom_table = ddr3_rtt_nom_table, + .rtt_wr_ohms = ddr3_rtt_wr_ohms, + .dic_ohms = ddr3_dic_ohms, + .drive_strength = ddr3_drive_strength, + .dqx_strength = ddr3_drive_strength, +}; + +static u64 hertz_to_psecs(u64 hertz) +{ + /* Clock in psecs */ + return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz); +} + +#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. 
*/
+
+static u64 psecs_to_mts(u64 psecs)
+{
+	return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
+				       psecs), DIVIDEND_SCALE);
+}
+
+#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
+
+static unsigned long pretty_psecs_to_mts(u64 psecs)
+{
+	u64 ret = 0;		// default to error
+
+	if (WITHIN(psecs, 2500, 1))
+		ret = 800;
+	else if (WITHIN(psecs, 1875, 1))
+		ret = 1066;
+	else if (WITHIN(psecs, 1500, 1))
+		ret = 1333;
+	else if (WITHIN(psecs, 1250, 1))
+		ret = 1600;
+	else if (WITHIN(psecs, 1071, 1))
+		ret = 1866;
+	else if (WITHIN(psecs, 937, 1))
+		ret = 2133;
+	else if (WITHIN(psecs, 833, 1))
+		ret = 2400;
+	else if (WITHIN(psecs, 750, 1))
+		ret = 2666;
+	return ret;
+}
+
+static u64 mts_to_hertz(u64 mts)
+{
+	return ((mts * 1000 * 1000) / 2);
+}
+
+static int compute_rc3x(int64_t tclk_psecs)
+{
+	long speed;
+	long tclk_psecs_min, tclk_psecs_max;
+	long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
+	int rc3x;
+
+#define ENCODING_BASE 1240
+
+	data_rate_mhz = psecs_to_mts(tclk_psecs);
+
+	/*
+	 * 2400 MT/s is a special case. Using integer arithmetic it rounds
+	 * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
+	 * proper setting from the table.
+	 */
+	if (tclk_psecs == 833)
+		data_rate_mhz = 2400;
+
+	for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
+		int error = 0;
+
+		/* Clock in psecs */
+		tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
+		/* Clock in psecs */
+		tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
+
+		data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
+		data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
+
+		/* Force alignment to a multiple of 20 to avoid rounding errors. */
+		data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
+		data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
+
+		error += (speed + 00 != data_rate_mhz_min);
+		error += (speed + 20 != data_rate_mhz_max);
+
+		rc3x = (speed - ENCODING_BASE) / 20;
+
+		if (data_rate_mhz <= (speed + 20))
+			break;
+	}
+
+	return rc3x;
+}
+
+/*
+ * Static global variables are needed so that functions (loops) can be
+ * restructured out of the main huge function. It's not elegant, but it is
+ * the only way to break the original functions like
+ * init_octeon3_ddr3_interface() into separate, smaller logical functions
+ * with fewer indentation levels.
+ */ +static int if_num __section(".data"); +static u32 if_mask __section(".data"); +static int ddr_hertz __section(".data"); + +static struct ddr_conf *ddr_conf __section(".data"); +static const struct dimm_odt_config *odt_1rank_config __section(".data"); +static const struct dimm_odt_config *odt_2rank_config __section(".data"); +static const struct dimm_odt_config *odt_4rank_config __section(".data"); +static struct dimm_config *dimm_config_table __section(".data"); +static const struct dimm_odt_config *odt_config __section(".data"); +static const struct ddr3_custom_config *c_cfg __section(".data"); + +static int odt_idx __section(".data"); + +static ulong tclk_psecs __section(".data"); +static ulong eclk_psecs __section(".data"); + +static int row_bits __section(".data"); +static int col_bits __section(".data"); +static int num_banks __section(".data"); +static int num_ranks __section(".data"); +static int dram_width __section(".data"); +static int dimm_count __section(".data"); +/* Accumulate and report all the errors before giving up */ +static int fatal_error __section(".data"); +/* Flag that indicates safe DDR settings should be used */ +static int safe_ddr_flag __section(".data"); +/* Octeon II Default: 64bit interface width */ +static int if_64b __section(".data"); +static int if_bytemask __section(".data"); +static u32 mem_size_mbytes __section(".data"); +static unsigned int didx __section(".data"); +static int bank_bits __section(".data"); +static int bunk_enable __section(".data"); +static int rank_mask __section(".data"); +static int column_bits_start __section(".data"); +static int row_lsb __section(".data"); +static int pbank_lsb __section(".data"); +static int use_ecc __section(".data"); +static int mtb_psec __section(".data"); +static short ftb_dividend __section(".data"); +static short ftb_divisor __section(".data"); +static int taamin __section(".data"); +static int tckmin __section(".data"); +static int cl __section(".data"); +static int min_cas_latency __section(".data"); +static int max_cas_latency __section(".data"); +static int override_cas_latency __section(".data"); +static int ddr_rtt_nom_auto __section(".data"); +static int ddr_rodt_ctl_auto __section(".data"); + +static int spd_addr __section(".data"); +static int spd_org __section(".data"); +static int spd_banks __section(".data"); +static int spd_rdimm __section(".data"); +static int spd_dimm_type __section(".data"); +static int spd_ecc __section(".data"); +static u32 spd_cas_latency __section(".data"); +static int spd_mtb_dividend __section(".data"); +static int spd_mtb_divisor __section(".data"); +static int spd_tck_min __section(".data"); +static int spd_taa_min __section(".data"); +static int spd_twr __section(".data"); +static int spd_trcd __section(".data"); +static int spd_trrd __section(".data"); +static int spd_trp __section(".data"); +static int spd_tras __section(".data"); +static int spd_trc __section(".data"); +static int spd_trfc __section(".data"); +static int spd_twtr __section(".data"); +static int spd_trtp __section(".data"); +static int spd_tfaw __section(".data"); +static int spd_addr_mirror __section(".data"); +static int spd_package __section(".data"); +static int spd_rawcard __section(".data"); +static int spd_rawcard_aorb __section(".data"); +static int spd_rdimm_registers __section(".data"); +static int spd_thermal_sensor __section(".data"); + +static int is_stacked_die __section(".data"); +static int is_3ds_dimm __section(".data"); +// 3DS: logical ranks per package rank +static int 
lranks_per_prank __section(".data"); +// 3DS: logical ranks bits +static int lranks_bits __section(".data"); +// in Mbits; only used for 3DS +static int die_capacity __section(".data"); + +static enum ddr_type ddr_type __section(".data"); + +static int twr __section(".data"); +static int trcd __section(".data"); +static int trrd __section(".data"); +static int trp __section(".data"); +static int tras __section(".data"); +static int trc __section(".data"); +static int trfc __section(".data"); +static int twtr __section(".data"); +static int trtp __section(".data"); +static int tfaw __section(".data"); + +static int ddr4_tckavgmin __section(".data"); +static int ddr4_tckavgmax __section(".data"); +static int ddr4_trdcmin __section(".data"); +static int ddr4_trpmin __section(".data"); +static int ddr4_trasmin __section(".data"); +static int ddr4_trcmin __section(".data"); +static int ddr4_trfc1min __section(".data"); +static int ddr4_trfc2min __section(".data"); +static int ddr4_trfc4min __section(".data"); +static int ddr4_tfawmin __section(".data"); +static int ddr4_trrd_smin __section(".data"); +static int ddr4_trrd_lmin __section(".data"); +static int ddr4_tccd_lmin __section(".data"); + +static int wl_mask_err __section(".data"); +static int wl_loops __section(".data"); +static int default_rtt_nom[4] __section(".data"); +static int dyn_rtt_nom_mask __section(".data"); +static struct impedence_values *imp_val __section(".data"); +static char default_rodt_ctl __section(".data"); +// default to disabled (ie, try LMC restart, not chip reset) +static int ddr_disable_chip_reset __section(".data"); +static const char *dimm_type_name __section(".data"); +static int match_wl_rtt_nom __section(".data"); + +struct hwl_alt_by_rank { + u16 hwl_alt_mask; // mask of bytelanes with alternate + u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1 +}; + +static struct hwl_alt_by_rank hwl_alts[4] __section(".data"); + +#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5 +static int internal_retries __section(".data"); + +static int deskew_training_errors __section(".data"); +static struct deskew_counts deskew_training_results __section(".data"); +static int disable_deskew_training __section(".data"); +static int restart_if_dsk_incomplete __section(".data"); +static int dac_eval_retries __section(".data"); +static int dac_settings[9] __section(".data"); +static int num_samples __section(".data"); +static int sample __section(".data"); +static int lane __section(".data"); +static int last_lane __section(".data"); +static int total_dac_eval_retries __section(".data"); +static int dac_eval_exhausted __section(".data"); + +#define DEFAULT_DAC_SAMPLES 7 // originally was 5 +#define DAC_RETRIES_LIMIT 2 + +struct bytelane_sample { + s16 bytes[DEFAULT_DAC_SAMPLES]; +}; + +static struct bytelane_sample lanes[9] __section(".data"); + +static char disable_sequential_delay_check __section(".data"); +static int wl_print __section(".data"); + +static int enable_by_rank_init __section(".data"); +static int saved_rank_mask __section(".data"); +static int by_rank __section(".data"); +static struct deskew_data rank_dsk[4] __section(".data"); +static struct dac_data rank_dac[4] __section(".data"); + +// todo: perhaps remove node at some time completely? 
+static int node __section(".data"); +static int base_cl __section(".data"); + +/* Parameters from DDR3 Specifications */ +#define DDR3_TREFI 7800000 /* 7.8 us */ +#define DDR3_ZQCS 80000ull /* 80 ns */ +#define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */ +#define DDR3_TCKE 5000 /* 5 ns */ +#define DDR3_TMRD 4 /* 4 nCK */ +#define DDR3_TDLLK 512 /* 512 nCK */ +#define DDR3_TMPRR 1 /* 1 nCK */ +#define DDR3_TWLMRD 40 /* 40 nCK */ +#define DDR3_TWLDQSEN 25 /* 25 nCK */ + +/* Parameters from DDR4 Specifications */ +#define DDR4_TMRD 8 /* 8 nCK */ +#define DDR4_TDLLK 768 /* 768 nCK */ + +static void lmc_config(struct ddr_priv *priv) +{ + union cvmx_lmcx_config cfg; + char *s; + + cfg.u64 = 0; + + cfg.cn78xx.ecc_ena = use_ecc; + cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb); + cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb); + + cfg.cn78xx.idlepower = 0; /* Disabled */ + + s = lookup_env(priv, "ddr_idlepower"); + if (s) + cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.forcewrite = 0; /* Disabled */ + /* Include memory reference address in the ECC */ + cfg.cn78xx.ecc_adr = 1; + + s = lookup_env(priv, "ddr_ecc_adr"); + if (s) + cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.reset = 0; + + /* + * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to + * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25], + * ref_zqcs_int(18:7) to + * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this + * value should always be greater than 32, to account for + * resistor calibration delays. + */ + + cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f); + cfg.cn78xx.ref_zqcs_int |= + ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) / + (512 * 128))) & 0xfff) << 7); + + cfg.cn78xx.early_dqx = 1; /* Default to enabled */ + + s = lookup_env(priv, "ddr_early_dqx"); + if (!s) + s = lookup_env(priv, "ddr%d_early_dqx", if_num); + + if (s) + cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0); + + cfg.cn78xx.sref_with_dll = 0; + + cfg.cn78xx.rank_ena = bunk_enable; + cfg.cn78xx.rankmask = rank_mask; /* Set later */ + cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & + rank_mask; + /* Set once and don't change it. */ + cfg.cn78xx.init_status = rank_mask; + cfg.cn78xx.early_unload_d0_r0 = 0; + cfg.cn78xx.early_unload_d0_r1 = 0; + cfg.cn78xx.early_unload_d1_r0 = 0; + cfg.cn78xx.early_unload_d1_r1 = 0; + cfg.cn78xx.scrz = 0; + if (octeon_is_cpuid(OCTEON_CN70XX)) + cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */ + cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0; + cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) && + (dram_width == 16)) ? 0 : 1; + + s = lookup_env_ull(priv, "ddr_config"); + if (s) + cfg.u64 = simple_strtoull(s, NULL, 0); + debug("LMC_CONFIG : 0x%016llx\n", + cfg.u64); + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); +} + +static void lmc_control(struct ddr_priv *priv) +{ + union cvmx_lmcx_control ctrl; + char *s; + + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + ctrl.s.rdimm_ena = spd_rdimm; + ctrl.s.bwcnt = 0; /* Clear counter later */ + if (spd_rdimm) + ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm); + else + ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm); + ctrl.s.pocas = 0; + ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2); + ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0; + ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0; + ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0; + ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0; + ctrl.s.elev_prio_dis = safe_ddr_flag ? 
1 : 0; + /* discards writes to addresses that don't exist in the DRAM */ + ctrl.s.nxm_write_en = 0; + ctrl.s.max_write_batch = 8; + ctrl.s.xor_bank = 1; + ctrl.s.auto_dclkdis = 1; + ctrl.s.int_zqcs_dis = 0; + ctrl.s.ext_zqcs_dis = 0; + ctrl.s.bprch = 1; + ctrl.s.wodt_bprch = 1; + ctrl.s.rodt_bprch = 1; + + s = lookup_env(priv, "ddr_xor_bank"); + if (s) + ctrl.s.xor_bank = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_2t"); + if (s) + ctrl.s.ddr2t = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_fprch2"); + if (s) + ctrl.s.fprch2 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_bprch"); + if (s) + ctrl.s.bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wodt_bprch"); + if (s) + ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rodt_bprch"); + if (s) + ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_int_zqcs_dis"); + if (s) + ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ext_zqcs_dis"); + if (s) + ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env_ull(priv, "ddr_control"); + if (s) + ctrl.u64 = simple_strtoull(s, NULL, 0); + + debug("LMC_CONTROL : 0x%016llx\n", + ctrl.u64); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); +} + +static void lmc_timing_params0(struct ddr_priv *priv) +{ + union cvmx_lmcx_timing_params0 tp0; + unsigned int trp_value; + char *s; + + tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num)); + + trp_value = divide_roundup(trp, tclk_psecs) - 1; + debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value, + trp_value + + (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs)) - 4); + s = lookup_env_ull(priv, "ddr_use_old_trp"); + if (s) { + if (!!simple_strtoull(s, NULL, 0)) { + trp_value += + divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 4; + debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", + trp_value); + } + } + + tp0.cn78xx.txpr = + divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull), + 16 * tclk_psecs); + tp0.cn78xx.trp = trp_value & 0x1f; + tp0.cn78xx.tcksre = + divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1; + + if (ddr_type == DDR4_DRAM) { + int tzqinit = 4; // Default to 4, for all DDR4 speed bins + + s = lookup_env(priv, "ddr_tzqinit"); + if (s) + tzqinit = simple_strtoul(s, NULL, 0); + + tp0.cn78xx.tzqinit = tzqinit; + /* Always 8. 
*/ + tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs, + (16 * tclk_psecs)); + tp0.cn78xx.tcke = + divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE), + tclk_psecs) - 1; + tp0.cn78xx.tmrd = + divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1; + tp0.cn78xx.tmod = 25; /* 25 is the max allowed */ + tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256); + } else { + tp0.cn78xx.tzqinit = + divide_roundup(max(512ull * tclk_psecs, 640000ull), + (256 * tclk_psecs)); + tp0.cn78xx.tzqcs = + divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS), + (16 * tclk_psecs)); + tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1; + tp0.cn78xx.tmrd = + divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1; + tp0.cn78xx.tmod = + divide_roundup(max(12ull * tclk_psecs, 15000ull), + tclk_psecs) - 1; + tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256); + } + + s = lookup_env_ull(priv, "ddr_timing_params0"); + if (s) + tp0.u64 = simple_strtoull(s, NULL, 0); + debug("TIMING_PARAMS0 : 0x%016llx\n", + tp0.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64); +} + +static void lmc_timing_params1(struct ddr_priv *priv) +{ + union cvmx_lmcx_timing_params1 tp1; + unsigned int txp, temp_trcd, trfc_dlr; + char *s; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + + /* .cn70xx. */ + tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1; + + tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1; + + temp_trcd = divide_roundup(trcd, tclk_psecs); + if (temp_trcd > 15) { + debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", + temp_trcd); + } + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) { + /* + * Let .trcd=0 serve as a flag that the field has + * overflowed. Must use Additive Latency mode as a + * workaround. + */ + temp_trcd = 0; + } + tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf; + tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1; + + tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1; + tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs); + + if (ddr_type == DDR4_DRAM) { + /* Workaround bug 24006. Use Trrd_l. */ + tp1.cn78xx.trrd = + divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; + } else { + tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2; + } + + /* + * tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec + * tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec + * tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec + */ + txp = (tclk_psecs < 1875) ? 6000 : 7500; + txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp), + tclk_psecs) - 1; + if (txp > 7) { + debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", + txp); + } + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7) + txp = 7; // max it out + tp1.cn78xx.txp = (txp >> 0) & 7; + tp1.cn78xx.txp_ext = (txp >> 3) & 1; + + tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs, + 4 * tclk_psecs); + tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs, + 4 * tclk_psecs); + tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs); + tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull), + tclk_psecs) - 1; + + if (ddr_type == DDR4_DRAM && is_3ds_dimm) { + /* + * 4 Gb: tRFC_DLR = 90 ns + * 8 Gb: tRFC_DLR = 120 ns + * 16 Gb: tRFC_DLR = 190 ns FIXME? 
+ */ + if (die_capacity == 0x1000) // 4 Gbit + trfc_dlr = 90; + else if (die_capacity == 0x2000) // 8 Gbit + trfc_dlr = 120; + else if (die_capacity == 0x4000) // 16 Gbit + trfc_dlr = 190; + else + trfc_dlr = 0; + + if (trfc_dlr == 0) { + debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n", + node, if_num, die_capacity); + } else { + tp1.cn78xx.trfc_dlr = + divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs); + debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n", + node, if_num, tp1.cn78xx.trfc_dlr); + } + } + + s = lookup_env_ull(priv, "ddr_timing_params1"); + if (s) + tp1.u64 = simple_strtoull(s, NULL, 0); + + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); +} + +static void lmc_timing_params2(struct ddr_priv *priv) +{ + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_timing_params1 tp1; + union cvmx_lmcx_timing_params2 tp2; + int temp_trrd_l; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num)); + debug("TIMING_PARAMS2 : 0x%016llx\n", + tp2.u64); + + temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; + if (temp_trrd_l > 7) + debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", + temp_trrd_l); + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7) + temp_trrd_l = 7; // max it out + tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7; + tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1; + + // correct for 1600-2400 + tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 1; + tp2.s.t_rw_op_max = 7; + tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull), + tclk_psecs) - 1; + + debug("TIMING_PARAMS2 : 0x%016llx\n", + tp2.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64); + + /* + * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met + * for Write-to-Read operations to the same Bank Group + */ + if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) { + tp1.cn78xx.twtr = tp2.s.twtr_l - 4; + debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", + tp1.cn78xx.twtr, tp2.s.twtr_l); + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); + } + } +} + +static void lmc_modereg_params0(struct ddr_priv *priv) +{ + union cvmx_lmcx_modereg_params0 mp0; + int param; + char *s; + + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + + if (ddr_type == DDR4_DRAM) { + mp0.s.cwl = 0; /* 1600 (1250ps) */ + if (tclk_psecs < 1250) + mp0.s.cwl = 1; /* 1866 (1072ps) */ + if (tclk_psecs < 1072) + mp0.s.cwl = 2; /* 2133 (938ps) */ + if (tclk_psecs < 938) + mp0.s.cwl = 3; /* 2400 (833ps) */ + if (tclk_psecs < 833) + mp0.s.cwl = 4; /* 2666 (750ps) */ + if (tclk_psecs < 750) + mp0.s.cwl = 5; /* 3200 (625ps) */ + } else { + /* + ** CSR CWL CAS write Latency + ** === === ================================= + ** 0 5 ( tCK(avg) >= 2.5 ns) + ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns) + ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns) + ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns) + ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns) + ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns) + ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns) + ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns) + */ + + mp0.s.cwl = 0; + if (tclk_psecs < 2500) + mp0.s.cwl = 1; + if (tclk_psecs < 1875) + mp0.s.cwl = 2; + if (tclk_psecs < 1500) + mp0.s.cwl = 3; + if (tclk_psecs < 1250) + mp0.s.cwl = 4; + if (tclk_psecs < 1070) + mp0.s.cwl = 5; + if (tclk_psecs < 935) + mp0.s.cwl = 6; + if (tclk_psecs < 833) + mp0.s.cwl = 7; + } + + s = lookup_env(priv, 
"ddr_cwl"); + if (s) + mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5; + + if (ddr_type == DDR4_DRAM) { + debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + mp0.s.cwl + 9 + + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl); + } else { + debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", + mp0.s.cwl + 5, mp0.s.cwl); + } + + mp0.s.mprloc = 0; + mp0.s.mpr = 0; + mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */ + mp0.s.al = 0; + mp0.s.wlev = 0; /* Read Only */ + if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM) + mp0.s.tdqs = 0; + else + mp0.s.tdqs = 1; + mp0.s.qoff = 0; + + s = lookup_env(priv, "ddr_cl"); + if (s) { + cl = simple_strtoul(s, NULL, 0); + debug("CAS Latency : %6d\n", + cl); + } + + if (ddr_type == DDR4_DRAM) { + mp0.s.cl = 0x0; + if (cl > 9) + mp0.s.cl = 0x1; + if (cl > 10) + mp0.s.cl = 0x2; + if (cl > 11) + mp0.s.cl = 0x3; + if (cl > 12) + mp0.s.cl = 0x4; + if (cl > 13) + mp0.s.cl = 0x5; + if (cl > 14) + mp0.s.cl = 0x6; + if (cl > 15) + mp0.s.cl = 0x7; + if (cl > 16) + mp0.s.cl = 0x8; + if (cl > 18) + mp0.s.cl = 0x9; + if (cl > 20) + mp0.s.cl = 0xA; + if (cl > 24) + mp0.s.cl = 0xB; + } else { + mp0.s.cl = 0x2; + if (cl > 5) + mp0.s.cl = 0x4; + if (cl > 6) + mp0.s.cl = 0x6; + if (cl > 7) + mp0.s.cl = 0x8; + if (cl > 8) + mp0.s.cl = 0xA; + if (cl > 9) + mp0.s.cl = 0xC; + if (cl > 10) + mp0.s.cl = 0xE; + if (cl > 11) + mp0.s.cl = 0x1; + if (cl > 12) + mp0.s.cl = 0x3; + if (cl > 13) + mp0.s.cl = 0x5; + if (cl > 14) + mp0.s.cl = 0x7; + if (cl > 15) + mp0.s.cl = 0x9; + } + + mp0.s.rbt = 0; /* Read Only. */ + mp0.s.tm = 0; + mp0.s.dllr = 0; + + param = divide_roundup(twr, tclk_psecs); + + if (ddr_type == DDR4_DRAM) { /* DDR4 */ + mp0.s.wrp = 1; + if (param > 12) + mp0.s.wrp = 2; + if (param > 14) + mp0.s.wrp = 3; + if (param > 16) + mp0.s.wrp = 4; + if (param > 18) + mp0.s.wrp = 5; + if (param > 20) + mp0.s.wrp = 6; + if (param > 24) /* RESERVED in DDR4 spec */ + mp0.s.wrp = 7; + } else { /* DDR3 */ + mp0.s.wrp = 1; + if (param > 5) + mp0.s.wrp = 2; + if (param > 6) + mp0.s.wrp = 3; + if (param > 7) + mp0.s.wrp = 4; + if (param > 8) + mp0.s.wrp = 5; + if (param > 10) + mp0.s.wrp = 6; + if (param > 12) + mp0.s.wrp = 7; + } + + mp0.s.ppd = 0; + + s = lookup_env(priv, "ddr_wrp"); + if (s) + mp0.s.wrp = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, [0x%x]\n", + "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp); + + s = lookup_env_ull(priv, "ddr_modereg_params0"); + if (s) + mp0.u64 = simple_strtoull(s, NULL, 0); + + debug("MODEREG_PARAMS0 : 0x%016llx\n", + mp0.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); +} + +static void lmc_modereg_params1(struct ddr_priv *priv) +{ + union cvmx_lmcx_modereg_params1 mp1; + char *s; + int i; + + mp1.u64 = odt_config[odt_idx].modereg_params1.u64; + + /* + * Special request: mismatched DIMM support. Slot 0: 2-Rank, + * Slot 1: 1-Rank + */ + if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */ + mp1.s.rtt_nom_00 = 0; + mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */ + mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */ + mp1.s.rtt_nom_11 = 0; + dyn_rtt_nom_mask = 0x6; + } + + s = lookup_env(priv, "ddr_rtt_nom_mask"); + if (s) + dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0); + + /* + * Save the original rtt_nom settings before sweeping through + * settings. 
+ */ + default_rtt_nom[0] = mp1.s.rtt_nom_00; + default_rtt_nom[1] = mp1.s.rtt_nom_01; + default_rtt_nom[2] = mp1.s.rtt_nom_10; + default_rtt_nom[3] = mp1.s.rtt_nom_11; + + ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto; + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2), + !!(i & 1)); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num, + !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp1.u64 &= ~((u64)0x7 << (i * 12 + 9)); + mp1.u64 |= ((value & 0x7) << (i * 12 + 9)); + default_rtt_nom[i] = value; + ddr_rtt_nom_auto = 0; + } + } + + s = lookup_env(priv, "ddr_rtt_nom"); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_nom", if_num); + if (s) { + u64 value; + + value = simple_strtoul(s, NULL, 0); + + if (dyn_rtt_nom_mask & 1) { + default_rtt_nom[0] = value; + mp1.s.rtt_nom_00 = value; + } + if (dyn_rtt_nom_mask & 2) { + default_rtt_nom[1] = value; + mp1.s.rtt_nom_01 = value; + } + if (dyn_rtt_nom_mask & 4) { + default_rtt_nom[2] = value; + mp1.s.rtt_nom_10 = value; + } + if (dyn_rtt_nom_mask & 8) { + default_rtt_nom[3] = value; + mp1.s.rtt_nom_11 = value; + } + + ddr_rtt_nom_auto = 0; + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1)); + if (!s) + s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num, + !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + insrt_wr(&mp1.u64, i, value); + } + } + + // Make sure 78XX pass 1 has valid RTT_WR settings, because + // configuration files may be set-up for later chips, and + // 78XX pass 1 supports no RTT_WR extension bits + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + for (i = 0; i < 4; ++i) { + // if 80 or undefined + if (extr_wr(mp1.u64, i) > 3) { + // FIXME? 
always insert 120 + insrt_wr(&mp1.u64, i, 1); + debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n", + !!(i & 2), i & 1); + } + } + } + + s = lookup_env(priv, "ddr_dic"); + if (s) { + u64 value = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 4; ++i) { + mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); + mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); + } + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); + mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); + } + } + + s = lookup_env_ull(priv, "ddr_modereg_params1"); + if (s) + mp1.u64 = simple_strtoull(s, NULL, 0); + + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00); + + debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], + extr_wr(mp1.u64, 3), + extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0)); + + debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->dic_ohms[mp1.s.dic_11], + imp_val->dic_ohms[mp1.s.dic_10], + imp_val->dic_ohms[mp1.s.dic_01], + imp_val->dic_ohms[mp1.s.dic_00], + mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00); + + debug("MODEREG_PARAMS1 : 0x%016llx\n", + mp1.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64); +} + +static void lmc_modereg_params2(struct ddr_priv *priv) +{ + char *s; + int i; + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params2 mp2; + + mp2.u64 = odt_config[odt_idx].modereg_params2.u64; + + s = lookup_env(priv, "ddr_rtt_park"); + if (s) { + u64 value = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 4; ++i) { + mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); + mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); + } + } + + for (i = 0; i < 4; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2), + !!(i & 1)); + if (s) { + value = simple_strtoul(s, NULL, 0); + mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); + mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); + } + } + + s = lookup_env_ull(priv, "ddr_modereg_params2"); + if (s) + mp2.u64 = simple_strtoull(s, NULL, 0); + + debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], + mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01, + mp2.s.rtt_park_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE", + mp2.s.vref_range_11, + mp2.s.vref_range_10, + mp2.s.vref_range_01, mp2.s.vref_range_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE", + mp2.s.vref_value_11, + mp2.s.vref_value_10, + mp2.s.vref_value_01, mp2.s.vref_value_00); + + debug("MODEREG_PARAMS2 : 0x%016llx\n", + mp2.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64); + } +} + +static void lmc_modereg_params3(struct ddr_priv *priv) +{ + char *s; + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params3 mp3; + + mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num)); + /* Disable as workaround to Errata 20547 */ + mp3.s.rd_dbi = 0; + mp3.s.tccd_l = 
max(divide_roundup(ddr4_tccd_lmin, tclk_psecs), + 5ull) - 4; + + s = lookup_env(priv, "ddr_rd_preamble"); + if (s) + mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + int delay = 0; + + if (lranks_per_prank == 4 && ddr_hertz >= 1000000000) + delay = 1; + + mp3.s.xrank_add_tccd_l = delay; + mp3.s.xrank_add_tccd_s = delay; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64); + debug("MODEREG_PARAMS3 : 0x%016llx\n", + mp3.u64); + } +} + +static void lmc_nxm(struct ddr_priv *priv) +{ + union cvmx_lmcx_nxm lmc_nxm; + int num_bits = row_lsb + row_bits + lranks_bits - 26; + char *s; + + lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num)); + + /* .cn78xx. */ + if (rank_mask & 0x1) + lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits; + if (rank_mask & 0x2) + lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits; + if (rank_mask & 0x4) + lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits; + if (rank_mask & 0x8) + lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits; + + /* Set the mask for non-existent ranks. */ + lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff; + + s = lookup_env_ull(priv, "ddr_nxm"); + if (s) + lmc_nxm.u64 = simple_strtoull(s, NULL, 0); + + debug("LMC_NXM : 0x%016llx\n", + lmc_nxm.u64); + lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64); +} + +static void lmc_wodt_mask(struct ddr_priv *priv) +{ + union cvmx_lmcx_wodt_mask wodt_mask; + char *s; + + wodt_mask.u64 = odt_config[odt_idx].odt_mask; + + s = lookup_env_ull(priv, "ddr_wodt_mask"); + if (s) + wodt_mask.u64 = simple_strtoull(s, NULL, 0); + + debug("WODT_MASK : 0x%016llx\n", + wodt_mask.u64); + lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64); +} + +static void lmc_rodt_mask(struct ddr_priv *priv) +{ + union cvmx_lmcx_rodt_mask rodt_mask; + int rankx; + char *s; + + rodt_mask.u64 = odt_config[odt_idx].rodt_ctl; + + s = lookup_env_ull(priv, "ddr_rodt_mask"); + if (s) + rodt_mask.u64 = simple_strtoull(s, NULL, 0); + + debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64); + lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64); + + dyn_rtt_nom_mask = 0; + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff); + } + if (num_ranks == 4) { + /* + * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs + * ODT1 is wired to the third rank (rank 2). The mask, + * dyn_rtt_nom_mask, is used to indicate for which ranks + * to sweep RTT_NOM during read-leveling. Shift the bit + * from the ODT1 position over to the "ODT2" position so + * that the read-leveling analysis comes out right. + */ + int odt1_bit = dyn_rtt_nom_mask & 2; + + dyn_rtt_nom_mask &= ~2; + dyn_rtt_nom_mask |= odt1_bit << 1; + } + debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask); +} + +static void lmc_comp_ctl2(struct ddr_priv *priv) +{ + union cvmx_lmcx_comp_ctl2 cc2; + char *s; + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena; + /* Default 4=34.3 ohm */ + cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl; + /* Default 4=34.3 ohm */ + cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl; + /* Default 4=34.3 ohm */ + cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 
4 : c_cfg->ctl_ctl; + + ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto; + s = lookup_env(priv, "ddr_rodt_ctl_auto"); + if (s) + ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0); + + default_rodt_ctl = odt_config[odt_idx].qs_dic; + s = lookup_env(priv, "ddr_rodt_ctl"); + if (!s) + s = lookup_env(priv, "ddr%d_rodt_ctl", if_num); + if (s) { + default_rodt_ctl = simple_strtoul(s, NULL, 0); + ddr_rodt_ctl_auto = 0; + } + + cc2.cn70xx.rodt_ctl = default_rodt_ctl; + + // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, + // and DCLK speed is 1 GHz or more... + if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm && + ddr_hertz >= 1000000000) { + // lowest for DDR4 is 26 ohms + cc2.s.ck_ctl = ddr4_driver_26_ohm; + debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", + node, if_num, cc2.s.ck_ctl, + imp_val->drive_strength[cc2.s.ck_ctl]); + } + + // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms, + // if DCLK speed is 1 GHz or more... + if (ddr_type == DDR4_DRAM && dimm_count == 2 && + (spd_dimm_type == 2 || spd_dimm_type == 6) && + ddr_hertz >= 1000000000) { + // lowest for DDR4 is 26 ohms + cc2.cn78xx.control_ctl = ddr4_driver_26_ohm; + // lowest for DDR4 is 26 ohms + cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm; + debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n", + node, if_num, ddr4_driver_26_ohm, + imp_val->drive_strength[ddr4_driver_26_ohm]); + } + + s = lookup_env(priv, "ddr_ck_ctl"); + if (s) + cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_cmd_ctl"); + if (s) + cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_control_ctl"); + if (s) + cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dqx_ctl"); + if (s) + cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0); + + debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl, + imp_val->drive_strength[cc2.cn78xx.dqx_ctl]); + debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl, + imp_val->drive_strength[cc2.cn78xx.ck_ctl]); + debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl, + imp_val->drive_strength[cc2.cn78xx.cmd_ctl]); + debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ", + cc2.cn78xx.control_ctl, + imp_val->drive_strength[cc2.cn78xx.control_ctl]); + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + + debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64); + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); +} + +static void lmc_phy_ctl(struct ddr_priv *priv) +{ + union cvmx_lmcx_phy_ctl phy_ctl; + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.ts_stagger = 0; + // FIXME: are there others TBD? 
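+	/*
+	 * Clearing dsk_dbg_overwrt_ena below presumably ensures that any
+	 * stale deskew-debug overwrite left from an earlier training pass
+	 * does not carry over into normal operation. The c0_sel/c1_sel
+	 * routing further down is only applied for 3DS parts
+	 * (lranks_per_prank > 1) on chips other than CN78XX pass 1.x.
+	 */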
+ phy_ctl.s.dsk_dbg_overwrt_ena = 0; + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { + // C0 is TEN, C1 is A17 + phy_ctl.s.c0_sel = 2; + phy_ctl.s.c1_sel = 2; + debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n", + node, if_num, phy_ctl.s.c1_sel); + } + + debug("PHY_CTL : 0x%016llx\n", + phy_ctl.u64); + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); +} + +static void lmc_ext_config(struct ddr_priv *priv) +{ + union cvmx_lmcx_ext_config ext_cfg; + char *s; + + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + ext_cfg.s.vrefint_seq_deskew = 0; + ext_cfg.s.read_ena_bprch = 1; + ext_cfg.s.read_ena_fprch = 1; + ext_cfg.s.drive_ena_fprch = 1; + ext_cfg.s.drive_ena_bprch = 1; + // make sure this is OFF for all current chips + ext_cfg.s.invert_data = 0; + + s = lookup_env(priv, "ddr_read_fprch"); + if (s) + ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_read_bprch"); + if (s) + ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_drive_fprch"); + if (s) + ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_drive_bprch"); + if (s) + ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { + ext_cfg.s.dimm0_cid = lranks_bits; + ext_cfg.s.dimm1_cid = lranks_bits; + debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n", + node, if_num, ext_cfg.s.dimm0_cid); + } + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); + debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64); +} + +static void lmc_ext_config2(struct ddr_priv *priv) +{ + char *s; + + // NOTE: all chips have this register, but not necessarily the + // fields we modify... + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && + !octeon_is_cpuid(OCTEON_CN73XX)) { + union cvmx_lmcx_ext_config2 ext_cfg2; + int value = 1; // default to 1 + + ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num)); + + s = lookup_env(priv, "ddr_ext2_delay_unload"); + if (s) + value = !!simple_strtoul(s, NULL, 0); + + ext_cfg2.s.delay_unload_r0 = value; + ext_cfg2.s.delay_unload_r1 = value; + ext_cfg2.s.delay_unload_r2 = value; + ext_cfg2.s.delay_unload_r3 = value; + + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64); + debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64); + } +} + +static void lmc_dimm01_params_loop(struct ddr_priv *priv) +{ + union cvmx_lmcx_dimmx_params dimm_p; + int dimmx = didx; + char *s; + int rc; + int i; + + dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num)); + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0; + union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1; + union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl; + + dimm_p.s.rc0 = 0; + dimm_p.s.rc1 = 0; + dimm_p.s.rc2 = 0; + + rc = read_spd(&dimm_config_table[didx], 0, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL); + dimm_p.s.rc3 = (rc >> 4) & 0xf; + dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2; + dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0; + + rc = read_spd(&dimm_config_table[didx], 0, + DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK); + dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2; + dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0; + + dimm_p.s.rc6 = 0; + dimm_p.s.rc7 = 0; + dimm_p.s.rc8 = 0; + dimm_p.s.rc9 = 0; + + /* + * rc10 DDR4 RDIMM Operating Speed + * === =================================================== + * 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps) + * 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps) + * 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 
( 938 ps) + * 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps) + * 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps) + * 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps) + */ + dimm_p.s.rc10 = 0; + if (tclk_psecs < 1250) + dimm_p.s.rc10 = 1; + if (tclk_psecs < 1071) + dimm_p.s.rc10 = 2; + if (tclk_psecs < 938) + dimm_p.s.rc10 = 3; + if (tclk_psecs < 833) + dimm_p.s.rc10 = 4; + if (tclk_psecs < 750) + dimm_p.s.rc10 = 5; + + dimm_p.s.rc11 = 0; + dimm_p.s.rc12 = 0; + /* 0=LRDIMM, 1=RDIMM */ + dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; + dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ? + (spd_addr_mirror << 3) : 0; + dimm_p.s.rc14 = 0; + dimm_p.s.rc15 = 0; /* 1 nCK latency adder */ + + ddr4_p0.u64 = 0; + + ddr4_p0.s.rc8x = 0; + ddr4_p0.s.rc7x = 0; + ddr4_p0.s.rc6x = 0; + ddr4_p0.s.rc5x = 0; + ddr4_p0.s.rc4x = 0; + + ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs); + + ddr4_p0.s.rc2x = 0; + ddr4_p0.s.rc1x = 0; + + ddr4_p1.u64 = 0; + + ddr4_p1.s.rcbx = 0; + ddr4_p1.s.rcax = 0; + ddr4_p1.s.rc9x = 0; + + ddr4_ctl.u64 = 0; + ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004; + ddr4_ctl.cn70xx.ddr4_dimm1_wmask = + (dimm_count > 1) ? 0x004 : 0x0000; + + /* + * Handle any overrides from envvars here... + */ + s = lookup_env(priv, "ddr_ddr4_params0"); + if (s) + ddr4_p0.u64 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ddr4_params1"); + if (s) + ddr4_p1.u64 = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ddr4_dimm_ctl"); + if (s) + ddr4_ctl.u64 = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 11; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1); + if (s) { + value = simple_strtoul(s, NULL, 0); + if (i < 8) { + ddr4_p0.u64 &= ~((u64)0xff << (i * 8)); + ddr4_p0.u64 |= (value << (i * 8)); + } else { + ddr4_p1.u64 &= + ~((u64)0xff << ((i - 8) * 8)); + ddr4_p1.u64 |= (value << ((i - 8) * 8)); + } + } + } + + /* + * write the final CSR values + */ + lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num), + ddr4_p0.u64); + + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64); + + lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num), + ddr4_p1.u64); + + debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n", + dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax, + ddr4_p1.s.rc9x, ddr4_p0.s.rc8x, + ddr4_p0.s.rc7x, ddr4_p0.s.rc6x, + ddr4_p0.s.rc5x, ddr4_p0.s.rc4x, + ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x); + + } else { + rc = read_spd(&dimm_config_table[didx], 0, 69); + dimm_p.s.rc0 = (rc >> 0) & 0xf; + dimm_p.s.rc1 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 70); + dimm_p.s.rc2 = (rc >> 0) & 0xf; + dimm_p.s.rc3 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 71); + dimm_p.s.rc4 = (rc >> 0) & 0xf; + dimm_p.s.rc5 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 72); + dimm_p.s.rc6 = (rc >> 0) & 0xf; + dimm_p.s.rc7 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 73); + dimm_p.s.rc8 = (rc >> 0) & 0xf; + dimm_p.s.rc9 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 74); + dimm_p.s.rc10 = (rc >> 0) & 0xf; + dimm_p.s.rc11 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 75); + dimm_p.s.rc12 = (rc >> 0) & 0xf; + dimm_p.s.rc13 = (rc >> 4) & 0xf; + + rc = read_spd(&dimm_config_table[didx], 0, 76); + dimm_p.s.rc14 = (rc >> 0) & 0xf; + dimm_p.s.rc15 = (rc >> 4) & 0xf; + + s = ddr_getenv_debug(priv, "ddr_clk_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc5 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 
0) + dimm_p.s.rc5 = 0x5; /* Moderate Drive */ + if (strcmp(s, "strong") == 0) + dimm_p.s.rc5 = 0xA; /* Strong Drive */ + printf("Parameter found in environment. ddr_clk_drive = %s\n", + s); + } + + s = ddr_getenv_debug(priv, "ddr_cmd_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc3 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 0) + dimm_p.s.rc3 = 0x5; /* Moderate Drive */ + if (strcmp(s, "strong") == 0) + dimm_p.s.rc3 = 0xA; /* Strong Drive */ + printf("Parameter found in environment. ddr_cmd_drive = %s\n", + s); + } + + s = ddr_getenv_debug(priv, "ddr_ctl_drive"); + if (s) { + if (strcmp(s, "light") == 0) + dimm_p.s.rc4 = 0x0; /* Light Drive */ + if (strcmp(s, "moderate") == 0) + dimm_p.s.rc4 = 0x5; /* Moderate Drive */ + printf("Parameter found in environment. ddr_ctl_drive = %s\n", + s); + } + + /* + * rc10 DDR3 RDIMM Operating Speed + * == ===================================================== + * 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 def + * 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066 + * 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333 + * 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600 + * 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866 + */ + dimm_p.s.rc10 = 0; + if (tclk_psecs < 2500) + dimm_p.s.rc10 = 1; + if (tclk_psecs < 1875) + dimm_p.s.rc10 = 2; + if (tclk_psecs < 1500) + dimm_p.s.rc10 = 3; + if (tclk_psecs < 1250) + dimm_p.s.rc10 = 4; + } + + s = lookup_env(priv, "ddr_dimmx_params", i); + if (s) + dimm_p.u64 = simple_strtoul(s, NULL, 0); + + for (i = 0; i < 16; ++i) { + u64 value; + + s = lookup_env(priv, "ddr_rc%d", i); + if (s) { + value = simple_strtoul(s, NULL, 0); + dimm_p.u64 &= ~((u64)0xf << (i * 4)); + dimm_p.u64 |= (value << (i * 4)); + } + } + + lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64); + + debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n", + dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13, + dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10, + dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7, + dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4, + dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0); + + // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers, + // and treat it specially + if (ddr_type == DDR3_DRAM && num_ranks == 4 && + spd_rdimm_registers == 2 && dimmx == 0) { + debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n"); + lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64); + } +} + +static void lmc_dimm01_params(struct ddr_priv *priv) +{ + union cvmx_lmcx_dimm_ctl dimm_ctl; + char *s; + + if (spd_rdimm) { + for (didx = 0; didx < (unsigned int)dimm_count; ++didx) + lmc_dimm01_params_loop(priv); + + if (ddr_type == DDR4_DRAM) { + /* LMC0_DIMM_CTL */ + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0xdf3f; + dimm_ctl.s.dimm1_wmask = + (dimm_count > 1) ? 
0xdf3f : 0x0000; + dimm_ctl.s.tcws = 0x4e0; + dimm_ctl.s.parity = c_cfg->parity; + + s = lookup_env(priv, "ddr_dimm0_wmask"); + if (s) { + dimm_ctl.s.dimm0_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm1_wmask"); + if (s) { + dimm_ctl.s.dimm1_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm_ctl_parity"); + if (s) + dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dimm_ctl_tcws"); + if (s) + dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); + + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + + /* Write RC0D last */ + dimm_ctl.s.dimm0_wmask = 0x2000; + dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? + 0x2000 : 0x0000; + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* + * Don't write any extended registers the second time + */ + lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + } else { + /* LMC0_DIMM_CTL */ + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0xffff; + // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 + // registers, and treat it specially + if (num_ranks == 4 && spd_rdimm_registers == 2) { + debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n"); + dimm_ctl.s.dimm1_wmask = 0xffff; + } else { + dimm_ctl.s.dimm1_wmask = + (dimm_count > 1) ? 0xffff : 0x0000; + } + dimm_ctl.s.tcws = 0x4e0; + dimm_ctl.s.parity = c_cfg->parity; + + s = lookup_env(priv, "ddr_dimm0_wmask"); + if (s) { + dimm_ctl.s.dimm0_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm1_wmask"); + if (s) { + dimm_ctl.s.dimm1_wmask = + simple_strtoul(s, NULL, 0); + } + + s = lookup_env(priv, "ddr_dimm_ctl_parity"); + if (s) + dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_dimm_ctl_tcws"); + if (s) + dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); + + debug("LMC DIMM_CTL : 0x%016llx\n", + dimm_ctl.u64); + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + + /* Init RCW */ + oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); + } + + } else { + /* Disable register control writes for unbuffered */ + union cvmx_lmcx_dimm_ctl dimm_ctl; + + dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); + dimm_ctl.s.dimm0_wmask = 0; + dimm_ctl.s.dimm1_wmask = 0; + lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); + } +} + +static int lmc_rank_init(struct ddr_priv *priv) +{ + char *s; + + if (enable_by_rank_init) { + by_rank = 3; + saved_rank_mask = rank_mask; + } + +start_by_rank_init: + + if (enable_by_rank_init) { + rank_mask = (1 << by_rank); + if (!(rank_mask & saved_rank_mask)) + goto end_by_rank_init; + if (by_rank == 0) + rank_mask = saved_rank_mask; + + debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n", + by_rank, rank_mask); + } + + /* + * Comments (steps 3 through 5) continue in oct3_ddr3_seq() + */ + union cvmx_lmcx_modereg_params0 mp0; + + if (ddr_memory_preserved(priv)) { + /* + * Contents are being preserved. Take DRAM out of self-refresh + * first. 
Then init steps can procede normally + */ + /* self-refresh exit */ + oct3_ddr3_seq(priv, rank_mask, if_num, 3); + } + + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.dllr = 1; /* Set during first init sequence */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); + + ddr_init_seq(priv, rank_mask, if_num); + + mp0.s.dllr = 0; /* Clear for normal operation */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); + + if (spd_rdimm && ddr_type == DDR4_DRAM && + octeon_is_cpuid(OCTEON_CN7XXX)) { + debug("Running init sequence 1\n"); + change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count); + } + + memset(lanes, 0, sizeof(lanes)); + for (lane = 0; lane < last_lane; lane++) { + // init all lanes to reset value + dac_settings[lane] = 127; + } + + // FIXME: disable internal VREF if deskew is disabled? + if (disable_deskew_training) { + debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n", + node, if_num); + num_samples = 0; + } else if (ddr_type == DDR4_DRAM && + !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + num_samples = DEFAULT_DAC_SAMPLES; + } else { + // if DDR3 or no ability to write DAC values + num_samples = 1; + } + +perform_internal_vref_training: + + total_dac_eval_retries = 0; + dac_eval_exhausted = 0; + + for (sample = 0; sample < num_samples; sample++) { + dac_eval_retries = 0; + + // make offset and internal vref training repeatable + do { + /* + * 6.9.8 LMC Offset Training + * LMC requires input-receiver offset training. + */ + perform_offset_training(priv, rank_mask, if_num); + + /* + * 6.9.9 LMC Internal vref Training + * LMC requires input-reference-voltage training. + */ + perform_internal_vref_training(priv, rank_mask, if_num); + + // read and maybe display the DAC values for a sample + read_dac_dbi_settings(priv, if_num, /*DAC*/ 1, + dac_settings); + if (num_samples == 1 || ddr_verbose(priv)) { + display_dac_dbi_settings(if_num, /*DAC*/ 1, + use_ecc, dac_settings, + "Internal VREF"); + } + + // for DDR4, evaluate the DAC settings and retry + // if any issues + if (ddr_type == DDR4_DRAM) { + if (evaluate_dac_settings + (if_64b, use_ecc, dac_settings)) { + dac_eval_retries += 1; + if (dac_eval_retries > + DAC_RETRIES_LIMIT) { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n", + node, if_num); + dac_eval_exhausted += 1; + } else { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n", + node, if_num); + total_dac_eval_retries += 1; + // try another sample + continue; + } + } + + // taking multiple samples, otherwise do nothing + if (num_samples > 1) { + // good sample or exhausted retries, + // record it + for (lane = 0; lane < last_lane; + lane++) { + lanes[lane].bytes[sample] = + dac_settings[lane]; + } + } + } + // done if DDR3, or good sample, or exhausted retries + break; + } while (1); + } + + if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) { + debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n", + node, if_num, total_dac_eval_retries, dac_eval_exhausted); + } + + if (num_samples > 1) { + debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n", + node, if_num); + + for (lane = 0; lane < last_lane; lane++) { + dac_settings[lane] = + process_samples_average(&lanes[lane].bytes[0], + num_samples, if_num, lane); + } + display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, + dac_settings, "Averaged VREF"); + + // finally, write the final DAC values + for (lane = 0; lane < last_lane; 
lane++) { + load_dac_override(priv, if_num, dac_settings[lane], + lane); + } + } + + // allow override of any byte-lane internal VREF + int overrode_vref_dac = 0; + + for (lane = 0; lane < last_lane; lane++) { + s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane); + if (s) { + dac_settings[lane] = simple_strtoul(s, NULL, 0); + overrode_vref_dac = 1; + // finally, write the new DAC value + load_dac_override(priv, if_num, dac_settings[lane], + lane); + } + } + if (overrode_vref_dac) { + display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, + dac_settings, "Override VREF"); + } + + // as a second step, after internal VREF training, before starting + // deskew training: + // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting + // to 127 + if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && + !disable_deskew_training) { + load_dac_override(priv, if_num, 127, /* all */ 0x0A); + debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n", + node, if_num); + } + + /* + * 4.8.8 LMC Deskew Training + * + * LMC requires input-read-data deskew training. + */ + if (!disable_deskew_training) { + deskew_training_errors = + perform_deskew_training(priv, rank_mask, if_num, + spd_rawcard_aorb); + + // All the Deskew lock and saturation retries (may) have + // been done, but we ended up with nibble errors; so, + // as a last ditch effort, try the Internal vref + // Training again... + if (deskew_training_errors) { + if (internal_retries < + DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) { + internal_retries++; + debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n", + node, if_num, internal_retries); + goto perform_internal_vref_training; + } else { + if (restart_if_dsk_incomplete) { + debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n", + node, if_num, internal_retries); + return -EAGAIN; + } + debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n", + node, if_num, internal_retries); + } + } /* if (deskew_training_errors) */ + + // FIXME: treat this as the final DSK print from now on, + // and print if VBL_NORM or above also, save the results + // of the original training in case we want them later + validate_deskew_training(priv, rank_mask, if_num, + &deskew_training_results, 1); + } else { /* if (! disable_deskew_training) */ + debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n", + node, if_num); + validate_deskew_training(priv, rank_mask, if_num, + &deskew_training_results, 1); + } /* if (! disable_deskew_training) */ + + if (enable_by_rank_init) { + read_dac_dbi_settings(priv, if_num, /*dac */ 1, + &rank_dac[by_rank].bytes[0]); + get_deskew_settings(priv, if_num, &rank_dsk[by_rank]); + debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); + } + +end_by_rank_init: + + if (enable_by_rank_init) { + //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); + + by_rank--; + if (by_rank >= 0) + goto start_by_rank_init; + + rank_mask = saved_rank_mask; + ddr_init_seq(priv, rank_mask, if_num); + + process_by_rank_dac(priv, if_num, rank_mask, rank_dac); + process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk); + + // FIXME: set this to prevent later checking!!! 
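+		// At this point process_by_rank_dac()/process_by_rank_dsk()
+		// have combined the per-rank DAC and deskew results into the
+		// final settings, so force the flag below to skip the later
+		// deskew checks that would otherwise run again after
+		// write-leveling.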
+ disable_deskew_training = 1; + + debug("\n>>>>> BY_RANK: FINISHED!!\n\n"); + } + + return 0; +} + +static void lmc_config_2(struct ddr_priv *priv) +{ + union cvmx_lmcx_config lmc_config; + int save_ref_zqcs_int; + u64 temp_delay_usecs; + + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + /* + * Temporarily select the minimum ZQCS interval and wait + * long enough for a few ZQCS calibrations to occur. This + * should ensure that the calibration circuitry is + * stabilized before read/write leveling occurs. + */ + if (octeon_is_cpuid(OCTEON_CN7XXX)) { + save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; + /* set smallest interval */ + lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7); + } else { + save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int; + /* set smallest interval */ + lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7); + } + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + + /* + * Compute an appropriate delay based on the current ZQCS + * interval. The delay should be long enough for the + * current ZQCS delay counter to expire plus ten of the + * minimum intarvals to ensure that some calibrations + * occur. + */ + temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs * + 100 * 512 * 128) / (10000 * 10000) + 10 * + ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000); + + debug("Waiting %lld usecs for ZQCS calibrations to start\n", + temp_delay_usecs); + udelay(temp_delay_usecs); + + if (octeon_is_cpuid(OCTEON_CN7XXX)) { + /* Restore computed interval */ + lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; + } else { + /* Restore computed interval */ + lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int; + } + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); + lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); +} + +static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data"); +static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data"); +static union cvmx_lmcx_modereg_params1 mp1 __section(".data"); + +static int wl_mask[9] __section(".data"); +static int byte_idx __section(".data"); +static int ecc_ena __section(".data"); +static int wl_roundup __section(".data"); +static int save_mode32b __section(".data"); +static int disable_hwl_validity __section(".data"); +static int default_wl_rtt_nom __section(".data"); +static int wl_pbm_pump __section(".data"); + +static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx) +{ + int wloop = 0; + // retries per sample for HW-related issues with bitmasks or values + int wloop_retries = 0; + int wloop_retries_total = 0; + int wloop_retries_exhausted = 0; +#define WLOOP_RETRIES_DEFAULT 5 + int wl_val_err; + int wl_mask_err_rank = 0; + int wl_val_err_rank = 0; + // array to collect counts of byte-lane values + // assume low-order 3 bits and even, so really only 2-bit values + struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9]; + int extra_bumps, extra_mask; + int rank_nom = 0; + + if (!(rank_mask & (1 << rankx))) + return; + + if (match_wl_rtt_nom) { + if (rankx == 0) + rank_nom = mp1.s.rtt_nom_00; + if (rankx == 1) + rank_nom = mp1.s.rtt_nom_01; + if (rankx == 2) + rank_nom = mp1.s.rtt_nom_10; + if (rankx == 3) + rank_nom = mp1.s.rtt_nom_11; + + debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n", + node, if_num, rankx, rank_nom, + imp_val->rtt_nom_ohms[rank_nom]); + } + + memset(wl_bytes, 0, sizeof(wl_bytes)); + memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra)); + + // restructure the looping so we can keep trying until we get the + // samples 
we want + while (wloop < wl_loops) { + wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); + + wl_ctl.cn78xx.rtt_nom = + (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7; + + if (match_wl_rtt_nom) { + wl_ctl.cn78xx.rtt_nom = + (rank_nom > 0) ? (rank_nom - 1) : 7; + } + + /* Clear write-level delays */ + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0); + + wl_mask_err = 0; /* Reset error counters */ + wl_val_err = 0; + + for (byte_idx = 0; byte_idx < 9; ++byte_idx) + wl_mask[byte_idx] = 0; /* Reset bitmasks */ + + // do all the byte-lanes at the same time + wl_ctl.cn78xx.lanemask = 0x1ff; + + lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); + + /* + * Read and write values back in order to update the + * status field. This insures that we read the updated + * values after write-leveling has completed. + */ + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num))); + + /* write-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 6); + + do { + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + } while (wl_rank.cn78xx.status != 3); + + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv, + if_num, + byte_idx); + if (wl_mask[byte_idx] == 0) + ++wl_mask_err; + } + + // check validity only if no bitmask errors + if (wl_mask_err == 0) { + if ((spd_dimm_type == 1 || spd_dimm_type == 2) && + dram_width != 16 && if_64b && + !disable_hwl_validity) { + // bypass if [mini|SO]-[RU]DIMM or x16 or + // 32-bit + wl_val_err = + validate_hw_wl_settings(if_num, + &wl_rank, + spd_rdimm, ecc_ena); + wl_val_err_rank += (wl_val_err != 0); + } + } else { + wl_mask_err_rank++; + } + + // before we print, if we had bitmask or validity errors, + // do a retry... + if (wl_mask_err != 0 || wl_val_err != 0) { + if (wloop_retries < WLOOP_RETRIES_DEFAULT) { + wloop_retries++; + wloop_retries_total++; + // this printout is per-retry: only when VBL + // is high enough (DEV?) + // FIXME: do we want to show the bad bitmaps + // or delays here also? + debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n", + node, if_num, rankx, + (wl_mask_err) ? "Bitmask" : "Validity"); + // this takes us back to the top without + // counting a sample + return; + } + + // retries exhausted, do not print at normal VBL + debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n", + node, if_num, rankx, + (wl_mask_err) ? 
"Bitmask" : "Validity"); + wloop_retries_exhausted++; + } + // no errors or exhausted retries, use this sample + wloop_retries = 0; //reset for next sample + + // when only 1 sample or forced, print the bitmasks then + // current HW WL + if (wl_loops == 1 || wl_print) { + if (wl_print > 1) + display_wl_bm(if_num, rankx, wl_mask); + display_wl(if_num, wl_rank, rankx); + } + + if (wl_roundup) { /* Round up odd bitmask delays */ + for (byte_idx = 0; byte_idx < (8 + ecc_ena); + ++byte_idx) { + if (!(if_bytemask & (1 << byte_idx))) + return; + upd_wl_rank(&wl_rank, byte_idx, + roundup_ddr3_wlevel_bitmask + (wl_mask[byte_idx])); + } + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + display_wl(if_num, wl_rank, rankx); + } + + // OK, we have a decent sample, no bitmask or validity errors + extra_bumps = 0; + extra_mask = 0; + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + int ix; + + if (!(if_bytemask & (1 << byte_idx))) + return; + + // increment count of byte-lane value + // only 4 values + ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3; + wl_bytes[byte_idx].bitcnt[ix]++; + wl_bytes_extra[byte_idx].bitcnt[ix]++; + // if perfect... + if (__builtin_popcount(wl_mask[byte_idx]) == 4) { + wl_bytes_extra[byte_idx].bitcnt[ix] += + wl_pbm_pump; + extra_bumps++; + extra_mask |= 1 << byte_idx; + } + } + + if (extra_bumps) { + if (wl_print > 1) { + debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n", + node, if_num, rankx, extra_bumps, + extra_mask); + } + } + + // if we get here, we have taken a decent sample + wloop++; + + } /* while (wloop < wl_loops) */ + + // if we did sample more than once, try to pick a majority vote + if (wl_loops > 1) { + // look for the majority in each byte-lane + for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { + int mx, mc, xc, cc; + int ix, alts; + int maj, xmaj, xmx, xmc, xxc, xcc; + + if (!(if_bytemask & (1 << byte_idx))) + return; + maj = find_wl_majority(&wl_bytes[byte_idx], &mx, + &mc, &xc, &cc); + xmaj = find_wl_majority(&wl_bytes_extra[byte_idx], + &xmx, &xmc, &xxc, &xcc); + if (maj != xmaj) { + if (wl_print) { + debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n", + node, if_num, rankx, + byte_idx, maj, xc, xmaj, xxc); + } + mx = xmx; + mc = xmc; + xc = xxc; + cc = xcc; + } + + // see if there was an alternate + // take out the majority choice + alts = (mc & ~(1 << mx)); + if (alts != 0) { + for (ix = 0; ix < 4; ix++) { + // FIXME: could be done multiple times? + // bad if so + if (alts & (1 << ix)) { + // set the mask + hwl_alts[rankx].hwl_alt_mask |= + (1 << byte_idx); + // record the value + hwl_alts[rankx].hwl_alt_delay[byte_idx] = + ix << 1; + if (wl_print > 1) { + debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n", + node, + if_num, + rankx, + byte_idx, + mx << 1, + xc, + ix << 1, + wl_bytes + [byte_idx].bitcnt + [ix]); + } + } + } + } + + if (cc > 2) { // unlikely, but... + // assume: counts for 3 indices are all 1 + // possiblities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6 + // and the desired?: 2 , 4 , 6, 0 + // we choose the middle, assuming one of the + // outliers is bad + // NOTE: this is an ugly hack at the moment; + // there must be a better way + switch (mc) { + case 0x7: + mx = 1; + break; // was 0/2/4, choose 2 + case 0xb: + mx = 0; + break; // was 0/2/6, choose 0 + case 0xd: + mx = 3; + break; // was 0/4/6, choose 6 + case 0xe: + mx = 2; + break; // was 2/4/6, choose 4 + default: + case 0xf: + mx = 1; + break; // was 0/2/4/6, choose 2? 
+ } + printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n", + node, if_num, rankx, byte_idx, mc, + mx << 1); + } + upd_wl_rank(&wl_rank, byte_idx, mx << 1); + } + + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + display_wl_with_final(if_num, wl_rank, rankx); + + // FIXME: does this help make the output a little easier + // to focus? + if (wl_print > 0) + debug("-----------\n"); + + } /* if (wl_loops > 1) */ + + // maybe print an error summary for the rank + if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) { + debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n", + node, if_num, rankx, wl_mask_err_rank, + wl_val_err_rank, wloop_retries_total, + wloop_retries_exhausted); + } +} + +static void lmc_write_leveling(struct ddr_priv *priv) +{ + union cvmx_lmcx_config cfg; + int rankx; + char *s; + + /* + * 4.8.9 LMC Write Leveling + * + * LMC supports an automatic write leveling like that described in the + * JEDEC DDR3 specifications separately per byte-lane. + * + * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations + * must be completed prior to starting this LMC write-leveling sequence. + * + * There are many possible procedures that will write-level all the + * attached DDR3 DRAM parts. One possibility is for software to simply + * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section + * describes one possible sequence that uses LMC's autowrite-leveling + * capabilities. + * + * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD + * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this + * point. + * + * Do the remaining steps 2-7 separately for each rank i with attached + * DRAM. + * + * 2. Write LMC(0)_WLEVEL_RANKi = 0. + * + * 3. For x8 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached + * DRAM. + * + * For x16 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with + * attached DRAM. + * + * 4. Without changing any other fields in LMC(0)_CONFIG, + * + * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling + * + * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) + * + * o write LMC(0)_SEQ_CTL[INIT_START] = 1 + * + * LMC will initiate write-leveling at this point. Assuming + * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on + * the selected DRAM rank via a DDR3 MR1 write, then sequences + * through + * and accumulates write-leveling results for eight different delay + * settings twice, starting at a delay of zero in this case since + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each + * setting, covering a total distance of one CK, then disables the + * write-leveling via another DDR3 MR1 write. + * + * After the sequence through 16 delay settings is complete: + * + * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3 + * + * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected + * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write + * leveling result of 1 that followed result of 0 during the + * sequence, except that the LMC always writes + * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0. + * + * o Software can read the eight write-leveling results from the + * first pass through the delay settings by reading + * LMC(0)_WLEVEL_DBG[BITMASK] (after writing + * LMC(0)_WLEVEL_DBG[BYTE]). 
(LMC does not retain the writeleveling + * results from the second pass through the eight delay + * settings. They should often be identical to the + * LMC(0)_WLEVEL_DBG[BITMASK] results, though.) + * + * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2. + * + * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte + * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point. + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that + * software wrote in substep 2 above, which is 0. + * + * 6. For x16 parts: + * + * Without changing any other fields in LMC(0)_WLEVEL_CTL, write + * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with + * attached DRAM. + * + * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK] + * setting. Skip to substep 7 if this has already been done. + * + * For x8 parts: + * + * Skip this substep. Go to substep 7. + * + * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte + * lanes on all ranks with attached DRAM. + * + * At this point, all byte lanes on rank i with attached DRAM should + * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has + * the result for each byte lane. + * + * But note that the DDR3 write-leveling sequence will only determine + * the delay modulo the CK cycle time, and cannot determine how many + * additional CK cycles of delay are present. Software must calculate + * the number of CK cycles, or equivalently, the + * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings. + * + * This BYTE*<4:3> calculation is system/board specific. + * + * Many techniques can be used to calculate write-leveling BYTE*<4:3> + * values, including: + * + * o Known values for some byte lanes. + * + * o Relative values for some byte lanes relative to others. + * + * For example, suppose lane X is likely to require a larger + * write-leveling delay than lane Y. A BYTEX<2:0> value that is much + * smaller than the BYTEY<2:0> value may then indicate that the + * required lane X delay wrapped into the next CK, so BYTEX<4:3> + * should be set to BYTEY<4:3>+1. + * + * When ECC DRAM is not present (i.e. when DRAM is not attached to + * the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the + * DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write + * LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0], + * using the final calculated BYTE0 value. + * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0], + * using the final calculated BYTE0 value. + * + * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi. + */ + + rankx = 0; + wl_roundup = 0; + disable_hwl_validity = 0; + + // wl_pbm_pump: weight for write-leveling PBMs... + // 0 causes original behavior + // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms + // 4 would allow a minority of 1 pbm to outscore a majority of 4 + // non-pbms + wl_pbm_pump = 4; // FIXME: is 4 too much? + + if (wl_loops) { + debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, + if_num); + } else { + /* Force software write-leveling to run */ + wl_mask_err = 1; + debug("N%d.LMC%d: Forcing software Write-Leveling\n", node, + if_num); + } + + default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ? 
+ rttnom_20ohm : ddr4_rttnom_40ohm; + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + ecc_ena = cfg.s.ecc_ena; + save_mode32b = cfg.cn78xx.mode32b; + cfg.cn78xx.mode32b = (!if_64b); + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + s = lookup_env(priv, "ddr_wlevel_roundup"); + if (s) + wl_roundup = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_printall"); + if (s) + wl_print = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_pbm_bump"); + if (s) + wl_pbm_pump = strtoul(s, NULL, 0); + + // default to disable when RL sequential delay check is disabled + disable_hwl_validity = disable_sequential_delay_check; + s = lookup_env(priv, "ddr_disable_hwl_validity"); + if (s) + disable_hwl_validity = !!strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wl_rtt_nom"); + if (s) + default_wl_rtt_nom = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_match_wl_rtt_nom"); + if (s) + match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0); + + if (match_wl_rtt_nom) + mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK + // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here + if (ddr_type == DDR4_DRAM) { + int default_or_dis = 1; + int default_bitmask = 0xff; + + // when x4, use only the lower nibble + if (dram_width == 4) { + default_bitmask = 0x0f; + if (wl_print) { + debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n", + node, if_num, default_bitmask); + } + } + + wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); + wl_ctl.s.or_dis = default_or_dis; + wl_ctl.s.bitmask = default_bitmask; + + // allow overrides + s = lookup_env(priv, "ddr_wlevel_ctl_or_dis"); + if (s) + wl_ctl.s.or_dis = !!strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_wlevel_ctl_bitmask"); + if (s) + wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0); + + // print only if not defaults + if (wl_ctl.s.or_dis != default_or_dis || + wl_ctl.s.bitmask != default_bitmask) { + debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n", + node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask); + } + + // always write + lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); + } + + // Start the hardware write-leveling loop per rank + for (rankx = 0; rankx < dimm_count * 4; rankx++) + lmc_write_leveling_loop(priv, rankx); + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.mode32b = save_mode32b; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + // At the end of HW Write Leveling, check on some DESKEW things... 
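+	// The loop below re-validates the deskew settings and re-runs deskew
+	// training whenever validation reports nibble range/unlock errors
+	// (or saturation on non-rawcard-A/B parts), giving up after five
+	// retries.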
+ if (!disable_deskew_training) { + struct deskew_counts dsk_counts; + int retry_count = 0; + + debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", + node, if_num); + + do { + validate_deskew_training(priv, rank_mask, if_num, + &dsk_counts, 1); + + // only RAWCARD A or B will not benefit from + // retraining if there's only saturation + // or any rawcard if there is a nibble error + if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) || + (dsk_counts.nibrng_errs != 0 || + dsk_counts.nibunl_errs != 0)) { + retry_count++; + debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n", + node, if_num, retry_count); + perform_deskew_training(priv, rank_mask, if_num, + spd_rawcard_aorb); + } else { + break; + } + } while (retry_count < 5); + } +} + +static void lmc_workaround(struct ddr_priv *priv) +{ + /* Workaround Trcd overflow by using Additive latency. */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + union cvmx_lmcx_modereg_params0 mp0; + union cvmx_lmcx_timing_params1 tp1; + union cvmx_lmcx_control ctrl; + int rankx; + + tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); + mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + + if (tp1.cn78xx.trcd == 0) { + debug("Workaround Trcd overflow by using Additive latency.\n"); + /* Hard code this to 12 and enable additive latency */ + tp1.cn78xx.trcd = 12; + mp0.s.al = 2; /* CL-2 */ + ctrl.s.pocas = 1; + + debug("MODEREG_PARAMS0 : 0x%016llx\n", + mp0.u64); + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + debug("TIMING_PARAMS1 : 0x%016llx\n", + tp1.u64); + lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); + + debug("LMC_CONTROL : 0x%016llx\n", + ctrl.u64); + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + /* MR1 */ + ddr4_mrw(priv, if_num, rankx, -1, 1, 0); + } + } + } + + // this is here just for output, to allow check of the Deskew + // settings one last time... + if (!disable_deskew_training) { + struct deskew_counts dsk_counts; + + debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n", + node, if_num); + validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, + 3); + } + + /* + * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x) + * + * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND + * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set + * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1. 
+ */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) { + union cvmx_lmcx_dll_ctl3 dll_ctl3; + union cvmx_lmcx_phy_ctl2 phy_ctl2; + union cvmx_lmcx_ext_config ext_cfg; + int increased_dsk_adj = 0; + int byte; + + phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num)); + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + for (byte = 0; byte < 8; ++byte) { + if (!(if_bytemask & (1 << byte))) + continue; + increased_dsk_adj |= + (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4); + } + + if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) { + ext_cfg.s.drive_ena_bprch = 1; + lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); + debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n", + if_num); + } + } +} + +// Software Write-Leveling block + +#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32 +#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17 +// full window is valid for 0x00 to 0x4A +// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1 +#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT) +#define VREF_FINAL (VREF_LIMIT - 1) + +enum sw_wl_status { + WL_ESTIMATED = 0, /* HW/SW wleveling failed. Reslt estimated */ + WL_HARDWARE = 1, /* H/W wleveling succeeded */ + WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous setting */ + WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */ +}; + +static u64 rank_addr __section(".data"); +static int vref_val __section(".data"); +static int final_vref_val __section(".data"); +static int final_vref_range __section(".data"); +static int start_vref_val __section(".data"); +static int computed_final_vref_val __section(".data"); +static char best_vref_val_count __section(".data"); +static char vref_val_count __section(".data"); +static char best_vref_val_start __section(".data"); +static char vref_val_start __section(".data"); +static int bytes_failed __section(".data"); +static enum sw_wl_status byte_test_status[9] __section(".data"); +static enum sw_wl_status sw_wl_rank_status __section(".data"); +static int sw_wl_failed __section(".data"); +static int sw_wl_hw __section(".data"); +static int measured_vref_flag __section(".data"); + +static void ddr4_vref_loop(struct ddr_priv *priv, int rankx) +{ + char *s; + + if (vref_val < VREF_FINAL) { + int vrange, vvalue; + + if (vref_val < VREF_RANGE2_LIMIT) { + vrange = 1; + vvalue = vref_val; + } else { + vrange = 0; + vvalue = vref_val - VREF_RANGE2_LIMIT; + } + + set_vref(priv, if_num, rankx, vrange, vvalue); + } else { /* if (vref_val < VREF_FINAL) */ + /* Print the final vref value first. 
*/ + + /* Always print the computed first if its valid */ + if (computed_final_vref_val >= 0) { + debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n", + node, if_num, rankx, + computed_final_vref_val, computed_final_vref_val); + } + + if (!measured_vref_flag) { // setup to use the computed + best_vref_val_count = 1; + final_vref_val = computed_final_vref_val; + } else { // setup to use the measured + if (best_vref_val_count > 0) { + best_vref_val_count = + max(best_vref_val_count, (char)2); + final_vref_val = best_vref_val_start + + divide_nint(best_vref_val_count - 1, 2); + + if (final_vref_val < VREF_RANGE2_LIMIT) { + final_vref_range = 1; + } else { + final_vref_range = 0; + final_vref_val -= VREF_RANGE2_LIMIT; + } + + int vvlo = best_vref_val_start; + int vrlo; + int vvhi = best_vref_val_start + + best_vref_val_count - 1; + int vrhi; + + if (vvlo < VREF_RANGE2_LIMIT) { + vrlo = 2; + } else { + vrlo = 1; + vvlo -= VREF_RANGE2_LIMIT; + } + + if (vvhi < VREF_RANGE2_LIMIT) { + vrhi = 2; + } else { + vrhi = 1; + vvhi -= VREF_RANGE2_LIMIT; + } + debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n", + node, if_num, rankx, vvlo, vrlo, + final_vref_val, + final_vref_range + 1, vvhi, vrhi, + best_vref_val_count - 1); + + } else { + /* + * If nothing passed use the default vref + * value for this rank + */ + union cvmx_lmcx_modereg_params2 mp2; + + mp2.u64 = + lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS2(if_num)); + final_vref_val = (mp2.u64 >> + (rankx * 10 + 3)) & 0x3f; + final_vref_range = (mp2.u64 >> + (rankx * 10 + 9)) & 0x01; + + debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n", + node, if_num, rankx, final_vref_val, + final_vref_val, final_vref_val, + final_vref_val, final_vref_range + 1); + } + } + + // allow override + s = lookup_env(priv, "ddr%d_vref_val_%1d%1d", + if_num, !!(rankx & 2), !!(rankx & 1)); + if (s) + final_vref_val = strtoul(s, NULL, 0); + + set_vref(priv, if_num, rankx, final_vref_range, final_vref_val); + } +} + +#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? 
three passes without errors + +static int errors __section(".data"); +static int byte_delay[9] __section(".data"); +static u64 bytemask __section(".data"); +static int bytes_todo __section(".data"); +static int no_errors_count __section(".data"); +static u64 bad_bits[2] __section(".data"); +static u64 sum_dram_dclk __section(".data"); +static u64 sum_dram_ops __section(".data"); +static u64 start_dram_dclk __section(".data"); +static u64 stop_dram_dclk __section(".data"); +static u64 start_dram_ops __section(".data"); +static u64 stop_dram_ops __section(".data"); + +static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx) +{ + int delay; + int b; + + // write the current set of WL delays + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, rank_addr, + DBTRAIN_TEST, bad_bits); + errors &= bytes_todo; // keep only the ones we are still doing + } else { + start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); + errors = test_dram_byte64(priv, if_num, rank_addr, bytemask, + bad_bits); + + stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); + stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); + sum_dram_dclk += stop_dram_dclk - start_dram_dclk; + sum_dram_ops += stop_dram_ops - start_dram_ops; + } + + debug("WL pass1: test_dram_byte returned 0x%x\n", errors); + + // remember, errors will not be returned for byte-lanes that have + // maxxed out... + if (errors == 0) { + no_errors_count++; // bump + // bypass check/update completely + if (no_errors_count > 1) + return; // to end of do-while + } else { + no_errors_count = 0; // reset + } + + // check errors by byte + for (b = 0; b < 9; ++b) { + if (!(bytes_todo & (1 << b))) + continue; + + delay = byte_delay[b]; + // yes, an error in this byte lane + if (errors & (1 << b)) { + debug(" byte %d delay %2d Errors\n", b, delay); + // since this byte had an error, we move to the next + // delay value, unless done with it + delay += 8; // incr by 8 to do delay high-order bits + if (delay < 32) { + upd_wl_rank(&wl_rank, b, delay); + debug(" byte %d delay %2d New\n", + b, delay); + byte_delay[b] = delay; + } else { + // reached max delay, maybe really done with + // this byte + // consider an alt only for computed VREF and + if (!measured_vref_flag && + (hwl_alts[rankx].hwl_alt_mask & (1 << b))) { + // if an alt exists... 
+ // just orig low-3 bits + int bad_delay = delay & 0x6; + + // yes, use it + delay = hwl_alts[rankx].hwl_alt_delay[b]; + // clear that flag + hwl_alts[rankx].hwl_alt_mask &= + ~(1 << b); + upd_wl_rank(&wl_rank, b, delay); + byte_delay[b] = delay; + debug(" byte %d delay %2d ALTERNATE\n", + b, delay); + debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n", + node, if_num, + rankx, b, bad_delay, delay); + + } else { + unsigned int bits_bad; + + if (b < 8) { + // test no longer, remove from + // byte mask + bytemask &= + ~(0xffULL << (8 * b)); + bits_bad = (unsigned int) + ((bad_bits[0] >> + (8 * b)) & 0xffUL); + } else { + bits_bad = (unsigned int) + (bad_bits[1] & 0xffUL); + } + + // remove from bytes to do + bytes_todo &= ~(1 << b); + // make sure this is set for this case + byte_test_status[b] = WL_ESTIMATED; + debug(" byte %d delay %2d Exhausted\n", + b, delay); + if (!measured_vref_flag) { + // this is too noisy when doing + // measured VREF + debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n", + node, if_num, rankx, + b, bits_bad, delay); + } + } + } + } else { + // no error, stay with current delay, but keep testing + // it... + debug(" byte %d delay %2d Passed\n", b, delay); + byte_test_status[b] = WL_HARDWARE; // change status + } + } /* for (b = 0; b < 9; ++b) */ +} + +static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx) +{ + int save_byte8 = wl_rank.s.byte8; + + byte_test_status[8] = WL_HARDWARE; /* H/W delay value */ + + if (save_byte8 != wl_rank.s.byte3 && + save_byte8 != wl_rank.s.byte4) { + int test_byte8 = save_byte8; + int test_byte8_error; + int byte8_error = 0x1f; + int adder; + int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4, + 2); + + for (adder = 0; adder <= 32; adder += 8) { + test_byte8_error = abs((adder + save_byte8) - + avg_bytes); + if (test_byte8_error < byte8_error) { + byte8_error = test_byte8_error; + test_byte8 = save_byte8 + adder; + } + } + + // only do the check if we are not using measured VREF + if (!measured_vref_flag) { + /* Use only even settings, rounding down... */ + test_byte8 &= ~1; + + // do validity check on the calculated ECC delay value + // this depends on the DIMM type + if (spd_rdimm) { // RDIMM + // but not mini-RDIMM + if (spd_dimm_type != 5) { + // it can be > byte4, but should never + // be > byte3 + if (test_byte8 > wl_rank.s.byte3) { + /* say it is still estimated */ + byte_test_status[8] = + WL_ESTIMATED; + } + } + } else { // UDIMM + if (test_byte8 < wl_rank.s.byte3 || + test_byte8 > wl_rank.s.byte4) { + // should never be outside the + // byte 3-4 range + /* say it is still estimated */ + byte_test_status[8] = WL_ESTIMATED; + } + } + /* + * Report whenever the calculation appears bad. + * This happens if some of the original values were off, + * or unexpected geometry from DIMM type, or custom + * circuitry (NIC225E, I am looking at you!). + * We will trust the calculated value, and depend on + * later testing to catch any instances when that + * value is truly bad. + */ + // ESTIMATED means there may be an issue + if (byte_test_status[8] == WL_ESTIMATED) { + debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n", + node, if_num, rankx, + (spd_rdimm ? 
'R' : 'U'), wl_rank.s.byte4, + test_byte8, wl_rank.s.byte3); + byte_test_status[8] = WL_HARDWARE; + } + } + /* Use only even settings */ + wl_rank.s.byte8 = test_byte8 & ~1; + } + + if (wl_rank.s.byte8 != save_byte8) { + /* Change the status if s/w adjusted the delay */ + byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */ + } +} + +static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv, + int rankx) +{ + int errors; + int byte_delay[8]; + int byte_passed[8]; + u64 bytemask; + u64 bitmask; + int wl_offset; + int bytes_todo; + int sw_wl_offset = 1; + int delay; + int b; + + for (b = 0; b < 8; ++b) + byte_passed[b] = 0; + + bytes_todo = if_bytemask; + + for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) { + debug("Starting wl_offset for-loop: %d\n", wl_offset); + + bytemask = 0; + + for (b = 0; b < 8; ++b) { + byte_delay[b] = 0; + // this does not contain fully passed bytes + if (!(bytes_todo & (1 << b))) + continue; + + // reset across passes if not fully passed + byte_passed[b] = 0; + upd_wl_rank(&wl_rank, b, 0); // all delays start at 0 + bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff; + // set the bytes bits in the bytemask + bytemask |= bitmask << (8 * b); + } /* for (b = 0; b < 8; ++b) */ + + // start a pass if there is any byte lane to test + while (bytemask != 0) { + debug("Starting bytemask while-loop: 0x%llx\n", + bytemask); + + // write this set of WL delays + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, + rank_addr, + DBTRAIN_TEST, + NULL) & 0xff; + } else { + errors = test_dram_byte64(priv, if_num, + rank_addr, bytemask, + NULL); + } + + debug("test_dram_byte returned 0x%x\n", errors); + + // check errors by byte + for (b = 0; b < 8; ++b) { + if (!(bytes_todo & (1 << b))) + continue; + + delay = byte_delay[b]; + if (errors & (1 << b)) { // yes, an error + debug(" byte %d delay %2d Errors\n", + b, delay); + byte_passed[b] = 0; + } else { // no error + byte_passed[b] += 1; + // Look for consecutive working settings + if (byte_passed[b] == (1 + wl_offset)) { + debug(" byte %d delay %2d FULLY Passed\n", + b, delay); + if (wl_offset == 1) { + byte_test_status[b] = + WL_SOFTWARE; + } else if (wl_offset == 0) { + byte_test_status[b] = + WL_SOFTWARE1; + } + + // test no longer, remove + // from byte mask this pass + bytemask &= ~(0xffULL << + (8 * b)); + // remove completely from + // concern + bytes_todo &= ~(1 << b); + // on to the next byte, bypass + // delay updating!! + continue; + } else { + debug(" byte %d delay %2d Passed\n", + b, delay); + } + } + + // error or no, here we move to the next delay + // value for this byte, unless done all delays + // only a byte that has "fully passed" will + // bypass around this, + delay += 2; + if (delay < 32) { + upd_wl_rank(&wl_rank, b, delay); + debug(" byte %d delay %2d New\n", + b, delay); + byte_delay[b] = delay; + } else { + // reached max delay, done with this + // byte + debug(" byte %d delay %2d Exhausted\n", + b, delay); + // test no longer, remove from byte + // mask this pass + bytemask &= ~(0xffULL << (8 * b)); + } + } /* for (b = 0; b < 8; ++b) */ + debug("End of for-loop: bytemask 0x%llx\n", bytemask); + } /* while (bytemask != 0) */ + } + + for (b = 0; b < 8; ++b) { + // any bytes left in bytes_todo did not pass + if (bytes_todo & (1 << b)) { + union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank; + + /* + * Last resort. 
Use Rlevel settings to estimate + * Wlevel if software write-leveling fails + */ + debug("Using RLEVEL as WLEVEL estimate for byte %d\n", + b); + lmc_rlevel_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b); + } + } /* for (b = 0; b < 8; ++b) */ +} + +static int lmc_sw_write_leveling(struct ddr_priv *priv) +{ + /* Try to determine/optimize write-level delays experimentally. */ + union cvmx_lmcx_wlevel_rankx wl_rank_hw_res; + union cvmx_lmcx_config cfg; + int rankx; + int byte; + char *s; + int i; + + int active_rank; + int sw_wl_enable = 1; /* FIX... Should be customizable. */ + int interfaces; + + static const char * const wl_status_strings[] = { + "(e)", + " ", + " ", + "(1)" + }; + + // FIXME: make HW-assist the default now? + int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT; + int dram_connection = c_cfg->dram_connection; + + s = lookup_env(priv, "ddr_sw_wlevel_hw"); + if (s) + sw_wl_hw_default = !!strtoul(s, NULL, 0); + if (!if_64b) // must use SW algo if 32-bit mode + sw_wl_hw_default = 0; + + // can never use hw-assist + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + sw_wl_hw_default = 0; + + s = lookup_env(priv, "ddr_software_wlevel"); + if (s) + sw_wl_enable = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr%d_dram_connection", if_num); + if (s) + dram_connection = !!strtoul(s, NULL, 0); + + cvmx_rng_enable(); + + /* + * Get the measured_vref setting from the config, check for an + * override... + */ + /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */ + // NOTE: measured VREF can only be done for DDR4 + if (ddr_type == DDR4_DRAM) { + measured_vref_flag = c_cfg->measured_vref; + s = lookup_env(priv, "ddr_measured_vref"); + if (s) + measured_vref_flag = !!strtoul(s, NULL, 0); + } else { + measured_vref_flag = 0; // OFF for DDR3 + } + + /* + * Ensure disabled ECC for DRAM tests using the SW algo, else leave + * it untouched + */ + if (!sw_wl_hw_default) { + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.ecc_ena = 0; + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + } + + /* + * We need to track absolute rank number, as well as how many + * active ranks we have. Two single rank DIMMs show up as + * ranks 0 and 2, but only 2 ranks are active. + */ + active_rank = 0; + + interfaces = __builtin_popcount(if_mask); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + final_vref_range = 0; + start_vref_val = 0; + computed_final_vref_val = -1; + sw_wl_rank_status = WL_HARDWARE; + sw_wl_failed = 0; + sw_wl_hw = sw_wl_hw_default; + + if (!sw_wl_enable) + break; + + if (!(rank_mask & (1 << rankx))) + continue; + + debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n", + node, if_num, rankx, + (sw_wl_hw) ? "with H/W assist" : + "with S/W algorithm"); + + if (ddr_type == DDR4_DRAM && num_ranks != 4) { + // always compute when we can... + computed_final_vref_val = + compute_vref_val(priv, if_num, rankx, dimm_count, + num_ranks, imp_val, + is_stacked_die, dram_connection); + + // but only use it if allowed + if (!measured_vref_flag) { + // skip all the measured vref processing, + // just the final setting + start_vref_val = VREF_FINAL; + } + } + + /* Save off the h/w wl results */ + wl_rank_hw_res.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + vref_val_count = 0; + vref_val_start = 0; + best_vref_val_count = 0; + best_vref_val_start = 0; + + /* Loop one extra time using the Final vref value. 
*/ + for (vref_val = start_vref_val; vref_val < VREF_LIMIT; + ++vref_val) { + if (ddr_type == DDR4_DRAM) + ddr4_vref_loop(priv, rankx); + + /* Restore the saved value */ + wl_rank.u64 = wl_rank_hw_res.u64; + + for (byte = 0; byte < 9; ++byte) + byte_test_status[byte] = WL_ESTIMATED; + + if (wl_mask_err == 0) { + /* + * Determine address of DRAM to test for + * pass 1 of software write leveling. + */ + rank_addr = active_rank * + (1ull << (pbank_lsb - bunk_enable + + (interfaces / 2))); + + /* + * Adjust address for boot bus hole in memory + * map. + */ + if (rank_addr > 0x10000000) + rank_addr += 0x10000000; + + debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n", + node, if_num, rankx, active_rank, + rank_addr); + + // start parallel write-leveling block for + // delay high-order bits + errors = 0; + no_errors_count = 0; + sum_dram_dclk = 0; + sum_dram_ops = 0; + + if (if_64b) { + bytes_todo = (sw_wl_hw) ? + if_bytemask : 0xFF; + bytemask = ~0ULL; + } else { + // 32-bit, must be using SW algo, + // only data bytes + bytes_todo = 0x0f; + bytemask = 0x00000000ffffffffULL; + } + + for (byte = 0; byte < 9; ++byte) { + if (!(bytes_todo & (1 << byte))) { + byte_delay[byte] = 0; + } else { + byte_delay[byte] = + get_wl_rank(&wl_rank, byte); + } + } /* for (byte = 0; byte < 9; ++byte) */ + + do { + lmc_sw_write_leveling_loop(priv, rankx); + } while (no_errors_count < + WL_MIN_NO_ERRORS_COUNT); + + if (!sw_wl_hw) { + u64 percent_x10; + + if (sum_dram_dclk == 0) + sum_dram_dclk = 1; + percent_x10 = sum_dram_ops * 1000 / + sum_dram_dclk; + debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n", + node, if_num, rankx, sum_dram_ops, + sum_dram_dclk, percent_x10 / 10, + percent_x10 % 10); + } + if (errors) { + debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n", + vref_val, vref_val, errors); + } + // end parallel write-leveling block for + // delay high-order bits + + // if we used HW-assist, we did the ECC byte + // when approp. + if (sw_wl_hw) { + if (wl_print) { + debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n", + node, if_num, rankx); + } + goto no_ecc_estimate; + } + + if ((if_bytemask & 0xff) == 0xff) { + if (use_ecc) { + sw_write_lvl_use_ecc(priv, + rankx); + } else { + /* H/W delay value */ + byte_test_status[8] = + WL_HARDWARE; + /* ECC is not used */ + wl_rank.s.byte8 = + wl_rank.s.byte0; + } + } else { + if (use_ecc) { + /* Estimate the ECC byte dly */ + // add hi-order to b4 + wl_rank.s.byte4 |= + (wl_rank.s.byte3 & + 0x38); + if ((wl_rank.s.byte4 & 0x06) < + (wl_rank.s.byte3 & 0x06)) { + // must be next clock + wl_rank.s.byte4 += 8; + } + } else { + /* ECC is not used */ + wl_rank.s.byte4 = + wl_rank.s.byte0; + } + + /* + * Change the status if s/w adjusted + * the delay + */ + /* Estimated delay */ + byte_test_status[4] = WL_SOFTWARE; + } /* if ((if_bytemask & 0xff) == 0xff) */ + } /* if (wl_mask_err == 0) */ + +no_ecc_estimate: + + bytes_failed = 0; + for (byte = 0; byte < 9; ++byte) { + /* Don't accumulate errors for untested bytes */ + if (!(if_bytemask & (1 << byte))) + continue; + bytes_failed += + (byte_test_status[byte] == WL_ESTIMATED); + } + + /* vref training loop is only used for DDR4 */ + if (ddr_type != DDR4_DRAM) + break; + + if (bytes_failed == 0) { + if (vref_val_count == 0) + vref_val_start = vref_val; + + ++vref_val_count; + if (vref_val_count > best_vref_val_count) { + best_vref_val_count = vref_val_count; + best_vref_val_start = vref_val_start; + debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? 
-----> 0x%02x\n", + node, if_num, rankx, vref_val, + best_vref_val_start, + best_vref_val_start + + best_vref_val_count - 1); + } + } else { + vref_val_count = 0; + debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n", + node, if_num, rankx, vref_val); + } + } + + /* + * Determine address of DRAM to test for software write + * leveling. + */ + rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + + (interfaces / 2))); + /* Adjust address for boot bus hole in memory map. */ + if (rank_addr > 0x10000000) + rank_addr += 0x10000000; + + debug("Rank Address: 0x%llx\n", rank_addr); + + if (bytes_failed) { + // FIXME? the big hammer, did not even try SW WL pass2, + // assume only chip reset will help + debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n", + node, if_num, rankx); + sw_wl_failed = 1; + } else { /* if (bytes_failed) */ + // SW WL pass 1 was OK, write the settings + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + // do validity check on the delay values by running + // the test 1 more time... + // FIXME: we really need to check the ECC byte setting + // here as well, so we need to enable ECC for this test! + // if there are any errors, claim SW WL failure + u64 datamask = (if_64b) ? 0xffffffffffffffffULL : + 0x00000000ffffffffULL; + int errors; + + // do the test + if (sw_wl_hw) { + errors = run_best_hw_patterns(priv, if_num, + rank_addr, + DBTRAIN_TEST, + NULL) & 0xff; + } else { + errors = test_dram_byte64(priv, if_num, + rank_addr, datamask, + NULL); + } + + if (errors) { + debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n", + node, if_num, rankx, errors); + sw_wl_failed = 1; + } + } /* if (bytes_failed) */ + + // FIXME? dump the WL settings, so we get more of a clue + // as to what happened where + debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n", + node, if_num, rankx, wl_rank.s.status, wl_rank.u64, + wl_rank.s.byte8, wl_status_strings[byte_test_status[8]], + wl_rank.s.byte7, wl_status_strings[byte_test_status[7]], + wl_rank.s.byte6, wl_status_strings[byte_test_status[6]], + wl_rank.s.byte5, wl_status_strings[byte_test_status[5]], + wl_rank.s.byte4, wl_status_strings[byte_test_status[4]], + wl_rank.s.byte3, wl_status_strings[byte_test_status[3]], + wl_rank.s.byte2, wl_status_strings[byte_test_status[2]], + wl_rank.s.byte1, wl_status_strings[byte_test_status[1]], + wl_rank.s.byte0, wl_status_strings[byte_test_status[0]], + (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"); + + // finally, check for fatal conditions: either chip reset + // right here, or return error flag + if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) || + sw_wl_failed) { + if (!ddr_disable_chip_reset) { // do chip RESET + printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n", + node, if_num, rankx); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } else { + // return error flag so LMC init can be retried. + debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n", + node, if_num, rankx); + return -EAGAIN; // 0 indicates restart possible. 
+ } + } + active_rank++; + } + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + int parameter_set = 0; + u64 value; + + if (!(rank_mask & (1 << rankx))) + continue; + + wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, + if_num)); + + for (i = 0; i < 9; ++i) { + s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d", + if_num, rankx, i); + if (s) { + parameter_set |= 1; + value = strtoul(s, NULL, 0); + + upd_wl_rank(&wl_rank, i, value); + } + } + + s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx); + if (s) { + parameter_set |= 1; + value = strtoull(s, NULL, 0); + wl_rank.u64 = value; + } + + if (parameter_set) { + lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), + wl_rank.u64); + wl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); + display_wl(if_num, wl_rank, rankx); + } + // if there are unused entries to be filled + if ((rank_mask & 0x0F) != 0x0F) { + if (rankx < 3) { + debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n", + node, if_num, rankx); + + // if rank 0, write ranks 1 and 2 here if empty + if (rankx == 0) { + // check that rank 1 is empty + if (!(rank_mask & (1 << 1))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 1); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(1, + if_num), + wl_rank.u64); + } + + // check that rank 2 is empty + if (!(rank_mask & (1 << 2))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 2); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(2, + if_num), + wl_rank.u64); + } + } + + // if rank 0, 1 or 2, write rank 3 here if empty + // check that rank 3 is empty + if (!(rank_mask & (1 << 3))) { + debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 3); + lmc_wr(priv, + CVMX_LMCX_WLEVEL_RANKX(3, + if_num), + wl_rank.u64); + } + } + } + } + + /* Enable 32-bit mode if required. */ + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + cfg.cn78xx.mode32b = (!if_64b); + debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); + + /* Restore the ECC configuration */ + if (!sw_wl_hw_default) + cfg.cn78xx.ecc_ena = use_ecc; + + lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); + + return 0; +} + +static void lmc_dll(struct ddr_priv *priv) +{ + union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; + int setting[9]; + int i; + + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + + for (i = 0; i < 9; ++i) { + SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i)); + lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); + lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); + setting[i] = GET_DDR_DLL_CTL3(dll90_setting); + debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num, + GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64, + setting[i]); + } + + debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", + node, if_num, "DLL90 Setting 8:0", + setting[8], setting[7], setting[6], setting[5], setting[4], + setting[3], setting[2], setting[1], setting[0]); + + process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset", + c_cfg->dll_write_offset, + "ddr%d_dll_write_offset_byte%d", 1); + process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset", + c_cfg->dll_read_offset, + "ddr%d_dll_read_offset_byte%d", 2); +} + +#define SLOT_CTL_INCR(csr, chip, field, incr) \ + csr.chip.field = (csr.chip.field < (64 - incr)) ? \ + (csr.chip.field + incr) : 63 + +#define INCR(csr, chip, field, incr) \ + csr.chip.field = (csr.chip.field < (64 - incr)) ? 
\ + (csr.chip.field + incr) : 63 + +static void lmc_workaround_2(struct ddr_priv *priv) +{ + /* Workaround Errata 21063 */ + if (octeon_is_cpuid(OCTEON_CN78XX) || + octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { + union cvmx_lmcx_slot_ctl0 slot_ctl0; + union cvmx_lmcx_slot_ctl1 slot_ctl1; + union cvmx_lmcx_slot_ctl2 slot_ctl2; + union cvmx_lmcx_ext_config ext_cfg; + + slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num)); + slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); + slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); + + ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); + + /* When ext_cfg.s.read_ena_bprch is set add 1 */ + if (ext_cfg.s.read_ena_bprch) { + SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1); + SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1); + SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1); + SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1); + } + + /* Always add 2 */ + SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2); + SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2); + + lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); + } + + /* Workaround Errata 21216 */ + if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) || + octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { + union cvmx_lmcx_slot_ctl1 slot_ctl1; + union cvmx_lmcx_slot_ctl2 slot_ctl2; + + slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); + slot_ctl1.cn78xx.w2w_xrank_init = + max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); + + slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); + slot_ctl2.cn78xx.w2w_xdimm_init = + max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init); + lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); + } +} + +static void lmc_final(struct ddr_priv *priv) +{ + /* + * 4.8.11 Final LMC Initialization + * + * Early LMC initialization, LMC write-leveling, and LMC read-leveling + * must be completed prior to starting this final LMC initialization. + * + * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, + * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected + * readleveling and write-leveling settings. Software should not write + * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2 + * values until after the final read-leveling and write-leveling + * settings are written. + * + * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and + * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs + * select the minimum gaps between read operations and write operations + * of various types. + * + * Software must not reduce the values in these CSR fields below the + * values previously selected by the LMC hardware (during write-leveling + * and read-leveling steps above). + * + * All sections in this chapter may be used to derive proper settings + * for these registers. + * + * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed + * properly. This should be done prior to the first read. 
+ */ + + /* Clear any residual ECC errors */ + int num_tads = 1; + int tad; + int num_mcis = 1; + int mci; + + if (octeon_is_cpuid(OCTEON_CN78XX)) { + num_tads = 8; + num_mcis = 4; + } else if (octeon_is_cpuid(OCTEON_CN70XX)) { + num_tads = 1; + num_mcis = 1; + } else if (octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + num_tads = 4; + num_mcis = 3; + } + + lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL); + lmc_rd(priv, CVMX_LMCX_INT(if_num)); + + for (tad = 0; tad < num_tads; tad++) { + l2c_wr(priv, CVMX_L2C_TADX_INT(tad), + l2c_rd(priv, CVMX_L2C_TADX_INT(tad))); + debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad, + l2c_rd(priv, CVMX_L2C_TADX_INT(tad))); + } + + for (mci = 0; mci < num_mcis; mci++) { + l2c_wr(priv, CVMX_L2C_MCIX_INT(mci), + l2c_rd(priv, CVMX_L2C_MCIX_INT(mci))); + debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci, + l2c_rd(priv, CVMX_L2C_MCIX_INT(mci))); + } + + debug("%-45s : 0x%08llx\n", "LMC_INT", + lmc_rd(priv, CVMX_LMCX_INT(if_num))); +} + +static void lmc_scrambling(struct ddr_priv *priv) +{ + // Make sure scrambling is disabled during init... + union cvmx_lmcx_control ctrl; + union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0; + union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1; + union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2; + union cvmx_lmcx_ns_ctl lmc_ns_ctl; + int use_scramble = 0; // default OFF + char *s; + + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num)); + lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num)); + lmc_scramble_cfg2.u64 = 0; // quiet compiler + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + lmc_scramble_cfg2.u64 = + lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num)); + } + lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num)); + + s = lookup_env_ull(priv, "ddr_use_scramble"); + if (s) + use_scramble = simple_strtoull(s, NULL, 0); + + /* Generate random values if scrambling is needed */ + if (use_scramble) { + lmc_scramble_cfg0.u64 = cvmx_rng_get_random64(); + lmc_scramble_cfg1.u64 = cvmx_rng_get_random64(); + lmc_scramble_cfg2.u64 = cvmx_rng_get_random64(); + lmc_ns_ctl.s.ns_scramble_dis = 0; + lmc_ns_ctl.s.adr_offset = 0; + ctrl.s.scramble_ena = 1; + } + + s = lookup_env_ull(priv, "ddr_scramble_cfg0"); + if (s) { + lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0", + lmc_scramble_cfg0.u64); + + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64); + + s = lookup_env_ull(priv, "ddr_scramble_cfg1"); + if (s) { + lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1", + lmc_scramble_cfg1.u64); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64); + + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { + s = lookup_env_ull(priv, "ddr_scramble_cfg2"); + if (s) { + lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0); + ctrl.s.scramble_ena = 1; + } + debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2", + lmc_scramble_cfg1.u64); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), + lmc_scramble_cfg2.u64); + } + + s = lookup_env_ull(priv, "ddr_ns_ctl"); + if (s) + lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0); + debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64); + lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); +} + +struct rl_score { + u64 setting; + int score; +}; + +static union 
cvmx_lmcx_rlevel_rankx rl_rank __section(".data"); +static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data"); +static unsigned char rodt_ctl __section(".data"); + +static int rl_rodt_err __section(".data"); +static unsigned char rtt_nom __section(".data"); +static unsigned char rtt_idx __section(".data"); +static char min_rtt_nom_idx __section(".data"); +static char max_rtt_nom_idx __section(".data"); +static char min_rodt_ctl __section(".data"); +static char max_rodt_ctl __section(".data"); +static int rl_dbg_loops __section(".data"); +static unsigned char save_ddr2t __section(".data"); +static int rl_samples __section(".data"); +static char rl_compute __section(".data"); +static char saved_ddr__ptune __section(".data"); +static char saved_ddr__ntune __section(".data"); +static char rl_comp_offs __section(".data"); +static char saved_int_zqcs_dis __section(".data"); +static int max_adj_rl_del_inc __section(".data"); +static int print_nom_ohms __section(".data"); +static int rl_print __section(".data"); + +#ifdef ENABLE_HARDCODED_RLEVEL +static char part_number[21] __section(".data"); +#endif /* ENABLE_HARDCODED_RLEVEL */ + +struct perfect_counts { + u16 count[9][32]; // 8+ECC by 64 values + u32 mask[9]; // 8+ECC, bitmask of perfect delays +}; + +static struct perfect_counts rank_perf[4] __section(".data"); +static struct perfect_counts rodt_perfect_counts __section(".data"); +static int pbm_lowsum_limit __section(".data"); +// FIXME: PBM skip for RODT 240 and 34 +static u32 pbm_rodt_skip __section(".data"); + +// control rank majority processing +static int disable_rank_majority __section(".data"); + +// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE +// for DDR3 +static int enable_rldelay_bump __section(".data"); +static int rldelay_bump_incr __section(".data"); +static int disable_rlv_bump_this_byte __section(".data"); +static u64 value_mask __section(".data"); + +static struct rlevel_byte_data rl_byte[9] __section(".data"); +static int sample_loops __section(".data"); +static int max_samples __section(".data"); +static int rl_rank_errors __section(".data"); +static int rl_mask_err __section(".data"); +static int rl_nonseq_err __section(".data"); +static struct rlevel_bitmask rl_mask[9] __section(".data"); +static int rl_best_rank_score __section(".data"); + +static int rodt_row_skip_mask __section(".data"); + +static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score + rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) +{ + union cvmx_lmcx_comp_ctl2 cc2; + const int rl_separate_ab = 1; + int i; + + rl_best_rank_score = DEFAULT_BEST_RANK_SCORE; + rl_rodt_err = 0; + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + cc2.cn78xx.rodt_ctl = rodt_ctl; + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + udelay(1); /* Give it a little time to take affect */ + if (rl_print > 1) { + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, + imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + } + + memset(rl_byte, 0, sizeof(rl_byte)); + memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts)); + + // when iter RODT is the target RODT, take more samples... 
+ max_samples = rl_samples; + if (rodt_ctl == default_rodt_ctl) + max_samples += rl_samples + 1; + + for (sample_loops = 0; sample_loops < max_samples; sample_loops++) { + int redoing_nonseq_errs = 0; + + rl_mask_err = 0; + + if (!(rl_separate_ab && spd_rdimm && + ddr_type == DDR4_DRAM)) { + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + } + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + // start bitmask interpretation block + + memset(rl_mask, 0, sizeof(rl_mask)); + + if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) { + union cvmx_lmcx_rlevel_rankx rl_rank_aside; + union cvmx_lmcx_modereg_params0 mp0; + + /* A-side */ + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + lmc_wr(priv, + CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + rl_rank_aside.u64 = rl_rank.u64; + + rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0); + rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1); + rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2); + rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3); + rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8); + /* A-side complete */ + + /* B-side */ + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + + /* Clear read-level delays */ + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); + + /* read-leveling */ + oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); + + do { + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + } while (rl_rank.cn78xx.status != 3); + + rl_rank.u64 = + lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4); + rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5); + rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6); + rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7); + /* B-side complete */ + + upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0); + upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1); + upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2); + upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3); + /* ECC A-side */ + upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8); + + mp0.u64 = + lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); + mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), + mp0.u64); + } + + /* + * Evaluate the quality of the read-leveling delays from the + * bitmasks. Also save off a software computed read-leveling + * mask that may be used later to qualify the delay results + * from Octeon. 
+ */ + for (i = 0; i < (8 + ecc_ena); ++i) { + int bmerr; + + if (!(if_bytemask & (1 << i))) + continue; + if (!(rl_separate_ab && spd_rdimm && + ddr_type == DDR4_DRAM)) { + rl_mask[i].bm = + lmc_ddr3_rl_dbg_read(priv, if_num, i); + } + bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i], + ddr_type); + rl_mask[i].errs = bmerr; + rl_mask_err += bmerr; + // count only the "perfect" bitmasks + if (ddr_type == DDR4_DRAM && !bmerr) { + int delay; + // FIXME: for now, simple filtering: + // do NOT count PBMs for RODTs in skip mask + if ((1U << rodt_ctl) & pbm_rodt_skip) + continue; + // FIXME: could optimize this a bit? + delay = get_rl_rank(&rl_rank, i); + rank_perf[rankx].count[i][delay] += 1; + rank_perf[rankx].mask[i] |= + (1ULL << delay); + rodt_perfect_counts.count[i][delay] += 1; + rodt_perfect_counts.mask[i] |= (1ULL << delay); + } + } + + /* Set delays for unused bytes to match byte 0. */ + for (i = 0; i < 9; ++i) { + if (if_bytemask & (1 << i)) + continue; + upd_rl_rank(&rl_rank, i, rl_rank.s.byte0); + } + + /* + * Save a copy of the byte delays in physical + * order for sequential evaluation. + */ + unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank); + + redo_nonseq_errs: + + rl_nonseq_err = 0; + if (!disable_sequential_delay_check) { + for (i = 0; i < 9; ++i) + rl_byte[i].sqerrs = 0; + + if ((if_bytemask & 0xff) == 0xff) { + /* + * Evaluate delay sequence across the whole + * range of bytes for standard dimms. + */ + /* 1=RDIMM, 5=Mini-RDIMM */ + if (spd_dimm_type == 1 || spd_dimm_type == 5) { + int reg_adj_del = abs(rl_byte[4].delay - + rl_byte[5].delay); + + /* + * Registered dimm topology routes + * from the center. + */ + rl_nonseq_err += + nonseq_del(rl_byte, 0, + 3 + ecc_ena, + max_adj_rl_del_inc); + rl_nonseq_err += + nonseq_del(rl_byte, 5, + 7 + ecc_ena, + max_adj_rl_del_inc); + // byte 5 sqerrs never gets cleared + // for RDIMMs + rl_byte[5].sqerrs = 0; + if (reg_adj_del > 1) { + /* + * Assess proximity of bytes on + * opposite sides of register + */ + rl_nonseq_err += (reg_adj_del - + 1) * + RLEVEL_ADJACENT_DELAY_ERROR; + // update byte 5 error + rl_byte[5].sqerrs += + (reg_adj_del - 1) * + RLEVEL_ADJACENT_DELAY_ERROR; + } + } + + /* 2=UDIMM, 6=Mini-UDIMM */ + if (spd_dimm_type == 2 || spd_dimm_type == 6) { + /* + * Unbuffered dimm topology routes + * from end to end. + */ + rl_nonseq_err += nonseq_del(rl_byte, 0, + 7 + ecc_ena, + max_adj_rl_del_inc); + } + } else { + rl_nonseq_err += nonseq_del(rl_byte, 0, + 3 + ecc_ena, + max_adj_rl_del_inc); + } + } /* if (! disable_sequential_delay_check) */ + + rl_rank_errors = rl_mask_err + rl_nonseq_err; + + // print original sample here only if we are not really + // averaging or picking best + // also do not print if we were redoing the NONSEQ score + // for using COMPUTED + if (!redoing_nonseq_errs && rl_samples < 2) { + if (rl_print > 1) { + display_rl_bm(if_num, rankx, rl_mask, ecc_ena); + display_rl_bm_scores(if_num, rankx, rl_mask, + ecc_ena); + display_rl_seq_scores(if_num, rankx, rl_byte, + ecc_ena); + } + display_rl_with_score(if_num, rl_rank, rankx, + rl_rank_errors); + } + + if (rl_compute) { + if (!redoing_nonseq_errs) { + /* Recompute the delays based on the bitmask */ + for (i = 0; i < (8 + ecc_ena); ++i) { + if (!(if_bytemask & (1 << i))) + continue; + + upd_rl_rank(&rl_rank, i, + compute_ddr3_rlevel_delay( + rl_mask[i].mstart, + rl_mask[i].width, + rl_ctl)); + } + + /* + * Override the copy of byte delays with the + * computed results. 
+ */ + unpack_rlevel_settings(if_bytemask, ecc_ena, + rl_byte, rl_rank); + + redoing_nonseq_errs = 1; + goto redo_nonseq_errs; + + } else { + /* + * now print this if already printed the + * original sample + */ + if (rl_samples < 2 || rl_print) { + display_rl_with_computed(if_num, + rl_rank, rankx, + rl_rank_errors); + } + } + } /* if (rl_compute) */ + + // end bitmask interpretation block + + // if it is a better (lower) score, then keep it + if (rl_rank_errors < rl_best_rank_score) { + rl_best_rank_score = rl_rank_errors; + + // save the new best delays and best errors + for (i = 0; i < (8 + ecc_ena); ++i) { + rl_byte[i].best = rl_byte[i].delay; + rl_byte[i].bestsq = rl_byte[i].sqerrs; + // save bitmasks and their scores as well + // xlate UNPACKED index to PACKED index to + // get from rl_mask + rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm; + rl_byte[i].bmerrs = + rl_mask[XUP(i, !!ecc_ena)].errs; + } + } + + rl_rodt_err += rl_rank_errors; + } + + /* We recorded the best score across the averaging loops */ + rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score; + + /* + * Restore the delays from the best fields that go with the best + * score + */ + for (i = 0; i < 9; ++i) { + rl_byte[i].delay = rl_byte[i].best; + rl_byte[i].sqerrs = rl_byte[i].bestsq; + } + + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank); + + if (rl_samples > 1) { + // restore the "best" bitmasks and their scores for printing + for (i = 0; i < 9; ++i) { + if ((if_bytemask & (1 << i)) == 0) + continue; + // xlate PACKED index to UNPACKED index to get from + // rl_byte + rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm; + rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs; + } + + // maybe print bitmasks/scores here + if (rl_print > 1) { + display_rl_bm(if_num, rankx, rl_mask, ecc_ena); + display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena); + display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena); + + display_rl_with_rodt(if_num, rl_rank, rankx, + rl_score[rtt_nom][rodt_ctl][rankx].score, + print_nom_ohms, + imp_val->rodt_ohms[rodt_ctl], + WITH_RODT_BESTSCORE); + + debug("-----------\n"); + } + } + + rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64; + + // print out the PBMs for the current RODT + if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity? + // FIXME: change verbosity level after debug complete... 
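+	// For each byte lane, dump the delay values that produced error-free
+	// ("perfect") read-leveling bitmasks at this RODT setting, together
+	// with how often each delay value was seen.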
+ + for (i = 0; i < 9; i++) { + u64 temp_mask; + int num_values; + + // FIXME: PBM skip for RODTs in mask + if ((1U << rodt_ctl) & pbm_rodt_skip) + continue; + + temp_mask = rodt_perfect_counts.mask[i]; + num_values = __builtin_popcountll(temp_mask); + i = __builtin_ffsll(temp_mask) - 1; + + debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ", + node, if_num, rankx, + imp_val->rodt_ohms[rodt_ctl], + i, temp_mask >> i, num_values); + + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + debug("%2d(%2d) ", i, + rodt_perfect_counts.count[i][i]); + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + debug("\n"); + } + } +} + +static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score + rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) +{ + /* Start with an arbitrarily high score */ + int best_rank_score = DEFAULT_BEST_RANK_SCORE; + int best_rank_rtt_nom = 0; + int best_rank_ctl = 0; + int best_rank_ohms = 0; + int best_rankx = 0; + int dimm_rank_mask; + int max_rank_score; + union cvmx_lmcx_rlevel_rankx saved_rl_rank; + int next_ohms; + int orankx; + int next_score = 0; + int best_byte, new_byte, temp_byte, orig_best_byte; + int rank_best_bytes[9]; + int byte_sh; + int avg_byte; + int avg_diff; + int i; + + if (!(rank_mask & (1 << rankx))) + return; + + // some of the rank-related loops below need to operate only on + // the ranks of a single DIMM, + // so create a mask for their use here + if (num_ranks == 4) { + dimm_rank_mask = rank_mask; // should be 1111 + } else { + dimm_rank_mask = rank_mask & 3; // should be 01 or 11 + if (rankx >= 2) { + // doing a rank on the second DIMM, should be + // 0100 or 1100 + dimm_rank_mask <<= 2; + } + } + debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", + dimm_rank_mask, rank_mask, rankx); + + // this is the start of the BEST ROW SCORE LOOP + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n", + node, if_num, rankx, rtt_nom, + imp_val->rtt_nom_ohms[rtt_nom]); + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + next_ohms = imp_val->rodt_ohms[rodt_ctl]; + + // skip RODT rows in mask, but *NOT* rows with too + // high a score; + // we will not use the skipped ones for printing or + // evaluating, but we need to allow all the + // non-skipped ones to be candidates for "best" + if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) { + debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n", + node, if_num, rankx, rodt_ctl, + next_ohms, next_score); + continue; + } + + // this is ROFFIX-0528 + for (orankx = 0; orankx < dimm_count * 4; orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + next_score = rl_score[rtt_nom][rodt_ctl][orankx].score; + + // always skip a higher score + if (next_score > best_rank_score) + continue; + + // if scores are equal + if (next_score == best_rank_score) { + // always skip lower ohms + if (next_ohms < best_rank_ohms) + continue; + + // if same ohms + if (next_ohms == best_rank_ohms) { + // always skip the other rank(s) + if (orankx != rankx) + continue; + } + // else next_ohms are greater, + // always choose it + } + // else next_score is less than current best, + // so always choose it + debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n", + node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score, + best_rank_score, 
best_rank_ohms); + best_rank_score = next_score; + best_rank_rtt_nom = rtt_nom; + //best_rank_nom_ohms = rtt_nom_ohms; + best_rank_ctl = rodt_ctl; + best_rank_ohms = next_ohms; + best_rankx = orankx; + rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + } + } + } + + // this is the end of the BEST ROW SCORE LOOP + + // DANGER, Will Robinson!! Abort now if we did not find a best + // score at all... + if (best_rank_score == DEFAULT_BEST_RANK_SCORE) { + printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n", + node, if_num, rankx); + mdelay(500); + do_reset(NULL, 0, 0, NULL); + } + + // FIXME: relative now, but still arbitrary... + max_rank_score = best_rank_score; + if (ddr_type == DDR4_DRAM) { + // halve the range if 2 DIMMs unless they are single rank... + max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? + dimm_count : 1)); + } else { + // Since DDR3 typically has a wider score range, + // keep more of them always + max_rank_score += MAX_RANK_SCORE_LIMIT; + } + + if (!ecc_ena) { + /* ECC is not used */ + rl_rank.s.byte8 = rl_rank.s.byte0; + } + + // at the end, write the best row settings to the current rank + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + + saved_rl_rank.u64 = rl_rank.u64; + + // this is the start of the PRINT LOOP + int pass; + + // for pass==0, print current rank, pass==1 print other rank(s) + // this is done because we want to show each ranks RODT values + // together, not interlaced + // keep separates for ranks - pass=0 target rank, pass=1 other + // rank on DIMM + int mask_skipped[2] = {0, 0}; + int score_skipped[2] = {0, 0}; + int selected_rows[2] = {0, 0}; + int zero_scores[2] = {0, 0}; + for (pass = 0; pass < 2; pass++) { + for (orankx = 0; orankx < dimm_count * 4; orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + if ((pass == 0 && orankx != rankx) || + (pass != 0 && orankx == rankx)) + continue; + + for (rtt_idx = min_rtt_nom_idx; + rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + if (dyn_rtt_nom_mask == 0) { + print_nom_ohms = -1; + } else { + print_nom_ohms = + imp_val->rtt_nom_ohms[rtt_nom]; + } + + // cycle through all the RODT values... + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx + temp_rl_rank; + int temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + int skip_row; + + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + + // skip RODT rows in mask, or rows + // with too high a score; + // we will not use them for printing + // or evaluating... 
+ if ((1 << rodt_ctl) & + rodt_row_skip_mask) { + skip_row = WITH_RODT_SKIPPING; + ++mask_skipped[pass]; + } else if (temp_score > + max_rank_score) { + skip_row = WITH_RODT_SKIPPING; + ++score_skipped[pass]; + } else { + skip_row = WITH_RODT_BLANK; + ++selected_rows[pass]; + if (temp_score == 0) + ++zero_scores[pass]; + } + + // identify and print the BEST ROW + // when it comes up + if (skip_row == WITH_RODT_BLANK && + best_rankx == orankx && + best_rank_rtt_nom == rtt_nom && + best_rank_ctl == rodt_ctl) + skip_row = WITH_RODT_BESTROW; + + if (rl_print) { + display_rl_with_rodt(if_num, + temp_rl_rank, orankx, temp_score, + print_nom_ohms, + imp_val->rodt_ohms[rodt_ctl], + skip_row); + } + } + } + } + } + debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n", + node, if_num, rankx, selected_rows[0], selected_rows[1], + zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1], + score_skipped[0], score_skipped[1]); + // this is the end of the PRINT LOOP + + // now evaluate which bytes need adjusting + // collect the new byte values; first init with current best for + // neighbor use + for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { + rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) & + RLEVEL_BYTE_MSK; + } + + // this is the start of the BEST BYTE LOOP + + for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { + int sum = 0, count = 0; + int count_less = 0, count_same = 0, count_more = 0; + int count_byte; // save the value we counted around + // for rank majority use + int rank_less = 0, rank_same = 0, rank_more = 0; + int neighbor; + int neigh_byte; + + best_byte = rank_best_bytes[i]; + orig_best_byte = rank_best_bytes[i]; + + // this is the start of the BEST BYTE AVERAGING LOOP + + // validate the initial "best" byte by looking at the + // average of the unskipped byte-column entries + // we want to do this before we go further, so we can + // try to start with a better initial value + // this is the so-called "BESTBUY" patch set + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx temp_rl_rank; + int temp_score; + + // average over all the ranks + for (orankx = 0; orankx < dimm_count * 4; + orankx++) { + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows with + // too high a score; + // we will not use them for printing or + // evaluating... + + if (!((1 << rodt_ctl) & + rodt_row_skip_mask) && + temp_score <= max_rank_score) { + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = + (int)(temp_rl_rank.u64 >> byte_sh) & + RLEVEL_BYTE_MSK; + sum += temp_byte; + count++; + } + } + } + } + + // this is the end of the BEST BYTE AVERAGING LOOP + + // FIXME: validate count and sum?? + avg_byte = (int)divide_nint(sum, count); + avg_diff = best_byte - avg_byte; + new_byte = best_byte; + if (avg_diff != 0) { + // bump best up/dn by 1, not necessarily all the + // way to avg + new_byte = best_byte + ((avg_diff > 0) ? 
-1 : 1); + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n", + node, if_num, rankx, + i, best_byte, avg_diff, avg_byte, new_byte); + } + best_byte = new_byte; + count_byte = new_byte; // save the value we will count around + + // At this point best_byte is either: + // 1. the original byte-column value from the best scoring + // RODT row, OR + // 2. that value bumped toward the average of all the + // byte-column values + // + // best_byte will not change from here on... + + // this is the start of the BEST BYTE COUNTING LOOP + + // NOTE: we do this next loop separately from above, because + // we count relative to "best_byte" + // which may have been modified by the above averaging + // operation... + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; + --rodt_ctl) { + union cvmx_lmcx_rlevel_rankx temp_rl_rank; + int temp_score; + + for (orankx = 0; orankx < dimm_count * 4; + orankx++) { // count over all the ranks + // stay on the same DIMM + if (!(dimm_rank_mask & (1 << orankx))) + continue; + + temp_score = + rl_score[rtt_nom][rodt_ctl][orankx].score; + // skip RODT rows in mask, or rows + // with too high a score; + // we will not use them for printing + // or evaluating... + if (((1 << rodt_ctl) & + rodt_row_skip_mask) || + temp_score > max_rank_score) + continue; + + temp_rl_rank.u64 = + rl_score[rtt_nom][rodt_ctl][orankx].setting; + temp_byte = (temp_rl_rank.u64 >> + byte_sh) & RLEVEL_BYTE_MSK; + + if (temp_byte == 0) + ; // do not count it if illegal + else if (temp_byte == best_byte) + count_same++; + else if (temp_byte == best_byte - 1) + count_less++; + else if (temp_byte == best_byte + 1) + count_more++; + // else do not count anything more + // than 1 away from the best + + // no rank counting if disabled + if (disable_rank_majority) + continue; + + // FIXME? count is relative to + // best_byte; should it be rank-based? + // rank counts only on main rank + if (orankx != rankx) + continue; + else if (temp_byte == best_byte) + rank_same++; + else if (temp_byte == best_byte - 1) + rank_less++; + else if (temp_byte == best_byte + 1) + rank_more++; + } + } + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n", + node, if_num, rankx, + i, orig_best_byte, best_byte, + count_more, count_same, count_less, + rank_more, rank_same, rank_less); + } + + // this is the end of the BEST BYTE COUNTING LOOP + + // choose the new byte value + // we need to check that there is no gap greater than 2 + // between adjacent bytes (adjacency depends on DIMM type) + // use the neighbor value to help decide + // initially, the rank_best_bytes[] will contain values from + // the chosen lowest score rank + new_byte = 0; + + // neighbor is index-1 unless we are index 0 or index 8 (ECC) + neighbor = (i == 8) ? 3 : ((i == 0) ? 
1 : i - 1); + neigh_byte = rank_best_bytes[neighbor]; + + // can go up or down or stay the same, so look at a numeric + // average to help + new_byte = (int)divide_nint(((count_more * (best_byte + 1)) + + (count_same * (best_byte + 0)) + + (count_less * (best_byte - 1))), + max(1, (count_more + count_same + + count_less))); + + // use neighbor to help choose with average + if (i > 0 && (abs(neigh_byte - new_byte) > 2) && + !disable_sequential_delay_check) { + // but not for byte 0 + int avg_pick = new_byte; + + if ((new_byte - best_byte) != 0) { + // back to best, average did not get better + new_byte = best_byte; + } else { + // avg was the same, still too far, now move + // it towards the neighbor + new_byte += (neigh_byte > new_byte) ? 1 : -1; + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n", + node, if_num, rankx, + i, neighbor, neigh_byte, avg_pick, + new_byte); + } + } else { + // NOTE: + // For now, we let the neighbor processing above trump + // the new simple majority processing here. + // This is mostly because we have seen no smoking gun + // for a neighbor bad choice (yet?). + // Also note that we will ALWAYS be using byte 0 + // majority, because of the if clause above. + + // majority is dependent on the counts, which are + // relative to best_byte, so start there + int maj_byte = best_byte; + int rank_maj; + int rank_sum; + + if (count_more > count_same && + count_more > count_less) { + maj_byte++; + } else if (count_less > count_same && + count_less > count_more) { + maj_byte--; + } + + if (maj_byte != new_byte) { + // print only when majority choice is + // different from average + if (rl_print) { + debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n", + node, if_num, rankx, i, maj_byte, + new_byte); + } + new_byte = maj_byte; + } else { + if (rl_print) { + debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n", + node, if_num, rankx, i, new_byte); + } + } + + if (!disable_rank_majority) { + // rank majority is dependent on the rank + // counts, which are relative to best_byte, + // so start there, and adjust according to the + // rank counts majority + rank_maj = best_byte; + if (rank_more > rank_same && + rank_more > rank_less) { + rank_maj++; + } else if (rank_less > rank_same && + rank_less > rank_more) { + rank_maj--; + } + rank_sum = rank_more + rank_same + rank_less; + + // now, let rank majority possibly rule over + // the current new_byte however we got it + if (rank_maj != new_byte) { // only if different + // Here is where we decide whether to + // completely apply RANK_MAJORITY or not + // ignore if less than + if (rank_maj < new_byte) { + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n", + node, if_num, + rankx, i, + rank_maj, + new_byte); + } + } else { + // For the moment, we do it + // ONLY when running 2-slot + // configs + // OR when rank_sum is big + // enough + if (dimm_count > 1 || + rank_sum > 2) { + // print only when rank + // majority choice is + // selected + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n", + node, + if_num, + rankx, + i, + rank_maj, + new_byte); + } + new_byte = rank_maj; + } else { + // FIXME: print some + // info when we could + // have chosen RANKMAJ + // but did not + if (rl_print) { + debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n", + node, + if_num, + rankx, + i, + rank_maj, + new_byte, + best_byte, + 
rank_sum); + } + } + } + } + } /* if (!disable_rank_majority) */ + } + // one last check: + // if new_byte is still count_byte, BUT there was no count + // for that value, DO SOMETHING!!! + // FIXME: go back to original best byte from the best row + if (new_byte == count_byte && count_same == 0) { + new_byte = orig_best_byte; + if (rl_print) { + debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n", + node, if_num, rankx, i, new_byte); + } + } + // Look at counts for "perfect" bitmasks (PBMs) if we had + // any for this byte-lane. + // Remember, we only counted for DDR4, so zero means none + // or DDR3, and we bypass this... + value_mask = rank_perf[rankx].mask[i]; + disable_rlv_bump_this_byte = 0; + + if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) { + int i, delay_count, delay_max = 0, del_val = 0; + int num_values = __builtin_popcountll(value_mask); + int sum_counts = 0; + u64 temp_mask = value_mask; + + disable_rlv_bump_this_byte = 1; + i = __builtin_ffsll(temp_mask) - 1; + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ", + node, if_num, rankx, i, value_mask >> i, + num_values); + + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + delay_count = rank_perf[rankx].count[i][i]; + sum_counts += delay_count; + if (rl_print) + debug("%2d(%2d) ", i, delay_count); + if (delay_count >= delay_max) { + delay_max = delay_count; + del_val = i; + } + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + + // if sum_counts is small, just use NEW_BYTE + if (sum_counts < pbm_lowsum_limit) { + if (rl_print) + debug(": LOWSUM (%2d), choose ORIG ", + sum_counts); + del_val = new_byte; + delay_max = rank_perf[rankx].count[i][del_val]; + } + + // finish printing here... + if (rl_print) { + debug(": USING %2d (%2d) D%d\n", del_val, + delay_max, disable_rlv_bump_this_byte); + } + + new_byte = del_val; // override with best PBM choice + + } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) { + // if (value_mask != 0) { + int i, delay_count, del_val; + int num_values = __builtin_popcountll(value_mask); + int sum_counts = 0; + u64 temp_mask = value_mask; + + i = __builtin_ffsll(temp_mask) - 1; + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ", + node, if_num, rankx, i, value_mask >> i, + num_values); + while (temp_mask != 0) { + i = __builtin_ffsll(temp_mask) - 1; + delay_count = rank_perf[rankx].count[i][i]; + sum_counts += delay_count; + if (rl_print) + debug("%2d(%2d) ", i, delay_count); + temp_mask &= ~(1UL << i); + } /* while (temp_mask != 0) */ + + del_val = __builtin_ffsll(value_mask) - 1; + delay_count = + rank_perf[rankx].count[i][del_val]; + + // overkill, normally only 1-4 bits + i = (value_mask >> del_val) & 0x1F; + + // if sum_counts is small, treat as special and use + // NEW_BYTE + if (sum_counts < pbm_lowsum_limit) { + if (rl_print) + debug(": LOWSUM (%2d), choose ORIG", + sum_counts); + i = 99; // SPECIAL case... 
+ } + + switch (i) { + case 0x01 /* 00001b */: + // allow BUMP + break; + + case 0x13 /* 10011b */: + case 0x0B /* 01011b */: + case 0x03 /* 00011b */: + del_val += 1; // take the second + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + case 0x0D /* 01101b */: + case 0x05 /* 00101b */: + // test count of lowest and all + if (delay_count >= 5 || sum_counts <= 5) + del_val += 1; // take the hole + else + del_val += 2; // take the next set + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + case 0x0F /* 01111b */: + case 0x17 /* 10111b */: + case 0x07 /* 00111b */: + del_val += 1; // take the second + if (delay_count < 5) { // lowest count is small + int second = + rank_perf[rankx].count[i][del_val]; + int third = + rank_perf[rankx].count[i][del_val + 1]; + // test if middle is more than 1 OR + // top is more than 1; + // this means if they are BOTH 1, + // then we keep the second... + if (second > 1 || third > 1) { + // if middle is small OR top + // is large + if (second < 5 || + third > 1) { + // take the top + del_val += 1; + if (rl_print) + debug(": TOP7 "); + } + } + } + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + + default: // all others... + if (rl_print) + debug(": ABNORMAL, choose ORIG"); + + case 99: // special + // FIXME: choose original choice? + del_val = new_byte; + disable_rlv_bump_this_byte = 1; // allow no BUMP + break; + } + delay_count = + rank_perf[rankx].count[i][del_val]; + + // finish printing here... + if (rl_print) + debug(": USING %2d (%2d) D%d\n", del_val, + delay_count, disable_rlv_bump_this_byte); + new_byte = del_val; // override with best PBM choice + } else { + if (ddr_type == DDR4_DRAM) { // only report when DDR4 + // FIXME: remove or increase VBL for this + // output... + if (rl_print) + debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n", + node, if_num, rankx, i, + new_byte); + // prevent ODD bump, rely on original + disable_rlv_bump_this_byte = 1; + } + } /* if (value_mask != 0) */ + + // optionally bump the delay value + if (enable_rldelay_bump && !disable_rlv_bump_this_byte) { + if ((new_byte & enable_rldelay_bump) == + enable_rldelay_bump) { + int bump_value = new_byte + rldelay_bump_incr; + + if (rl_print) { + debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n", + node, if_num, rankx, i, + new_byte, bump_value, + (value_mask & + (1 << bump_value)) ? 
+ "PBM" : "NOPBM"); + } + new_byte = bump_value; + } + } + + // last checks for count-related purposes + if (new_byte == best_byte && count_more > 0 && + count_less == 0) { + // we really should take best_byte + 1 + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte + 1); + new_byte = best_byte + 1; + } + } else if ((new_byte < best_byte) && (count_same > 0)) { + // we really should take best_byte + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte); + new_byte = best_byte; + } + } else if (new_byte > best_byte) { + if ((new_byte == (best_byte + 1)) && + count_more == 0 && count_less > 0) { + // we really should take best_byte + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte); + new_byte = best_byte; + } + } else if ((new_byte >= (best_byte + 2)) && + ((count_more > 0) || (count_same > 0))) { + if (rl_print) { + debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n", + node, if_num, rankx, i, + new_byte, best_byte + 1); + new_byte = best_byte + 1; + } + } + } + + if (rl_print) { + debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n", + node, if_num, rankx, i, orig_best_byte, + best_byte, count_more, count_same, count_less, + new_byte); + } + + // update the byte with the new value (NOTE: orig value in + // the CSR may not be current "best") + upd_rl_rank(&rl_rank, i, new_byte); + + // save new best for neighbor use + rank_best_bytes[i] = new_byte; + } /* for (i = 0; i < 8+ecc_ena; i++) */ + + ////////////////// this is the end of the BEST BYTE LOOP + + if (saved_rl_rank.u64 != rl_rank.u64) { + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), + rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); + debug("Adjusting Read-Leveling per-RANK settings.\n"); + } else { + debug("Not Adjusting Read-Leveling per-RANK settings.\n"); + } + display_rl_with_final(if_num, rl_rank, rankx); + + // FIXME: does this help make the output a little easier to focus? 
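+	// (the separator below is only printed when verbose RL output is
+	// enabled; it has no functional effect, it just splits the log
+	// between ranks)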
+ if (rl_print > 0) + debug("-----------\n"); + +#define RLEVEL_RANKX_EXTRAS_INCR 0 + // if there are unused entries to be filled + if ((rank_mask & 0x0f) != 0x0f) { + // copy the current rank + union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank; + + if (rankx < 3) { +#if RLEVEL_RANKX_EXTRAS_INCR > 0 + int byte, delay; + + // modify the copy in prep for writing to empty slot(s) + for (byte = 0; byte < 9; byte++) { + delay = get_rl_rank(&temp_rl_rank, byte) + + RLEVEL_RANKX_EXTRAS_INCR; + if (delay > RLEVEL_BYTE_MSK) + delay = RLEVEL_BYTE_MSK; + upd_rl_rank(&temp_rl_rank, byte, delay); + } +#endif + + // if rank 0, write rank 1 and rank 2 here if empty + if (rankx == 0) { + // check that rank 1 is empty + if (!(rank_mask & (1 << 1))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 1); + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(1, + if_num), + temp_rl_rank.u64); + } + + // check that rank 2 is empty + if (!(rank_mask & (1 << 2))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 2); + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(2, + if_num), + temp_rl_rank.u64); + } + } + + // if ranks 0, 1 or 2, write rank 3 here if empty + // check that rank 3 is empty + if (!(rank_mask & (1 << 3))) { + debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", + node, if_num, rankx, 3); + lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num), + temp_rl_rank.u64); + } + } + } +} + +static void lmc_read_leveling(struct ddr_priv *priv) +{ + struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]; + union cvmx_lmcx_control ctl; + union cvmx_lmcx_config cfg; + int rankx; + char *s; + int i; + + /* + * 4.8.10 LMC Read Leveling + * + * LMC supports an automatic read-leveling separately per byte-lane + * using the DDR3 multipurpose register predefined pattern for system + * calibration defined in the JEDEC DDR3 specifications. + * + * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations + * must be completed prior to starting this LMC read-leveling sequence. + * + * Software could simply write the desired read-leveling values into + * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses + * LMC's autoread-leveling capabilities. + * + * When LMC does the read-leveling sequence for a rank, it first enables + * the DDR3 multipurpose register predefined pattern for system + * calibration on the selected DRAM rank via a DDR3 MR3 write, then + * executes 64 RD operations at different internal delay settings, then + * disables the predefined pattern via another DDR3 MR3 write + * operation. LMC determines the pass or fail of each of the 64 settings + * independently for each byte lane, then writes appropriate + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank. + * + * After read-leveling for a rank, software can read the 64 pass/fail + * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. + * Software can observe all pass/fail results for all byte lanes in a + * rank via separate read-leveling sequences on the rank with different + * LMC(0)_RLEVEL_CTL[BYTE] values. + * + * The 64 pass/fail results will typically have failures for the low + * delays, followed by a run of some passing settings, followed by more + * failures in the remaining high delays. LMC sets + * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings. + * First, LMC selects the longest run of successes in the 64 results. 
+ * (In the unlikely event that there is more than one longest run, LMC + * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and + * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes, + * LMC selects the last passing setting in the run minus + * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting + * in the run (rounding earlier when necessary). We expect the + * read-leveling sequence to produce good results with the reset values + * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2. + * + * The read-leveling sequence has the following steps: + * + * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings. + * Do the remaining substeps 2-4 separately for each rank i with + * attached DRAM. + * + * 2. Without changing any other fields in LMC(0)_CONFIG, + * + * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling + * + * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) + * + * o write LMC(0)_SEQ_CTL[INIT_START] = 1 + * + * This initiates the previously-described read-leveling. + * + * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2 + * + * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte + * lanes at this point. + * + * If ECC DRAM is not present (i.e. when DRAM is not attached to the + * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and + * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] = + * LMC(0)_RLEVEL_RANK*[BYTE0]. + * + * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to + * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by + * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify + * LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all + * BITMASKs can be observed. + * + * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks. + * + * Let rank i be a rank with attached DRAM. + * + * For all ranks j that do not have attached DRAM, set + * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi. + * + * This read-leveling sequence can help select the proper CN70XX ODT + * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated + * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is + * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk] + * (for a used byte lane k) can indicate that the CN70XX ODT value is + * bad. It is possible to simultaneously optimize both + * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by + * performing this read-leveling sequence for several + * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the + * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks. + */ + + rl_rodt_err = 0; + rl_dbg_loops = 1; + saved_int_zqcs_dis = 0; + max_adj_rl_del_inc = 0; + rl_print = RLEVEL_PRINTALL_DEFAULT; + +#ifdef ENABLE_HARDCODED_RLEVEL + part_number[21] = {0}; +#endif /* ENABLE_HARDCODED_RLEVEL */ + + pbm_lowsum_limit = 5; // FIXME: is this a good default? + // FIXME: PBM skip for RODT 240 and 34 + pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) | + (1U << ddr4_rodt_ctl_34_ohm); + + disable_rank_majority = 0; // control rank majority processing + + // default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE + // for DDR3 + rldelay_bump_incr = 0; + disable_rlv_bump_this_byte = 0; + + enable_rldelay_bump = (ddr_type == DDR4_DRAM) ? + ((octeon_is_cpuid(OCTEON_CN73XX)) ? 
1 : 3) : 0; + + s = lookup_env(priv, "ddr_disable_rank_majority"); + if (s) + disable_rank_majority = !!simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_pbm_lowsum_limit"); + if (s) + pbm_lowsum_limit = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_pbm_rodt_skip"); + if (s) + pbm_rodt_skip = simple_strtoul(s, NULL, 0); + memset(rank_perf, 0, sizeof(rank_perf)); + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + save_ddr2t = ctl.cn78xx.ddr2t; + + cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); + ecc_ena = cfg.cn78xx.ecc_ena; + + s = lookup_env(priv, "ddr_rlevel_2t"); + if (s) + ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + debug("LMC%d: Performing Read-Leveling\n", if_num); + + rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + + rl_samples = c_cfg->rlevel_average_loops; + if (rl_samples == 0) { + rl_samples = RLEVEL_SAMPLES_DEFAULT; + // up the samples for these cases + if (dimm_count == 1 || num_ranks == 1) + rl_samples = rl_samples * 2 + 1; + } + + rl_compute = c_cfg->rlevel_compute; + rl_ctl.cn78xx.offset_en = c_cfg->offset_en; + rl_ctl.cn78xx.offset = spd_rdimm + ? c_cfg->offset_rdimm + : c_cfg->offset_udimm; + + int value = 1; // should ALWAYS be set + + s = lookup_env(priv, "ddr_rlevel_delay_unload"); + if (s) + value = !!simple_strtoul(s, NULL, 0); + rl_ctl.cn78xx.delay_unload_0 = value; + rl_ctl.cn78xx.delay_unload_1 = value; + rl_ctl.cn78xx.delay_unload_2 = value; + rl_ctl.cn78xx.delay_unload_3 = value; + + // use OR_DIS=1 to try for better results + rl_ctl.cn78xx.or_dis = 1; + + /* + * If we will be switching to 32bit mode level based on only + * four bits because there are only 4 ECC bits. + */ + rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F; + + // allow overrides + s = lookup_env(priv, "ddr_rlevel_ctl_or_dis"); + if (s) + rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_ctl_bitmask"); + if (s) + rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0); + + rl_comp_offs = spd_rdimm + ? c_cfg->rlevel_comp_offset_rdimm + : c_cfg->rlevel_comp_offset_udimm; + s = lookup_env(priv, "ddr_rlevel_comp_offset"); + if (s) + rl_comp_offs = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_offset"); + if (s) + rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_offset_en"); + if (s) + rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_ctl"); + if (s) + rl_ctl.u64 = simple_strtoul(s, NULL, 0); + + lmc_wr(priv, + CVMX_LMCX_RLEVEL_CTL(if_num), + rl_ctl.u64); + + // do this here so we can look at final RLEVEL_CTL[offset] setting... + s = lookup_env(priv, "ddr_enable_rldelay_bump"); + if (s) { + // also use as mask bits + enable_rldelay_bump = strtoul(s, NULL, 0); + } + + if (enable_rldelay_bump != 0) + rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? 
-1 : 1; + + s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num); + if (s) + rl_dbg_loops = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rtt_nom_auto"); + if (s) + ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_average"); + if (s) + rl_samples = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_compute"); + if (s) + rl_compute = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_rlevel_printall"); + if (s) + rl_print = simple_strtoul(s, NULL, 0); + + debug("RLEVEL_CTL : 0x%016llx\n", + rl_ctl.u64); + debug("RLEVEL_OFFSET : %6d\n", + rl_ctl.cn78xx.offset); + debug("RLEVEL_OFFSET_EN : %6d\n", + rl_ctl.cn78xx.offset_en); + + /* + * The purpose for the indexed table is to sort the settings + * by the ohm value to simplify the testing when incrementing + * through the settings. (index => ohms) 1=120, 2=60, 3=40, + * 4=30, 5=20 + */ + min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ? + 1 : c_cfg->min_rtt_nom_idx; + max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ? + 5 : c_cfg->max_rtt_nom_idx; + + min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl; + max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl; + + s = lookup_env(priv, "ddr_min_rodt_ctl"); + if (s) + min_rodt_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_max_rodt_ctl"); + if (s) + max_rodt_ctl = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_min_rtt_nom_idx"); + if (s) + min_rtt_nom_idx = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_max_rtt_nom_idx"); + if (s) + max_rtt_nom_idx = simple_strtoul(s, NULL, 0); + +#ifdef ENABLE_HARDCODED_RLEVEL + if (c_cfg->rl_tbl) { + /* Check for hard-coded read-leveling settings */ + get_dimm_part_number(part_number, &dimm_config_table[0], + 0, ddr_type); + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + i = 0; + while (c_cfg->rl_tbl[i].part) { + debug("DIMM part number:\"%s\", SPD: \"%s\"\n", + c_cfg->rl_tbl[i].part, part_number); + if ((strcmp(part_number, + c_cfg->rl_tbl[i].part) == 0) && + (abs(c_cfg->rl_tbl[i].speed - + 2 * ddr_hertz / (1000 * 1000)) < 10)) { + debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n", + part_number); + rl_rank.u64 = + c_cfg->rl_tbl[i].rl_rank[if_num][rankx]; + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num), + rl_rank.u64); + rl_rank.u64 = + lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + display_rl(if_num, rl_rank, rankx); + /* Disable h/w read-leveling */ + rl_dbg_loops = 0; + break; + } + ++i; + } + } + } +#endif /* ENABLE_HARDCODED_RLEVEL */ + + max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment; + s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment"); + if (s) + max_adj_rl_del_inc = strtoul(s, NULL, 0); + + while (rl_dbg_loops--) { + union cvmx_lmcx_modereg_params1 mp1; + union cvmx_lmcx_comp_ctl2 cc2; + + /* Initialize the error scoreboard */ + memset(rl_score, 0, sizeof(rl_score)); + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + saved_ddr__ptune = cc2.cn78xx.ddr__ptune; + saved_ddr__ntune = cc2.cn78xx.ddr__ntune; + + /* Disable dynamic compensation settings */ + if (rl_comp_offs != 0) { + cc2.cn78xx.ptune = saved_ddr__ptune; + cc2.cn78xx.ntune = saved_ddr__ntune; + + /* + * Round up the ptune calculation to bias the odd + * cases toward ptune + */ + cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2); + 
cc2.cn78xx.ntune -= rl_comp_offs / 2; + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + saved_int_zqcs_dis = ctl.s.int_zqcs_dis; + /* Disable ZQCS while in bypass. */ + ctl.s.int_zqcs_dis = 1; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + cc2.cn78xx.byp = 1; /* Enable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); + } + + mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; + ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + /* + * When the read ODT mask is zero the dyn_rtt_nom_mask + * is zero than RTT_NOM will not be changing during + * read-leveling. Since the value is fixed we only need + * to test it once. + */ + if (dyn_rtt_nom_mask == 0) { + // flag not to print NOM ohms + print_nom_ohms = -1; + } else { + if (dyn_rtt_nom_mask & 1) + mp1.s.rtt_nom_00 = rtt_nom; + if (dyn_rtt_nom_mask & 2) + mp1.s.rtt_nom_01 = rtt_nom; + if (dyn_rtt_nom_mask & 4) + mp1.s.rtt_nom_10 = rtt_nom; + if (dyn_rtt_nom_mask & 8) + mp1.s.rtt_nom_11 = rtt_nom; + // FIXME? rank 0 ohms always? + print_nom_ohms = + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00]; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), + mp1.u64); + + if (print_nom_ohms >= 0 && rl_print > 1) { + debug("\n"); + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, + mp1.s.rtt_nom_01, + mp1.s.rtt_nom_00); + } + + ddr_init_seq(priv, rank_mask, if_num); + + // Try RANK outside RODT to rearrange the output... + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) + rodt_loop(priv, rankx, rl_score); + } + } + + /* Re-enable dynamic compensation settings. */ + if (rl_comp_offs != 0) { + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + cc2.cn78xx.ptune = 0; + cc2.cn78xx.ntune = 0; + cc2.cn78xx.byp = 0; /* Disable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + /* Read once */ + lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + /* Restore original setting */ + ctl.s.int_zqcs_dis = saved_int_zqcs_dis; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + } + + int override_compensation = 0; + + s = lookup_env(priv, "ddr__ptune"); + if (s) + saved_ddr__ptune = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr__ntune"); + if (s) { + saved_ddr__ntune = strtoul(s, NULL, 0); + override_compensation = 1; + } + + if (override_compensation) { + cc2.cn78xx.ptune = saved_ddr__ptune; + cc2.cn78xx.ntune = saved_ddr__ntune; + + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + saved_int_zqcs_dis = ctl.s.int_zqcs_dis; + /* Disable ZQCS while in bypass. 
*/ + ctl.s.int_zqcs_dis = 1; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + + cc2.cn78xx.byp = 1; /* Enable bypass mode */ + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + /* Read again */ + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + + debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", + cc2.cn78xx.ptune, cc2.cn78xx.ntune); + } + + /* Evaluation block */ + /* Still at initial value? */ + int best_rodt_score = DEFAULT_BEST_RANK_SCORE; + int auto_rodt_ctl = 0; + int auto_rtt_nom = 0; + int rodt_score; + + rodt_row_skip_mask = 0; + + // just add specific RODT rows to the skip mask for DDR4 + // at this time... + if (ddr_type == DDR4_DRAM) { + // skip RODT row 34 ohms for all DDR4 types + rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); + // skip RODT row 40 ohms for all DDR4 types + rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); + // For now, do not skip RODT row 40 or 48 ohm when + // ddr_hertz is above 1075 MHz + if (ddr_hertz > 1075000000) { + // noskip RODT row 40 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_40_ohm); + // noskip RODT row 48 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_48_ohm); + } + // For now, do not skip RODT row 48 ohm for 2Rx4 + // stacked die DIMMs + if (is_stacked_die && num_ranks == 2 && + dram_width == 4) { + // noskip RODT row 48 ohms + rodt_row_skip_mask &= + ~(1 << ddr4_rodt_ctl_48_ohm); + } + // for now, leave all rows eligible when we have + // mini-DIMMs... + if (spd_dimm_type == 5 || spd_dimm_type == 6) + rodt_row_skip_mask = 0; + // for now, leave all rows eligible when we have + // a 2-slot 1-rank config + if (dimm_count == 2 && num_ranks == 1) + rodt_row_skip_mask = 0; + + debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n"); + for (rtt_idx = min_rtt_nom_idx; + rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { + rtt_nom = imp_val->rtt_nom_table[rtt_idx]; + + for (rodt_ctl = max_rodt_ctl; + rodt_ctl >= min_rodt_ctl; --rodt_ctl) { + rodt_score = 0; + for (rankx = 0; rankx < dimm_count * 4; + rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n", + rtt_nom, rodt_ctl, rankx, + rl_score[rtt_nom][rodt_ctl][rankx].score); + rodt_score += + rl_score[rtt_nom][rodt_ctl][rankx].score; + } + // FIXME: do we need to skip RODT rows + // here, like we do below in the + // by-RANK settings? + + /* + * When using automatic ODT settings use + * the ODT settings associated with the + * best score for all of the tested ODT + * combinations. 
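+				 * Ties on the total score are broken in
+				 * favor of the higher-ohm RODT setting
+				 * (see the comparison below).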
+ */ + + if (rodt_score < best_rodt_score || + (rodt_score == best_rodt_score && + (imp_val->rodt_ohms[rodt_ctl] > + imp_val->rodt_ohms[auto_rodt_ctl]))) { + debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n", + rodt_ctl, + imp_val->rodt_ohms[rodt_ctl], + rodt_score, + best_rodt_score); + best_rodt_score = rodt_score; + auto_rodt_ctl = rodt_ctl; + auto_rtt_nom = rtt_nom; + } + } + } + + mp1.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS1(if_num)); + + if (ddr_rtt_nom_auto) { + /* Store the automatically set RTT_NOM value */ + if (dyn_rtt_nom_mask & 1) + mp1.s.rtt_nom_00 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 2) + mp1.s.rtt_nom_01 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 4) + mp1.s.rtt_nom_10 = auto_rtt_nom; + if (dyn_rtt_nom_mask & 8) + mp1.s.rtt_nom_11 = auto_rtt_nom; + } else { + /* + * restore the manual settings to the register + */ + mp1.s.rtt_nom_00 = default_rtt_nom[0]; + mp1.s.rtt_nom_01 = default_rtt_nom[1]; + mp1.s.rtt_nom_10 = default_rtt_nom[2]; + mp1.s.rtt_nom_11 = default_rtt_nom[3]; + } + + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), + mp1.u64); + debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], + imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], + mp1.s.rtt_nom_11, + mp1.s.rtt_nom_10, + mp1.s.rtt_nom_01, + mp1.s.rtt_nom_00); + + debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], + imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], + extr_wr(mp1.u64, 3), + extr_wr(mp1.u64, 2), + extr_wr(mp1.u64, 1), + extr_wr(mp1.u64, 0)); + + debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->dic_ohms[mp1.s.dic_11], + imp_val->dic_ohms[mp1.s.dic_10], + imp_val->dic_ohms[mp1.s.dic_01], + imp_val->dic_ohms[mp1.s.dic_00], + mp1.s.dic_11, + mp1.s.dic_10, + mp1.s.dic_01, + mp1.s.dic_00); + + if (ddr_type == DDR4_DRAM) { + union cvmx_lmcx_modereg_params2 mp2; + /* + * We must read the CSR, and not depend on + * odt_config[odt_idx].odt_mask2, since we could + * have overridden values with envvars. + * NOTE: this corrects the printout, since the + * CSR is not written with the old values... 
+ */ + mp2.u64 = lmc_rd(priv, + CVMX_LMCX_MODEREG_PARAMS2(if_num)); + + debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", + imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], + imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], + mp2.s.rtt_park_11, + mp2.s.rtt_park_10, + mp2.s.rtt_park_01, + mp2.s.rtt_park_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", + "VREF_RANGE", + mp2.s.vref_range_11, + mp2.s.vref_range_10, + mp2.s.vref_range_01, + mp2.s.vref_range_00); + + debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", + "VREF_VALUE", + mp2.s.vref_value_11, + mp2.s.vref_value_10, + mp2.s.vref_value_01, + mp2.s.vref_value_00); + } + + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + if (ddr_rodt_ctl_auto) { + cc2.cn78xx.rodt_ctl = auto_rodt_ctl; + } else { + // back to the original setting + cc2.cn78xx.rodt_ctl = default_rodt_ctl; + } + lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); + cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); + debug("Read ODT_CTL : 0x%x (%d ohms)\n", + cc2.cn78xx.rodt_ctl, + imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); + + /* + * Use the delays associated with the best score for + * each individual rank + */ + debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n"); + + // this is the the RANK MAJOR LOOP + for (rankx = 0; rankx < dimm_count * 4; rankx++) + rank_major_loop(priv, rankx, rl_score); + } /* Evaluation block */ + } /* while(rl_dbg_loops--) */ + + ctl.cn78xx.ddr2t = save_ddr2t; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); + ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + /* Display final 2T value */ + debug("DDR2T : %6d\n", + ctl.cn78xx.ddr2t); + + ddr_init_seq(priv, rank_mask, if_num); + + for (rankx = 0; rankx < dimm_count * 4; rankx++) { + u64 value; + int parameter_set = 0; + + if (!(rank_mask & (1 << rankx))) + continue; + + rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + + for (i = 0; i < 9; ++i) { + s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d", + if_num, rankx, i); + if (s) { + parameter_set |= 1; + value = simple_strtoul(s, NULL, 0); + + upd_rl_rank(&rl_rank, i, value); + } + } + + s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx); + if (s) { + parameter_set |= 1; + value = simple_strtoull(s, NULL, 0); + rl_rank.u64 = value; + } + + if (parameter_set) { + lmc_wr(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), + rl_rank.u64); + rl_rank.u64 = lmc_rd(priv, + CVMX_LMCX_RLEVEL_RANKX(rankx, + if_num)); + display_rl(if_num, rl_rank, rankx); + } + } +} + +int init_octeon3_ddr3_interface(struct ddr_priv *priv, + struct ddr_conf *_ddr_conf, u32 _ddr_hertz, + u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num, + u32 _if_mask) +{ + union cvmx_lmcx_control ctrl; + int ret; + char *s; + int i; + + if_num = _if_num; + ddr_hertz = _ddr_hertz; + ddr_conf = _ddr_conf; + if_mask = _if_mask; + odt_1rank_config = ddr_conf->odt_1rank_config; + odt_2rank_config = ddr_conf->odt_2rank_config; + odt_4rank_config = ddr_conf->odt_4rank_config; + dimm_config_table = ddr_conf->dimm_config_table; + c_cfg = &ddr_conf->custom_lmc_config; + + /* + * Compute clock rates to the nearest picosecond. 
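+	 * For example, assuming hertz_to_psecs() rounds 10^12 / Hz to the
+	 * nearest picosecond, a 1200 MHz DDR clock gives tclk_psecs = 833.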
+ */ + tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */ + eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */ + + dimm_count = 0; + /* Accumulate and report all the errors before giving up */ + fatal_error = 0; + + /* Flag that indicates safe DDR settings should be used */ + safe_ddr_flag = 0; + if_64b = 1; /* Octeon II Default: 64bit interface width */ + mem_size_mbytes = 0; + bank_bits = 0; + column_bits_start = 1; + use_ecc = 1; + min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0; + spd_package = 0; + spd_rawcard = 0; + spd_rawcard_aorb = 0; + spd_rdimm_registers = 0; + is_stacked_die = 0; + is_3ds_dimm = 0; // 3DS + lranks_per_prank = 1; // 3DS: logical ranks per package rank + lranks_bits = 0; // 3DS: logical ranks bits + die_capacity = 0; // in Mbits; only used for 3DS + + wl_mask_err = 0; + dyn_rtt_nom_mask = 0; + ddr_disable_chip_reset = 1; + match_wl_rtt_nom = 0; + + internal_retries = 0; + + disable_deskew_training = 0; + restart_if_dsk_incomplete = 0; + last_lane = ((if_64b) ? 8 : 4) + use_ecc; + + disable_sequential_delay_check = 0; + wl_print = WLEVEL_PRINTALL_DEFAULT; + + enable_by_rank_init = 1; // FIXME: default by-rank ON + saved_rank_mask = 0; + + node = 0; + + memset(hwl_alts, 0, sizeof(hwl_alts)); + + /* + * Initialize these to shut up the compiler. They are configured + * and used only for DDR4 + */ + ddr4_trrd_lmin = 6000; + ddr4_tccd_lmin = 6000; + + debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n", + node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid()); + + if (dimm_config_table[0].spd_addrs[0] == 0 && + !dimm_config_table[0].spd_ptrs[0]) { + printf("ERROR: No dimms specified in the dimm_config_table.\n"); + return -1; + } + + // allow some overrides to be done + + // this one controls several things related to DIMM geometry: HWL and RL + disable_sequential_delay_check = c_cfg->disable_sequential_delay_check; + s = lookup_env(priv, "ddr_disable_sequential_delay_check"); + if (s) + disable_sequential_delay_check = strtoul(s, NULL, 0); + + // this one controls whether chip RESET is done, or LMC init restarted + // from step 6.9.6 + s = lookup_env(priv, "ddr_disable_chip_reset"); + if (s) + ddr_disable_chip_reset = !!strtoul(s, NULL, 0); + + // this one controls whether Deskew Training is performed + s = lookup_env(priv, "ddr_disable_deskew_training"); + if (s) + disable_deskew_training = !!strtoul(s, NULL, 0); + + if (ddr_verbose(priv)) { + printf("DDR SPD Table:"); + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { + if (dimm_config_table[didx].spd_addrs[0] == 0) + break; + + printf(" --ddr%dspd=0x%02x", if_num, + dimm_config_table[didx].spd_addrs[0]); + if (dimm_config_table[didx].spd_addrs[1] != 0) + printf(",0x%02x", + dimm_config_table[didx].spd_addrs[1]); + } + printf("\n"); + } + + /* + * Walk the DRAM Socket Configuration Table to see what is installed. 
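+	 * Only the lower (first) SPD address of each table entry is checked
+	 * here; the walk stops at the first socket that is not populated.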
+ */ + for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { + /* Check for lower DIMM socket populated */ + if (validate_dimm(priv, &dimm_config_table[didx], 0)) { + if (ddr_verbose(priv)) + report_dimm(&dimm_config_table[didx], 0, + dimm_count, if_num); + ++dimm_count; + } else { + break; + } /* Finished when there is no lower DIMM */ + } + + initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz, + ddr_ref_hertz, if_num, if_mask); + + if (!odt_1rank_config) + odt_1rank_config = disable_odt_config; + if (!odt_2rank_config) + odt_2rank_config = disable_odt_config; + if (!odt_4rank_config) + odt_4rank_config = disable_odt_config; + + s = env_get("ddr_safe"); + if (s) { + safe_ddr_flag = !!simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_safe = %d\n", + safe_ddr_flag); + } + + if (dimm_count == 0) { + printf("ERROR: DIMM 0 not detected.\n"); + return (-1); + } + + if (c_cfg->mode32b) + if_64b = 0; + + s = lookup_env(priv, "if_64b"); + if (s) + if_64b = !!simple_strtoul(s, NULL, 0); + + if (if_64b == 1) { + if (octeon_is_cpuid(OCTEON_CN70XX)) { + printf("64-bit interface width is not supported for this Octeon model\n"); + ++fatal_error; + } + } + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3; + debug("DRAM Device Type: DDR%d\n", ddr_type); + + if (ddr_type == DDR4_DRAM) { + int spd_module_type; + int asymmetric; + const char *signal_load[4] = { "", "MLS", "3DS", "RSV" }; + + imp_val = &ddr4_impedence_val; + + spd_addr = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_ORGANIZATION); + spd_banks = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_DENSITY_BANKS); + + bank_bits = + (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3); + /* Controller can only address 4 bits. */ + bank_bits = min((int)bank_bits, 4); + + spd_package = + 0XFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_PACKAGE_TYPE); + if (spd_package & 0x80) { // non-monolithic device + is_stacked_die = ((spd_package & 0x73) == 0x11); + debug("DDR4: Package Type 0x%02x (%s), %d die\n", + spd_package, signal_load[(spd_package & 3)], + ((spd_package >> 4) & 7) + 1); + is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS? + if (is_3ds_dimm) { // is it 3DS? + lranks_per_prank = ((spd_package >> 4) & 7) + 1; + // FIXME: should make sure it is only 2H or 4H + // or 8H? + lranks_bits = lranks_per_prank >> 1; + if (lranks_bits == 4) + lranks_bits = 3; + } + } else if (spd_package != 0) { + // FIXME: print non-zero monolithic device definition + debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n", + ((spd_package >> 4) & 7) + 1, (spd_package & 3)); + } + + asymmetric = (spd_org >> 6) & 1; + if (asymmetric) { + int spd_secondary_pkg = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_SECONDARY_PACKAGE_TYPE); + debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n", + spd_secondary_pkg); + } else { + u64 bus_width = + 8 << (0x07 & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH)); + u64 ddr_width = 4 << ((spd_org >> 0) & 0x7); + u64 module_cap; + int shift = (spd_banks & 0x0F); + + die_capacity = (shift < 8) ? (256UL << shift) : + ((12UL << (shift & 1)) << 10); + debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n", + (die_capacity > 512) ? (die_capacity >> 10) : + die_capacity, (die_capacity > 512) ? 
'G' : 'M'); + module_cap = ((u64)die_capacity << 20) / 8UL * + bus_width / ddr_width * + (1UL + ((spd_org >> 3) & 0x7)); + + // is it 3DS? + if (is_3ds_dimm) { + module_cap *= (u64)(((spd_package >> 4) & 7) + + 1); + } + debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n", + module_cap >> 30); + } + + spd_rawcard = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REFERENCE_RAW_CARD); + debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard); + + spd_module_type = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_KEY_BYTE_MODULE_TYPE); + if (spd_module_type & 0x80) { // HYBRID module + debug("DDR4: HYBRID module, type %s\n", + ((spd_module_type & 0x70) == + 0x10) ? "NVDIMM" : "UNKNOWN"); + } + spd_thermal_sensor = + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MODULE_THERMAL_SENSOR); + spd_dimm_type = spd_module_type & 0x0F; + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || + (spd_dimm_type == 8); + if (spd_rdimm) { + u16 spd_mfgr_id, spd_register_rev, spd_mod_attr; + static const u16 manu_ids[4] = { + 0xb380, 0x3286, 0x9780, 0xb304 + }; + static const char *manu_names[4] = { + "XXX", "XXXXXXX", "XX", "XXXXX" + }; + int mc; + + spd_mfgr_id = + (0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) | + ((0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB)) + << 8); + spd_register_rev = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_REGISTER_REVISION_NUMBER); + for (mc = 0; mc < 4; mc++) + if (manu_ids[mc] == spd_mfgr_id) + break; + + debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n", + (mc >= 4) ? "UNKNOWN" : manu_names[mc], + spd_register_rev); + + // RAWCARD A or B must be bit 7=0 and bits 4-0 + // either 00000(A) or 00001(B) + spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1); + // RDIMM Module Attributes + spd_mod_attr = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE); + spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); + debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n", + spd_mod_attr, (spd_mod_attr >> 4) + 1, + ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), + spd_rdimm_registers); + } + dimm_type_name = ddr4_dimm_types[spd_dimm_type]; + } else { /* if (ddr_type == DDR4_DRAM) */ + const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" }; + + imp_val = &ddr3_impedence_val; + + spd_addr = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESSING_ROW_COL_BITS); + spd_org = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MODULE_ORGANIZATION); + spd_banks = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_DENSITY_BANKS) & 0xff; + + bank_bits = 3 + ((spd_banks >> 4) & 0x7); + /* Controller can only address 3 bits. 
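+		 * DDR3 SPD byte 4 bits 6:4 encode the bank address bits
+		 * (0 => 8 banks), so standard 8-bank DDR3 parts already
+		 * give bank_bits = 3 here.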
*/ + bank_bits = min((int)bank_bits, 3); + spd_dimm_type = + 0x0f & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_KEY_BYTE_MODULE_TYPE); + spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || + (spd_dimm_type == 9); + + spd_package = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_SDRAM_DEVICE_TYPE); + if (spd_package & 0x80) { // non-standard device + debug("DDR3: Device Type 0x%02x (%s), %d die\n", + spd_package, signal_load[(spd_package & 3)], + ((1 << ((spd_package >> 4) & 7)) >> 1)); + } else if (spd_package != 0) { + // FIXME: print non-zero monolithic device definition + debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n", + ((1 << (spd_package >> 4) & 7) >> 1), + (spd_package & 3)); + } + + spd_rawcard = + 0xFF & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REFERENCE_RAW_CARD); + debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard); + spd_thermal_sensor = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MODULE_THERMAL_SENSOR); + + if (spd_rdimm) { + int spd_mfgr_id, spd_register_rev, spd_mod_attr; + + spd_mfgr_id = + (0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) | + ((0xFFU & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB)) + << 8); + spd_register_rev = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_REGISTER_REVISION_NUMBER); + debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n", + spd_mfgr_id, spd_register_rev); + // Module Attributes + spd_mod_attr = + 0xFFU & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESS_MAPPING); + spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); + debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n", + spd_mod_attr, + ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), + spd_rdimm_registers); + } + dimm_type_name = ddr3_dimm_types[spd_dimm_type]; + } + + if (spd_thermal_sensor & 0x80) { + debug("DDR%d: SPD: Thermal Sensor PRESENT\n", + (ddr_type == DDR4_DRAM) ? 4 : 3); + } + + debug("spd_addr : %#06x\n", spd_addr); + debug("spd_org : %#06x\n", spd_org); + debug("spd_banks : %#06x\n", spd_banks); + + row_bits = 12 + ((spd_addr >> 3) & 0x7); + col_bits = 9 + ((spd_addr >> 0) & 0x7); + + num_ranks = 1 + ((spd_org >> 3) & 0x7); + dram_width = 4 << ((spd_org >> 0) & 0x7); + num_banks = 1 << bank_bits; + + s = lookup_env(priv, "ddr_num_ranks"); + if (s) + num_ranks = simple_strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_enable_by_rank_init"); + if (s) + enable_by_rank_init = !!simple_strtoul(s, NULL, 0); + + // FIXME: for now, we can only handle a DDR4 2rank-1slot config + // FIXME: also, by-rank init does not work correctly if 32-bit mode... 
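+	// (i.e. by-rank init stays enabled only for a 64-bit, single-DIMM,
+	// dual-rank DDR4 configuration; any other combination simply turns
+	// the by-rank init path back off)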
+ if (enable_by_rank_init && (ddr_type != DDR4_DRAM || + dimm_count != 1 || if_64b != 1 || + num_ranks != 2)) + enable_by_rank_init = 0; + + if (enable_by_rank_init) { + struct dimm_odt_config *odt_config; + union cvmx_lmcx_modereg_params1 mp1; + union cvmx_lmcx_modereg_params2 modereg_params2; + int by_rank_rodt, by_rank_wr, by_rank_park; + + // Do ODT settings changes which work best for 2R-1S configs + debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n"); + + // setup for modifying config table values - 2 ranks and 1 DIMM + odt_config = + (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0]; + + // original was 80, first try was 60 + by_rank_rodt = ddr4_rodt_ctl_48_ohm; + s = lookup_env(priv, "ddr_by_rank_rodt"); + if (s) + by_rank_rodt = strtoul(s, NULL, 0); + + odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt; + + // this is for MODEREG_PARAMS1 fields + // fetch the original settings + mp1.u64 = odt_config->modereg_params1.u64; + + by_rank_wr = ddr4_rttwr_80ohm; // originals were 240 + s = lookup_env(priv, "ddr_by_rank_wr"); + if (s) + by_rank_wr = simple_strtoul(s, NULL, 0); + + // change specific settings here... + insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr); + insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr); + + // save final settings + odt_config->modereg_params1.u64 = mp1.u64; + + // this is for MODEREG_PARAMS2 fields + // fetch the original settings + modereg_params2.u64 = odt_config->modereg_params2.u64; + + by_rank_park = ddr4_rttpark_none; // originals were 120 + s = lookup_env(priv, "ddr_by_rank_park"); + if (s) + by_rank_park = simple_strtoul(s, NULL, 0); + + // change specific settings here... + modereg_params2.s.rtt_park_00 = by_rank_park; + modereg_params2.s.rtt_park_01 = by_rank_park; + + // save final settings + odt_config->modereg_params2.u64 = modereg_params2.u64; + } + + /* + * FIX + * Check that values are within some theoretical limits. + * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = + * 14 - 3 - 4 = 7 + * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = + * 18 - 2 - 3 = 13 + */ + if (col_bits > 13 || col_bits < 7) { + printf("Unsupported number of Col Bits: %d\n", col_bits); + ++fatal_error; + } + + /* + * FIX + * Check that values are within some theoretical limits. 
+ * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = + * 26 - 18 - 1 = 7 + * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = + * 33 - 14 - 1 = 18 + */ + if (row_bits > 18 || row_bits < 7) { + printf("Unsupported number of Row Bits: %d\n", row_bits); + ++fatal_error; + } + + s = lookup_env(priv, "ddr_rdimm_ena"); + if (s) + spd_rdimm = !!simple_strtoul(s, NULL, 0); + + wl_loops = WLEVEL_LOOPS_DEFAULT; + // accept generic or interface-specific override + s = lookup_env(priv, "ddr_wlevel_loops"); + if (!s) + s = lookup_env(priv, "ddr%d_wlevel_loops", if_num); + + if (s) + wl_loops = strtoul(s, NULL, 0); + + s = lookup_env(priv, "ddr_ranks"); + if (s) + num_ranks = simple_strtoul(s, NULL, 0); + + bunk_enable = (num_ranks > 1); + + if (octeon_is_cpuid(OCTEON_CN7XXX)) + column_bits_start = 3; + else + printf("ERROR: Unsupported Octeon model: 0x%x\n", + read_c0_prid()); + + row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b); + debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", + row_lsb); + + pbank_lsb = row_lsb + row_bits + bunk_enable; + debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb); + + if (lranks_per_prank > 1) { + pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable; + debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n", + row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb); + } + + mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20); + if (num_ranks == 4) { + /* + * Quad rank dimm capacity is equivalent to two dual-rank + * dimms. + */ + mem_size_mbytes *= 2; + } + + /* + * Mask with 1 bits set for for each active rank, allowing 2 bits + * per dimm. This makes later calculations simpler, as a variety + * of CSRs use this layout. This init needs to be updated for dual + * configs (ie non-identical DIMMs). + * + * Bit 0 = dimm0, rank 0 + * Bit 1 = dimm0, rank 1 + * Bit 2 = dimm1, rank 0 + * Bit 3 = dimm1, rank 1 + * ... + */ + rank_mask = 0x1; + if (num_ranks > 1) + rank_mask = 0x3; + if (num_ranks > 2) + rank_mask = 0xf; + + for (i = 1; i < dimm_count; i++) + rank_mask |= ((rank_mask & 0x3) << (2 * i)); + + /* + * If we are booting from RAM, the DRAM controller is + * already set up. Just return the memory size + */ + if (priv->flags & FLAG_RAM_RESIDENT) { + debug("Ram Boot: Skipping LMC config\n"); + return mem_size_mbytes; + } + + if (ddr_type == DDR4_DRAM) { + spd_ecc = + !!(read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8); + } else { + spd_ecc = + !!(read_spd + (&dimm_config_table[0], 0, + DDR3_SPD_MEMORY_BUS_WIDTH) & 8); + } + + char rank_spec[8]; + + printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package); + debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n", + dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "", + rank_spec, + (spd_ecc) ? 
"ECC" : "non-ECC", row_bits, col_bits, bank_bits); + + if (ddr_type == DDR4_DRAM) { + spd_cas_latency = + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24); + } else { + spd_cas_latency = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_CAS_LATENCIES_LSB); + spd_cas_latency |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_CAS_LATENCIES_MSB)) << 8); + } + debug("spd_cas_latency : %#06x\n", spd_cas_latency); + + if (ddr_type == DDR4_DRAM) { + /* + * No other values for DDR4 MTB and FTB are specified at the + * current time so don't bother reading them. Can't speculate + * how new values will be represented. + */ + int spdmtb = 125; + int spdftb = 1; + + taamin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], + 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN); + + ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN); + + ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX); + + ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN); + + ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN); + + ddr4_trasmin = spdmtb * + (((read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) + + (read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff)); + + ddr4_trcmin = spdmtb * + ((((read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << + 8) + (read_spd + (&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & + 0xff)) + + spdftb * (signed char)read_spd(&dimm_config_table[0], + 0, + DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN); + + ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff)); + + ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff)); + + ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff)); + + ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << + 8) + (read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff)); + + ddr4_trrd_smin = spdmtb * 
read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN); + + ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN); + + ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) + + spdftb * (signed char)read_spd(&dimm_config_table[0], 0, + DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN); + + debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb); + debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb); + + debug("%-45s : %6d ps (%ld MT/s)\n", + "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin, + pretty_psecs_to_mts(ddr4_tckavgmin)); + debug("%-45s : %6d ps\n", + "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax); + debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)", + taamin); + debug("%-45s : %6d ps\n", + "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin); + debug("%-45s : %6d ps\n", + "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin); + debug("%-45s : %6d ps\n", + "Minimum Active to Precharge Delay (tRASmin)", + ddr4_trasmin); + debug("%-45s : %6d ps\n", + "Minimum Active to Active/Refr. Delay (tRCmin)", + ddr4_trcmin); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC1min)", + ddr4_trfc1min); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC2min)", + ddr4_trfc2min); + debug("%-45s : %6d ps\n", + "Minimum Refresh Recovery Delay (tRFC4min)", + ddr4_trfc4min); + debug("%-45s : %6d ps\n", + "Minimum Four Activate Window Time (tFAWmin)", + ddr4_tfawmin); + debug("%-45s : %6d ps\n", + "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin); + debug("%-45s : %6d ps\n", + "Minimum Act. to Act. 
Delay (tRRD_Lmin)", ddr4_trrd_lmin); + debug("%-45s : %6d ps\n", + "Minimum CAS to CAS Delay Time (tCCD_Lmin)", + ddr4_tccd_lmin); + +#define DDR4_TWR 15000 +#define DDR4_TWTR_S 2500 + + tckmin = ddr4_tckavgmin; + twr = DDR4_TWR; + trcd = ddr4_trdcmin; + trrd = ddr4_trrd_smin; + trp = ddr4_trpmin; + tras = ddr4_trasmin; + trc = ddr4_trcmin; + trfc = ddr4_trfc1min; + twtr = DDR4_TWTR_S; + tfaw = ddr4_tfawmin; + + if (spd_rdimm) { + spd_addr_mirror = read_spd(&dimm_config_table[0], 0, + DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & + 0x1; + } else { + spd_addr_mirror = read_spd(&dimm_config_table[0], 0, + DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1; + } + debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); + } else { + spd_mtb_dividend = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND); + spd_mtb_divisor = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR); + spd_tck_min = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN); + spd_taa_min = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_CAS_LATENCY_TAAMIN); + + spd_twr = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN); + spd_trcd = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN); + spd_trrd = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN); + spd_trp = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN); + spd_tras = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN); + spd_tras |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8); + spd_trc = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN); + spd_trc |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4); + spd_trfc = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN); + spd_trfc |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << + 8); + spd_twtr = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN); + spd_trtp = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN); + spd_tfaw = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN); + spd_tfaw |= + ((0xff & + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8); + spd_addr_mirror = + 0xff & read_spd(&dimm_config_table[0], 0, + DDR3_SPD_ADDRESS_MAPPING) & 0x1; + /* Only address mirror unbuffered dimms. */ + spd_addr_mirror = spd_addr_mirror && !spd_rdimm; + ftb_dividend = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4; + ftb_divisor = + read_spd(&dimm_config_table[0], 0, + DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf; + /* Make sure that it is not 0 */ + ftb_divisor = (ftb_divisor == 0) ? 
1 : ftb_divisor; + + debug("spd_twr : %#06x\n", spd_twr); + debug("spd_trcd : %#06x\n", spd_trcd); + debug("spd_trrd : %#06x\n", spd_trrd); + debug("spd_trp : %#06x\n", spd_trp); + debug("spd_tras : %#06x\n", spd_tras); + debug("spd_trc : %#06x\n", spd_trc); + debug("spd_trfc : %#06x\n", spd_trfc); + debug("spd_twtr : %#06x\n", spd_twtr); + debug("spd_trtp : %#06x\n", spd_trtp); + debug("spd_tfaw : %#06x\n", spd_tfaw); + debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); + + mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor; + taamin = mtb_psec * spd_taa_min; + taamin += ftb_dividend * + (signed char)read_spd(&dimm_config_table[0], + 0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / + ftb_divisor; + tckmin = mtb_psec * spd_tck_min; + tckmin += ftb_dividend * + (signed char)read_spd(&dimm_config_table[0], + 0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / + ftb_divisor; + + twr = spd_twr * mtb_psec; + trcd = spd_trcd * mtb_psec; + trrd = spd_trrd * mtb_psec; + trp = spd_trp * mtb_psec; + tras = spd_tras * mtb_psec; + trc = spd_trc * mtb_psec; + trfc = spd_trfc * mtb_psec; + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) { + // default to this - because it works... + int new_trfc = 260000; + + s = env_get("ddr_trfc"); + if (s) { + new_trfc = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_trfc = %d\n", + new_trfc); + if (new_trfc < 160000 || new_trfc > 260000) { + // back to default if out of range + new_trfc = 260000; + } + } + debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n", + node, if_num, trfc, new_trfc); + trfc = new_trfc; + } + + twtr = spd_twtr * mtb_psec; + trtp = spd_trtp * mtb_psec; + tfaw = spd_tfaw * mtb_psec; + + debug("Medium Timebase (MTB) : %6d ps\n", + mtb_psec); + debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n", + tckmin, pretty_psecs_to_mts(tckmin)); + debug("Minimum CAS Latency Time (taamin) : %6d ps\n", + taamin); + debug("Write Recovery Time (tWR) : %6d ps\n", + twr); + debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n", + trcd); + debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", + trrd); + debug("Minimum Row Precharge Delay (tRP) : %6d ps\n", + trp); + debug("Minimum Active to Precharge (tRAS) : %6d ps\n", + tras); + debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", + trc); + debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", + trfc); + debug("Internal write to read command delay (tWTR) : %6d ps\n", + twtr); + debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", + trtp); + debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", + tfaw); + } + + /* + * When the cycle time is within 1 psec of the minimum accept it + * as a slight rounding error and adjust it to exactly the minimum + * cycle time. This avoids an unnecessary warning. + */ + if (abs(tclk_psecs - tckmin) < 2) + tclk_psecs = tckmin; + + if (tclk_psecs < (u64)tckmin) { + printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n", + tclk_psecs, (ulong)tckmin); + } + + debug("DDR Clock Rate (tCLK) : %6ld ps\n", + tclk_psecs); + debug("Core Clock Rate (eCLK) : %6ld ps\n", + eclk_psecs); + + s = env_get("ddr_use_ecc"); + if (s) { + use_ecc = !!simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_use_ecc = %d\n", + use_ecc); + } + use_ecc = use_ecc && spd_ecc; + + if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff) + : (use_ecc ? 0x01f : 0x0f); + + debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n", + if_64b ? 64 : 32, use_ecc ? 
"+ECC" : "", if_bytemask); + + debug("\n------ Board Custom Configuration Settings ------\n"); + debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx); + debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx); + debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl); + debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl); + debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency); + debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en); + debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm); + debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm); + debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto); + debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto); + if (spd_rdimm) + debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", + c_cfg->rlevel_comp_offset_rdimm); + else + debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", + c_cfg->rlevel_comp_offset_udimm); + debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute); + debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm); + debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm); + debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2); + debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset); + debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset); + debug("-------------------------------------------------\n"); + + cl = divide_roundup(taamin, tclk_psecs); + + debug("Desired CAS Latency : %6d\n", cl); + + min_cas_latency = c_cfg->min_cas_latency; + + s = lookup_env(priv, "ddr_min_cas_latency"); + if (s) + min_cas_latency = simple_strtoul(s, NULL, 0); + + debug("CAS Latencies supported in DIMM :"); + base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4; + for (i = 0; i < 32; ++i) { + if ((spd_cas_latency >> i) & 1) { + debug(" %d", i + base_cl); + max_cas_latency = i + base_cl; + if (min_cas_latency == 0) + min_cas_latency = i + base_cl; + } + } + debug("\n"); + + /* + * Use relaxed timing when running slower than the minimum + * supported speed. Adjust timing to match the smallest supported + * CAS Latency. + */ + if (min_cas_latency > cl) { + ulong adjusted_tclk = taamin / min_cas_latency; + + cl = min_cas_latency; + debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n", + tclk_psecs, adjusted_tclk); + tclk_psecs = adjusted_tclk; + } + + s = env_get("ddr_cas_latency"); + if (s) { + override_cas_latency = simple_strtoul(s, NULL, 0); + printf("Parameter found in environment. ddr_cas_latency = %d\n", + override_cas_latency); + } + + /* Make sure that the selected cas latency is legal */ + for (i = (cl - base_cl); i < 32; ++i) { + if ((spd_cas_latency >> i) & 1) { + cl = i + base_cl; + break; + } + } + + if (max_cas_latency < cl) + cl = max_cas_latency; + + if (override_cas_latency != 0) + cl = override_cas_latency; + + debug("CAS Latency : %6d\n", cl); + + if ((cl * tckmin) > 20000) { + debug("(CLactual * tckmin) = %d exceeds 20 ns\n", + (cl * tckmin)); + } + + if (tclk_psecs < (ulong)tckmin) { + printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n", + tclk_psecs, (ulong)tckmin); + } + + if (num_banks != 4 && num_banks != 8 && num_banks != 16) { + printf("Unsupported number of banks %d. 
Must be 4 or 8.\n", + num_banks); + ++fatal_error; + } + + if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) { + printf("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + if (octeon_is_cpuid(OCTEON_CN78XX) || + octeon_is_cpuid(OCTEON_CN73XX) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + if (dram_width != 8 && dram_width != 16 && dram_width != 4) { + printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n", + dram_width); + ++fatal_error; + } + } else if (dram_width != 8 && dram_width != 16) { + printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n", + dram_width); + ++fatal_error; + } + + /* + ** Bail out here if things are not copasetic. + */ + if (fatal_error) + return (-1); + + /* + * 4.8.4 LMC RESET Initialization + * + * The purpose of this step is to assert/deassert the RESET# pin at the + * DDR3/DDR4 parts. + * + * This LMC RESET step is done for all enabled LMCs. + */ + perform_lmc_reset(priv, node, if_num); + + // Make sure scrambling is disabled during init... + ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); + ctrl.s.scramble_ena = 0; + lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); + + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0); + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0); + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) + lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0); + + odt_idx = min(dimm_count - 1, 3); + + switch (num_ranks) { + case 1: + odt_config = odt_1rank_config; + break; + case 2: + odt_config = odt_2rank_config; + break; + case 4: + odt_config = odt_4rank_config; + break; + default: + odt_config = disable_odt_config; + printf("Unsupported number of ranks: %d\n", num_ranks); + ++fatal_error; + } + + /* + * 4.8.5 Early LMC Initialization + * + * All of DDR PLL, LMC CK, and LMC DRESET initializations must be + * completed prior to starting this LMC initialization sequence. + * + * Perform the following five substeps for early LMC initialization: + * + * 1. Software must ensure there are no pending DRAM transactions. + * + * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0, + * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0, + * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM, + * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2, + * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with + * appropriate values. All sections in this chapter can be used to + * derive proper register settings. 
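 *
 *    Most of those CSRs are programmed by dedicated helpers invoked
 *    right below (lmc_config(), lmc_control(), lmc_timing_params0/1/2(),
 *    lmc_modereg_params0..3(), lmc_nxm(), lmc_wodt_mask(),
 *    lmc_rodt_mask(), lmc_comp_ctl2(), lmc_phy_ctl(), lmc_ext_config(),
 *    lmc_ext_config2() and lmc_dimm01_params()), so substep 2 is spread
 *    across those calls rather than done as one block of register writes.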
+ */ + + /* LMC(0)_CONFIG */ + lmc_config(priv); + + /* LMC(0)_CONTROL */ + lmc_control(priv); + + /* LMC(0)_TIMING_PARAMS0 */ + lmc_timing_params0(priv); + + /* LMC(0)_TIMING_PARAMS1 */ + lmc_timing_params1(priv); + + /* LMC(0)_TIMING_PARAMS2 */ + lmc_timing_params2(priv); + + /* LMC(0)_MODEREG_PARAMS0 */ + lmc_modereg_params0(priv); + + /* LMC(0)_MODEREG_PARAMS1 */ + lmc_modereg_params1(priv); + + /* LMC(0)_MODEREG_PARAMS2 */ + lmc_modereg_params2(priv); + + /* LMC(0)_MODEREG_PARAMS3 */ + lmc_modereg_params3(priv); + + /* LMC(0)_NXM */ + lmc_nxm(priv); + + /* LMC(0)_WODT_MASK */ + lmc_wodt_mask(priv); + + /* LMC(0)_RODT_MASK */ + lmc_rodt_mask(priv); + + /* LMC(0)_COMP_CTL2 */ + lmc_comp_ctl2(priv); + + /* LMC(0)_PHY_CTL */ + lmc_phy_ctl(priv); + + /* LMC(0)_EXT_CONFIG */ + lmc_ext_config(priv); + + /* LMC(0)_EXT_CONFIG2 */ + lmc_ext_config2(priv); + + /* LMC(0)_DIMM0/1_PARAMS */ + lmc_dimm01_params(priv); + + ret = lmc_rank_init(priv); + if (ret < 0) + return 0; /* 0 indicates problem */ + + lmc_config_2(priv); + + lmc_write_leveling(priv); + + lmc_read_leveling(priv); + + lmc_workaround(priv); + + ret = lmc_sw_write_leveling(priv); + if (ret < 0) + return 0; /* 0 indicates problem */ + + // this sometimes causes stack overflow crashes.. + // display only for DDR4 RDIMMs. + if (ddr_type == DDR4_DRAM && spd_rdimm) { + int i; + + for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now.. + display_mpr_page(priv, rank_mask, if_num, i); + } + + lmc_dll(priv); + + lmc_workaround_2(priv); + + lmc_final(priv); + + lmc_scrambling(priv); + + return mem_size_mbytes; +} + +///// HW-assist byte DLL offset tuning ////// + +static int cvmx_dram_get_num_lmc(struct ddr_priv *priv) +{ + union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; + + if (octeon_is_cpuid(OCTEON_CN70XX)) + return 1; + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) { + // sample LMC1 + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1)); + if (lmcx_dll_ctl2.cn78xx.intf_en) + return 2; + else + return 1; + } + + // for CN78XX, LMCs are always active in pairs, and always LMC0/1 + // so, we sample LMC2 to see if 2 and 3 are active + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2)); + if (lmcx_dll_ctl2.cn78xx.intf_en) + return 4; + else + return 2; +} + +// got to do these here, even though already defined in BDK + +// all DDR3, and DDR4 x16 today, use only 3 bank bits; +// DDR4 x4 and x8 always have 4 bank bits +// NOTE: this will change in the future, when DDR4 x16 devices can +// come with 16 banks!! FIXME!! +static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc) +{ + union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl; + int bank_width; + + // can always read this + lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc)); + + if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn + return 0; + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc)); + lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc)); + + bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) && + (lmcx_config.s.bg2_enable)) ? 
4 : 3; + + return bank_width; +} + +#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) +#define ADDRESS_HOLE 0x10000000ULL + +static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address, + int *node, int *lmc, int *dimm, + int *prank, int *lrank, int *bank, + int *row, int *col) +{ + int bank_lsb, xbits; + union cvmx_l2c_ctl l2c_ctl; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_control lmcx_control; + union cvmx_lmcx_ext_config ext_config; + int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; + int bank_width; + int dimm_lsb; + int dimm_width; + int prank_lsb, lrank_lsb; + int prank_width, lrank_width; + int row_lsb; + int row_width; + int col_hi_lsb; + int col_hi_width; + int col_hi; + + if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) + bitno = 18; + + *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */ + + address &= (1ULL << 40) - 1; // lop off any node bits or above + if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above + address -= ADDRESS_HOLE; + + /* Determine the LMC controllers */ + l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL); + + /* xbits depends on number of LMCs */ + xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0 + bank_lsb = 7 + xbits; + + /* LMC number is probably aliased */ + if (l2c_ctl.s.disidxalias) { + *lmc = EXTRACT(address, 7, xbits); + } else { + *lmc = EXTRACT(address, 7, xbits) ^ + EXTRACT(address, bitno, xbits) ^ + EXTRACT(address, 12, xbits); + } + + /* Figure out the bank field width */ + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc)); + ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc)); + bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc); + + /* Extract additional info from the LMC_CONFIG CSR */ + dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits; + dimm_width = 40 - dimm_lsb; + prank_lsb = dimm_lsb - lmcx_config.s.rank_ena; + prank_width = dimm_lsb - prank_lsb; + lrank_lsb = prank_lsb - ext_config.s.dimm0_cid; + lrank_width = prank_lsb - lrank_lsb; + row_lsb = 14 + lmcx_config.s.row_lsb + xbits; + row_width = lrank_lsb - row_lsb; + col_hi_lsb = bank_lsb + bank_width; + col_hi_width = row_lsb - col_hi_lsb; + + /* Extract the parts of the address */ + *dimm = EXTRACT(address, dimm_lsb, dimm_width); + *prank = EXTRACT(address, prank_lsb, prank_width); + *lrank = EXTRACT(address, lrank_lsb, lrank_width); + *row = EXTRACT(address, row_lsb, row_width); + + /* bank calculation may be aliased... */ + lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc)); + if (lmcx_control.s.xor_bank) { + *bank = EXTRACT(address, bank_lsb, bank_width) ^ + EXTRACT(address, 12 + xbits, bank_width); + } else { + *bank = EXTRACT(address, bank_lsb, bank_width); + } + + /* LMC number already extracted */ + col_hi = EXTRACT(address, col_hi_lsb, col_hi_width); + *col = EXTRACT(address, 3, 4) | (col_hi << 4); + /* Bus byte is address bits [2:0]. 
Unused here */ +} + +// end of added workarounds + +// NOTE: "mode" argument: +// DBTRAIN_TEST: for testing using GP patterns, includes ECC +// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns) +// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC +// NOTE: trust the caller to specify the correct/supported mode +// +static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p, + int mode, u64 *xor_data) +{ + u64 p1; + u64 k; + int errors = 0; + + u64 mpr_data0, mpr_data1; + u64 bad_bits[2] = { 0, 0 }; + + int node_address, lmc, dimm; + int prank, lrank; + int bank, row, col; + int save_or_dis; + int byte; + int ba_loop, ba_bits; + + union cvmx_lmcx_rlevel_ctl rlevel_ctl; + union cvmx_lmcx_dbtrain_ctl dbtrain_ctl; + union cvmx_lmcx_phy_ctl phy_ctl; + + int biter_errs; + + // FIXME: K iterations set to 4 for now. + // FIXME: decrement to increase interations. + // FIXME: must be no less than 22 to stay above an LMC hash field. + int kshift = 27; + + const char *s; + int node = 0; + + // allow override default setting for kshift + s = env_get("ddr_tune_set_kshift"); + if (s) { + int temp = simple_strtoul(s, NULL, 0); + + if (temp < 22 || temp > 28) { + debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n", + node, if_num, temp, kshift); + } else { + debug("N%d.LMC%d: overriding kshift (%d) to %d\n", + node, if_num, kshift, temp); + kshift = temp; + } + } + + /* + * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0. + */ + rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); + save_or_dis = rlevel_ctl.s.or_dis; + /* or_dis must be disabled for this sequence */ + rlevel_ctl.s.or_dis = 0; + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + + /* + * NOTE: this step done in the calling routine(s)... + * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern + * of choice. + * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower + * (rising edge) 64 bits of data. + * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper + * (falling edge) 64 bits of data. + * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower + * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data. + */ + + // final address must include LMC and node + p |= (if_num << 7); /* Map address into proper interface */ + p |= (u64)node << CVMX_NODE_MEM_SHIFT; // map to node + + /* + * Add base offset to both test regions to not clobber u-boot stuff + * when running from L2 for NAND boot. + */ + p += 0x20000000; // offset to 512MB, ie above THE HOLE!!! + p |= 1ull << 63; // needed for OCTEON + + errors = 0; + + cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm, + &prank, &lrank, &bank, &row, &col); + debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n", + __func__, p, node_address, lmc, dimm, prank, lrank, bank, + row, col); + + // only check once per call, and ignore if no match... + if ((int)node != node_address) { + printf("ERROR: Node address mismatch\n"); + return 0; + } + if (lmc != if_num) { + printf("ERROR: LMC address mismatch\n"); + return 0; + } + + /* + * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as + * it’s a one-shot operation). This is to get into the habit of + * resetting PHY’s SILO to the original 0 location. + */ + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_reset = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + /* + * Walk through a range of addresses avoiding bits that alias + * interfaces on the CN88XX. 
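 *
 * Rough arithmetic, assuming the defaults above: the walk covers a
 * 512 MB window (1 << 29) in strides of (1 << kshift).  With the
 * default kshift of 27 that is a 128 MB stride, i.e. four base
 * addresses per call; overriding ddr_tune_set_kshift down to the
 * minimum accepted value of 22 shrinks the stride to 4 MB and raises
 * the count to 128 iterations.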
+ */ + + // FIXME: want to try to keep the K increment from affecting the + // LMC via hash, so keep it above bit 21 we also want to keep k + // less than the base offset of bit 29 (512MB) + + for (k = 0; k < (1UL << 29); k += (1UL << kshift)) { + // FIXME: the sequence will interate over 1/2 cacheline + // FIXME: for each unit specified in "read_cmd_count", + // FIXME: so, we setup each sequence to do the max cachelines + // it can + + p1 = p + k; + + cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc, + &dimm, &prank, &lrank, &bank, + &row, &col); + + /* + * 2) Setup the fields of the CSR DBTRAIN_CTL as follows: + * a. COL, ROW, BA, BG, PRANK points to the starting point + * of the address. + * You can just set them to all 0. + * b. RW_TRAIN – set this to 1. + * c. TCCD_L – set this to 0. + * d. READ_CMD_COUNT – instruct the sequence to the how many + * writes/reads. + * It is 5 bits field, so set to 31 of maximum # of r/w. + */ + dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num)); + dbtrain_ctl.s.column_a = col; + dbtrain_ctl.s.row_a = row; + dbtrain_ctl.s.bg = (bank >> 2) & 3; + dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME? + dbtrain_ctl.s.lrank = lrank; // FIXME? + dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI); + dbtrain_ctl.s.write_ena = 1; + dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || + octeon_is_cpuid(OCTEON_CNF75XX)) { + // max count on chips that support it + dbtrain_ctl.s.cmd_count_ext = 3; + } else { + // max count pass 1.x + dbtrain_ctl.s.cmd_count_ext = 0; + } + + dbtrain_ctl.s.rw_train = 1; + dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI); + // LFSR should only be on when chip supports it... + dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0; + + biter_errs = 0; + + // for each address, iterate over the 4 "banks" in the BA + for (ba_loop = 0, ba_bits = bank & 3; + ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) { + dbtrain_ctl.s.ba = ba_bits; + lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num), + dbtrain_ctl.u64); + + /* + * We will use the RW_TRAINING sequence (14) for + * this task. + * + * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, + * SEQ_CTL[INIT_START] = 1). + * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion. + */ + oct3_ddr3_seq(priv, prank, if_num, 14); + + /* + * 6) Read MPR_DATA0 and MPR_DATA1 for results. + * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results + * for DQ63:DQ0. (1 means MATCH, 0 means FAIL). + * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results + * for ECC bit7:0. + */ + mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); + mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num)); + + /* + * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically + * clears this as it’s a one-shot operation). + * This is to get into the habit of resetting PHY’s + * SILO to the original 0 location. + */ + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); + phy_ctl.s.phy_reset = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); + + // bypass any error checking or updating when DBI mode + if (mode == DBTRAIN_DBI) + continue; + + // data bytes + if (~mpr_data0) { + for (byte = 0; byte < 8; byte++) { + if ((~mpr_data0 >> (8 * byte)) & 0xffUL) + biter_errs |= (1 << byte); + } + // accumulate bad bits + bad_bits[0] |= ~mpr_data0; + } + + // include ECC byte errors + if (~mpr_data1 & 0xffUL) { + biter_errs |= (1 << 8); + bad_bits[1] |= ~mpr_data1 & 0xffUL; + } + } + + errors |= biter_errs; + } /* end for (k=...) 
*/ + + rlevel_ctl.s.or_dis = save_or_dis; + lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); + + // send the bad bits back... + if (mode != DBTRAIN_DBI && xor_data) { + xor_data[0] = bad_bits[0]; + xor_data[1] = bad_bits[1]; + } + + return errors; +} + +// setup default for byte test pattern array +// take these from the HRM section 6.9.13 +static const u64 byte_pattern_0[] = { + 0xFFAAFFFFFF55FFFFULL, // GP0 + 0x55555555AAAAAAAAULL, // GP1 + 0xAA55AAAAULL, // GP2 +}; + +static const u64 byte_pattern_1[] = { + 0xFBF7EFDFBF7FFEFDULL, // GP0 + 0x0F1E3C78F0E1C387ULL, // GP1 + 0xF0E1BF7FULL, // GP2 +}; + +// this is from Andrew via LFSR with PRBS=0xFFFFAAAA +static const u64 byte_pattern_2[] = { + 0xEE55AADDEE55AADDULL, // GP0 + 0x55AADDEE55AADDEEULL, // GP1 + 0x55EEULL, // GP2 +}; + +// this is from Mike via LFSR with PRBS=0x4A519909 +static const u64 byte_pattern_3[] = { + 0x0088CCEE0088CCEEULL, // GP0 + 0xBB552211BB552211ULL, // GP1 + 0xBB00ULL, // GP2 +}; + +static const u64 *byte_patterns[4] = { + byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 +}; + +static const u32 lfsr_patterns[4] = { + 0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL +}; + +#define NUM_BYTE_PATTERNS 4 + +#define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor + +static void setup_hw_pattern(struct ddr_priv *priv, int lmc, + const u64 *pattern_p) +{ + /* + * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern + * of choice. + * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower + * (rising edge) 64 bits of data. + * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper + * (falling edge) 64 bits of data. + * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower + * (rising edge <7:0>) and upper + * (falling edge <15:8>) ECC data. + */ + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]); +} + +static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data) +{ + union cvmx_lmcx_char_ctl char_ctl; + u32 prbs; + const char *s; + + s = env_get("ddr_lfsr_prbs"); + if (s) + prbs = simple_strtoul(s, NULL, 0); + else + prbs = data; + + /* + * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 + * here data comes from the LFSR generating a PRBS pattern + * CHAR_CTL.EN = 0 + * CHAR_CTL.SEL = 0; // for PRBS + * CHAR_CTL.DR = 1; + * CHAR_CTL.PRBS = setup for whatever type of PRBS to send + * CHAR_CTL.SKEW_ON = 1; + */ + char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc)); + char_ctl.s.en = 0; + char_ctl.s.sel = 0; + char_ctl.s.dr = 1; + char_ctl.s.prbs = prbs; + char_ctl.s.skew_on = 1; + lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64); +} + +static int choose_best_hw_patterns(int lmc, int mode) +{ + int new_mode = mode; + const char *s; + + switch (mode) { + case DBTRAIN_TEST: // always choose LFSR if chip supports it + if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { + int lfsr_enable = 1; + + s = env_get("ddr_allow_lfsr"); + if (s) { + // override? + lfsr_enable = !!strtoul(s, NULL, 0); + } + + if (lfsr_enable) + new_mode = DBTRAIN_LFSR; + } + break; + + case DBTRAIN_DBI: // possibly can allow LFSR use? 
+ break; + + case DBTRAIN_LFSR: // forced already + if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { + debug("ERROR: illegal HW assist mode %d\n", mode); + new_mode = DBTRAIN_TEST; + } + break; + + default: + debug("ERROR: unknown HW assist mode %d\n", mode); + } + + if (new_mode != mode) + debug("%s: changing mode %d to %d\n", __func__, mode, new_mode); + + return new_mode; +} + +int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, + int mode, u64 *xor_data) +{ + int pattern; + const u64 *pattern_p; + int errs, errors = 0; + + // FIXME? always choose LFSR if chip supports it??? + mode = choose_best_hw_patterns(lmc, mode); + + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + if (mode == DBTRAIN_LFSR) { + setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); + } else { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(priv, lmc, pattern_p); + } + errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data); + + debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n", + __func__, pattern, phys_addr, errs); + + errors |= errs; + } + + return errors; +} + +static void hw_assist_test_dll_offset(struct ddr_priv *priv, + int dll_offset_mode, int lmc, + int bytelane, + int if_64b, + u64 dram_tune_rank_offset, + int dram_tune_byte_bursts) +{ + int byte_offset, new_best_offset[9]; + int rank_delay_start[4][9]; + int rank_delay_count[4][9]; + int rank_delay_best_start[4][9]; + int rank_delay_best_count[4][9]; + int errors[4], off_errors, tot_errors; + int rank_mask, rankx, active_ranks; + int pattern; + const u64 *pattern_p; + int byte; + char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; + int pat_best_offset[9]; + u64 phys_addr; + int pat_beg, pat_end; + int rank_beg, rank_end; + int byte_lo, byte_hi; + union cvmx_lmcx_config lmcx_config; + u64 hw_rank_offset; + int num_lmcs = cvmx_dram_get_num_lmc(priv); + // FIXME? always choose LFSR if chip supports it??? + int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST); + int node = 0; + + if (bytelane == 0x0A) { // all bytelanes + byte_lo = 0; + byte_hi = 8; + } else { // just 1 + byte_lo = bytelane; + byte_hi = bytelane; + } + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + rank_mask = lmcx_config.s.init_status; + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + hw_rank_offset = + 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + + (num_lmcs / 2)); + + debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n", + node, __func__, lmc, (unsigned long long)hw_rank_offset); + + // start of pattern loop + // we do the set of tests for each pattern supplied... + + memset(new_best_offset, 0, sizeof(new_best_offset)); + for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { + memset(pat_best_offset, 0, sizeof(pat_best_offset)); + + if (mode == DBTRAIN_TEST) { + pattern_p = byte_patterns[pattern]; + setup_hw_pattern(priv, lmc, pattern_p); + } else { + setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); + } + + // now loop through all legal values for the DLL byte offset... + +#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable? 
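	// Illustrative cost of the sweep below: offsets run from -63 to +63
	// in steps of BYTE_OFFSET_INCR, i.e. 43 sample points per pattern,
	// and each point is exercised on every active rank before the pass
	// windows are scored.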
+ + tot_errors = 0; + + memset(rank_delay_count, 0, sizeof(rank_delay_count)); + memset(rank_delay_start, 0, sizeof(rank_delay_start)); + memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); + memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); + + for (byte_offset = -63; byte_offset < 64; + byte_offset += BYTE_OFFSET_INCR) { + // do the setup on the active LMC + // set the bytelanes DLL offsets + change_dll_offset_enable(priv, lmc, 0); + // FIXME? bytelane? + load_dll_offset(priv, lmc, dll_offset_mode, + byte_offset, bytelane); + change_dll_offset_enable(priv, lmc, 1); + + //bdk_watchdog_poke(); + + // run the test on each rank + // only 1 call per rank should be enough, let the + // bursts, loops, etc, control the load... + + // errors for this byte_offset, all ranks + off_errors = 0; + + active_ranks = 0; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = hw_rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + //phys_addr |= (lmc << 7); + //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT; + + active_ranks++; + + // NOTE: return is a now a bitmask of the + // erroring bytelanes. + errors[rankx] = + test_dram_byte_hw(priv, lmc, phys_addr, + mode, NULL); + + // process any errors in the bytelane(s) that + // are being tested + for (byte = byte_lo; byte <= byte_hi; byte++) { + // check errors + // yes, an error in the byte lane in + // this rank + if (errors[rankx] & (1 << byte)) { + off_errors |= (1 << byte); + + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n", + node, lmc, rankx, byte, + mode_str, byte_offset, + phys_addr); + + // had started run + if (rank_delay_count + [rankx][byte] > 0) { + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n", + node, lmc, rankx, + byte, mode_str, + byte_offset); + // stop now + rank_delay_count + [rankx][byte] = + 0; + } + // FIXME: else had not started + // run - nothing else to do? + } else { + // no error in the byte lane + // first success, set run start + if (rank_delay_count[rankx] + [byte] == 0) { + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n", + node, lmc, rankx, + byte, mode_str, + byte_offset); + rank_delay_start[rankx] + [byte] = + byte_offset; + } + // bump run length + rank_delay_count[rankx][byte] + += BYTE_OFFSET_INCR; + + // is this now the biggest + // window? 
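				// Worked example (illustrative numbers): a
				// lane passing from offset -12 through +18
				// records start = -12, count = 33 (11 passing
				// steps * BYTE_OFFSET_INCR); the averaging
				// below later reduces that window to a
				// midpoint of +3 for this rank.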
+ if (rank_delay_count[rankx] + [byte] > + rank_delay_best_count[rankx] + [byte]) { + rank_delay_best_count + [rankx][byte] = + rank_delay_count + [rankx][byte]; + rank_delay_best_start + [rankx][byte] = + rank_delay_start + [rankx][byte]; + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n", + node, lmc, rankx, + byte, mode_str, + byte_offset, + rank_delay_best_start + [rankx][byte], + rank_delay_best_count + [rankx][byte]); + } + } + } + } /* for (rankx = 0; rankx < 4; rankx++) */ + + tot_errors |= off_errors; + } + + // set the bytelanes DLL offsets all back to 0 + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane); + change_dll_offset_enable(priv, lmc, 1); + + // now choose the best byte_offsets for this pattern + // according to the best windows of the tested ranks + // calculate offset by constructing an average window + // from the rank windows + for (byte = byte_lo; byte <= byte_hi; byte++) { + pat_beg = -999; + pat_end = 999; + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + rank_beg = rank_delay_best_start[rankx][byte]; + pat_beg = max(pat_beg, rank_beg); + rank_end = rank_beg + + rank_delay_best_count[rankx][byte] - + BYTE_OFFSET_INCR; + pat_end = min(pat_end, rank_end); + + debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n", + node, lmc, rankx, byte, mode_str, + rank_beg, rank_end); + + } /* for (rankx = 0; rankx < 4; rankx++) */ + + pat_best_offset[byte] = (pat_end + pat_beg) / 2; + + // sum the pattern averages + new_best_offset[byte] += pat_best_offset[byte]; + } + + // now print them on 1 line, descending order... + debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :", + node, lmc, mode_str, pattern); + for (byte = byte_hi; byte >= byte_lo; --byte) + debug(" %4d", pat_best_offset[byte]); + debug("\n"); + } + // end of pattern loop + + debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str); + + // print in decending byte index order + for (byte = byte_hi; byte >= byte_lo; --byte) { + // create the new average NINT + new_best_offset[byte] = divide_nint(new_best_offset[byte], + NUM_BYTE_PATTERNS); + + // print the best offsets from all patterns + + // print just the offset of all the bytes + if (bytelane == 0x0A) + debug("%4d ", new_best_offset[byte]); + else // print the bytelanes also + debug("(byte %d) %4d ", byte, new_best_offset[byte]); + + // done with testing, load up the best offsets we found... + // disable offsets while we load... + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, dll_offset_mode, + new_best_offset[byte], byte); + // re-enable the offsets now that we are done loading + change_dll_offset_enable(priv, lmc, 1); + } + + debug("\n"); +} + +/* + * Automatically adjust the DLL offset for the selected bytelane using + * hardware-assist + */ +static int perform_HW_dll_offset_tuning(struct ddr_priv *priv, + int dll_offset_mode, int bytelane) +{ + int if_64b; + int save_ecc_ena[4]; + union cvmx_lmcx_config lmc_config; + int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv); + const char *s; + int loops = 1, loop; + int by; + u64 dram_tune_rank_offset; + int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS; + int node = 0; + + // see if we want to do the tuning more than once per LMC... 
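	// (For example, "setenv ddr_tune_ecc_loops 3" repeats the read-offset
	// sweep three times on each LMC; the default is a single pass.)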
+ s = env_get("ddr_tune_ecc_loops"); + if (s) + loops = strtoul(s, NULL, 0); + + // allow override of the test repeats (bursts) + s = env_get("ddr_tune_byte_bursts"); + if (s) + dram_tune_byte_bursts = strtoul(s, NULL, 10); + + // print current working values + debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n", + node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS); + + // FIXME? get flag from LMC0 only + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); + if_64b = !lmc_config.s.mode32b; + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + dram_tune_rank_offset = + 1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena + + (num_lmcs / 2)); + + // do once for each active LMC + + for (lmc = 0; lmc < num_lmcs; lmc++) { + debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", + node, lmc, bytelane); + + /* Enable ECC for the HW tests */ + // NOTE: we do enable ECC, but the HW tests used will not + // generate "visible" errors + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + save_ecc_ena[lmc] = lmc_config.s.ecc_ena; + lmc_config.s.ecc_ena = 1; + lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + + // testing is done on a single LMC at a time + // FIXME: for now, loop here to show what happens multiple times + for (loop = 0; loop < loops; loop++) { + /* Perform DLL offset tuning */ + hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc, + bytelane, + if_64b, dram_tune_rank_offset, + dram_tune_byte_bursts); + } + + // perform cleanup on active LMC + debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", + node, lmc, bytelane); + + /* Restore ECC for DRAM tests */ + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + lmc_config.s.ecc_ena = save_ecc_ena[lmc]; + lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + + // finally, see if there are any read offset overrides + // after tuning + for (by = 0; by < 9; by++) { + s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by); + if (s) { + int dllro = strtoul(s, NULL, 10); + + change_dll_offset_enable(priv, lmc, 0); + load_dll_offset(priv, lmc, 2, dllro, by); + change_dll_offset_enable(priv, lmc, 1); + } + } + + } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ + + // finish up... + + return 0; + +} /* perform_HW_dll_offset_tuning */ + +// this routine simply makes the calls to the tuning routine and returns +// any errors +static int cvmx_tune_node(struct ddr_priv *priv) +{ + int errs, tot_errs; + int do_dllwo = 0; // default to NO + const char *str; + int node = 0; + + // Automatically tune the data and ECC byte DLL read offsets + debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node); + errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */); + debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n", + node, errs); + tot_errs = errs; + + // disabled by default for now, does not seem to be needed? 
+ // Automatically tune the data and ECC byte DLL write offsets + // allow override of default setting + str = env_get("ddr_tune_write_offsets"); + if (str) + do_dllwo = !!strtoul(str, NULL, 0); + if (do_dllwo) { + debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node); + errs = + perform_HW_dll_offset_tuning(priv, 1, + 0x0A /* all bytelanes */); + debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n", + node, errs); + tot_errs += errs; + } + + return tot_errs; +} + +// this routine makes the calls to the tuning routines when criteria are met +// intended to be called for automated tuning, to apply filtering... + +#define IS_DDR4 1 +#define IS_DDR3 0 +#define IS_RDIMM 1 +#define IS_UDIMM 0 +#define IS_1SLOT 1 +#define IS_2SLOT 0 + +// FIXME: DDR3 is not tuned +static const u32 ddr_speed_filter[2][2][2] = { + [IS_DDR4] = { + [IS_RDIMM] = { + [IS_1SLOT] = 940, + [IS_2SLOT] = 800}, + [IS_UDIMM] = { + [IS_1SLOT] = 1050, + [IS_2SLOT] = 940}, + }, + [IS_DDR3] = { + [IS_RDIMM] = { + [IS_1SLOT] = 0, // disabled + [IS_2SLOT] = 0 // disabled + }, + [IS_UDIMM] = { + [IS_1SLOT] = 0, // disabled + [IS_2SLOT] = 0 // disabled + } + } +}; + +void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed) +{ + const char *s; + union cvmx_lmcx_config lmc_config; + union cvmx_lmcx_control lmc_control; + union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl; + int is_ddr4; + int is_rdimm; + int is_1slot; + int do_tune = 0; + u32 ddr_min_speed; + int node = 0; + + // scale it down from Hz to MHz + ddr_speed = divide_nint(ddr_speed, 1000000); + + // FIXME: allow an override here so that all configs can be tuned + // or none + // If the envvar is defined, always either force it or avoid it + // accordingly + s = env_get("ddr_tune_all_configs"); + if (s) { + do_tune = !!strtoul(s, NULL, 0); + printf("N%d: DRAM auto-tuning %s.\n", node, + (do_tune) ? "forced" : "disabled"); + if (do_tune) + cvmx_tune_node(priv); + + return; + } + + // filter the tuning calls here... + // determine if we should/can run automatically for this configuration + // + // FIXME: tune only when the configuration indicates it will help: + // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed + // + lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); // sample LMC0 + lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0)); // sample LMC0 + // sample LMC0 + lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0); + is_rdimm = (lmc_control.s.rdimm_ena != 0); + // HACK, should do better + is_1slot = (lmc_config.s.init_status < 4); + + ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot]; + do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed)); + + debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n", + node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U', + (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not"); + + // call the tuning routine, filtering is done... 
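	// Example of the filter above: a single-slot DDR4 RDIMM setup is only
	// tuned above 940 MHz, a single-slot DDR4 UDIMM only above 1050 MHz,
	// and DDR3 configurations are never auto-tuned here.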
+ if (do_tune) + cvmx_tune_node(priv); +} + +/* + * first pattern example: + * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE0.DATA == 16'h0000; + */ + +static const u64 dbi_pattern[3] = { + 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL }; + +// Perform switchover to DBI +static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc) +{ + union cvmx_lmcx_modereg_params0 modereg_params0; + union cvmx_lmcx_modereg_params3 modereg_params3; + union cvmx_lmcx_phy_ctl phy_ctl; + union cvmx_lmcx_config lmcx_config; + union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; + int rank_mask, rankx, active_ranks; + u64 phys_addr, rank_offset; + int num_lmcs, errors; + int dbi_settings[9], byte, unlocked, retries; + int ecc_ena; + int rank_max = 1; // FIXME: make this 4 to try all the ranks + int node = 0; + + ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); + + lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); + rank_mask = lmcx_config.s.init_status; + ecc_ena = lmcx_config.s.ecc_ena; + + // FIXME: must filter out any non-supported configs + // ie, no DDR3, no x4 devices + if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) { + debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n", + node, lmc); + return; + } + + // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs + num_lmcs = cvmx_dram_get_num_lmc(priv); + rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - + lmcx_config.s.rank_ena + (num_lmcs / 2)); + + debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n", + node, lmc, rank_mask, (unsigned long long)rank_offset); + + /* + * 1. conduct the current init sequence as usual all the way + * after software write leveling. + */ + + read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); + + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, + " INIT"); + + /* + * 2. set DBI related CSRs as below and issue MR write. + * MODEREG_PARAMS3.WR_DBI=1 + * MODEREG_PARAMS3.RD_DBI=1 + * PHY_CTL.DBI_MODE_ENA=1 + */ + modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc)); + + modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc)); + modereg_params3.s.wr_dbi = 1; + modereg_params3.s.rd_dbi = 1; + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64); + + phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc)); + phy_ctl.s.dbi_mode_ena = 1; + lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64); + + /* + * there are two options for data to send. Lets start with (1) + * and could move to (2) in the future: + * + * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where + * this does not exist) set data directly in these reigsters. + * this will yield a clk/2 pattern: + * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; + * GENERAL_PURPOSE0.DATA == 16'h0000; + * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 + * here data comes from the LFSR generating a PRBS pattern + * CHAR_CTL.EN = 0 + * CHAR_CTL.SEL = 0; // for PRBS + * CHAR_CTL.DR = 1; + * CHAR_CTL.PRBS = setup for whatever type of PRBS to send + * CHAR_CTL.SKEW_ON = 1; + */ + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]); + lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]); + + /* + * 3. adjust cas_latency (only necessary if RD_DBI is set). 
+ * here is my code for doing this: + * + * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin + * case (csr_model.MODEREG_PARAMS0.CL.value) + * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; + * // CL 9-13 -> 11-15 + * 5: begin + * // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3 + * if((csr_model.MODEREG_PARAMS0.CWL.value==1 || + * csr_model.MODEREG_PARAMS0.CWL.value==3)) + * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16 + * else + * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17 + * end + * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18 + * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19 + * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21 + * default: + * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, + * I am not sure what to do.", + * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value)) + * endcase + * end + */ + + if (modereg_params3.s.rd_dbi == 1) { + int old_cl, new_cl, old_cwl; + + old_cl = modereg_params0.s.cl; + old_cwl = modereg_params0.s.cwl; + + switch (old_cl) { + case 0: + case 1: + case 2: + case 3: + case 4: + new_cl = old_cl + 2; + break; // 9-13->11-15 + // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3 + case 5: + new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; + break; + case 6: + new_cl = 8; + break; // 15->18 + case 7: + new_cl = 14; + break; // 16->19 + case 8: + new_cl = 15; + break; // 18->21 + default: + printf("ERROR: Bad CL value (%d) for DBI switchover.\n", + old_cl); + // FIXME: need to error exit here... + old_cl = -1; + new_cl = -1; + break; + } + debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n", + node, lmc, old_cl, old_cwl, new_cl); + modereg_params0.s.cl = new_cl; + lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc), + modereg_params0.u64); + } + + /* + * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence + * SEQ_CTL[SEQ_SEL] = MRW. + */ + // Use the default values, from the CSRs fields + // also, do B-sides for RDIMMs... + + for (rankx = 0; rankx < 4; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + // for RDIMMs, B-side writes should get done automatically + // when the A-side is written + ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, + 0 /*MRreg */, 0 /*A-side */); /* MR0 */ + ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, + 5 /*MRreg */, 0 /*A-side */); /* MR5 */ + } + + /* + * 5. conduct DBI bit deskew training via the General Purpose + * R/W sequence (dbtrain). 
may need to run this over and over to get + * a lock (I need up to 5 in simulation): + * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15) + * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's + * DBTRAIN_CTL.READ_CMD_COUNT = all 1's + * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L] + * DBTRAIN_CTL.RW_TRAIN = 1 + * DBTRAIN_CTL.READ_DQ_COUNT = dont care + * DBTRAIN_CTL.WRITE_ENA = 1; + * DBTRAIN_CTL.ACTIVATE = 1; + * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a + * valid address + */ + + // NOW - do the training + debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc); + + active_ranks = 0; + for (rankx = 0; rankx < rank_max; rankx++) { + if (!(rank_mask & (1 << rankx))) + continue; + + phys_addr = rank_offset * active_ranks; + // FIXME: now done by test_dram_byte_hw() + + active_ranks++; + + retries = 0; + +restart_training: + + // NOTE: return is a bitmask of the erroring bytelanes - + // we only print it + errors = + test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL); + + debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n", + node, lmc, rankx, (unsigned long long)phys_addr, errors); + + // NEXT - check for locking + unlocked = 0; + read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); + + for (byte = 0; byte < (8 + ecc_ena); byte++) + unlocked += (dbi_settings[byte] & 1) ^ 1; + + // FIXME: print out the DBI settings array after each rank? + if (rank_max > 1) // only when doing more than 1 rank + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, + dbi_settings, " RANK"); + + if (unlocked > 0) { + debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n", + node, lmc, unlocked); + retries++; + if (retries < 10) { + goto restart_training; + } else { + debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n", + node, lmc, retries); + } + } + } /* for (rankx = 0; rankx < 4; rankx++) */ + + // print out the final DBI settings array + display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, + "FINAL"); +} + +void cvmx_dbi_switchover(struct ddr_priv *priv) +{ + int lmc; + int num_lmcs = cvmx_dram_get_num_lmc(priv); + + for (lmc = 0; lmc < num_lmcs; lmc++) + cvmx_dbi_switchover_interface(priv, lmc); +} From 15afe725f390774af588c21d127b94915b4f1e17 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Wed, 2 Sep 2020 08:29:08 +0200 Subject: [PATCH 07/27] ram: octeon: Add MIPS Octeon3 DDR4 support (part 3/3) This Octeon 3 DDR driver is ported from the 2013 Cavium / Marvell U-Boot repository. It currently supports DDR4 on Octeon 3. It can be later extended to support also DDR3 and Octeon 2 platforms. Part 3 includes the DIMM SPD handling code and the Kconfig / Makefile integration. 
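A board wanting this driver enables it through Kconfig; an illustrative
defconfig fragment (using the symbols added below, on top of the usual
Octeon board selection) would look like:

  CONFIG_RAM=y
  CONFIG_RAM_OCTEON=y
  CONFIG_RAM_OCTEON_DDR4=y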
Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- drivers/ram/Kconfig | 1 + drivers/ram/Makefile | 2 + drivers/ram/octeon/Kconfig | 17 ++ drivers/ram/octeon/Makefile | 8 + drivers/ram/octeon/dimm_spd_eeprom.c | 407 +++++++++++++++++++++++++++ 5 files changed, 435 insertions(+) create mode 100644 drivers/ram/octeon/Kconfig create mode 100644 drivers/ram/octeon/Makefile create mode 100644 drivers/ram/octeon/dimm_spd_eeprom.c diff --git a/drivers/ram/Kconfig b/drivers/ram/Kconfig index 7e6e981897..a0e859afd6 100644 --- a/drivers/ram/Kconfig +++ b/drivers/ram/Kconfig @@ -76,3 +76,4 @@ config IMXRT_SDRAM source "drivers/ram/rockchip/Kconfig" source "drivers/ram/sifive/Kconfig" source "drivers/ram/stm32mp1/Kconfig" +source "drivers/ram/octeon/Kconfig" diff --git a/drivers/ram/Makefile b/drivers/ram/Makefile index 769c9d6218..d685a579a0 100644 --- a/drivers/ram/Makefile +++ b/drivers/ram/Makefile @@ -19,3 +19,5 @@ obj-$(CONFIG_K3_J721E_DDRSS) += k3-j721e/ obj-$(CONFIG_IMXRT_SDRAM) += imxrt_sdram.o obj-$(CONFIG_RAM_SIFIVE) += sifive/ + +obj-$(CONFIG_ARCH_OCTEON) += octeon/ diff --git a/drivers/ram/octeon/Kconfig b/drivers/ram/octeon/Kconfig new file mode 100644 index 0000000000..eb5a1208ed --- /dev/null +++ b/drivers/ram/octeon/Kconfig @@ -0,0 +1,17 @@ +config RAM_OCTEON + bool "Ram drivers for Octeon SoCs" + depends on RAM && ARCH_OCTEON + default n + help + This enables support for RAM drivers for Octeon SoCs. + +if RAM_OCTEON + +config RAM_OCTEON_DDR4 + bool "Octeon III DDR4 RAM support" + default n + help + This enables support for DDR4 RAM suppoort for Octeon III. This does + not include support for Octeon CN70XX. + +endif # RAM_OCTEON diff --git a/drivers/ram/octeon/Makefile b/drivers/ram/octeon/Makefile new file mode 100644 index 0000000000..27649d1e6f --- /dev/null +++ b/drivers/ram/octeon/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2020 Marvell, Inc. +# + +obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon_ddr.o +obj-$(CONFIG_RAM_OCTEON_DDR4) += octeon3_lmc.o +obj-y += dimm_spd_eeprom.o diff --git a/drivers/ram/octeon/dimm_spd_eeprom.c b/drivers/ram/octeon/dimm_spd_eeprom.c new file mode 100644 index 0000000000..30db54804c --- /dev/null +++ b/drivers/ram/octeon/dimm_spd_eeprom.c @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#include +#include + +#include + +#define DEVICE_TYPE DDR4_SPD_KEY_BYTE_DEVICE_TYPE // same for DDR3 and DDR4 +#define MODULE_TYPE DDR4_SPD_KEY_BYTE_MODULE_TYPE // same for DDR3 and DDR4 +#define BUS_WIDTH(t) (((t) == DDR4_DRAM) ? 
\ + DDR4_SPD_MODULE_MEMORY_BUS_WIDTH : \ + DDR3_SPD_MEMORY_BUS_WIDTH) + +/* + * Allow legacy code to encode bus number in the upper bits of the address + * These are only supported in read_spd() + */ +#define OCTEON_TWSI_BUS_IN_ADDR_BIT 12 +#define OCTEON_TWSI_BUS_IN_ADDR_MASK (15 << OCTEON_TWSI_BUS_IN_ADDR_BIT) +#define OCTEON_TWSI_GET_BUS(addr) \ + (((addr) >> OCTEON_TWSI_BUS_IN_ADDR_BIT) & 0xf) + +const char *ddr3_dimm_types[] = { + /* 0000 */ "Undefined", + /* 0001 */ "RDIMM", + /* 0010 */ "UDIMM", + /* 0011 */ "SO-DIMM", + /* 0100 */ "Micro-DIMM", + /* 0101 */ "Mini-RDIMM", + /* 0110 */ "Mini-UDIMM", + /* 0111 */ "Mini-CDIMM", + /* 1000 */ "72b-SO-UDIMM", + /* 1001 */ "72b-SO-RDIMM", + /* 1010 */ "72b-SO-CDIMM" + /* 1011 */ "LRDIMM", + /* 1100 */ "16b-SO-DIMM", + /* 1101 */ "32b-SO-DIMM", + /* 1110 */ "Reserved", + /* 1111 */ "Reserved" +}; + +const char *ddr4_dimm_types[] = { + /* 0000 */ "Extended", + /* 0001 */ "RDIMM", + /* 0010 */ "UDIMM", + /* 0011 */ "SO-DIMM", + /* 0100 */ "LRDIMM", + /* 0101 */ "Mini-RDIMM", + /* 0110 */ "Mini-UDIMM", + /* 0111 */ "Reserved", + /* 1000 */ "72b-SO-RDIMM", + /* 1001 */ "72b-SO-UDIMM", + /* 1010 */ "Reserved", + /* 1011 */ "Reserved", + /* 1100 */ "16b-SO-DIMM", + /* 1101 */ "32b-SO-DIMM", + /* 1110 */ "Reserved", + /* 1111 */ "Reserved" +}; + +static u16 ddr3_crc16(u8 *ptr, int count) +{ + /* From DDR3 SPD specification */ + int crc, i; + + crc = 0; + while (--count >= 0) { + crc = crc ^ (int)*ptr++ << 8; + for (i = 0; i < 8; ++i) { + if (crc & 0x8000) + crc = crc << 1 ^ 0x1021; + else + crc = crc << 1; + } + } + + return (crc & 0xFFFF); +} + +static int validate_spd_checksum_ddr4(struct dimm_config *dimm_config, + int dimm_index, int twsi_addr, int silent) +{ + u8 *spd_data = dimm_config->spd_data[dimm_index]; + int crc_bytes = 126; + u16 crc_comp; + + /* Check byte 0 to see how many bytes checksum is over */ + if (spd_data[0] & 0x80) + crc_bytes = 117; + + crc_comp = ddr3_crc16(spd_data, crc_bytes); + + if (spd_data[126] == (crc_comp & 0xff) && + spd_data[127] == (crc_comp >> 8)) + return 1; + + if (!silent) { + printf("DDR4 SPD CRC error, spd addr: 0x%x, calculated crc: 0x%04x, read crc: 0x%02x%02x\n", + twsi_addr, crc_comp, spd_data[127], spd_data[126]); + } + + return 0; +} + +static int validate_spd_checksum(struct ddr_priv *priv, + struct dimm_config *dimm_config, + int dimm_index, int twsi_addr, + int silent, u8 rv) +{ + if (ddr_verbose(priv)) + debug("Validating DIMM at address 0x%x\n", twsi_addr); + + if (rv >= 0x8 && rv <= 0xA) + printf("%s: Error: DDR2 support disabled\n", __func__); + + if (rv == 0xB) + printf("%s: Error: DDR3 support disabled\n", __func__); + + if (rv == 0xC) { + return validate_spd_checksum_ddr4(dimm_config, dimm_index, + twsi_addr, silent); + } + + if (!silent) { + printf("Unrecognized DIMM type: 0x%x at spd address: 0x%x\n", + rv, twsi_addr); + } + + return 0; +} + +/* + * Read an DIMM SPD value, either using TWSI to read it from the DIMM, or + * from a provided array. 
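 *
 * When the value comes over TWSI, a bus number may be encoded in the
 * upper bits of the SPD address as described above: e.g. an spd_addrs[]
 * entry of ((1 << OCTEON_TWSI_BUS_IN_ADDR_BIT) | 0x50) == 0x1050 selects
 * EEPROM device 0x50 on I2C bus 1 (illustrative address only).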
+ */ +int read_spd(struct dimm_config *dimm_config, int dimm_index, int spd_field) +{ + dimm_index = !!dimm_index; + + if (spd_field >= SPD_EEPROM_SIZE) { + printf("ERROR: Trying to read unsupported SPD EEPROM value %d\n", + spd_field); + } + + /* + * If pointer to data is provided, use it, otherwise read from SPD + * over twsi + */ + if (dimm_config->spd_ptrs[dimm_index]) + return dimm_config->spd_ptrs[dimm_index][spd_field]; + else if (dimm_config->spd_addrs[dimm_index]) + return dimm_config->spd_data[dimm_index][spd_field]; + + return -1; +} + +int read_spd_init(struct dimm_config *dimm_config, int dimm_index) +{ + u8 busno = OCTEON_TWSI_GET_BUS(dimm_config->spd_addrs[dimm_index]); + u8 cmdno = dimm_config->spd_addrs[dimm_index]; + struct udevice *dev_i2c; + u8 *spd_data; + int ret; + + if (dimm_config->spd_cached[dimm_index]) + return 0; + + dimm_config->spd_cached[dimm_index] = 1; + spd_data = dimm_config->spd_data[dimm_index]; + + ret = i2c_get_chip_for_busnum(busno, cmdno, 2, &dev_i2c); + if (ret) { + debug("Cannot find SPL EEPROM: %d\n", ret); + return -ENODEV; + } + + ret = dm_i2c_read(dev_i2c, 0, spd_data, SPD_EEPROM_SIZE); + + return ret; +} + +int validate_dimm(struct ddr_priv *priv, struct dimm_config *dimm_config, + int dimm_index) +{ + int spd_addr; + + dimm_index = !!dimm_index; /* Normalize to 0/1 */ + spd_addr = dimm_config->spd_addrs[dimm_index]; + + debug("Validating dimm %d, spd addr: 0x%02x spd ptr: %p\n", + dimm_index, + dimm_config->spd_addrs[dimm_index], + dimm_config->spd_ptrs[dimm_index]); + + /* Only validate 'real' dimms, assume compiled in values are OK */ + if (!dimm_config->spd_ptrs[dimm_index]) { + int val0, val1; + int dimm_type; + int ret; + + ret = read_spd_init(dimm_config, dimm_index); + if (ret) + return 0; + + dimm_type = read_spd(dimm_config, dimm_index, + DDR2_SPD_MEM_TYPE) & 0xff; + switch (dimm_type) { + case 0x0B: /* DDR3 */ + if (ddr_verbose(priv)) + printf("Validating DDR3 DIMM %d\n", dimm_index); + val0 = read_spd(dimm_config, dimm_index, + DDR3_SPD_DENSITY_BANKS); + val1 = read_spd(dimm_config, dimm_index, + DDR3_SPD_ADDRESSING_ROW_COL_BITS); + if (val0 < 0 && val1 < 0) { + if (ddr_verbose(priv)) + printf("Error reading SPD for DIMM %d\n", + dimm_index); + return 0; /* Failed to read dimm */ + } + if (val0 == 0xff && val1 == 0xff) { + if (ddr_verbose(priv)) + printf("Blank or unreadable SPD for DIMM %d\n", + dimm_index); + /* Blank SPD or otherwise unreadable device */ + return 0; + } + + /* Don't treat bad checksums as fatal */ + validate_spd_checksum(priv, dimm_config, dimm_index, + spd_addr, 0, dimm_type); + break; + + case 0x0C: /* DDR4 */ + if (ddr_verbose(priv)) + printf("Validating DDR4 DIMM %d\n", dimm_index); + val0 = read_spd(dimm_config, dimm_index, + DDR4_SPD_DENSITY_BANKS); + val1 = read_spd(dimm_config, dimm_index, + DDR4_SPD_ADDRESSING_ROW_COL_BITS); + if (val0 < 0 && val1 < 0) { + if (ddr_verbose(priv)) + printf("Error reading SPD for DIMM %d\n", + dimm_index); + return 0; /* Failed to read dimm */ + } + if (val0 == 0xff && val1 == 0xff) { + if (ddr_verbose(priv)) { + printf("Blank or unreadable SPD for DIMM %d\n", + dimm_index); + } + /* Blank SPD or otherwise unreadable device */ + return 0; + } + + /* Don't treat bad checksums as fatal */ + validate_spd_checksum(priv, dimm_config, dimm_index, + spd_addr, 0, dimm_type); + break; + + case 0x00: + /* Terminator detected. Fail silently. 
*/ + return 0; + + default: + debug("Unknown DIMM type 0x%x for DIMM %d @ 0x%x\n", + dimm_type, dimm_index, + dimm_config->spd_addrs[dimm_index]); + return 0; /* Failed to read dimm */ + } + } + + return 1; +} + +int get_ddr_type(struct dimm_config *dimm_config, int upper_dimm) +{ + int spd_ddr_type; + + spd_ddr_type = read_spd(dimm_config, upper_dimm, DEVICE_TYPE); + + debug("%s:%d spd_ddr_type=0x%02x\n", __func__, __LINE__, + spd_ddr_type); + + /* we return only DDR4 or DDR3 */ + return (spd_ddr_type == 0x0C) ? DDR4_DRAM : DDR3_DRAM; +} + +static int get_dimm_ecc(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type) +{ + return !!(read_spd(dimm_config, upper_dimm, BUS_WIDTH(ddr_type)) & 8); +} + +int get_dimm_module_type(struct dimm_config *dimm_config, int upper_dimm, + int ddr_type) +{ + return read_spd(dimm_config, upper_dimm, MODULE_TYPE) & 0x0f; +} + +char *printable_rank_spec(char *buffer, int num_ranks, int dram_width, + int spd_package) +{ + int die_count = ((spd_package >> 4) & 7) + 1; + + if (spd_package & 0x80) { // non-monolithic + if ((spd_package & 3) == 2) { // 3DS + sprintf(buffer, "%dS%dRx%d", num_ranks, die_count, + dram_width); + } else { // MLS + char hchar = (die_count == 2) ? 'D' : 'Q'; + + sprintf(buffer, "%d%cRx%d", num_ranks, hchar, + dram_width); + } + } else { + sprintf(buffer, "%dRx%d", num_ranks, dram_width); + } + + return buffer; +} + +static void report_common_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, const char **dimm_types, int ddr_type, + char *volt_str, int if_num, + int num_ranks, int dram_width, int spd_package) +{ + unsigned int spd_module_type; + char rank_spec[8]; + int spd_ecc; + + spd_module_type = get_dimm_module_type(dimm_config, upper_dimm, + ddr_type); + spd_ecc = get_dimm_ecc(dimm_config, upper_dimm, ddr_type); + + printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package); + printf("LMC%d.DIMM%d: DDR%d %s %s %s, %s\n", + if_num, dimm, ddr_type, dimm_types[spd_module_type], + rank_spec, spd_ecc ? 
"ECC" : "non-ECC", volt_str); +} + +static void report_ddr3_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int spd_voltage; + char *volt_str; + int spd_org = read_spd(dimm_config, upper_dimm, + DDR3_SPD_MODULE_ORGANIZATION); + int num_ranks = 1 + ((spd_org >> 3) & 0x7); + int dram_width = 4 << ((spd_org >> 0) & 0x7); + + spd_voltage = read_spd(dimm_config, upper_dimm, + DDR3_SPD_NOMINAL_VOLTAGE); + if (spd_voltage == 0 || spd_voltage & 3) + volt_str = "1.5V"; + if (spd_voltage & 2) + volt_str = "1.35V"; + if (spd_voltage & 4) + volt_str = "1.2xV"; + + report_common_dimm(dimm_config, upper_dimm, dimm, ddr3_dimm_types, + DDR3_DRAM, volt_str, if_num, + num_ranks, dram_width, /*spd_package*/0); +} + +static void report_ddr4_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int spd_voltage; + char *volt_str; + int spd_package = 0xff & read_spd(dimm_config, upper_dimm, + DDR4_SPD_PACKAGE_TYPE); + int spd_org = 0xff & read_spd(dimm_config, upper_dimm, + DDR4_SPD_MODULE_ORGANIZATION); + int num_ranks = 1 + ((spd_org >> 3) & 0x7); + int dram_width = 4 << ((spd_org >> 0) & 0x7); + + spd_voltage = read_spd(dimm_config, upper_dimm, + DDR4_SPD_MODULE_NOMINAL_VOLTAGE); + if (spd_voltage == 0x01 || spd_voltage & 0x02) + volt_str = "1.2V"; + if (spd_voltage == 0x04 || spd_voltage & 0x08) + volt_str = "TBD1 V"; + if (spd_voltage == 0x10 || spd_voltage & 0x20) + volt_str = "TBD2 V"; + + report_common_dimm(dimm_config, upper_dimm, dimm, ddr4_dimm_types, + DDR4_DRAM, volt_str, if_num, + num_ranks, dram_width, spd_package); +} + +void report_dimm(struct dimm_config *dimm_config, int upper_dimm, + int dimm, int if_num) +{ + int ddr_type; + + /* ddr_type only indicates DDR4 or DDR3 */ + ddr_type = get_ddr_type(dimm_config, upper_dimm); + + if (ddr_type == DDR4_DRAM) + report_ddr4_dimm(dimm_config, 0, dimm, if_num); + else + report_ddr3_dimm(dimm_config, 0, dimm, if_num); +} From 590d48e9d1cc3a32626643f4acdeb4a762d92616 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Wed, 2 Sep 2020 08:29:09 +0200 Subject: [PATCH 08/27] mips: octeon: dram.c: Add RAM driver support This patch adds the initialization call for the Octeon RAM driver to the Octeon platforms code. So if enabled via Kconfig, the DDR driver will be called and the RAM will be configured and used. If the RAM driver is not enabled, the L2 cache is still used as RAM. 
Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/dram.c | 72 ++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/arch/mips/mach-octeon/dram.c b/arch/mips/mach-octeon/dram.c index ff7a59f2ab..6dc08e19da 100644 --- a/arch/mips/mach-octeon/dram.c +++ b/arch/mips/mach-octeon/dram.c @@ -1,28 +1,84 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright (C) Stefan Roese + * Copyright (C) 2020 Stefan Roese */ +#include #include #include #include #include +#include DECLARE_GLOBAL_DATA_PTR; +#define UBOOT_RAM_SIZE_MAX 0x10000000ULL + int dram_init(void) { - /* - * No DDR init yet -> run in L2 cache - */ - gd->ram_size = (4 << 20); - gd->bd->bi_dram[0].size = gd->ram_size; - gd->bd->bi_dram[1].size = 0; + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + struct ram_info ram; + struct udevice *dev; + int ret; + + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return ret; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return ret; + } + + gd->ram_size = min_t(size_t, ram.size, UBOOT_RAM_SIZE_MAX); + debug("SDRAM base=%lx, size=%lx\n", + (unsigned long)ram.base, (unsigned long)ram.size); + } else { + /* + * No DDR init yet -> run in L2 cache + */ + gd->ram_size = (4 << 20); + gd->bd->bi_dram[0].size = gd->ram_size; + gd->bd->bi_dram[1].size = 0; + } return 0; } +void board_add_ram_info(int use_default) +{ + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + struct ram_info ram; + struct udevice *dev; + int ret; + + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return; + } + + printf(" ("); + print_size(ram.size, " total)"); + } +} + ulong board_get_usable_ram_top(ulong total_size) { - return gd->ram_top; + if (IS_ENABLED(CONFIG_RAM_OCTEON)) { + /* Map a maximum of 256MiB - return not size but address */ + return CONFIG_SYS_SDRAM_BASE + min(gd->ram_size, + UBOOT_RAM_SIZE_MAX); + } else { + return gd->ram_top; + } } From 63051d62b894b47e6aa3f59756be46513a450742 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Wed, 2 Sep 2020 08:29:10 +0200 Subject: [PATCH 09/27] mips: octeon: octeon_ebb7304: Add DDR4 support This patch adds the board specific configuration (struct) for the Octeon 3 EBB7304 EVK. This struct is ported from the 2013er Cavium / Marvell U-Boot repository. Also, the Octeon RAM driver is enabled in the board defconfig for its usage. Tested with one and two DIMMs on the EBB7304 EVK (8 & 16 GiB). Signed-off-by: Stefan Roese --- board/Marvell/octeon_ebb7304/board.c | 25 +- board/Marvell/octeon_ebb7304/board_ddr.h | 447 +++++++++++++++++++++++ configs/octeon_ebb7304_defconfig | 3 + include/configs/octeon_common.h | 11 +- 4 files changed, 479 insertions(+), 7 deletions(-) create mode 100644 board/Marvell/octeon_ebb7304/board_ddr.h diff --git a/board/Marvell/octeon_ebb7304/board.c b/board/Marvell/octeon_ebb7304/board.c index 56e50a9063..611b18fa6a 100644 --- a/board/Marvell/octeon_ebb7304/board.c +++ b/board/Marvell/octeon_ebb7304/board.c @@ -3,7 +3,24 @@ * Copyright (C) 2020 Stefan Roese */ -/* - * Nothing included right now. Code will be added in follow-up - * patches. 
- */ +#include +#include +#include + +#include + +#include "board_ddr.h" + +#define EBB7304_DEF_DRAM_FREQ 800 + +static struct ddr_conf board_ddr_conf[] = { + OCTEON_EBB7304_DDR_CONFIGURATION +}; + +struct ddr_conf *octeon_ddr_conf_table_get(int *count, int *def_ddr_freq) +{ + *count = ARRAY_SIZE(board_ddr_conf); + *def_ddr_freq = EBB7304_DEF_DRAM_FREQ; + + return board_ddr_conf; +} diff --git a/board/Marvell/octeon_ebb7304/board_ddr.h b/board/Marvell/octeon_ebb7304/board_ddr.h new file mode 100644 index 0000000000..f2f3419e5b --- /dev/null +++ b/board/Marvell/octeon_ebb7304/board_ddr.h @@ -0,0 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + * + * https://spdx.org/licenses + */ + +#ifndef __BOARD_DDR_H__ +#define __BOARD_DDR_H__ + +#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0 \ + { {0x1050, 0x0}, {NULL, NULL} }, { {0x1051, 0x0}, {NULL, NULL} } +#define OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1 \ + { {0x1052, 0x0}, {NULL, NULL} }, { {0x1053, 0x0}, {NULL, NULL} } + +#define OCTEON_EBB7304_BOARD_EEPROM_TWSI_ADDR 0x56 + +/* + * Local copy of these parameters to allow for customization for this + * board design. The generic version resides in lib_octeon_shared.h. + */ + +/* LMC0_MODEREG_PARAMS1 */ +#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_00_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = 0, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = 0, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_00_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = 0, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = ddr4_rttwr_80ohm & 3, \ + .rtt_wr_10_ext = (ddr4_rttwr_80ohm >> 2) & 1, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0 \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_240ohm, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = 0, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = ddr4_rttwr_240ohm, \ + .dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = 0, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = 0, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = 0, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = 0, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = ddr4_rttwr_240ohm, \ + .dic_00 = ddr4_dic_34ohm, \ + .rtt_nom_00 = ddr4_rttnom_120ohm, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = ddr4_rttwr_240ohm, \ + 
.dic_01 = ddr4_dic_34ohm, \ + .rtt_nom_01 = ddr4_rttnom_120ohm, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = ddr4_rttwr_240ohm, \ + .dic_10 = ddr4_dic_34ohm, \ + .rtt_nom_10 = ddr4_rttnom_120ohm, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = ddr4_rttwr_240ohm, \ + .dic_11 = ddr4_dic_34ohm, \ + .rtt_nom_11 = ddr4_rttnom_120ohm, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT \ + { \ + .cn78xx = { \ + .pasr_00 = 0, \ + .asr_00 = 0, \ + .srt_00 = 0, \ + .rtt_wr_00 = rttwr_60ohm, \ + .dic_00 = dic_34ohm, \ + .rtt_nom_00 = rttnom_20ohm, \ + .pasr_01 = 0, \ + .asr_01 = 0, \ + .srt_01 = 0, \ + .rtt_wr_01 = rttwr_60ohm, \ + .dic_01 = dic_34ohm, \ + .rtt_nom_01 = rttnom_none, \ + .pasr_10 = 0, \ + .asr_10 = 0, \ + .srt_10 = 0, \ + .rtt_wr_10 = rttwr_60ohm, \ + .dic_10 = dic_34ohm, \ + .rtt_nom_10 = rttnom_20ohm, \ + .pasr_11 = 0, \ + .asr_11 = 0, \ + .srt_11 = 0, \ + .rtt_wr_11 = rttwr_60ohm, \ + .dic_11 = dic_34ohm, \ + .rtt_nom_11 = rttnom_none, \ + } \ + } + +#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_60ohm, \ + .vref_value_00 = 0x22, \ + .vref_range_00 = 0, \ + .rtt_park_01 = 0, \ + .vref_value_01 = 0, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +/* FIX */ +#define OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_48ohm, \ + .vref_value_00 = 0x1f, \ + .vref_range_00 = 0, \ + .rtt_park_01 = 0, \ + .vref_value_01 = 0, \ + .vref_range_01 = 0, \ + .rtt_park_10 = ddr4_rttpark_48ohm, \ + .vref_value_10 = 0x1f, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_120ohm, \ + .vref_value_00 = 0x19, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_120ohm, \ + .vref_value_01 = 0x19, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_60ohm, \ + .vref_value_00 = 0x19, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_60ohm, \ + .vref_value_01 = 0x19, \ + .vref_range_01 = 0, \ + .rtt_park_10 = ddr4_rttpark_60ohm, \ + .vref_value_10 = 0x19, \ + .vref_range_10 = 0, \ + .rtt_park_11 = ddr4_rttpark_60ohm, \ + .vref_value_11 = 0x19, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT \ +{ \ + .cn78xx = { \ + .rtt_park_00 = ddr4_rttpark_80ohm, \ + .vref_value_00 = 0x1f, \ + .vref_range_00 = 0, \ + .rtt_park_01 = ddr4_rttpark_80ohm, \ + .vref_value_01 = 0x1f, \ + .vref_range_01 = 0, \ + .rtt_park_10 = 0, \ + .vref_value_10 = 0, \ + .vref_range_10 = 0, \ + .rtt_park_11 = 0, \ + .vref_value_11 = 0, \ + .vref_range_11 = 0 \ + } \ +} + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_1SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x00000000ULL, \ + 0 \ + }, \ + /* 2 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_1RANK_2SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_1RANK_2SLOT, \ + 
ddr4_rodt_ctl_80_ohm, \ + 0x00000000ULL, \ + 0 \ + } + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x00000000ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_1SLOT, \ + ddr4_rodt_ctl_80_ohm, \ + 0x00000000ULL, \ + 0 \ + }, \ + /* 2 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x0c0c0303ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_2RANK_2SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_2RANK_2SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x04080102ULL, \ + 0 \ + } + +#define OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + /* 1 */ \ + { \ + ddr4_dqx_driver_34_ohm, \ + 0x01030203ULL, \ + OCTEON_EBB7304_MODEREG_PARAMS1_4RANK_1SLOT, \ + OCTEON_EBB7304_MODEREG_PARAMS2_4RANK_1SLOT, \ + ddr4_rodt_ctl_48_ohm, \ + 0x01010202ULL, \ + 0 \ + } + +/* + * Construct a static initializer for the ddr_configuration_t variable that + * holds (almost) all of the information required for DDR initialization. + */ + +/* + * The parameters below make up the custom_lmc_config data structure. + * This structure is used to customize the way that the LMC DRAM + * Controller is configured for a particular board design. + * + * Refer to the file lib_octeon_board_table_entry.h for a description + * of the custom board settings. It is usually kept in the following + * location... arch/mips/include/asm/arch-octeon/ + * + */ + +#define OCTEON_EBB7304_DDR_CONFIGURATION \ +/* Interface 0 */ \ +{ \ + .custom_lmc_config = { \ + .min_rtt_nom_idx = 1, \ + .max_rtt_nom_idx = 7, \ + .min_rodt_ctl = 1, \ + .max_rodt_ctl = 7, \ + .ck_ctl = ddr4_driver_34_ohm, \ + .cmd_ctl = ddr4_driver_34_ohm, \ + .ctl_ctl = ddr4_driver_34_ohm, \ + .min_cas_latency = 0, \ + .offset_en = 1, \ + .offset_udimm = 2, \ + .offset_rdimm = 2, \ + .ddr_rtt_nom_auto = 0, \ + .ddr_rodt_ctl_auto = 0, \ + .rlevel_comp_offset_udimm = 0, \ + .rlevel_comp_offset_rdimm = 0, \ + .rlevel_compute = 0, \ + .ddr2t_udimm = 1, \ + .ddr2t_rdimm = 1, \ + .maximum_adjacent_rlevel_delay_increment = 2, \ + .fprch2 = 2, \ + .dll_write_offset = NULL, \ + .dll_read_offset = NULL, \ + .parity = 0 \ + }, \ + .dimm_config_table = { \ + OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION0, \ + DIMM_CONFIG_TERMINATOR \ + }, \ + .unbuffered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .registered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .odt_1rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + }, \ + .odt_2rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + }, \ + .odt_4rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + } \ +}, \ +/* Interface 1 */ \ +{ \ + .custom_lmc_config = { \ + .min_rtt_nom_idx = 1, \ + .max_rtt_nom_idx = 7, \ + .min_rodt_ctl = 1, \ + .max_rodt_ctl = 7, \ + .ck_ctl = ddr4_driver_34_ohm, \ + .cmd_ctl = ddr4_driver_34_ohm, \ + .ctl_ctl = ddr4_driver_34_ohm, \ + .min_cas_latency = 0, \ + .offset_en = 1, \ + .offset_udimm = 2, \ + .offset_rdimm = 2, \ + .ddr_rtt_nom_auto = 0, \ + .ddr_rodt_ctl_auto = 0, \ + .rlevel_comp_offset_udimm = 0, \ + .rlevel_comp_offset_rdimm = 0, \ + .rlevel_compute = 0, \ + .ddr2t_udimm = 1, \ + .ddr2t_rdimm = 1, \ + .maximum_adjacent_rlevel_delay_increment = 2, \ + .fprch2 = 2, \ + .dll_write_offset = NULL, \ + .dll_read_offset = NULL, \ + .parity = 0 \ + }, \ + .dimm_config_table = { \ + OCTEON_EBB7304_DRAM_SOCKET_CONFIGURATION1, \ + DIMM_CONFIG_TERMINATOR \ + 
}, \ + .unbuffered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .registered = { \ + .ddr_board_delay = 0, \ + .lmc_delay_clk = 0, \ + .lmc_delay_cmd = 0, \ + .lmc_delay_dq = 0 \ + }, \ + .odt_1rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_1RANK_CONFIGURATION \ + }, \ + .odt_2rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_2RANK_CONFIGURATION \ + }, \ + .odt_4rank_config = { \ + OCTEON_EBB7304_CN78XX_DRAM_ODT_4RANK_CONFIGURATION \ + } \ +}, + +#endif /* __BOARD_DDR_H__ */ diff --git a/configs/octeon_ebb7304_defconfig b/configs/octeon_ebb7304_defconfig index f8d27b01dc..105fe5b00a 100644 --- a/configs/octeon_ebb7304_defconfig +++ b/configs/octeon_ebb7304_defconfig @@ -38,6 +38,9 @@ CONFIG_SPI_FLASH_STMICRO=y # CONFIG_NETDEVICES is not set CONFIG_PCI=y CONFIG_DM_PCI=y +CONFIG_RAM=y +CONFIG_RAM_OCTEON=y +CONFIG_RAM_OCTEON_DDR4=y CONFIG_DEBUG_UART_SHIFT=3 CONFIG_DEBUG_UART_ANNOUNCE=y CONFIG_SYS_NS16550=y diff --git a/include/configs/octeon_common.h b/include/configs/octeon_common.h index 530f02ad3c..541b81801e 100644 --- a/include/configs/octeon_common.h +++ b/include/configs/octeon_common.h @@ -7,13 +7,18 @@ #ifndef __OCTEON_COMMON_H__ #define __OCTEON_COMMON_H__ -/* No DDR init yet -> run in L2 cache with limited resources */ +#if defined(CONFIG_RAM_OCTEON) +#define CONFIG_SYS_MALLOC_LEN (16 << 20) +#define CONFIG_SYS_INIT_SP_OFFSET 0x20100000 +#else +/* No DDR init -> run in L2 cache with limited resources */ #define CONFIG_SYS_MALLOC_LEN (256 << 10) +#define CONFIG_SYS_INIT_SP_OFFSET 0x00180000 +#endif + #define CONFIG_SYS_SDRAM_BASE 0xffffffff80000000 #define CONFIG_SYS_MONITOR_BASE CONFIG_SYS_TEXT_BASE #define CONFIG_SYS_LOAD_ADDR (CONFIG_SYS_SDRAM_BASE + (1 << 20)) -#define CONFIG_SYS_INIT_SP_OFFSET 0x180000 - #endif /* __OCTEON_COMMON_H__ */ From 72a53ac59b9897d86471f5d44d2d4cbf82209d8e Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:36 +0200 Subject: [PATCH 10/27] usb: xhci: xhci-dwc3.c: Use dev_remap_addr() instead of dev_get_addr() On MIPS platforms, mapping of the base address is needed. This patch switches from dev_get_addr() to dev_remap_addr() to get the mapped base address of the xHCI controller. Signed-off-by: Stefan Roese Reviewed-by: Bin Meng Cc: Bin Meng Cc: Marek Vasut --- drivers/usb/host/xhci-dwc3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dwc3.c b/drivers/usb/host/xhci-dwc3.c index 27f84102db..045de2ffde 100644 --- a/drivers/usb/host/xhci-dwc3.c +++ b/drivers/usb/host/xhci-dwc3.c @@ -122,7 +122,7 @@ static int xhci_dwc3_probe(struct udevice *dev) u32 reg; int ret; - hccr = (struct xhci_hccr *)((uintptr_t)dev_read_addr(dev)); + hccr = (struct xhci_hccr *)((uintptr_t)dev_remap_addr(dev)); hcor = (struct xhci_hcor *)((uintptr_t)hccr + HC_LENGTH(xhci_readl(&(hccr)->cr_capbase))); From e68efa1ecf4552158451f9b097fbcc15ce927489 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:37 +0200 Subject: [PATCH 11/27] usb: xhci: xhci_bulk_tx: Don't "BUG" when comparing addresses Octeon uses mapped addresses for virtual and physical memory. It's not that easy to calculate the resulting addresses here. So let's remove this BUG_ON() completely, as it's not really helpful. Please also note, that BUG_ON() is not recommended any more in the Linux kernel. 
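To illustrate the problem with a generic example (addresses are only
exemplary and this is not tied to the exact xHCI structures): on Octeon a
mapped virtual address and the physical/DMA address seen by the controller
differ by a large, mapping dependent offset, so subtracting one from the
other does not yield a small offset that could be checked against the
transfer length:

	void *buf;			/* e.g. 0xffffffff81000000 (CKSEG0) */
	u64 dma = virt_to_phys(buf);	/* e.g. 0x0000000001000000 */

	/* dma - (u64)buf is huge here, so the removed check cannot hold */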
Signed-off-by: Stefan Roese Reviewed-by: Bin Meng Cc: Bin Meng Cc: Marek Vasut --- drivers/usb/host/xhci-ring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b118207d93..13065d7ca9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -722,8 +722,6 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe, BUG_ON(TRB_TO_SLOT_ID(field) != slot_id); BUG_ON(TRB_TO_EP_INDEX(field) != ep_index); - BUG_ON(*(void **)(uintptr_t)le64_to_cpu(event->trans_event.buffer) - - buffer > (size_t)length); record_transfer_result(udev, event, length); xhci_acknowledge_event(ctrl); From 92ca2fee08d560bbc1d30a9eb357c862ccdd94c8 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:38 +0200 Subject: [PATCH 12/27] usb: xhci: octeon: Add DWC3 glue layer for Octeon This patch adds the glue layer for the MIPS Octeon SoCs. It's ported mainly from the Linux code. Signed-off-by: Stefan Roese Reviewed-by: Bin Meng Cc: Bin Meng Cc: Marek Vasut --- drivers/usb/host/Kconfig | 9 + drivers/usb/host/Makefile | 1 + drivers/usb/host/dwc3-octeon-glue.c | 393 ++++++++++++++++++++++++++++ 3 files changed, 403 insertions(+) create mode 100644 drivers/usb/host/dwc3-octeon-glue.c diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 4eb7b34e24..0971a7c813 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -46,6 +46,15 @@ config USB_XHCI_MVEBU SoCs, which includes Armada8K, Armada3700 and other Armada family SoCs. +config USB_XHCI_OCTEON + bool "Support for Marvell Octeon family on-chip xHCI USB controller" + depends on ARCH_OCTEON + default y + help + Enables support for the on-chip xHCI controller on Marvell Octeon + family SoCs. This is a driver for the dwc3 to provide the glue logic + to configure the controller. 
+ config USB_XHCI_PCI bool "Support for PCI-based xHCI USB controller" depends on DM_USB diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 29d4f87e38..a12e8f2702 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_USB_XHCI_OMAP) += xhci-omap.o obj-$(CONFIG_USB_XHCI_PCI) += xhci-pci.o obj-$(CONFIG_USB_XHCI_RCAR) += xhci-rcar.o obj-$(CONFIG_USB_XHCI_STI) += dwc3-sti-glue.o +obj-$(CONFIG_USB_XHCI_OCTEON) += dwc3-octeon-glue.o # designware obj-$(CONFIG_USB_DWC2) += dwc2.o diff --git a/drivers/usb/host/dwc3-octeon-glue.c b/drivers/usb/host/dwc3-octeon-glue.c new file mode 100644 index 0000000000..39b3185616 --- /dev/null +++ b/drivers/usb/host/dwc3-octeon-glue.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Octeon family DWC3 specific glue layer + * + * Copyright (C) 2020 Stefan Roese + * + * The low-level init code is based on the Linux driver octeon-usb.c by + * David Daney , which is: + * Copyright (C) 2010-2017 Cavium Networks + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +#define CVMX_GPIO_BIT_CFGX(i) (0x0001070000000900ull + ((i) * 8)) +#define CVMX_GPIO_XBIT_CFGX(i) (0x0001070000000900ull + \ + ((i) & 31) * 8 - 8 * 16) + +#define GPIO_BIT_CFG_TX_OE BIT_ULL(0) +#define GPIO_BIT_CFG_OUTPUT_SEL GENMASK_ULL(20, 16) + +#define UCTL_CTL_UCTL_RST BIT_ULL(0) +#define UCTL_CTL_UAHC_RST BIT_ULL(1) +#define UCTL_CTL_UPHY_RST BIT_ULL(2) +#define UCTL_CTL_DRD_MODE BIT_ULL(3) +#define UCTL_CTL_SCLK_EN BIT_ULL(4) +#define UCTL_CTL_HS_POWER_EN BIT_ULL(12) +#define UCTL_CTL_SS_POWER_EN BIT_ULL(14) +#define UCTL_CTL_H_CLKDIV_SEL GENMASK_ULL(26, 24) +#define UCTL_CTL_H_CLKDIV_RST BIT_ULL(28) +#define UCTL_CTL_H_CLK_EN BIT_ULL(30) +#define UCTL_CTL_REF_CLK_FSEL GENMASK_ULL(37, 32) +#define UCTL_CTL_REF_CLK_DIV2 BIT_ULL(38) +#define UCTL_CTL_REF_SSP_EN BIT_ULL(39) +#define UCTL_CTL_MPLL_MULTIPLIER GENMASK_ULL(46, 40) +#define UCTL_CTL_SSC_EN BIT_ULL(59) +#define UCTL_CTL_REF_CLK_SEL GENMASK_ULL(61, 60) + +#define UCTL_HOST_CFG 0xe0 +#define UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN BIT_ULL(24) +#define UCTL_HOST_CFG_PPC_EN BIT_ULL(25) + +#define UCTL_SHIM_CFG 0xe8 +#define UCTL_SHIM_CFG_CSR_ENDIAN_MODE GENMASK_ULL(1, 0) +#define UCTL_SHIM_CFG_DMA_ENDIAN_MODE GENMASK_ULL(9, 8) + +#define OCTEON_H_CLKDIV_SEL 8 +#define OCTEON_MIN_H_CLK_RATE 150000000 +#define OCTEON_MAX_H_CLK_RATE 300000000 + +#define CLOCK_50MHZ 50000000 +#define CLOCK_100MHZ 100000000 +#define CLOCK_125MHZ 125000000 + +static u8 clk_div[OCTEON_H_CLKDIV_SEL] = {1, 2, 4, 6, 8, 16, 24, 32}; + +static int dwc3_octeon_config_power(struct udevice *dev, void __iomem *base) +{ + u64 uctl_host_cfg; + u64 gpio_bit; + u32 gpio_pwr[3]; + int gpio, len, power_active_low; + const struct device_node *node = dev_np(dev); + int index = ((u64)base >> 24) & 1; + void __iomem *gpio_bit_cfg; + + if (of_find_property(node, "power", &len)) { + if (len == 12) { + dev_read_u32_array(dev, "power", gpio_pwr, 3); + power_active_low = gpio_pwr[2] & 0x01; + gpio = gpio_pwr[1]; + } else if (len == 8) { + dev_read_u32_array(dev, "power", gpio_pwr, 2); + power_active_low = 0; + gpio = gpio_pwr[1]; + } else { + printf("dwc3 controller clock init failure\n"); + return -EINVAL; + } + + gpio_bit_cfg = ioremap(CVMX_GPIO_BIT_CFGX(gpio), 0); + + if ((OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX)) && gpio <= 31) { + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit 
|= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x15); + iowrite64(gpio_bit, gpio_bit_cfg); + } else if (gpio <= 15) { + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit |= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x19); + iowrite64(gpio_bit, gpio_bit_cfg); + } else { + gpio_bit_cfg = ioremap(CVMX_GPIO_XBIT_CFGX(gpio), 0); + + gpio_bit = ioread64(gpio_bit_cfg); + gpio_bit |= GPIO_BIT_CFG_TX_OE; + gpio_bit &= ~GPIO_BIT_CFG_OUTPUT_SEL; + gpio_bit |= FIELD_PREP(GPIO_BIT_CFG_OUTPUT_SEL, + index == 0 ? 0x14 : 0x19); + iowrite64(gpio_bit, gpio_bit_cfg); + } + + /* Enable XHCI power control and set if active high or low. */ + uctl_host_cfg = ioread64(base + UCTL_HOST_CFG); + uctl_host_cfg |= UCTL_HOST_CFG_PPC_EN; + if (power_active_low) + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + else + uctl_host_cfg |= UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG); + + /* Wait for power to stabilize */ + mdelay(10); + } else { + /* Disable XHCI power control and set if active high. */ + uctl_host_cfg = ioread64(base + UCTL_HOST_CFG); + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_EN; + uctl_host_cfg &= ~UCTL_HOST_CFG_PPC_ACTIVE_HIGH_EN; + iowrite64(uctl_host_cfg, base + UCTL_HOST_CFG); + dev_warn(dev, "dwc3 controller clock init failure.\n"); + } + + return 0; +} + +static int dwc3_octeon_clocks_start(struct udevice *dev, void __iomem *base) +{ + u64 uctl_ctl; + int ref_clk_sel = 2; + u64 div; + u32 clock_rate; + int mpll_mul; + int i; + u64 h_clk_rate; + void __iomem *uctl_ctl_reg = base; + const char *ss_clock_type; + const char *hs_clock_type; + + i = dev_read_u32(dev, "refclk-frequency", &clock_rate); + if (i) { + printf("No UCTL \"refclk-frequency\"\n"); + return -EINVAL; + } + + ss_clock_type = dev_read_string(dev, "refclk-type-ss"); + if (!ss_clock_type) { + printf("No UCTL \"refclk-type-ss\"\n"); + return -EINVAL; + } + + hs_clock_type = dev_read_string(dev, "refclk-type-hs"); + if (!hs_clock_type) { + printf("No UCTL \"refclk-type-hs\"\n"); + return -EINVAL; + } + + if (strcmp("dlmc_ref_clk0", ss_clock_type) == 0) { + if (strcmp(hs_clock_type, "dlmc_ref_clk0") == 0) { + ref_clk_sel = 0; + } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) { + ref_clk_sel = 2; + } else { + printf("Invalid HS clock type %s, using pll_ref_clk\n", + hs_clock_type); + } + } else if (strcmp(ss_clock_type, "dlmc_ref_clk1") == 0) { + if (strcmp(hs_clock_type, "dlmc_ref_clk1") == 0) { + ref_clk_sel = 1; + } else if (strcmp(hs_clock_type, "pll_ref_clk") == 0) { + ref_clk_sel = 3; + } else { + printf("Invalid HS clock type %s, using pll_ref_clk\n", + hs_clock_type); + ref_clk_sel = 3; + } + } else { + printf("Invalid SS clock type %s, using dlmc_ref_clk0\n", + ss_clock_type); + } + + if ((ref_clk_sel == 0 || ref_clk_sel == 1) && + clock_rate != CLOCK_100MHZ) + printf("Invalid UCTL clock rate of %u\n", clock_rate); + + /* + * Step 1: Wait for all voltages to be stable...that surely + * happened before this driver is started. SKIP + */ + + /* Step 2: Select GPIO for overcurrent indication, if desired. SKIP */ + + /* Step 3: Assert all resets. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_UCTL_RST | UCTL_CTL_UAHC_RST | UCTL_CTL_UPHY_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 4a: Reset the clock dividers. 
*/ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_H_CLKDIV_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 4b: Select controller clock frequency. */ + for (div = ARRAY_SIZE(clk_div) - 1; div >= 0; div--) { + h_clk_rate = gd->bus_clk / clk_div[div]; + if (h_clk_rate <= OCTEON_MAX_H_CLK_RATE && + h_clk_rate >= OCTEON_MIN_H_CLK_RATE) + break; + } + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_H_CLKDIV_SEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_H_CLKDIV_SEL, div); + uctl_ctl |= UCTL_CTL_H_CLK_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + uctl_ctl = ioread64(uctl_ctl_reg); + if (div != FIELD_GET(UCTL_CTL_H_CLKDIV_SEL, uctl_ctl) || + !(uctl_ctl & UCTL_CTL_H_CLK_EN)) { + printf("dwc3 controller clock init failure\n"); + return -EINVAL; + } + + /* Step 4c: Deassert the controller clock divider reset. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_H_CLKDIV_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 5a: Reference clock configuration. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_REF_CLK_SEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_SEL, ref_clk_sel); + uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x07); + uctl_ctl &= ~UCTL_CTL_REF_CLK_DIV2; + + switch (clock_rate) { + default: + printf("Invalid ref_clk %u, using %u instead\n", CLOCK_100MHZ, + clock_rate); + fallthrough; + case CLOCK_100MHZ: + mpll_mul = 0x19; + if (ref_clk_sel < 2) { + uctl_ctl &= ~UCTL_CTL_REF_CLK_FSEL; + uctl_ctl |= FIELD_PREP(UCTL_CTL_REF_CLK_FSEL, 0x27); + } + break; + case CLOCK_50MHZ: + mpll_mul = 0x32; + break; + case CLOCK_125MHZ: + mpll_mul = 0x28; + break; + } + uctl_ctl &= ~UCTL_CTL_MPLL_MULTIPLIER; + uctl_ctl |= FIELD_PREP(UCTL_CTL_MPLL_MULTIPLIER, mpll_mul); + + /* Step 5b: Configure and enable spread-spectrum for SuperSpeed. */ + uctl_ctl |= UCTL_CTL_SSC_EN; + + /* Step 5c: Enable SuperSpeed. */ + uctl_ctl |= UCTL_CTL_REF_SSP_EN; + + /* Step 5d: Configure PHYs. SKIP */ + + /* Step 6a & 6b: Power up PHYs. */ + uctl_ctl |= UCTL_CTL_HS_POWER_EN; + uctl_ctl |= UCTL_CTL_SS_POWER_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 7: Wait 10 controller-clock cycles to take effect. */ + udelay(10); + + /* Step 8a: Deassert UCTL reset signal. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_UCTL_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 8b: Wait 10 controller-clock cycles. */ + udelay(10); + + /* Step 8c: Setup power-power control. */ + if (dwc3_octeon_config_power(dev, base)) { + printf("Error configuring power\n"); + return -EINVAL; + } + + /* Step 8d: Deassert UAHC reset signal. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_UAHC_RST; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 8e: Wait 10 controller-clock cycles. */ + udelay(10); + + /* Step 9: Enable conditional coprocessor clock of UCTL. */ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl |= UCTL_CTL_SCLK_EN; + iowrite64(uctl_ctl, uctl_ctl_reg); + + /* Step 10: Set for host mode only. 
*/ + uctl_ctl = ioread64(uctl_ctl_reg); + uctl_ctl &= ~UCTL_CTL_DRD_MODE; + iowrite64(uctl_ctl, uctl_ctl_reg); + + return 0; +} + +static void dwc3_octeon_set_endian_mode(void __iomem *base) +{ + u64 shim_cfg; + + shim_cfg = ioread64(base + UCTL_SHIM_CFG); + shim_cfg &= ~UCTL_SHIM_CFG_CSR_ENDIAN_MODE; + shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_CSR_ENDIAN_MODE, 1); + shim_cfg &= ~UCTL_SHIM_CFG_DMA_ENDIAN_MODE; + shim_cfg |= FIELD_PREP(UCTL_SHIM_CFG_DMA_ENDIAN_MODE, 1); + iowrite64(shim_cfg, base + UCTL_SHIM_CFG); +} + +static void dwc3_octeon_phy_reset(void __iomem *base) +{ + u64 uctl_ctl; + + uctl_ctl = ioread64(base); + uctl_ctl &= ~UCTL_CTL_UPHY_RST; + iowrite64(uctl_ctl, base); +} + +static int octeon_dwc3_glue_probe(struct udevice *dev) +{ + void __iomem *base; + + base = dev_remap_addr(dev); + if (IS_ERR(base)) + return PTR_ERR(base); + + dwc3_octeon_clocks_start(dev, base); + dwc3_octeon_set_endian_mode(base); + dwc3_octeon_phy_reset(base); + + return 0; +} + +static int octeon_dwc3_glue_bind(struct udevice *dev) +{ + ofnode node, dwc3_node; + + /* Find snps,dwc3 node from subnode */ + dwc3_node = ofnode_null(); + ofnode_for_each_subnode(node, dev->node) { + if (ofnode_device_is_compatible(node, "snps,dwc3")) + dwc3_node = node; + } + + if (!ofnode_valid(dwc3_node)) { + printf("Can't find dwc3 subnode for %s\n", dev->name); + return -ENODEV; + } + + return dm_scan_fdt_dev(dev); +} + +static const struct udevice_id octeon_dwc3_glue_ids[] = { + { .compatible = "cavium,octeon-7130-usb-uctl" }, + { } +}; + +U_BOOT_DRIVER(dwc3_octeon_glue) = { + .name = "dwc3_octeon_glue", + .id = UCLASS_NOP, + .of_match = octeon_dwc3_glue_ids, + .probe = octeon_dwc3_glue_probe, + .bind = octeon_dwc3_glue_bind, + .flags = DM_FLAG_ALLOC_PRIV_DMA, +}; From fd569c878bb145c5ff55b0f4ecfd9f457d26e178 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:39 +0200 Subject: [PATCH 13/27] mips: octeon: cpu.c: Add table for selective swapping Import octeon_should_swizzle_table[] which is needed for the area specific swapping. It will be used by the platform specific mangle-port.h header. Imported from Linux v5.7. Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/cpu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/mips/mach-octeon/cpu.c b/arch/mips/mach-octeon/cpu.c index 2680a2e6ed..6f87a4ef8c 100644 --- a/arch/mips/mach-octeon/cpu.c +++ b/arch/mips/mach-octeon/cpu.c @@ -13,6 +13,27 @@ DECLARE_GLOBAL_DATA_PTR; +/* + * TRUE for devices having registers with little-endian byte + * order, FALSE for registers with native-endian byte order. + * PCI mandates little-endian, USB and SATA are configurable, + * but we chose little-endian for these. + * + * This table will be referened in the Octeon platform specific + * mangle-port.h header. 
+ */ +const bool octeon_should_swizzle_table[256] = { + [0x00] = true, /* bootbus/CF */ + [0x1b] = true, /* PCI mmio window */ + [0x1c] = true, /* PCI mmio window */ + [0x1d] = true, /* PCI mmio window */ + [0x1e] = true, /* PCI mmio window */ + [0x68] = true, /* OCTEON III USB */ + [0x69] = true, /* OCTEON III USB */ + [0x6c] = true, /* OCTEON III SATA */ + [0x6f] = true, /* OCTEON II USB */ +}; + static int get_clocks(void) { const u64 ref_clock = PLL_REF_CLK; From b28d35234cc2cbb68fb1bd0aa0a40323834ee454 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:40 +0200 Subject: [PATCH 14/27] mips: octeon: Add mangle-port.h Import platform specific mangle-port.h header, allowing a area specific swapping, which is needed on Octeon for USB & PCI areas. Imported from Linux v5.7. Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/include/mangle-port.h | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mangle-port.h diff --git a/arch/mips/mach-octeon/include/mangle-port.h b/arch/mips/mach-octeon/include/mangle-port.h new file mode 100644 index 0000000000..7e95dcef5a --- /dev/null +++ b/arch/mips/mach-octeon/include/mangle-port.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003, 2004 Ralf Baechle + */ + +#ifndef __ASM_MACH_GENERIC_MANGLE_PORT_H +#define __ASM_MACH_GENERIC_MANGLE_PORT_H + +#include + +#ifdef __BIG_ENDIAN + +static inline bool __should_swizzle_bits(volatile void *a) +{ + extern const bool octeon_should_swizzle_table[]; + u64 did = ((u64)(uintptr_t)a >> 40) & 0xff; + + return octeon_should_swizzle_table[did]; +} + +# define __swizzle_addr_b(port) (port) +# define __swizzle_addr_w(port) (port) +# define __swizzle_addr_l(port) (port) +# define __swizzle_addr_q(port) (port) + +#else /* __LITTLE_ENDIAN */ + +#define __should_swizzle_bits(a) false + +static inline bool __should_swizzle_addr(u64 p) +{ + /* boot bus? */ + return ((p >> 40) & 0xff) == 0; +} + +# define __swizzle_addr_b(port) \ + (__should_swizzle_addr(port) ? (port) ^ 7 : (port)) +# define __swizzle_addr_w(port) \ + (__should_swizzle_addr(port) ? (port) ^ 6 : (port)) +# define __swizzle_addr_l(port) \ + (__should_swizzle_addr(port) ? (port) ^ 4 : (port)) +# define __swizzle_addr_q(port) (port) + +#endif /* __BIG_ENDIAN */ + + +# define ioswabb(a, x) (x) +# define __mem_ioswabb(a, x) (x) +# define ioswabw(a, x) (__should_swizzle_bits(a) ? le16_to_cpu(x) : x) +# define __mem_ioswabw(a, x) (x) +# define ioswabl(a, x) (__should_swizzle_bits(a) ? le32_to_cpu(x) : x) +# define __mem_ioswabl(a, x) (x) +# define ioswabq(a, x) (__should_swizzle_bits(a) ? le64_to_cpu(x) : x) +# define __mem_ioswabq(a, x) (x) + +#endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */ From 399b867fac244c4506f0a9365e7d53bfe74e66b4 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:41 +0200 Subject: [PATCH 15/27] mips: octeon: cache.c: Flush all pending writes in flush_dcache_range() As noticed while working on the USB xHCI support, Octeon needs to flush all pending writes so that the values are present in the memory. Add this "syncw" instruction (twice) to flush_dcache_range(). 
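A typical user is DMA descriptor or buffer setup, where all CPU stores
must have left the write buffer before the device is started (generic
example only; buf, val and size are placeholders):

	buf[0] = cpu_to_le32(val);			/* fill descriptor */
	flush_dcache_range((ulong)buf, (ulong)buf + size);
	/* only now ring the doorbell / start the DMA */

The double "syncw" is the same sequence as the CVMX_SYNCW macro used in
the imported Octeon headers (cvmx-regs.h).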
Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/cache.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/mips/mach-octeon/cache.c b/arch/mips/mach-octeon/cache.c index 9a88bb97c7..f293d65dae 100644 --- a/arch/mips/mach-octeon/cache.c +++ b/arch/mips/mach-octeon/cache.c @@ -5,14 +5,13 @@ #include -/* - * The Octeon platform is cache coherent and cache flushes and invalidates - * are not needed. Define some platform specific empty flush_foo() - * functions here to overwrite the _weak common function as a no-op. - * This effectively disables all cache operations. - */ +/* Octeon memory write barrier */ +#define CVMX_SYNCW asm volatile ("syncw\nsyncw\n" : : : "memory") + void flush_dcache_range(ulong start_addr, ulong stop) { + /* Flush all pending writes */ + CVMX_SYNCW; } void flush_cache(ulong start_addr, ulong size) @@ -21,4 +20,5 @@ void flush_cache(ulong start_addr, ulong size) void invalidate_dcache_range(ulong start_addr, ulong stop) { + /* Don't need to do anything for OCTEON */ } From d25d2db84776614a6a70f27f560f637ffb6c853a Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:42 +0200 Subject: [PATCH 16/27] mips: octeon: Add USB DT nodes Add the USB device tree nodes to the Octeon dts/dtsi files. Signed-off-by: Stefan Roese Reviewed-by: Bin Meng --- arch/mips/dts/mrvl,cn73xx.dtsi | 60 +++++++++++++++++++++++++++ arch/mips/dts/mrvl,octeon-ebb7304.dts | 24 +++++++++++ 2 files changed, 84 insertions(+) diff --git a/arch/mips/dts/mrvl,cn73xx.dtsi b/arch/mips/dts/mrvl,cn73xx.dtsi index 44a5a03014..40eb85ee0c 100644 --- a/arch/mips/dts/mrvl,cn73xx.dtsi +++ b/arch/mips/dts/mrvl,cn73xx.dtsi @@ -143,5 +143,65 @@ spi-max-frequency = <25000000>; clocks = <&clk OCTEON_CLK_IO>; }; + + /* USB 0 */ + usb0: uctl@1180068000000 { + compatible = "cavium,octeon-7130-usb-uctl"; + reg = <0x11800 0x68000000 0x0 0x100>; + ranges; /* Direct mapping */ + #address-cells = <2>; + #size-cells = <2>; + /* Only 100MHz allowed */ + refclk-frequency = <100000000>; + /* Only "dlmc_ref_clk0" is supported for 73xx */ + refclk-type-ss = "dlmc_ref_clk0"; + /* Only "dlmc_ref_clk0" is supported for 73xx */ + refclk-type-hs = "dlmc_ref_clk0"; + + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + xhci@1680000000000 { + compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3"; + reg = <0x16800 0x00000000 0x10 0x0>; + interrupts = <0x68080 4>; /* UAHC_IMAN, level */ + maximum-speed = "super-speed"; + dr_mode = "host"; + snps,dis_u3_susphy_quirk; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; + }; + }; + + /* USB 1 */ + usb1: uctl@1180069000000 { + compatible = "cavium,octeon-7130-usb-uctl"; + reg = <0x11800 0x69000000 0x0 0x100>; + ranges; /* Direct mapping */ + #address-cells = <2>; + #size-cells = <2>; + /* 50MHz, 100MHz and 125MHz allowed */ + refclk-frequency = <100000000>; + /* Either "dlmc_ref_clk0" or "dlmc_ref_clk0" */ + refclk-type-ss = "dlmc_ref_clk0"; + /* Either "dlmc_ref_clk0" "dlmc_ref_clk1" or "pll_ref_clk" */ + refclk-type-hs = "dlmc_ref_clk0"; + + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + xhci@1690000000000 { + compatible = "cavium,octeon-7130-xhci","synopsys,dwc3","snps,dwc3"; + reg = <0x16900 0x00000000 0x10 0x0>; + interrupts = <0x69080 4>; /* UAHC_IMAN, level */ + dr_mode = "host"; + }; + }; }; }; diff --git 
a/arch/mips/dts/mrvl,octeon-ebb7304.dts b/arch/mips/dts/mrvl,octeon-ebb7304.dts index 6b2e5e84bc..993b4f6890 100644 --- a/arch/mips/dts/mrvl,octeon-ebb7304.dts +++ b/arch/mips/dts/mrvl,octeon-ebb7304.dts @@ -113,3 +113,27 @@ reg = <0>; }; }; + +/* USB 0 */ +&usb0 { + status = "okay"; + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + power = <&gpio 20 0>; +}; + +/* USB 1 */ +&usb1 { + status = "okay"; + /* + * Power is specified by three parts: + * 1) GPIO handle (must be &gpio) + * 2) GPIO pin number + * 3) Active high (0) or active low (1) + */ + power = <&gpio 21 0>; +}; From 60b407a86a88bbed8f1cd1dc2807c8a400564b1a Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 24 Aug 2020 13:04:43 +0200 Subject: [PATCH 17/27] mips: octeon: octeon_ebb7304_defconfig: Enable USB support This patch enables USB support with some helpful commands, like fs support. Signed-off-by: Stefan Roese --- configs/octeon_ebb7304_defconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/configs/octeon_ebb7304_defconfig b/configs/octeon_ebb7304_defconfig index 105fe5b00a..a98d73a268 100644 --- a/configs/octeon_ebb7304_defconfig +++ b/configs/octeon_ebb7304_defconfig @@ -15,12 +15,19 @@ CONFIG_HUSH_PARSER=y CONFIG_CMD_GPIO=y CONFIG_CMD_I2C=y CONFIG_CMD_MTD=y +CONFIG_CMD_PART=y CONFIG_CMD_PCI=y +CONFIG_CMD_USB=y CONFIG_CMD_DHCP=y CONFIG_CMD_PING=y CONFIG_CMD_TIME=y +CONFIG_CMD_EXT4=y +CONFIG_CMD_FAT=y +CONFIG_CMD_FS_GENERIC=y +# CONFIG_DOS_PARTITION is not set CONFIG_ENV_IS_IN_FLASH=y CONFIG_ENV_ADDR=0x1FBFE000 +CONFIG_BLK=y CONFIG_CLK=y # CONFIG_INPUT is not set CONFIG_MTD=y @@ -48,4 +55,14 @@ CONFIG_SPI=y CONFIG_OCTEON_SPI=y CONFIG_SYSRESET=y CONFIG_SYSRESET_OCTEON=y +CONFIG_USB=y +CONFIG_DM_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_DWC3=y +CONFIG_USB_HOST_ETHER=y +CONFIG_USB_ETHER_ASIX=y +CONFIG_USB_ETHER_ASIX88179=y +CONFIG_USB_ETHER_MCS7830=y +CONFIG_USB_ETHER_RTL8152=y +CONFIG_USB_ETHER_SMSC95XX=y CONFIG_HEXDUMP=y From 97e795ccca4c04db62765366f33143840815cf5c Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 20 Aug 2020 07:21:55 +0200 Subject: [PATCH 18/27] mips: octeon: octeon-model.h: Enable inclusion from assembler files Add the #ifdef __ASSEMBLY__ checks to enable inclusion of this header from assembler files. Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/include/mach/octeon-model.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/mach-octeon/include/mach/octeon-model.h b/arch/mips/mach-octeon/include/mach/octeon-model.h index a346b3472b..22d6df6a9e 100644 --- a/arch/mips/mach-octeon/include/mach/octeon-model.h +++ b/arch/mips/mach-octeon/include/mach/octeon-model.h @@ -262,6 +262,8 @@ ))); \ }) +#ifndef __ASSEMBLY__ + #ifndef OCTEON_IS_MODEL static inline int __octeon_is_model_runtime_internal__(u32 model) @@ -310,4 +312,6 @@ static inline u32 cvmx_get_octeon_family(void) return (read_c0_prid() & OCTEON_FAMILY_MASK); } +#endif /* __ASSEMBLY__ */ + #endif /* __OCTEON_MODEL_H__ */ From 7c6f274a369061078eadb9f576002f7cf9ab37a5 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 20 Aug 2020 07:21:56 +0200 Subject: [PATCH 19/27] mips: octeon: lowlevel_init.S: Add NMI handling code for SMP Linux booting This patch adds the necessary lowlevel init code, to enable SMP Linux booting. This code will be used with the platform specific Octeon Linux boot command "bootoctlinux", which starts a configurable number of cores into Linux. 
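The rough usage of the NMI parameter block added below is as follows (a
sketch of what the platform boot code is expected to do later;
kernel_entry, the args and coremask are placeholders, and kicking the
cores via the CIU3 NMI register is an Octeon III specific assumption):

	extern u64 nmi_handler_para[];		/* entry point + 4 arguments */

	nmi_handler_para[0] = (u64)kernel_entry;
	nmi_handler_para[1] = arg0;
	/* ... arg1..arg3 ... */
	CVMX_SYNCW;				/* make parameters visible */

	/* Raise the NMI on the secondary cores -> they run nmi_bootvector */
	csr_wr(CVMX_CIU3_NMI, coremask);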
Additionally some erratas and lowlevel register initializations are copied from the original Cavium / Marvell U-Boot source code, enabling booting into the Linux kernel. Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/lowlevel_init.S | 76 +++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/arch/mips/mach-octeon/lowlevel_init.S b/arch/mips/mach-octeon/lowlevel_init.S index fa87cb4e34..56d1d2261e 100644 --- a/arch/mips/mach-octeon/lowlevel_init.S +++ b/arch/mips/mach-octeon/lowlevel_init.S @@ -10,10 +10,36 @@ #include #include #include +#include + +#define COP0_CVMCTL_REG $9,7 /* Cavium control */ +#define COP0_CVMMEMCTL_REG $11,7 /* Cavium memory control */ +#define COP0_PROC_ID_REG $15,0 .set noreorder LEAF(lowlevel_init) + + /* Set LMEMSZ in CVMMEMCTL register */ + dmfc0 a0, COP0_CVMMEMCTL_REG + dins a0, zero, 0, 9 + mfc0 a4, COP0_PROC_ID_REG + li a5, OCTEON_CN63XX_PASS1_0 /* Octeon cn63xx pass1 chip id */ + bgt a5, a4, 2f + ori a0, 0x104 /* setup 4 lines of scratch */ + ori a6, a5, 8 /* Octeon cn63xx pass2 chip id */ + bge a4, a6, 2f + nop + li a6, 4 + ins a0, a6, 11, 4 /* Set WBTHRESH=4 as per Core-14752 errata */ +2: + dmtc0 a0, COP0_CVMMEMCTL_REG + + /* Set REPUN bit in CVMCTL register */ + dmfc0 a0, COP0_CVMCTL_REG + ori a0, 1<<14 /* enable fixup of unaligned mem access */ + dmtc0 a0, COP0_CVMCTL_REG + jr ra nop END(lowlevel_init) @@ -67,3 +93,53 @@ __dummy: nop END(mips_mach_early_init) + +LEAF(nmi_bootvector) + + /* + * From Marvell original bootvector setup + */ + mfc0 k0, CP0_STATUS + /* Enable 64-bit addressing, set ERL (should already be set) */ + ori k0, 0x84 + mtc0 k0, CP0_STATUS + /* Core-14345, clear L1 Dcache virtual tags if the core hit an NMI */ + cache 17, 0($0) + + /* + * Needed for Linux kernel booting, otherwise it hangs while + * zero'ing all of CVMSEG + */ + dmfc0 a0, COP0_CVMMEMCTL_REG + dins a0, zero, 0, 9 + ori a0, 0x104 /* setup 4 lines of scratch */ + dmtc0 a0, COP0_CVMMEMCTL_REG + + /* + * Load parameters and entry point + */ + PTR_LA t9, nmi_handler_para + sync + + ld s0, 0x00(t9) + ld a0, 0x08(t9) + ld a1, 0x10(t9) + ld a2, 0x18(t9) + ld a3, 0x20(t9) + + /* Finally jump to entry point (start kernel etc) */ + j s0 + nop + + END(nmi_bootvector) + + /* + * Add here some space for the NMI parameters (entry point and args) + */ + .globl nmi_handler_para +nmi_handler_para: + .dword 0 // entry-point + .dword 0 // arg0 + .dword 0 // arg1 + .dword 0 // arg2 + .dword 0 // arg3 From b1fed50a43f7f6d0d50ab855b5f25ca7e6f3a70e Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:21:57 +0200 Subject: [PATCH 20/27] mips: octeon: Add header cvmx-regs.h This header includes common register defines and accessor functions. 
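A minimal usage example of the accessors added here (the register and
field names are the ones defined in this header):

	u64 val;

	val = csr_rd(CVMX_MIO_FUS_DAT2);	/* read a CSR */
	if (val & MIO_FUS_DAT2_NOCRYPTO)
		printf("crypto disabled by fuse\n");

	csr_wr(CVMX_CIU_PP_RST, 0);		/* write a CSR */

The cvmx_read64_*()/cvmx_write64_*() variants deliberately use plain
volatile accesses so that no byte swapping is applied.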
Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mips/mach-octeon/include/mach/cvmx-regs.h | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/cvmx-regs.h diff --git a/arch/mips/mach-octeon/include/mach/cvmx-regs.h b/arch/mips/mach-octeon/include/mach/cvmx-regs.h new file mode 100644 index 0000000000..b84fc9fd57 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-regs.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2020 Stefan Roese + */ + +#ifndef __CVMX_REGS_H__ +#define __CVMX_REGS_H__ + +#include +#include +#include + +/* General defines */ +#define CVMX_MAX_CORES 48 +/* Maximum # of bits to define core in node */ +#define CVMX_NODE_NO_SHIFT 7 +#define CVMX_NODE_BITS 2 /* Number of bits to define a node */ +#define CVMX_MAX_NODES (1 << CVMX_NODE_BITS) +#define CVMX_NODE_MASK (CVMX_MAX_NODES - 1) +#define CVMX_NODE_IO_SHIFT 36 +#define CVMX_NODE_MEM_SHIFT 40 +#define CVMX_NODE_IO_MASK ((u64)CVMX_NODE_MASK << CVMX_NODE_IO_SHIFT) + +#define CVMX_MIPS_MAX_CORE_BITS 10 /* Maximum # of bits to define cores */ +#define CVMX_MIPS_MAX_CORES (1 << CVMX_MIPS_MAX_CORE_BITS) + +#define MAX_CORE_TADS 8 + +#define CAST_ULL(v) ((unsigned long long)(v)) +#define CASTPTR(type, v) ((type *)(long)(v)) + +/* Regs */ +#define CVMX_CIU_PP_RST 0x0001010000000100ULL +#define CVMX_CIU3_NMI 0x0001010000000160ULL +#define CVMX_CIU_FUSE 0x00010100000001a0ULL +#define CVMX_CIU_NMI 0x0001070000000718ULL + +#define CVMX_MIO_BOOT_LOC_CFGX(x) (0x0001180000000080ULL + ((x) & 1) * 8) +#define MIO_BOOT_LOC_CFG_BASE GENMASK_ULL(27, 3) +#define MIO_BOOT_LOC_CFG_EN BIT_ULL(31) + +#define CVMX_MIO_BOOT_LOC_ADR 0x0001180000000090ULL +#define MIO_BOOT_LOC_ADR_ADR GENMASK_ULL(7, 3) + +#define CVMX_MIO_BOOT_LOC_DAT 0x0001180000000098ULL + +#define CVMX_MIO_FUS_DAT2 0x0001180000001410ULL +#define MIO_FUS_DAT2_NOCRYPTO BIT_ULL(26) +#define MIO_FUS_DAT2_NOMUL BIT_ULL(27) +#define MIO_FUS_DAT2_DORM_CRYPTO BIT_ULL(34) + +#define CVMX_MIO_FUS_RCMD 0x0001180000001500ULL +#define MIO_FUS_RCMD_ADDR GENMASK_ULL(7, 0) +#define MIO_FUS_RCMD_PEND BIT_ULL(12) +#define MIO_FUS_RCMD_DAT GENMASK_ULL(23, 16) + +#define CVMX_RNM_CTL_STATUS 0x0001180040000000ULL +#define RNM_CTL_STATUS_EER_VAL BIT_ULL(9) + +/* turn the variable name into a string */ +#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x) +#define CVMX_TMP_STR2(x) #x + +#define CVMX_RDHWRNV(result, regstr) \ + asm volatile ("rdhwr %[rt],$" CVMX_TMP_STR(regstr) : [rt] "=d" (result)) + +#define CVMX_SYNCW \ + asm volatile ("syncw\nsyncw\n" : : : "memory") + +/* ToDo: Currently only node = 0 supported */ +static inline u64 csr_rd_node(int node, u64 addr) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + return ioread64(base); +} + +static inline u64 csr_rd(u64 addr) +{ + return csr_rd_node(0, addr); +} + +static inline void csr_wr_node(int node, u64 addr, u64 val) +{ + void __iomem *base; + + base = ioremap_nocache(addr, 0x100); + iowrite64(val, base); +} + +static inline void csr_wr(u64 addr, u64 val) +{ + csr_wr_node(0, addr, val); +} + +/* + * We need to use the volatile access here, otherwise the IO accessor + * functions might swap the bytes + */ +static inline u64 cvmx_read64_uint64(u64 addr) +{ + return *(volatile u64 *)addr; +} + +static inline void cvmx_write64_uint64(u64 addr, u64 val) +{ + *(volatile u64 *)addr = val; +} + +static inline u32 cvmx_read64_uint32(u64 addr) +{ + return *(volatile u32 *)addr; +} + +static inline void cvmx_write64_uint32(u64 
addr, u32 val) +{ + *(volatile u32 *)addr = val; +} + +static inline void *cvmx_phys_to_ptr(u64 addr) +{ + return (void *)CKSEG0ADDR(addr); +} + +static inline u64 cvmx_ptr_to_phys(void *ptr) +{ + return virt_to_phys(ptr); +} + +/** + * Number of the Core on which the program is currently running. + * + * @return core number + */ +static inline unsigned int cvmx_get_core_num(void) +{ + unsigned int core_num; + + CVMX_RDHWRNV(core_num, 0); + return core_num; +} + +#endif /* __CVMX_REGS_H__ */ From 5d7282195adf07be30806603d63567a829b80215 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:21:58 +0200 Subject: [PATCH 21/27] mips: octeon: Add header octeon-feature.h This header includes the Octeon feature detection used in many Octeon drivers. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mach-octeon/include/mach/octeon-feature.h | 442 ++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/octeon-feature.h diff --git a/arch/mips/mach-octeon/include/mach/octeon-feature.h b/arch/mips/mach-octeon/include/mach/octeon-feature.h new file mode 100644 index 0000000000..1202716ba5 --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/octeon-feature.h @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __OCTEON_FEATURE_H__ +#define __OCTEON_FEATURE_H__ + +/* + * Octeon models are declared after the macros in octeon-model.h with the + * suffix _FEATURE. The individual features are declared with the + * _FEATURE_ infix. + */ +enum octeon_feature { + /* + * Checks on the critical path are moved to the top (8 positions) + * so that the compiler generates one less insn than for the rest + * of the checks. + */ + OCTEON_FEATURE_PKND, /* CN68XX uses port kinds for packet interface */ + /* CN68XX has different fields in word0 - word2 */ + OCTEON_FEATURE_CN68XX_WQE, + + /* + * Features + */ + /* + * Octeon models in the CN5XXX family and higher support atomic + * add instructions to memory (saa/saad) + */ + OCTEON_FEATURE_SAAD, + /* Does this Octeon support the ZIP offload engine? */ + OCTEON_FEATURE_ZIP, + /* Does this Octeon support crypto acceleration using COP2? */ + OCTEON_FEATURE_CRYPTO, + /* Can crypto be enabled by calling cvmx_crypto_dormant_enable()? */ + OCTEON_FEATURE_DORM_CRYPTO, + OCTEON_FEATURE_PCIE, /* Does this Octeon support PCI express? */ + OCTEON_FEATURE_SRIO, /* Does this Octeon support SRIO */ + OCTEON_FEATURE_ILK, /* Does this Octeon support Interlaken */ + /* + * Some Octeon models support internal memory for storing + * cryptographic keys + */ + OCTEON_FEATURE_KEY_MEMORY, + /* Octeon has a LED controller for banks of external LEDs */ + OCTEON_FEATURE_LED_CONTROLLER, + OCTEON_FEATURE_TRA, /* Octeon has a trace buffer */ + OCTEON_FEATURE_MGMT_PORT, /* Octeon has a management port */ + OCTEON_FEATURE_RAID, /* Octeon has a raid unit */ + OCTEON_FEATURE_USB, /* Octeon has a builtin USB */ + /* Octeon IPD can run without using work queue entries */ + OCTEON_FEATURE_NO_WPTR, + OCTEON_FEATURE_DFA, /* Octeon has DFA state machines */ + /* + * Octeon MDIO block supports clause 45 transactions for + * 10 Gig support + */ + OCTEON_FEATURE_MDIO_CLAUSE_45, + /* + * CN52XX and CN56XX used a block named NPEI for PCIe access. 
+ * Newer chips replaced this with SLI+DPI + */ + OCTEON_FEATURE_NPEI, + OCTEON_FEATURE_HFA, /* Octeon has DFA/HFA */ + OCTEON_FEATURE_DFM, /* Octeon has DFM */ + OCTEON_FEATURE_CIU2, /* Octeon has CIU2 */ + /* Octeon has DMA Instruction Completion Interrupt mode */ + OCTEON_FEATURE_DICI_MODE, + /* Octeon has Bit Select Extractor schedulor */ + OCTEON_FEATURE_BIT_EXTRACTOR, + OCTEON_FEATURE_NAND, /* Octeon has NAND */ + OCTEON_FEATURE_MMC, /* Octeon has built-in MMC support */ + OCTEON_FEATURE_ROM, /* Octeon has built-in ROM support */ + OCTEON_FEATURE_AUTHENTIK, /* Octeon has Authentik ROM support */ + OCTEON_FEATURE_MULTICAST_TIMER, /* Octeon has multi_cast timer */ + OCTEON_FEATURE_MULTINODE, /* Octeon has node support */ + OCTEON_FEATURE_CIU3, /* Octeon has CIU3 */ + OCTEON_FEATURE_FPA3, /* Octeon has FPA first seen on 78XX */ + /* CN78XX has different fields in word0 - word2 */ + OCTEON_FEATURE_CN78XX_WQE, + OCTEON_FEATURE_PKO3, /* Octeon has enhanced PKO block */ + OCTEON_FEATURE_SPI, /* Octeon supports SPI interfaces */ + OCTEON_FEATURE_ZIP3, /* Octeon has zip first seen on 78XX */ + OCTEON_FEATURE_BCH, /* Octeon supports BCH ECC */ + OCTEON_FEATURE_PKI, /* Octeon has PKI block */ + OCTEON_FEATURE_OCLA, /* Octeon has OCLA */ + OCTEON_FEATURE_FAU, /* Octeon has FAU */ + OCTEON_FEATURE_BGX, /* Octeon has BGX */ + OCTEON_FEATURE_BGX_MIX, /* On of the BGX is used for MIX */ + OCTEON_FEATURE_HNA, /* Octeon has HNA */ + OCTEON_FEATURE_BGX_XCV, /* Octeon has BGX XCV RGMII support */ + OCTEON_FEATURE_TSO, /* Octeon has tcp segmentation offload */ + OCTEON_FEATURE_TDM, /* Octeon has PCM/TDM support */ + OCTEON_FEATURE_PTP, /* Octeon has PTP support */ + OCTEON_MAX_FEATURE +}; + +static inline int octeon_has_feature_OCTEON_FEATURE_SAAD(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ZIP(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX) || OCTEON_IS_MODEL(OCTEON_CNF75XX)) + return 0; + else + return !cvmx_fuse_read(121); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ZIP3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BCH(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN70XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CRYPTO(void) +{ + /* OCTEON II and later */ + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + if (val & MIO_FUS_DAT2_NOCRYPTO || val & MIO_FUS_DAT2_NOMUL) + return 0; + else if (!(val & MIO_FUS_DAT2_DORM_CRYPTO)) + return 1; + + val = csr_rd(CVMX_RNM_CTL_STATUS); + return val & RNM_CTL_STATUS_EER_VAL; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DORM_CRYPTO(void) +{ + /* OCTEON II and later */ + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + return !(val & MIO_FUS_DAT2_NOCRYPTO) && !(val & MIO_FUS_DAT2_NOMUL) && + (val & MIO_FUS_DAT2_DORM_CRYPTO); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PCIE(void) +{ + /* OCTEON II and later have PCIe */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_SRIO(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF75XX)) { + if (cvmx_fuse_read(1601) == 0) + return 0; + else + return 1; + } else { + return (OCTEON_IS_MODEL(OCTEON_CN63XX) || + OCTEON_IS_MODEL(OCTEON_CN66XX)); + } +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ILK(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX)); +} + +static inline int 
octeon_has_feature_OCTEON_FEATURE_KEY_MEMORY(void) +{ + /* OCTEON II or later */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_LED_CONTROLLER(void) +{ + return false; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TRA(void) +{ + return !OCTEON_IS_OCTEON3(); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MGMT_PORT(void) +{ + /* OCTEON II or later */ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_RAID(void) +{ + return !OCTEON_IS_MODEL(OCTEON_CNF75XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_USB(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NO_WPTR(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DFA(void) +{ + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_HFA(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CNF75XX)) + return 0; + else + return !cvmx_fuse_read(90); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_HNA(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CN78XX) || OCTEON_IS_MODEL(OCTEON_CN73XX)) + return !cvmx_fuse_read(134); + else + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DFM(void) +{ + if (!(OCTEON_IS_MODEL(OCTEON_CN63XX) || OCTEON_IS_MODEL(OCTEON_CN66XX))) + return 0; + else + return !cvmx_fuse_read(90); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MDIO_CLAUSE_45(void) +{ + return true; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NPEI(void) +{ + return false; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKND(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CN68XX_WQE(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CIU2(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN68XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CIU3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_FPA3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_NAND(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN63XX) || + OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN68XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_DICI_MODE(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BIT_EXTRACTOR(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN68XX_PASS2_X) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MMC(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3()); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_ROM(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX); +} + +static inline int 
octeon_has_feature_OCTEON_FEATURE_AUTHENTIK(void) +{ + if (OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)) { + u64 val; + + val = csr_rd(CVMX_MIO_FUS_DAT2); + return (val & MIO_FUS_DAT2_NOCRYPTO) && + (val & MIO_FUS_DAT2_DORM_CRYPTO); + } + + return 0; +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MULTICAST_TIMER(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN66XX_PASS1_2) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_MULTINODE(void) +{ + return (!OCTEON_IS_MODEL(OCTEON_CN76XX) && + OCTEON_IS_MODEL(OCTEON_CN78XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_CN78XX_WQE(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_SPI(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN66XX) || + OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || OCTEON_IS_OCTEON3()); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKI(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PKO3(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_OCLA(void) +{ + return OCTEON_IS_OCTEON3(); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_FAU(void) +{ + return (!OCTEON_IS_MODEL(OCTEON_CN78XX) && + !OCTEON_IS_MODEL(OCTEON_CNF75XX) && + !OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX_MIX(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN78XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN73XX)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_BGX_XCV(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN73XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TSO(void) +{ + return (OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X)); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_TDM(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN61XX) || + OCTEON_IS_MODEL(OCTEON_CNF71XX) || + OCTEON_IS_MODEL(OCTEON_CN70XX); +} + +static inline int octeon_has_feature_OCTEON_FEATURE_PTP(void) +{ + return OCTEON_IS_MODEL(OCTEON_CN6XXX) || + OCTEON_IS_MODEL(OCTEON_CNF7XXX) || + OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX) || + OCTEON_IS_MODEL(OCTEON_CN78XX_PASS2_X); +} + +/* + * Answer ``Is the bit for feature set in the bitmap?'' + * @param feature + * @return 1 when the feature is present and 0 otherwise, -1 in case of error. + */ +#define octeon_has_feature(feature_x) octeon_has_feature_##feature_x() + +#endif /* __OCTEON_FEATURE_H__ */ From 99b937e56802dbb2bba82035149e23a2a7afeda1 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:21:59 +0200 Subject: [PATCH 22/27] mips: octeon: Add header cvmx-fuse.h Add header to handle Octeon fuse access. 
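A minimal usage sketch (the wrapper name below is invented for illustration; fuse 121 is the ZIP-disable bit that octeon-feature.h already checks, and cvmx_fuse_read() returns 0 or 1 for the requested bit):

	/* Illustrative only: returns 1 when the ZIP engine is fused off */
	static inline int octeon_zip_fused_off(void)
	{
		return cvmx_fuse_read(121);	/* 0 = fuse intact, 1 = blown */
	}
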
Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mips/mach-octeon/include/mach/cvmx-fuse.h | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/cvmx-fuse.h diff --git a/arch/mips/mach-octeon/include/mach/cvmx-fuse.h b/arch/mips/mach-octeon/include/mach/cvmx-fuse.h new file mode 100644 index 0000000000..a06a1326cb --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-fuse.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +#ifndef __CVMX_FUSE_H__ +#define __CVMX_FUSE_H__ + +/** + * Read a byte of fuse data + * @param node node to read from + * @param byte_addr address to read + * + * @return fuse value: 0 or 1 + */ +static inline u8 cvmx_fuse_read_byte_node(u8 node, int byte_addr) +{ + u64 val; + + val = FIELD_PREP(MIO_FUS_RCMD_ADDR, byte_addr) | MIO_FUS_RCMD_PEND; + csr_wr_node(node, CVMX_MIO_FUS_RCMD, val); + + do { + val = csr_rd_node(node, CVMX_MIO_FUS_RCMD); + } while (val & MIO_FUS_RCMD_PEND); + + return FIELD_GET(MIO_FUS_RCMD_DAT, val); +} + +/** + * Read a byte of fuse data + * @param byte_addr address to read + * + * @return fuse value: 0 or 1 + */ +static inline u8 cvmx_fuse_read_byte(int byte_addr) +{ + return cvmx_fuse_read_byte_node(0, byte_addr); +} + +/** + * Read a single fuse bit + * + * @param node Node number + * @param fuse Fuse number (0-1024) + * + * @return fuse value: 0 or 1 + */ +static inline int cvmx_fuse_read_node(u8 node, int fuse) +{ + return (cvmx_fuse_read_byte_node(node, fuse >> 3) >> (fuse & 0x7)) & 1; +} + +/** + * Read a single fuse bit + * + * @param fuse Fuse number (0-1024) + * + * @return fuse value: 0 or 1 + */ +static inline int cvmx_fuse_read(int fuse) +{ + return cvmx_fuse_read_node(0, fuse); +} + +static inline int cvmx_octeon_fuse_locked(void) +{ + return cvmx_fuse_read(123); +} + +#endif /* __CVMX_FUSE_H__ */ From afb4828ede04f15b4f5691b8bf1a76226ed83d79 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:22:00 +0200 Subject: [PATCH 23/27] mips: octeon: Add header cvmx-bootinfo.h Add header to handle bootinfo support, needed for Octeon Linux kernel booting. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- .../mach-octeon/include/mach/cvmx-bootinfo.h | 350 ++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h b/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h new file mode 100644 index 0000000000..337987178f --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-bootinfo.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/* + * Header file containing the ABI with the bootloader. + */ + +#ifndef __CVMX_BOOTINFO_H__ +#define __CVMX_BOOTINFO_H__ + +#include "cvmx-coremask.h" + +/* + * Current major and minor versions of the CVMX bootinfo block that is + * passed from the bootloader to the application. This is versioned + * so that applications can properly handle multiple bootloader + * versions. + */ +#define CVMX_BOOTINFO_MAJ_VER 1 +#define CVMX_BOOTINFO_MIN_VER 4 + +#if (CVMX_BOOTINFO_MAJ_VER == 1) +#define CVMX_BOOTINFO_OCTEON_SERIAL_LEN 20 +/* + * This structure is populated by the bootloader. 
For binary + * compatibility the only changes that should be made are + * adding members to the end of the structure, and the minor + * version should be incremented at that time. + * If an incompatible change is made, the major version + * must be incremented, and the minor version should be reset + * to 0. + */ +struct cvmx_bootinfo { + u32 major_version; + u32 minor_version; + + u64 stack_top; + u64 heap_base; + u64 heap_end; + u64 desc_vaddr; + + u32 exception_base_addr; + u32 stack_size; + u32 flags; + u32 core_mask; + /* DRAM size in megabytes */ + u32 dram_size; + /* physical address of free memory descriptor block*/ + u32 phy_mem_desc_addr; + /* used to pass flags from app to debugger */ + u32 debugger_flags_base_addr; + + /* CPU clock speed, in hz */ + u32 eclock_hz; + + /* DRAM clock speed, in hz */ + u32 dclock_hz; + + u32 reserved0; + u16 board_type; + u8 board_rev_major; + u8 board_rev_minor; + u16 reserved1; + u8 reserved2; + u8 reserved3; + char board_serial_number[CVMX_BOOTINFO_OCTEON_SERIAL_LEN]; + u8 mac_addr_base[6]; + u8 mac_addr_count; +#if (CVMX_BOOTINFO_MIN_VER >= 1) + /* + * Several boards support compact flash on the Octeon boot + * bus. The CF memory spaces may be mapped to different + * addresses on different boards. These are the physical + * addresses, so care must be taken to use the correct + * XKPHYS/KSEG0 addressing depending on the application's + * ABI. These values will be 0 if CF is not present. + */ + u64 compact_flash_common_base_addr; + u64 compact_flash_attribute_base_addr; + /* + * Base address of the LED display (as on EBT3000 board) + * This will be 0 if LED display not present. + */ + u64 led_display_base_addr; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 2) + /* DFA reference clock in hz (if applicable)*/ + u32 dfa_ref_clock_hz; + + /* + * flags indicating various configuration options. These + * flags supercede the 'flags' variable and should be used + * instead if available. + */ + u32 config_flags; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 3) + /* + * Address of the OF Flattened Device Tree structure + * describing the board. + */ + u64 fdt_addr; +#endif +#if (CVMX_BOOTINFO_MIN_VER >= 4) + /* + * Coremask used for processors with more than 32 cores + * or with OCI. This replaces core_mask. + */ + struct cvmx_coremask ext_core_mask; +#endif +}; + +#define CVMX_BOOTINFO_CFG_FLAG_PCI_HOST (1ull << 0) +#define CVMX_BOOTINFO_CFG_FLAG_PCI_TARGET (1ull << 1) +#define CVMX_BOOTINFO_CFG_FLAG_DEBUG (1ull << 2) +#define CVMX_BOOTINFO_CFG_FLAG_NO_MAGIC (1ull << 3) +/* + * This flag is set if the TLB mappings are not contained in the + * 0x10000000 - 0x20000000 boot bus region. 
+ */ +#define CVMX_BOOTINFO_CFG_FLAG_OVERSIZE_TLB_MAPPING (1ull << 4) +#define CVMX_BOOTINFO_CFG_FLAG_BREAK (1ull << 5) + +#endif /* (CVMX_BOOTINFO_MAJ_VER == 1) */ + +/* Type defines for board and chip types */ +enum cvmx_board_types_enum { + CVMX_BOARD_TYPE_NULL = 0, + CVMX_BOARD_TYPE_SIM = 1, + CVMX_BOARD_TYPE_EBT3000 = 2, + CVMX_BOARD_TYPE_KODAMA = 3, + CVMX_BOARD_TYPE_NIAGARA = 4, + CVMX_BOARD_TYPE_NAC38 = 5, /* formerly NAO38 */ + CVMX_BOARD_TYPE_THUNDER = 6, + CVMX_BOARD_TYPE_TRANTOR = 7, + CVMX_BOARD_TYPE_EBH3000 = 8, + CVMX_BOARD_TYPE_EBH3100 = 9, + CVMX_BOARD_TYPE_HIKARI = 10, + CVMX_BOARD_TYPE_CN3010_EVB_HS5 = 11, + CVMX_BOARD_TYPE_CN3005_EVB_HS5 = 12, + CVMX_BOARD_TYPE_KBP = 13, + /* Deprecated, CVMX_BOARD_TYPE_CN3010_EVB_HS5 supports the CN3020 */ + CVMX_BOARD_TYPE_CN3020_EVB_HS5 = 14, + CVMX_BOARD_TYPE_EBT5800 = 15, + CVMX_BOARD_TYPE_NICPRO2 = 16, + CVMX_BOARD_TYPE_EBH5600 = 17, + CVMX_BOARD_TYPE_EBH5601 = 18, + CVMX_BOARD_TYPE_EBH5200 = 19, + CVMX_BOARD_TYPE_BBGW_REF = 20, + CVMX_BOARD_TYPE_NIC_XLE_4G = 21, + CVMX_BOARD_TYPE_EBT5600 = 22, + CVMX_BOARD_TYPE_EBH5201 = 23, + CVMX_BOARD_TYPE_EBT5200 = 24, + CVMX_BOARD_TYPE_CB5600 = 25, + CVMX_BOARD_TYPE_CB5601 = 26, + CVMX_BOARD_TYPE_CB5200 = 27, + /* Special 'generic' board type, supports many boards */ + CVMX_BOARD_TYPE_GENERIC = 28, + CVMX_BOARD_TYPE_EBH5610 = 29, + CVMX_BOARD_TYPE_LANAI2_A = 30, + CVMX_BOARD_TYPE_LANAI2_U = 31, + CVMX_BOARD_TYPE_EBB5600 = 32, + CVMX_BOARD_TYPE_EBB6300 = 33, + CVMX_BOARD_TYPE_NIC_XLE_10G = 34, + CVMX_BOARD_TYPE_LANAI2_G = 35, + CVMX_BOARD_TYPE_EBT5810 = 36, + CVMX_BOARD_TYPE_NIC10E = 37, + CVMX_BOARD_TYPE_EP6300C = 38, + CVMX_BOARD_TYPE_EBB6800 = 39, + CVMX_BOARD_TYPE_NIC4E = 40, + CVMX_BOARD_TYPE_NIC2E = 41, + CVMX_BOARD_TYPE_EBB6600 = 42, + CVMX_BOARD_TYPE_REDWING = 43, + CVMX_BOARD_TYPE_NIC68_4 = 44, + CVMX_BOARD_TYPE_NIC10E_66 = 45, + CVMX_BOARD_TYPE_MAX, + + /* + * The range from CVMX_BOARD_TYPE_MAX to + * CVMX_BOARD_TYPE_CUST_DEFINED_MIN is reserved for future + * SDK use. + */ + + /* + * Set aside a range for customer boards. These numbers are managed + * by Cavium. + */ + CVMX_BOARD_TYPE_CUST_DEFINED_MIN = 10000, + CVMX_BOARD_TYPE_CUST_WSX16 = 10001, + CVMX_BOARD_TYPE_CUST_NS0216 = 10002, + CVMX_BOARD_TYPE_CUST_NB5 = 10003, + CVMX_BOARD_TYPE_CUST_WMR500 = 10004, + CVMX_BOARD_TYPE_CUST_ITB101 = 10005, + CVMX_BOARD_TYPE_CUST_NTE102 = 10006, + CVMX_BOARD_TYPE_CUST_AGS103 = 10007, + CVMX_BOARD_TYPE_CUST_GST104 = 10008, + CVMX_BOARD_TYPE_CUST_GCT105 = 10009, + CVMX_BOARD_TYPE_CUST_AGS106 = 10010, + CVMX_BOARD_TYPE_CUST_SGM107 = 10011, + CVMX_BOARD_TYPE_CUST_GCT108 = 10012, + CVMX_BOARD_TYPE_CUST_AGS109 = 10013, + CVMX_BOARD_TYPE_CUST_GCT110 = 10014, + CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER = 10015, + CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER = 10016, + CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX = 10017, + CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX = 10018, + CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX = 10019, + CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX = 10020, + CVMX_BOARD_TYPE_CUST_L2_ZINWELL = 10021, + CVMX_BOARD_TYPE_CUST_DEFINED_MAX = 20000, + + /* + * Set aside a range for customer private use. The SDK won't + * use any numbers in this range. + */ + CVMX_BOARD_TYPE_CUST_PRIVATE_MIN = 20001, + CVMX_BOARD_TYPE_UBNT_E100 = 20002, + CVMX_BOARD_TYPE_CUST_DSR1000N = 20006, + CVMX_BOARD_TYPE_KONTRON_S1901 = 21901, + CVMX_BOARD_TYPE_CUST_PRIVATE_MAX = 30000, + + /* The remaining range is reserved for future use. 
*/ +}; + +enum cvmx_chip_types_enum { + CVMX_CHIP_TYPE_NULL = 0, + CVMX_CHIP_SIM_TYPE_DEPRECATED = 1, + CVMX_CHIP_TYPE_OCTEON_SAMPLE = 2, + CVMX_CHIP_TYPE_MAX, +}; + +/* + * Compatibility alias for NAC38 name change, planned to be removed + * from SDK 1.7 + */ +#define CVMX_BOARD_TYPE_NAO38 CVMX_BOARD_TYPE_NAC38 + +/* Functions to return string based on type */ +#define ENUM_BRD_TYPE_CASE(x) \ + case x: \ + return(#x + 16) /* Skip CVMX_BOARD_TYPE_ */ + +static inline const char *cvmx_board_type_to_string(enum + cvmx_board_types_enum type) +{ + switch (type) { + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NULL); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_SIM); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT3000); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KODAMA); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIAGARA); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NAC38); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_THUNDER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_TRANTOR); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3000); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH3100); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_HIKARI); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3010_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3005_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KBP); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CN3020_EVB_HS5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5800); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NICPRO2); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5601); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_BBGW_REF); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_4G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5201); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5601); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CB5200); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_GENERIC); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBH5610); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_A); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_U); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB5600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6300); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC_XLE_10G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_LANAI2_G); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBT5810); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EP6300C); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6800); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC4E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC2E); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_EBB6600); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_REDWING); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC68_4); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_NIC10E_66); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_MAX); + + /* Customer boards listed here */ + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MIN); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WSX16); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NS0216); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NB5); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_WMR500); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_ITB101); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_NTE102); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS103); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GST104); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT105); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS106); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_SGM107); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT108); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_AGS109); + 
ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_GCT110); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_SENDER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_AIR_RECEIVER); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_TX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ACCTON2_RX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_TX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_WSTRNSNIC_RX); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_L2_ZINWELL); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DEFINED_MAX); + + /* Customer private range */ + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MIN); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_UBNT_E100); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_DSR1000N); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_KONTRON_S1901); + ENUM_BRD_TYPE_CASE(CVMX_BOARD_TYPE_CUST_PRIVATE_MAX); + } + + return NULL; +} + +#define ENUM_CHIP_TYPE_CASE(x) \ + case x: \ + return(#x + 15) /* Skip CVMX_CHIP_TYPE */ + +static inline const char *cvmx_chip_type_to_string(enum + cvmx_chip_types_enum type) +{ + switch (type) { + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_NULL); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_SIM_TYPE_DEPRECATED); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_OCTEON_SAMPLE); + ENUM_CHIP_TYPE_CASE(CVMX_CHIP_TYPE_MAX); + } + + return "Unsupported Chip"; +} + +#endif /* __CVMX_BOOTINFO_H__ */ From b0ce80588d107f5132243110d70e2bcd95e39d88 Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:22:01 +0200 Subject: [PATCH 24/27] mips: octeon: Add coremask support This patch adds the coremask handling functions. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/Makefile | 1 + arch/mips/mach-octeon/cvmx-coremask.c | 366 +++++++++ .../mach-octeon/include/mach/cvmx-coremask.h | 752 ++++++++++++++++++ 3 files changed, 1119 insertions(+) create mode 100644 arch/mips/mach-octeon/cvmx-coremask.c create mode 100644 arch/mips/mach-octeon/include/mach/cvmx-coremask.h diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile index 2e37ca572c..5155f89a1e 100644 --- a/arch/mips/mach-octeon/Makefile +++ b/arch/mips/mach-octeon/Makefile @@ -8,3 +8,4 @@ obj-y += cache.o obj-y += clock.o obj-y += cpu.o obj-y += dram.o +obj-y += cvmx-coremask.o diff --git a/arch/mips/mach-octeon/cvmx-coremask.c b/arch/mips/mach-octeon/cvmx-coremask.c new file mode 100644 index 0000000000..cff8c08b97 --- /dev/null +++ b/arch/mips/mach-octeon/cvmx-coremask.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018-2020 Marvell International Ltd. + */ + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +struct cvmx_coremask *get_coremask_override(struct cvmx_coremask *pcm) +{ + struct cvmx_coremask pcm_override = CVMX_COREMASK_MAX; + char *cptr; + + /* The old code sets the number of cores to be to 16 in this case. */ + cvmx_coremask_set_cores(pcm, 0, 16); + + if (OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) + cvmx_coremask_copy(pcm, &pcm_override); + + cptr = env_get("coremask_override"); + if (cptr) { + if (cvmx_coremask_str2bmp(pcm, cptr) < 0) + return NULL; + } + + return pcm; +} + +/* Validate the coremask that is passed to a boot* function. 
*/ +int validate_coremask(struct cvmx_coremask *pcm) +{ + struct cvmx_coremask coremask_override; + struct cvmx_coremask fuse_coremask; + + if (!get_coremask_override(&coremask_override)) + return -1; + + octeon_get_available_coremask(&fuse_coremask); + + if (!cvmx_coremask_is_subset(&fuse_coremask, pcm)) { + puts("ERROR: Can't boot cores that don't exist!\n"); + puts("Available coremask:\n"); + cvmx_coremask_print(&fuse_coremask); + return -1; + } + + if (!cvmx_coremask_is_subset(&coremask_override, pcm)) { + struct cvmx_coremask print_cm; + + puts("Notice: coremask changed from:\n"); + cvmx_coremask_print(pcm); + puts("based on coremask_override of:\n"); + cvmx_coremask_print(&coremask_override); + cvmx_coremask_and(&print_cm, pcm, &coremask_override); + puts("to:\n"); + cvmx_coremask_print(&print_cm); + } + + return 0; +} + +/** + * In CIU_FUSE for the 78XX, odd and even cores are separated out. + * For example, a CIU_FUSE value of 0xfffffefffffe indicates that bits 0 and 1 + * are set. + * This function converts the bit number in the CIU_FUSE register to a + * physical core number. + */ +static int convert_ciu_fuse_to_physical_core(int core, int max_cores) +{ + if (!octeon_has_feature(OCTEON_FEATURE_CIU3)) + return core; + else if (!OCTEON_IS_MODEL(OCTEON_CN78XX)) + return core; + else if (core < (max_cores / 2)) + return core * 2; + else + return ((core - (max_cores / 2)) * 2) + 1; +} + +/** + * Get the total number of fuses blown as well as the number blown per tad. + * + * @param coremask fuse coremask + * @param[out] tad_blown_count number of cores blown for each tad + * @param num_tads number of tads + * @param max_cores maximum number of cores + * + * @return void + */ +void fill_tad_corecount(u64 coremask, int tad_blown_count[], int num_tads, + int max_cores) +{ + int core, physical_core; + + for (core = 0; core < max_cores; core++) { + if (!(coremask & (1ULL << core))) { + int tad; + + physical_core = + convert_ciu_fuse_to_physical_core(core, + max_cores); + tad = physical_core % num_tads; + tad_blown_count[tad]++; + } + } +} + +u64 get_core_pattern(int num_tads, int max_cores) +{ + u64 pattern = 1ULL; + int cnt; + + for (cnt = 1; cnt < (max_cores / num_tads); cnt++) + pattern |= pattern << num_tads; + + return pattern; +} + +/** + * For CN78XX and CN68XX this function returns the logical coremask from the + * CIU_FUSE register value. For other models there is no difference. + * + * @param ciu_fuse_value fuse value from CIU_FUSE register + * @return logical coremask of CIU_FUSE value. + */ +u64 get_logical_coremask(u64 ciu_fuse_value) +{ + int tad_blown_count[MAX_CORE_TADS] = {0}; + int tad; + u64 logical_coremask = 0; + u64 tad_mask, pattern; + int num_tads, max_cores; + + if (OCTEON_IS_MODEL(OCTEON_CN78XX)) { + num_tads = 8; + max_cores = 48; + } else if (OCTEON_IS_MODEL(OCTEON_CN73XX) || + OCTEON_IS_MODEL(OCTEON_CNF75XX)) { + num_tads = 4; + max_cores = 16; + } else if (OCTEON_IS_MODEL(OCTEON_CN68XX)) { + num_tads = 4; + max_cores = 32; + } else { + /* Most Octeon devices don't need any mapping. */ + return ciu_fuse_value; + } + + pattern = get_core_pattern(num_tads, max_cores); + fill_tad_corecount(ciu_fuse_value, tad_blown_count, + num_tads, max_cores); + + for (tad = 0; tad < num_tads; tad++) { + tad_mask = pattern << tad; + logical_coremask |= tad_mask >> (tad_blown_count[tad] * num_tads); + } + return logical_coremask; +} + +/** + * Returns the available coremask either from env or fuses. + * If the fuses are blown and locked, they are the definitive coremask. 
+ * + * @param pcm pointer to coremask to fill in + * @return pointer to coremask + */ +struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm) +{ + u8 node_mask = 0x01; /* ToDo: Currently only one node is supported */ + u64 ciu_fuse; + u64 cores; + + cvmx_coremask_clear_all(pcm); + + if (octeon_has_feature(OCTEON_FEATURE_CIU3)) { + int node; + + cvmx_coremask_for_each_node(node, node_mask) { + ciu_fuse = (csr_rd(CVMX_CIU_FUSE) & + 0x0000FFFFFFFFFFFFULL); + + ciu_fuse = get_logical_coremask(ciu_fuse); + cvmx_coremask_set64_node(pcm, node, ciu_fuse); + } + + return pcm; + } + + ciu_fuse = (csr_rd(CVMX_CIU_FUSE) & 0x0000FFFFFFFFFFFFULL); + ciu_fuse = get_logical_coremask(ciu_fuse); + + if (OCTEON_IS_MODEL(OCTEON_CN68XX)) + cvmx_coremask_set64(pcm, ciu_fuse); + + /* Get number of cores from fuse register, convert to coremask */ + cores = __builtin_popcountll(ciu_fuse); + + cvmx_coremask_set_cores(pcm, 0, cores); + + return pcm; +} + +int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr) +{ + int i, j; + int l; /* length of the hexstr in characters */ + int lb; /* number of bits taken by hexstr */ + int hldr_offset;/* holder's offset within the coremask */ + int hldr_xsz; /* holder's size in the number of hex digits */ + u64 h; + char c; + +#define MINUS_ONE (hexstr[0] == '-' && hexstr[1] == '1' && hexstr[2] == 0) + if (MINUS_ONE) { + cvmx_coremask_set_all(pcm); + return 0; + } + + /* Skip '0x' from hexstr */ + if (hexstr[0] == '0' && (hexstr[1] == 'x' || hexstr[1] == 'X')) + hexstr += 2; + + if (!strlen(hexstr)) { + printf("%s: Error: hex string is empty\n", __func__); + return -2; + } + + /* Trim leading zeros */ + while (*hexstr == '0') + hexstr++; + + cvmx_coremask_clear_all(pcm); + l = strlen(hexstr); + + /* If length is 0 then the hex string must be all zeros */ + if (l == 0) + return 0; + + for (i = 0; i < l; i++) { + if (isxdigit((int)hexstr[i]) == 0) { + printf("%s: Non-hex digit within hexstr\n", __func__); + return -2; + } + } + + lb = (l - 1) * 4; + if (hexstr[0] > '7') + lb += 4; + else if (hexstr[0] > '3') + lb += 3; + else if (hexstr[0] > '1') + lb += 2; + else + lb += 1; + if (lb > CVMX_MIPS_MAX_CORES) { + printf("%s: hexstr (%s) is too long\n", __func__, hexstr); + return -1; + } + + hldr_offset = 0; + hldr_xsz = 2 * sizeof(u64); + for (i = l; i > 0; i -= hldr_xsz) { + c = hexstr[i]; + hexstr[i] = 0; + j = i - hldr_xsz; + if (j < 0) + j = 0; + h = simple_strtoull(&hexstr[j], NULL, 16); + if (errno == EINVAL) { + printf("%s: strtou returns w/ EINVAL\n", __func__); + return -2; + } + pcm->coremask_bitmap[hldr_offset] = h; + hexstr[i] = c; + hldr_offset++; + } + + return 0; +} + +void cvmx_coremask_print(const struct cvmx_coremask *pcm) +{ + int i, j; + int start; + int found = 0; + + /* + * Print one node per line. Since the bitmap is stored LSB to MSB + * we reverse the order when printing. + */ + if (!octeon_has_feature(OCTEON_FEATURE_MULTINODE)) { + start = 0; + for (j = CVMX_COREMASK_MAX_CORES_PER_NODE - + CVMX_COREMASK_HLDRSZ; + j >= 0; j -= CVMX_COREMASK_HLDRSZ) { + if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0) + start = 1; + if (start) { + printf(" 0x%llx", + (u64)pcm->coremask_bitmap[j / + CVMX_COREMASK_HLDRSZ]); + } + } + + if (start) + found = 1; + + /* + * If the coremask is empty print so it is not + * confusing + */ + if (!found) + printf(""); + printf("\n"); + + return; + } + + for (i = 0; i < CVMX_MAX_USED_CORES_BMP; + i += CVMX_COREMASK_MAX_CORES_PER_NODE) { + printf("%s node %d:", i > 0 ? 
"\n" : "", + cvmx_coremask_core_to_node(i)); + start = 0; + + for (j = i + CVMX_COREMASK_MAX_CORES_PER_NODE - + CVMX_COREMASK_HLDRSZ; + j >= i; + j -= CVMX_COREMASK_HLDRSZ) { + /* Don't start printing until we get a non-zero word. */ + if (pcm->coremask_bitmap[j / CVMX_COREMASK_HLDRSZ] != 0) + start = 1; + + if (start) { + printf(" 0x%llx", (u64)pcm->coremask_bitmap[j / + CVMX_COREMASK_HLDRSZ]); + } + } + + if (start) + found = 1; + } + + i /= CVMX_COREMASK_HLDRSZ; + for (; i < CVMX_COREMASK_BMPSZ; i++) { + if (pcm->coremask_bitmap[i]) { + printf(" EXTRA GARBAGE[%i]: %016llx\n", i, + (u64)pcm->coremask_bitmap[i]); + } + } + + /* If the coremask is empty print so it is not confusing */ + if (!found) + printf(""); + + printf("\n"); +} diff --git a/arch/mips/mach-octeon/include/mach/cvmx-coremask.h b/arch/mips/mach-octeon/include/mach/cvmx-coremask.h new file mode 100644 index 0000000000..c34ff46d3a --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-coremask.h @@ -0,0 +1,752 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/** + * Module to support operations on bitmap of cores. Coremask can be used to + * select a specific core, a group of cores, or all available cores, for + * initialization and differentiation of roles within a single shared binary + * executable image. + * + * The core numbers used in this file are the same value as what is found in + * the COP0_EBASE register and the rdhwr 0 instruction. + * + * For the CN78XX and other multi-node environments the core numbers are not + * contiguous. The core numbers for the CN78XX are as follows: + * + * Node 0: Cores 0 - 47 + * Node 1: Cores 128 - 175 + * Node 2: Cores 256 - 303 + * Node 3: Cores 384 - 431 + * + * The coremask environment generally tries to be node agnostic in order to + * provide future compatibility if more cores are added to future processors + * or more nodes are supported. + */ + +#ifndef __CVMX_COREMASK_H__ +#define __CVMX_COREMASK_H__ + +#include "cvmx-regs.h" + +/* bits per holder */ +#define CVMX_COREMASK_HLDRSZ ((int)(sizeof(u64) * 8)) + +/** Maximum allowed cores per node */ +#define CVMX_COREMASK_MAX_CORES_PER_NODE (1 << CVMX_NODE_NO_SHIFT) + +/** Maximum number of bits actually used in the coremask */ +#define CVMX_MAX_USED_CORES_BMP (1 << (CVMX_NODE_NO_SHIFT + CVMX_NODE_BITS)) + +/* the number of valid bits in and the mask of the most significant holder */ +#define CVMX_COREMASK_MSHLDR_NBITS \ + (CVMX_MIPS_MAX_CORES % CVMX_COREMASK_HLDRSZ) + +#define CVMX_COREMASK_MSHLDR_MASK \ + ((CVMX_COREMASK_MSHLDR_NBITS) ? \ + (((u64)1 << CVMX_COREMASK_MSHLDR_NBITS) - 1) : \ + ((u64)-1)) + +/* cvmx_coremask size in u64 */ +#define CVMX_COREMASK_BMPSZ \ + ((int)(CVMX_MIPS_MAX_CORES / CVMX_COREMASK_HLDRSZ + \ + (CVMX_COREMASK_MSHLDR_NBITS != 0))) + +#define CVMX_COREMASK_USED_BMPSZ \ + (CVMX_MAX_USED_CORES_BMP / CVMX_COREMASK_HLDRSZ) + +#define CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core) \ + ((((node) << CVMX_NODE_NO_SHIFT) + (core)) / CVMX_COREMASK_HLDRSZ) +/** + * Maximum available coremask. 
+ */ +#define CVMX_COREMASK_MAX \ + { { \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0x0000FFFFFFFFFFFF, 0, \ + 0, 0, \ + 0, 0, \ + 0, 0, \ + 0, 0} } + +/** + * Empty coremask + */ +#define CVMX_COREMASK_EMPTY \ + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} } + +struct cvmx_coremask { + u64 coremask_bitmap[CVMX_COREMASK_BMPSZ]; +}; + +/** + * Macro to iterate through all available cores in a coremask + * + * @param core - core variable to use to iterate + * @param pcm - pointer to core mask + * + * Use this like a for statement + */ +#define cvmx_coremask_for_each_core(core, pcm) \ + for ((core) = -1; \ + (core) = cvmx_coremask_next_core((core), pcm), \ + (core) >= 0;) + +/** + * Given a node and node mask, return the next available node. + * + * @param node starting node number + * @param node_mask node mask to use to find the next node + * + * @return next node number or -1 if no more nodes are available + */ +static inline int cvmx_coremask_next_node(int node, u8 node_mask) +{ + int next_offset; + + next_offset = __builtin_ffs(node_mask >> (node + 1)); + if (next_offset == 0) + return -1; + else + return node + next_offset; +} + +/** + * Iterate through all nodes in a node mask + * + * @param node node iterator variable + * @param node_mask mask to use for iterating + * + * Use this like a for statement + */ +#define cvmx_coremask_for_each_node(node, node_mask) \ + for ((node) = __builtin_ffs(node_mask) - 1; \ + (node) >= 0 && (node) < CVMX_MAX_NODES; \ + (node) = cvmx_coremask_next_node(node, node_mask)) + +/** + * Is ``core'' set in the coremask? + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 1 if core is set and 0 if not. + */ +static inline int cvmx_coremask_is_core_set(const struct cvmx_coremask *pcm, + int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + + return (pcm->coremask_bitmap[i] & ((u64)1 << n)) != 0; +} + +/** + * Is ``current core'' set in the coremask? + * + * @param pcm is the pointer to the coremask. + * @return 1 if core is set and 0 if not. + */ +static inline int cvmx_coremask_is_self_set(const struct cvmx_coremask *pcm) +{ + return cvmx_coremask_is_core_set(pcm, (int)cvmx_get_core_num()); +} + +/** + * Is coremask empty? + * @param pcm is the pointer to the coremask. + * @return 1 if *pcm is empty (all zeros), 0 if not empty. + */ +static inline int cvmx_coremask_is_empty(const struct cvmx_coremask *pcm) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if (pcm->coremask_bitmap[i] != 0) + return 0; + + return 1; +} + +/** + * Set ``core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. + */ +static inline int cvmx_coremask_set_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] |= ((u64)1 << n); + + return 0; +} + +/** + * Set ``current core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_set_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_set_core(pcm, (int)cvmx_get_core_num()); +} + +/** + * Clear ``core'' from the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. 
+ */ +static inline int cvmx_coremask_clear_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] &= ~((u64)1 << n); + + return 0; +} + +/** + * Clear ``current core'' from the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_clear_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_clear_core(pcm, cvmx_get_core_num()); +} + +/** + * Toggle ``core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @param core + * @return 0. + */ +static inline int cvmx_coremask_toggle_core(struct cvmx_coremask *pcm, int core) +{ + int n, i; + + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + pcm->coremask_bitmap[i] ^= ((u64)1 << n); + + return 0; +} + +/** + * Toggle ``current core'' in the coremask. + * + * @param pcm is the pointer to the coremask. + * @return 0. + */ +static inline int cvmx_coremask_toggle_self(struct cvmx_coremask *pcm) +{ + return cvmx_coremask_toggle_core(pcm, cvmx_get_core_num()); +} + +/** + * Set the lower 64-bit of the coremask. + * @param pcm pointer to coremask + * @param coremask_64 64-bit coremask to apply to the first node (0) + */ +static inline void cvmx_coremask_set64(struct cvmx_coremask *pcm, + u64 coremask_64) +{ + pcm->coremask_bitmap[0] = coremask_64; +} + +/** + * Set the 64-bit of the coremask for a particular node. + * @param pcm pointer to coremask + * @param node node to set + * @param coremask_64 64-bit coremask to apply to the specified node + */ +static inline void cvmx_coremask_set64_node(struct cvmx_coremask *pcm, + u8 node, + u64 coremask_64) +{ + pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)] = + coremask_64; +} + +/** + * Gets the lower 64-bits of the coremask + * + * @param[in] pcm - pointer to coremask + * @return 64-bit coremask for the first node + */ +static inline u64 cvmx_coremask_get64(const struct cvmx_coremask *pcm) +{ + return pcm->coremask_bitmap[0]; +} + +/** + * Gets the lower 64-bits of the coremask for the specified node + * + * @param[in] pcm - pointer to coremask + * @param node - node to get coremask for + * @return 64-bit coremask for the first node + */ +static inline u64 cvmx_coremask_get64_node(const struct cvmx_coremask *pcm, + u8 node) +{ + return pcm->coremask_bitmap[CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0)]; +} + +/** + * Gets the lower 32-bits of the coremask for compatibility + * + * @param[in] pcm - pointer to coremask + * @return 32-bit coremask for the first node + * @deprecated This function is to maintain compatibility with older + * SDK applications and may disappear at some point. + * This function is not compatible with the CN78XX or any other + * Octeon device with more than 32 cores. + */ +static inline u32 cvmx_coremask_get32(const struct cvmx_coremask *pcm) +{ + return pcm->coremask_bitmap[0] & 0xffffffff; +} + +/* + * cvmx_coremask_cmp() returns an integer less than, equal to, or + * greater than zero if *pcm1 is found, respectively, to be less than, + * to match, or be greater than *pcm2. + */ +static inline int cvmx_coremask_cmp(const struct cvmx_coremask *pcm1, + const struct cvmx_coremask *pcm2) +{ + int i; + + /* Start from highest node for arithemtically correct result */ + for (i = CVMX_COREMASK_USED_BMPSZ - 1; i >= 0; i--) + if (pcm1->coremask_bitmap[i] != pcm2->coremask_bitmap[i]) { + return (pcm1->coremask_bitmap[i] > + pcm2->coremask_bitmap[i]) ? 
1 : -1; + } + + return 0; +} + +/* + * cvmx_coremask_OPx(pcm1, pcm2[, pcm3]), where OPx can be + * - and + * - or + * - xor + * - not + * ... + * For binary operators, pcm3 <-- pcm1 OPX pcm2. + * For unaries, pcm2 <-- OPx pcm1. + */ +#define CVMX_COREMASK_BINARY_DEFUN(binary_op, op) \ + static inline int cvmx_coremask_##binary_op( \ + struct cvmx_coremask *pcm1, \ + const struct cvmx_coremask *pcm2, \ + const struct cvmx_coremask *pcm3) \ + { \ + int i; \ + \ + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) \ + pcm1->coremask_bitmap[i] = \ + pcm2->coremask_bitmap[i] \ + op \ + pcm3->coremask_bitmap[i]; \ + \ + return 0; \ + } + +#define CVMX_COREMASK_UNARY_DEFUN(unary_op, op) \ + static inline int cvmx_coremask_##unary_op( \ + struct cvmx_coremask *pcm1, \ + const struct cvmx_coremask *pcm2) \ + { \ + int i; \ + \ + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) \ + pcm1->coremask_bitmap[i] = \ + op \ + pcm2->coremask_bitmap[i]; \ + \ + return 0; \ + } + +/* cvmx_coremask_and(pcm1, pcm2, pcm3): pcm1 = pmc2 & pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(and, &) +/* cvmx_coremask_or(pcm1, pcm2, pcm3): pcm1 = pmc2 | pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(or, |) +/* cvmx_coremask_xor(pcm1, pcm2, pcm3): pcm1 = pmc2 ^ pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(xor, ^) +/* cvmx_coremask_maskoff(pcm1, pcm2, pcm3): pcm1 = pmc2 & ~pmc3 */ +CVMX_COREMASK_BINARY_DEFUN(maskoff, & ~) +/* cvmx_coremask_not(pcm1, pcm2): pcm1 = ~pcm2 */ +CVMX_COREMASK_UNARY_DEFUN(not, ~) +/* cvmx_coremask_fill(pcm1, pcm2): pcm1 = -1 */ +CVMX_COREMASK_UNARY_DEFUN(fill, -1 |) +/* cvmx_coremask_clear(pcm1, pcm2): pcm1 = 0 */ +CVMX_COREMASK_UNARY_DEFUN(clear, 0 &) +/* cvmx_coremask_dup(pcm1, pcm2): pcm1 = pcm2 */ +CVMX_COREMASK_UNARY_DEFUN(dup, +) + +/* + * Macros using the unary functions defined w/ + * CVMX_COREMASK_UNARY_DEFUN + * - set *pcm to its complement + * - set all bits in *pcm to 0 + * - set all (valid) bits in *pcm to 1 + */ +#define cvmx_coremask_complement(pcm) cvmx_coremask_not(pcm, pcm) +/* On clear, even clear the unused bits */ +#define cvmx_coremask_clear_all(pcm) \ + *(pcm) = (struct cvmx_coremask)CVMX_COREMASK_EMPTY +#define cvmx_coremask_set_all(pcm) cvmx_coremask_fill(pcm, NULL) + +/* + * convert a string of hex digits to struct cvmx_coremask + * + * @param pcm + * @param hexstr can be + * - "[1-9A-Fa-f][0-9A-Fa-f]*", or + * - "-1" to set the bits for all the cores. + * return + * 0 for success, + * -1 for string too long (i.e., hexstr takes more bits than + * CVMX_MIPS_MAX_CORES), + * -2 for conversion problems from hex string to an unsigned + * long long, e.g., non-hex char in hexstr, and + * -3 for hexstr starting with '0'. + * NOTE: + * This function clears the bitmask in *pcm before the conversion. + */ +int cvmx_coremask_str2bmp(struct cvmx_coremask *pcm, char *hexstr); + +/* + * convert a struct cvmx_coremask to a string of hex digits + * + * @param pcm + * @param hexstr is "[1-9A-Fa-f][0-9A-Fa-f]*" + * + * return 0. + */ +int cvmx_coremask_bmp2str(const struct cvmx_coremask *pcm, char *hexstr); + +/* + * Returns the index of the lowest bit in a coremask holder. + */ +static inline int cvmx_coremask_lowest_bit(u64 h) +{ + return __builtin_ctzll(h); +} + +/* + * Returns the 0-based index of the highest bit in a coremask holder. + */ +static inline int cvmx_coremask_highest_bit(u64 h) +{ + return (64 - __builtin_clzll(h) - 1); +} + +/** + * Returns the last core within the coremask and -1 when the coremask + * is empty. 
+ * + * @param[in] pcm - pointer to coremask + * @returns last core set in the coremask or -1 if all clear + * + */ +static inline int cvmx_coremask_get_last_core(const struct cvmx_coremask *pcm) +{ + int i; + int found = -1; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) { + if (pcm->coremask_bitmap[i]) + found = i; + } + + if (found == -1) + return -1; + + return found * CVMX_COREMASK_HLDRSZ + + cvmx_coremask_highest_bit(pcm->coremask_bitmap[found]); +} + +/** + * Returns the first core within the coremask and -1 when the coremask + * is empty. + * + * @param[in] pcm - pointer to coremask + * @returns first core set in the coremask or -1 if all clear + * + */ +static inline int cvmx_coremask_get_first_core(const struct cvmx_coremask *pcm) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if (pcm->coremask_bitmap[i]) + break; + + if (i == CVMX_COREMASK_USED_BMPSZ) + return -1; + + return i * CVMX_COREMASK_HLDRSZ + + cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]); +} + +/** + * Given a core and coremask, return the next available core in the coremask + * or -1 if none are available. + * + * @param core - starting core to check (can be -1 for core 0) + * @param pcm - pointer to coremask to check for the next core. + * + * @return next core following the core parameter or -1 if no more cores. + */ +static inline int cvmx_coremask_next_core(int core, + const struct cvmx_coremask *pcm) +{ + int n, i; + + core++; + n = core % CVMX_COREMASK_HLDRSZ; + i = core / CVMX_COREMASK_HLDRSZ; + + if (pcm->coremask_bitmap[i] != 0) { + for (; n < CVMX_COREMASK_HLDRSZ; n++) + if (pcm->coremask_bitmap[i] & (1ULL << n)) + return ((i * CVMX_COREMASK_HLDRSZ) + n); + } + + for (i = i + 1; i < CVMX_COREMASK_USED_BMPSZ; i++) { + if (pcm->coremask_bitmap[i] != 0) + return (i * CVMX_COREMASK_HLDRSZ) + + cvmx_coremask_lowest_bit(pcm->coremask_bitmap[i]); + } + return -1; +} + +/** + * Compute coremask for count cores starting with start_core. + * Note that the coremask for multi-node processors may have + * gaps. + * + * @param[out] pcm pointer to core mask data structure + * @param start_core starting code number + * @param count number of cores + * + */ +static inline void cvmx_coremask_set_cores(struct cvmx_coremask *pcm, + unsigned int start_core, + unsigned int count) +{ + int node; + int core; /** Current core in node */ + int cores_in_node; + int i; + + assert(CVMX_MAX_CORES < CVMX_COREMASK_HLDRSZ); + node = start_core >> CVMX_NODE_NO_SHIFT; + core = start_core & ((1 << CVMX_NODE_NO_SHIFT) - 1); + assert(core < CVMX_MAX_CORES); + + cvmx_coremask_clear_all(pcm); + while (count > 0) { + if (count + core > CVMX_MAX_CORES) + cores_in_node = CVMX_MAX_CORES - core; + else + cores_in_node = count; + + i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node, core); + pcm->coremask_bitmap[i] = ((1ULL << cores_in_node) - 1) << core; + count -= cores_in_node; + core = 0; + node++; + } +} + +/** + * Makes a copy of a coremask + * + * @param[out] dest - pointer to destination coremask + * @param[in] src - pointer to source coremask + */ +static inline void cvmx_coremask_copy(struct cvmx_coremask *dest, + const struct cvmx_coremask *src) +{ + memcpy(dest, src, sizeof(*dest)); +} + +/** + * Test to see if the specified core is first core in coremask. 
+ * + * @param[in] pcm pointer to the coremask to test against + * @param[in] core core to check + * + * @return 1 if the core is first core in the coremask, 0 otherwise + * + */ +static inline int cvmx_coremask_is_core_first_core(const struct cvmx_coremask *pcm, + unsigned int core) +{ + int n, i; + + n = core / CVMX_COREMASK_HLDRSZ; + + for (i = 0; i < n; i++) + if (pcm->coremask_bitmap[i] != 0) + return 0; + + /* From now on we only care about the core number within an entry */ + core &= (CVMX_COREMASK_HLDRSZ - 1); + if (__builtin_ffsll(pcm->coremask_bitmap[n]) < (core + 1)) + return 0; + + return (__builtin_ffsll(pcm->coremask_bitmap[n]) == core + 1); +} + +/* + * NOTE: + * cvmx_coremask_is_first_core() was retired due to improper usage. + * For inquiring about the current core being the initializing + * core for an application, use cvmx_is_init_core(). + * For simply inquring if the current core is numerically + * lowest in a given mask, use : + * cvmx_coremask_is_core_first_core( pcm, dvmx_get_core_num()) + */ + +/** + * Returns the number of 1 bits set in a coremask + * + * @param[in] pcm - pointer to core mask + * + * @return number of bits set in the coremask + */ +static inline int cvmx_coremask_get_core_count(const struct cvmx_coremask *pcm) +{ + int i; + int count = 0; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + count += __builtin_popcountll(pcm->coremask_bitmap[i]); + + return count; +} + +/** + * For multi-node systems, return the node a core belongs to. + * + * @param core - core number (0-1023) + * + * @return node number core belongs to + */ +static inline int cvmx_coremask_core_to_node(int core) +{ + return (core >> CVMX_NODE_NO_SHIFT) & CVMX_NODE_MASK; +} + +/** + * Given a core number on a multi-node system, return the core number for a + * particular node. + * + * @param core - global core number + * + * @returns core number local to the node. + */ +static inline int cvmx_coremask_core_on_node(int core) +{ + return (core & ((1 << CVMX_NODE_NO_SHIFT) - 1)); +} + +/** + * Returns if one coremask is a subset of another coremask + * + * @param main - main coremask to test + * @param subset - subset coremask to test + * + * @return 0 if the subset contains cores not in the main coremask or 1 if + * the subset is fully contained in the main coremask. 
+ */ +static inline int cvmx_coremask_is_subset(const struct cvmx_coremask *main, + const struct cvmx_coremask *subset) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if ((main->coremask_bitmap[i] & subset->coremask_bitmap[i]) != + subset->coremask_bitmap[i]) + return 0; + return 1; +} + +/** + * Returns if one coremask intersects another coremask + * + * @param c1 - main coremask to test + * @param c2 - subset coremask to test + * + * @return 1 if coremask c1 intersects coremask c2, 0 if they are exclusive + */ +static inline int cvmx_coremask_intersects(const struct cvmx_coremask *c1, + const struct cvmx_coremask *c2) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_USED_BMPSZ; i++) + if ((c1->coremask_bitmap[i] & c2->coremask_bitmap[i]) != 0) + return 1; + return 0; +} + +/** + * Masks a single node of a coremask + * + * @param pcm - coremask to mask [inout] + * @param node - node number to mask against + */ +static inline void cvmx_coremask_mask_node(struct cvmx_coremask *pcm, int node) +{ + int i; + + for (i = 0; i < CVMX_COREMASK_BMP_NODE_CORE_IDX(node, 0); i++) + pcm->coremask_bitmap[i] = 0; + + for (i = CVMX_COREMASK_BMP_NODE_CORE_IDX(node + 1, 0); + i < CVMX_COREMASK_USED_BMPSZ; i++) + pcm->coremask_bitmap[i] = 0; +} + +/** + * Prints out a coremask in the form of node X: 0x... 0x... + * + * @param[in] pcm - pointer to core mask + * + * @return nothing + */ +void cvmx_coremask_print(const struct cvmx_coremask *pcm); + +static inline void cvmx_coremask_dprint(const struct cvmx_coremask *pcm) +{ + if (IS_ENABLED(DEBUG)) + cvmx_coremask_print(pcm); +} + +struct cvmx_coremask *octeon_get_available_coremask(struct cvmx_coremask *pcm); + +int validate_coremask(struct cvmx_coremask *pcm); + +#endif /* __CVMX_COREMASK_H__ */ From 4b43e7e210ae95c6841751ae42a5a45d5c03212b Mon Sep 17 00:00:00 2001 From: Aaron Williams Date: Thu, 20 Aug 2020 07:22:02 +0200 Subject: [PATCH 25/27] mips: octeon: Add bootmem support This is needed for Linux booting, as the memory infos need to be passed in this bootmem format to the Linux kernel. Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese --- arch/mips/mach-octeon/Makefile | 1 + arch/mips/mach-octeon/cvmx-bootmem.c | 1460 +++++++++++++++++ .../mach-octeon/include/mach/cvmx-bootmem.h | 533 ++++++ 3 files changed, 1994 insertions(+) create mode 100644 arch/mips/mach-octeon/cvmx-bootmem.c create mode 100644 arch/mips/mach-octeon/include/mach/cvmx-bootmem.h diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile index 5155f89a1e..e96f0deb1b 100644 --- a/arch/mips/mach-octeon/Makefile +++ b/arch/mips/mach-octeon/Makefile @@ -9,3 +9,4 @@ obj-y += clock.o obj-y += cpu.o obj-y += dram.o obj-y += cvmx-coremask.o +obj-y += cvmx-bootmem.o diff --git a/arch/mips/mach-octeon/cvmx-bootmem.c b/arch/mips/mach-octeon/cvmx-bootmem.c new file mode 100644 index 0000000000..80bb7ac6c8 --- /dev/null +++ b/arch/mips/mach-octeon/cvmx-bootmem.c @@ -0,0 +1,1460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018-2020 Marvell International Ltd. + */ + +/* + * Simple allocate only memory allocator. Used to allocate memory at + * application start time. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +#define CVMX_MIPS32_SPACE_KSEG0 1L +#define CVMX_MIPS_SPACE_XKPHYS 2LL + +#define CVMX_ADD_SEG(seg, add) ((((u64)(seg)) << 62) | (add)) +#define CVMX_ADD_SEG32(seg, add) (((u32)(seg) << 31) | (u32)(add)) + +/** + * This is the physical location of a struct cvmx_bootmem_desc + * structure in Octeon's memory. Note that dues to addressing + * limits or runtime environment it might not be possible to + * create a C pointer to this structure. + */ +static u64 cvmx_bootmem_desc_addr; + +/** + * This macro returns the size of a member of a structure. + * Logically it is the same as "sizeof(s::field)" in C++, but + * C lacks the "::" operator. + */ +#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field) + +/** + * This macro returns a member of the struct cvmx_bootmem_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the struct cvmx_bootmem_desc to read. + * Regardless of the type of the field, the return type is always + * a u64. + */ +#define CVMX_BOOTMEM_DESC_GET_FIELD(field) \ + __cvmx_bootmem_desc_get(cvmx_bootmem_desc_addr, \ + offsetof(struct cvmx_bootmem_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_desc, field)) + +/** + * This macro writes a member of the struct cvmx_bootmem_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the struct cvmx_bootmem_desc to write. + */ +#define CVMX_BOOTMEM_DESC_SET_FIELD(field, value) \ + __cvmx_bootmem_desc_set(cvmx_bootmem_desc_addr, \ + offsetof(struct cvmx_bootmem_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_desc, field), \ + value) + +/** + * This macro returns a member of the + * struct cvmx_bootmem_named_block_desc structure. These members can't + * be directly addressed as they might be in memory not directly + * reachable. In the case where bootmem is compiled with + * LINUX_HOST, the structure itself might be located on a remote + * Octeon. The argument "field" is the member name of the + * struct cvmx_bootmem_named_block_desc to read. Regardless of the type + * of the field, the return type is always a u64. The "addr" + * parameter is the physical address of the structure. + */ +#define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \ + __cvmx_bootmem_desc_get(addr, \ + offsetof(struct cvmx_bootmem_named_block_desc, field), \ + SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) + +/** + * This macro writes a member of the struct cvmx_bootmem_named_block_desc + * structure. These members can't be directly addressed as + * they might be in memory not directly reachable. In the case + * where bootmem is compiled with LINUX_HOST, the structure + * itself might be located on a remote Octeon. The argument + * "field" is the member name of the + * struct cvmx_bootmem_named_block_desc to write. The "addr" parameter + * is the physical address of the structure. 
+ */
+#define CVMX_BOOTMEM_NAMED_SET_FIELD(addr, field, value)		\
+	__cvmx_bootmem_desc_set(addr,					\
+		offsetof(struct cvmx_bootmem_named_block_desc, field),	\
+		SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field), \
+		value)
+
+/**
+ * This function is the implementation of the get macros defined
+ * for individual structure members. The arguments are generated
+ * by the macros in order to read only the needed memory.
+ *
+ * @param base 64bit physical address of the complete structure
+ * @param offset Offset from the beginning of the structure to the member being
+ *               accessed.
+ * @param size Size of the structure member.
+ *
+ * @return Value of the structure member promoted into a u64.
+ */
+static inline u64 __cvmx_bootmem_desc_get(u64 base, int offset,
+					   int size)
+{
+	base = (1ull << 63) | (base + offset);
+	switch (size) {
+	case 4:
+		return cvmx_read64_uint32(base);
+	case 8:
+		return cvmx_read64_uint64(base);
+	default:
+		return 0;
+	}
+}
+
+/**
+ * This function is the implementation of the set macros defined
+ * for individual structure members. The arguments are generated
+ * by the macros in order to write only the needed memory.
+ *
+ * @param base 64bit physical address of the complete structure
+ * @param offset Offset from the beginning of the structure to the member being
+ *               accessed.
+ * @param size Size of the structure member.
+ * @param value Value to write into the structure
+ */
+static inline void __cvmx_bootmem_desc_set(u64 base, int offset, int size,
+					   u64 value)
+{
+	base = (1ull << 63) | (base + offset);
+	switch (size) {
+	case 4:
+		cvmx_write64_uint32(base, value);
+		break;
+	case 8:
+		cvmx_write64_uint64(base, value);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * This function returns the address of the bootmem descriptor lock.
+ *
+ * @return 64-bit address in KSEG0 of the bootmem descriptor block
+ */
+static inline u64 __cvmx_bootmem_get_lock_addr(void)
+{
+	return (1ull << 63) |
+		(cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc, lock));
+}
+
+/**
+ * This function retrieves the string name of a named block. It is
+ * more complicated than a simple memcpy() since the named block
+ * descriptor may not be directly accessible.
+ *
+ * @param addr Physical address of the named block descriptor
+ * @param str String to receive the named block string name
+ * @param len Length of the string buffer, which must match the length
+ *            stored in the bootmem descriptor.
+ */
+static void CVMX_BOOTMEM_NAMED_GET_NAME(u64 addr, char *str, int len)
+{
+	int l = len;
+	char *ptr = str;
+
+	addr |= (1ull << 63);
+	addr += offsetof(struct cvmx_bootmem_named_block_desc, name);
+	while (l) {
+		/*
+		 * With big-endian in memory byte order, this gives uniform
+		 * results for the CPU in either big- or little-endian mode.
+		 */
+		u64 blob = cvmx_read64_uint64(addr);
+		int sa = 56;
+
+		addr += sizeof(u64);
+		while (l && sa >= 0) {
+			*ptr++ = (char)(blob >> sa);
+			l--;
+			sa -= 8;
+		}
+	}
+	str[len] = 0;
+}
+
+/**
+ * This function stores the string name of a named block. It is
+ * more complicated than a simple memcpy() since the named block
+ * descriptor may not be directly accessible.
+ *
+ * @param addr Physical address of the named block descriptor
+ * @param str String to store into the named block string name
+ * @param len Length of the string buffer, which must match the length
+ *            stored in the bootmem descriptor.
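+ *
+ * A typical call site (sketch; "named_addr" and the block name are
+ * illustrative) pairs this with the name length stored in the bootmem
+ * descriptor:
+ *
+ *	CVMX_BOOTMEM_NAMED_SET_NAME(named_addr, "my-block",
+ *		CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len));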
+ */
+void CVMX_BOOTMEM_NAMED_SET_NAME(u64 addr, const char *str, int len)
+{
+	int l = len;
+
+	addr |= (1ull << 63);
+	addr += offsetof(struct cvmx_bootmem_named_block_desc, name);
+
+	while (l) {
+		/*
+		 * With big-endian in memory byte order, this gives uniform
+		 * results for the CPU in either big- or little-endian mode.
+		 */
+		u64 blob = 0;
+		int sa = 56;
+
+		while (l && sa >= 0) {
+			u64 c = (u8)(*str++);
+
+			l--;
+			if (l == 0)
+				c = 0;
+			blob |= c << sa;
+			sa -= 8;
+		}
+		cvmx_write64_uint64(addr, blob);
+		addr += sizeof(u64);
+	}
+}
+
+/* See header file for descriptions of functions */
+
+/*
+ * Wrapper functions are provided for reading/writing the size and next block
+ * values as these may not be directly addressable (in 32 bit applications, for
+ * instance.)
+ *
+ * Offsets of data elements in bootmem list, must match
+ * struct cvmx_bootmem_block_header
+ */
+#define NEXT_OFFSET 0
+#define SIZE_OFFSET 8
+
+static void cvmx_bootmem_phy_set_size(u64 addr, u64 size)
+{
+	cvmx_write64_uint64((addr + SIZE_OFFSET) | (1ull << 63), size);
+}
+
+static void cvmx_bootmem_phy_set_next(u64 addr, u64 next)
+{
+	cvmx_write64_uint64((addr + NEXT_OFFSET) | (1ull << 63), next);
+}
+
+static u64 cvmx_bootmem_phy_get_size(u64 addr)
+{
+	return cvmx_read64_uint64((addr + SIZE_OFFSET) | (1ull << 63));
+}
+
+static u64 cvmx_bootmem_phy_get_next(u64 addr)
+{
+	return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63));
+}
+
+/**
+ * Check the version information on the bootmem descriptor
+ *
+ * @param exact_match
+ *               Exact major version to check against. A zero means
+ *               check that the version supports named blocks.
+ *
+ * @return Zero if the version is correct. Negative if the version is
+ *         incorrect. Failures also cause a message to be displayed.
+ */
+static int __cvmx_bootmem_check_version(int exact_match)
+{
+	int major_version;
+
+	major_version = CVMX_BOOTMEM_DESC_GET_FIELD(major_version);
+	if (major_version > 3 ||
+	    (exact_match && major_version != exact_match)) {
+		debug("ERROR: Incompatible bootmem descriptor version: %d.%d at addr: 0x%llx\n",
+		      major_version,
+		      (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version),
+		      CAST_ULL(cvmx_bootmem_desc_addr));
+		return -1;
+	} else {
+		return 0;
+	}
+}
+
+/**
+ * Get the low level bootmem descriptor lock. If no locking
+ * is specified in the flags, then nothing is done.
+ *
+ * @param flags CVMX_BOOTMEM_FLAG_NO_LOCKING means this function should do
+ *              nothing. This is used to support nested bootmem calls.
+ */
+static inline void __cvmx_bootmem_lock(u32 flags)
+{
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) {
+		/*
+		 * Unfortunately we can't use the normal cvmx-spinlock code as
+		 * the memory for the bootmem descriptor may not be accessible
+		 * by a C pointer. We use a 64bit XKPHYS address to access the
+		 * memory directly
+		 */
+		u64 lock_addr = (1ull << 63) |
+			(cvmx_bootmem_desc_addr + offsetof(struct cvmx_bootmem_desc,
+							   lock));
+		unsigned int tmp;
+
+		__asm__ __volatile__(".set noreorder\n"
+				     "1: ll %[tmp], 0(%[addr])\n"
+				     "   bnez %[tmp], 1b\n"
+				     "   li %[tmp], 1\n"
+				     "   sc %[tmp], 0(%[addr])\n"
+				     "   beqz %[tmp], 1b\n"
+				     "   nop\n"
+				     ".set reorder\n"
+				     : [tmp] "=&r"(tmp)
+				     : [addr] "r"(lock_addr)
+				     : "memory");
+	}
+}
+
+/**
+ * Release the low level bootmem descriptor lock. If no locking
+ * is specified in the flags, then nothing is done.
+ *
+ * @param flags CVMX_BOOTMEM_FLAG_NO_LOCKING means this function should do
+ *              nothing. This is used to support nested bootmem calls.
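+ *
+ * Callers that need several bootmem operations to be atomic typically
+ * bracket them like this (sketch):
+ *
+ *	__cvmx_bootmem_lock(0);
+ *	... calls made with CVMX_BOOTMEM_FLAG_NO_LOCKING ...
+ *	__cvmx_bootmem_unlock(0);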
+ */ +static inline void __cvmx_bootmem_unlock(u32 flags) +{ + if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING)) { + /* + * Unfortunately we can't use the normal cvmx-spinlock code as + * the memory for the bootmem descriptor may be not accessible + * by a C pointer. We use a 64bit XKPHYS address to access the + * memory directly + */ + u64 lock_addr = __cvmx_bootmem_get_lock_addr(); + + CVMX_SYNCW; + __asm__ __volatile__("sw $0, 0(%[addr])\n" + : : [addr] "r"(lock_addr) + : "memory"); + CVMX_SYNCW; + } +} + +/* + * Some of the cvmx-bootmem functions dealing with C pointers are not + * supported when we are compiling for CVMX_BUILD_FOR_LINUX_HOST. This + * ifndef removes these functions when they aren't needed. + * + * This functions takes an address range and adjusts it as necessary + * to match the ABI that is currently being used. This is required to + * ensure that bootmem_alloc* functions only return valid pointers for + * 32 bit ABIs + */ +static int __cvmx_validate_mem_range(u64 *min_addr_ptr, + u64 *max_addr_ptr) +{ + u64 max_phys = (1ull << 29) - 0x10; /* KSEG0 */ + + *min_addr_ptr = min_t(u64, max_t(u64, *min_addr_ptr, 0x0), max_phys); + if (!*max_addr_ptr) { + *max_addr_ptr = max_phys; + } else { + *max_addr_ptr = max_t(u64, min_t(u64, *max_addr_ptr, + max_phys), 0x0); + } + + return 0; +} + +u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr) +{ + s64 address; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, 0); + if (address > 0) + return address; + else + return 0; +} + +void *cvmx_bootmem_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr) +{ + s64 address; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + address = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, 0); + + if (address > 0) + return cvmx_phys_to_ptr(address); + else + return NULL; +} + +void *cvmx_bootmem_alloc_address(u64 size, u64 address, + u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, address, + address + size); +} + +void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, + node << CVMX_NODE_MEM_SHIFT, + ((node + 1) << CVMX_NODE_MEM_SHIFT) - 1); +} + +void *cvmx_bootmem_alloc(u64 size, u64 alignment) +{ + return cvmx_bootmem_alloc_range(size, alignment, 0, 0); +} + +void *cvmx_bootmem_alloc_named_range_once(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name, + void (*init)(void *)) +{ + u64 named_block_desc_addr; + void *ptr; + s64 addr; + + __cvmx_bootmem_lock(0); + + __cvmx_validate_mem_range(&min_addr, &max_addr); + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + if (named_block_desc_addr) { + addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_desc_addr, + base_addr); + __cvmx_bootmem_unlock(0); + return cvmx_phys_to_ptr(addr); + } + + addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr, + align, name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + if (addr < 0) { + __cvmx_bootmem_unlock(0); + return NULL; + } + ptr = cvmx_phys_to_ptr(addr); + + if (init) + init(ptr); + else + memset(ptr, 0, size); + + __cvmx_bootmem_unlock(0); + return ptr; +} + +void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name, u32 flags) +{ + s64 addr; + + __cvmx_validate_mem_range(&min_addr, &max_addr); + addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, 
max_addr, + align, name, flags); + if (addr >= 0) + return cvmx_phys_to_ptr(addr); + else + return NULL; +} + +void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name) +{ + return cvmx_bootmem_alloc_named_range_flags(size, min_addr, max_addr, + align, name, 0); +} + +void *cvmx_bootmem_alloc_named_address(u64 size, u64 address, + const char *name) +{ + return cvmx_bootmem_alloc_named_range(size, address, address + size, + 0, name); +} + +void *cvmx_bootmem_alloc_named(u64 size, u64 alignment, + const char *name) +{ + return cvmx_bootmem_alloc_named_range(size, 0, 0, alignment, name); +} + +void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment, + const char *name, u32 flags) +{ + return cvmx_bootmem_alloc_named_range_flags(size, 0, 0, alignment, + name, flags); +} + +int cvmx_bootmem_free_named(const char *name) +{ + return cvmx_bootmem_phy_named_block_free(name, 0); +} + +/** + * Find a named block with flags + * + * @param name is the block name + * @param flags indicates the need to use locking during search + * @return pointer to named block descriptor + * + * Note: this function returns a pointer to a static structure, + * and is therefore not re-entrant. + * Making this function re-entrant will break backward compatibility. + */ +const struct cvmx_bootmem_named_block_desc * +__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags) +{ + static struct cvmx_bootmem_named_block_desc desc; + u64 named_addr = cvmx_bootmem_phy_named_block_find(name, flags); + + if (named_addr) { + desc.base_addr = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, + base_addr); + desc.size = CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size); + strncpy(desc.name, name, sizeof(desc.name)); + desc.name[sizeof(desc.name) - 1] = 0; + return &desc; + } else { + return NULL; + } +} + +const struct cvmx_bootmem_named_block_desc * +cvmx_bootmem_find_named_block(const char *name) +{ + return __cvmx_bootmem_find_named_block_flags(name, 0); +} + +void cvmx_bootmem_print_named(void) +{ + cvmx_bootmem_phy_named_block_print(); +} + +int cvmx_bootmem_init(u64 mem_desc_addr) +{ + if (!cvmx_bootmem_desc_addr) + cvmx_bootmem_desc_addr = mem_desc_addr; + + return 0; +} + +u64 cvmx_bootmem_available_mem(u64 min_block_size) +{ + return cvmx_bootmem_phy_available_mem(min_block_size); +} + +/* + * The cvmx_bootmem_phy* functions below return 64 bit physical + * addresses, and expose more features that the cvmx_bootmem_functions + * above. These are required for full memory space access in 32 bit + * applications, as well as for using some advance features. Most + * applications should not need to use these. + */ + +s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min, + u64 address_max, u64 alignment, + u32 flags) +{ + u64 head_addr, ent_addr, ent_size; + u64 target_ent_addr = 0, target_prev_addr = 0; + u64 target_size = ~0ull; + u64 free_start, free_end; + u64 next_addr, prev_addr = 0; + u64 new_ent_addr = 0, new_ent_size; + u64 desired_min_addr, usable_max; + u64 align, align_mask; + + debug("%s: req_size: 0x%llx, min_addr: 0x%llx, max_addr: 0x%llx, align: 0x%llx\n", + __func__, CAST_ULL(req_size), CAST_ULL(address_min), + CAST_ULL(address_max), CAST_ULL(alignment)); + + if (__cvmx_bootmem_check_version(0)) + return -1; + + /* + * Do a variety of checks to validate the arguments. The + * allocator code will later assume that these checks have + * been made. 
We validate that the requested constraints are
+	 * not self-contradictory before we look through the list of
+	 * available memory
+	 */
+
+	/* 0 is not a valid req_size for this allocator */
+	if (!req_size)
+		return -1;
+
+	/* Round req_size up to multiple of minimum alignment bytes */
+	req_size = (req_size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) &
+		~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1);
+
+	/* Make sure alignment is power of 2, and at least the minimum */
+	for (align = CVMX_BOOTMEM_ALIGNMENT_SIZE;
+	     align < (1ull << 48);
+	     align <<= 1) {
+		if (align >= alignment)
+			break;
+	}
+
+	align_mask = ~(align - 1);
+
+	/*
+	 * Adjust address minimum based on requested alignment (round
+	 * up to meet alignment). Do this here so we can reject
+	 * impossible requests up front. (NOP for address_min == 0)
+	 */
+	address_min = (address_min + (align - 1)) & align_mask;
+
+	/*
+	 * Convert !0 address_min and 0 address_max to special case of
+	 * range that specifies an exact memory block to allocate. Do
+	 * this before other checks and adjustments so that this
+	 * transformation will be validated
+	 */
+	if (address_min && !address_max)
+		address_max = address_min + req_size;
+	else if (!address_min && !address_max)
+		address_max = ~0ull;	/* If no limits given, use max */
+
+	/*
+	 * Reject inconsistent args. We have adjusted these, so this
+	 * may fail due to our internal changes even if this check
+	 * would pass for the values the user supplied.
+	 */
+	if (req_size > address_max - address_min)
+		return -1;
+
+	__cvmx_bootmem_lock(flags);
+
+	/* Walk through the list entries to find the right fit */
+	head_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);
+
+	for (ent_addr = head_addr;
+	     ent_addr != 0ULL && ent_addr < address_max;
+	     prev_addr = ent_addr,
+	     ent_addr = cvmx_bootmem_phy_get_next(ent_addr)) {
+		/* Raw free block size */
+		ent_size = cvmx_bootmem_phy_get_size(ent_addr);
+		next_addr = cvmx_bootmem_phy_get_next(ent_addr);
+
+		/* Validate the free list ascending order */
+		if (ent_size < CVMX_BOOTMEM_ALIGNMENT_SIZE ||
+		    (next_addr && ent_addr > next_addr)) {
+			debug("ERROR: %s: bad free list ent: %#llx, next: %#llx\n",
+			      __func__, CAST_ULL(ent_addr),
+			      CAST_ULL(next_addr));
+			goto error_out;
+		}
+
+		/* adjust free block edges for alignment */
+		free_start = (ent_addr + align - 1) & align_mask;
+		free_end = (ent_addr + ent_size) & align_mask;
+
+		/* check that free block is large enough */
+		if ((free_start + req_size) > free_end)
+			continue;
+
+		/* check that desired start is within the free block */
+		if (free_end < address_min || free_start > address_max)
+			continue;
+		if ((free_end - address_min) < req_size)
+			continue;
+		if ((address_max - free_start) < req_size)
+			continue;
+
+		/* Found usable free block */
+		target_ent_addr = ent_addr;
+		target_prev_addr = prev_addr;
+		target_size = ent_size;
+
+		/* Continue looking for highest/best block that fits */
+	}
+
+	/* Bail if the search has resulted in no eligible free blocks */
+	if (target_ent_addr == 0) {
+		debug("%s: eligible free block not found\n", __func__);
+		goto error_out;
+	}
+
+	/* Found the free block to allocate from */
+	ent_addr = target_ent_addr;
+	prev_addr = target_prev_addr;
+	ent_size = target_size;
+
+	debug("%s: using free block at %#010llx size %#llx\n",
+	      __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size));
+
+	/* Always allocate from the end of a free block */
+	usable_max = min_t(u64, address_max, ent_addr + ent_size);
+	desired_min_addr = usable_max - req_size;
+	desired_min_addr &= align_mask;
+
+	/* Split current free block into
up to 3 free blocks */ + + /* Check for head room */ + if (desired_min_addr > ent_addr) { + /* Create a new free block at the allocation address */ + new_ent_addr = desired_min_addr; + new_ent_size = ent_size - (desired_min_addr - ent_addr); + + cvmx_bootmem_phy_set_next(new_ent_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size); + + /* Split out head room into a new free block */ + ent_size -= new_ent_size; + cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr); + cvmx_bootmem_phy_set_size(ent_addr, ent_size); + + debug("%s: splitting head, addr %#llx size %#llx\n", + __func__, CAST_ULL(ent_addr), CAST_ULL(ent_size)); + + /* Make the allocation target the current free block */ + prev_addr = ent_addr; + ent_addr = new_ent_addr; + ent_size = new_ent_size; + } + + /* Check for tail room */ + if ((desired_min_addr + req_size) < (ent_addr + ent_size)) { + new_ent_addr = ent_addr + req_size; + new_ent_size = ent_size - req_size; + + /* Create a new free block from tail room */ + cvmx_bootmem_phy_set_next(new_ent_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + cvmx_bootmem_phy_set_size(new_ent_addr, new_ent_size); + + debug("%s: splitting tail, addr %#llx size %#llx\n", + __func__, CAST_ULL(new_ent_addr), CAST_ULL(new_ent_size)); + + /* Adjust the current block to exclude tail room */ + ent_size = ent_size - new_ent_size; + cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr); + cvmx_bootmem_phy_set_size(ent_addr, ent_size); + } + + /* The current free block IS the allocation target */ + if (desired_min_addr != ent_addr || ent_size != req_size) + debug("ERROR: %s: internal error - addr %#llx %#llx size %#llx %#llx\n", + __func__, CAST_ULL(desired_min_addr), CAST_ULL(ent_addr), + CAST_ULL(ent_size), CAST_ULL(req_size)); + + /* Remove the current free block from list */ + if (prev_addr) { + cvmx_bootmem_phy_set_next(prev_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + } else { + /* head of list being returned, so update head ptr */ + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, + cvmx_bootmem_phy_get_next(ent_addr)); + } + + __cvmx_bootmem_unlock(flags); + debug("%s: allocated size: %#llx, at addr: %#010llx\n", + __func__, + CAST_ULL(req_size), + CAST_ULL(desired_min_addr)); + + return desired_min_addr; + +error_out: + /* Requested memory not found or argument error */ + __cvmx_bootmem_unlock(flags); + return -1; +} + +int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags) +{ + u64 cur_addr; + u64 prev_addr = 0; /* zero is invalid */ + int retval = 0; + + debug("%s addr: %#llx, size: %#llx\n", __func__, + CAST_ULL(phy_addr), CAST_ULL(size)); + + if (__cvmx_bootmem_check_version(0)) + return 0; + + /* 0 is not a valid size for this allocator */ + if (!size || !phy_addr) + return 0; + + /* Round size up to mult of minimum alignment bytes */ + size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) & + ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1); + + __cvmx_bootmem_lock(flags); + cur_addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + if (cur_addr == 0 || phy_addr < cur_addr) { + /* add at front of list - special case with changing head ptr */ + if (cur_addr && phy_addr + size > cur_addr) + goto bootmem_free_done; /* error, overlapping section */ + else if (phy_addr + size == cur_addr) { + /* Add to front of existing first block */ + cvmx_bootmem_phy_set_next(phy_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + cvmx_bootmem_phy_set_size(phy_addr, + cvmx_bootmem_phy_get_size(cur_addr) + size); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr); + + } else { + /* New block 
before first block */ + /* OK if cur_addr is 0 */ + cvmx_bootmem_phy_set_next(phy_addr, cur_addr); + cvmx_bootmem_phy_set_size(phy_addr, size); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, phy_addr); + } + retval = 1; + goto bootmem_free_done; + } + + /* Find place in list to add block */ + while (cur_addr && phy_addr > cur_addr) { + prev_addr = cur_addr; + cur_addr = cvmx_bootmem_phy_get_next(cur_addr); + } + + if (!cur_addr) { + /* + * We have reached the end of the list, add on to end, checking + * to see if we need to combine with last block + */ + if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) { + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(prev_addr) + size); + } else { + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + cvmx_bootmem_phy_set_size(phy_addr, size); + cvmx_bootmem_phy_set_next(phy_addr, 0); + } + retval = 1; + goto bootmem_free_done; + } else { + /* + * insert between prev and cur nodes, checking for merge with + * either/both + */ + if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) == phy_addr) { + /* Merge with previous */ + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(prev_addr) + size); + if (phy_addr + size == cur_addr) { + /* Also merge with current */ + cvmx_bootmem_phy_set_size(prev_addr, + cvmx_bootmem_phy_get_size(cur_addr) + + cvmx_bootmem_phy_get_size(prev_addr)); + cvmx_bootmem_phy_set_next(prev_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + } + retval = 1; + goto bootmem_free_done; + } else if (phy_addr + size == cur_addr) { + /* Merge with current */ + cvmx_bootmem_phy_set_size(phy_addr, + cvmx_bootmem_phy_get_size(cur_addr) + size); + cvmx_bootmem_phy_set_next(phy_addr, + cvmx_bootmem_phy_get_next(cur_addr)); + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + retval = 1; + goto bootmem_free_done; + } + + /* It is a standalone block, add in between prev and cur */ + cvmx_bootmem_phy_set_size(phy_addr, size); + cvmx_bootmem_phy_set_next(phy_addr, cur_addr); + cvmx_bootmem_phy_set_next(prev_addr, phy_addr); + } + retval = 1; + +bootmem_free_done: + __cvmx_bootmem_unlock(flags); + return retval; +} + +void cvmx_bootmem_phy_list_print(void) +{ + u64 addr; + + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + printf("\n\n\nPrinting bootmem block list, descriptor: 0x%llx, head is 0x%llx\n", + CAST_ULL(cvmx_bootmem_desc_addr), CAST_ULL(addr)); + printf("Descriptor version: %d.%d\n", + (int)CVMX_BOOTMEM_DESC_GET_FIELD(major_version), + (int)CVMX_BOOTMEM_DESC_GET_FIELD(minor_version)); + if (CVMX_BOOTMEM_DESC_GET_FIELD(major_version) > 3) + debug("Warning: Bootmem descriptor version is newer than expected\n"); + + if (!addr) + printf("mem list is empty!\n"); + + while (addr) { + printf("Block address: 0x%08llx, size: 0x%08llx, next: 0x%08llx\n", CAST_ULL(addr), + CAST_ULL(cvmx_bootmem_phy_get_size(addr)), + CAST_ULL(cvmx_bootmem_phy_get_next(addr))); + addr = cvmx_bootmem_phy_get_next(addr); + } + printf("\n\n"); +} + +u64 cvmx_bootmem_phy_available_mem(u64 min_block_size) +{ + u64 addr; + + u64 available_mem = 0; + + __cvmx_bootmem_lock(0); + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + while (addr) { + if (cvmx_bootmem_phy_get_size(addr) >= min_block_size) + available_mem += cvmx_bootmem_phy_get_size(addr); + addr = cvmx_bootmem_phy_get_next(addr); + } + __cvmx_bootmem_unlock(0); + return available_mem; +} + +u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags) +{ + u64 result = 0; + + debug("%s: %s\n", __func__, name); + + __cvmx_bootmem_lock(flags); + if 
(!__cvmx_bootmem_check_version(3)) { + int i; + u64 named_block_array_addr = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr); + int num_blocks = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks); + int name_length = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len); + u64 named_addr = named_block_array_addr; + + for (i = 0; i < num_blocks; i++) { + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_addr, size); + if (name && named_size) { + char name_tmp[name_length + 1]; + + CVMX_BOOTMEM_NAMED_GET_NAME(named_addr, + name_tmp, + name_length); + if (!strncmp(name, name_tmp, name_length)) { + result = named_addr; + break; + } + } else if (!name && !named_size) { + result = named_addr; + break; + } + + named_addr += + sizeof(struct cvmx_bootmem_named_block_desc); + } + } + __cvmx_bootmem_unlock(flags); + return result; +} + +int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags) +{ + u64 named_block_addr; + + if (__cvmx_bootmem_check_version(3)) + return 0; + + debug("%s: %s\n", __func__, name); + + /* + * Take lock here, as name lookup/block free/name free need to be + * atomic + */ + __cvmx_bootmem_lock(flags); + + named_block_addr = cvmx_bootmem_phy_named_block_find(name, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (named_block_addr) { + u64 named_addr = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, + base_addr); + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size); + + debug("%s: %s, base: 0x%llx, size: 0x%llx\n", + __func__, name, CAST_ULL(named_addr), + CAST_ULL(named_size)); + + __cvmx_bootmem_phy_free(named_addr, named_size, + CVMX_BOOTMEM_FLAG_NO_LOCKING); + + /* Set size to zero to indicate block not used. */ + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_addr, size, 0); + } + + __cvmx_bootmem_unlock(flags); + return !!named_block_addr; /* 0 on failure, 1 on success */ +} + +s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr, + u64 max_addr, + u64 alignment, const char *name, + u32 flags) +{ + s64 addr_allocated; + u64 named_block_desc_addr; + + debug("%s: size: 0x%llx, min: 0x%llx, max: 0x%llx, align: 0x%llx, name: %s\n", + __func__, CAST_ULL(size), CAST_ULL(min_addr), CAST_ULL(max_addr), + CAST_ULL(alignment), name); + + if (__cvmx_bootmem_check_version(3)) + return -1; + + /* + * Take lock here, as name lookup/block alloc/name add need to be + * atomic + */ + __cvmx_bootmem_lock(flags); + + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(name, flags | + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (named_block_desc_addr) { + __cvmx_bootmem_unlock(flags); + return -1; + } + + /* Get pointer to first available named block descriptor */ + named_block_desc_addr = + cvmx_bootmem_phy_named_block_find(NULL, flags | + CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (!named_block_desc_addr) { + __cvmx_bootmem_unlock(flags); + return -1; + } + + /* + * Round size up to mult of minimum alignment bytes + * We need the actual size allocated to allow for blocks to be + * coallesced when they are freed. The alloc routine does the + * same rounding up on all allocations. 
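+	 *
+	 * For example, with the 16-byte CVMX_BOOTMEM_ALIGNMENT_SIZE a
+	 * request of 100 bytes is recorded (and later freed) as
+	 * (100 + 15) & ~15 = 112 bytes.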
+ */ + size = (size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) & + ~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1); + + addr_allocated = cvmx_bootmem_phy_alloc(size, min_addr, max_addr, + alignment, + flags | CVMX_BOOTMEM_FLAG_NO_LOCKING); + if (addr_allocated >= 0) { + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, base_addr, + addr_allocated); + CVMX_BOOTMEM_NAMED_SET_FIELD(named_block_desc_addr, size, size); + CVMX_BOOTMEM_NAMED_SET_NAME(named_block_desc_addr, name, + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len)); + } + + __cvmx_bootmem_unlock(flags); + return addr_allocated; +} + +void cvmx_bootmem_phy_named_block_print(void) +{ + int i; + int printed = 0; + + u64 named_block_array_addr = + CVMX_BOOTMEM_DESC_GET_FIELD(named_block_array_addr); + int num_blocks = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_num_blocks); + int name_length = CVMX_BOOTMEM_DESC_GET_FIELD(named_block_name_len); + u64 named_block_addr = named_block_array_addr; + + debug("%s: desc addr: 0x%llx\n", + __func__, CAST_ULL(cvmx_bootmem_desc_addr)); + + if (__cvmx_bootmem_check_version(3)) + return; + + printf("List of currently allocated named bootmem blocks:\n"); + for (i = 0; i < num_blocks; i++) { + u64 named_size = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, size); + if (named_size) { + char name_tmp[name_length + 1]; + u64 named_addr = + CVMX_BOOTMEM_NAMED_GET_FIELD(named_block_addr, + base_addr); + CVMX_BOOTMEM_NAMED_GET_NAME(named_block_addr, name_tmp, + name_length); + printed++; + printf("Name: %s, address: 0x%08llx, size: 0x%08llx, index: %d\n", name_tmp, + CAST_ULL(named_addr), + CAST_ULL(named_size), i); + } + named_block_addr += + sizeof(struct cvmx_bootmem_named_block_desc); + } + + if (!printed) + printf("No named bootmem blocks exist.\n"); +} + +s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size, + u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer) +{ + u64 cur_block_addr; + s64 addr; + int i; + + debug("%s (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n", + __func__, desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr)); + + /* + * Descriptor buffer needs to be in 32 bit addressable space to be + * compatible with 32 bit applications + */ + if (!desc_buffer) { + debug("ERROR: no memory for cvmx_bootmem descriptor provided\n"); + return 0; + } + + if (mem_size > OCTEON_MAX_PHY_MEM_SIZE) { + mem_size = OCTEON_MAX_PHY_MEM_SIZE; + debug("ERROR: requested memory size too large, truncating to maximum size\n"); + } + + if (cvmx_bootmem_desc_addr) + return 1; + + /* Initialize cvmx pointer to descriptor */ + cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer)); + + /* Fill the bootmem descriptor */ + CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0); + + /* + * Set up global pointer to start of list, exclude low 64k for exception + * vectors, space for global descriptor + */ + cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes); + + if (mem_size <= OCTEON_DDR0_SIZE) { + __cvmx_bootmem_phy_free(cur_block_addr, + mem_size - low_reserved_bytes, 0); + goto frees_done; + } + + __cvmx_bootmem_phy_free(cur_block_addr, + OCTEON_DDR0_SIZE - low_reserved_bytes, 0); + + mem_size -= OCTEON_DDR0_SIZE; + + /* Add DDR2 block next if present */ + if (mem_size > OCTEON_DDR1_SIZE) { + 
__cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, OCTEON_DDR1_SIZE, 0); + __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE, + mem_size - OCTEON_DDR1_SIZE, 0); + } else { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE, mem_size, 0); + } +frees_done: + + /* Initialize the named block structure */ + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks, + CVMX_BOOTMEM_NUM_NAMED_BLOCKS); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0); + + /* Allocate this near the top of the low 256 MBytes of memory */ + addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS * + sizeof(struct cvmx_bootmem_named_block_desc), + 0, 0x10000000, 0, + CVMX_BOOTMEM_FLAG_END_ALLOC); + if (addr >= 0) + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr); + + debug("%s: named_block_array_addr: 0x%llx)\n", + __func__, CAST_ULL(addr)); + + if (addr < 0) { + debug("FATAL ERROR: unable to allocate memory for bootmem descriptor!\n"); + return 0; + } + + for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) { + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0); + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0); + addr += sizeof(struct cvmx_bootmem_named_block_desc); + } + + return 1; +} + +s64 cvmx_bootmem_phy_mem_list_init_multi(u8 node_mask, + u32 mem_sizes[], + u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer) +{ + u64 cur_block_addr; + u64 mem_size; + s64 addr; + int i; + int node; + u64 node_base; /* Make u64 to reduce type casting */ + + mem_sizes[0] = gd->ram_size / (1024 * 1024); + + debug("cvmx_bootmem_phy_mem_list_init (arg desc ptr: %p, cvmx_bootmem_desc: 0x%llx)\n", + desc_buffer, CAST_ULL(cvmx_bootmem_desc_addr)); + + /* + * Descriptor buffer needs to be in 32 bit addressable space to be + * compatible with 32 bit applications + */ + if (!desc_buffer) { + debug("ERROR: no memory for cvmx_bootmem descriptor provided\n"); + return 0; + } + + cvmx_coremask_for_each_node(node, node_mask) { + if ((mem_sizes[node] * 1024 * 1024) > OCTEON_MAX_PHY_MEM_SIZE) { + mem_sizes[node] = OCTEON_MAX_PHY_MEM_SIZE / + (1024 * 1024); + debug("ERROR node#%lld: requested memory size too large, truncating to maximum size\n", + CAST_ULL(node)); + } + } + + if (cvmx_bootmem_desc_addr) + return 1; + + /* Initialize cvmx pointer to descriptor */ + cvmx_bootmem_init(cvmx_ptr_to_phys(desc_buffer)); + + /* Fill the bootmem descriptor */ + CVMX_BOOTMEM_DESC_SET_FIELD(lock, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(flags, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(head_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(major_version, CVMX_BOOTMEM_DESC_MAJ_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(minor_version, CVMX_BOOTMEM_DESC_MIN_VER); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_addr, 0); + CVMX_BOOTMEM_DESC_SET_FIELD(app_data_size, 0); + + cvmx_coremask_for_each_node(node, node_mask) { + if (node != 0) /* do not reserve memory on remote nodes */ + low_reserved_bytes = 0; + + mem_size = (u64)mem_sizes[node] * (1024 * 1024); /* MBytes */ + + /* + * Set up global pointer to start of list, exclude low 64k + * for exception vectors, space for global descriptor + */ + + node_base = (u64)node << CVMX_NODE_MEM_SHIFT; + cur_block_addr = (OCTEON_DDR0_BASE + low_reserved_bytes) | + node_base; + + if (mem_size <= OCTEON_DDR0_SIZE) { + __cvmx_bootmem_phy_free(cur_block_addr, + mem_size - low_reserved_bytes, + 0); + continue; + } + + __cvmx_bootmem_phy_free(cur_block_addr, + OCTEON_DDR0_SIZE - low_reserved_bytes, + 0); + + mem_size -= OCTEON_DDR0_SIZE; + + /* Add DDR2 block next if present */ + if (mem_size > 
OCTEON_DDR1_SIZE) { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE | + node_base, + OCTEON_DDR1_SIZE, 0); + __cvmx_bootmem_phy_free(OCTEON_DDR2_BASE | + node_base, + mem_size - OCTEON_DDR1_SIZE, 0); + } else { + __cvmx_bootmem_phy_free(OCTEON_DDR1_BASE | + node_base, + mem_size, 0); + } + } + + debug("%s: Initialize the named block\n", __func__); + + /* Initialize the named block structure */ + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_name_len, CVMX_BOOTMEM_NAME_LEN); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_num_blocks, + CVMX_BOOTMEM_NUM_NAMED_BLOCKS); + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, 0); + + /* Allocate this near the top of the low 256 MBytes of memory */ + addr = cvmx_bootmem_phy_alloc(CVMX_BOOTMEM_NUM_NAMED_BLOCKS * + sizeof(struct cvmx_bootmem_named_block_desc), + 0, 0x10000000, 0, + CVMX_BOOTMEM_FLAG_END_ALLOC); + if (addr >= 0) + CVMX_BOOTMEM_DESC_SET_FIELD(named_block_array_addr, addr); + + debug("cvmx_bootmem_phy_mem_list_init: named_block_array_addr: 0x%llx)\n", + CAST_ULL(addr)); + + if (addr < 0) { + debug("FATAL ERROR: unable to allocate memory for bootmem descriptor!\n"); + return 0; + } + + for (i = 0; i < CVMX_BOOTMEM_NUM_NAMED_BLOCKS; i++) { + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, base_addr, 0); + CVMX_BOOTMEM_NAMED_SET_FIELD(addr, size, 0); + addr += sizeof(struct cvmx_bootmem_named_block_desc); + } + + // test-only: DEBUG ifdef??? + cvmx_bootmem_phy_list_print(); + + return 1; +} + +int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size, + const char *name, u32 flags) +{ + u64 addr; + int rc = 1; + static unsigned int block_num; + char block_name[CVMX_BOOTMEM_NAME_LEN]; + + debug("%s: start %#llx, size: %#llx, name: %s, flags:%#x)\n", + __func__, CAST_ULL(start_addr), CAST_ULL(size), name, flags); + + if (__cvmx_bootmem_check_version(3)) + return 0; + + addr = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr); + if (!addr) + return 0; + + if (!name) + name = "__cvmx_bootmem_reserved"; + + while (addr && rc) { + u64 block_size = cvmx_bootmem_phy_get_size(addr); + u64 reserve_size = 0; + + if (addr >= start_addr && addr < start_addr + size) { + reserve_size = size - (addr - start_addr); + if (block_size < reserve_size) + reserve_size = block_size; + } else if (start_addr > addr && + start_addr < (addr + block_size)) { + reserve_size = block_size - (start_addr - addr); + } + + if (reserve_size) { + snprintf(block_name, sizeof(block_name), + "%.32s_%012llx_%u", + name, (unsigned long long)start_addr, + (unsigned int)block_num); + + debug("%s: Reserving 0x%llx bytes at address 0x%llx with name %s\n", + __func__, CAST_ULL(reserve_size), + CAST_ULL(addr), block_name); + + if (cvmx_bootmem_phy_named_block_alloc(reserve_size, + addr, 0, 0, + block_name, + flags) == -1) { + debug("%s: Failed to reserve 0x%llx bytes at address 0x%llx\n", + __func__, CAST_ULL(reserve_size), + (unsigned long long)addr); + rc = 0; + break; + } + + debug("%s: Reserved 0x%llx bytes at address 0x%llx with name %s\n", + __func__, CAST_ULL(reserve_size), + CAST_ULL(addr), block_name); + } + + addr = cvmx_bootmem_phy_get_next(addr); + block_num++; + } + + return rc; +} + +void cvmx_bootmem_lock(void) +{ + __cvmx_bootmem_lock(0); +} + +void cvmx_bootmem_unlock(void) +{ + __cvmx_bootmem_unlock(0); +} + +void *__cvmx_phys_addr_to_ptr(u64 phys, int size) +{ + void *tmp; + + if (sizeof(void *) == 8) { + tmp = CASTPTR(void, CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, phys)); + } else { + u32 phy32 = (u32)(phys & 0x7fffffffULL); + + tmp = CASTPTR(void, CVMX_ADD_SEG32(CVMX_MIPS32_SPACE_KSEG0, + phy32)); + } + + 
return tmp; +} + +void *__cvmx_bootmem_internal_get_desc_ptr(void) +{ + return cvmx_phys_to_ptr(cvmx_bootmem_desc_addr); +} diff --git a/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h b/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h new file mode 100644 index 0000000000..d60668c9ad --- /dev/null +++ b/arch/mips/mach-octeon/include/mach/cvmx-bootmem.h @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Marvell International Ltd. + */ + +/** + * @file + * Simple allocate only memory allocator. Used to allocate memory at application + * start time. + */ + +#ifndef __CVMX_BOOTMEM_H__ +#define __CVMX_BOOTMEM_H__ + +/* Must be multiple of 8, changing breaks ABI */ +#define CVMX_BOOTMEM_NAME_LEN 128 +/* Can change without breaking ABI */ +#define CVMX_BOOTMEM_NUM_NAMED_BLOCKS 64 +/* minimum alignment of bootmem alloced blocks */ +#define CVMX_BOOTMEM_ALIGNMENT_SIZE (16ull) + +/* Flags for cvmx_bootmem_phy_mem* functions */ +/* Allocate from end of block instead of beginning */ +#define CVMX_BOOTMEM_FLAG_END_ALLOC (1 << 0) +#define CVMX_BOOTMEM_FLAG_NO_LOCKING (1 << 1) /* Don't do any locking. */ + +/* Real physical addresses of memory regions */ +#define OCTEON_DDR0_BASE (0x0ULL) +#define OCTEON_DDR0_SIZE (0x010000000ULL) +#define OCTEON_DDR1_BASE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x20000000ULL : 0x410000000ULL) +#define OCTEON_DDR1_SIZE (0x010000000ULL) +#define OCTEON_DDR2_BASE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x30000000ULL : 0x20000000ULL) +#define OCTEON_DDR2_SIZE ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) \ + ? 0x7d0000000ULL : 0x3e0000000ULL) +#define OCTEON_MAX_PHY_MEM_SIZE ((OCTEON_IS_MODEL(OCTEON_CN68XX)) \ + ? 128 * 1024 * 1024 * 1024ULL \ + : (OCTEON_IS_OCTEON2()) \ + ? 32 * 1024 * 1024 * 1024ull \ + : (OCTEON_IS_OCTEON3()) \ + ? 512 * 1024 * 1024 * 1024ULL \ + : 16 * 1024 * 1024 * 1024ULL) + +/* + * First bytes of each free physical block of memory contain this structure, + * which is used to maintain the free memory list. Since the bootloader is + * only 32 bits, there is a union providing 64 and 32 bit versions. The + * application init code converts addresses to 64 bit addresses before the + * application starts. + */ +struct cvmx_bootmem_block_header { + /* Note: these are referenced from assembly routines in the bootloader, + * so this structure should not be changed without changing those + * routines as well. + */ + u64 next_block_addr; + u64 size; + +}; + +/* + * Structure for named memory blocks + * Number of descriptors + * available can be changed without affecting compatibility, + * but name length changes require a bump in the bootmem + * descriptor version + * Note: This structure must be naturally 64 bit aligned, as a single + * memory image will be used by both 32 and 64 bit programs. + */ +struct cvmx_bootmem_named_block_desc { + u64 base_addr; /* Base address of named block */ + /* + * Size actually allocated for named block (may differ from requested) + */ + u64 size; + char name[CVMX_BOOTMEM_NAME_LEN]; /* name of named block */ +}; + +/* Current descriptor versions */ +/* CVMX bootmem descriptor major version */ +#define CVMX_BOOTMEM_DESC_MAJ_VER 3 +/* CVMX bootmem descriptor minor version */ +#define CVMX_BOOTMEM_DESC_MIN_VER 0 + +/* + * First three members of cvmx_bootmem_desc_t are left in original + * positions for backwards compatibility. 
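+ *
+ * Within the U-Boot implementation (cvmx-bootmem.c) these fields are read
+ * and written through accessor macros rather than through a C pointer,
+ * e.g. (illustrative):
+ *
+ *	u64 head = CVMX_BOOTMEM_DESC_GET_FIELD(head_addr);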
+ */ +struct cvmx_bootmem_desc { + /* Linux compatible proxy for __BIG_ENDIAN */ + u32 lock; /* spinlock to control access to list */ + u32 flags; /* flags for indicating various conditions */ + u64 head_addr; + + /* incremented changed when incompatible changes made */ + u32 major_version; + /* + * incremented changed when compatible changes made, reset to + * zero when major incremented + */ + u32 minor_version; + u64 app_data_addr; + u64 app_data_size; + + /* number of elements in named blocks array */ + u32 named_block_num_blocks; + /* length of name array in bootmem blocks */ + u32 named_block_name_len; + /* address of named memory block descriptors */ + u64 named_block_array_addr; +}; + +/** + * Initialize the boot alloc memory structures. This is + * normally called inside of cvmx_user_app_init() + * + * @param mem_desc_addr Address of the free memory list + * @return + */ +int cvmx_bootmem_init(u64 mem_desc_addr); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader. + * This is an allocate-only algorithm, so freeing memory is not possible. + * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc(u64 size, u64 alignment); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader from a specific node. + * This is an allocate-only algorithm, so freeing memory is not possible. + * + * @param node The node to allocate memory from + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_node(u64 node, u64 size, u64 alignment); + +/** + * Allocate a block of memory from the free list that was + * passed to the application by the bootloader at a specific + * address. This is an allocate-only algorithm, so + * freeing memory is not possible. Allocation will fail if + * memory cannot be allocated at the specified address. + * + * @param size Size in bytes of block to allocate + * @param address Physical address to allocate memory at. If this + * memory is not available, the allocation fails. + * @param alignment Alignment required - must be power of 2 + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_address(u64 size, u64 address, + u64 alignment); + +/** + * Allocate a block of memory from the free list that was + * passed to the application by the bootloader within a specified + * address range. This is an allocate-only algorithm, so + * freeing memory is not possible. Allocation will fail if + * memory cannot be allocated in the requested range. + * + * @param size Size in bytes of block to allocate + * @param min_addr defines the minimum address of the range + * @param max_addr defines the maximum address of the range + * @param alignment Alignment required - must be power of 2 + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_range(u64 size, u64 alignment, + u64 min_addr, u64 max_addr); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. 
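+ * For example (illustrative; the block name is made up), a 1 MiB scratch
+ * area can be created and later released with:
+ *
+ *	void *p = cvmx_bootmem_alloc_named(1 << 20, 128, "scratch");
+ *	...
+ *	cvmx_bootmem_free_named("scratch");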
+ * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named(u64 size, u64 alignment, + const char *name); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. + * + * @param size Size in bytes of block to allocate + * @param alignment Alignment required - must be power of 2 + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * @param flags Flags to control options for the allocation. + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_flags(u64 size, u64 alignment, + const char *name, u32 flags); + +/** + * Allocate a block of memory from the free list that was passed + * to the application by the bootloader, and assign it a name in the + * global named block table. (part of the cvmx_bootmem_descriptor_t structure) + * Named blocks can later be freed. + * + * @param size Size in bytes of block to allocate + * @param address Physical address to allocate memory at. If this + * memory is not available, the allocation fails. + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_address(u64 size, u64 address, + const char *name); + +/** + * Allocate a block of memory from a specific range of the free list + * that was passed to the application by the bootloader, and assign it + * a name in the global named block table. (part of the + * cvmx_bootmem_descriptor_t structure) Named blocks can later be + * freed. If request cannot be satisfied within the address range + * specified, NULL is returned + * + * @param size Size in bytes of block to allocate + * @param min_addr minimum address of range + * @param max_addr maximum address of range + * @param align Alignment of memory to be allocated. (must be a power of 2) + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_range(u64 size, u64 min_addr, + u64 max_addr, u64 align, + const char *name); + +/** + * Allocate if needed a block of memory from a specific range of the + * free list that was passed to the application by the bootloader, and + * assign it a name in the global named block table. (part of the + * cvmx_bootmem_descriptor_t structure) Named blocks can later be + * freed. If the requested name block is already allocated, return + * the pointer to block of memory. If request cannot be satisfied + * within the address range specified, NULL is returned + * + * @param size Size in bytes of block to allocate + * @param min_addr minimum address of range + * @param max_addr maximum address of range + * @param align Alignment of memory to be allocated. (must be a power of 2) + * @param name name of block - must be less than CVMX_BOOTMEM_NAME_LEN bytes + * @param init Initialization function + * + * The initialization function is optional, if omitted the named block + * is initialized to all zeros when it is created, i.e. once. 
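+ * A usage sketch (block name illustrative): several callers can all invoke
+ *
+ *	void *p = cvmx_bootmem_alloc_named_range_once(0x1000, 0, 0, 128,
+ *						      "shared-once", NULL);
+ *
+ * and each receives a pointer to the same block, zeroed only on creation.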
+ * + * @return pointer to block of memory, NULL on error + */ +void *cvmx_bootmem_alloc_named_range_once(u64 size, + u64 min_addr, + u64 max_addr, + u64 align, + const char *name, + void (*init)(void *)); + +/** + * Allocate all free memory starting at the start address. This is used to + * prevent any free blocks from later being allocated within the reserved space. + * Note that any memory allocated with this function cannot be later freed. + * + * @param start_addr Starting address to reserve + * @param size Size in bytes to reserve starting at start_addr + * @param name Name to assign to reserved blocks + * @param flags Flags to use when reserving memory + * + * @return 0 on failure, + * !0 on success + */ +int cvmx_bootmem_reserve_memory(u64 start_addr, u64 size, + const char *name, u32 flags); + +/** + * Frees a previously allocated named bootmem block. + * + * @param name name of block to free + * + * @return 0 on failure, + * !0 on success + */ +int cvmx_bootmem_free_named(const char *name); + +/** + * Finds a named bootmem block by name. + * + * @param name name of block to free + * + * @return pointer to named block descriptor on success + * 0 on failure + */ +const struct cvmx_bootmem_named_block_desc * +cvmx_bootmem_find_named_block(const char *name); + +/** + * Returns the size of available memory in bytes, only + * counting blocks that are at least as big as the minimum block + * size. + * + * @param min_block_size + * Minimum block size to count in total. + * + * @return Number of bytes available for allocation that meet the + * block size requirement + */ +u64 cvmx_bootmem_available_mem(u64 min_block_size); + +/** + * Prints out the list of named blocks that have been allocated + * along with their addresses and sizes. + * This is primarily used for debugging purposes + */ +void cvmx_bootmem_print_named(void); + +/** + * Allocates a block of physical memory from the free list, at + * (optional) requested address and alignment. + * + * @param req_size size of region to allocate. All requests are + * rounded up to be a multiple CVMX_BOOTMEM_ALIGNMENT_SIZE bytes size + * + * @param address_min Minimum address that block can occupy. + * + * @param address_max Specifies the maximum address_min (inclusive) + * that the allocation can use. + * + * @param alignment Requested alignment of the block. If this + * alignment cannot be met, the allocation fails. + * This must be a power of 2. (Note: Alignment of + * CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and + * internally enforced. Requested alignments of less + * than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to + * CVMX_BOOTMEM_ALIGNMENT_SIZE.) + * @param flags Flags to control options for the allocation. + * + * @return physical address of block allocated, or -1 on failure + */ +s64 cvmx_bootmem_phy_alloc(u64 req_size, u64 address_min, u64 address_max, + u64 alignment, u32 flags); + +/** + * Allocates a named block of physical memory from the free list, at + * (optional) requested address and alignment. + * + * @param size size of region to allocate. All requests are rounded + * up to be a multiple CVMX_BOOTMEM_ALIGNMENT_SIZE bytes size + * + * @param min_addr Minimum address that block can occupy. + * + * @param max_addr Specifies the maximum address_min (inclusive) that + * the allocation can use. + * + * @param alignment Requested alignment of the block. If this + * alignment cannot be met, the allocation fails. + * This must be a power of 2. 
(Note: Alignment of + * CVMX_BOOTMEM_ALIGNMENT_SIZE bytes is required, and + * internally enforced. Requested alignments of less + * than CVMX_BOOTMEM_ALIGNMENT_SIZE are set to + * CVMX_BOOTMEM_ALIGNMENT_SIZE.) + * + * @param name name to assign to named block + * + * @param flags Flags to control options for the allocation. + * + * @return physical address of block allocated, or -1 on failure + */ +s64 cvmx_bootmem_phy_named_block_alloc(u64 size, u64 min_addr, u64 max_addr, + u64 alignment, const char *name, + u32 flags); + +/** + * Finds a named memory block by name. + * Also used for finding an unused entry in the named block table. + * + * @param name Name of memory block to find. If NULL pointer given, + * then finds unused descriptor, if available. + * + * @param flags Flags to control options for the allocation. + * + * @return Physical address of the memory block descriptor, zero if not + * found. If zero returned when name parameter is NULL, then no + * memory block descriptors are available. + */ +u64 cvmx_bootmem_phy_named_block_find(const char *name, u32 flags); + +/** + * Returns the size of available memory in bytes, only + * counting blocks that are at least as big as the minimum block + * size. + * + * @param min_block_size + * Minimum block size to count in total. + * + * @return Number of bytes available for allocation that meet the + * block size requirement + */ +u64 cvmx_bootmem_phy_available_mem(u64 min_block_size); + +/** + * Frees a named block. + * + * @param name name of block to free + * @param flags flags for passing options + * + * @return 0 on failure + * 1 on success + */ +int cvmx_bootmem_phy_named_block_free(const char *name, u32 flags); + +/** + * Frees a block to the bootmem allocator list. This must + * be used with care, as the size provided must match the size + * of the block that was allocated, or the list will become + * corrupted. + * + * IMPORTANT: This is only intended to be used as part of named block + * frees and initial population of the free memory list. + * * + * + * @param phy_addr physical address of block + * @param size size of block in bytes. + * @param flags flags for passing options + * + * @return 1 on success, + * 0 on failure + */ +int __cvmx_bootmem_phy_free(u64 phy_addr, u64 size, u32 flags); + +/** + * Prints the list of currently allocated named blocks + * + */ +void cvmx_bootmem_phy_named_block_print(void); + +/** + * Prints the list of available memory. + * + */ +void cvmx_bootmem_phy_list_print(void); + +/** + * This function initializes the free memory list used by cvmx_bootmem. + * This must be called before any allocations can be done. + * + * @param mem_size Total memory available, in bytes + * + * @param low_reserved_bytes Number of bytes to reserve (leave out of + * free list) at address 0x0. + * + * @param desc_buffer Buffer for the bootmem descriptor. This must be + * a 32 bit addressable address. + * + * @return 1 on success + * 0 on failure + */ +s64 cvmx_bootmem_phy_mem_list_init(u64 mem_size, u32 low_reserved_bytes, + struct cvmx_bootmem_desc *desc_buffer); + +/** + * This function initializes the free memory list used by cvmx_bootmem. + * This must be called before any allocations can be done. + * + * @param nodemask Nodemask - one bit per node (bit0->node0, bit1->node1,...) + * + * @param mem_size[] Array of memory sizes in MBytes per node ([0]->node0,...) + * + * @param low_reserved_bytes Number of bytes to reserve (leave out of + * free list) at address 0x0. 
+ *
+ * @param desc_buffer Buffer for the bootmem descriptor. This must be
+ *                    a 32 bit addressable address.
+ *
+ * @return 1 on success
+ *         0 on failure
+ */
+s64 cvmx_bootmem_phy_mem_list_init_multi(u8 nodemask, u32 mem_size[],
+					 u32 low_reserved_bytes,
+					 struct cvmx_bootmem_desc *desc_buffer);
+/**
+ * Locks the bootmem allocator. This is useful in certain situations
+ * where multiple allocations must be made without being interrupted.
+ * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag.
+ *
+ */
+void cvmx_bootmem_lock(void);
+
+/**
+ * Unlocks the bootmem allocator. This is useful in certain situations
+ * where multiple allocations must be made without being interrupted.
+ * This should be used with the CVMX_BOOTMEM_FLAG_NO_LOCKING flag.
+ *
+ */
+void cvmx_bootmem_unlock(void);
+
+/**
+ * Internal use function to get the current descriptor pointer
+ */
+void *__cvmx_bootmem_internal_get_desc_ptr(void);
+
+/**
+ * Internal use. This is used to get a pointer to a physical
+ * address. For Linux n32 the physical address is mmapped to a virtual
+ * address and the virtual address is returned. For n64 the address
+ * is converted to an xkphys address and the xkphys address is
+ * returned.
+ */
+void *__cvmx_phys_addr_to_ptr(u64 phys, int size);
+const struct cvmx_bootmem_named_block_desc *
+__cvmx_bootmem_find_named_block_flags(const char *name, u32 flags);
+void *cvmx_bootmem_alloc_named_range_flags(u64 size, u64 min_addr,
+					   u64 max_addr, u64 align,
+					   const char *name, u32 flags);
+u64 cvmx_bootmem_phy_alloc_range(u64 size, u64 alignment,
+				 u64 min_addr, u64 max_addr);
+
+#endif /* __CVMX_BOOTMEM_H__ */

From e602dd5238454c4b6c5b01dc83fbfd802de52095 Mon Sep 17 00:00:00 2001
From: Aaron Williams
Date: Thu, 20 Aug 2020 07:22:03 +0200
Subject: [PATCH 26/27] mips: octeon: Add bootoctlinux command

Octeon needs a platform-specific command to boot the Linux kernel, as
specific parameters need to be passed and special handling for
multiple cores (SMP) is needed.
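For example, a kernel image loaded to $loadaddr can typically be started
on the first two cores with something like (illustrative only, the exact
arguments depend on the board and kernel):

    bootoctlinux $loadaddr coremask=0x3
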
Co-developed-by: Stefan Roese Signed-off-by: Aaron Williams Signed-off-by: Stefan Roese [use gd->ram_base instead of gd->bd->bi_memstart] Signed-off-by: Daniel Schwierzeck --- arch/mips/mach-octeon/Makefile | 1 + arch/mips/mach-octeon/bootoctlinux.c | 661 ++++++++++++++++++ .../mach-octeon/include/mach/bootoct_cmd.h | 54 ++ 3 files changed, 716 insertions(+) create mode 100644 arch/mips/mach-octeon/bootoctlinux.c create mode 100644 arch/mips/mach-octeon/include/mach/bootoct_cmd.h diff --git a/arch/mips/mach-octeon/Makefile b/arch/mips/mach-octeon/Makefile index e96f0deb1b..3486aa9d8b 100644 --- a/arch/mips/mach-octeon/Makefile +++ b/arch/mips/mach-octeon/Makefile @@ -10,3 +10,4 @@ obj-y += cpu.o obj-y += dram.o obj-y += cvmx-coremask.o obj-y += cvmx-bootmem.o +obj-y += bootoctlinux.o diff --git a/arch/mips/mach-octeon/bootoctlinux.c b/arch/mips/mach-octeon/bootoctlinux.c new file mode 100644 index 0000000000..75d7e83bd7 --- /dev/null +++ b/arch/mips/mach-octeon/bootoctlinux.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Stefan Roese + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +DECLARE_GLOBAL_DATA_PTR; + +/* ToDo: Revisit these settings */ +#define OCTEON_RESERVED_LOW_MEM_SIZE (512 * 1024) +#define OCTEON_RESERVED_LOW_BOOT_MEM_SIZE (1024 * 1024) +#define BOOTLOADER_BOOTMEM_DESC_SPACE (1024 * 1024) + +/* Default stack and heap sizes, in bytes */ +#define DEFAULT_STACK_SIZE (1 * 1024 * 1024) +#define DEFAULT_HEAP_SIZE (3 * 1024 * 1024) + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +enum { + /* If set, core should do app-wide init, only one core per app will have + * this flag set. + */ + BOOT_FLAG_INIT_CORE = 1, + OCTEON_BL_FLAG_DEBUG = 1 << 1, + OCTEON_BL_FLAG_NO_MAGIC = 1 << 2, + /* If set, use uart1 for console */ + OCTEON_BL_FLAG_CONSOLE_UART1 = 1 << 3, + OCTEON_BL_FLAG_CONSOLE_PCI = 1 << 4, /* If set, use PCI console */ + /* Call exit on break on serial port */ + OCTEON_BL_FLAG_BREAK = 1 << 5, + /* + * Be sure to update OCTEON_APP_INIT_H_VERSION when new fields are added + * and to conditionalize the new flag's usage based on the version. + */ +} octeon_boot_descriptor_flag; + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +#ifndef OCTEON_CURRENT_DESC_VERSION +# define OCTEON_CURRENT_DESC_VERSION 7 +#endif +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +/* Version 7 changes: Change names of deprecated fields */ +#ifndef OCTEON_ARGV_MAX_ARGS +# define OCTEON_ARGV_MAX_ARGS 64 +#endif + +/** + * NOTE: This must duplicate octeon_boot_descriptor_t in the toolchain + * octeon-app-init.h file. + */ +#ifndef OCTEON_SERIAL_LEN +# define OCTEON_SERIAL_LEN 20 +#endif + +/** + * Bootloader structure used to pass info to Octeon executive startup code. + * NOTE: all fields are deprecated except for: + * * desc_version + * * desc_size, + * * heap_base + * * heap_end + * * eclock_hz + * * flags + * * argc + * * argv + * * cvmx_desc_vaddr + * * debugger_flags_base_addr + * + * All other fields have been moved to the cvmx_descriptor, and the new + * fields should be added there. They are left as placeholders in this + * structure for binary compatibility. 
+ *
+ * NOTE: This structure must match what is in the toolchain octeon-app-init.h
+ * file.
+ */
+struct octeon_boot_descriptor {
+	/* Start of block referenced by assembly code - do not change! */
+	u32 desc_version;
+	u32 desc_size;
+	u64 stack_top;
+	u64 heap_base;
+	u64 heap_end;
+	u64 deprecated17;
+	u64 deprecated16;
+	/* End of block referenced by assembly code - do not change! */
+	u32 deprecated18;
+	u32 deprecated15;
+	u32 deprecated14;
+	u32 argc;			/* argc for main() */
+	u32 argv[OCTEON_ARGV_MAX_ARGS];	/* argv for main() */
+	u32 flags;			/* Flags for application */
+	u32 core_mask;			/* Coremask running this image */
+	u32 dram_size;			/* DEPRECATED, DRAM size in megabytes. Used up to SDK 1.8.1 */
+	u32 phy_mem_desc_addr;
+	u32 debugger_flags_base_addr;	/* used to pass flags from app to debugger. */
+	u32 eclock_hz;			/* CPU clock speed, in Hz. */
+	u32 deprecated10;
+	u32 deprecated9;
+	u16 deprecated8;
+	u8 deprecated7;
+	u8 deprecated6;
+	u16 deprecated5;
+	u8 deprecated4;
+	u8 deprecated3;
+	char deprecated2[OCTEON_SERIAL_LEN];
+	u8 deprecated1[6];
+	u8 deprecated0;
+	u64 cvmx_desc_vaddr;		/* Address of cvmx descriptor */
+};
+
+static struct octeon_boot_descriptor boot_desc[CVMX_MIPS_MAX_CORES];
+static struct cvmx_bootinfo cvmx_bootinfo_array[CVMX_MIPS_MAX_CORES];
+
+/**
+ * Programs the boot bus moveable region
+ * @param base		base address to place the boot bus moveable region
+ *			(bits [31:7])
+ * @param region_num	Selects which region, 0 or 1 for node 0,
+ *			2 or 3 for node 1
+ * @param enable	Set true to enable, false to disable
+ * @param data		Pointer to data to put in the region, up to
+ *			16 dwords.
+ * @param num_words	Number of data dwords (up to 32)
+ *
+ * @return 0 for success, -1 on error
+ */
+static int octeon_set_moveable_region(u32 base, int region_num,
+				      bool enable, const u64 *data,
+				      unsigned int num_words)
+{
+	int node = region_num >> 1;
+	u64 val;
+	int i;
+	u8 node_mask = 0x01;	/* ToDo: Currently only one node is supported */
+
+	debug("%s(0x%x, %d, %d, %p, %u)\n", __func__, base, region_num, enable,
+	      data, num_words);
+
+	if (num_words > 32) {
+		printf("%s: Too many words (%d) for region %d\n", __func__,
+		       num_words, region_num);
+		return -1;
+	}
+
+	if (base & 0x7f) {
+		printf("%s: Error: base address 0x%x must be 128 byte aligned\n",
+		       __func__, base);
+		return -1;
+	}
+
+	if (region_num > (node_mask > 1 ? 3 : 1)) {
+		printf("%s: Region number %d out of range\n",
+		       __func__, region_num);
+		return -1;
+	}
+
+	if (!data && num_words > 0) {
+		printf("%s: Error: NULL data\n", __func__);
+		return -1;
+	}
+
+	region_num &= 1;
+
+	val = MIO_BOOT_LOC_CFG_EN |
+		FIELD_PREP(MIO_BOOT_LOC_CFG_BASE, base >> 7);
+	debug("%s: Setting MIO_BOOT_LOC_CFG(%d) on node %d to 0x%llx\n",
+	      __func__, region_num, node, val);
+	csr_wr(CVMX_MIO_BOOT_LOC_CFGX(region_num & 1), val);
+
+	val = FIELD_PREP(MIO_BOOT_LOC_ADR_ADR, (region_num ? 0x80 : 0x00) >> 3);
+	debug("%s: Setting MIO_BOOT_LOC_ADR start to 0x%llx\n", __func__, val);
+	csr_wr(CVMX_MIO_BOOT_LOC_ADR, val);
+
+	for (i = 0; i < num_words; i++) {
+		debug("  0x%02llx: 0x%016llx\n",
+		      csr_rd(CVMX_MIO_BOOT_LOC_ADR), data[i]);
+		csr_wr(CVMX_MIO_BOOT_LOC_DAT, data[i]);
+	}
+
+	return 0;
+}
+
+/**
+ * Parse comma separated numbers into an array
+ *
+ * @param[out] values	values read for each node
+ * @param[in] str	string to parse
+ * @param base		0 for auto, otherwise 8, 10 or 16 for the number base
+ *
+ * @return number of values read.
+ */ +static int octeon_parse_nodes(u64 values[CVMX_MAX_NODES], + const char *str, int base) +{ + int node = 0; + char *sep; + + do { + debug("Parsing node %d: \"%s\"\n", node, str); + values[node] = simple_strtoull(str, &sep, base); + debug(" node %d: 0x%llx\n", node, values[node]); + str = sep + 1; + } while (++node < CVMX_MAX_NODES && *sep == ','); + + debug("%s: returning %d\n", __func__, node); + return node; +} + +/** + * Parse command line arguments + * + * @param argc number of arguments + * @param[in] argv array of argument strings + * @param cmd command type + * @param[out] boot_args parsed values + * + * @return number of arguments parsed + */ +int octeon_parse_bootopts(int argc, char *const argv[], + enum octeon_boot_cmd_type cmd, + struct octeon_boot_args *boot_args) +{ + u64 node_values[CVMX_MAX_NODES]; + int arg, j; + int num_values; + int node; + u8 node_mask = 0x01; /* ToDo: Currently only one node is supported */ + + debug("%s(%d, %p, %d, %p)\n", __func__, argc, argv, cmd, boot_args); + memset(boot_args, 0, sizeof(*boot_args)); + boot_args->stack_size = DEFAULT_STACK_SIZE; + boot_args->heap_size = DEFAULT_HEAP_SIZE; + boot_args->node_mask = 0; + + for (arg = 0; arg < argc; arg++) { + debug(" argv[%d]: %s\n", arg, argv[arg]); + if (cmd == BOOTOCT && !strncmp(argv[arg], "stack=", 6)) { + boot_args->stack_size = simple_strtoul(argv[arg] + 6, + NULL, 0); + } else if (cmd == BOOTOCT && !strncmp(argv[arg], "heap=", 5)) { + boot_args->heap_size = simple_strtoul(argv[arg] + 5, + NULL, 0); + } else if (!strncmp(argv[arg], "debug", 5)) { + puts("setting debug flag!\n"); + boot_args->boot_flags |= OCTEON_BL_FLAG_DEBUG; + } else if (cmd == BOOTOCT && !strncmp(argv[arg], "break", 5)) { + puts("setting break flag!\n"); + boot_args->boot_flags |= OCTEON_BL_FLAG_BREAK; + } else if (!strncmp(argv[arg], "forceboot", 9)) { + boot_args->forceboot = true; + } else if (!strncmp(argv[arg], "nodemask=", 9)) { + boot_args->node_mask = simple_strtoul(argv[arg] + 9, + NULL, 16); + } else if (!strncmp(argv[arg], "numcores=", 9)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 9, 0); + for (j = 0; j < num_values; j++) + boot_args->num_cores[j] = node_values[j]; + boot_args->num_cores_set = true; + } else if (!strncmp(argv[arg], "skipcores=", 10)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 10, 0); + for (j = 0; j < num_values; j++) + boot_args->num_skipped[j] = node_values[j]; + boot_args->num_skipped_set = true; + } else if (!strncmp(argv[arg], "console_uart=", 13)) { + boot_args->console_uart = simple_strtoul(argv[arg] + 13, + NULL, 0); + if (boot_args->console_uart == 1) { + boot_args->boot_flags |= + OCTEON_BL_FLAG_CONSOLE_UART1; + } else if (!boot_args->console_uart) { + boot_args->boot_flags &= + ~OCTEON_BL_FLAG_CONSOLE_UART1; + } + } else if (!strncmp(argv[arg], "coremask=", 9)) { + memset(node_values, 0, sizeof(node_values)); + num_values = octeon_parse_nodes(node_values, + argv[arg] + 9, 16); + for (j = 0; j < num_values; j++) + cvmx_coremask_set64_node(&boot_args->coremask, + j, node_values[j]); + boot_args->coremask_set = true; + } else if (cmd == BOOTOCTLINUX && + !strncmp(argv[arg], "namedblock=", 11)) { + boot_args->named_block = argv[arg] + 11; + } else if (!strncmp(argv[arg], "endbootargs", 11)) { + boot_args->endbootargs = 1; + arg++; + if (argc >= arg && cmd != BOOTOCTLINUX) + boot_args->app_name = argv[arg]; + break; + } else { + debug(" Unknown argument 
\"%s\"\n", argv[arg]); + } + } + + if (boot_args->coremask_set && boot_args->num_cores_set) { + puts("Warning: both coremask and numcores are set, using coremask.\n"); + } else if (!boot_args->coremask_set && !boot_args->num_cores_set) { + cvmx_coremask_set_core(&boot_args->coremask, 0); + boot_args->coremask_set = true; + } else if ((!boot_args->coremask_set) && boot_args->num_cores_set) { + cvmx_coremask_for_each_node(node, node_mask) + cvmx_coremask_set64_node(&boot_args->coremask, node, + ((1ull << boot_args->num_cores[node]) - 1) << + boot_args->num_skipped[node]); + boot_args->coremask_set = true; + } + + /* Update the node mask based on the coremask or the number of cores */ + for (j = 0; j < CVMX_MAX_NODES; j++) { + if (cvmx_coremask_get64_node(&boot_args->coremask, j)) + boot_args->node_mask |= 1 << j; + } + + debug("%s: return %d\n", __func__, arg); + return arg; +} + +int do_bootoctlinux(struct cmd_tbl *cmdtp, int flag, int argc, + char *const argv[]) +{ + typedef void __noreturn (*kernel_entry_t)(int, ulong, ulong, ulong); + kernel_entry_t kernel; + struct octeon_boot_args boot_args; + int arg_start = 1; + int arg_count; + u64 addr = 0; /* Address of the ELF image */ + int arg0; + u64 arg1; + u64 arg2; + u64 arg3; + int ret; + struct cvmx_coremask core_mask; + struct cvmx_coremask coremask_to_run; + struct cvmx_coremask avail_coremask; + int first_core; + int core; + struct ram_info ram; + struct udevice *dev; + const u64 *nmi_code; + int num_dwords; + u8 node_mask = 0x01; + int i; + + cvmx_coremask_clear_all(&core_mask); + cvmx_coremask_clear_all(&coremask_to_run); + + if (argc >= 2 && (isxdigit(argv[1][0]) && (isxdigit(argv[1][1]) || + argv[1][1] == 'x' || + argv[1][1] == 'X' || + argv[1][1] == '\0'))) { + addr = simple_strtoul(argv[1], NULL, 16); + if (!addr) + addr = CONFIG_SYS_LOAD_ADDR; + arg_start++; + } + if (addr == 0) + addr = CONFIG_SYS_LOAD_ADDR; + + debug("%s: arg start: %d\n", __func__, arg_start); + arg_count = octeon_parse_bootopts(argc - arg_start, argv + arg_start, + BOOTOCTLINUX, &boot_args); + + debug("%s:\n" + " named block: %s\n" + " node mask: 0x%x\n" + " stack size: 0x%x\n" + " heap size: 0x%x\n" + " boot flags: 0x%x\n" + " force boot: %s\n" + " coremask set: %s\n" + " num cores set: %s\n" + " num skipped set: %s\n" + " endbootargs: %s\n", + __func__, + boot_args.named_block ? boot_args.named_block : "none", + boot_args.node_mask, + boot_args.stack_size, + boot_args.heap_size, + boot_args.boot_flags, + boot_args.forceboot ? "true" : "false", + boot_args.coremask_set ? "true" : "false", + boot_args.num_cores_set ? "true" : "false", + boot_args.num_skipped_set ? "true" : "false", + boot_args.endbootargs ? "true" : "false"); + debug(" num cores: "); + for (i = 0; i < CVMX_MAX_NODES; i++) + debug("%s%d", i > 0 ? ", " : "", boot_args.num_cores[i]); + debug("\n num skipped: "); + for (i = 0; i < CVMX_MAX_NODES; i++) { + debug("%s%d", i > 0 ? 
", " : "", boot_args.num_skipped[i]); + debug("\n coremask:\n"); + cvmx_coremask_dprint(&boot_args.coremask); + } + + if (boot_args.endbootargs) { + debug("endbootargs set, adjusting argc from %d to %d, arg_count: %d, arg_start: %d\n", + argc, argc - (arg_count + arg_start), arg_count, + arg_start); + argc -= (arg_count + arg_start); + argv += (arg_count + arg_start); + } + + /* + * numcores specification overrides a coremask on the same command line + */ + cvmx_coremask_copy(&core_mask, &boot_args.coremask); + + /* + * Remove cores from coremask based on environment variable stored in + * flash + */ + if (validate_coremask(&core_mask) != 0) { + puts("Invalid coremask.\n"); + return 1; + } else if (cvmx_coremask_is_empty(&core_mask)) { + puts("Coremask is empty after coremask_override mask. Nothing to do.\n"); + return 0; + } + + if (cvmx_coremask_intersects(&core_mask, &coremask_to_run)) { + puts("ERROR: Can't load code on core twice! Provided coremask:\n"); + cvmx_coremask_print(&core_mask); + puts("overlaps previously loaded coremask:\n"); + cvmx_coremask_print(&coremask_to_run); + return -1; + } + + debug("Setting up boot descriptor block with core mask:\n"); + cvmx_coremask_dprint(&core_mask); + + /* + * Add coremask to global mask of cores that have been set up and are + * runable + */ + cvmx_coremask_or(&coremask_to_run, &coremask_to_run, &core_mask); + + /* Get RAM size */ + ret = uclass_get_device(UCLASS_RAM, 0, &dev); + if (ret) { + debug("DRAM init failed: %d\n", ret); + return ret; + } + + ret = ram_get_info(dev, &ram); + if (ret) { + debug("Cannot get DRAM size: %d\n", ret); + return ret; + } + + /* + * Load kernel ELF image, or try binary if ELF is not detected. + * This way the much smaller vmlinux.bin can also be started but + * has to be loaded at the correct address (ep as parameter). 
+ */ + if (!valid_elf_image(addr)) + printf("Booting binary image instead (vmlinux.bin)...\n"); + else + addr = load_elf_image_shdr(addr); + + /* Set kernel entry point */ + kernel = (kernel_entry_t)addr; + + /* Init bootmem list for Linux kernel booting */ + if (!cvmx_bootmem_phy_mem_list_init( + ram.size, OCTEON_RESERVED_LOW_MEM_SIZE, + (void *)CKSEG0ADDR(BOOTLOADER_BOOTMEM_DESC_SPACE))) { + printf("FATAL: Error initializing free memory list\n"); + return 0; + } + + first_core = cvmx_coremask_get_first_core(&coremask_to_run); + + cvmx_coremask_for_each_core(core, &coremask_to_run) { + debug("%s: Activating core %d\n", __func__, core); + + cvmx_bootinfo_array[core].core_mask = + cvmx_coremask_get32(&coremask_to_run); + cvmx_coremask_copy(&cvmx_bootinfo_array[core].ext_core_mask, + &coremask_to_run); + + if (core == first_core) + cvmx_bootinfo_array[core].flags |= BOOT_FLAG_INIT_CORE; + + cvmx_bootinfo_array[core].dram_size = ram.size / (1024 * 1024); + + cvmx_bootinfo_array[core].dclock_hz = gd->mem_clk * 1000000; + cvmx_bootinfo_array[core].eclock_hz = gd->cpu_clk; + + cvmx_bootinfo_array[core].led_display_base_addr = 0; + cvmx_bootinfo_array[core].phy_mem_desc_addr = + ((u32)(u64)__cvmx_bootmem_internal_get_desc_ptr()) & + 0x7ffffff; + + cvmx_bootinfo_array[core].major_version = CVMX_BOOTINFO_MAJ_VER; + cvmx_bootinfo_array[core].minor_version = CVMX_BOOTINFO_MIN_VER; + cvmx_bootinfo_array[core].fdt_addr = virt_to_phys(gd->fdt_blob); + + boot_desc[core].dram_size = gd->ram_size / (1024 * 1024); + boot_desc[core].cvmx_desc_vaddr = + virt_to_phys(&cvmx_bootinfo_array[core]); + + boot_desc[core].desc_version = OCTEON_CURRENT_DESC_VERSION; + boot_desc[core].desc_size = sizeof(boot_desc[0]); + + boot_desc[core].flags = cvmx_bootinfo_array[core].flags; + boot_desc[core].eclock_hz = cvmx_bootinfo_array[core].eclock_hz; + + boot_desc[core].argc = argc; + for (i = 0; i < argc; i++) + boot_desc[core].argv[i] = (u32)virt_to_phys(argv[i]); + } + + core = 0; + arg0 = argc; + arg1 = (u64)argv; + arg2 = 0x1; /* Core 0 sets init core for Linux */ + arg3 = XKPHYS | virt_to_phys(&boot_desc[core]); + + debug("## Transferring control to Linux (at address %p) ...\n", kernel); + + /* + * Flush cache before jumping to application. Let's flush the + * whole SDRAM area, since we don't know the size of the image + * that was loaded. + */ + flush_cache(gd->ram_base, gd->ram_top - gd->ram_base); + + /* Take all cores out of reset */ + csr_wr(CVMX_CIU_PP_RST, 0); + sync(); + + /* Wait a short while for the other cores... */ + mdelay(100); + + /* Install boot code into moveable bus for NMI (other cores) */ + nmi_code = (const u64 *)nmi_bootvector; + num_dwords = (((u64)&nmi_handler_para[0] - (u64)nmi_code) + 7) / 8; + + ret = octeon_set_moveable_region(0x1fc00000, 0, true, nmi_code, + num_dwords); + if (ret) { + printf("Error installing NMI handler for SMP core startup\n"); + return 0; + } + + /* Write NMI handler parameters for Linux kernel booting */ + nmi_handler_para[0] = (u64)kernel; + nmi_handler_para[1] = arg0; + nmi_handler_para[2] = arg1; + nmi_handler_para[3] = 0; /* Don't set init core for secondary cores */ + nmi_handler_para[4] = arg3; + sync(); + + /* Wait a short while for the other cores... */ + mdelay(100); + + /* + * Cores have already been taken out of reset to conserve power. 
+	 * We need to send an NMI to get the cores out of their wait loop
+	 */
+	octeon_get_available_coremask(&avail_coremask);
+	debug("Available coremask:\n");
+	cvmx_coremask_dprint(&avail_coremask);
+	debug("Starting coremask:\n");
+	cvmx_coremask_dprint(&coremask_to_run);
+	debug("Sending NMIs to other cores\n");
+	if (octeon_has_feature(OCTEON_FEATURE_CIU3)) {
+		u64 avail_cm;
+		int node;
+
+		cvmx_coremask_for_each_node(node, node_mask) {
+			avail_cm = cvmx_coremask_get64_node(&avail_coremask,
+							    node);
+
+			if (avail_cm != 0) {
+				debug("Sending NMI to node %d, coremask=0x%llx, CIU3_NMI=0x%llx\n",
+				      node, avail_cm,
+				      (node > 0 ? -1ull : -2ull) & avail_cm);
+				csr_wr(CVMX_CIU3_NMI,
+				       (node > 0 ? -1ull : -2ull) & avail_cm);
+			}
+		}
+	} else {
+		csr_wr(CVMX_CIU_NMI,
+		       -2ull & cvmx_coremask_get64(&avail_coremask));
+	}
+	debug("Done sending NMIs\n");
+
+	/* Wait a short while for the other cores... */
+	mdelay(100);
+
+	/*
+	 * pass address parameter as argv[0] (aka command name),
+	 * and all remaining args
+	 * a0 = argc
+	 * a1 = argv (32 bit physical addresses, not pointers)
+	 * a2 = init core
+	 * a3 = boot descriptor address
+	 * a4/t0 = entry point (only used by assembly stub)
+	 */
+	kernel(arg0, arg1, arg2, arg3);
+
+	return 0;
+}
+
+U_BOOT_CMD(bootoctlinux, 32, 0, do_bootoctlinux,
+	   "Boot from a Linux ELF image in memory",
+	   "elf_address [coremask=mask_to_run | numcores=core_cnt_to_run] "
+	   "[forceboot] [skipcores=core_cnt_to_skip] [namedblock=name] [endbootargs] [app_args ...]\n"
+	   "elf_address - address of ELF image to load. If 0, default load address\n"
+	   "              is used.\n"
+	   "coremask    - mask of cores to run on. ANDed with coremask_override\n"
+	   "              environment variable to ensure only working cores are used\n"
+	   "numcores    - number of cores to run on. Runs on specified number of cores,\n"
+	   "              taking into account the coremask_override.\n"
+	   "skipcores   - only meaningful with numcores. Skips this many cores\n"
+	   "              (starting from 0) when loading the numcores cores.\n"
+	   "              For example, setting skipcores to 1 will skip core 0\n"
+	   "              and load the application starting at the next available core.\n"
+	   "forceboot   - if set, boots application even if core 0 is not in mask\n"
+	   "namedblock  - specifies a named block to load the kernel\n"
+	   "endbootargs - if set, bootloader does not process any further arguments and\n"
+	   "              only passes the arguments that follow to the kernel.\n"
+	   "              If not set, the kernel gets the entire command line as\n"
+	   "              arguments.\n" "\n");
diff --git a/arch/mips/mach-octeon/include/mach/bootoct_cmd.h b/arch/mips/mach-octeon/include/mach/bootoct_cmd.h
new file mode 100644
index 0000000000..657698ba54
--- /dev/null
+++ b/arch/mips/mach-octeon/include/mach/bootoct_cmd.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Marvell International Ltd.
+ */ + +#ifndef __BOOTOCT_CMD_H__ +#define __BOOTOCT_CMD_H__ + +#include "cvmx-coremask.h" + +enum octeon_boot_cmd_type { + BOOTOCT, + BOOTOCTLINUX, + BOOTOCTELF +}; + +/** Structure to contain results of command line argument parsing */ +struct octeon_boot_args { + struct cvmx_coremask coremask; /** Parsed coremask */ + int num_cores[CVMX_MAX_NODES]; /** number of cores */ + int num_skipped[CVMX_MAX_NODES];/** number of skipped cores */ + const char *app_name; /** Application name */ + const char *named_block; /** Named block to load Linux into */ + u32 stack_size; /** stack size */ + u32 heap_size; /** heap size */ + u32 boot_flags; /** boot flags */ + int node_mask; /** Node mask to use */ + int console_uart; /** serial console number */ + bool forceboot; /** force booting if core 0 not set */ + bool coremask_set; /** set if coremask was set */ + bool num_cores_set; /** Set if num_cores was set */ + bool num_skipped_set; /** Set if num_skipped was set */ + /** Set if endbootargs parameter was passed. */ + bool endbootargs; +}; + +/** + * Parse command line arguments + * + * @param argc number of arguments + * @param[in] argv array of argument strings + * @param cmd command type + * @param[out] boot_args parsed values + * + * @return number of arguments parsed + */ +int octeon_parse_bootopts(int argc, char *const argv[], + enum octeon_boot_cmd_type cmd, + struct octeon_boot_args *boot_args); + +void nmi_bootvector(void); +extern u64 nmi_handler_para[]; + +#endif /* __BOOTOCT_CMD_H__ */ From 1471560b2c375c6e667acc896e99fa271100d299 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 20 Aug 2020 07:22:04 +0200 Subject: [PATCH 27/27] mips: octeon: octeon_common.h: Increase CONFIG_SYS_BOOTM_LEN Increase CONFIG_SYS_BOOTM_LEN to 64MiB for Linux kernel booting. Signed-off-by: Stefan Roese --- include/configs/octeon_common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/configs/octeon_common.h b/include/configs/octeon_common.h index 541b81801e..109ef4064d 100644 --- a/include/configs/octeon_common.h +++ b/include/configs/octeon_common.h @@ -21,4 +21,6 @@ #define CONFIG_SYS_LOAD_ADDR (CONFIG_SYS_SDRAM_BASE + (1 << 20)) +#define CONFIG_SYS_BOOTM_LEN (64 << 20) /* 64M */ + #endif /* __OCTEON_COMMON_H__ */