Diffstat (limited to 'drivers'): 128 files changed, 18749 insertions(+), 1372 deletions(-)
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 3831a6f..5400c8a 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -160,6 +160,7 @@ config CRYPTO_DEV_FSL_DPAA2_CAAM select CRYPTO_BLKCIPHER select CRYPTO_AUTHENC select CRYPTO_AEAD + select CRYPTO_HASH ---help--- CAAM driver for QorIQ Data Path Acceleration Architecture 2. It handles DPSECI DPAA2 objects that sit on the Management Complex @@ -172,3 +173,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC def_tristate (CRYPTO_DEV_FSL_CAAM_CRYPTO_API || \ CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI || \ CRYPTO_DEV_FSL_DPAA2_CAAM) + +config CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC + def_tristate (CRYPTO_DEV_FSL_CAAM_AHASH_API || \ + CRYPTO_DEV_FSL_DPAA2_CAAM) diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 01f73a2..90a2a65 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC) += caamhash_desc.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 6480a01..0de4a84 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -118,6 +118,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 *desc; int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN - ctx->adata.keylen_pad; @@ -136,7 +137,8 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize); + cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize, + ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, desc_bytes(desc), DMA_TO_DEVICE); @@ -154,7 +156,8 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* aead_decrypt shared descriptor */ desc = ctx->sh_desc_dec; - cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize); + cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize, + ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, desc_bytes(desc), DMA_TO_DEVICE); @@ -168,6 +171,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 ctx1_iv_off = 0; u32 *desc, *nonce = NULL; u32 inl_mask; @@ -234,7 +238,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_enc; cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, ctx1_iv_off, - false); + false, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, desc_bytes(desc), DMA_TO_DEVICE); @@ -266,7 +270,7 @@ skip_enc: desc = ctx->sh_desc_dec; cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, alg->caam.geniv, is_rfc3686, - nonce, ctx1_iv_off, false); + nonce, ctx1_iv_off, false, ctrlpriv->era); 
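
The era-gated setkey logic above is the core of the change: on SEC Era 6 or later, the Derived Key Protocol (DKP) derives the MDHA split key inside the shared descriptor itself, so setkey no longer has to run a separate split-key generation job. A condensed sketch of the two paths, using a hypothetical helper name (set_authenc_key) but only calls that appear in this patch:

/* Hypothetical condensation of the era-gated setkey paths above */
static int set_authenc_key(struct caam_ctx *ctx, struct device *jrdev,
                           struct crypto_authenc_keys *keys, int era)
{
        if (era >= 6) {
                /* DKP: keep the raw key; the descriptor derives the split key */
                ctx->adata.keylen = keys->authkeylen;
                ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype &
                                                      OP_ALG_ALGSEL_MASK);
                if (ctx->adata.keylen_pad + keys->enckeylen > CAAM_MAX_KEY_SIZE)
                        return -EINVAL;
                memcpy(ctx->key, keys->authkey, keys->authkeylen);
                return 0;
        }

        /* Era < 6: precompute the MDHA split key with a one-shot job */
        return gen_split_key(jrdev, ctx->key, &ctx->adata, keys->authkey,
                             keys->authkeylen,
                             CAAM_MAX_KEY_SIZE - keys->enckeylen);
}
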
dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma, desc_bytes(desc), DMA_TO_DEVICE); @@ -300,7 +304,7 @@ skip_enc: desc = ctx->sh_desc_enc; cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, - ctx1_iv_off, false); + ctx1_iv_off, false, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma, desc_bytes(desc), DMA_TO_DEVICE); @@ -510,6 +514,7 @@ static int aead_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; int ret = 0; @@ -524,6 +529,27 @@ static int aead_setkey(struct crypto_aead *aead, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif + /* + * If DKP is supported, use it in the shared descriptor to generate + * the split key. + */ + if (ctrlpriv->era >= 6) { + ctx->adata.keylen = keys.authkeylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); + + if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, keys.authkey, keys.authkeylen); + memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, + keys.enckeylen); + dma_sync_single_for_device(jrdev, ctx->key_dma, + ctx->adata.keylen_pad + + keys.enckeylen, DMA_TO_DEVICE); + goto skip_split_key; + } + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey, keys.authkeylen, CAAM_MAX_KEY_SIZE - keys.enckeylen); @@ -540,6 +566,8 @@ static int aead_setkey(struct crypto_aead *aead, DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, ctx->adata.keylen_pad + keys.enckeylen, 1); #endif + +skip_split_key: ctx->cdata.keylen = keys.enckeylen; return aead_set_sh_desc(aead); badkey: @@ -986,9 +1014,6 @@ static void init_aead_job(struct aead_request *req, append_seq_out_ptr(desc, dst_dma, req->assoclen + req->cryptlen - authsize, out_options); - - /* REG3 = assoclen */ - append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen); } static void init_gcm_job(struct aead_request *req, @@ -1003,6 +1028,7 @@ static void init_gcm_job(struct aead_request *req, unsigned int last; init_aead_job(req, edesc, all_contig, encrypt); + append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen); /* BUG This should not be specific to generic GCM. */ last = 0; @@ -1029,6 +1055,7 @@ static void init_authenc_job(struct aead_request *req, struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; @@ -1052,6 +1079,15 @@ static void init_authenc_job(struct aead_request *req, init_aead_job(req, edesc, all_contig, encrypt); + /* + * {REG3, DPOVRD} = assoclen, depending on whether MATH command supports + * having DPOVRD as destination. 
+ */ + if (ctrlpriv->era < 3) + append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen); + else + append_math_add_imm_u32(desc, DPOVRD, ZERO, IMM, req->assoclen); + if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv)) append_load_as_imm(desc, req->iv, ivsize, LDST_CLASS_1_CCB | diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index d162120..86cf6b3 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -45,16 +45,16 @@ static inline void append_dec_op1(u32 *desc, u32 type) * cnstr_shdsc_aead_null_encap - IPSec ESP encapsulation shared descriptor * (non-protocol) with no (null) encryption. * @desc: pointer to buffer used for descriptor construction - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, - * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values - one of + * OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed + * with OP_ALG_AAI_HMAC_PRECOMP. * @icvsize: integrity check value (ICV) size (truncated or full) - * - * Note: Requires an MDHA split key. + * @era: SEC Era */ void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, - unsigned int icvsize) + unsigned int icvsize, int era) { u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; @@ -63,13 +63,18 @@ void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, /* Skip if already shared */ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_SHRD); - if (adata->key_inline) - append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, - adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT | - KEY_ENC); - else - append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); + if (era < 6) { + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, + adata->keylen_pad, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC); + } else { + append_proto_dkp(desc, adata); + } set_jump_tgt_here(desc, key_jump_cmd); /* assoclen + cryptlen = seqinlen */ @@ -121,16 +126,16 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap); * cnstr_shdsc_aead_null_decap - IPSec ESP decapsulation shared descriptor * (non-protocol) with no (null) decryption. * @desc: pointer to buffer used for descriptor construction - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, - * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values - one of + * OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed + * with OP_ALG_AAI_HMAC_PRECOMP. * @icvsize: integrity check value (ICV) size (truncated or full) - * - * Note: Requires an MDHA split key. 
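
Since the DKP path stores the raw authentication key where the precomputed split key used to live, the ctx->key layout both eras assume is worth spelling out (inferred from the setkey hunks; this diagram is not part of the patch itself):

/*
 * ctx->key layout (offsets per the setkey hunks):
 *
 *   Era < 6 : [ MDHA split key, padded to keylen_pad ][ encryption key ]
 *   Era >= 6: [ raw auth key ][ pad up to keylen_pad ][ encryption key ]
 *
 * In both cases the encryption key starts at ctx->adata.keylen_pad,
 * with keylen_pad = split_key_len(algtype & OP_ALG_ALGSEL_MASK).
 */
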
+ * @era: SEC Era */ void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, - unsigned int icvsize) + unsigned int icvsize, int era) { u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd, *jump_cmd; @@ -139,13 +144,18 @@ void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, /* Skip if already shared */ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_SHRD); - if (adata->key_inline) - append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, - adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); + if (era < 6) { + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, + adata->keylen_pad, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC); + } else { + append_proto_dkp(desc, adata); + } set_jump_tgt_here(desc, key_jump_cmd); /* Class 2 operation */ @@ -204,7 +214,7 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap); static void init_sh_desc_key_aead(u32 * const desc, struct alginfo * const cdata, struct alginfo * const adata, - const bool is_rfc3686, u32 *nonce) + const bool is_rfc3686, u32 *nonce, int era) { u32 *key_jump_cmd; unsigned int enckeylen = cdata->keylen; @@ -224,13 +234,18 @@ static void init_sh_desc_key_aead(u32 * const desc, if (is_rfc3686) enckeylen -= CTR_RFC3686_NONCE_SIZE; - if (adata->key_inline) - append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, - adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); + if (era < 6) { + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, + adata->keylen_pad, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC); + } else { + append_proto_dkp(desc, adata); + } if (cdata->key_inline) append_key_as_imm(desc, cdata->key_virt, enckeylen, @@ -261,26 +276,27 @@ static void init_sh_desc_key_aead(u32 * const desc, * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, - * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values - one of + * OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed + * with OP_ALG_AAI_HMAC_PRECOMP. * @ivsize: initialization vector size * @icvsize: integrity check value (ICV) size (truncated or full) * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template * @nonce: pointer to rfc3686 nonce * @ctx1_iv_off: IV offset in CONTEXT1 register * @is_qi: true when called from caam/qi - * - * Note: Requires an MDHA split key. 
+ * @era: SEC Era */ void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool is_rfc3686, - u32 *nonce, const u32 ctx1_iv_off, const bool is_qi) + u32 *nonce, const u32 ctx1_iv_off, const bool is_qi, + int era) { /* Note: Context registers are saved. */ - init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era); /* Class 2 operation */ append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | @@ -306,8 +322,13 @@ void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, } /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (is_qi || era < 3) { + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + } else { + append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, CAAM_CMD_SZ); + } /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -350,27 +371,27 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_encap); * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, - * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values - one of + * OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed + * with OP_ALG_AAI_HMAC_PRECOMP. * @ivsize: initialization vector size * @icvsize: integrity check value (ICV) size (truncated or full) * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template * @nonce: pointer to rfc3686 nonce * @ctx1_iv_off: IV offset in CONTEXT1 register * @is_qi: true when called from caam/qi - * - * Note: Requires an MDHA split key. + * @era: SEC Era */ void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool geniv, const bool is_rfc3686, u32 *nonce, - const u32 ctx1_iv_off, const bool is_qi) + const u32 ctx1_iv_off, const bool is_qi, int era) { /* Note: Context registers are saved. 
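
The REG3/DPOVRD branches come in matched producer/consumer pairs: the job descriptor built per request publishes assoclen, and the shared descriptor reads it back into the variable sequence lengths, so both sides must pick the same register. That is why the same era < 3 test (with the QI path pinned to REG3) shows up in both caamalg.c and caamalg_desc.c. Distilled from the hunks above:

/* job descriptor side (init_authenc_job): publish assoclen */
if (era < 3)
        append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen);
else
        append_math_add_imm_u32(desc, DPOVRD, ZERO, IMM, req->assoclen);

/* shared descriptor side (cnstr_shdsc_aead_*): consume assoclen */
if (is_qi || era < 3) {
        append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
        append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
} else {
        append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
        append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, CAAM_CMD_SZ);
}
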
*/ - init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era); /* Class 2 operation */ append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | @@ -397,11 +418,23 @@ void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, } /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - if (geniv) - append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); - else - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (is_qi || era < 3) { + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + if (geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, + ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, + CAAM_CMD_SZ); + } else { + append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ); + if (geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, DPOVRD, IMM, + ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, + CAAM_CMD_SZ); + } /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -456,29 +489,29 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_decap); * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, - * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values - one of + * OP_ALG_ALGSEL_{MD5, SHA1, SHA224, SHA256, SHA384, SHA512} ANDed + * with OP_ALG_AAI_HMAC_PRECOMP. * @ivsize: initialization vector size * @icvsize: integrity check value (ICV) size (truncated or full) * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template * @nonce: pointer to rfc3686 nonce * @ctx1_iv_off: IV offset in CONTEXT1 register * @is_qi: true when called from caam/qi - * - * Note: Requires an MDHA split key. + * @era: SEC Era */ void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool is_rfc3686, u32 *nonce, const u32 ctx1_iv_off, - const bool is_qi) + const bool is_qi, int era) { u32 geniv, moveiv; /* Note: Context registers are saved. 
*/ - init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era); if (is_qi) { u32 *wait_load_cmd; @@ -528,8 +561,13 @@ copy_iv: OP_ALG_ENCRYPT); /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + if (is_qi || era < 3) { + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + } else { + append_math_add(desc, VARSEQINLEN, ZERO, DPOVRD, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, DPOVRD, CAAM_CMD_SZ); + } /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -588,19 +626,20 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_givencap); * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_AES ANDed * with OP_ALG_AAI_CBC - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values OP_ALG_ALGSEL_SHA1 ANDed with - * OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values OP_ALG_ALGSEL_SHA1 + * ANDed with OP_ALG_AAI_HMAC_PRECOMP. * @assoclen: associated data length * @ivsize: initialization vector size * @authsize: authentication data size * @blocksize: block cipher size + * @era: SEC Era */ void cnstr_shdsc_tls_encap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int assoclen, unsigned int ivsize, unsigned int authsize, - unsigned int blocksize) + unsigned int blocksize, int era) { u32 *key_jump_cmd, *zero_payload_jump_cmd; u32 genpad, idx_ld_datasz, idx_ld_pad, stidx; @@ -628,13 +667,18 @@ void cnstr_shdsc_tls_encap(u32 * const desc, struct alginfo *cdata, key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_SHRD); - if (adata->key_inline) - append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, - adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT | - KEY_ENC); - else - append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); + if (era < 6) { + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, + adata->keylen_pad, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, + CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC); + } else { + append_proto_dkp(desc, adata); + } if (cdata->key_inline) append_key_as_imm(desc, cdata->key_virt, cdata->keylen, @@ -741,19 +785,20 @@ EXPORT_SYMBOL(cnstr_shdsc_tls_encap); * @cdata: pointer to block cipher transform definitions * Valid algorithm values - one of OP_ALG_ALGSEL_AES ANDed * with OP_ALG_AAI_CBC - * @adata: pointer to authentication transform definitions. Note that since a - * split key is to be used, the size of the split key itself is - * specified. Valid algorithm values OP_ALG_ALGSEL_ SHA1 ANDed with - * OP_ALG_AAI_HMAC_PRECOMP. + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. Valid algorithm values OP_ALG_ALGSEL_SHA1 + * ANDed with OP_ALG_AAI_HMAC_PRECOMP. 
* @assoclen: associated data length * @ivsize: initialization vector size * @authsize: authentication data size * @blocksize: block cipher size + * @era: SEC Era */ void cnstr_shdsc_tls_decap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int assoclen, unsigned int ivsize, unsigned int authsize, - unsigned int blocksize) + unsigned int blocksize, int era) { u32 stidx, jumpback; u32 *key_jump_cmd, *zero_payload_jump_cmd, *skip_zero_jump_cmd; @@ -771,8 +816,11 @@ void cnstr_shdsc_tls_decap(u32 * const desc, struct alginfo *cdata, key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_SHRD); - append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); + if (era < 6) + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_proto_dkp(desc, adata); append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); @@ -1618,7 +1666,7 @@ void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata, /* Load nonce into CONTEXT1 reg */ if (is_rfc3686) { - u8 *nonce = cdata->key_virt + cdata->keylen; + const u8 *nonce = cdata->key_virt + cdata->keylen; append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, LDST_CLASS_IND_CCB | @@ -1683,7 +1731,7 @@ void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata, /* Load nonce into CONTEXT1 reg */ if (is_rfc3686) { - u8 *nonce = cdata->key_virt + cdata->keylen; + const u8 *nonce = cdata->key_virt + cdata->keylen; append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, LDST_CLASS_IND_CCB | @@ -1752,7 +1800,7 @@ void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata, /* Load Nonce into CONTEXT1 reg */ if (is_rfc3686) { - u8 *nonce = cdata->key_virt + cdata->keylen; + const u8 *nonce = cdata->key_virt + cdata->keylen; append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, LDST_CLASS_IND_CCB | diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h index 6b436f6..e6c95fa 100644 --- a/drivers/crypto/caam/caamalg_desc.h +++ b/drivers/crypto/caam/caamalg_desc.h @@ -51,38 +51,38 @@ 15 * CAAM_CMD_SZ) void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, - unsigned int icvsize); + unsigned int icvsize, int era); void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, - unsigned int icvsize); + unsigned int icvsize, int era); void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool is_rfc3686, u32 *nonce, const u32 ctx1_iv_off, - const bool is_qi); + const bool is_qi, int era); void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool geniv, const bool is_rfc3686, u32 *nonce, - const u32 ctx1_iv_off, const bool is_qi); + const u32 ctx1_iv_off, const bool is_qi, int era); void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int ivsize, unsigned int icvsize, const bool is_rfc3686, u32 *nonce, const u32 ctx1_iv_off, - const bool is_qi); + const bool is_qi, int era); void cnstr_shdsc_tls_encap(u32 *const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int assoclen, unsigned int ivsize, unsigned int authsize, - unsigned int blocksize); + unsigned int blocksize, int era); void cnstr_shdsc_tls_decap(u32 *const desc, struct alginfo *cdata, struct alginfo *adata, unsigned int assoclen, 
unsigned int ivsize, unsigned int authsize, - unsigned int blocksize); + unsigned int blocksize, int era); void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, unsigned int ivsize, unsigned int icvsize, diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index d6a9b0c..f9b0ebf 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -74,6 +74,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); if (!ctx->cdata.keylen || !ctx->authsize) return 0; @@ -124,7 +125,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) cnstr_shdsc_aead_encap(ctx->sh_desc_enc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, - ctx1_iv_off, true); + ctx1_iv_off, true, ctrlpriv->era); skip_enc: /* aead_decrypt shared descriptor */ @@ -149,7 +150,8 @@ skip_enc: cnstr_shdsc_aead_decap(ctx->sh_desc_dec, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, alg->caam.geniv, - is_rfc3686, nonce, ctx1_iv_off, true); + is_rfc3686, nonce, ctx1_iv_off, true, + ctrlpriv->era); if (!alg->caam.geniv) goto skip_givenc; @@ -176,7 +178,7 @@ skip_enc: cnstr_shdsc_aead_givencap(ctx->sh_desc_enc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, - ctx1_iv_off, true); + ctx1_iv_off, true, ctrlpriv->era); skip_givenc: return 0; @@ -197,6 +199,7 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; int ret = 0; @@ -211,6 +214,27 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif + /* + * If DKP is supported, use it in the shared descriptor to generate + * the split key. 
+ */ + if (ctrlpriv->era >= 6) { + ctx->adata.keylen = keys.authkeylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); + + if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, keys.authkey, keys.authkeylen); + memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, + keys.enckeylen); + dma_sync_single_for_device(jrdev, ctx->key_dma, + ctx->adata.keylen_pad + + keys.enckeylen, DMA_TO_DEVICE); + goto skip_split_key; + } + ret = gen_split_key(jrdev, ctx->key, &ctx->adata, keys.authkey, keys.authkeylen, CAAM_MAX_KEY_SIZE - keys.enckeylen); @@ -227,6 +251,7 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key, ctx->adata.keylen_pad + keys.enckeylen, 1); #endif +skip_split_key: ctx->cdata.keylen = keys.enckeylen; ret = aead_set_sh_desc(aead); @@ -266,6 +291,7 @@ static int tls_set_sh_desc(struct crypto_aead *tls) unsigned int assoclen = 13; /* always 13 bytes for TLS */ unsigned int data_len[2]; u32 inl_mask; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); if (!ctx->cdata.keylen || !ctx->authsize) return 0; @@ -296,17 +322,20 @@ static int tls_set_sh_desc(struct crypto_aead *tls) ctx->cdata.key_inline = !!(inl_mask & 2); cnstr_shdsc_tls_encap(ctx->sh_desc_enc, &ctx->cdata, &ctx->adata, - assoclen, ivsize, ctx->authsize, blocksize); + assoclen, ivsize, ctx->authsize, blocksize, + ctrlpriv->era); /* * TLS 1.0 decrypt shared descriptor * Keys do not fit inline, regardless of algorithms used */ + ctx->adata.key_inline = false; ctx->adata.key_dma = ctx->key_dma; ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; cnstr_shdsc_tls_decap(ctx->sh_desc_dec, &ctx->cdata, &ctx->adata, - assoclen, ivsize, ctx->authsize, blocksize); + assoclen, ivsize, ctx->authsize, blocksize, + ctrlpriv->era); return 0; } @@ -326,6 +355,7 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, { struct caam_ctx *ctx = crypto_aead_ctx(tls); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; int ret = 0; @@ -340,6 +370,27 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif + /* + * If DKP is supported, use it in the shared descriptor to generate + * the split key. 
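
A subtle TLS fix rides along here: decap keys never fit inline, so key_inline is now cleared explicitly before the decap descriptor is rebuilt, since the encap sizing just above may have set it. The relevant lines, pulled together:

ctx->adata.key_inline = false;  /* may still be true from encap sizing */
ctx->adata.key_dma = ctx->key_dma;
ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
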
+ */ + if (ctrlpriv->era >= 6) { + ctx->adata.keylen = keys.authkeylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); + + if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, keys.authkey, keys.authkeylen); + memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, + keys.enckeylen); + dma_sync_single_for_device(jrdev, ctx->key_dma, + ctx->adata.keylen_pad + + keys.enckeylen, DMA_TO_DEVICE); + goto skip_split_key; + } + ret = gen_split_key(jrdev, ctx->key, &ctx->adata, keys.authkey, keys.authkeylen, CAAM_MAX_KEY_SIZE - keys.enckeylen); @@ -359,6 +410,7 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, ctx->adata.keylen_pad + keys.enckeylen, 1); #endif +skip_split_key: ctx->cdata.keylen = keys.enckeylen; ret = tls_set_sh_desc(tls); diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 102b084..37f9ca7 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -42,6 +42,7 @@ #include "sg_sw_qm2.h" #include "key_gen.h" #include "caamalg_desc.h" +#include "caamhash_desc.h" #include "../../../drivers/staging/fsl-mc/include/mc.h" #include "../../../drivers/staging/fsl-mc/include/dpaa2-io.h" #include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h" @@ -88,6 +89,7 @@ struct caam_aead_alg { * caam_ctx - per-session context * @flc: Flow Contexts array * @key: virtual address of the key(s): [authentication key], encryption key + * @flc_dma: I/O virtual addresses of the Flow Contexts * @key_dma: I/O virtual address of the key * @dev: dpseci device * @adata: authentication algorithm details @@ -97,6 +99,7 @@ struct caam_aead_alg { struct caam_ctx { struct caam_flc flc[NUM_OP]; u8 key[CAAM_MAX_KEY_SIZE]; + dma_addr_t flc_dma[NUM_OP]; dma_addr_t key_dma; struct device *dev; struct alginfo adata; @@ -154,6 +157,8 @@ static struct caam_request *to_caam_req(struct crypto_async_request *areq) case CRYPTO_ALG_TYPE_AEAD: return aead_request_ctx(container_of(areq, struct aead_request, base)); + case CRYPTO_ALG_TYPE_AHASH: + return ahash_request_ctx(ahash_request_cast(areq)); default: return ERR_PTR(-EINVAL); } @@ -189,6 +194,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) struct caam_ctx *ctx = crypto_aead_ctx(aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct device *dev = ctx->dev; + struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); struct caam_flc *flc; u32 *desc; u32 ctx1_iv_off = 0; @@ -250,19 +256,17 @@ static int aead_set_sh_desc(struct crypto_aead *aead) if (alg->caam.geniv) cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, - nonce, ctx1_iv_off, true); + nonce, ctx1_iv_off, true, + priv->sec_attr.era); else cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ivsize, ctx->authsize, is_rfc3686, nonce, - ctx1_iv_off, true); + ctx1_iv_off, true, priv->sec_attr.era); flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* aead_decrypt shared descriptor */ if (desc_inline_query(DESC_QI_AEAD_DEC_LEN + @@ -286,18 +290,14 @@ static int aead_set_sh_desc(struct crypto_aead *aead) flc = &ctx->flc[DECRYPT]; desc = flc->sh_desc; - cnstr_shdsc_aead_decap(desc, 
&ctx->cdata, &ctx->adata, ivsize, ctx->authsize, alg->caam.geniv, - is_rfc3686, nonce, ctx1_iv_off, true); - + is_rfc3686, nonce, ctx1_iv_off, true, + priv->sec_attr.era); flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -333,137 +333,12 @@ static void split_key_sh_done(void *cbk_ctx, u32 err) complete(&res->completion); } -static int gen_split_key_sh(struct device *dev, u8 *key_out, - struct alginfo * const adata, const u8 *key_in, - u32 keylen) -{ - struct caam_request *req_ctx; - u32 *desc; - struct split_key_sh_result result; - dma_addr_t dma_addr_in, dma_addr_out; - struct caam_flc *flc; - struct dpaa2_fl_entry *in_fle, *out_fle; - int ret = -ENOMEM; - - req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA); - if (!req_ctx) - return -ENOMEM; - - in_fle = &req_ctx->fd_flt[1]; - out_fle = &req_ctx->fd_flt[0]; - - flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA); - if (!flc) - goto err_flc; - - dma_addr_in = dma_map_single(dev, (void *)key_in, keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_addr_in)) { - dev_err(dev, "unable to map key input memory\n"); - goto err_dma_addr_in; - } - - dma_addr_out = dma_map_single(dev, key_out, adata->keylen_pad, - DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_addr_out)) { - dev_err(dev, "unable to map key output memory\n"); - goto err_dma_addr_out; - } - - desc = flc->sh_desc; - - init_sh_desc(desc, 0); - append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG); - - /* Sets MDHA up into an HMAC-INIT */ - append_operation(desc, (adata->algtype & OP_ALG_ALGSEL_MASK) | - OP_ALG_AAI_HMAC | OP_TYPE_CLASS2_ALG | OP_ALG_DECRYPT | - OP_ALG_AS_INIT); - - /* - * do a FIFO_LOAD of zero, this will trigger the internal key expansion - * into both pads inside MDHA - */ - append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2); - - /* - * FIFO_STORE with the explicit split-key content store - * (0x26 output type) - */ - append_fifo_store(desc, dma_addr_out, adata->keylen, - LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); - - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - goto err_flc_dma; - } - - dpaa2_fl_set_final(in_fle, true); - dpaa2_fl_set_format(in_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(in_fle, dma_addr_in); - dpaa2_fl_set_len(in_fle, keylen); - dpaa2_fl_set_format(out_fle, dpaa2_fl_single); - dpaa2_fl_set_addr(out_fle, dma_addr_out); - dpaa2_fl_set_len(out_fle, adata->keylen_pad); - -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1); - print_hex_dump(KERN_ERR, "desc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif - - result.err = 0; - init_completion(&result.completion); - result.dev = dev; - - req_ctx->flc = flc; - req_ctx->cbk = split_key_sh_done; - req_ctx->ctx = &result; - - ret = dpaa2_caam_enqueue(dev, req_ctx); - if (ret == -EINPROGRESS) { - /* in progress */ - wait_for_completion(&result.completion); - ret = result.err; -#ifdef DEBUG - 
print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key_out, - adata->keylen_pad, 1); -#endif - } - - dma_unmap_single(dev, flc->flc_dma, sizeof(flc->flc) + desc_bytes(desc), - DMA_TO_DEVICE); -err_flc_dma: - dma_unmap_single(dev, dma_addr_out, adata->keylen_pad, DMA_FROM_DEVICE); -err_dma_addr_out: - dma_unmap_single(dev, dma_addr_in, keylen, DMA_TO_DEVICE); -err_dma_addr_in: - kfree(flc); -err_flc: - kfree(req_ctx); - return ret; -} - -static int gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, - u32 authkeylen) -{ - return gen_split_key_sh(ctx->dev, ctx->key, &ctx->adata, key_in, - authkeylen); -} - static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *dev = ctx->dev; struct crypto_authenc_keys keys; - int ret; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto badkey; @@ -476,34 +351,17 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif - ctx->adata.keylen = split_key_len(ctx->adata.algtype & - OP_ALG_ALGSEL_MASK); - ctx->adata.keylen_pad = split_key_pad_len(ctx->adata.algtype & - OP_ALG_ALGSEL_MASK); - -#ifdef DEBUG - dev_err(dev, "split keylen %d split keylen padded %d\n", - ctx->adata.keylen, ctx->adata.keylen_pad); - print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, keys.authkey, keylen, 1); -#endif + ctx->adata.keylen = keys.authkeylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE) goto badkey; - ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); - if (ret) - goto badkey; - - /* postpend encryption key to auth split key */ + memcpy(ctx->key, keys.authkey, keys.authkeylen); memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen); - - ctx->key_dma = dma_map_single(dev, ctx->key, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->key_dma, ctx->adata.keylen_pad + + keys.enckeylen, DMA_BIDIRECTIONAL); #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, @@ -512,12 +370,7 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key, ctx->cdata.keylen = keys.enckeylen; - ret = aead_set_sh_desc(aead); - if (ret) - dma_unmap_single(dev, ctx->key_dma, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - - return ret; + return aead_set_sh_desc(aead); badkey: crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; @@ -901,6 +754,7 @@ static int tls_set_sh_desc(struct crypto_aead *tls) unsigned int ivsize = crypto_aead_ivsize(tls); unsigned int blocksize = crypto_aead_blocksize(tls); struct device *dev = ctx->dev; + struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); struct caam_flc *flc; u32 *desc; unsigned int assoclen = 13; /* always 13 bytes for TLS */ @@ -937,39 +791,30 @@ static int tls_set_sh_desc(struct crypto_aead *tls) flc = &ctx->flc[ENCRYPT]; desc = flc->sh_desc; - cnstr_shdsc_tls_encap(desc, &ctx->cdata, &ctx->adata, - assoclen, ivsize, ctx->authsize, blocksize); - + assoclen, ivsize, ctx->authsize, blocksize, + priv->sec_attr.era); flc->flc[1] = desc_len(desc); - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), 
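
With DKP available, the DPAA2 driver can delete gen_split_key_sh() outright; no software-visible split-key generation step remains. Each setkey body collapses to copying the raw keys into the pre-mapped context buffer and syncing it (error paths elided; distilled from the new aead_setkey above):

memcpy(ctx->key, keys.authkey, keys.authkeylen);
memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
dma_sync_single_for_device(dev, ctx->key_dma,
                           ctx->adata.keylen_pad + keys.enckeylen,
                           DMA_BIDIRECTIONAL);
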
DMA_TO_DEVICE); - - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* * TLS 1.0 decrypt shared descriptor * Keys do not fit inline, regardless of algorithms used */ + ctx->adata.key_inline = false; ctx->adata.key_dma = ctx->key_dma; ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; flc = &ctx->flc[DECRYPT]; desc = flc->sh_desc; - cnstr_shdsc_tls_decap(desc, &ctx->cdata, &ctx->adata, assoclen, ivsize, - ctx->authsize, blocksize); - + ctx->authsize, blocksize, priv->sec_attr.era); flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -980,7 +825,6 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, struct caam_ctx *ctx = crypto_aead_ctx(tls); struct device *dev = ctx->dev; struct crypto_authenc_keys keys; - int ret; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto badkey; @@ -993,35 +837,17 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif - ctx->adata.keylen = split_key_len(ctx->adata.algtype & - OP_ALG_ALGSEL_MASK); - ctx->adata.keylen_pad = split_key_pad_len(ctx->adata.algtype & - OP_ALG_ALGSEL_MASK); - -#ifdef DEBUG - dev_err(dev, "split keylen %d split keylen padded %d\n", - ctx->adata.keylen, ctx->adata.keylen_pad); - print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, keys.authkey, - keys.authkeylen + keys.enckeylen, 1); -#endif + ctx->adata.keylen = keys.authkeylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); if (ctx->adata.keylen_pad + keys.enckeylen > CAAM_MAX_KEY_SIZE) goto badkey; - ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); - if (ret) - goto badkey; - - /* postpend encryption key to auth split key */ + memcpy(ctx->key, keys.authkey, keys.authkeylen); memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen); - - ctx->key_dma = dma_map_single(dev, ctx->key, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->key_dma, ctx->adata.keylen_pad + + keys.enckeylen, DMA_BIDIRECTIONAL); #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, @@ -1030,12 +856,7 @@ static int tls_setkey(struct crypto_aead *tls, const u8 *key, ctx->cdata.keylen = keys.enckeylen; - ret = tls_set_sh_desc(tls); - if (ret) - dma_unmap_single(dev, ctx->key_dma, ctx->adata.keylen_pad + - keys.enckeylen, DMA_TO_DEVICE); - - return ret; + return tls_set_sh_desc(tls); badkey: crypto_aead_set_flags(tls, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; @@ -1080,14 +901,10 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) flc = &ctx->flc[ENCRYPT]; desc = flc->sh_desc; cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); 
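
All of these per-descriptor dma_map_single() blocks turn into dma_sync_single_for_device() calls because the flow contexts and the key now share one persistent mapping set up at tfm init time. A condensed view of that mapping, as caam_cra_init() establishes it later in this patch (flc[] is the first member of struct caam_ctx, so flc_dma[i] and key_dma are plain offsets into one region; DMA_ATTR_SKIP_CPU_SYNC avoids an initial sync since the contents are synced explicitly after each rebuild):

dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc,
                                offsetof(struct caam_ctx, flc_dma),
                                DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(ctx->dev, dma_addr))
        return -ENOMEM;

for (i = 0; i < NUM_OP; i++)
        ctx->flc_dma[i] = dma_addr + i * sizeof(ctx->flc[i]);
ctx->key_dma = dma_addr + NUM_OP * sizeof(ctx->flc[0]);
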
- if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* * Job Descriptor and Shared Descriptors @@ -1104,14 +921,10 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) flc = &ctx->flc[DECRYPT]; desc = flc->sh_desc; cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -1131,7 +944,6 @@ static int gcm_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *dev = ctx->dev; - int ret; #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ", @@ -1139,19 +951,11 @@ static int gcm_setkey(struct crypto_aead *aead, #endif memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(dev, ctx->key, keylen, DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->key_dma, keylen, + DMA_BIDIRECTIONAL); ctx->cdata.keylen = keylen; - ret = gcm_set_sh_desc(aead); - if (ret) - dma_unmap_single(dev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - - return ret; + return gcm_set_sh_desc(aead); } static int rfc4106_set_sh_desc(struct crypto_aead *aead) @@ -1185,14 +989,10 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) desc = flc->sh_desc; cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* * Job Descriptor and Shared Descriptors @@ -1209,14 +1009,10 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) desc = flc->sh_desc; cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -1237,7 +1033,6 @@ static int rfc4106_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *dev = ctx->dev; - int ret; if (keylen < 4) return -EINVAL; @@ -1253,19 +1048,10 @@ static int rfc4106_setkey(struct crypto_aead *aead, * in the nonce. Update the AES key length. 
*/ ctx->cdata.keylen = keylen - 4; - ctx->key_dma = dma_map_single(dev, ctx->key, ctx->cdata.keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->key_dma, ctx->cdata.keylen, + DMA_BIDIRECTIONAL); - ret = rfc4106_set_sh_desc(aead); - if (ret) - dma_unmap_single(dev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - - return ret; + return rfc4106_set_sh_desc(aead); } static int rfc4543_set_sh_desc(struct crypto_aead *aead) @@ -1299,14 +1085,10 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) desc = flc->sh_desc; cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* * Job Descriptor and Shared Descriptors @@ -1323,14 +1105,10 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) desc = flc->sh_desc; cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ivsize, ctx->authsize, true); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -1351,7 +1129,6 @@ static int rfc4543_setkey(struct crypto_aead *aead, { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *dev = ctx->dev; - int ret; if (keylen < 4) return -EINVAL; @@ -1367,19 +1144,10 @@ static int rfc4543_setkey(struct crypto_aead *aead, * in the nonce. Update the AES key length. 
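
The rfc4106/rfc4543 setkey paths keep their existing salt handling: the last 4 bytes of the supplied key are the nonce salt, so only the AES portion is synced to the device. Distilled:

/* key[0..keylen-5] = AES key, key[keylen-4..] = 4-byte nonce salt */
ctx->cdata.keylen = keylen - 4;
dma_sync_single_for_device(dev, ctx->key_dma, ctx->cdata.keylen,
                           DMA_BIDIRECTIONAL);
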
*/ ctx->cdata.keylen = keylen - 4; - ctx->key_dma = dma_map_single(dev, ctx->key, ctx->cdata.keylen, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->key_dma, ctx->cdata.keylen, + DMA_BIDIRECTIONAL); - ret = rfc4543_set_sh_desc(aead); - if (ret) - dma_unmap_single(dev, ctx->key_dma, ctx->cdata.keylen, - DMA_TO_DEVICE); - - return ret; + return rfc4543_set_sh_desc(aead); } static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, @@ -1397,7 +1165,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = (ctr_mode && strstr(alg_name, "rfc3686")); - memcpy(ctx->key, key, keylen); #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); @@ -1420,59 +1187,39 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, keylen -= CTR_RFC3686_NONCE_SIZE; } - ctx->key_dma = dma_map_single(dev, ctx->key, keylen, DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } ctx->cdata.keylen = keylen; - ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_virt = key; ctx->cdata.key_inline = true; /* ablkcipher_encrypt shared descriptor */ flc = &ctx->flc[ENCRYPT]; desc = flc->sh_desc; - cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* ablkcipher_decrypt shared descriptor */ flc = &ctx->flc[DECRYPT]; desc = flc->sh_desc; - cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* ablkcipher_givencrypt shared descriptor */ flc = &ctx->flc[GIVENCRYPT]; desc = flc->sh_desc; - cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686, ctx1_iv_off); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[GIVENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -1492,42 +1239,27 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return -EINVAL; } - memcpy(ctx->key, key, keylen); - ctx->key_dma = dma_map_single(dev, ctx->key, keylen, DMA_TO_DEVICE); - if (dma_mapping_error(dev, ctx->key_dma)) { - dev_err(dev, "unable to map key i/o memory\n"); - return -ENOMEM; - } ctx->cdata.keylen = keylen; - ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_virt = key; ctx->cdata.key_inline = true; /* xts_ablkcipher_encrypt shared descriptor */ flc = &ctx->flc[ENCRYPT]; desc = 
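
For plain block ciphers the key is always inlined into the shared descriptor, so the qi2 driver no longer copies it into ctx->key or maps it at all; cdata simply borrows the caller's buffer for the duration of setkey:

ctx->cdata.keylen = keylen;
ctx->cdata.key_virt = key;      /* caller's buffer; inlined by cnstr_shdsc_* */
ctx->cdata.key_inline = true;
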
flc->sh_desc; cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[ENCRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); /* xts_ablkcipher_decrypt shared descriptor */ flc = &ctx->flc[DECRYPT]; desc = flc->sh_desc; - cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata); - flc->flc[1] = desc_len(desc); /* SDL */ - flc->flc_dma = dma_map_single(dev, flc, sizeof(flc->flc) + - desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(dev, flc->flc_dma)) { - dev_err(dev, "unable to map shared descriptor\n"); - return -ENOMEM; - } + dma_sync_single_for_device(dev, ctx->flc_dma[DECRYPT], + sizeof(flc->flc) + desc_bytes(desc), + DMA_BIDIRECTIONAL); return 0; } @@ -1940,6 +1672,7 @@ static int aead_encrypt(struct aead_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[ENCRYPT]; + caam_req->flc_dma = ctx->flc_dma[ENCRYPT]; caam_req->op_type = ENCRYPT; caam_req->cbk = aead_encrypt_done; caam_req->ctx = &req->base; @@ -1968,6 +1701,7 @@ static int aead_decrypt(struct aead_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[DECRYPT]; + caam_req->flc_dma = ctx->flc_dma[DECRYPT]; caam_req->op_type = DECRYPT; caam_req->cbk = aead_decrypt_done; caam_req->ctx = &req->base; @@ -2053,6 +1787,7 @@ static int tls_encrypt(struct aead_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[ENCRYPT]; + caam_req->flc_dma = ctx->flc_dma[ENCRYPT]; caam_req->op_type = ENCRYPT; caam_req->cbk = tls_encrypt_done; caam_req->ctx = &req->base; @@ -2081,6 +1816,7 @@ static int tls_decrypt(struct aead_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[DECRYPT]; + caam_req->flc_dma = ctx->flc_dma[DECRYPT]; caam_req->op_type = DECRYPT; caam_req->cbk = tls_decrypt_done; caam_req->ctx = &req->base; @@ -2167,6 +1903,7 @@ static int ablkcipher_encrypt(struct ablkcipher_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[ENCRYPT]; + caam_req->flc_dma = ctx->flc_dma[ENCRYPT]; caam_req->op_type = ENCRYPT; caam_req->cbk = ablkcipher_done; caam_req->ctx = &req->base; @@ -2196,6 +1933,7 @@ static int ablkcipher_givencrypt(struct skcipher_givcrypt_request *greq) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[GIVENCRYPT]; + caam_req->flc_dma = ctx->flc_dma[GIVENCRYPT]; caam_req->op_type = GIVENCRYPT; caam_req->cbk = ablkcipher_done; caam_req->ctx = &req->base; @@ -2224,6 +1962,7 @@ static int ablkcipher_decrypt(struct ablkcipher_request *req) return PTR_ERR(edesc); caam_req->flc = &ctx->flc[DECRYPT]; + caam_req->flc_dma = ctx->flc_dma[DECRYPT]; caam_req->op_type = DECRYPT; caam_req->cbk = ablkcipher_done; caam_req->ctx = &req->base; @@ -2250,6 +1989,8 @@ static int caam_cra_init(struct crypto_tfm *tfm) struct caam_crypto_alg *caam_alg = container_of(alg, typeof(*caam_alg), crypto_alg); struct caam_ctx *ctx = crypto_tfm_ctx(tfm); + dma_addr_t dma_addr; + int i; /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | @@ -2259,6 +2000,19 @@ static int caam_cra_init(struct crypto_tfm *tfm) ctx->dev = caam_alg->caam.dev; + dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc, + offsetof(struct caam_ctx, flc_dma), + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->dev, dma_addr)) { + dev_err(ctx->dev, "unable to map key, 
shared descriptors\n"); + return -ENOMEM; + } + + for (i = 0; i < NUM_OP; i++) + ctx->flc_dma[i] = dma_addr + i * sizeof(ctx->flc[i]); + ctx->key_dma = dma_addr + NUM_OP * sizeof(ctx->flc[0]); + return 0; } @@ -2279,21 +2033,9 @@ static int caam_cra_init_aead(struct crypto_aead *tfm) static void caam_exit_common(struct caam_ctx *ctx) { - int i; - - for (i = 0; i < NUM_OP; i++) { - if (!ctx->flc[i].flc_dma) - continue; - dma_unmap_single(ctx->dev, ctx->flc[i].flc_dma, - sizeof(ctx->flc[i].flc) + - desc_bytes(ctx->flc[i].sh_desc), - DMA_TO_DEVICE); - } - - if (ctx->key_dma) - dma_unmap_single(ctx->dev, ctx->key_dma, - ctx->cdata.keylen + ctx->adata.keylen_pad, - DMA_TO_DEVICE); + dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], + offsetof(struct caam_ctx, flc_dma), + DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); } static void caam_cra_exit(struct crypto_tfm *tfm) @@ -3662,6 +3404,1690 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->exit = caam_cra_exit_aead; } +/* max hash key is max split key size */ +#define CAAM_MAX_HASH_KEY_SIZE (SHA512_DIGEST_SIZE * 2) + +#define CAAM_MAX_HASH_BLOCK_SIZE SHA512_BLOCK_SIZE +#define CAAM_MAX_HASH_DIGEST_SIZE SHA512_DIGEST_SIZE + +#define DESC_HASH_MAX_USED_BYTES (DESC_AHASH_FINAL_LEN + \ + CAAM_MAX_HASH_KEY_SIZE) +#define DESC_HASH_MAX_USED_LEN (DESC_HASH_MAX_USED_BYTES / CAAM_CMD_SZ) + +/* caam context sizes for hashes: running digest + 8 */ +#define HASH_MSG_LEN 8 +#define MAX_CTX_LEN (HASH_MSG_LEN + SHA512_DIGEST_SIZE) + +enum hash_optype { + UPDATE = 0, + UPDATE_FIRST, + FINALIZE, + DIGEST, + HASH_NUM_OP +}; + +/** + * caam_hash_ctx - ahash per-session context + * @flc: Flow Contexts array + * @flc_dma: I/O virtual addresses of the Flow Contexts + * @key: virtual address of the authentication key + * @dev: dpseci device + * @ctx_len: size of Context Register + * @adata: hashing algorithm details + */ +struct caam_hash_ctx { + struct caam_flc flc[HASH_NUM_OP]; + dma_addr_t flc_dma[HASH_NUM_OP]; + u8 key[CAAM_MAX_HASH_KEY_SIZE]; + struct device *dev; + int ctx_len; + struct alginfo adata; +}; + +/* ahash state */ +struct caam_hash_state { + struct caam_request caam_req; + dma_addr_t buf_dma; + dma_addr_t ctx_dma; + u8 buf_0[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; + int buflen_0; + u8 buf_1[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned; + int buflen_1; + u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned; + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int current_buf; +}; + +struct caam_export_state { + u8 buf[CAAM_MAX_HASH_BLOCK_SIZE]; + u8 caam_ctx[MAX_CTX_LEN]; + int buflen; + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); +}; + +static inline void switch_buf(struct caam_hash_state *state) +{ + state->current_buf ^= 1; +} + +static inline u8 *current_buf(struct caam_hash_state *state) +{ + return state->current_buf ? state->buf_1 : state->buf_0; +} + +static inline u8 *alt_buf(struct caam_hash_state *state) +{ + return state->current_buf ? state->buf_0 : state->buf_1; +} + +static inline int *current_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? &state->buflen_1 : &state->buflen_0; +} + +static inline int *alt_buflen(struct caam_hash_state *state) +{ + return state->current_buf ? 
&state->buflen_0 : &state->buflen_1; +} + +/* Map current buffer in state (if length > 0) and put it in link table */ +static inline int buf_map_to_qm_sg(struct device *dev, + struct dpaa2_sg_entry *qm_sg, + struct caam_hash_state *state) +{ + int buflen = *current_buflen(state); + + if (!buflen) + return 0; + + state->buf_dma = dma_map_single(dev, current_buf(state), buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, state->buf_dma)) { + dev_err(dev, "unable to map buf\n"); + state->buf_dma = 0; + return -ENOMEM; + } + + dma_to_qm_sg_one(qm_sg, state->buf_dma, buflen, 0); + + return 0; +} + +/* Map state->caam_ctx, and add it to link table */ +static inline int ctx_map_to_qm_sg(struct device *dev, + struct caam_hash_state *state, int ctx_len, + struct dpaa2_sg_entry *qm_sg, u32 flag) +{ + state->ctx_dma = dma_map_single(dev, state->caam_ctx, ctx_len, flag); + if (dma_mapping_error(dev, state->ctx_dma)) { + dev_err(dev, "unable to map ctx\n"); + state->ctx_dma = 0; + return -ENOMEM; + } + + dma_to_qm_sg_one(qm_sg, state->ctx_dma, ctx_len, 0); + + return 0; +} + +static int ahash_set_sh_desc(struct crypto_ahash *ahash) +{ + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int digestsize = crypto_ahash_digestsize(ahash); + struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); + struct caam_flc *flc; + u32 *desc; + + ctx->adata.key_virt = ctx->key; + ctx->adata.key_inline = true; + + /* ahash_update shared descriptor */ + flc = &ctx->flc[UPDATE]; + desc = flc->sh_desc; + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_UPDATE, ctx->ctx_len, + ctx->ctx_len, true, priv->sec_attr.era); + flc->flc[1] = desc_len(desc); /* SDL */ + dma_sync_single_for_device(ctx->dev, ctx->flc_dma[UPDATE], + desc_bytes(desc), DMA_BIDIRECTIONAL); +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ahash update shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + /* ahash_update_first shared descriptor */ + flc = &ctx->flc[UPDATE_FIRST]; + desc = flc->sh_desc; + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len, + ctx->ctx_len, false, priv->sec_attr.era); + flc->flc[1] = desc_len(desc); /* SDL */ + dma_sync_single_for_device(ctx->dev, ctx->flc_dma[UPDATE_FIRST], + desc_bytes(desc), DMA_BIDIRECTIONAL); +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ahash update first shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + /* ahash_final shared descriptor */ + flc = &ctx->flc[FINALIZE]; + desc = flc->sh_desc; + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_FINALIZE, digestsize, + ctx->ctx_len, true, priv->sec_attr.era); + flc->flc[1] = desc_len(desc); /* SDL */ + dma_sync_single_for_device(ctx->dev, ctx->flc_dma[FINALIZE], + desc_bytes(desc), DMA_BIDIRECTIONAL); +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ahash final shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + /* ahash_digest shared descriptor */ + flc = &ctx->flc[DIGEST]; + desc = flc->sh_desc; + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INITFINAL, digestsize, + ctx->ctx_len, false, priv->sec_attr.era); + flc->flc[1] = desc_len(desc); /* SDL */ + dma_sync_single_for_device(ctx->dev, ctx->flc_dma[DIGEST], + desc_bytes(desc), DMA_BIDIRECTIONAL); +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ahash digest shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + return 0; +} + +/* Digest hash size if it is too large */ +static int 
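
ahash_set_sh_desc() above programs one flow context per hash phase. How the four optypes pair an algorithm state with context import, condensed into a table (illustrative, not part of the patch):

static const struct {
	u32 alg_as;		/* 'state' argument to cnstr_shdsc_ahash() */
	bool import_ctx;	/* restore the running digest first? */
} hash_flc_cfg[HASH_NUM_OP] = {
	[UPDATE]       = { OP_ALG_AS_UPDATE,    true  },	/* ctx in, ctx out */
	[UPDATE_FIRST] = { OP_ALG_AS_INIT,      false },	/* data in, ctx out */
	[FINALIZE]     = { OP_ALG_AS_FINALIZE,  true  },	/* ctx in, digest out */
	[DIGEST]       = { OP_ALG_AS_INITFINAL, false },	/* data in, digest out */
};

Only FINALIZE and DIGEST pass the digest size as the output length; the two update variants write the running context back instead.
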
hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, + u32 *keylen, u8 *key_out, u32 digestsize) +{ + struct caam_request *req_ctx; + u32 *desc; + struct split_key_sh_result result; + dma_addr_t src_dma, dst_dma; + struct caam_flc *flc; + dma_addr_t flc_dma; + int ret = -ENOMEM; + struct dpaa2_fl_entry *in_fle, *out_fle; + + req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA); + if (!req_ctx) + return -ENOMEM; + + in_fle = &req_ctx->fd_flt[1]; + out_fle = &req_ctx->fd_flt[0]; + + flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA); + if (!flc) + goto err_flc; + + src_dma = dma_map_single(ctx->dev, (void *)key_in, *keylen, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, src_dma)) { + dev_err(ctx->dev, "unable to map key input memory\n"); + goto err_src_dma; + } + dst_dma = dma_map_single(ctx->dev, (void *)key_out, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, dst_dma)) { + dev_err(ctx->dev, "unable to map key output memory\n"); + goto err_dst_dma; + } + + desc = flc->sh_desc; + + init_sh_desc(desc, 0); + + /* descriptor to perform unkeyed hash on key_in */ + append_operation(desc, ctx->adata.algtype | OP_ALG_ENCRYPT | + OP_ALG_AS_INITFINAL); + append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_MSG); + append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + + flc->flc[1] = desc_len(desc); /* SDL */ + flc_dma = dma_map_single(ctx->dev, flc, sizeof(flc->flc) + + desc_bytes(desc), DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, flc_dma)) { + dev_err(ctx->dev, "unable to map shared descriptor\n"); + goto err_flc_dma; + } + + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(in_fle, src_dma); + dpaa2_fl_set_len(in_fle, *keylen); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "key_in@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, *keylen, 1); + print_hex_dump(KERN_ERR, "shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif + + result.err = 0; + init_completion(&result.completion); + result.dev = ctx->dev; + + req_ctx->flc = flc; + req_ctx->flc_dma = flc_dma; + req_ctx->cbk = split_key_sh_done; + req_ctx->ctx = &result; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret == -EINPROGRESS) { + /* in progress */ + wait_for_completion(&result.completion); + ret = result.err; +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "digested key@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, digestsize, + 1); +#endif + } + + dma_unmap_single(ctx->dev, flc_dma, sizeof(flc->flc) + desc_bytes(desc), + DMA_TO_DEVICE); +err_flc_dma: + dma_unmap_single(ctx->dev, dst_dma, digestsize, DMA_FROM_DEVICE); +err_dst_dma: + dma_unmap_single(ctx->dev, src_dma, *keylen, DMA_TO_DEVICE); +err_src_dma: + kfree(flc); +err_flc: + kfree(req_ctx); + + *keylen = digestsize; + + return ret; +} + +static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, + unsigned int keylen) +{ + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + unsigned int blocksize = crypto_tfm_alg_blocksize(&ahash->base); + unsigned int digestsize = crypto_ahash_digestsize(ahash); + int ret; + u8 *hashed_key = NULL; + +#ifdef DEBUG + dev_err(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize); +#endif + + if (keylen > blocksize) { + hashed_key = kmalloc_array(digestsize, 
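
hash_digest_key() exists to honour the RFC 2104 rule that an HMAC key longer than the block size is first replaced by its digest; the descriptor built above is exactly that one-shot unkeyed hash. The rule restated as a standalone sketch (digest_fn stands in for the CAAM job):

static int normalize_hmac_key(const unsigned char *key, unsigned int *keylen,
			      unsigned char *out,
			      unsigned int blocksize, unsigned int digestsize,
			      int (*digest_fn)(const unsigned char *in,
					       unsigned int inlen,
					       unsigned char *digest))
{
	if (*keylen <= blocksize)
		return 0;		/* short keys are used verbatim */

	if (digest_fn(key, *keylen, out))
		return -1;		/* hashing failed */

	*keylen = digestsize;		/* the digest becomes the new key */
	return 0;
}
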
sizeof(*hashed_key), + GFP_KERNEL | GFP_DMA); + if (!hashed_key) + return -ENOMEM; + ret = hash_digest_key(ctx, key, &keylen, hashed_key, + digestsize); + if (ret) + goto bad_free_key; + key = hashed_key; + } + + ctx->adata.keylen = keylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); + if (ctx->adata.keylen_pad > CAAM_MAX_HASH_KEY_SIZE) + goto bad_free_key; + + memcpy(ctx->key, key, keylen); + + kfree(hashed_key); + return ahash_set_sh_desc(ahash); +bad_free_key: + kfree(hashed_key); + crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + +static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, + struct ahash_request *req, int dst_len) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + if (edesc->src_nents) + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + if (edesc->dst_dma) + dma_unmap_single(dev, edesc->dst_dma, dst_len, DMA_FROM_DEVICE); + + if (edesc->qm_sg_bytes) + dma_unmap_single(dev, edesc->qm_sg_dma, edesc->qm_sg_bytes, + DMA_TO_DEVICE); + + if (state->buf_dma) { + dma_unmap_single(dev, state->buf_dma, *current_buflen(state), + DMA_TO_DEVICE); + state->buf_dma = 0; + } +} + +static inline void ahash_unmap_ctx(struct device *dev, + struct ahash_edesc *edesc, + struct ahash_request *req, int dst_len, + u32 flag) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + + if (state->ctx_dma) { + dma_unmap_single(dev, state->ctx_dma, ctx->ctx_len, flag); + state->ctx_dma = 0; + } + ahash_unmap(dev, edesc, req, dst_len); +} + +static void ahash_done(void *cbk_ctx, u32 status) +{ + struct crypto_async_request *areq = cbk_ctx; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->caam_req.edesc; + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int digestsize = crypto_ahash_digestsize(ahash); + int ecode = 0; + +#ifdef DEBUG + dev_err(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); +#endif + + if (unlikely(status)) { + caam_qi2_strstatus(ctx->dev, status); + ecode = -EIO; + } + + ahash_unmap(ctx->dev, edesc, req, digestsize); + qi_cache_free(edesc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) + print_hex_dump(KERN_ERR, "result@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); +#endif + + req->base.complete(&req->base, ecode); +} + +static void ahash_done_bi(void *cbk_ctx, u32 status) +{ + struct crypto_async_request *areq = cbk_ctx; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->caam_req.edesc; + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int ecode = 0; +#ifdef DEBUG + int digestsize = crypto_ahash_digestsize(ahash); + + dev_err(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); +#endif + + if (unlikely(status)) { + caam_qi2_strstatus(ctx->dev, status); + ecode = -EIO; + } + + ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + switch_buf(state); + qi_cache_free(edesc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx@" 
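
The four completion callbacks in this region differ only in the unmap direction and in whether the staging buffers flip; the control flow is otherwise identical. A skeleton of the shared shape (illustrative):

static void example_done(void *cbk_ctx, u32 status)
{
	struct crypto_async_request *areq = cbk_ctx;
	int ecode = status ? -EIO : 0;	/* caam_qi2_strstatus() decodes status */

	/* 1) ahash_unmap() or ahash_unmap_ctx(), matching the map direction
	 * 2) switch_buf() only on the update paths (ahash_done_bi/_ctx_dst)
	 * 3) qi_cache_free(edesc)
	 */
	areq->complete(areq, ecode);	/* notify the crypto layer exactly once */
}
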
__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) + print_hex_dump(KERN_ERR, "result@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); +#endif + + req->base.complete(&req->base, ecode); +} + +static void ahash_done_ctx_src(void *cbk_ctx, u32 status) +{ + struct crypto_async_request *areq = cbk_ctx; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->caam_req.edesc; + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int digestsize = crypto_ahash_digestsize(ahash); + int ecode = 0; + +#ifdef DEBUG + dev_err(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); +#endif + + if (unlikely(status)) { + caam_qi2_strstatus(ctx->dev, status); + ecode = -EIO; + } + + ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_TO_DEVICE); + qi_cache_free(edesc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) + print_hex_dump(KERN_ERR, "result@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); +#endif + + req->base.complete(&req->base, ecode); +} + +static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) +{ + struct crypto_async_request *areq = cbk_ctx; + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->caam_req.edesc; + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + int ecode = 0; +#ifdef DEBUG + int digestsize = crypto_ahash_digestsize(ahash); + + dev_err(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); +#endif + + if (unlikely(status)) { + caam_qi2_strstatus(ctx->dev, status); + ecode = -EIO; + } + + ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + switch_buf(state); + qi_cache_free(edesc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "ctx@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, + ctx->ctx_len, 1); + if (req->result) + print_hex_dump(KERN_ERR, "result@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, req->result, + digestsize, 1); +#endif + + req->base.complete(&req->base, ecode); +} + +static int ahash_update_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state), last_buflen; + int in_len = *buflen + req->nbytes, to_hash; + int src_nents, mapped_nents, qm_sg_bytes, qm_sg_src_index; + struct ahash_edesc *edesc; + int ret = 0; + + last_buflen = *next_buflen; + *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1); + to_hash = in_len - *next_buflen; + + if (to_hash) { + struct dpaa2_sg_entry *sg_table; + + src_nents = sg_nents_for_len(req->src, + req->nbytes - (*next_buflen)); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to DMA map source\n"); + return -ENOMEM; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + qm_sg_src_index = 1 + (*buflen ? 1 : 0); + qm_sg_bytes = (qm_sg_src_index + mapped_nents) * + sizeof(*sg_table); + sg_table = &edesc->sgt[0]; + + ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table, + DMA_BIDIRECTIONAL); + if (ret) + goto unmap_ctx; + + ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state); + if (ret) + goto unmap_ctx; + + if (mapped_nents) { + sg_to_qm_sg_last(req->src, mapped_nents, + sg_table + qm_sg_src_index, 0); + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); + } else { + dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1, + true); + } + + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, + qm_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap_ctx; + } + edesc->qm_sg_bytes = qm_sg_bytes; + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + dpaa2_fl_set_len(in_fle, ctx->ctx_len + to_hash); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); + dpaa2_fl_set_len(out_fle, ctx->ctx_len); + + req_ctx->flc = &ctx->flc[UPDATE]; + req_ctx->flc_dma = ctx->flc_dma[UPDATE]; + req_ctx->cbk = ahash_done_bi; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret != -EINPROGRESS && + !(ret == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + goto unmap_ctx; + } else if (*next_buflen) { + scatterwalk_map_and_copy(buf + *buflen, req->src, 0, + req->nbytes, 0); + *buflen = *next_buflen; + *next_buflen = last_buflen; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "buf@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1); + print_hex_dump(KERN_ERR, "next buf@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); +#endif + + return ret; +unmap_ctx: + ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); + qi_cache_free(edesc); + return ret; +} + +static int ahash_final_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = 
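
The arithmetic at the top of ahash_update_ctx() feeds CAAM whole blocks only and carries the remainder over to the staging buffer. A compilable worked example (illustrative):

#include <assert.h>

static void split_update(int buflen, int nbytes, int blocksize,
			 int *to_hash, int *next_buflen)
{
	int in_len = buflen + nbytes;

	/* blocksize is a power of two, so the mask yields the remainder */
	*next_buflen = in_len & (blocksize - 1);
	*to_hash = in_len - *next_buflen;
}

int main(void)
{
	int to_hash, next;

	/* 10 buffered bytes + 150 new ones against a 64-byte block */
	split_update(10, 150, 64, &to_hash, &next);
	assert(to_hash == 128 && next == 32);	/* 2 full blocks, 32 carried */
	return 0;
}
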
ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + int buflen = *current_buflen(state); + int qm_sg_bytes, qm_sg_src_index; + int digestsize = crypto_ahash_digestsize(ahash); + struct ahash_edesc *edesc; + struct dpaa2_sg_entry *sg_table; + int ret; + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) + return -ENOMEM; + + qm_sg_src_index = 1 + (buflen ? 1 : 0); + qm_sg_bytes = qm_sg_src_index * sizeof(*sg_table); + sg_table = &edesc->sgt[0]; + + ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table, + DMA_TO_DEVICE); + if (ret) + goto unmap_ctx; + + ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state); + if (ret) + goto unmap_ctx; + + dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1, true); + + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap_ctx; + } + edesc->qm_sg_bytes = qm_sg_bytes; + + edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { + dev_err(ctx->dev, "unable to map dst\n"); + edesc->dst_dma = 0; + ret = -ENOMEM; + goto unmap_ctx; + } + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + + req_ctx->flc = &ctx->flc[FINALIZE]; + req_ctx->flc_dma = ctx->flc_dma[FINALIZE]; + req_ctx->cbk = ahash_done_ctx_src; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret == -EINPROGRESS || + (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return ret; + +unmap_ctx: + ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE); + qi_cache_free(edesc); + return ret; +} + +static int ahash_finup_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
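
The link table that ahash_final_ctx() (and ahash_finup_ctx() below) hands to hardware always leads with the running context; the staged-buffer entry exists only when bytes were carried over, and the last entry carries the FINAL bit. Layout sketch (illustrative):

/*
 * index 0:     state->caam_ctx     (ctx_len bytes)
 * index 1:     current_buf(state)  (present iff buflen != 0)
 * index 1/2+:  req->src segments   (finup only)
 */
static inline int ctx_sg_entries(int buflen, int mapped_nents)
{
	/* matches qm_sg_src_index + mapped_nents in the functions above */
	return 1 + (buflen ? 1 : 0) + mapped_nents;
}
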
+ GFP_KERNEL : GFP_ATOMIC; + int buflen = *current_buflen(state); + int qm_sg_bytes, qm_sg_src_index; + int src_nents, mapped_nents; + int digestsize = crypto_ahash_digestsize(ahash); + struct ahash_edesc *edesc; + struct dpaa2_sg_entry *sg_table; + int ret; + + src_nents = sg_nents_for_len(req->src, req->nbytes); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to DMA map source\n"); + return -ENOMEM; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + qm_sg_src_index = 1 + (buflen ? 1 : 0); + qm_sg_bytes = (qm_sg_src_index + mapped_nents) * sizeof(*sg_table); + sg_table = &edesc->sgt[0]; + + ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table, + DMA_TO_DEVICE); + if (ret) + goto unmap_ctx; + + ret = buf_map_to_qm_sg(ctx->dev, sg_table + 1, state); + if (ret) + goto unmap_ctx; + + sg_to_qm_sg_last(req->src, mapped_nents, sg_table + qm_sg_src_index, 0); + + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap_ctx; + } + edesc->qm_sg_bytes = qm_sg_bytes; + + edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { + dev_err(ctx->dev, "unable to map dst\n"); + edesc->dst_dma = 0; + ret = -ENOMEM; + goto unmap_ctx; + } + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + dpaa2_fl_set_len(in_fle, ctx->ctx_len + buflen + req->nbytes); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + + req_ctx->flc = &ctx->flc[FINALIZE]; + req_ctx->flc_dma = ctx->flc_dma[FINALIZE]; + req_ctx->cbk = ahash_done_ctx_src; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret == -EINPROGRESS || + (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return ret; + +unmap_ctx: + ahash_unmap_ctx(ctx->dev, edesc, req, digestsize, DMA_FROM_DEVICE); + qi_cache_free(edesc); + return ret; +} + +static int ahash_digest(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
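
Every dpaa2_caam_enqueue() call in this file checks the same contract: -EINPROGRESS means the frame was accepted, -EBUSY is tolerable only when the caller opted into backlogging, and anything else requires local unwinding. Condensed into a predicate (illustrative, not part of the patch):

static bool enqueue_accepted(int ret, u32 req_flags)
{
	return ret == -EINPROGRESS ||
	       (ret == -EBUSY && (req_flags & CRYPTO_TFM_REQ_MAY_BACKLOG));
}

/* typical call site:
 *	ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
 *	if (!enqueue_accepted(ret, req->base.flags))
 *		goto unmap_ctx;
 */
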
+ GFP_KERNEL : GFP_ATOMIC; + int digestsize = crypto_ahash_digestsize(ahash); + int src_nents, mapped_nents; + struct ahash_edesc *edesc; + int ret = -ENOMEM; + + state->buf_dma = 0; + + src_nents = sg_nents_for_len(req->src, req->nbytes); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to map source for DMA\n"); + return ret; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); + return ret; + } + + edesc->src_nents = src_nents; + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + + if (mapped_nents > 1) { + int qm_sg_bytes; + struct dpaa2_sg_entry *sg_table = &edesc->sgt[0]; + + qm_sg_bytes = mapped_nents * sizeof(*sg_table); + sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0); + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, + qm_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + goto unmap; + } + edesc->qm_sg_bytes = qm_sg_bytes; + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + } else { + dpaa2_fl_set_format(in_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src)); + } + + edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { + dev_err(ctx->dev, "unable to map dst\n"); + edesc->dst_dma = 0; + goto unmap; + } + + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_len(in_fle, req->nbytes); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + + req_ctx->flc = &ctx->flc[DIGEST]; + req_ctx->flc_dma = ctx->flc_dma[DIGEST]; + req_ctx->cbk = ahash_done; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret == -EINPROGRESS || + (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return ret; + +unmap: + ahash_unmap(ctx->dev, edesc, req, digestsize); + qi_cache_free(edesc); + return ret; +} + +static int ahash_final_no_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + u8 *buf = current_buf(state); + int buflen = *current_buflen(state); + int digestsize = crypto_ahash_digestsize(ahash); + struct ahash_edesc *edesc; + int ret = -ENOMEM; + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) + return ret; + + state->buf_dma = dma_map_single(ctx->dev, buf, buflen, DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, state->buf_dma)) { + dev_err(ctx->dev, "unable to map src\n"); + goto unmap; + } + + edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { + dev_err(ctx->dev, "unable to map dst\n"); + edesc->dst_dma = 0; + goto unmap; + } + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(in_fle, state->buf_dma); + dpaa2_fl_set_len(in_fle, buflen); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + + req_ctx->flc = &ctx->flc[DIGEST]; + req_ctx->flc_dma = ctx->flc_dma[DIGEST]; + req_ctx->cbk = ahash_done; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret == -EINPROGRESS || + (ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return ret; + +unmap: + ahash_unmap(ctx->dev, edesc, req, digestsize); + qi_cache_free(edesc); + return ret; +} + +static int ahash_update_no_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + u8 *buf = current_buf(state); + int *buflen = current_buflen(state); + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); + int in_len = *buflen + req->nbytes, to_hash; + int qm_sg_bytes, src_nents, mapped_nents; + struct ahash_edesc *edesc; + int ret = 0; + + *next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1); + to_hash = in_len - *next_buflen; + + if (to_hash) { + struct dpaa2_sg_entry *sg_table; + + src_nents = sg_nents_for_len(req->src, + req->nbytes - *next_buflen); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to DMA map source\n"); + return -ENOMEM; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + qm_sg_bytes = (1 + mapped_nents) * sizeof(*sg_table); + sg_table = &edesc->sgt[0]; + + ret = buf_map_to_qm_sg(ctx->dev, sg_table, state); + if (ret) + goto unmap_ctx; + + sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0); + + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, + to_hash - *buflen, + *next_buflen, 0); + + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, + qm_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap_ctx; + } + edesc->qm_sg_bytes = qm_sg_bytes; + + state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, + ctx->ctx_len, DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, state->ctx_dma)) { + dev_err(ctx->dev, "unable to map ctx\n"); + state->ctx_dma = 0; + ret = -ENOMEM; + goto unmap_ctx; + } + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + dpaa2_fl_set_len(in_fle, to_hash); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); + dpaa2_fl_set_len(out_fle, ctx->ctx_len); + + req_ctx->flc = &ctx->flc[UPDATE_FIRST]; + req_ctx->flc_dma = ctx->flc_dma[UPDATE_FIRST]; + req_ctx->cbk = ahash_done_ctx_dst; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret != -EINPROGRESS && + !(ret == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + goto unmap_ctx; + + state->update = ahash_update_ctx; + state->finup = ahash_finup_ctx; + state->final = ahash_final_ctx; + } else if (*next_buflen) { + scatterwalk_map_and_copy(buf + *buflen, req->src, 0, + req->nbytes, 0); + *buflen = *next_buflen; + *next_buflen = 0; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "buf@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1); + print_hex_dump(KERN_ERR, "next buf@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, + *next_buflen, 1); +#endif + + return ret; +unmap_ctx: + ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE); + qi_cache_free(edesc); + return ret; +} + +static int ahash_finup_no_ctx(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = 
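
ahash_update_no_ctx() above (like ahash_update_first() below) promotes the request to the context-carrying handlers once a running digest lands in state->caam_ctx. The resulting handler table, summarised (illustrative):

/*
 * phase                           update          finup          final
 * -------------------------------------------------------------------------
 * after ahash_init()              update_first    finup_first    final_no_ctx
 * buffered data, no HW pass yet   update_no_ctx   finup_no_ctx   final_no_ctx
 * running digest in caam_ctx      update_ctx      finup_ctx      final_ctx
 */
static void promote_to_ctx(struct caam_hash_state *state)
{
	state->update = ahash_update_ctx;
	state->finup = ahash_finup_ctx;
	state->final = ahash_final_ctx;
}
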
ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + int buflen = *current_buflen(state); + int qm_sg_bytes, src_nents, mapped_nents; + int digestsize = crypto_ahash_digestsize(ahash); + struct ahash_edesc *edesc; + struct dpaa2_sg_entry *sg_table; + int ret; + + src_nents = sg_nents_for_len(req->src, req->nbytes); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to DMA map source\n"); + return -ENOMEM; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + qm_sg_bytes = (2 + mapped_nents) * sizeof(*sg_table); + sg_table = &edesc->sgt[0]; + + ret = buf_map_to_qm_sg(ctx->dev, sg_table, state); + if (ret) + goto unmap; + + sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0); + + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap; + } + edesc->qm_sg_bytes = qm_sg_bytes; + + edesc->dst_dma = dma_map_single(ctx->dev, req->result, digestsize, + DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->dst_dma)) { + dev_err(ctx->dev, "unable to map dst\n"); + edesc->dst_dma = 0; + ret = -ENOMEM; + goto unmap; + } + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + dpaa2_fl_set_len(in_fle, buflen + req->nbytes); + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, edesc->dst_dma); + dpaa2_fl_set_len(out_fle, digestsize); + + req_ctx->flc = &ctx->flc[DIGEST]; + req_ctx->flc_dma = ctx->flc_dma[DIGEST]; + req_ctx->cbk = ahash_done; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret != -EINPROGRESS && + !(ret == -EBUSY && req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + goto unmap; + + return ret; +unmap: + ahash_unmap(ctx->dev, edesc, req, digestsize); + qi_cache_free(edesc); + return -ENOMEM; +} + +static int ahash_update_first(struct ahash_request *req) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_request *req_ctx = &state->caam_req; + struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; + struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + u8 *next_buf = alt_buf(state); + int *next_buflen = alt_buflen(state); + int to_hash; + int src_nents, mapped_nents; + struct ahash_edesc *edesc; + int ret = 0; + + *next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) - + 1); + to_hash = req->nbytes - *next_buflen; + + if (to_hash) { + struct dpaa2_sg_entry *sg_table; + + src_nents = sg_nents_for_len(req->src, + req->nbytes - (*next_buflen)); + if (src_nents < 0) { + dev_err(ctx->dev, "Invalid number of src SG.\n"); + return src_nents; + } + + if (src_nents) { + mapped_nents = dma_map_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + if (!mapped_nents) { + dev_err(ctx->dev, "unable to map source for DMA\n"); + return -ENOMEM; + } + } else { + mapped_nents = 0; + } + + /* allocate space for base edesc and link tables */ + edesc = qi_cache_zalloc(GFP_DMA | flags); + if (!edesc) { + dma_unmap_sg(ctx->dev, req->src, src_nents, + DMA_TO_DEVICE); + return -ENOMEM; + } + + edesc->src_nents = src_nents; + sg_table = &edesc->sgt[0]; + + memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt)); + dpaa2_fl_set_final(in_fle, true); + dpaa2_fl_set_len(in_fle, to_hash); + + if (mapped_nents > 1) { + int qm_sg_bytes; + + sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0); + qm_sg_bytes = mapped_nents * sizeof(*sg_table); + edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, + qm_sg_bytes, + DMA_TO_DEVICE); + if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) { + dev_err(ctx->dev, "unable to map S/G table\n"); + ret = -ENOMEM; + goto unmap_ctx; + } + edesc->qm_sg_bytes = qm_sg_bytes; + dpaa2_fl_set_format(in_fle, dpaa2_fl_sg); + dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma); + } else { + dpaa2_fl_set_format(in_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(in_fle, sg_dma_address(req->src)); + } + + if (*next_buflen) + scatterwalk_map_and_copy(next_buf, req->src, to_hash, + *next_buflen, 0); + + state->ctx_dma = dma_map_single(ctx->dev, state->caam_ctx, + ctx->ctx_len, DMA_FROM_DEVICE); + if (dma_mapping_error(ctx->dev, state->ctx_dma)) { + dev_err(ctx->dev, "unable to map ctx\n"); + state->ctx_dma = 0; + ret = -ENOMEM; + goto unmap_ctx; + } + + dpaa2_fl_set_format(out_fle, dpaa2_fl_single); + dpaa2_fl_set_addr(out_fle, state->ctx_dma); + dpaa2_fl_set_len(out_fle, ctx->ctx_len); + + req_ctx->flc = &ctx->flc[UPDATE_FIRST]; + req_ctx->flc_dma = ctx->flc_dma[UPDATE_FIRST]; + req_ctx->cbk = ahash_done_ctx_dst; + req_ctx->ctx = &req->base; + req_ctx->edesc = edesc; + + ret = dpaa2_caam_enqueue(ctx->dev, req_ctx); + if (ret != -EINPROGRESS && + !(ret == -EBUSY && req->base.flags & + CRYPTO_TFM_REQ_MAY_BACKLOG)) + goto unmap_ctx; + + state->update = ahash_update_ctx; + state->finup = ahash_finup_ctx; + state->final = ahash_final_ctx; + } else if (*next_buflen) { + state->update = ahash_update_no_ctx; + state->finup = ahash_finup_no_ctx; + state->final = ahash_final_no_ctx; + scatterwalk_map_and_copy(next_buf, req->src, 0, + req->nbytes, 0); + switch_buf(state); + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, "next buf@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen, 1); +#endif + + return ret; +unmap_ctx: + ahash_unmap_ctx(ctx->dev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE); + qi_cache_free(edesc); + return ret; +} + +static int ahash_finup_first(struct ahash_request *req) +{ + return ahash_digest(req); +} + +static int ahash_init(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + state->update = ahash_update_first; + state->finup = 
ahash_finup_first; + state->final = ahash_final_no_ctx; + + state->ctx_dma = 0; + state->current_buf = 0; + state->buf_dma = 0; + state->buflen_0 = 0; + state->buflen_1 = 0; + + return 0; +} + +static int ahash_update(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->update(req); +} + +static int ahash_finup(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->finup(req); +} + +static int ahash_final(struct ahash_request *req) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + + return state->final(req); +} + +static int ahash_export(struct ahash_request *req, void *out) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_export_state *export = out; + int len; + u8 *buf; + + if (state->current_buf) { + buf = state->buf_1; + len = state->buflen_1; + } else { + buf = state->buf_0; + len = state->buflen_0; + } + + memcpy(export->buf, buf, len); + memcpy(export->caam_ctx, state->caam_ctx, sizeof(export->caam_ctx)); + export->buflen = len; + export->update = state->update; + export->final = state->final; + export->finup = state->finup; + + return 0; +} + +static int ahash_import(struct ahash_request *req, const void *in) +{ + struct caam_hash_state *state = ahash_request_ctx(req); + const struct caam_export_state *export = in; + + memset(state, 0, sizeof(*state)); + memcpy(state->buf_0, export->buf, export->buflen); + memcpy(state->caam_ctx, export->caam_ctx, sizeof(state->caam_ctx)); + state->buflen_0 = export->buflen; + state->update = export->update; + state->final = export->final; + state->finup = export->finup; + + return 0; +} + +struct caam_hash_template { + char name[CRYPTO_MAX_ALG_NAME]; + char driver_name[CRYPTO_MAX_ALG_NAME]; + char hmac_name[CRYPTO_MAX_ALG_NAME]; + char hmac_driver_name[CRYPTO_MAX_ALG_NAME]; + unsigned int blocksize; + struct ahash_alg template_ahash; + u32 alg_type; +}; + +/* ahash descriptors */ +static struct caam_hash_template driver_hash[] = { + { + .name = "sha1", + .driver_name = "sha1-caam-qi2", + .hmac_name = "hmac(sha1)", + .hmac_driver_name = "hmac-sha1-caam-qi2", + .blocksize = SHA1_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA1, + }, { + .name = "sha224", + .driver_name = "sha224-caam-qi2", + .hmac_name = "hmac(sha224)", + .hmac_driver_name = "hmac-sha224-caam-qi2", + .blocksize = SHA224_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA224, + }, { + .name = "sha256", + .driver_name = "sha256-caam-qi2", + .hmac_name = "hmac(sha256)", + .hmac_driver_name = "hmac-sha256-caam-qi2", + .blocksize = SHA256_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, 
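
ahash_export()/ahash_import() above let a client checkpoint a partial hash into statesize bytes (sizeof(struct caam_export_state)). A hedged usage sketch from the caller's side; request/scatterlist setup and async return-code handling are trimmed, and "sha256" resolves to sha256-caam-qi2 only if this driver wins priority selection:

struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
u8 snap[sizeof(struct caam_export_state)];

/* ahash_request_set_callback()/ahash_request_set_crypt() omitted */
crypto_ahash_init(req);
crypto_ahash_update(req);	/* hash some data */
crypto_ahash_export(req, snap);	/* checkpoint the partial state */
/* ... later, possibly on a brand-new request: */
crypto_ahash_import(req, snap);
crypto_ahash_final(req);	/* digest as if never interrupted */
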
+ .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA256, + }, { + .name = "sha384", + .driver_name = "sha384-caam-qi2", + .hmac_name = "hmac(sha384)", + .hmac_driver_name = "hmac-sha384-caam-qi2", + .blocksize = SHA384_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA384_DIGEST_SIZE, + .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA384, + }, { + .name = "sha512", + .driver_name = "sha512-caam-qi2", + .hmac_name = "hmac(sha512)", + .hmac_driver_name = "hmac-sha512-caam-qi2", + .blocksize = SHA512_BLOCK_SIZE, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = SHA512_DIGEST_SIZE, + .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_SHA512, + }, { + .name = "md5", + .driver_name = "md5-caam-qi2", + .hmac_name = "hmac(md5)", + .hmac_driver_name = "hmac-md5-caam-qi2", + .blocksize = MD5_BLOCK_WORDS * 4, + .template_ahash = { + .init = ahash_init, + .update = ahash_update, + .final = ahash_final, + .finup = ahash_finup, + .digest = ahash_digest, + .export = ahash_export, + .import = ahash_import, + .setkey = ahash_setkey, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct caam_export_state), + }, + }, + .alg_type = OP_ALG_ALGSEL_MD5, + } +}; + +struct caam_hash_alg { + struct list_head entry; + struct device *dev; + int alg_type; + struct ahash_alg ahash_alg; +}; + +static int caam_hash_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct crypto_alg *base = tfm->__crt_alg; + struct hash_alg_common *halg = + container_of(base, struct hash_alg_common, base); + struct ahash_alg *alg = + container_of(halg, struct ahash_alg, halg); + struct caam_hash_alg *caam_hash = + container_of(alg, struct caam_hash_alg, ahash_alg); + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ + static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, + HASH_MSG_LEN + SHA1_DIGEST_SIZE, + HASH_MSG_LEN + 32, + HASH_MSG_LEN + SHA256_DIGEST_SIZE, + HASH_MSG_LEN + 64, + HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + dma_addr_t dma_addr; + int i; + + ctx->dev = caam_hash->dev; + + dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc, sizeof(ctx->flc), + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(ctx->dev, dma_addr)) { + dev_err(ctx->dev, "unable to map shared descriptors\n"); + return -ENOMEM; + } + + for (i = 0; i < HASH_NUM_OP; i++) + ctx->flc_dma[i] = dma_addr + i * sizeof(ctx->flc[i]); + + /* copy descriptor header template value */ + ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; + + ctx->ctx_len = runninglen[(ctx->adata.algtype & + OP_ALG_ALGSEL_SUBMASK) >> + OP_ALG_ALGSEL_SHIFT]; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct caam_hash_state)); + + return ahash_set_sh_desc(ahash); +} + +static void caam_hash_cra_exit(struct crypto_tfm *tfm) +{ + struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc), + DMA_BIDIRECTIONAL, 
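
The bare 32 and 64 in runninglen[] below are easy to misread: SHA-224 and SHA-384 are truncated algorithms, so the MDHA running state is that of the untruncated parent plus the 8-byte length word. Spelled out (illustrative):

/* running context = HASH_MSG_LEN + untruncated parent state */
#define EX_CTX_LEN_SHA224	(HASH_MSG_LEN + SHA256_DIGEST_SIZE)	/* 8 + 32 */
#define EX_CTX_LEN_SHA384	(HASH_MSG_LEN + SHA512_DIGEST_SIZE)	/* 8 + 64 */
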
DMA_ATTR_SKIP_CPU_SYNC); +} + +static struct caam_hash_alg *caam_hash_alloc(struct device *dev, + struct caam_hash_template *template, bool keyed) +{ + struct caam_hash_alg *t_alg; + struct ahash_alg *halg; + struct crypto_alg *alg; + + t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + if (!t_alg) + return ERR_PTR(-ENOMEM); + + t_alg->ahash_alg = template->template_ahash; + halg = &t_alg->ahash_alg; + alg = &halg->halg.base; + + if (keyed) { + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", + template->hmac_name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->hmac_driver_name); + } else { + snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", + template->name); + snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + template->driver_name); + } + alg->cra_module = THIS_MODULE; + alg->cra_init = caam_hash_cra_init; + alg->cra_exit = caam_hash_cra_exit; + alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_priority = CAAM_CRA_PRIORITY; + alg->cra_blocksize = template->blocksize; + alg->cra_alignmask = 0; + alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_TYPE_AHASH; + alg->cra_type = &crypto_ahash_type; + + t_alg->alg_type = template->alg_type; + t_alg->dev = dev; + + return t_alg; +} + static void dpaa2_caam_fqdan_cb(struct dpaa2_io_notification_ctx *nctx) { struct dpaa2_caam_priv_per_cpu *ppriv; @@ -4122,6 +5548,7 @@ static int __cold dpaa2_dpseci_disable(struct dpaa2_caam_priv *priv) } static struct list_head alg_list; +static struct list_head hash_list; static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev) { @@ -4285,6 +5712,61 @@ static int dpaa2_caam_probe(struct fsl_mc_device *dpseci_dev) if (registered) dev_info(dev, "algorithms registered in /proc/crypto\n"); + /* register hash algorithms the device supports */ + INIT_LIST_HEAD(&hash_list); + + /* + * Skip registration of any hashing algorithms if MD block + * is not present. 
+ */ + if (!priv->sec_attr.md_acc_num) + return 0; + + for (i = 0; i < ARRAY_SIZE(driver_hash); i++) { + struct caam_hash_alg *t_alg; + struct caam_hash_template *alg = driver_hash + i; + + /* register hmac version */ + t_alg = caam_hash_alloc(dev, alg, true); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); + dev_warn(dev, "%s hash alg allocation failed: %d\n", + alg->driver_name, err); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { + dev_warn(dev, "%s alg registration failed: %d\n", + t_alg->ahash_alg.halg.base.cra_driver_name, + err); + kfree(t_alg); + } else { + list_add_tail(&t_alg->entry, &hash_list); + } + + /* register unkeyed version */ + t_alg = caam_hash_alloc(dev, alg, false); + if (IS_ERR(t_alg)) { + err = PTR_ERR(t_alg); + dev_warn(dev, "%s alg allocation failed: %d\n", + alg->driver_name, err); + continue; + } + + err = crypto_register_ahash(&t_alg->ahash_alg); + if (err) { + dev_warn(dev, "%s alg registration failed: %d\n", + t_alg->ahash_alg.halg.base.cra_driver_name, + err); + kfree(t_alg); + } else { + list_add_tail(&t_alg->entry, &hash_list); + } + } + if (!list_empty(&hash_list)) + dev_info(dev, "hash algorithms registered in /proc/crypto\n"); + return err; err_bind: @@ -4329,6 +5811,16 @@ static int __cold dpaa2_caam_remove(struct fsl_mc_device *ls_dev) } } + if (hash_list.next) { + struct caam_hash_alg *t_hash_alg, *p; + + list_for_each_entry_safe(t_hash_alg, p, &hash_list, entry) { + crypto_unregister_ahash(&t_hash_alg->ahash_alg); + list_del(&t_hash_alg->entry); + kfree(t_hash_alg); + } + } + dpaa2_dpseci_disable(priv); dpaa2_dpseci_dpio_free(priv); dpaa2_dpseci_free(priv); @@ -4359,7 +5851,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) } } - dpaa2_fl_set_flc(&req->fd_flt[1], req->flc->flc_dma); + dpaa2_fl_set_flc(&req->fd_flt[1], req->flc_dma); req->fd_flt_dma = dma_map_single(dev, req->fd_flt, sizeof(req->fd_flt), DMA_BIDIRECTIONAL); @@ -4372,7 +5864,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) dpaa2_fd_set_format(&fd, dpaa2_fd_list); dpaa2_fd_set_addr(&fd, req->fd_flt_dma); dpaa2_fd_set_len(&fd, req->fd_flt[1].len); - dpaa2_fd_set_flc(&fd, req->flc->flc_dma); + dpaa2_fd_set_flc(&fd, req->flc_dma); /* * There is no guarantee that preemption is disabled here, diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 2ba179d..87ef74a 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -213,16 +213,30 @@ struct ablkcipher_edesc { struct dpaa2_sg_entry sgt[0]; }; +/* + * ahash_edesc - s/w-extended ahash descriptor + * @dst_dma: I/O virtual address of req->result + * @qm_sg_dma: I/O virtual address of h/w link table + * @src_nents: number of segments in input scatterlist + * @qm_sg_bytes: length of dma mapped qm_sg space + * @sgt: pointer to h/w link table + */ +struct ahash_edesc { + dma_addr_t dst_dma; + dma_addr_t qm_sg_dma; + int src_nents; + int qm_sg_bytes; + struct dpaa2_sg_entry sgt[0]; +}; + /** * caam_flc - Flow Context (FLC) * @flc: Flow Context options * @sh_desc: Shared Descriptor - * @flc_dma: DMA address of the Flow Context */ struct caam_flc { u32 flc[16]; u32 sh_desc[MAX_SDLEN]; - dma_addr_t flc_dma; } ____cacheline_aligned; enum optype { @@ -240,6 +254,7 @@ enum optype { * fd_flt[1] - FLE pointing to input buffer * @fd_flt_dma: DMA address for the frame list table * @flc: Flow Context + * @flc_dma: I/O virtual address of Flow Context * @op_type: operation type * @cbk: Callback function to invoke 
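
With flc_dma dropped from struct caam_flc (hunk below), the pre-mapped address now travels in the request itself, as the dpaa2_caam_enqueue() changes above show. The per-request wiring every submission path uses, condensed (illustrative; my_done_cb is an assumed name):

req_ctx->flc = &ctx->flc[op];		/* CPU copy of the flow context */
req_ctx->flc_dma = ctx->flc_dma[op];	/* pre-mapped at cra_init time */
req_ctx->cbk = my_done_cb;		/* completion callback */
req_ctx->ctx = &req->base;
req_ctx->edesc = edesc;
ret = dpaa2_caam_enqueue(ctx->dev, req_ctx);
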
when job is completed * @ctx: arbit context attached with request by the application @@ -249,6 +264,7 @@ struct caam_request { struct dpaa2_fl_entry fd_flt[2]; dma_addr_t fd_flt_dma; struct caam_flc *flc; + dma_addr_t flc_dma; enum optype op_type; void (*cbk)(void *ctx, u32 err); void *ctx; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 698580b..5ba970e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -62,6 +62,7 @@ #include "error.h" #include "sg_sw_sec4.h" #include "key_gen.h" +#include "caamhash_desc.h" #define CAAM_CRA_PRIORITY 3000 @@ -71,14 +72,6 @@ #define CAAM_MAX_HASH_BLOCK_SIZE SHA512_BLOCK_SIZE #define CAAM_MAX_HASH_DIGEST_SIZE SHA512_DIGEST_SIZE -/* length of descriptors text */ -#define DESC_AHASH_BASE (3 * CAAM_CMD_SZ) -#define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) -#define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) -#define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) -#define DESC_AHASH_FINUP_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) -#define DESC_AHASH_DIGEST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) - #define DESC_HASH_MAX_USED_BYTES (DESC_AHASH_FINAL_LEN + \ CAAM_MAX_HASH_KEY_SIZE) #define DESC_HASH_MAX_USED_LEN (DESC_HASH_MAX_USED_BYTES / CAAM_CMD_SZ) @@ -218,7 +211,7 @@ static inline int buf_map_to_sec4_sg(struct device *jrdev, } /* Map state->caam_ctx, and add it to link table */ -static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, +static inline int ctx_map_to_sec4_sg(struct device *jrdev, struct caam_hash_state *state, int ctx_len, struct sec4_sg_entry *sec4_sg, u32 flag) { @@ -234,66 +227,20 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, return 0; } -/* - * For ahash update, final and finup (import_ctx = true) - * import context, read and write to seqout - * For ahash firsts and digest (import_ctx = false) - * read and write to seqout - */ -static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize, - struct caam_hash_ctx *ctx, bool import_ctx) -{ - u32 op = ctx->adata.algtype; - u32 *skip_key_load; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Append key if it has been set; ahash update excluded */ - if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) { - /* Skip key loading if already shared */ - skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad, - ctx->adata.keylen, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - - set_jump_tgt_here(desc, skip_key_load); - - op |= OP_ALG_AAI_HMAC_PRECOMP; - } - - /* If needed, import context from software */ - if (import_ctx) - append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - /* Class 2 operation */ - append_operation(desc, op | state | OP_ALG_ENCRYPT); - - /* - * Load from buf and/or src and write to req->result or state->context - * Calculate remaining bytes to read - */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - /* Read remaining bytes */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | - FIFOLD_TYPE_MSG | KEY_VLF); - /* Store class2 context bytes */ - append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); -} - static int ahash_set_sh_desc(struct crypto_ahash *ahash) { struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; + struct caam_drv_private *ctrlpriv = 
dev_get_drvdata(jrdev->parent); u32 *desc; + ctx->adata.key_virt = ctx->key; + /* ahash_update shared descriptor */ desc = ctx->sh_desc_update; - ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true); + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_UPDATE, ctx->ctx_len, + ctx->ctx_len, true, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma, desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG @@ -304,7 +251,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_update_first shared descriptor */ desc = ctx->sh_desc_update_first; - ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false); + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len, + ctx->ctx_len, false, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma, desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG @@ -315,7 +263,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_final shared descriptor */ desc = ctx->sh_desc_fin; - ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true); + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_FINALIZE, digestsize, + ctx->ctx_len, true, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma, desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG @@ -326,7 +275,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_digest shared descriptor */ desc = ctx->sh_desc_digest; - ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false); + cnstr_shdsc_ahash(desc, &ctx->adata, OP_ALG_AS_INITFINAL, digestsize, + ctx->ctx_len, false, ctrlpriv->era); dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma, desc_bytes(desc), DMA_TO_DEVICE); #ifdef DEBUG @@ -421,6 +371,7 @@ static int ahash_setkey(struct crypto_ahash *ahash, struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); int blocksize = crypto_tfm_alg_blocksize(&ahash->base); int digestsize = crypto_ahash_digestsize(ahash); + struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); int ret; u8 *hashed_key = NULL; @@ -441,16 +392,26 @@ static int ahash_setkey(struct crypto_ahash *ahash, key = hashed_key; } - ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, keylen, - CAAM_MAX_HASH_KEY_SIZE); - if (ret) - goto bad_free_key; + /* + * If DKP is supported, use it in the shared descriptor to generate + * the split key. 
+ */ + if (ctrlpriv->era >= 6) { + ctx->adata.key_inline = true; + ctx->adata.keylen = keylen; + ctx->adata.keylen_pad = split_key_len(ctx->adata.algtype & + OP_ALG_ALGSEL_MASK); -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->adata.keylen_pad, 1); -#endif + if (ctx->adata.keylen_pad > CAAM_MAX_HASH_KEY_SIZE) + goto bad_free_key; + + memcpy(ctx->key, key, keylen); + } else { + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, + keylen, CAAM_MAX_HASH_KEY_SIZE); + if (ret) + goto bad_free_key; + } kfree(hashed_key); return ahash_set_sh_desc(ahash); @@ -773,7 +734,7 @@ static int ahash_update_ctx(struct ahash_request *req) edesc->src_nents = src_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, + ret = ctx_map_to_sec4_sg(jrdev, state, ctx->ctx_len, edesc->sec4_sg, DMA_BIDIRECTIONAL); if (ret) goto unmap_ctx; @@ -871,9 +832,8 @@ static int ahash_final_ctx(struct ahash_request *req) desc = edesc->hw_desc; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->src_nents = 0; - ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, + ret = ctx_map_to_sec4_sg(jrdev, state, ctx->ctx_len, edesc->sec4_sg, DMA_TO_DEVICE); if (ret) goto unmap_ctx; @@ -967,7 +927,7 @@ static int ahash_finup_ctx(struct ahash_request *req) edesc->src_nents = src_nents; - ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len, + ret = ctx_map_to_sec4_sg(jrdev, state, ctx->ctx_len, edesc->sec4_sg, DMA_TO_DEVICE); if (ret) goto unmap_ctx; @@ -1123,7 +1083,6 @@ static int ahash_final_no_ctx(struct ahash_request *req) dev_err(jrdev, "unable to map dst\n"); goto unmap; } - edesc->src_nents = 0; #ifdef DEBUG print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ", @@ -1205,7 +1164,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) edesc->src_nents = src_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->dst_dma = 0; ret = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, state); if (ret) @@ -1417,7 +1375,6 @@ static int ahash_update_first(struct ahash_request *req) } edesc->src_nents = src_nents; - edesc->dst_dma = 0; ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0, to_hash); diff --git a/drivers/crypto/caam/caamhash_desc.c b/drivers/crypto/caam/caamhash_desc.c new file mode 100644 index 0000000..af39a18 --- /dev/null +++ b/drivers/crypto/caam/caamhash_desc.c @@ -0,0 +1,108 @@ +/* + * Shared descriptors for ahash algorithms + * + * Copyright 2017 NXP + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the names of the above-listed copyright holders nor the + * names of any contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. 
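
Both CAAM drivers now share the same era-dependent key policy: on Era 6+ parts the Derived Key Protocol (DKP) lets the shared descriptor compute the MDHA split key itself, so setkey only stores the plain key; older parts still run a gen_split_key() job. The two branches of the hunk above, side by side (illustrative condensation):

if (ctrlpriv->era >= 6) {
	/* DKP: the descriptor derives the split key on the fly */
	ctx->adata.key_inline = true;
	ctx->adata.keylen = keylen;
	ctx->adata.keylen_pad =
		split_key_len(ctx->adata.algtype & OP_ALG_ALGSEL_MASK);
	memcpy(ctx->key, key, keylen);	/* plain key, not a split key */
} else {
	/* legacy: precompute the split key with a job ring job */
	ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key,
			    keylen, CAAM_MAX_HASH_KEY_SIZE);
}
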
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "compat.h" +#include "desc_constr.h" +#include "caamhash_desc.h" + +/** + * cnstr_shdsc_ahash - ahash shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. + * A split key is required for SEC Era < 6; the size of the split key + * is specified in this case. + * Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, SHA224, + * SHA256, SHA384, SHA512}. + * @state: algorithm state OP_ALG_AS_{INIT, FINALIZE, INITFINAL, UPDATE} + * @digestsize: algorithm's digest size + * @ctx_len: size of Context Register + * @import_ctx: true if previous Context Register needs to be restored + * must be true for ahash update and final + * must be false for ahash first and digest + * @era: SEC Era + */ +void cnstr_shdsc_ahash(u32 * const desc, struct alginfo *adata, u32 state, + int digestsize, int ctx_len, bool import_ctx, int era) +{ + u32 op = adata->algtype; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Append key if it has been set; ahash update excluded */ + if (state != OP_ALG_AS_UPDATE && adata->keylen) { + u32 *skip_key_load; + + /* Skip key loading if already shared */ + skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + if (era < 6) + append_key_as_imm(desc, adata->key_virt, + adata->keylen_pad, + adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_proto_dkp(desc, adata); + + set_jump_tgt_here(desc, skip_key_load); + + op |= OP_ALG_AAI_HMAC_PRECOMP; + } + + /* If needed, import context from software */ + if (import_ctx) + append_seq_load(desc, ctx_len, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + + /* Class 2 operation */ + append_operation(desc, op | state | OP_ALG_ENCRYPT); + + /* + * Load from buf and/or src and write to req->result or state->context + * Calculate remaining bytes to read + */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + /* Read remaining bytes */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | + FIFOLD_TYPE_MSG | KEY_VLF); + /* Store class2 context bytes */ + append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); +} +EXPORT_SYMBOL(cnstr_shdsc_ahash); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM ahash descriptors support"); +MODULE_AUTHOR("NXP Semiconductors"); diff --git a/drivers/crypto/caam/caamhash_desc.h b/drivers/crypto/caam/caamhash_desc.h new file mode 100644 index 0000000..b0a9a54 --- /dev/null +++ b/drivers/crypto/caam/caamhash_desc.h @@ -0,0 +1,49 @@ +/* + * Shared descriptors for ahash algorithms + * + * Copyright 2017 NXP + * + * Redistribution and use in source and binary forms, with or without + *
modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the names of the above-listed copyright holders nor the + * names of any contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CAAMHASH_DESC_H_ +#define _CAAMHASH_DESC_H_ + +/* length of descriptors text */ +#define DESC_AHASH_BASE (3 * CAAM_CMD_SZ) +#define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) +#define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) +#define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) +#define DESC_AHASH_DIGEST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) + +void cnstr_shdsc_ahash(u32 * const desc, struct alginfo *adata, u32 state, + int digestsize, int ctx_len, bool import_ctx, int era); + +#endif /* _CAAMHASH_DESC_H_ */ diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 8f9642c..f330a28 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -612,6 +612,8 @@ static int caam_probe(struct platform_device *pdev) goto iounmap_ctrl; } + ctrlpriv->era = caam_get_era(); + ret = of_platform_populate(nprop, caam_match, NULL, dev); if (ret) { dev_err(dev, "JR platform devices creation error\n"); @@ -742,7 +744,7 @@ static int caam_probe(struct platform_device *pdev) /* Report "alive" for developer to see */ dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, - caam_get_era()); + ctrlpriv->era); dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n", ctrlpriv->total_jobrs, ctrlpriv->qi_present, caam_dpaa2 ? 
"yes" : "no"); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index a8c3be7..5fe5123 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -446,6 +446,18 @@ #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) #define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) #define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_MD5 (0x20 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_SHA1 (0x21 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_SHA224 (0x22 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_SHA256 (0x23 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_SHA384 (0x24 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_SHA512 (0x25 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_MD5 (0x60 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_SHA1 (0x61 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_SHA224 (0x62 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_SHA256 (0x63 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_SHA384 (0x64 << OP_PCLID_SHIFT) +#define OP_PCLID_DKP_RIF_SHA512 (0x65 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) @@ -1095,6 +1107,22 @@ /* MacSec protinfos */ #define OP_PCL_MACSEC 0x0001 +/* Derived Key Protocol (DKP) Protinfo */ +#define OP_PCL_DKP_SRC_SHIFT 14 +#define OP_PCL_DKP_SRC_MASK (3 << OP_PCL_DKP_SRC_SHIFT) +#define OP_PCL_DKP_SRC_IMM (0 << OP_PCL_DKP_SRC_SHIFT) +#define OP_PCL_DKP_SRC_SEQ (1 << OP_PCL_DKP_SRC_SHIFT) +#define OP_PCL_DKP_SRC_PTR (2 << OP_PCL_DKP_SRC_SHIFT) +#define OP_PCL_DKP_SRC_SGF (3 << OP_PCL_DKP_SRC_SHIFT) +#define OP_PCL_DKP_DST_SHIFT 12 +#define OP_PCL_DKP_DST_MASK (3 << OP_PCL_DKP_DST_SHIFT) +#define OP_PCL_DKP_DST_IMM (0 << OP_PCL_DKP_DST_SHIFT) +#define OP_PCL_DKP_DST_SEQ (1 << OP_PCL_DKP_DST_SHIFT) +#define OP_PCL_DKP_DST_PTR (2 << OP_PCL_DKP_DST_SHIFT) +#define OP_PCL_DKP_DST_SGF (3 << OP_PCL_DKP_DST_SHIFT) +#define OP_PCL_DKP_KEY_SHIFT 0 +#define OP_PCL_DKP_KEY_MASK (0xfff << OP_PCL_DKP_KEY_SHIFT) + /* PKI unidirectional protocol protinfo bits */ #define OP_PCL_PKPROT_TEST 0x0008 #define OP_PCL_PKPROT_DECRYPT 0x0004 @@ -1455,6 +1483,7 @@ #define MATH_DEST_REG1 (0x01 << MATH_DEST_SHIFT) #define MATH_DEST_REG2 (0x02 << MATH_DEST_SHIFT) #define MATH_DEST_REG3 (0x03 << MATH_DEST_SHIFT) +#define MATH_DEST_DPOVRD (0x07 << MATH_DEST_SHIFT) #define MATH_DEST_SEQINLEN (0x08 << MATH_DEST_SHIFT) #define MATH_DEST_SEQOUTLEN (0x09 << MATH_DEST_SHIFT) #define MATH_DEST_VARSEQINLEN (0x0a << MATH_DEST_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 2d9dbec..f110712 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -108,7 +108,7 @@ static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr, append_ptr(desc, ptr); } -static inline void append_data(u32 * const desc, void *data, int len) +static inline void append_data(u32 * const desc, const void *data, int len) { u32 *offset = desc_end(desc); @@ -171,7 +171,7 @@ static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr, append_cmd(desc, len); } -static inline void append_cmd_data(u32 * const desc, void *data, int len, +static inline void append_cmd_data(u32 * const desc, const void *data, int len, u32 command) { append_cmd(desc, command | IMMEDIATE | len); @@ -271,7 +271,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN) APPEND_SEQ_PTR_INTLEN(out, OUT) #define APPEND_CMD_PTR_TO_IMM(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * 
const desc, const void *data, \ unsigned int len, u32 options) \ { \ PRINT_POS; \ @@ -312,7 +312,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32) * from length of immediate data provided, e.g., split keys */ #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * const desc, const void *data, \ unsigned int data_len, \ unsigned int len, u32 options) \ { \ @@ -452,7 +452,7 @@ struct alginfo { unsigned int keylen_pad; union { dma_addr_t key_dma; - void *key_virt; + const void *key_virt; }; bool key_inline; }; @@ -496,4 +496,45 @@ static inline int desc_inline_query(unsigned int sd_base_len, return (rem_bytes >= 0) ? 0 : -1; } +/** + * append_proto_dkp - Derived Key Protocol (DKP): key -> split key + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. + * keylen should be the length of initial key, while keylen_pad + * the length of the derived (split) key. + * Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, SHA224, + * SHA256, SHA384, SHA512}. + */ +static inline void append_proto_dkp(u32 * const desc, struct alginfo *adata) +{ + u32 protid; + + /* + * Quick & dirty translation from OP_ALG_ALGSEL_{MD5, SHA*} + * to OP_PCLID_DKP_{MD5, SHA*} + */ + protid = (adata->algtype & OP_ALG_ALGSEL_SUBMASK) | + (0x20 << OP_ALG_ALGSEL_SHIFT); + + if (adata->key_inline) { + int words; + + append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid | + OP_PCL_DKP_SRC_IMM | OP_PCL_DKP_DST_IMM | + adata->keylen); + append_data(desc, adata->key_virt, adata->keylen); + + /* Reserve space in descriptor buffer for the derived key */ + words = (ALIGN(adata->keylen_pad, CAAM_CMD_SZ) - + ALIGN(adata->keylen, CAAM_CMD_SZ)) / CAAM_CMD_SZ; + if (words) + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + words); + } else { + append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid | + OP_PCL_DKP_SRC_PTR | OP_PCL_DKP_DST_PTR | + adata->keylen); + append_ptr(desc, adata->key_dma); + } +} + #endif /* DESC_CONSTR_H */ diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index a523612..55aab74 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -83,6 +83,7 @@ struct caam_drv_private { u8 qi_present; /* Nonzero if QI present in device */ int secvio_irq; /* Security violation interrupt number */ int virt_en; /* Virtualization enabled in CAAM */ + int era; /* CAAM Era (internal HW revision) */ #define RNG4_MAX_HANDLES 2 /* RNG4 block */ diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 48185d5..baa7d51 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -109,7 +109,7 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) fd.cmd = 0; fd.format = qm_fd_compound; - fd.cong_weight = req->fd_sgt[1].length; + fd.cong_weight = caam32_to_cpu(req->fd_sgt[1].length); fd.addr = dma_map_single(qidev, req->fd_sgt, sizeof(req->fd_sgt), DMA_BIDIRECTIONAL); if (dma_mapping_error(qidev, fd.addr)) { diff --git a/drivers/dma/caam_dma.c b/drivers/dma/caam_dma.c index cc9f9cf..e430b32 100644 --- a/drivers/dma/caam_dma.c +++ b/drivers/dma/caam_dma.c @@ -43,9 +43,8 @@ struct caam_dma_edesc { struct dma_async_tx_descriptor async_tx; struct list_head node; struct caam_dma_ctx *ctx; - dma_addr_t sec4_sg_dma; - dma_addr_t sec4_sg_dma_dst; - size_t sec4_sg_bytes; + dma_addr_t src_dma; + dma_addr_t dst_dma; unsigned int src_len; unsigned int dst_len; struct sec4_sg_entry 
*sec4_sg; @@ -74,7 +73,7 @@ static struct dma_device *dma_dev; static struct caam_dma_sh_desc *dma_sh_desc; static LIST_HEAD(dma_ctx_list); -static dma_cookie_t caam_jr_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t caam_dma_tx_submit(struct dma_async_tx_descriptor *tx) { struct caam_dma_edesc *edesc = NULL; struct caam_dma_ctx *ctx = NULL; @@ -104,12 +103,11 @@ static unsigned int caam_dma_sg_dma_len(struct scatterlist *sg, return len; } -static struct caam_dma_edesc *caam_dma_edesc_alloc(struct dma_chan *chan, - unsigned long flags, - struct scatterlist *dst_sg, - unsigned int dst_nents, - struct scatterlist *src_sg, - unsigned int src_nents) +static struct caam_dma_edesc * +caam_dma_sg_edesc_alloc(struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) { struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx, chan); @@ -139,7 +137,7 @@ static struct caam_dma_edesc *caam_dma_edesc_alloc(struct dma_chan *chan, } dma_async_tx_descriptor_init(&edesc->async_tx, chan); - edesc->async_tx.tx_submit = caam_jr_tx_submit; + edesc->async_tx.tx_submit = caam_dma_tx_submit; edesc->async_tx.flags = flags; edesc->async_tx.cookie = -EBUSY; @@ -161,9 +159,8 @@ static struct caam_dma_edesc *caam_dma_edesc_alloc(struct dma_chan *chan, return ERR_PTR(-ENOMEM); } - edesc->sec4_sg_dma = sec4_sg_dma_src; - edesc->sec4_sg_dma_dst = sec4_sg_dma_src + - src_nents * sizeof(*sec4_sg); + edesc->src_dma = sec4_sg_dma_src; + edesc->dst_dma = sec4_sg_dma_src + src_nents * sizeof(*sec4_sg); edesc->ctx = ctx; return edesc; @@ -215,20 +212,20 @@ static void caam_dma_done(struct device *dev, u32 *hwdesc, u32 err, callback(callback_param); } -void init_dma_job(dma_addr_t sh_desc_dma, struct caam_dma_edesc *edesc) +static void caam_dma_sg_init_job_desc(struct caam_dma_edesc *edesc) { u32 *jd = edesc->jd; u32 *sh_desc = dma_sh_desc->desc; + dma_addr_t desc_dma = dma_sh_desc->desc_dma; /* init the job descriptor */ - init_job_desc_shared(jd, sh_desc_dma, desc_len(sh_desc), HDR_REVERSE); + init_job_desc_shared(jd, desc_dma, desc_len(sh_desc), HDR_REVERSE); /* set SEQIN PTR */ - append_seq_in_ptr(jd, edesc->sec4_sg_dma, edesc->src_len, LDST_SGF); + append_seq_in_ptr(jd, edesc->src_dma, edesc->src_len, LDST_SGF); /* set SEQOUT PTR */ - append_seq_out_ptr(jd, edesc->sec4_sg_dma_dst, edesc->dst_len, - LDST_SGF); + append_seq_out_ptr(jd, edesc->dst_dma, edesc->dst_len, LDST_SGF); #ifdef DEBUG print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ", @@ -238,26 +235,75 @@ void init_dma_job(dma_addr_t sh_desc_dma, struct caam_dma_edesc *edesc) /* This function can be called from an interrupt context */ static struct dma_async_tx_descriptor * -caam_jr_prep_dma_sg(struct dma_chan *chan, struct scatterlist *dst_sg, - unsigned int dst_nents, struct scatterlist *src_sg, - unsigned int src_nents, unsigned long flags) +caam_dma_prep_sg(struct dma_chan *chan, struct scatterlist *dst_sg, + unsigned int dst_nents, struct scatterlist *src_sg, + unsigned int src_nents, unsigned long flags) { struct caam_dma_edesc *edesc; /* allocate extended descriptor */ - edesc = caam_dma_edesc_alloc(chan, flags, dst_sg, dst_nents, src_sg, - src_nents); + edesc = caam_dma_sg_edesc_alloc(chan, dst_sg, dst_nents, src_sg, + src_nents, flags); if (IS_ERR_OR_NULL(edesc)) return ERR_CAST(edesc); /* Initialize job descriptor */ - init_dma_job(dma_sh_desc->desc_dma, edesc); + caam_dma_sg_init_job_desc(edesc); + + return 
&edesc->async_tx; +} + +static void caam_dma_memcpy_init_job_desc(struct caam_dma_edesc *edesc) +{ + u32 *jd = edesc->jd; + u32 *sh_desc = dma_sh_desc->desc; + dma_addr_t desc_dma = dma_sh_desc->desc_dma; + + /* init the job descriptor */ + init_job_desc_shared(jd, desc_dma, desc_len(sh_desc), HDR_REVERSE); + + /* set SEQIN PTR */ + append_seq_in_ptr(jd, edesc->src_dma, edesc->src_len, 0); + + /* set SEQOUT PTR */ + append_seq_out_ptr(jd, edesc->dst_dma, edesc->dst_len, 0); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "caam dma desc@" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, jd, desc_bytes(jd), 1); +#endif +} + +static struct dma_async_tx_descriptor * +caam_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct caam_dma_edesc *edesc; + struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx, + chan); + + edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN, GFP_DMA | GFP_NOWAIT); + if (!edesc) + return ERR_PTR(-ENOMEM); + + dma_async_tx_descriptor_init(&edesc->async_tx, chan); + edesc->async_tx.tx_submit = caam_dma_tx_submit; + edesc->async_tx.flags = flags; + edesc->async_tx.cookie = -EBUSY; + + edesc->src_dma = src; + edesc->src_len = len; + edesc->dst_dma = dst; + edesc->dst_len = len; + edesc->ctx = ctx; + + caam_dma_memcpy_init_job_desc(edesc); return &edesc->async_tx; } /* This function can be called in an interrupt context */ -static void caam_jr_issue_pending(struct dma_chan *chan) +static void caam_dma_issue_pending(struct dma_chan *chan) { struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx, chan); @@ -273,7 +319,7 @@ static void caam_jr_issue_pending(struct dma_chan *chan) spin_unlock_bh(&ctx->edesc_lock); } -static void caam_jr_free_chan_resources(struct dma_chan *chan) +static void caam_dma_free_chan_resources(struct dma_chan *chan) { struct caam_dma_ctx *ctx = container_of(chan, struct caam_dma_ctx, chan); @@ -439,11 +485,13 @@ static int __init caam_dma_probe(struct platform_device *pdev) dma_dev->dev = dev; dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; dma_cap_set(DMA_SG, dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); dma_dev->device_tx_status = dma_cookie_status; - dma_dev->device_issue_pending = caam_jr_issue_pending; - dma_dev->device_prep_dma_sg = caam_jr_prep_dma_sg; - dma_dev->device_free_chan_resources = caam_jr_free_chan_resources; + dma_dev->device_issue_pending = caam_dma_issue_pending; + dma_dev->device_prep_dma_sg = caam_dma_prep_sg; + dma_dev->device_prep_dma_memcpy = caam_dma_prep_memcpy; + dma_dev->device_free_chan_resources = caam_dma_free_chan_resources; err = dma_async_device_register(dma_dev); if (err) { diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 6c4c281..60cd526 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -119,67 +119,111 @@ u64 pre_addr, pre_queue; +/* qDMA Command Descriptor Formats */ + +/* Compound Command Descriptor Format */ struct fsl_qdma_ccdf { - u8 status; - u32 rev1:22; - u32 ser:1; - u32 rev2:1; - u32 rev3:20; - u32 offset:9; - u32 format:3; + __le32 status; /* ser, status */ + __le32 cfg; /* format, offset */ union { struct { - u32 addr_lo; /* low 32-bits of 40-bit address */ - u32 addr_hi:8; /* high 8-bits of 40-bit address */ - u32 rev4:16; - u32 queue:3; - u32 rev5:3; - u32 dd:2; /* dynamic debug */ - }; - struct { - u64 addr:40; - /* More efficient address accessor */ - u64 __notaddress:24; - }; + __le32
addr_lo; /* low 32-bits of 40-bit address */ + u8 addr_hi; /* high 8-bits of 40-bit address */ + u8 __reserved1[2]; + u8 cfg8b_w1; /* dd, queue */ + } __packed; + __le64 data; }; } __packed; +#define QDMA_CCDF_STATUS 20 +#define QDMA_CCDF_OFFSET 20 +#define QDMA_CCDF_MASK GENMASK(28, 20) +#define QDMA_CCDF_FORMAT BIT(29) +#define QDMA_CCDF_SER BIT(30) + +static inline u64 qdma_ccdf_addr_get64(const struct fsl_qdma_ccdf *ccdf) +{ + return le64_to_cpu(ccdf->data) & 0xffffffffffLLU; +} +static inline u64 qdma_ccdf_get_queue(const struct fsl_qdma_ccdf *ccdf) +{ + return ccdf->cfg8b_w1 & 0xff; +} +static inline void qdma_ccdf_addr_set64(struct fsl_qdma_ccdf *ccdf, u64 addr) +{ + ccdf->addr_hi = upper_32_bits(addr); + ccdf->addr_lo = cpu_to_le32(lower_32_bits(addr)); +} +static inline int qdma_ccdf_get_offset(const struct fsl_qdma_ccdf *ccdf) +{ + return (le32_to_cpu(ccdf->cfg) & QDMA_CCDF_MASK) >> QDMA_CCDF_OFFSET; +} +static inline void qdma_ccdf_set_format(struct fsl_qdma_ccdf *ccdf, int offset) +{ + ccdf->cfg = cpu_to_le32(QDMA_CCDF_FORMAT | offset); +} +static inline int qdma_ccdf_get_status(const struct fsl_qdma_ccdf *ccdf) +{ + return (le32_to_cpu(ccdf->status) & QDMA_CCDF_MASK) >> QDMA_CCDF_STATUS; +} +static inline void qdma_ccdf_set_ser(struct fsl_qdma_ccdf *ccdf, int status) +{ + ccdf->status = cpu_to_le32(QDMA_CCDF_SER | status); +} +/* qDMA Compound S/G Format */ struct fsl_qdma_csgf { - u32 offset:13; - u32 rev1:19; - u32 length:30; - u32 f:1; - u32 e:1; + __le32 offset; /* offset */ + __le32 cfg; /* E bit, F bit, length */ union { struct { - u32 addr_lo; /* low 32-bits of 40-bit address */ - u32 addr_hi:8; /* high 8-bits of 40-bit address */ - u32 rev2:24; - }; - struct { - u64 addr:40; - /* More efficient address accessor */ - u64 __notaddress:24; + __le32 addr_lo; /* low 32-bits of 40-bit address */ + u8 addr_hi; /* high 8-bits of 40-bit address */ + u8 __reserved1[3]; }; + __le64 data; }; } __packed; +#define QDMA_SG_FIN BIT(30) +#define QDMA_SG_EXT BIT(31) +#define QDMA_SG_LEN_MASK GENMASK(29, 0) +static inline u64 qdma_csgf_addr_get64(const struct fsl_qdma_csgf *sg) +{ + return le64_to_cpu(sg->data) & 0xffffffffffLLU; +} +static inline void qdma_csgf_addr_set64(struct fsl_qdma_csgf *sg, u64 addr) +{ + sg->addr_hi = upper_32_bits(addr); + sg->addr_lo = cpu_to_le32(lower_32_bits(addr)); +} +static inline void qdma_csgf_set_len(struct fsl_qdma_csgf *csgf, int len) +{ + csgf->cfg = cpu_to_le32(len & QDMA_SG_LEN_MASK); +} +static inline void qdma_csgf_set_f(struct fsl_qdma_csgf *csgf, int len) +{ + csgf->cfg = cpu_to_le32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK)); +} +static inline void qdma_csgf_set_e(struct fsl_qdma_csgf *csgf, int len) +{ + csgf->cfg = cpu_to_le32(QDMA_SG_EXT | (len & QDMA_SG_LEN_MASK)); +} + +/* qDMA Source Descriptor Format */ struct fsl_qdma_sdf { - u32 rev3:32; - u32 ssd:12; /* souce stride distance */ - u32 sss:12; /* souce stride size */ - u32 rev4:8; - u32 rev5:32; - u32 cmd; + __le32 rev3; + __le32 cfg; /* rev4, bit[0-11] - ssd, bit[12-23] - sss */ + __le32 rev5; + __le32 cmd; } __packed; +/* qDMA Destination Descriptor Format */ struct fsl_qdma_ddf { - u32 rev1:32; - u32 dsd:12; /* Destination stride distance */ - u32 dss:12; /* Destination stride size */ - u32 rev2:8; - u32 rev3:32; - u32 cmd; + __le32 rev1; + __le32 cfg; /* rev2, bit[0-11] - dsd, bit[12-23] - dss */ + __le32 rev3; + __le32 cmd; } __packed; struct fsl_qdma_chan { @@ -304,24 +348,27 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, memset(fsl_comp->virt_addr, 0,
FSL_QDMA_BASE_BUFFER_SIZE); /* Head Command Descriptor(Frame Descriptor) */ - ccdf->addr = fsl_comp->bus_addr + 16; - ccdf->format = 1; /* Compound S/G format */ + qdma_ccdf_addr_set64(ccdf, fsl_comp->bus_addr + 16); + qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(ccdf)); + qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(ccdf)); /* Status notification is enqueued to status queue. */ - ccdf->ser = 1; /* Compound Command Descriptor(Frame List Table) */ - csgf_desc->addr = fsl_comp->bus_addr + 64; + qdma_csgf_addr_set64(csgf_desc, fsl_comp->bus_addr + 64); /* It must be 32 as Compound S/G Descriptor */ - csgf_desc->length = 32; - csgf_src->addr = src; - csgf_src->length = len; - csgf_dest->addr = dst; - csgf_dest->length = len; + qdma_csgf_set_len(csgf_desc, 32); + qdma_csgf_addr_set64(csgf_src, src); + qdma_csgf_set_len(csgf_src, len); + qdma_csgf_addr_set64(csgf_dest, dst); + qdma_csgf_set_len(csgf_dest, len); /* This entry is the last entry. */ - csgf_dest->f = FSL_QDMA_F_LAST_ENTRY; + qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ - sdf->cmd = FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET; - ddf->cmd = FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET; - ddf->cmd |= FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET; + sdf->cmd = cpu_to_le32( + FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET); + ddf->cmd = cpu_to_le32( + FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET); + ddf->cmd |= cpu_to_le32( + FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET); } static void fsl_qdma_comp_fill_sg( @@ -345,49 +392,48 @@ static void fsl_qdma_comp_fill_sg( csgf_dest = (struct fsl_qdma_csgf *)fsl_comp->virt_addr + 3; sdf = (struct fsl_qdma_sdf *)fsl_comp->virt_addr + 4; ddf = (struct fsl_qdma_ddf *)fsl_comp->virt_addr + 5; - memset(fsl_comp->virt_addr, 0, FSL_QDMA_BASE_BUFFER_SIZE); /* Head Command Descriptor(Frame Descriptor) */ - ccdf->addr = fsl_comp->bus_addr + 16; - ccdf->format = 1; /* Compound S/G format */ + qdma_ccdf_addr_set64(ccdf, fsl_comp->bus_addr + 16); + qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(ccdf)); /* Status notification is enqueued to status queue. */ - ccdf->ser = 1; + qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(ccdf)); /* Compound Command Descriptor(Frame List Table) */ - csgf_desc->addr = fsl_comp->bus_addr + 64; + qdma_csgf_addr_set64(csgf_desc, fsl_comp->bus_addr + 64); /* It must be 32 as Compound S/G Descriptor */ - csgf_desc->length = 32; + qdma_csgf_set_len(csgf_desc, 32); sg_block = fsl_comp->sg_block; - csgf_src->addr = sg_block->bus_addr; + qdma_csgf_addr_set64(csgf_src, sg_block->bus_addr); /* This entry link to the s/g entry. */ - csgf_src->e = FSL_QDMA_E_SG_TABLE; + qdma_csgf_set_e(csgf_src, 32); temp = sg_block + fsl_comp->sg_block_src; - csgf_dest->addr = temp->bus_addr; + qdma_csgf_addr_set64(csgf_dest, temp->bus_addr); /* This entry is the last entry. */ - csgf_dest->f = FSL_QDMA_F_LAST_ENTRY; + qdma_csgf_set_f(csgf_dest, 32); /* This entry link to the s/g entry. 
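The 40-bit addressing used here packs the low 32 bits of a bus address into addr_lo and bits 32-39 into the addr_hi byte, which the __le64 data overlay then reads back in one go. A quick round-trip sanity check, as a hypothetical sketch (not part of the patch):

static void qdma_addr_roundtrip_check(void)
{
	struct fsl_qdma_ccdf ccdf = { };
	u64 addr = 0x12345678abULL;	/* any 40-bit bus address */

	qdma_ccdf_addr_set64(&ccdf, addr);
	WARN_ON(qdma_ccdf_addr_get64(&ccdf) != addr);
}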
*/ - csgf_dest->e = FSL_QDMA_E_SG_TABLE; + qdma_csgf_set_e(csgf_dest, 32); for_each_sg(src_sg, sg, src_nents, i) { temp = sg_block + i / (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1); csgf_sg = (struct fsl_qdma_csgf *)temp->virt_addr + i % (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1); - csgf_sg->addr = sg_dma_address(sg); - csgf_sg->length = sg_dma_len(sg); + qdma_csgf_addr_set64(csgf_sg, sg_dma_address(sg)); + qdma_csgf_set_len(csgf_sg, sg_dma_len(sg)); total_src_len += sg_dma_len(sg); if (i == src_nents - 1) - csgf_sg->f = FSL_QDMA_F_LAST_ENTRY; + qdma_csgf_set_f(csgf_sg, sg_dma_len(sg)); if (i % (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1) == FSL_QDMA_EXPECT_SG_ENTRY_NUM - 2) { csgf_sg = (struct fsl_qdma_csgf *)temp->virt_addr + FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1; temp = sg_block + i / (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1) + 1; - csgf_sg->addr = temp->bus_addr; - csgf_sg->e = FSL_QDMA_E_SG_TABLE; + qdma_csgf_addr_set64(csgf_sg, temp->bus_addr); + qdma_csgf_set_e(csgf_sg, sg_dma_len(sg)); } } @@ -396,20 +442,20 @@ static void fsl_qdma_comp_fill_sg( temp = sg_block + i / (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1); csgf_sg = (struct fsl_qdma_csgf *)temp->virt_addr + i % (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1); - csgf_sg->addr = sg_dma_address(sg); - csgf_sg->length = sg_dma_len(sg); + qdma_csgf_addr_set64(csgf_sg, sg_dma_address(sg)); + qdma_csgf_set_len(csgf_sg, sg_dma_len(sg)); total_dst_len += sg_dma_len(sg); if (i == dst_nents - 1) - csgf_sg->f = FSL_QDMA_F_LAST_ENTRY; + qdma_csgf_set_f(csgf_sg, sg_dma_len(sg)); if (i % (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1) == FSL_QDMA_EXPECT_SG_ENTRY_NUM - 2) { csgf_sg = (struct fsl_qdma_csgf *)temp->virt_addr + FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1; temp = sg_block + i / (FSL_QDMA_EXPECT_SG_ENTRY_NUM - 1) + 1; - csgf_sg->addr = temp->bus_addr; - csgf_sg->e = FSL_QDMA_E_SG_TABLE; + qdma_csgf_addr_set64(csgf_sg, temp->bus_addr); + qdma_csgf_set_e(csgf_sg, sg_dma_len(sg)); } } @@ -417,12 +463,10 @@ static void fsl_qdma_comp_fill_sg( dev_err(&fsl_comp->qchan->vchan.chan.dev->device, "The data length for src and dst isn't match.\n"); - csgf_src->length = total_src_len; - csgf_dest->length = total_dst_len; + qdma_csgf_set_len(csgf_src, total_src_len); + qdma_csgf_set_len(csgf_dest, total_dst_len); /* Descriptor Buffer */ - sdf->cmd = FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET; - ddf->cmd = FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET; } /* @@ -694,13 +738,12 @@ static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma) if (reg & FSL_QDMA_BSQSR_QE) return 0; status_addr = fsl_status->virt_head; - if (status_addr->queue == pre_queue && - status_addr->addr == pre_addr) + if (qdma_ccdf_get_queue(status_addr) == pre_queue && + qdma_ccdf_addr_get64(status_addr) == pre_addr) duplicate = 1; - - i = status_addr->queue; - pre_queue = status_addr->queue; - pre_addr = status_addr->addr; + i = qdma_ccdf_get_queue(status_addr); + pre_queue = qdma_ccdf_get_queue(status_addr); + pre_addr = qdma_ccdf_addr_get64(status_addr); temp_queue = fsl_queue + i; spin_lock(&temp_queue->queue_lock); if (list_empty(&temp_queue->comp_used)) { @@ -716,8 +759,7 @@ static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma) list); csgf_src = (struct fsl_qdma_csgf *)fsl_comp->virt_addr + 2; - if (fsl_comp->bus_addr + 16 != - (dma_addr_t)status_addr->addr) { + if (fsl_comp->bus_addr + 16 != pre_addr) { if (duplicate) duplicate_handle = 1; else { @@ -730,7 +772,7 @@ static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma) if (duplicate_handle) { reg = qdma_readl(fsl_qdma, 
block + FSL_QDMA_BSQMR); reg |= FSL_QDMA_BSQMR_DI; - status_addr->addr = 0x0; + qdma_ccdf_addr_set64(status_addr, 0x0); fsl_status->virt_head++; if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq) @@ -743,7 +785,7 @@ static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma) reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR); reg |= FSL_QDMA_BSQMR_DI; - status_addr->addr = 0x0; + qdma_ccdf_addr_set64(status_addr, 0x0); fsl_status->virt_head++; if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq) fsl_status->virt_head = fsl_status->cq; diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 119f4ef..57e3d90 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -316,6 +316,7 @@ static const struct of_device_id ls_scfg_msi_id[] = { { .compatible = "fsl,1s1021a-msi", .data = &ls1021_msi_cfg}, { .compatible = "fsl,1s1043a-msi", .data = &ls1021_msi_cfg}, + { .compatible = "fsl,ls1012a-msi", .data = &ls1021_msi_cfg }, { .compatible = "fsl,ls1021a-msi", .data = &ls1021_msi_cfg }, { .compatible = "fsl,ls1043a-msi", .data = &ls1021_msi_cfg }, { .compatible = "fsl,ls1043a-v1.1-msi", .data = &ls1043_v1_1_msi_cfg }, diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 4b0f375..ae3fee6 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -502,6 +502,12 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock) clock -= 5000000; } + /* Workaround to reduce the clock frequency for ls1021a esdhc */ + if (of_find_compatible_node(NULL, NULL, "fsl,ls1021a-esdhc")) { + if (clock == 50000000) + clock = 46500000; + } + temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); temp &= ~(ESDHC_CLOCK_SDCLKEN | ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN | ESDHC_CLOCK_MASK); @@ -591,10 +597,18 @@ static void esdhc_clock_enable(struct sdhci_host *host, bool enable) static void esdhc_reset(struct sdhci_host *host, u8 mask) { + u32 val; + sdhci_reset(host, mask); sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + if (mask & SDHCI_RESET_ALL) { + val = sdhci_readl(host, ESDHC_TBCTL); + val &= ~ESDHC_TB_EN; + sdhci_writel(host, val, ESDHC_TBCTL); + } } /* The SCFG, Supplemental Configuration Unit, provides SoC specific diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 74a2864..8a5bb4e 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -95,4 +95,5 @@ config GIANFAR source "drivers/net/ethernet/freescale/sdk_fman/Kconfig" source "drivers/net/ethernet/freescale/sdk_dpaa/Kconfig" +source "drivers/net/ethernet/freescale/dpaa/Kconfig" endif # NET_VENDOR_FREESCALE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index a5d4405..f7d4d6b 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -24,3 +24,4 @@ ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o obj-$(if $(CONFIG_FSL_SDK_FMAN),y) += sdk_fman/ obj-$(if $(CONFIG_FSL_SDK_DPAA_ETH),y) += sdk_dpaa/ obj-$(CONFIG_FSL_FMAN) += fman/ +obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig new file mode 100644 index 0000000..a654736 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig @@ -0,0 +1,10 @@ +menuconfig FSL_DPAA_ETH + tristate 
"DPAA Ethernet" + depends on FSL_DPAA && FSL_FMAN + select PHYLIB + select FSL_FMAN_MAC + ---help--- + Data Path Acceleration Architecture Ethernet driver, + supporting the Freescale QorIQ chips. + Depends on Freescale Buffer Manager and Queue Manager + driver and Frame Manager Driver. diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile new file mode 100644 index 0000000..7db50bc --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the Freescale DPAA Ethernet controllers +# + +# Include FMan headers +FMAN = $(srctree)/drivers/net/ethernet/freescale/fman +ccflags-y += -I$(FMAN) + +obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o + +fsl_dpa-objs += dpaa_eth.o dpaa_ethtool.o dpaa_eth_sysfs.o +CFLAGS_dpaa_eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c new file mode 100644 index 0000000..62f3b3a --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -0,0 +1,2878 @@ +/* Copyright 2008 - 2016 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/io.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/highmem.h> +#include <linux/percpu.h> +#include <linux/dma-mapping.h> +#include <linux/sort.h> +#include <soc/fsl/bman.h> +#include <soc/fsl/qman.h> + +#include "fman.h" +#include "fman_port.h" +#include "mac.h" +#include "dpaa_eth.h" + +/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files + * using trace events only need to #include <trace/events/sched.h> + */ +#define CREATE_TRACE_POINTS +#include "dpaa_eth_trace.h" + +static int debug = -1; +module_param(debug, int, 0444); +MODULE_PARM_DESC(debug, "Module/Driver verbosity level (0=none,...,16=all)"); + +static u16 tx_timeout = 1000; +module_param(tx_timeout, ushort, 0444); +MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); + +#define FM_FD_STAT_RX_ERRORS \ + (FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL | \ + FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \ + FM_FD_ERR_EXTRACTION | FM_FD_ERR_NO_SCHEME | \ + FM_FD_ERR_PRS_TIMEOUT | FM_FD_ERR_PRS_ILL_INSTRUCT | \ + FM_FD_ERR_PRS_HDR_ERR) + +#define FM_FD_STAT_TX_ERRORS \ + (FM_FD_ERR_UNSUPPORTED_FORMAT | \ + FM_FD_ERR_LENGTH | FM_FD_ERR_DMA) + +#define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | NETIF_MSG_IFUP | \ + NETIF_MSG_IFDOWN) + +#define DPAA_INGRESS_CS_THRESHOLD 0x10000000 +/* Ingress congestion threshold on FMan ports + * The size in bytes of the ingress tail-drop threshold on FMan ports. + * Traffic piling up above this value will be rejected by QMan and discarded + * by FMan. + */ + +/* Size in bytes of the FQ taildrop threshold */ +#define DPAA_FQ_TD 0x200000 + +#define DPAA_CS_THRESHOLD_1G 0x06000000 +/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000 + * The size in bytes of the egress Congestion State notification threshold on + * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a + * tight loop (e.g. by sending UDP datagrams at "while(1) speed"), + * and the larger the frame size, the more acute the problem. + * So we have to find a balance between these factors: + * - avoiding the device staying congested for a prolonged time (risking + * the netdev watchdog to fire - see also the tx_timeout module param); + * - affecting performance of protocols such as TCP, which otherwise + * behave well under the congestion notification mechanism; + * - preventing the Tx cores from tightly-looping (as if the congestion + * threshold was too low to be effective); + * - running out of memory if the CS threshold is set too high. + */ + +#define DPAA_CS_THRESHOLD_10G 0x10000000 +/* The size in bytes of the egress Congestion State notification threshold on + * 10G ports, range 0x1000 .. 
0x10000000 + */ + +/* Largest value that the FQD's OAL field can hold */ +#define FSL_QMAN_MAX_OAL 127 + +/* Default alignment for start of data in an Rx FD */ +#define DPAA_FD_DATA_ALIGNMENT 16 + +/* Values for the L3R field of the FM Parse Results + */ +/* L3 Type field: First IP Present IPv4 */ +#define FM_L3_PARSE_RESULT_IPV4 0x8000 +/* L3 Type field: First IP Present IPv6 */ +#define FM_L3_PARSE_RESULT_IPV6 0x4000 +/* Values for the L4R field of the FM Parse Results */ +/* L4 Type field: UDP */ +#define FM_L4_PARSE_RESULT_UDP 0x40 +/* L4 Type field: TCP */ +#define FM_L4_PARSE_RESULT_TCP 0x20 + +/* FD status field indicating whether the FM Parser has attempted to validate + * the L4 csum of the frame. + * Note that having this bit set doesn't necessarily imply that the checksum + * is valid. One would have to check the parse results to find that out. + */ +#define FM_FD_STAT_L4CV 0x00000004 + +#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */ +#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */ + +#define FSL_DPAA_BPID_INV 0xff +#define FSL_DPAA_ETH_MAX_BUF_COUNT 128 +#define FSL_DPAA_ETH_REFILL_THRESHOLD 80 + +#define DPAA_TX_PRIV_DATA_SIZE 16 +#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result) +#define DPAA_TIME_STAMP_SIZE 8 +#define DPAA_HASH_RESULTS_SIZE 8 +#define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \ + dpaa_rx_extra_headroom) + +#define DPAA_ETH_RX_QUEUES 128 + +#define DPAA_ENQUEUE_RETRIES 100000 + +enum port_type {RX, TX}; + +struct fm_port_fqs { + struct dpaa_fq *tx_defq; + struct dpaa_fq *tx_errq; + struct dpaa_fq *rx_defq; + struct dpaa_fq *rx_errq; +}; + +/* All the dpa bps in use at any moment */ +static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS]; + +/* The raw buffer size must be cacheline aligned */ +#define DPAA_BP_RAW_SIZE 4096 +/* When using more than one buffer pool, the raw sizes are as follows: + * 1 bp: 4KB + * 2 bp: 2KB, 4KB + * 3 bp: 1KB, 2KB, 4KB + * 4 bp: 1KB, 2KB, 4KB, 8KB + */ +static inline size_t bpool_buffer_raw_size(u8 index, u8 cnt) +{ + size_t res = DPAA_BP_RAW_SIZE / 4; + u8 i; + + for (i = (cnt < 3) ? cnt : 3; i < 3 + index; i++) + res *= 2; + return res; +} + +/* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is + * even stronger (SMP_CACHE_BYTES-aligned), so we just get away with that, + * via SKB_WITH_OVERHEAD(). We can't rely on netdev_alloc_frag() giving us + * half-page-aligned buffers, so we reserve some more space for start-of-buffer + * alignment. 
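The size ladder described above can be confirmed by working bpool_buffer_raw_size() through by hand; a hypothetical self-check (not part of the patch) would read:

static void __maybe_unused dpaa_bp_raw_size_check(void)
{
	/* 1 bp: 4KB; 2 bp: 2KB, 4KB; 4 bp: 1KB, 2KB, 4KB, 8KB */
	WARN_ON(bpool_buffer_raw_size(0, 1) != SZ_4K);
	WARN_ON(bpool_buffer_raw_size(0, 2) != SZ_2K);
	WARN_ON(bpool_buffer_raw_size(1, 2) != SZ_4K);
	WARN_ON(bpool_buffer_raw_size(0, 4) != SZ_1K);
	WARN_ON(bpool_buffer_raw_size(3, 4) != SZ_8K);
}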
+ */ +#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD((raw_size) - SMP_CACHE_BYTES) + +static int dpaa_max_frm; + +static int dpaa_rx_extra_headroom; + +#define dpaa_get_max_mtu() \ + (dpaa_max_frm - (VLAN_ETH_HLEN + ETH_FCS_LEN)) + +static int dpaa_netdev_init(struct net_device *net_dev, + const struct net_device_ops *dpaa_ops, + u16 tx_timeout) +{ + struct dpaa_priv *priv = netdev_priv(net_dev); + struct device *dev = net_dev->dev.parent; + struct dpaa_percpu_priv *percpu_priv; + const u8 *mac_addr; + int i, err; + + /* Although we access another CPU's private data here + * we do it at initialization so it is safe + */ + for_each_possible_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + percpu_priv->net_dev = net_dev; + } + + net_dev->netdev_ops = dpaa_ops; + mac_addr = priv->mac_dev->addr; + + net_dev->mem_start = priv->mac_dev->res->start; + net_dev->mem_end = priv->mac_dev->res->end; + + net_dev->min_mtu = ETH_MIN_MTU; + net_dev->max_mtu = dpaa_get_max_mtu(); + + net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_LLTX); + + net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA; + /* The kernel enables GSO automatically if we declare NETIF_F_SG. + * For conformity, we'll still declare GSO explicitly. + */ + net_dev->features |= NETIF_F_GSO; + net_dev->features |= NETIF_F_RXCSUM; + + net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + /* we do not want shared skbs on TX */ + net_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + + net_dev->features |= net_dev->hw_features; + net_dev->vlan_features = net_dev->features; + + memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len); + memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len); + + net_dev->ethtool_ops = &dpaa_ethtool_ops; + + net_dev->needed_headroom = priv->tx_headroom; + net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout); + + /* start without the RUNNING flag, phylib controls it later */ + netif_carrier_off(net_dev); + + err = register_netdev(net_dev); + if (err < 0) { + dev_err(dev, "register_netdev() = %d\n", err); + return err; + } + + return 0; +} + +static int dpaa_stop(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct dpaa_priv *priv; + int i, err, error; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + + netif_tx_stop_all_queues(net_dev); + /* Allow the Fman (Tx) port to process in-flight frames before we + * try switching it off. + */ + usleep_range(5000, 10000); + + err = mac_dev->stop(mac_dev); + if (err < 0) + netif_err(priv, ifdown, net_dev, "mac_dev->stop() = %d\n", + err); + + for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { + error = fman_port_disable(mac_dev->port[i]); + if (error) + err = error; + } + + if (net_dev->phydev) + phy_disconnect(net_dev->phydev); + net_dev->phydev = NULL; + + return err; +} + +static void dpaa_tx_timeout(struct net_device *net_dev) +{ + struct dpaa_percpu_priv *percpu_priv; + const struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + percpu_priv = this_cpu_ptr(priv->percpu_priv); + + netif_crit(priv, timer, net_dev, "Transmit timeout latency: %u ms\n", + jiffies_to_msecs(jiffies - dev_trans_start(net_dev))); + + percpu_priv->stats.tx_errors++; +} + +/* Calculates the statistics for the given device by adding the statistics + * collected by each CPU.
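The per-CPU summation in dpaa_get_stats64() below walks struct rtnl_link_stats64 as a flat u64 array. That is only valid because every field of that struct is a u64; a compile-time guard along these lines (hypothetical, not in the patch), placed at the top of the function, would pin the assumption down:

	BUILD_BUG_ON(sizeof(struct rtnl_link_stats64) % sizeof(u64) != 0);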
+ */ +static struct rtnl_link_stats64 *dpaa_get_stats64(struct net_device *net_dev, + struct rtnl_link_stats64 *s) +{ + int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64); + struct dpaa_priv *priv = netdev_priv(net_dev); + struct dpaa_percpu_priv *percpu_priv; + u64 *netstats = (u64 *)s; + u64 *cpustats; + int i, j; + + for_each_possible_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + + cpustats = (u64 *)&percpu_priv->stats; + + /* add stats from all CPUs */ + for (j = 0; j < numstats; j++) + netstats[j] += cpustats[j]; + } + + return s; +} + +static int dpaa_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct dpaa_priv *priv = netdev_priv(net_dev); + int i; + + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + if (tc->tc == priv->num_tc) + return 0; + + if (!tc->tc) { + netdev_reset_tc(net_dev); + goto out; + } + + if (tc->tc > DPAA_TC_NUM) { + netdev_err(net_dev, "Too many traffic classes: max %d supported.\n", + DPAA_TC_NUM); + return -EINVAL; + } + + netdev_set_num_tc(net_dev, tc->tc); + + for (i = 0; i < tc->tc; i++) + netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM, + i * DPAA_TC_TXQ_NUM); + +out: + priv->num_tc = tc->tc ? tc->tc : 1; + netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM); + return 0; +} + +static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev) +{ + struct platform_device *of_dev; + struct dpaa_eth_data *eth_data; + struct device *dpaa_dev, *dev; + struct device_node *mac_node; + struct mac_device *mac_dev; + + dpaa_dev = &pdev->dev; + eth_data = dpaa_dev->platform_data; + if (!eth_data) + return ERR_PTR(-ENODEV); + + mac_node = eth_data->mac_node; + + of_dev = of_find_device_by_node(mac_node); + if (!of_dev) { + dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n", + mac_node->full_name); + of_node_put(mac_node); + return ERR_PTR(-EINVAL); + } + of_node_put(mac_node); + + dev = &of_dev->dev; + + mac_dev = dev_get_drvdata(dev); + if (!mac_dev) { + dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n", + dev_name(dev)); + return ERR_PTR(-EINVAL); + } + + return mac_dev; +} + +static int dpaa_set_mac_address(struct net_device *net_dev, void *addr) +{ + const struct dpaa_priv *priv; + struct mac_device *mac_dev; + struct sockaddr old_addr; + int err; + + priv = netdev_priv(net_dev); + + memcpy(old_addr.sa_data, net_dev->dev_addr, ETH_ALEN); + + err = eth_mac_addr(net_dev, addr); + if (err < 0) { + netif_err(priv, drv, net_dev, "eth_mac_addr() = %d\n", err); + return err; + } + + mac_dev = priv->mac_dev; + + err = mac_dev->change_addr(mac_dev->fman_mac, + (enet_addr_t *)net_dev->dev_addr); + if (err < 0) { + netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n", + err); + /* reverting to previous address */ + eth_mac_addr(net_dev, &old_addr); + + return err; + } + + return 0; +} + +static void dpaa_set_rx_mode(struct net_device *net_dev) +{ + const struct dpaa_priv *priv; + int err; + + priv = netdev_priv(net_dev); + + if (!!(net_dev->flags & IFF_PROMISC) != priv->mac_dev->promisc) { + priv->mac_dev->promisc = !priv->mac_dev->promisc; + err = priv->mac_dev->set_promisc(priv->mac_dev->fman_mac, + priv->mac_dev->promisc); + if (err < 0) + netif_err(priv, drv, net_dev, + "mac_dev->set_promisc() = %d\n", + err); + } + + err = priv->mac_dev->set_multi(net_dev, priv->mac_dev); + if (err < 0) + netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n", + err); +} + +static struct dpaa_bp *dpaa_bpid2pool(int bpid) +{ + if (WARN_ON(bpid < 0 || bpid >= 
BM_MAX_NUM_OF_POOLS)) + return NULL; + + return dpaa_bp_array[bpid]; +} + +/* checks if this bpool is already allocated */ +static bool dpaa_bpid2pool_use(int bpid) +{ + if (dpaa_bpid2pool(bpid)) { + atomic_inc(&dpaa_bp_array[bpid]->refs); + return true; + } + + return false; +} + +/* called only once per bpid by dpaa_bp_alloc_pool() */ +static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp) +{ + dpaa_bp_array[bpid] = dpaa_bp; + atomic_set(&dpaa_bp->refs, 1); +} + +static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp) +{ + int err; + + if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) { + pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n", + __func__); + return -EINVAL; + } + + /* If the pool is already specified, we only create one per bpid */ + if (dpaa_bp->bpid != FSL_DPAA_BPID_INV && + dpaa_bpid2pool_use(dpaa_bp->bpid)) + return 0; + + if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) { + dpaa_bp->pool = bman_new_pool(); + if (!dpaa_bp->pool) { + pr_err("%s: bman_new_pool() failed\n", + __func__); + return -ENODEV; + } + + dpaa_bp->bpid = (u8)bman_get_bpid(dpaa_bp->pool); + } + + if (dpaa_bp->seed_cb) { + err = dpaa_bp->seed_cb(dpaa_bp); + if (err) + goto pool_seed_failed; + } + + dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp); + + return 0; + +pool_seed_failed: + pr_err("%s: pool seeding failed\n", __func__); + bman_free_pool(dpaa_bp->pool); + + return err; +} + +/* remove and free all the buffers from the given buffer pool */ +static void dpaa_bp_drain(struct dpaa_bp *bp) +{ + u8 num = 8; + int ret; + + do { + struct bm_buffer bmb[8]; + int i; + + ret = bman_acquire(bp->pool, bmb, num); + if (ret < 0) { + if (num == 8) { + /* we have less than 8 buffers left; + * drain them one by one + */ + num = 1; + ret = 1; + continue; + } else { + /* Pool is fully drained */ + break; + } + } + + if (bp->free_buf_cb) + for (i = 0; i < num; i++) + bp->free_buf_cb(bp, &bmb[i]); + } while (ret > 0); +} + +static void dpaa_bp_free(struct dpaa_bp *dpaa_bp) +{ + struct dpaa_bp *bp = dpaa_bpid2pool(dpaa_bp->bpid); + + /* the mapping between bpid and dpaa_bp is done very late in the + * allocation procedure; if something failed before the mapping, the bp + * was not configured, therefore we don't need the below instructions + */ + if (!bp) + return; + + if (!atomic_dec_and_test(&bp->refs)) + return; + + if (bp->free_buf_cb) + dpaa_bp_drain(bp); + + dpaa_bp_array[bp->bpid] = NULL; + bman_free_pool(bp->pool); +} + +static void dpaa_bps_free(struct dpaa_priv *priv) +{ + int i; + + for (i = 0; i < DPAA_BPS_NUM; i++) + dpaa_bp_free(priv->dpaa_bps[i]); +} + +/* Use multiple WQs for FQ assignment: + * - Tx Confirmation queues go to WQ1. + * - Rx Error and Tx Error queues go to WQ5 (giving them a better chance + * to be scheduled, in case there are many more FQs in WQ6). + * - Rx Default goes to WQ6. + * - Tx queues go to different WQs depending on their priority. Equal + * chunks of NR_CPUS queues go to WQ6 (lowest priority), WQ2, WQ1 and + * WQ0 (highest priority). + * This ensures that Tx-confirmed buffers are timely released. In particular, + * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they + * are greatly outnumbered by other FQs in the system, while + * dequeue scheduling is round-robin. 
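As a worked example of the Tx branch in dpaa_assign_wq() below (assuming DPAA_TC_TXQ_NUM equals NR_CPUS, as dpaa_eth.h defines it): on a 4-CPU system, Tx FQ indexes 0-3 map to WQ6 (best effort), 4-7 to WQ2, 8-11 to WQ1 and 12-15 to WQ0 (highest priority), i.e. idx / DPAA_TC_TXQ_NUM selects the priority band.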
+ */ +static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx) +{ + switch (fq->fq_type) { + case FQ_TYPE_TX_CONFIRM: + case FQ_TYPE_TX_CONF_MQ: + fq->wq = 1; + break; + case FQ_TYPE_RX_ERROR: + case FQ_TYPE_TX_ERROR: + fq->wq = 5; + break; + case FQ_TYPE_RX_DEFAULT: + fq->wq = 6; + break; + case FQ_TYPE_TX: + switch (idx / DPAA_TC_TXQ_NUM) { + case 0: + /* Low priority (best effort) */ + fq->wq = 6; + break; + case 1: + /* Medium priority */ + fq->wq = 2; + break; + case 2: + /* High priority */ + fq->wq = 1; + break; + case 3: + /* Very high priority */ + fq->wq = 0; + break; + default: + WARN(1, "Too many TX FQs: more than %d!\n", + DPAA_ETH_TXQ_NUM); + } + break; + default: + WARN(1, "Invalid FQ type %d for FQID %d!\n", + fq->fq_type, fq->fqid); + } +} + +static struct dpaa_fq *dpaa_fq_alloc(struct device *dev, + u32 start, u32 count, + struct list_head *list, + enum dpaa_fq_type fq_type) +{ + struct dpaa_fq *dpaa_fq; + int i; + + dpaa_fq = devm_kzalloc(dev, sizeof(*dpaa_fq) * count, + GFP_KERNEL); + if (!dpaa_fq) + return NULL; + + for (i = 0; i < count; i++) { + dpaa_fq[i].fq_type = fq_type; + dpaa_fq[i].fqid = start ? start + i : 0; + list_add_tail(&dpaa_fq[i].list, list); + } + + for (i = 0; i < count; i++) + dpaa_assign_wq(dpaa_fq + i, i); + + return dpaa_fq; +} + +static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list, + struct fm_port_fqs *port_fqs) +{ + struct dpaa_fq *dpaa_fq; + + dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR); + if (!dpaa_fq) + goto fq_alloc_failed; + + port_fqs->rx_errq = &dpaa_fq[0]; + + dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_DEFAULT); + if (!dpaa_fq) + goto fq_alloc_failed; + + port_fqs->rx_defq = &dpaa_fq[0]; + + if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ)) + goto fq_alloc_failed; + + dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR); + if (!dpaa_fq) + goto fq_alloc_failed; + + port_fqs->tx_errq = &dpaa_fq[0]; + + dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_CONFIRM); + if (!dpaa_fq) + goto fq_alloc_failed; + + port_fqs->tx_defq = &dpaa_fq[0]; + + if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX)) + goto fq_alloc_failed; + + return 0; + +fq_alloc_failed: + dev_err(dev, "dpaa_fq_alloc() failed\n"); + return -ENOMEM; +} + +static u32 rx_pool_channel; +static DEFINE_SPINLOCK(rx_pool_channel_init); + +static int dpaa_get_channel(void) +{ + spin_lock(&rx_pool_channel_init); + if (!rx_pool_channel) { + u32 pool; + int ret; + + ret = qman_alloc_pool(&pool); + + if (!ret) + rx_pool_channel = pool; + } + spin_unlock(&rx_pool_channel_init); + if (!rx_pool_channel) + return -ENOMEM; + return rx_pool_channel; +} + +static void dpaa_release_channel(void) +{ + qman_release_pool(rx_pool_channel); +} + +static void dpaa_eth_add_channel(u16 channel) +{ + u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel); + const cpumask_t *cpus = qman_affine_cpus(); + struct qman_portal *portal; + int cpu; + + for_each_cpu(cpu, cpus) { + portal = qman_get_affine_portal(cpu); + qman_p_static_dequeue_add(portal, pool); + } +} + +/* Congestion group state change notification callback. + * Stops the device's egress queues while they are congested and + * wakes them upon exiting congested state. + * Also updates some CGR-related stats. 
+ */ +static void dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr, + int congested) +{ + struct dpaa_priv *priv = (struct dpaa_priv *)container_of(cgr, + struct dpaa_priv, cgr_data.cgr); + + if (congested) { + priv->cgr_data.congestion_start_jiffies = jiffies; + netif_tx_stop_all_queues(priv->net_dev); + priv->cgr_data.cgr_congested_count++; + } else { + priv->cgr_data.congested_jiffies += + (jiffies - priv->cgr_data.congestion_start_jiffies); + netif_tx_wake_all_queues(priv->net_dev); + } +} + +static int dpaa_eth_cgr_init(struct dpaa_priv *priv) +{ + struct qm_mcc_initcgr initcgr; + u32 cs_th; + int err; + + err = qman_alloc_cgrid(&priv->cgr_data.cgr.cgrid); + if (err < 0) { + if (netif_msg_drv(priv)) + pr_err("%s: Error %d allocating CGR ID\n", + __func__, err); + goto out_error; + } + priv->cgr_data.cgr.cb = dpaa_eth_cgscn; + + /* Enable Congestion State Change Notifications and CS taildrop */ + memset(&initcgr, 0, sizeof(initcgr)); + initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES); + initcgr.cgr.cscn_en = QM_CGR_EN; + + /* Set different thresholds based on the MAC speed. + * This may turn suboptimal if the MAC is reconfigured at a speed + * lower than its max, e.g. if a dTSEC later negotiates a 100Mbps link. + * In such cases, we ought to reconfigure the threshold, too. + */ + if (priv->mac_dev->if_support & SUPPORTED_10000baseT_Full) + cs_th = DPAA_CS_THRESHOLD_10G; + else + cs_th = DPAA_CS_THRESHOLD_1G; + qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1); + + initcgr.we_mask |= cpu_to_be16(QM_CGR_WE_CSTD_EN); + initcgr.cgr.cstd_en = QM_CGR_EN; + + err = qman_create_cgr(&priv->cgr_data.cgr, QMAN_CGR_FLAG_USE_INIT, + &initcgr); + if (err < 0) { + if (netif_msg_drv(priv)) + pr_err("%s: Error %d creating CGR with ID %d\n", + __func__, err, priv->cgr_data.cgr.cgrid); + qman_release_cgrid(priv->cgr_data.cgr.cgrid); + goto out_error; + } + if (netif_msg_drv(priv)) + pr_debug("Created CGR %d for netdev with hwaddr %pM on QMan channel %d\n", + priv->cgr_data.cgr.cgrid, priv->mac_dev->addr, + priv->cgr_data.cgr.chan); + +out_error: + return err; +} + +static inline void dpaa_setup_ingress(const struct dpaa_priv *priv, + struct dpaa_fq *fq, + const struct qman_fq *template) +{ + fq->fq_base = *template; + fq->net_dev = priv->net_dev; + + fq->flags = QMAN_FQ_FLAG_NO_ENQUEUE; + fq->channel = priv->channel; +} + +static inline void dpaa_setup_egress(const struct dpaa_priv *priv, + struct dpaa_fq *fq, + struct fman_port *port, + const struct qman_fq *template) +{ + fq->fq_base = *template; + fq->net_dev = priv->net_dev; + + if (port) { + fq->flags = QMAN_FQ_FLAG_TO_DCPORTAL; + fq->channel = (u16)fman_port_get_qman_channel_id(port); + } else { + fq->flags = QMAN_FQ_FLAG_NO_MODIFY; + } +} + +static void dpaa_fq_setup(struct dpaa_priv *priv, + const struct dpaa_fq_cbs *fq_cbs, + struct fman_port *tx_port) +{ + int egress_cnt = 0, conf_cnt = 0, num_portals = 0, cpu; + const cpumask_t *affine_cpus = qman_affine_cpus(); + u16 portals[NR_CPUS]; + struct dpaa_fq *fq; + + for_each_cpu(cpu, affine_cpus) + portals[num_portals++] = qman_affine_channel(cpu); + if (num_portals == 0) + dev_err(priv->net_dev->dev.parent, + "No Qman software (affine) channels found"); + + /* Initialize each FQ in the list */ + list_for_each_entry(fq, &priv->dpaa_fq_list, list) { + switch (fq->fq_type) { + case FQ_TYPE_RX_DEFAULT: + dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq); + break; + case FQ_TYPE_RX_ERROR: + dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq); + break; + case FQ_TYPE_TX: + 
dpaa_setup_egress(priv, fq, tx_port, + &fq_cbs->egress_ern); + /* If we have more Tx queues than the number of cores, + * just ignore the extra ones. + */ + if (egress_cnt < DPAA_ETH_TXQ_NUM) + priv->egress_fqs[egress_cnt++] = &fq->fq_base; + break; + case FQ_TYPE_TX_CONF_MQ: + priv->conf_fqs[conf_cnt++] = &fq->fq_base; + /* fall through */ + case FQ_TYPE_TX_CONFIRM: + dpaa_setup_ingress(priv, fq, &fq_cbs->tx_defq); + break; + case FQ_TYPE_TX_ERROR: + dpaa_setup_ingress(priv, fq, &fq_cbs->tx_errq); + break; + default: + dev_warn(priv->net_dev->dev.parent, + "Unknown FQ type detected!\n"); + break; + } + } + + /* Make sure all CPUs receive a corresponding Tx queue. */ + while (egress_cnt < DPAA_ETH_TXQ_NUM) { + list_for_each_entry(fq, &priv->dpaa_fq_list, list) { + if (fq->fq_type != FQ_TYPE_TX) + continue; + priv->egress_fqs[egress_cnt++] = &fq->fq_base; + if (egress_cnt == DPAA_ETH_TXQ_NUM) + break; + } + } +} + +static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv, + struct qman_fq *tx_fq) +{ + int i; + + for (i = 0; i < DPAA_ETH_TXQ_NUM; i++) + if (priv->egress_fqs[i] == tx_fq) + return i; + + return -EINVAL; +} + +static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable) +{ + const struct dpaa_priv *priv; + struct qman_fq *confq = NULL; + struct qm_mcc_initfq initfq; + struct device *dev; + struct qman_fq *fq; + int queue_id; + int err; + + priv = netdev_priv(dpaa_fq->net_dev); + dev = dpaa_fq->net_dev->dev.parent; + + if (dpaa_fq->fqid == 0) + dpaa_fq->flags |= QMAN_FQ_FLAG_DYNAMIC_FQID; + + dpaa_fq->init = !(dpaa_fq->flags & QMAN_FQ_FLAG_NO_MODIFY); + + err = qman_create_fq(dpaa_fq->fqid, dpaa_fq->flags, &dpaa_fq->fq_base); + if (err) { + dev_err(dev, "qman_create_fq() failed\n"); + return err; + } + fq = &dpaa_fq->fq_base; + + if (dpaa_fq->init) { + memset(&initfq, 0, sizeof(initfq)); + + initfq.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL); + /* Note: we may get to keep an empty FQ in cache */ + initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_PREFERINCACHE); + + /* Try to reduce the number of portal interrupts for + * Tx Confirmation FQs. + */ + if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM) + initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_AVOIDBLOCK); + + /* FQ placement */ + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_DESTWQ); + + qm_fqd_set_destwq(&initfq.fqd, dpaa_fq->channel, dpaa_fq->wq); + + /* Put all egress queues in a congestion group of their own. + * Sensu stricto, the Tx confirmation queues are Rx FQs, + * rather than Tx - but they nonetheless account for the + * memory footprint on behalf of egress traffic. We therefore + * place them in the netdev's CGR, along with the Tx FQs. + */ + if (dpaa_fq->fq_type == FQ_TYPE_TX || + dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM || + dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) { + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID); + initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE); + initfq.fqd.cgid = (u8)priv->cgr_data.cgr.cgrid; + /* Set a fixed overhead accounting, in an attempt to + * reduce the impact of fixed-size skb shells and the + * driver's needed headroom on system memory. This is + * especially the case when the egress traffic is + * composed of small datagrams. + * Unfortunately, QMan's OAL value is capped to an + * insufficient value, but even that is better than + * no overhead accounting at all. 
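+			 * As a rough, config-dependent example: with a
+			 * 64-byte tx_headroom and a struct sk_buff of
+			 * ~232 bytes, the ideal OAL would approach 300
+			 * bytes, above the FSL_QMAN_MAX_OAL cap, so the
+			 * min() below clamps it to the hardware limit.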
+ */ + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_OAC); + qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG); + qm_fqd_set_oal(&initfq.fqd, + min(sizeof(struct sk_buff) + + priv->tx_headroom, + (size_t)FSL_QMAN_MAX_OAL)); + } + + if (td_enable) { + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_TDTHRESH); + qm_fqd_set_taildrop(&initfq.fqd, DPAA_FQ_TD, 1); + initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_TDE); + } + + if (dpaa_fq->fq_type == FQ_TYPE_TX) { + queue_id = dpaa_tx_fq_to_id(priv, &dpaa_fq->fq_base); + if (queue_id >= 0) + confq = priv->conf_fqs[queue_id]; + if (confq) { + initfq.we_mask |= + cpu_to_be16(QM_INITFQ_WE_CONTEXTA); + /* ContextA: OVOM=1(use contextA2 bits instead of ICAD) + * A2V=1 (contextA A2 field is valid) + * A0V=1 (contextA A0 field is valid) + * B0V=1 (contextB field is valid) + * ContextA A2: EBD=1 (deallocate buffers inside FMan) + * ContextB B0(ASPID): 0 (absolute Virtual Storage ID) + */ + qm_fqd_context_a_set64(&initfq.fqd, + 0x1e00000080000000ULL); + } + } + + /* Put all the ingress queues in our "ingress CGR". */ + if (priv->use_ingress_cgr && + (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT || + dpaa_fq->fq_type == FQ_TYPE_RX_ERROR)) { + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID); + initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE); + initfq.fqd.cgid = (u8)priv->ingress_cgr.cgrid; + /* Set a fixed overhead accounting, just like for the + * egress CGR. + */ + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_OAC); + qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG); + qm_fqd_set_oal(&initfq.fqd, + min(sizeof(struct sk_buff) + + priv->tx_headroom, + (size_t)FSL_QMAN_MAX_OAL)); + } + + /* Initialization common to all ingress queues */ + if (dpaa_fq->flags & QMAN_FQ_FLAG_NO_ENQUEUE) { + initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CONTEXTA); + initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_HOLDACTIVE | + QM_FQCTRL_CTXASTASHING); + initfq.fqd.context_a.stashing.exclusive = + QM_STASHING_EXCL_DATA | QM_STASHING_EXCL_CTX | + QM_STASHING_EXCL_ANNOTATION; + qm_fqd_set_stashing(&initfq.fqd, 1, 2, + DIV_ROUND_UP(sizeof(struct qman_fq), + 64)); + } + + err = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &initfq); + if (err < 0) { + dev_err(dev, "qman_init_fq(%u) = %d\n", + qman_fq_fqid(fq), err); + qman_destroy_fq(fq); + return err; + } + } + + dpaa_fq->fqid = qman_fq_fqid(fq); + + return 0; +} + +static int dpaa_fq_free_entry(struct device *dev, struct qman_fq *fq) +{ + const struct dpaa_priv *priv; + struct dpaa_fq *dpaa_fq; + int err, error; + + err = 0; + + dpaa_fq = container_of(fq, struct dpaa_fq, fq_base); + priv = netdev_priv(dpaa_fq->net_dev); + + if (dpaa_fq->init) { + err = qman_retire_fq(fq, NULL); + if (err < 0 && netif_msg_drv(priv)) + dev_err(dev, "qman_retire_fq(%u) = %d\n", + qman_fq_fqid(fq), err); + + error = qman_oos_fq(fq); + if (error < 0 && netif_msg_drv(priv)) { + dev_err(dev, "qman_oos_fq(%u) = %d\n", + qman_fq_fqid(fq), error); + if (err >= 0) + err = error; + } + } + + qman_destroy_fq(fq); + list_del(&dpaa_fq->list); + + return err; +} + +static int dpaa_fq_free(struct device *dev, struct list_head *list) +{ + struct dpaa_fq *dpaa_fq, *tmp; + int err, error; + + err = 0; + list_for_each_entry_safe(dpaa_fq, tmp, list, list) { + error = dpaa_fq_free_entry(dev, (struct qman_fq *)dpaa_fq); + if (error < 0 && err >= 0) + err = error; + } + + return err; +} + +static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq, + struct dpaa_fq *defq, + struct dpaa_buffer_layout *buf_layout) +{ + struct fman_buffer_prefix_content buf_prefix_content; + struct fman_port_params params; 
+	int err;
+
+	memset(&params, 0, sizeof(params));
+	memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
+
+	buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
+	buf_prefix_content.pass_prs_result = true;
+	buf_prefix_content.pass_hash_result = true;
+	buf_prefix_content.pass_time_stamp = false;
+	buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+	params.specific_params.non_rx_params.err_fqid = errq->fqid;
+	params.specific_params.non_rx_params.dflt_fqid = defq->fqid;
+
+	err = fman_port_config(port, &params);
+	if (err) {
+		pr_err("%s: fman_port_config failed\n", __func__);
+		return err;
+	}
+
+	err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
+	if (err) {
+		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+		       __func__);
+		return err;
+	}
+
+	err = fman_port_init(port);
+	if (err)
+		pr_err("%s: fman_port_init failed\n", __func__);
+
+	return err;
+}
+
+static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
+				 size_t count, struct dpaa_fq *errq,
+				 struct dpaa_fq *defq,
+				 struct dpaa_buffer_layout *buf_layout)
+{
+	struct fman_buffer_prefix_content buf_prefix_content;
+	struct fman_port_rx_params *rx_p;
+	struct fman_port_params params;
+	int i, err;
+
+	memset(&params, 0, sizeof(params));
+	memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
+
+	buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
+	buf_prefix_content.pass_prs_result = true;
+	buf_prefix_content.pass_hash_result = true;
+	buf_prefix_content.pass_time_stamp = false;
+	buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+	rx_p = &params.specific_params.rx_params;
+	rx_p->err_fqid = errq->fqid;
+	rx_p->dflt_fqid = defq->fqid;
+
+	count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
+	rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
+	for (i = 0; i < count; i++) {
+		rx_p->ext_buf_pools.ext_buf_pool[i].id = bps[i]->bpid;
+		rx_p->ext_buf_pools.ext_buf_pool[i].size = (u16)bps[i]->size;
+	}
+
+	err = fman_port_config(port, &params);
+	if (err) {
+		pr_err("%s: fman_port_config failed\n", __func__);
+		return err;
+	}
+
+	err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
+	if (err) {
+		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+		       __func__);
+		return err;
+	}
+
+	err = fman_port_init(port);
+	if (err)
+		pr_err("%s: fman_port_init failed\n", __func__);
+
+	return err;
+}
+
+static int dpaa_eth_init_ports(struct mac_device *mac_dev,
+			       struct dpaa_bp **bps, size_t count,
+			       struct fm_port_fqs *port_fqs,
+			       struct dpaa_buffer_layout *buf_layout,
+			       struct device *dev)
+{
+	struct fman_port *rxport = mac_dev->port[RX];
+	struct fman_port *txport = mac_dev->port[TX];
+	int err;
+
+	err = dpaa_eth_init_tx_port(txport, port_fqs->tx_errq,
+				    port_fqs->tx_defq, &buf_layout[TX]);
+	if (err)
+		return err;
+
+	err = dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
+				    port_fqs->rx_defq, &buf_layout[RX]);
+
+	return err;
+}
+
+static int dpaa_bman_release(const struct dpaa_bp *dpaa_bp,
+			     struct bm_buffer *bmb, int cnt)
+{
+	int err;
+
+	err = bman_release(dpaa_bp->pool, bmb, cnt);
+	/* Should never occur, address anyway to avoid leaking the buffers */
+	if (unlikely(WARN_ON(err)) && dpaa_bp->free_buf_cb)
+		while (cnt-- > 0)
+			dpaa_bp->free_buf_cb(dpaa_bp, &bmb[cnt]);
+
+	return cnt;
+}
+
+static void dpaa_release_sgt_members(struct qm_sg_entry *sgt)
+{
+	struct bm_buffer bmb[DPAA_BUFF_RELEASE_MAX];
+	struct dpaa_bp *dpaa_bp;
+	int i = 0, j;
+
+	memset(bmb, 0, sizeof(bmb));
+
+	do {
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+ if (!dpaa_bp) + return; + + j = 0; + do { + WARN_ON(qm_sg_entry_is_ext(&sgt[i])); + + bm_buffer_set64(&bmb[j], qm_sg_entry_get64(&sgt[i])); + + j++; i++; + } while (j < ARRAY_SIZE(bmb) && + !qm_sg_entry_is_final(&sgt[i - 1]) && + sgt[i - 1].bpid == sgt[i].bpid); + + dpaa_bman_release(dpaa_bp, bmb, j); + } while (!qm_sg_entry_is_final(&sgt[i - 1])); +} + +static void dpaa_fd_release(const struct net_device *net_dev, + const struct qm_fd *fd) +{ + struct qm_sg_entry *sgt; + struct dpaa_bp *dpaa_bp; + struct bm_buffer bmb; + dma_addr_t addr; + void *vaddr; + + bmb.data = 0; + bm_buffer_set64(&bmb, qm_fd_addr(fd)); + + dpaa_bp = dpaa_bpid2pool(fd->bpid); + if (!dpaa_bp) + return; + + if (qm_fd_get_format(fd) == qm_fd_sg) { + vaddr = phys_to_virt(qm_fd_addr(fd)); + sgt = vaddr + qm_fd_get_offset(fd); + + dma_unmap_single(dpaa_bp->dev, qm_fd_addr(fd), dpaa_bp->size, + DMA_FROM_DEVICE); + + dpaa_release_sgt_members(sgt); + + addr = dma_map_single(dpaa_bp->dev, vaddr, dpaa_bp->size, + DMA_FROM_DEVICE); + if (dma_mapping_error(dpaa_bp->dev, addr)) { + dev_err(dpaa_bp->dev, "DMA mapping failed"); + return; + } + bm_buffer_set64(&bmb, addr); + } + + dpaa_bman_release(dpaa_bp, &bmb, 1); +} + +static void count_ern(struct dpaa_percpu_priv *percpu_priv, + const union qm_mr_entry *msg) +{ + switch (msg->ern.rc & QM_MR_RC_MASK) { + case QM_MR_RC_CGR_TAILDROP: + percpu_priv->ern_cnt.cg_tdrop++; + break; + case QM_MR_RC_WRED: + percpu_priv->ern_cnt.wred++; + break; + case QM_MR_RC_ERROR: + percpu_priv->ern_cnt.err_cond++; + break; + case QM_MR_RC_ORPWINDOW_EARLY: + percpu_priv->ern_cnt.early_window++; + break; + case QM_MR_RC_ORPWINDOW_LATE: + percpu_priv->ern_cnt.late_window++; + break; + case QM_MR_RC_FQ_TAILDROP: + percpu_priv->ern_cnt.fq_tdrop++; + break; + case QM_MR_RC_ORPWINDOW_RETIRED: + percpu_priv->ern_cnt.fq_retired++; + break; + case QM_MR_RC_ORP_ZERO: + percpu_priv->ern_cnt.orp_zero++; + break; + } +} + +/* Turn on HW checksum computation for this outgoing frame. + * If the current protocol is not something we support in this regard + * (or if the stack has already computed the SW checksum), we do nothing. + * + * Returns 0 if all goes well (or HW csum doesn't apply), and a negative value + * otherwise. + * + * Note that this function may modify the fd->cmd field and the skb data buffer + * (the Parse Results area). + */ +static int dpaa_enable_tx_csum(struct dpaa_priv *priv, + struct sk_buff *skb, + struct qm_fd *fd, + char *parse_results) +{ + struct fman_prs_result *parse_result; + u16 ethertype = ntohs(skb->protocol); + struct ipv6hdr *ipv6h = NULL; + struct iphdr *iph; + int retval = 0; + u8 l4_proto; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + /* Note: L3 csum seems to be already computed in sw, but we can't choose + * L4 alone from the FM configuration anyway. + */ + + /* Fill in some fields of the Parse Results array, so the FMan + * can find them as if they came from the FMan Parser. 
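+	 * For example, for an IPv4/TCP frame the code below sets
+	 * l3r = FM_L3_PARSE_RESULT_IPV4 and l4r = FM_L4_PARSE_RESULT_TCP,
+	 * plus the IP and L4 header offsets taken from the skb.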
+ */ + parse_result = (struct fman_prs_result *)parse_results; + + /* If we're dealing with VLAN, get the real Ethernet type */ + if (ethertype == ETH_P_8021Q) { + /* We can't always assume the MAC header is set correctly + * by the stack, so reset to beginning of skb->data + */ + skb_reset_mac_header(skb); + ethertype = ntohs(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto); + } + + /* Fill in the relevant L3 parse result fields + * and read the L4 protocol type + */ + switch (ethertype) { + case ETH_P_IP: + parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV4); + iph = ip_hdr(skb); + WARN_ON(!iph); + l4_proto = iph->protocol; + break; + case ETH_P_IPV6: + parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV6); + ipv6h = ipv6_hdr(skb); + WARN_ON(!ipv6h); + l4_proto = ipv6h->nexthdr; + break; + default: + /* We shouldn't even be here */ + if (net_ratelimit()) + netif_alert(priv, tx_err, priv->net_dev, + "Can't compute HW csum for L3 proto 0x%x\n", + ntohs(skb->protocol)); + retval = -EIO; + goto return_error; + } + + /* Fill in the relevant L4 parse result fields */ + switch (l4_proto) { + case IPPROTO_UDP: + parse_result->l4r = FM_L4_PARSE_RESULT_UDP; + break; + case IPPROTO_TCP: + parse_result->l4r = FM_L4_PARSE_RESULT_TCP; + break; + default: + if (net_ratelimit()) + netif_alert(priv, tx_err, priv->net_dev, + "Can't compute HW csum for L4 proto 0x%x\n", + l4_proto); + retval = -EIO; + goto return_error; + } + + /* At index 0 is IPOffset_1 as defined in the Parse Results */ + parse_result->ip_off[0] = (u8)skb_network_offset(skb); + parse_result->l4_off = (u8)skb_transport_offset(skb); + + /* Enable L3 (and L4, if TCP or UDP) HW checksum. */ + fd->cmd |= cpu_to_be32(FM_FD_CMD_RPD | FM_FD_CMD_DTC); + + /* On P1023 and similar platforms fd->cmd interpretation could + * be disabled by setting CONTEXT_A bit ICMD; currently this bit + * is not set so we do not need to check; in the future, if/when + * using context_a we need to check this bit + */ + +return_error: + return retval; +} + +static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp) +{ + struct device *dev = dpaa_bp->dev; + struct bm_buffer bmb[8]; + dma_addr_t addr; + void *new_buf; + u8 i; + + for (i = 0; i < 8; i++) { + new_buf = netdev_alloc_frag(dpaa_bp->raw_size); + if (unlikely(!new_buf)) { + dev_err(dev, "netdev_alloc_frag() failed, size %zu\n", + dpaa_bp->raw_size); + goto release_previous_buffs; + } + new_buf = PTR_ALIGN(new_buf, SMP_CACHE_BYTES); + + addr = dma_map_single(dev, new_buf, + dpaa_bp->size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, addr))) { + dev_err(dpaa_bp->dev, "DMA map failed"); + goto release_previous_buffs; + } + + bmb[i].data = 0; + bm_buffer_set64(&bmb[i], addr); + } + +release_bufs: + return dpaa_bman_release(dpaa_bp, bmb, i); + +release_previous_buffs: + WARN_ONCE(1, "dpaa_eth: failed to add buffers on Rx\n"); + + bm_buffer_set64(&bmb[i], 0); + /* Avoid releasing a completely null buffer; bman_release() requires + * at least one buffer. 
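+	 * (Here i counts the buffers successfully mapped so far; if the very
+	 * first allocation or DMA mapping failed, i == 0 and there is
+	 * nothing to hand back to BMan.)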
+ */ + if (likely(i)) + goto release_bufs; + + return 0; +} + +static int dpaa_bp_seed(struct dpaa_bp *dpaa_bp) +{ + int i; + + /* Give each CPU an allotment of "config_count" buffers */ + for_each_possible_cpu(i) { + int *count_ptr = per_cpu_ptr(dpaa_bp->percpu_count, i); + int j; + + /* Although we access another CPU's counters here + * we do it at boot time so it is safe + */ + for (j = 0; j < dpaa_bp->config_count; j += 8) + *count_ptr += dpaa_bp_add_8_bufs(dpaa_bp); + } + return 0; +} + +/* Add buffers/(pages) for Rx processing whenever bpool count falls below + * REFILL_THRESHOLD. + */ +static int dpaa_eth_refill_bpool(struct dpaa_bp *dpaa_bp, int *countptr) +{ + int count = *countptr; + int new_bufs; + + if (unlikely(count < FSL_DPAA_ETH_REFILL_THRESHOLD)) { + do { + new_bufs = dpaa_bp_add_8_bufs(dpaa_bp); + if (unlikely(!new_bufs)) { + /* Avoid looping forever if we've temporarily + * run out of memory. We'll try again at the + * next NAPI cycle. + */ + break; + } + count += new_bufs; + } while (count < FSL_DPAA_ETH_MAX_BUF_COUNT); + + *countptr = count; + if (unlikely(count < FSL_DPAA_ETH_MAX_BUF_COUNT)) + return -ENOMEM; + } + + return 0; +} + +static int dpaa_eth_refill_bpools(struct dpaa_priv *priv) +{ + struct dpaa_bp *dpaa_bp; + int *countptr; + int res, i; + + for (i = 0; i < DPAA_BPS_NUM; i++) { + dpaa_bp = priv->dpaa_bps[i]; + if (!dpaa_bp) + return -EINVAL; + countptr = this_cpu_ptr(dpaa_bp->percpu_count); + res = dpaa_eth_refill_bpool(dpaa_bp, countptr); + if (res) + return res; + } + return 0; +} + +/* Cleanup function for outgoing frame descriptors that were built on Tx path, + * either contiguous frames or scatter/gather ones. + * Skb freeing is not handled here. + * + * This function may be called on error paths in the Tx function, so guard + * against cases when not all fd relevant fields were filled in. + * + * Return the skb backpointer, since for S/G frames the buffer containing it + * gets freed here. + */ +static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, + const struct qm_fd *fd) +{ + const enum dma_data_direction dma_dir = DMA_TO_DEVICE; + struct device *dev = priv->net_dev->dev.parent; + dma_addr_t addr = qm_fd_addr(fd); + const struct qm_sg_entry *sgt; + struct sk_buff **skbh, *skb; + int nr_frags, i; + + skbh = (struct sk_buff **)phys_to_virt(addr); + skb = *skbh; + + if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) { + nr_frags = skb_shinfo(skb)->nr_frags; + dma_unmap_single(dev, addr, qm_fd_get_offset(fd) + + sizeof(struct qm_sg_entry) * (1 + nr_frags), + dma_dir); + + /* The sgt buffer has been allocated with netdev_alloc_frag(), + * it's from lowmem. + */ + sgt = phys_to_virt(addr + qm_fd_get_offset(fd)); + + /* sgt[0] is from lowmem, was dma_map_single()-ed */ + dma_unmap_single(dev, qm_sg_addr(&sgt[0]), + qm_sg_entry_get_len(&sgt[0]), dma_dir); + + /* remaining pages were mapped with skb_frag_dma_map() */ + for (i = 1; i < nr_frags; i++) { + WARN_ON(qm_sg_entry_is_ext(&sgt[i])); + + dma_unmap_page(dev, qm_sg_addr(&sgt[i]), + qm_sg_entry_get_len(&sgt[i]), dma_dir); + } + + /* Free the page frag that we allocated on Tx */ + skb_free_frag(phys_to_virt(addr)); + } else { + dma_unmap_single(dev, addr, + skb_tail_pointer(skb) - (u8 *)skbh, dma_dir); + } + + return skb; +} + +static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd) +{ + /* The parser has run and performed L4 checksum validation. + * We know there were no parser errors (and implicitly no + * L4 csum error), otherwise we wouldn't be here. 
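+	 * Put differently: CHECKSUM_UNNECESSARY is returned only when RXCSUM
+	 * is enabled on the netdev and the FD status carries the L4CV
+	 * (L4 checksum valid) bit; all other cases fall back to
+	 * CHECKSUM_NONE.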
+ */ + if ((priv->net_dev->features & NETIF_F_RXCSUM) && + (be32_to_cpu(fd->status) & FM_FD_STAT_L4CV)) + return CHECKSUM_UNNECESSARY; + + /* We're here because either the parser didn't run or the L4 checksum + * was not verified. This may include the case of a UDP frame with + * checksum zero or an L4 proto other than TCP/UDP + */ + return CHECKSUM_NONE; +} + +/* Build a linear skb around the received buffer. + * We are guaranteed there is enough room at the end of the data buffer to + * accommodate the shared info area of the skb. + */ +static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv, + const struct qm_fd *fd) +{ + ssize_t fd_off = qm_fd_get_offset(fd); + dma_addr_t addr = qm_fd_addr(fd); + struct dpaa_bp *dpaa_bp; + struct sk_buff *skb; + void *vaddr; + + vaddr = phys_to_virt(addr); + WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES)); + + dpaa_bp = dpaa_bpid2pool(fd->bpid); + if (!dpaa_bp) + goto free_buffer; + + skb = build_skb(vaddr, dpaa_bp->size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (unlikely(!skb)) { + WARN_ONCE(1, "Build skb failure on Rx\n"); + goto free_buffer; + } + WARN_ON(fd_off != priv->rx_headroom); + skb_reserve(skb, fd_off); + skb_put(skb, qm_fd_get_length(fd)); + + skb->ip_summed = rx_csum_offload(priv, fd); + + return skb; + +free_buffer: + skb_free_frag(vaddr); + return NULL; +} + +/* Build an skb with the data of the first S/G entry in the linear portion and + * the rest of the frame as skb fragments. + * + * The page fragment holding the S/G Table is recycled here. + */ +static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, + const struct qm_fd *fd) +{ + ssize_t fd_off = qm_fd_get_offset(fd); + dma_addr_t addr = qm_fd_addr(fd); + const struct qm_sg_entry *sgt; + struct page *page, *head_page; + struct dpaa_bp *dpaa_bp; + void *vaddr, *sg_vaddr; + int frag_off, frag_len; + struct sk_buff *skb; + dma_addr_t sg_addr; + int page_offset; + unsigned int sz; + int *count_ptr; + int i; + + vaddr = phys_to_virt(addr); + WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES)); + + /* Iterate through the SGT entries and add data buffers to the skb */ + sgt = vaddr + fd_off; + for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) { + /* Extension bit is not supported */ + WARN_ON(qm_sg_entry_is_ext(&sgt[i])); + + sg_addr = qm_sg_addr(&sgt[i]); + sg_vaddr = phys_to_virt(sg_addr); + WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr, + SMP_CACHE_BYTES)); + + /* We may use multiple Rx pools */ + dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); + if (!dpaa_bp) + goto free_buffers; + + count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); + dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size, + DMA_FROM_DEVICE); + if (i == 0) { + sz = dpaa_bp->size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + skb = build_skb(sg_vaddr, sz); + if (WARN_ON(unlikely(!skb))) + goto free_buffers; + + skb->ip_summed = rx_csum_offload(priv, fd); + + /* Make sure forwarded skbs will have enough space + * on Tx, if extra headers are added. + */ + WARN_ON(fd_off != priv->rx_headroom); + skb_reserve(skb, fd_off); + skb_put(skb, qm_sg_entry_get_len(&sgt[i])); + } else { + /* Not the first S/G entry; all data from buffer will + * be added in an skb fragment; fragment index is offset + * by one since first S/G entry was incorporated in the + * linear part of the skb. + * + * Caution: 'page' may be a tail page. 
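+			 * For instance (assuming 4 KiB pages), a buffer that
+			 * sits 5000 bytes into a compound page resolves via
+			 * virt_to_page() to a tail page; the arithmetic below
+			 * folds that back into an offset from the head page.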
+	 */
+			page = virt_to_page(sg_vaddr);
+			head_page = virt_to_head_page(sg_vaddr);
+
+			/* Compute offset in (possibly tail) page */
+			page_offset = ((unsigned long)sg_vaddr &
+				       (PAGE_SIZE - 1)) +
+				(page_address(page) - page_address(head_page));
+			/* page_offset only refers to the beginning of sgt[i];
+			 * but the buffer itself may have an internal offset.
+			 */
+			frag_off = qm_sg_entry_get_off(&sgt[i]) + page_offset;
+			frag_len = qm_sg_entry_get_len(&sgt[i]);
+			/* skb_add_rx_frag() does no checking on the page; if
+			 * we pass it a tail page, we'll end up with
+			 * bad page accounting and eventually with segfaults.
+			 */
+			skb_add_rx_frag(skb, i - 1, head_page, frag_off,
+					frag_len, dpaa_bp->size);
+		}
+		/* Update the pool count for the current {cpu x bpool} */
+		(*count_ptr)--;
+
+		if (qm_sg_entry_is_final(&sgt[i]))
+			break;
+	}
+	WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
+
+	/* free the SG table buffer */
+	skb_free_frag(vaddr);
+
+	return skb;
+
+free_buffers:
+	/* compensate sw bpool counter changes */
+	for (i--; i >= 0; i--) {
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (dpaa_bp) {
+			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+			(*count_ptr)++;
+		}
+	}
+	/* free all the SG entries */
+	for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) {
+		sg_addr = qm_sg_addr(&sgt[i]);
+		sg_vaddr = phys_to_virt(sg_addr);
+		skb_free_frag(sg_vaddr);
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (dpaa_bp) {
+			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+			(*count_ptr)--;
+		}
+
+		if (qm_sg_entry_is_final(&sgt[i]))
+			break;
+	}
+	/* free the SGT fragment */
+	skb_free_frag(vaddr);
+
+	return NULL;
+}
+
+static int skb_to_contig_fd(struct dpaa_priv *priv,
+			    struct sk_buff *skb, struct qm_fd *fd,
+			    int *offset)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	enum dma_data_direction dma_dir;
+	unsigned char *buffer_start;
+	struct sk_buff **skbh;
+	dma_addr_t addr;
+	int err;
+
+	/* We are guaranteed to have at least tx_headroom bytes
+	 * available, so just use that for offset.
+	 */
+	fd->bpid = FSL_DPAA_BPID_INV;
+	buffer_start = skb->data - priv->tx_headroom;
+	dma_dir = DMA_TO_DEVICE;
+
+	skbh = (struct sk_buff **)buffer_start;
+	*skbh = skb;
+
+	/* Enable L3/L4 hardware checksum computation.
+	 *
+	 * We must do this before dma_map_single(DMA_TO_DEVICE), because we may
+	 * need to write into the skb.
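+	 * (dpaa_enable_tx_csum() may reset the MAC header and writes the
+	 * parse results into the buffer headroom, so it must run while the
+	 * CPU still owns the buffer.)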
+ */ + err = dpaa_enable_tx_csum(priv, skb, fd, + ((char *)skbh) + DPAA_TX_PRIV_DATA_SIZE); + if (unlikely(err < 0)) { + if (net_ratelimit()) + netif_err(priv, tx_err, net_dev, "HW csum error: %d\n", + err); + return err; + } + + /* Fill in the rest of the FD fields */ + qm_fd_set_contig(fd, priv->tx_headroom, skb->len); + fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO); + + /* Map the entire buffer size that may be seen by FMan, but no more */ + addr = dma_map_single(dev, skbh, + skb_tail_pointer(skb) - buffer_start, dma_dir); + if (unlikely(dma_mapping_error(dev, addr))) { + if (net_ratelimit()) + netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n"); + return -EINVAL; + } + qm_fd_addr_set64(fd, addr); + + return 0; +} + +static int skb_to_sg_fd(struct dpaa_priv *priv, + struct sk_buff *skb, struct qm_fd *fd) +{ + const enum dma_data_direction dma_dir = DMA_TO_DEVICE; + const int nr_frags = skb_shinfo(skb)->nr_frags; + struct net_device *net_dev = priv->net_dev; + struct device *dev = net_dev->dev.parent; + struct qm_sg_entry *sgt; + struct sk_buff **skbh; + int i, j, err, sz; + void *buffer_start; + skb_frag_t *frag; + dma_addr_t addr; + size_t frag_len; + void *sgt_buf; + + /* get a page frag to store the SGTable */ + sz = SKB_DATA_ALIGN(priv->tx_headroom + + sizeof(struct qm_sg_entry) * (1 + nr_frags)); + sgt_buf = netdev_alloc_frag(sz); + if (unlikely(!sgt_buf)) { + netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n", + sz); + return -ENOMEM; + } + + /* Enable L3/L4 hardware checksum computation. + * + * We must do this before dma_map_single(DMA_TO_DEVICE), because we may + * need to write into the skb. + */ + err = dpaa_enable_tx_csum(priv, skb, fd, + sgt_buf + DPAA_TX_PRIV_DATA_SIZE); + if (unlikely(err < 0)) { + if (net_ratelimit()) + netif_err(priv, tx_err, net_dev, "HW csum error: %d\n", + err); + goto csum_failed; + } + + sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom); + qm_sg_entry_set_len(&sgt[0], skb_headlen(skb)); + sgt[0].bpid = FSL_DPAA_BPID_INV; + sgt[0].offset = 0; + addr = dma_map_single(dev, skb->data, + skb_headlen(skb), dma_dir); + if (unlikely(dma_mapping_error(dev, addr))) { + dev_err(dev, "DMA mapping failed"); + err = -EINVAL; + goto sg0_map_failed; + } + qm_sg_entry_set64(&sgt[0], addr); + + /* populate the rest of SGT entries */ + frag = &skb_shinfo(skb)->frags[0]; + frag_len = frag->size; + for (i = 1; i <= nr_frags; i++, frag++) { + WARN_ON(!skb_frag_page(frag)); + addr = skb_frag_dma_map(dev, frag, 0, + frag_len, dma_dir); + if (unlikely(dma_mapping_error(dev, addr))) { + dev_err(dev, "DMA mapping failed"); + err = -EINVAL; + goto sg_map_failed; + } + + qm_sg_entry_set_len(&sgt[i], frag_len); + sgt[i].bpid = FSL_DPAA_BPID_INV; + sgt[i].offset = 0; + + /* keep the offset in the address */ + qm_sg_entry_set64(&sgt[i], addr); + frag_len = frag->size; + } + qm_sg_entry_set_f(&sgt[i - 1], frag_len); + + qm_fd_set_sg(fd, priv->tx_headroom, skb->len); + + /* DMA map the SGT page */ + buffer_start = (void *)sgt - priv->tx_headroom; + skbh = (struct sk_buff **)buffer_start; + *skbh = skb; + + addr = dma_map_single(dev, buffer_start, priv->tx_headroom + + sizeof(struct qm_sg_entry) * (1 + nr_frags), + dma_dir); + if (unlikely(dma_mapping_error(dev, addr))) { + dev_err(dev, "DMA mapping failed"); + err = -EINVAL; + goto sgt_map_failed; + } + + fd->bpid = FSL_DPAA_BPID_INV; + fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO); + qm_fd_addr_set64(fd, addr); + + return 0; + +sgt_map_failed: +sg_map_failed: + for (j = 0; j < i; j++) + dma_unmap_page(dev, 
qm_sg_addr(&sgt[j]),
+			       qm_sg_entry_get_len(&sgt[j]), dma_dir);
+sg0_map_failed:
+csum_failed:
+	skb_free_frag(sgt_buf);
+
+	return err;
+}
+
+static inline int dpaa_xmit(struct dpaa_priv *priv,
+			    struct rtnl_link_stats64 *percpu_stats,
+			    int queue,
+			    struct qm_fd *fd)
+{
+	struct qman_fq *egress_fq;
+	int err, i;
+
+	egress_fq = priv->egress_fqs[queue];
+	if (fd->bpid == FSL_DPAA_BPID_INV)
+		fd->cmd |= cpu_to_be32(qman_fq_fqid(priv->conf_fqs[queue]));
+
+	/* Trace this Tx fd */
+	trace_dpaa_tx_fd(priv->net_dev, egress_fq, fd);
+
+	for (i = 0; i < DPAA_ENQUEUE_RETRIES; i++) {
+		err = qman_enqueue(egress_fq, fd);
+		if (err != -EBUSY)
+			break;
+	}
+
+	if (unlikely(err < 0)) {
+		percpu_stats->tx_errors++;
+		percpu_stats->tx_fifo_errors++;
+		return err;
+	}
+
+	percpu_stats->tx_packets++;
+	percpu_stats->tx_bytes += qm_fd_get_length(fd);
+
+	return 0;
+}
+
+static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+{
+	const int queue_mapping = skb_get_queue_mapping(skb);
+	bool nonlinear = skb_is_nonlinear(skb);
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa_percpu_priv *percpu_priv;
+	struct dpaa_priv *priv;
+	struct qm_fd fd;
+	int offset = 0;
+	int err = 0;
+
+	priv = netdev_priv(net_dev);
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+	percpu_stats = &percpu_priv->stats;
+
+	qm_fd_clear_fd(&fd);
+
+	if (!nonlinear) {
+		/* We're going to store the skb backpointer at the beginning
+		 * of the data buffer, so we need a privately owned skb
+		 *
+		 * We've made sure skb is not shared in dev->priv_flags,
+		 * we need to verify the skb head is not cloned
+		 */
+		if (skb_cow_head(skb, priv->tx_headroom))
+			goto enomem;
+
+		WARN_ON(skb_is_nonlinear(skb));
+	}
+
+	/* MAX_SKB_FRAGS is equal to or larger than our DPAA_SGT_MAX_ENTRIES;
+	 * make sure we don't feed FMan with more fragments than it supports.
+	 */
+	if (nonlinear &&
+	    likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
+		/* Just create a S/G fd based on the skb */
+		err = skb_to_sg_fd(priv, skb, &fd);
+		percpu_priv->tx_frag_skbuffs++;
+	} else {
+		/* If the egress skb contains more fragments than we support
+		 * we have no choice but to linearize it ourselves.
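+		 * (__skb_linearize() copies all fragments into a single
+		 * contiguous buffer, after which the contig-FD path below
+		 * applies.)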
+ */ + if (unlikely(nonlinear) && __skb_linearize(skb)) + goto enomem; + + /* Finally, create a contig FD from this skb */ + err = skb_to_contig_fd(priv, skb, &fd, &offset); + } + if (unlikely(err < 0)) + goto skb_to_fd_failed; + + if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0)) + return NETDEV_TX_OK; + + dpaa_cleanup_tx_fd(priv, &fd); +skb_to_fd_failed: +enomem: + percpu_stats->tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void dpaa_rx_error(struct net_device *net_dev, + const struct dpaa_priv *priv, + struct dpaa_percpu_priv *percpu_priv, + const struct qm_fd *fd, + u32 fqid) +{ + if (net_ratelimit()) + netif_err(priv, hw, net_dev, "Err FD status = 0x%08x\n", + be32_to_cpu(fd->status) & FM_FD_STAT_RX_ERRORS); + + percpu_priv->stats.rx_errors++; + + if (be32_to_cpu(fd->status) & FM_FD_ERR_DMA) + percpu_priv->rx_errors.dme++; + if (be32_to_cpu(fd->status) & FM_FD_ERR_PHYSICAL) + percpu_priv->rx_errors.fpe++; + if (be32_to_cpu(fd->status) & FM_FD_ERR_SIZE) + percpu_priv->rx_errors.fse++; + if (be32_to_cpu(fd->status) & FM_FD_ERR_PRS_HDR_ERR) + percpu_priv->rx_errors.phe++; + + dpaa_fd_release(net_dev, fd); +} + +static void dpaa_tx_error(struct net_device *net_dev, + const struct dpaa_priv *priv, + struct dpaa_percpu_priv *percpu_priv, + const struct qm_fd *fd, + u32 fqid) +{ + struct sk_buff *skb; + + if (net_ratelimit()) + netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n", + be32_to_cpu(fd->status) & FM_FD_STAT_TX_ERRORS); + + percpu_priv->stats.tx_errors++; + + skb = dpaa_cleanup_tx_fd(priv, fd); + dev_kfree_skb(skb); +} + +static int dpaa_eth_poll(struct napi_struct *napi, int budget) +{ + struct dpaa_napi_portal *np = + container_of(napi, struct dpaa_napi_portal, napi); + + int cleaned = qman_p_poll_dqrr(np->p, budget); + + if (cleaned < budget) { + napi_complete_done(napi, cleaned); + qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); + + } else if (np->down) { + qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); + } + + return cleaned; +} + +static void dpaa_tx_conf(struct net_device *net_dev, + const struct dpaa_priv *priv, + struct dpaa_percpu_priv *percpu_priv, + const struct qm_fd *fd, + u32 fqid) +{ + struct sk_buff *skb; + + if (unlikely(be32_to_cpu(fd->status) & FM_FD_STAT_TX_ERRORS)) { + if (net_ratelimit()) + netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n", + be32_to_cpu(fd->status) & + FM_FD_STAT_TX_ERRORS); + + percpu_priv->stats.tx_errors++; + } + + percpu_priv->tx_confirm++; + + skb = dpaa_cleanup_tx_fd(priv, fd); + + consume_skb(skb); +} + +static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv, + struct qman_portal *portal) +{ + if (unlikely(in_irq() || !in_serving_softirq())) { + /* Disable QMan IRQ and invoke NAPI */ + qman_p_irqsource_remove(portal, QM_PIRQ_DQRI); + + percpu_priv->np.p = portal; + napi_schedule(&percpu_priv->np.napi); + percpu_priv->in_interrupt++; + return 1; + } + return 0; +} + +static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal, + struct qman_fq *fq, + const struct qm_dqrr_entry *dq) +{ + struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base); + struct dpaa_percpu_priv *percpu_priv; + struct net_device *net_dev; + struct dpaa_bp *dpaa_bp; + struct dpaa_priv *priv; + + net_dev = dpaa_fq->net_dev; + priv = netdev_priv(net_dev); + dpaa_bp = dpaa_bpid2pool(dq->fd.bpid); + if (!dpaa_bp) + return qman_cb_dqrr_consume; + + percpu_priv = this_cpu_ptr(priv->percpu_priv); + + if (dpaa_eth_napi_schedule(percpu_priv, portal)) + return qman_cb_dqrr_stop; + + if 
(dpaa_eth_refill_bpools(priv)) + /* Unable to refill the buffer pool due to insufficient + * system memory. Just release the frame back into the pool, + * otherwise we'll soon end up with an empty buffer pool. + */ + dpaa_fd_release(net_dev, &dq->fd); + else + dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); + + return qman_cb_dqrr_consume; +} + +static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, + struct qman_fq *fq, + const struct qm_dqrr_entry *dq) +{ + struct rtnl_link_stats64 *percpu_stats; + struct dpaa_percpu_priv *percpu_priv; + const struct qm_fd *fd = &dq->fd; + dma_addr_t addr = qm_fd_addr(fd); + enum qm_fd_format fd_format; + struct net_device *net_dev; + u32 fd_status; + struct dpaa_bp *dpaa_bp; + struct dpaa_priv *priv; + unsigned int skb_len; + struct sk_buff *skb; + int *count_ptr; + + fd_status = be32_to_cpu(fd->status); + fd_format = qm_fd_get_format(fd); + net_dev = ((struct dpaa_fq *)fq)->net_dev; + priv = netdev_priv(net_dev); + dpaa_bp = dpaa_bpid2pool(dq->fd.bpid); + if (!dpaa_bp) + return qman_cb_dqrr_consume; + + /* Trace the Rx fd */ + trace_dpaa_rx_fd(net_dev, fq, &dq->fd); + + percpu_priv = this_cpu_ptr(priv->percpu_priv); + percpu_stats = &percpu_priv->stats; + + if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal))) + return qman_cb_dqrr_stop; + + /* Make sure we didn't run out of buffers */ + if (unlikely(dpaa_eth_refill_bpools(priv))) { + /* Unable to refill the buffer pool due to insufficient + * system memory. Just release the frame back into the pool, + * otherwise we'll soon end up with an empty buffer pool. + */ + dpaa_fd_release(net_dev, &dq->fd); + return qman_cb_dqrr_consume; + } + + if (unlikely(fd_status & FM_FD_STAT_RX_ERRORS) != 0) { + if (net_ratelimit()) + netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n", + fd_status & FM_FD_STAT_RX_ERRORS); + + percpu_stats->rx_errors++; + dpaa_fd_release(net_dev, fd); + return qman_cb_dqrr_consume; + } + + dpaa_bp = dpaa_bpid2pool(fd->bpid); + if (!dpaa_bp) + return qman_cb_dqrr_consume; + + dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE); + + /* prefetch the first 64 bytes of the frame or the SGT start */ + prefetch(phys_to_virt(addr) + qm_fd_get_offset(fd)); + + fd_format = qm_fd_get_format(fd); + /* The only FD types that we may receive are contig and S/G */ + WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg)); + + /* Account for either the contig buffer or the SGT buffer (depending on + * which case we were in) having been removed from the pool. 
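+	 * Exactly one buffer is consumed from the pool at this point in both
+	 * cases; for S/G frames, the per-entry data buffers are accounted
+	 * for later, inside sg_fd_to_skb().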
+ */ + count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); + (*count_ptr)--; + + if (likely(fd_format == qm_fd_contig)) + skb = contig_fd_to_skb(priv, fd); + else + skb = sg_fd_to_skb(priv, fd); + if (!skb) + return qman_cb_dqrr_consume; + + skb->protocol = eth_type_trans(skb, net_dev); + + skb_len = skb->len; + + if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) + return qman_cb_dqrr_consume; + + percpu_stats->rx_packets++; + percpu_stats->rx_bytes += skb_len; + + return qman_cb_dqrr_consume; +} + +static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal, + struct qman_fq *fq, + const struct qm_dqrr_entry *dq) +{ + struct dpaa_percpu_priv *percpu_priv; + struct net_device *net_dev; + struct dpaa_priv *priv; + + net_dev = ((struct dpaa_fq *)fq)->net_dev; + priv = netdev_priv(net_dev); + + percpu_priv = this_cpu_ptr(priv->percpu_priv); + + if (dpaa_eth_napi_schedule(percpu_priv, portal)) + return qman_cb_dqrr_stop; + + dpaa_tx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); + + return qman_cb_dqrr_consume; +} + +static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal, + struct qman_fq *fq, + const struct qm_dqrr_entry *dq) +{ + struct dpaa_percpu_priv *percpu_priv; + struct net_device *net_dev; + struct dpaa_priv *priv; + + net_dev = ((struct dpaa_fq *)fq)->net_dev; + priv = netdev_priv(net_dev); + + /* Trace the fd */ + trace_dpaa_tx_conf_fd(net_dev, fq, &dq->fd); + + percpu_priv = this_cpu_ptr(priv->percpu_priv); + + if (dpaa_eth_napi_schedule(percpu_priv, portal)) + return qman_cb_dqrr_stop; + + dpaa_tx_conf(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); + + return qman_cb_dqrr_consume; +} + +static void egress_ern(struct qman_portal *portal, + struct qman_fq *fq, + const union qm_mr_entry *msg) +{ + const struct qm_fd *fd = &msg->ern.fd; + struct dpaa_percpu_priv *percpu_priv; + const struct dpaa_priv *priv; + struct net_device *net_dev; + struct sk_buff *skb; + + net_dev = ((struct dpaa_fq *)fq)->net_dev; + priv = netdev_priv(net_dev); + percpu_priv = this_cpu_ptr(priv->percpu_priv); + + percpu_priv->stats.tx_dropped++; + percpu_priv->stats.tx_fifo_errors++; + count_ern(percpu_priv, msg); + + skb = dpaa_cleanup_tx_fd(priv, fd); + dev_kfree_skb_any(skb); +} + +static const struct dpaa_fq_cbs dpaa_fq_cbs = { + .rx_defq = { .cb = { .dqrr = rx_default_dqrr } }, + .tx_defq = { .cb = { .dqrr = conf_dflt_dqrr } }, + .rx_errq = { .cb = { .dqrr = rx_error_dqrr } }, + .tx_errq = { .cb = { .dqrr = conf_error_dqrr } }, + .egress_ern = { .cb = { .ern = egress_ern } } +}; + +static void dpaa_eth_napi_enable(struct dpaa_priv *priv) +{ + struct dpaa_percpu_priv *percpu_priv; + int i; + + for_each_possible_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + + percpu_priv->np.down = 0; + napi_enable(&percpu_priv->np.napi); + } +} + +static void dpaa_eth_napi_disable(struct dpaa_priv *priv) +{ + struct dpaa_percpu_priv *percpu_priv; + int i; + + for_each_possible_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + + percpu_priv->np.down = 1; + napi_disable(&percpu_priv->np.napi); + } +} + +static int dpaa_open(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct dpaa_priv *priv; + int err, i; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + dpaa_eth_napi_enable(priv); + + net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); + if (!net_dev->phydev) { + netif_err(priv, ifup, net_dev, "init_phy() failed\n"); + err = -ENODEV; + goto phy_init_failed; + } + + for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { + 
err = fman_port_enable(mac_dev->port[i]); + if (err) + goto mac_start_failed; + } + + err = priv->mac_dev->start(mac_dev); + if (err < 0) { + netif_err(priv, ifup, net_dev, "mac_dev->start() = %d\n", err); + goto mac_start_failed; + } + + netif_tx_start_all_queues(net_dev); + + return 0; + +mac_start_failed: + for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) + fman_port_disable(mac_dev->port[i]); + +phy_init_failed: + dpaa_eth_napi_disable(priv); + + return err; +} + +static int dpaa_eth_stop(struct net_device *net_dev) +{ + struct dpaa_priv *priv; + int err; + + err = dpaa_stop(net_dev); + + priv = netdev_priv(net_dev); + dpaa_eth_napi_disable(priv); + + return err; +} + +static int dpaa_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd) +{ + if (!net_dev->phydev) + return -EINVAL; + return phy_mii_ioctl(net_dev->phydev, rq, cmd); +} + +static const struct net_device_ops dpaa_ops = { + .ndo_open = dpaa_open, + .ndo_start_xmit = dpaa_start_xmit, + .ndo_stop = dpaa_eth_stop, + .ndo_tx_timeout = dpaa_tx_timeout, + .ndo_get_stats64 = dpaa_get_stats64, + .ndo_set_mac_address = dpaa_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_rx_mode = dpaa_set_rx_mode, + .ndo_do_ioctl = dpaa_ioctl, + .ndo_setup_tc = dpaa_setup_tc, +}; + +static int dpaa_napi_add(struct net_device *net_dev) +{ + struct dpaa_priv *priv = netdev_priv(net_dev); + struct dpaa_percpu_priv *percpu_priv; + int cpu; + + for_each_possible_cpu(cpu) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu); + + netif_napi_add(net_dev, &percpu_priv->np.napi, + dpaa_eth_poll, NAPI_POLL_WEIGHT); + } + + return 0; +} + +static void dpaa_napi_del(struct net_device *net_dev) +{ + struct dpaa_priv *priv = netdev_priv(net_dev); + struct dpaa_percpu_priv *percpu_priv; + int cpu; + + for_each_possible_cpu(cpu) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu); + + netif_napi_del(&percpu_priv->np.napi); + } +} + +static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp, + struct bm_buffer *bmb) +{ + dma_addr_t addr = bm_buf_addr(bmb); + + dma_unmap_single(bp->dev, addr, bp->size, DMA_FROM_DEVICE); + + skb_free_frag(phys_to_virt(addr)); +} + +/* Alloc the dpaa_bp struct and configure default values */ +static struct dpaa_bp *dpaa_bp_alloc(struct device *dev) +{ + struct dpaa_bp *dpaa_bp; + + dpaa_bp = devm_kzalloc(dev, sizeof(*dpaa_bp), GFP_KERNEL); + if (!dpaa_bp) + return ERR_PTR(-ENOMEM); + + dpaa_bp->bpid = FSL_DPAA_BPID_INV; + dpaa_bp->percpu_count = devm_alloc_percpu(dev, *dpaa_bp->percpu_count); + dpaa_bp->config_count = FSL_DPAA_ETH_MAX_BUF_COUNT; + + dpaa_bp->seed_cb = dpaa_bp_seed; + dpaa_bp->free_buf_cb = dpaa_bp_free_pf; + + return dpaa_bp; +} + +/* Place all ingress FQs (Rx Default, Rx Error) in a dedicated CGR. + * We won't be sending congestion notifications to FMan; for now, we just use + * this CGR to generate enqueue rejections to FMan in order to drop the frames + * before they reach our ingress queues and eat up memory. + */ +static int dpaa_ingress_cgr_init(struct dpaa_priv *priv) +{ + struct qm_mcc_initcgr initcgr; + u32 cs_th; + int err; + + err = qman_alloc_cgrid(&priv->ingress_cgr.cgrid); + if (err < 0) { + if (netif_msg_drv(priv)) + pr_err("Error %d allocating CGR ID\n", err); + goto out_error; + } + + /* Enable CS TD, but disable Congestion State Change Notifications. 
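+	 * Note that QM_CGR_WE_CSCN_EN is left out of we_mask, so the cscn_en
+	 * assignment below is presumably not committed to hardware; only the
+	 * threshold and the tail-drop enable take effect.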
 */
+	memset(&initcgr, 0, sizeof(initcgr));
+	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES);
+	initcgr.cgr.cscn_en = QM_CGR_EN;
+	cs_th = DPAA_INGRESS_CS_THRESHOLD;
+	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
+
+	initcgr.we_mask |= cpu_to_be16(QM_CGR_WE_CSTD_EN);
+	initcgr.cgr.cstd_en = QM_CGR_EN;
+
+	/* This CGR will be associated with the SWP affined to the current CPU.
+	 * However, we'll place all our ingress FQs in it.
+	 */
+	err = qman_create_cgr(&priv->ingress_cgr, QMAN_CGR_FLAG_USE_INIT,
+			      &initcgr);
+	if (err < 0) {
+		if (netif_msg_drv(priv))
+			pr_err("Error %d creating ingress CGR with ID %d\n",
+			       err, priv->ingress_cgr.cgrid);
+		qman_release_cgrid(priv->ingress_cgr.cgrid);
+		goto out_error;
+	}
+	if (netif_msg_drv(priv))
+		pr_debug("Created ingress CGR %d for netdev with hwaddr %pM\n",
+			 priv->ingress_cgr.cgrid, priv->mac_dev->addr);
+
+	priv->use_ingress_cgr = true;
+
+out_error:
+	return err;
+}
+
+static const struct of_device_id dpaa_match[];
+
+static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
+{
+	u16 headroom;
+
+	/* The frame headroom must accommodate:
+	 * - the driver private data area
+	 * - parse results, hash results, timestamp if selected
+	 * If either hash results or timestamp is selected, both will
+	 * be copied to/from the frame headroom, as TS is located between PR and
+	 * HR in the IC and IC copy size has a granularity of 16 bytes
+	 * (see description of FMBM_RICP and FMBM_TICP registers in DPAARM)
+	 *
+	 * Also make sure the headroom is a multiple of data_align bytes
+	 */
+	headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE +
+			 DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE);
+
+	return DPAA_FD_DATA_ALIGNMENT ? ALIGN(headroom,
+					      DPAA_FD_DATA_ALIGNMENT) :
+					headroom;
+}
+
+static int dpaa_eth_probe(struct platform_device *pdev)
+{
+	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
+	struct dpaa_percpu_priv *percpu_priv;
+	struct net_device *net_dev = NULL;
+	struct dpaa_fq *dpaa_fq, *tmp;
+	struct dpaa_priv *priv = NULL;
+	struct fm_port_fqs port_fqs;
+	struct mac_device *mac_dev;
+	int err = 0, i, channel;
+	struct device *dev;
+
+	dev = &pdev->dev;
+
+	/* Allocate this early, so we can store relevant information in
+	 * the private area
+	 */
+	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
+	if (!net_dev) {
+		dev_err(dev, "alloc_etherdev_mq() failed\n");
+		goto alloc_etherdev_mq_failed;
+	}
+
+	/* Do this here, so we can be verbose early */
+	SET_NETDEV_DEV(net_dev, dev);
+	dev_set_drvdata(dev, net_dev);
+
+	priv = netdev_priv(net_dev);
+	priv->net_dev = net_dev;
+
+	priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT);
+
+	mac_dev = dpaa_mac_dev_get(pdev);
+	if (IS_ERR(mac_dev)) {
+		dev_err(dev, "dpaa_mac_dev_get() failed\n");
+		err = PTR_ERR(mac_dev);
+		goto mac_probe_failed;
+	}
+
+	/* If fsl_fm_max_frm is set to a higher value than the common 1500,
+	 * we choose conservatively and let the user explicitly set a higher
+	 * MTU via ifconfig. Otherwise, the user may end up with different MTUs
+	 * in the same LAN.
+	 * If on the other hand fsl_fm_max_frm has been chosen below 1500,
+	 * start with the maximum allowed.
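+	 * In short, the assignment below comes down to
+	 * min(dpaa_get_max_mtu(), ETH_DATA_LEN): never more than 1500 out
+	 * of the box, even when the FMan is configured for jumbo frames.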
+ */ + net_dev->mtu = min(dpaa_get_max_mtu(), ETH_DATA_LEN); + + netdev_dbg(net_dev, "Setting initial MTU on net device: %d\n", + net_dev->mtu); + + priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */ + priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */ + + /* device used for DMA mapping */ + arch_setup_dma_ops(dev, 0, 0, NULL, false); + err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (err) { + dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); + goto dev_mask_failed; + } + + /* bp init */ + for (i = 0; i < DPAA_BPS_NUM; i++) { + int err; + + dpaa_bps[i] = dpaa_bp_alloc(dev); + if (IS_ERR(dpaa_bps[i])) + return PTR_ERR(dpaa_bps[i]); + /* the raw size of the buffers used for reception */ + dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM); + /* avoid runtime computations by keeping the usable size here */ + dpaa_bps[i]->size = dpaa_bp_size(dpaa_bps[i]->raw_size); + dpaa_bps[i]->dev = dev; + + err = dpaa_bp_alloc_pool(dpaa_bps[i]); + if (err < 0) { + dpaa_bps_free(priv); + priv->dpaa_bps[i] = NULL; + goto bp_create_failed; + } + priv->dpaa_bps[i] = dpaa_bps[i]; + } + + INIT_LIST_HEAD(&priv->dpaa_fq_list); + + memset(&port_fqs, 0, sizeof(port_fqs)); + + err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs); + if (err < 0) { + dev_err(dev, "dpaa_alloc_all_fqs() failed\n"); + goto fq_probe_failed; + } + + priv->mac_dev = mac_dev; + + channel = dpaa_get_channel(); + if (channel < 0) { + dev_err(dev, "dpaa_get_channel() failed\n"); + err = channel; + goto get_channel_failed; + } + + priv->channel = (u16)channel; + + /* Start a thread that will walk the CPUs with affine portals + * and add this pool channel to each's dequeue mask. + */ + dpaa_eth_add_channel(priv->channel); + + dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]); + + /* Create a congestion group for this netdev, with + * dynamically-allocated CGR ID. + * Must be executed after probing the MAC, but before + * assigning the egress FQs to the CGRs. 
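+	 * (dpaa_eth_cgr_init() consults mac_dev->if_support to pick the
+	 * congestion threshold, hence the ordering constraint with the MAC
+	 * probe above.)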
+ */ + err = dpaa_eth_cgr_init(priv); + if (err < 0) { + dev_err(dev, "Error initializing CGR\n"); + goto tx_cgr_init_failed; + } + + err = dpaa_ingress_cgr_init(priv); + if (err < 0) { + dev_err(dev, "Error initializing ingress CGR\n"); + goto rx_cgr_init_failed; + } + + /* Add the FQs to the interface, and make them active */ + list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) { + err = dpaa_fq_init(dpaa_fq, false); + if (err < 0) + goto fq_alloc_failed; + } + + priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]); + priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]); + + /* All real interfaces need their ports initialized */ + err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs, + &priv->buf_layout[0], dev); + if (err) + goto init_ports_failed; + + priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv); + if (!priv->percpu_priv) { + dev_err(dev, "devm_alloc_percpu() failed\n"); + err = -ENOMEM; + goto alloc_percpu_failed; + } + for_each_possible_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + memset(percpu_priv, 0, sizeof(*percpu_priv)); + } + + priv->num_tc = 1; + netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM); + + /* Initialize NAPI */ + err = dpaa_napi_add(net_dev); + if (err < 0) + goto napi_add_failed; + + err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout); + if (err < 0) + goto netdev_init_failed; + + dpaa_eth_sysfs_init(&net_dev->dev); + + netif_info(priv, probe, net_dev, "Probed interface %s\n", + net_dev->name); + + return 0; + +netdev_init_failed: +napi_add_failed: + dpaa_napi_del(net_dev); +alloc_percpu_failed: +init_ports_failed: + dpaa_fq_free(dev, &priv->dpaa_fq_list); +fq_alloc_failed: + qman_delete_cgr_safe(&priv->ingress_cgr); + qman_release_cgrid(priv->ingress_cgr.cgrid); +rx_cgr_init_failed: + qman_delete_cgr_safe(&priv->cgr_data.cgr); + qman_release_cgrid(priv->cgr_data.cgr.cgrid); +tx_cgr_init_failed: +get_channel_failed: + dpaa_bps_free(priv); +bp_create_failed: +fq_probe_failed: +dev_mask_failed: +mac_probe_failed: + dev_set_drvdata(dev, NULL); + free_netdev(net_dev); +alloc_etherdev_mq_failed: + for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) { + if (atomic_read(&dpaa_bps[i]->refs) == 0) + devm_kfree(dev, dpaa_bps[i]); + } + return err; +} + +static int dpaa_remove(struct platform_device *pdev) +{ + struct net_device *net_dev; + struct dpaa_priv *priv; + struct device *dev; + int err; + + dev = &pdev->dev; + net_dev = dev_get_drvdata(dev); + + priv = netdev_priv(net_dev); + + dpaa_eth_sysfs_remove(dev); + + dev_set_drvdata(dev, NULL); + unregister_netdev(net_dev); + + err = dpaa_fq_free(dev, &priv->dpaa_fq_list); + + qman_delete_cgr_safe(&priv->ingress_cgr); + qman_release_cgrid(priv->ingress_cgr.cgrid); + qman_delete_cgr_safe(&priv->cgr_data.cgr); + qman_release_cgrid(priv->cgr_data.cgr.cgrid); + + dpaa_napi_del(net_dev); + + dpaa_bps_free(priv); + + free_netdev(net_dev); + + return err; +} + +static struct platform_device_id dpaa_devtype[] = { + { + .name = "dpaa-ethernet", + .driver_data = 0, + }, { + } +}; +MODULE_DEVICE_TABLE(platform, dpaa_devtype); + +static struct platform_driver dpaa_driver = { + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = dpaa_devtype, + .probe = dpaa_eth_probe, + .remove = dpaa_remove +}; + +static int __init dpaa_load(void) +{ + int err; + + pr_debug("FSL DPAA Ethernet driver\n"); + + /* initialize dpaa_eth mirror values */ + dpaa_rx_extra_headroom = fman_get_rx_extra_headroom(); + dpaa_max_frm = fman_get_max_frm(); + + 
err = platform_driver_register(&dpaa_driver); + if (err < 0) + pr_err("Error, platform_driver_register() = %d\n", err); + + return err; +} +module_init(dpaa_load); + +static void __exit dpaa_unload(void) +{ + platform_driver_unregister(&dpaa_driver); + + /* Only one channel is used and needs to be released after all + * interfaces are removed + */ + dpaa_release_channel(); +} +module_exit(dpaa_unload); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL DPAA Ethernet driver"); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h new file mode 100644 index 0000000..9941a78 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -0,0 +1,191 @@ +/* Copyright 2008 - 2016 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __DPAA_H
+#define __DPAA_H
+
+#include <linux/netdevice.h>
+#include <soc/fsl/qman.h>
+#include <soc/fsl/bman.h>
+
+#include "fman.h"
+#include "mac.h"
+#include "dpaa_eth_trace.h"
+
+/* Number of prioritised traffic classes */
+#define DPAA_TC_NUM		4
+/* Number of Tx queues per traffic class */
+#define DPAA_TC_TXQ_NUM		NR_CPUS
+/* Total number of Tx queues */
+#define DPAA_ETH_TXQ_NUM	(DPAA_TC_NUM * DPAA_TC_TXQ_NUM)
+
+#define DPAA_BPS_NUM 3 /* number of bpools per interface */
+
+/* More detailed FQ types - used for fine-grained WQ assignments */
+enum dpaa_fq_type {
+	FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
+	FQ_TYPE_RX_ERROR,	/* Rx Error FQs */
+	FQ_TYPE_TX,		/* "Real" Tx FQs */
+	FQ_TYPE_TX_CONFIRM,	/* Tx default Conf FQ (actually an Rx FQ) */
+	FQ_TYPE_TX_CONF_MQ,	/* Tx conf FQs (one for each Tx FQ) */
+	FQ_TYPE_TX_ERROR,	/* Tx Error FQs (these are actually Rx FQs) */
+};
+
+struct dpaa_fq {
+	struct qman_fq fq_base;
+	struct list_head list;
+	struct net_device *net_dev;
+	bool init;
+	u32 fqid;
+	u32 flags;
+	u16 channel;
+	u8 wq;
+	enum dpaa_fq_type fq_type;
+};
+
+struct dpaa_fq_cbs {
+	struct qman_fq rx_defq;
+	struct qman_fq tx_defq;
+	struct qman_fq rx_errq;
+	struct qman_fq tx_errq;
+	struct qman_fq egress_ern;
+};
+
+struct dpaa_bp {
+	/* device used in the DMA mapping operations */
+	struct device *dev;
+	/* current number of buffers in the buffer pool allotted to each CPU */
+	int __percpu *percpu_count;
+	/* all buffers allocated for this pool have this raw size */
+	size_t raw_size;
+	/* all buffers in this pool have this same usable size */
+	size_t size;
+	/* the buffer pools are initialized with config_count buffers for each
+	 * CPU; at runtime the number of buffers per CPU is constantly brought
+	 * back to this level
+	 */
+	u16 config_count;
+	u8 bpid;
+	struct bman_pool *pool;
+	/* bpool can be seeded before use by this cb */
+	int (*seed_cb)(struct dpaa_bp *);
+	/* bpool can be emptied before freeing by this cb */
+	void (*free_buf_cb)(const struct dpaa_bp *, struct bm_buffer *);
+	atomic_t refs;
+};
+
+struct dpaa_rx_errors {
+	u64 dme;		/* DMA Error */
+	u64 fpe;		/* Frame Physical Error */
+	u64 fse;		/* Frame Size Error */
+	u64 phe;		/* Header Error */
+};
+
+/* Counters for QMan ERN frames - one counter per rejection code */
+struct dpaa_ern_cnt {
+	u64 cg_tdrop;		/* Congestion group taildrop */
+	u64 wred;		/* WRED congestion */
+	u64 err_cond;		/* Error condition */
+	u64 early_window;	/* Order restoration, frame too early */
+	u64 late_window;	/* Order restoration, frame too late */
+	u64 fq_tdrop;		/* FQ taildrop */
+	u64 fq_retired;		/* FQ is retired */
+	u64 orp_zero;		/* ORP disabled */
+};
+
+struct dpaa_napi_portal {
+	struct napi_struct napi;
+	struct qman_portal *p;
+	bool down;
+};
+
+struct dpaa_percpu_priv {
+	struct net_device *net_dev;
+	struct dpaa_napi_portal np;
+	u64 in_interrupt;
+	u64 tx_confirm;
+	/* fragmented (non-linear) skbuffs received from the stack */
+	u64 tx_frag_skbuffs;
+	struct rtnl_link_stats64 stats;
+	struct dpaa_rx_errors rx_errors;
+	struct dpaa_ern_cnt ern_cnt;
+};
+
+struct dpaa_buffer_layout {
+	u16 priv_data_size;
+};
+
+struct dpaa_priv {
+	struct dpaa_percpu_priv __percpu *percpu_priv;
+	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM];
+	/* Store here the needed Tx headroom for convenience and speed
+	 * (even though it can be computed based on the fields of buf_layout)
+	 */
+	u16 tx_headroom;
+	struct net_device *net_dev;
+	struct mac_device *mac_dev;
+	struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
+	struct qman_fq
*conf_fqs[DPAA_ETH_TXQ_NUM]; + + u16 channel; + struct list_head dpaa_fq_list; + + u8 num_tc; + u32 msg_enable; /* net_device message level */ + + struct { + /* All egress queues to a given net device belong to one + * (and the same) congestion group. + */ + struct qman_cgr cgr; + /* If congested, when it began. Used for performance stats. */ + u32 congestion_start_jiffies; + /* Number of jiffies the Tx port was congested. */ + u32 congested_jiffies; + /* Counter for the number of times the CGR + * entered congestion state + */ + u32 cgr_congested_count; + } cgr_data; + /* Use a per-port CGR for ingress traffic. */ + bool use_ingress_cgr; + struct qman_cgr ingress_cgr; + + struct dpaa_buffer_layout buf_layout[2]; + u16 rx_headroom; +}; + +/* from dpaa_ethtool.c */ +extern const struct ethtool_ops dpaa_ethtool_ops; + +/* from dpaa_eth_sysfs.c */ +void dpaa_eth_sysfs_remove(struct device *dev); +void dpaa_eth_sysfs_init(struct device *dev); +#endif /* __DPAA_H */ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c new file mode 100644 index 0000000..ec75d1c --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c @@ -0,0 +1,165 @@ +/* Copyright 2008-2016 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
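The cgr_data block in dpaa_priv above carries the congestion bookkeeping that the ethtool statistics later expose as "congestion time (ms)" and "entered congestion". A minimal standalone sketch of that accounting follows; on_congestion_change and now_jiffies are hypothetical names, since in the driver the transition arrives through a QMan CGR callback and time comes from the kernel's jiffies counter:

```c
#include <stdbool.h>
#include <stdint.h>

struct cgr_stats {
	uint32_t congestion_start_jiffies;
	uint32_t congested_jiffies;
	uint32_t cgr_congested_count;
};

/* called on every congestion state change; `congested` is the new state */
static void on_congestion_change(struct cgr_stats *s, bool congested,
				 uint32_t now_jiffies)
{
	if (congested) {
		/* entering congestion: remember when it began */
		s->congestion_start_jiffies = now_jiffies;
		s->cgr_congested_count++;
	} else {
		/* leaving congestion: accumulate the time spent congested */
		s->congested_jiffies +=
			now_jiffies - s->congestion_start_jiffies;
	}
}

int main(void)
{
	struct cgr_stats s = { 0 };

	on_congestion_change(&s, true, 1000);	/* congestion begins */
	on_congestion_change(&s, false, 1250);	/* ends 250 jiffies later */
	return s.congested_jiffies == 250 ? 0 : 1;
}
```

The same enter/exit pattern appears again in ceetm_cscn() further down in this section, where it tracks the CEETM congestion group.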
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/of_net.h> +#include "dpaa_eth.h" +#include "mac.h" + +static ssize_t dpaa_eth_show_addr(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dpaa_priv *priv = netdev_priv(to_net_dev(dev)); + struct mac_device *mac_dev = priv->mac_dev; + + if (mac_dev) + return sprintf(buf, "%llx", + (unsigned long long)mac_dev->res->start); + else + return sprintf(buf, "none"); +} + +static ssize_t dpaa_eth_show_fqids(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dpaa_priv *priv = netdev_priv(to_net_dev(dev)); + struct dpaa_fq *prev = NULL; + char *prevstr = NULL; + struct dpaa_fq *tmp; + struct dpaa_fq *fq; + u32 first_fqid = 0; + u32 last_fqid = 0; + ssize_t bytes = 0; + char *str; + int i = 0; + + list_for_each_entry_safe(fq, tmp, &priv->dpaa_fq_list, list) { + switch (fq->fq_type) { + case FQ_TYPE_RX_DEFAULT: + str = "Rx default"; + break; + case FQ_TYPE_RX_ERROR: + str = "Rx error"; + break; + case FQ_TYPE_TX_CONFIRM: + str = "Tx default confirmation"; + break; + case FQ_TYPE_TX_CONF_MQ: + str = "Tx confirmation (mq)"; + break; + case FQ_TYPE_TX_ERROR: + str = "Tx error"; + break; + case FQ_TYPE_TX: + str = "Tx"; + break; + default: + str = "Unknown"; + } + + if (prev && (abs(fq->fqid - prev->fqid) != 1 || + str != prevstr)) { + if (last_fqid == first_fqid) + bytes += sprintf(buf + bytes, + "%s: %d\n", prevstr, prev->fqid); + else + bytes += sprintf(buf + bytes, + "%s: %d - %d\n", prevstr, + first_fqid, last_fqid); + } + + if (prev && abs(fq->fqid - prev->fqid) == 1 && + str == prevstr) { + last_fqid = fq->fqid; + } else { + first_fqid = fq->fqid; + last_fqid = fq->fqid; + } + + prev = fq; + prevstr = str; + i++; + } + + if (prev) { + if (last_fqid == first_fqid) + bytes += sprintf(buf + bytes, "%s: %d\n", prevstr, + prev->fqid); + else + bytes += sprintf(buf + bytes, "%s: %d - %d\n", prevstr, + first_fqid, last_fqid); + } + + return bytes; +} + +static ssize_t dpaa_eth_show_bpids(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dpaa_priv *priv = netdev_priv(to_net_dev(dev)); + ssize_t bytes = 0; + int i = 0; + + for (i = 0; i < DPAA_BPS_NUM; i++) + bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n", + priv->dpaa_bps[i]->bpid); + + return bytes; +} + +static struct device_attribute dpaa_eth_attrs[] = { + __ATTR(device_addr, 0444, dpaa_eth_show_addr, NULL), + __ATTR(fqids, 0444, dpaa_eth_show_fqids, NULL), + __ATTR(bpids, 0444, dpaa_eth_show_bpids, NULL), +}; + +void dpaa_eth_sysfs_init(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dpaa_eth_attrs); i++) + if (device_create_file(dev, &dpaa_eth_attrs[i])) { + dev_err(dev, "Error creating sysfs file\n"); + while (i > 0) + device_remove_file(dev, &dpaa_eth_attrs[--i]); + return; + } +} + +void dpaa_eth_sysfs_remove(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dpaa_eth_attrs); i++) + device_remove_file(dev, &dpaa_eth_attrs[i]); +} diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h new file mode 100644 index 0000000..409c1dc --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h @@ -0,0 +1,141 @@ +/* Copyright 2013-2015 Freescale Semiconductor Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dpaa_eth + +#if !defined(_DPAA_ETH_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _DPAA_ETH_TRACE_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include "dpaa_eth.h" +#include <linux/tracepoint.h> + +#define fd_format_name(format) { qm_fd_##format, #format } +#define fd_format_list \ + fd_format_name(contig), \ + fd_format_name(sg) + +/* This is used to declare a class of events. + * individual events of this type will be defined below. + */ + +/* Store details about a frame descriptor and the FQ on which it was + * transmitted/received. + */ +DECLARE_EVENT_CLASS(dpaa_eth_fd, + /* Trace function prototype */ + TP_PROTO(struct net_device *netdev, + struct qman_fq *fq, + const struct qm_fd *fd), + + /* Repeat argument list here */ + TP_ARGS(netdev, fq, fd), + + /* A structure containing the relevant information we want to record. + * Declare name and type for each normal element, name, type and size + * for arrays. Use __string for variable length strings. 
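Stepping back to dpaa_eth_show_fqids() in the sysfs file above: it compresses runs of consecutive FQ IDs that share a type label into "first - last" spans. A standalone rendering of that run-length logic, with a plain array of IDs standing in for the driver's FQ list (print_ranges and its inputs are hypothetical):

```c
#include <stdio.h>

static void print_ranges(const unsigned int *ids, int n, const char *label)
{
	unsigned int first, last;
	int i;

	if (n <= 0)
		return;

	first = last = ids[0];
	for (i = 1; i <= n; i++) {
		/* extend the current run while IDs stay consecutive */
		if (i < n && ids[i] == last + 1) {
			last = ids[i];
			continue;
		}
		/* run ended: print a single ID or a span */
		if (first == last)
			printf("%s: %u\n", label, first);
		else
			printf("%s: %u - %u\n", label, first, last);
		if (i < n)
			first = last = ids[i];
	}
}

int main(void)
{
	unsigned int tx_fqids[] = { 0x500, 0x501, 0x502, 0x600 };

	print_ranges(tx_fqids, 4, "Tx");  /* "Tx: 1280 - 1282", "Tx: 1536" */
	return 0;
}
```

Note the driver compares labels by pointer (`str != prevstr`), which is safe only because every label is a string literal assigned in the same switch.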
+ */ + TP_STRUCT__entry( + __field(u32, fqid) + __field(u64, fd_addr) + __field(u8, fd_format) + __field(u16, fd_offset) + __field(u32, fd_length) + __field(u32, fd_status) + __string(name, netdev->name) + ), + + /* The function that assigns values to the above declared fields */ + TP_fast_assign( + __entry->fqid = fq->fqid; + __entry->fd_addr = qm_fd_addr_get64(fd); + __entry->fd_format = qm_fd_get_format(fd); + __entry->fd_offset = qm_fd_get_offset(fd); + __entry->fd_length = qm_fd_get_length(fd); + __entry->fd_status = fd->status; + __assign_str(name, netdev->name); + ), + + /* This is what gets printed when the trace event is triggered */ + TP_printk("[%s] fqid=%d, fd: addr=0x%llx, format=%s, off=%u, len=%u, status=0x%08x", + __get_str(name), __entry->fqid, __entry->fd_addr, + __print_symbolic(__entry->fd_format, fd_format_list), + __entry->fd_offset, __entry->fd_length, __entry->fd_status) +); + +/* Now declare events of the above type. Format is: + * DEFINE_EVENT(class, name, proto, args), with proto and args same as for class + */ + +/* Tx (egress) fd */ +DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_fd, + + TP_PROTO(struct net_device *netdev, + struct qman_fq *fq, + const struct qm_fd *fd), + + TP_ARGS(netdev, fq, fd) +); + +/* Rx fd */ +DEFINE_EVENT(dpaa_eth_fd, dpaa_rx_fd, + + TP_PROTO(struct net_device *netdev, + struct qman_fq *fq, + const struct qm_fd *fd), + + TP_ARGS(netdev, fq, fd) +); + +/* Tx confirmation fd */ +DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_conf_fd, + + TP_PROTO(struct net_device *netdev, + struct qman_fq *fq, + const struct qm_fd *fd), + + TP_ARGS(netdev, fq, fd) +); + +/* If only one event of a certain type needs to be declared, use TRACE_EVENT(). + * The syntax is the same as for DECLARE_EVENT_CLASS(). + */ + +#endif /* _DPAA_ETH_TRACE_H */ + +/* This must be outside ifdef _DPAA_ETH_TRACE_H */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE dpaa_eth_trace +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c new file mode 100644 index 0000000..15571e2 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -0,0 +1,417 @@ +/* Copyright 2008-2016 Freescale Semiconductor, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
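TP_printk() above renders the raw frame-descriptor format field through __print_symbolic() and the fd_format_list value/name pairs built by the fd_format_name() macro. Functionally it is this table lookup; a sketch only, with illustrative enum values rather than the real ones from the qman headers:

```c
#include <stdio.h>

/* illustrative stand-ins for the qm_fd_contig/qm_fd_sg enum values */
enum fd_format { fd_contig = 0, fd_sg = 4 };

static const struct { int val; const char *name; } fd_formats[] = {
	{ fd_contig, "contig" },
	{ fd_sg,     "sg" },
};

static const char *fd_format_name(int fmt)
{
	size_t i;

	for (i = 0; i < sizeof(fd_formats) / sizeof(fd_formats[0]); i++)
		if (fd_formats[i].val == fmt)
			return fd_formats[i].name;
	return "unknown";  /* __print_symbolic falls back to the raw value */
}

int main(void)
{
	printf("format=%s\n", fd_format_name(fd_sg));	/* format=sg */
	return 0;
}
```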
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/string.h> + +#include "dpaa_eth.h" +#include "mac.h" + +static const char dpaa_stats_percpu[][ETH_GSTRING_LEN] = { + "interrupts", + "rx packets", + "tx packets", + "tx confirm", + "tx S/G", + "tx error", + "rx error", +}; + +static char dpaa_stats_global[][ETH_GSTRING_LEN] = { + /* dpa rx errors */ + "rx dma error", + "rx frame physical error", + "rx frame size error", + "rx header error", + + /* demultiplexing errors */ + "qman cg_tdrop", + "qman wred", + "qman error cond", + "qman early window", + "qman late window", + "qman fq tdrop", + "qman fq retired", + "qman orp disabled", + + /* congestion related stats */ + "congestion time (ms)", + "entered congestion", + "congested (0/1)" +}; + +#define DPAA_STATS_PERCPU_LEN ARRAY_SIZE(dpaa_stats_percpu) +#define DPAA_STATS_GLOBAL_LEN ARRAY_SIZE(dpaa_stats_global) + +static int dpaa_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) +{ + int err; + + if (!net_dev->phydev) { + netdev_dbg(net_dev, "phy device not initialized\n"); + return 0; + } + + err = phy_ethtool_ksettings_get(net_dev->phydev, cmd); + + return err; +} + +static int dpaa_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) +{ + int err; + + if (!net_dev->phydev) { + netdev_err(net_dev, "phy device not initialized\n"); + return -ENODEV; + } + + err = phy_ethtool_ksettings_set(net_dev->phydev, cmd); + if (err < 0) + netdev_err(net_dev, "phy_ethtool_ksettings_set() = %d\n", err); + + return err; +} + +static void dpaa_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *drvinfo) +{ + int len; + + strlcpy(drvinfo->driver, KBUILD_MODNAME, + sizeof(drvinfo->driver)); + len = snprintf(drvinfo->version, sizeof(drvinfo->version), + "%X", 0); + len = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%X", 0); + + if (len >= sizeof(drvinfo->fw_version)) { + /* Truncated output */ + netdev_notice(net_dev, "snprintf() = %d\n", len); + } + strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent), + sizeof(drvinfo->bus_info)); +} + +static u32 dpaa_get_msglevel(struct net_device *net_dev) +{ + return ((struct dpaa_priv *)netdev_priv(net_dev))->msg_enable; +} + +static void dpaa_set_msglevel(struct net_device *net_dev, + u32 msg_enable) +{ + ((struct dpaa_priv *)netdev_priv(net_dev))->msg_enable = msg_enable; +} + +static int dpaa_nway_reset(struct net_device *net_dev) +{ + int err; + + if (!net_dev->phydev) { + netdev_err(net_dev, "phy device not initialized\n"); + return -ENODEV; + } + + err = 0; + if (net_dev->phydev->autoneg) { + err = phy_start_aneg(net_dev->phydev); + if (err < 0) + netdev_err(net_dev, "phy_start_aneg() = %d\n", + err); + } + + return err; +} + +static void dpaa_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *epause) +{ + struct mac_device *mac_dev; + struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + + if 
(!net_dev->phydev) { + netdev_err(net_dev, "phy device not initialized\n"); + return; + } + + epause->autoneg = mac_dev->autoneg_pause; + epause->rx_pause = mac_dev->rx_pause_active; + epause->tx_pause = mac_dev->tx_pause_active; +} + +static int dpaa_set_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *epause) +{ + struct mac_device *mac_dev; + struct phy_device *phydev; + bool rx_pause, tx_pause; + struct dpaa_priv *priv; + u32 newadv, oldadv; + int err; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + + phydev = net_dev->phydev; + if (!phydev) { + netdev_err(net_dev, "phy device not initialized\n"); + return -ENODEV; + } + + if (!(phydev->supported & SUPPORTED_Pause) || + (!(phydev->supported & SUPPORTED_Asym_Pause) && + (epause->rx_pause != epause->tx_pause))) + return -EINVAL; + + /* The MAC should know how to handle PAUSE frame autonegotiation before + * adjust_link is triggered by a forced renegotiation of sym/asym PAUSE + * settings. + */ + mac_dev->autoneg_pause = !!epause->autoneg; + mac_dev->rx_pause_req = !!epause->rx_pause; + mac_dev->tx_pause_req = !!epause->tx_pause; + + /* Determine the sym/asym advertised PAUSE capabilities from the desired + * rx/tx pause settings. + */ + newadv = 0; + if (epause->rx_pause) + newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause; + if (epause->tx_pause) + newadv |= ADVERTISED_Asym_Pause; + + oldadv = phydev->advertising & + (ADVERTISED_Pause | ADVERTISED_Asym_Pause); + + /* If there are differences between the old and the new advertised + * values, restart PHY autonegotiation and advertise the new values. + */ + if (oldadv != newadv) { + phydev->advertising &= ~(ADVERTISED_Pause + | ADVERTISED_Asym_Pause); + phydev->advertising |= newadv; + if (phydev->autoneg) { + err = phy_start_aneg(phydev); + if (err < 0) + netdev_err(net_dev, "phy_start_aneg() = %d\n", + err); + } + } + + fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause); + err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause); + if (err < 0) + netdev_err(net_dev, "set_mac_active_pause() = %d\n", err); + + return err; +} + +static int dpaa_get_sset_count(struct net_device *net_dev, int type) +{ + unsigned int total_stats, num_stats; + + num_stats = num_online_cpus() + 1; + total_stats = num_stats * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM) + + DPAA_STATS_GLOBAL_LEN; + + switch (type) { + case ETH_SS_STATS: + return total_stats; + default: + return -EOPNOTSUPP; + } +} + +static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus, + int crr_cpu, u64 *bp_count, u64 *data) +{ + int num_values = num_cpus + 1; + int crr = 0, j; + + /* update current CPU's stats and also add them to the total values */ + data[crr * num_values + crr_cpu] = percpu_priv->in_interrupt; + data[crr++ * num_values + num_cpus] += percpu_priv->in_interrupt; + + data[crr * num_values + crr_cpu] = percpu_priv->stats.rx_packets; + data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_packets; + + data[crr * num_values + crr_cpu] = percpu_priv->stats.tx_packets; + data[crr++ * num_values + num_cpus] += percpu_priv->stats.tx_packets; + + data[crr * num_values + crr_cpu] = percpu_priv->tx_confirm; + data[crr++ * num_values + num_cpus] += percpu_priv->tx_confirm; + + data[crr * num_values + crr_cpu] = percpu_priv->tx_frag_skbuffs; + data[crr++ * num_values + num_cpus] += percpu_priv->tx_frag_skbuffs; + + data[crr * num_values + crr_cpu] = percpu_priv->stats.tx_errors; + data[crr++ * num_values + num_cpus] += percpu_priv->stats.tx_errors; + + data[crr * num_values + 
crr_cpu] = percpu_priv->stats.rx_errors; + data[crr++ * num_values + num_cpus] += percpu_priv->stats.rx_errors; + + for (j = 0; j < DPAA_BPS_NUM; j++) { + data[crr * num_values + crr_cpu] = bp_count[j]; + data[crr++ * num_values + num_cpus] += bp_count[j]; + } +} + +static void dpaa_get_ethtool_stats(struct net_device *net_dev, + struct ethtool_stats *stats, u64 *data) +{ + u64 bp_count[DPAA_BPS_NUM], cg_time, cg_num; + struct dpaa_percpu_priv *percpu_priv; + struct dpaa_rx_errors rx_errors; + unsigned int num_cpus, offset; + struct dpaa_ern_cnt ern_cnt; + struct dpaa_bp *dpaa_bp; + struct dpaa_priv *priv; + int total_stats, i, j; + bool cg_status; + + total_stats = dpaa_get_sset_count(net_dev, ETH_SS_STATS); + priv = netdev_priv(net_dev); + num_cpus = num_online_cpus(); + + memset(&bp_count, 0, sizeof(bp_count)); + memset(&rx_errors, 0, sizeof(struct dpaa_rx_errors)); + memset(&ern_cnt, 0, sizeof(struct dpaa_ern_cnt)); + memset(data, 0, total_stats * sizeof(u64)); + + for_each_online_cpu(i) { + percpu_priv = per_cpu_ptr(priv->percpu_priv, i); + for (j = 0; j < DPAA_BPS_NUM; j++) { + dpaa_bp = priv->dpaa_bps[j]; + if (!dpaa_bp->percpu_count) + continue; + bp_count[j] = *(per_cpu_ptr(dpaa_bp->percpu_count, i)); + } + rx_errors.dme += percpu_priv->rx_errors.dme; + rx_errors.fpe += percpu_priv->rx_errors.fpe; + rx_errors.fse += percpu_priv->rx_errors.fse; + rx_errors.phe += percpu_priv->rx_errors.phe; + + ern_cnt.cg_tdrop += percpu_priv->ern_cnt.cg_tdrop; + ern_cnt.wred += percpu_priv->ern_cnt.wred; + ern_cnt.err_cond += percpu_priv->ern_cnt.err_cond; + ern_cnt.early_window += percpu_priv->ern_cnt.early_window; + ern_cnt.late_window += percpu_priv->ern_cnt.late_window; + ern_cnt.fq_tdrop += percpu_priv->ern_cnt.fq_tdrop; + ern_cnt.fq_retired += percpu_priv->ern_cnt.fq_retired; + ern_cnt.orp_zero += percpu_priv->ern_cnt.orp_zero; + + copy_stats(percpu_priv, num_cpus, i, bp_count, data); + } + + offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM); + memcpy(data + offset, &rx_errors, sizeof(struct dpaa_rx_errors)); + + offset += sizeof(struct dpaa_rx_errors) / sizeof(u64); + memcpy(data + offset, &ern_cnt, sizeof(struct dpaa_ern_cnt)); + + /* gather congestion related counters */ + cg_num = 0; + cg_status = 0; + cg_time = jiffies_to_msecs(priv->cgr_data.congested_jiffies); + if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) { + cg_num = priv->cgr_data.cgr_congested_count; + + /* reset congestion stats (like QMan API does */ + priv->cgr_data.congested_jiffies = 0; + priv->cgr_data.cgr_congested_count = 0; + } + + offset += sizeof(struct dpaa_ern_cnt) / sizeof(u64); + data[offset++] = cg_time; + data[offset++] = cg_num; + data[offset++] = cg_status; +} + +static void dpaa_get_strings(struct net_device *net_dev, u32 stringset, + u8 *data) +{ + unsigned int i, j, num_cpus, size; + char string_cpu[ETH_GSTRING_LEN]; + u8 *strings; + + memset(string_cpu, 0, sizeof(string_cpu)); + strings = data; + num_cpus = num_online_cpus(); + size = DPAA_STATS_GLOBAL_LEN * ETH_GSTRING_LEN; + + for (i = 0; i < DPAA_STATS_PERCPU_LEN; i++) { + for (j = 0; j < num_cpus; j++) { + snprintf(string_cpu, ETH_GSTRING_LEN, "%s [CPU %d]", + dpaa_stats_percpu[i], j); + memcpy(strings, string_cpu, ETH_GSTRING_LEN); + strings += ETH_GSTRING_LEN; + } + snprintf(string_cpu, ETH_GSTRING_LEN, "%s [TOTAL]", + dpaa_stats_percpu[i]); + memcpy(strings, string_cpu, ETH_GSTRING_LEN); + strings += ETH_GSTRING_LEN; + } + for (i = 0; i < DPAA_BPS_NUM; i++) { + for (j = 0; j < num_cpus; j++) { + 
snprintf(string_cpu, ETH_GSTRING_LEN, + "bpool %c [CPU %d]", 'a' + i, j); + memcpy(strings, string_cpu, ETH_GSTRING_LEN); + strings += ETH_GSTRING_LEN; + } + snprintf(string_cpu, ETH_GSTRING_LEN, "bpool %c [TOTAL]", + 'a' + i); + memcpy(strings, string_cpu, ETH_GSTRING_LEN); + strings += ETH_GSTRING_LEN; + } + memcpy(strings, dpaa_stats_global, size); +} + +const struct ethtool_ops dpaa_ethtool_ops = { + .get_drvinfo = dpaa_get_drvinfo, + .get_msglevel = dpaa_get_msglevel, + .set_msglevel = dpaa_set_msglevel, + .nway_reset = dpaa_nway_reset, + .get_pauseparam = dpaa_get_pauseparam, + .set_pauseparam = dpaa_set_pauseparam, + .get_link = ethtool_op_get_link, + .get_sset_count = dpaa_get_sset_count, + .get_ethtool_stats = dpaa_get_ethtool_stats, + .get_strings = dpaa_get_strings, + .get_link_ksettings = dpaa_get_link_ksettings, + .set_link_ksettings = dpaa_set_link_ksettings, +}; diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 79b7c84..dc0850b 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -1,6 +1,6 @@ config FSL_FMAN tristate "FMan support" - depends on FSL_SOC || COMPILE_TEST + depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR select PHYLIB default n diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index dafd9e1..4aefe24 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -59,6 +59,7 @@ #define DMA_OFFSET 0x000C2000 #define FPM_OFFSET 0x000C3000 #define IMEM_OFFSET 0x000C4000 +#define HWP_OFFSET 0x000C7000 #define CGP_OFFSET 0x000DB000 /* Exceptions bit map */ @@ -218,6 +219,9 @@ #define QMI_GS_HALT_NOT_BUSY 0x00000002 +/* HWP defines */ +#define HWP_RPIMAC_PEN 0x00000001 + /* IRAM defines */ #define IRAM_IADD_AIE 0x80000000 #define IRAM_READY 0x80000000 @@ -475,6 +479,12 @@ struct fman_dma_regs { u32 res00e0[0x400 - 56]; }; +struct fman_hwp_regs { + u32 res0000[0x844 / 4]; /* 0x000..0x843 */ + u32 fmprrpimac; /* FM Parser Internal memory access control */ + u32 res[(0x1000 - 0x848) / 4]; /* 0x848..0xFFF */ +}; + /* Structure that holds current FMan state. * Used for saving run time information. 
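The reserved-array sizes in fman_hwp_regs above are what place fmprrpimac at its documented offset, and that arithmetic is easy to break when the layout is edited. A quick host-side check confirms what the comments claim; this is a sketch with the kernel's u32/__iomem types replaced by uint32_t for a standalone build:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct fman_hwp_regs {
	uint32_t res0000[0x844 / 4];	    /* 0x000..0x843 */
	uint32_t fmprrpimac;		    /* parser internal mem access */
	uint32_t res[(0x1000 - 0x848) / 4]; /* 0x848..0xfff */
};

int main(void)
{
	/* all members are u32 arrays, so no padding can creep in */
	static_assert(offsetof(struct fman_hwp_regs, fmprrpimac) == 0x844,
		      "fmprrpimac must sit at offset 0x844");
	static_assert(sizeof(struct fman_hwp_regs) == 0x1000,
		      "HWP register block is one 4 KiB page");
	return 0;
}
```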
*/ @@ -606,6 +616,7 @@ struct fman { struct fman_bmi_regs __iomem *bmi_regs; struct fman_qmi_regs __iomem *qmi_regs; struct fman_dma_regs __iomem *dma_regs; + struct fman_hwp_regs __iomem *hwp_regs; fman_exceptions_cb *exception_cb; fman_bus_error_cb *bus_error_cb; /* Spinlock for FMan use */ @@ -999,6 +1010,12 @@ static void qmi_init(struct fman_qmi_regs __iomem *qmi_rg, iowrite32be(tmp_reg, &qmi_rg->fmqm_ien); } +static void hwp_init(struct fman_hwp_regs __iomem *hwp_rg) +{ + /* enable HW Parser */ + iowrite32be(HWP_RPIMAC_PEN, &hwp_rg->fmprrpimac); +} + static int enable(struct fman *fman, struct fman_cfg *cfg) { u32 cfg_reg = 0; @@ -1195,7 +1212,7 @@ static int fill_soc_specific_params(struct fman_state_struct *state) state->max_num_of_open_dmas = 32; state->fm_port_num_of_cg = 256; state->num_of_rx_ports = 6; - state->total_fifo_size = 122 * 1024; + state->total_fifo_size = 136 * 1024; break; case 2: @@ -1793,6 +1810,7 @@ static int fman_config(struct fman *fman) fman->bmi_regs = base_addr + BMI_OFFSET; fman->qmi_regs = base_addr + QMI_OFFSET; fman->dma_regs = base_addr + DMA_OFFSET; + fman->hwp_regs = base_addr + HWP_OFFSET; fman->base_addr = base_addr; spin_lock_init(&fman->spinlock); @@ -1890,6 +1908,7 @@ static int fman_reset(struct fman *fman) goto _return; } else { +#ifdef CONFIG_PPC struct device_node *guts_node; struct ccsr_guts __iomem *guts_regs; u32 devdisr2, reg; @@ -1921,6 +1940,7 @@ static int fman_reset(struct fman *fman) /* Enable all MACs */ iowrite32be(reg, &guts_regs->devdisr2); +#endif /* Perform FMan reset */ iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); @@ -1932,25 +1952,31 @@ static int fman_reset(struct fman *fman) } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & FPM_RSTC_FM_RESET) && --count); if (count == 0) { +#ifdef CONFIG_PPC iounmap(guts_regs); of_node_put(guts_node); +#endif err = -EBUSY; goto _return; } +#ifdef CONFIG_PPC /* Restore devdisr2 value */ iowrite32be(devdisr2, &guts_regs->devdisr2); iounmap(guts_regs); of_node_put(guts_node); +#endif goto _return; +#ifdef CONFIG_PPC guts_regs: of_node_put(guts_node); guts_node: dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n", __func__); +#endif } _return: return err; @@ -2054,6 +2080,9 @@ static int fman_init(struct fman *fman) /* Init QMI Registers */ qmi_init(fman->qmi_regs, fman->cfg); + /* Init HW Parser */ + hwp_init(fman->hwp_regs); + err = enable(fman, cfg); if (err != 0) return err; @@ -2868,6 +2897,13 @@ static struct fman *read_dts_node(struct platform_device *of_dev) fman->dev = &of_dev->dev; + err = of_platform_populate(fm_node, NULL, NULL, &of_dev->dev); + if (err) { + dev_err(&of_dev->dev, "%s: of_platform_populate() failed\n", + __func__); + goto fman_free; + } + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h index 57aae8d..f53e147 100644 --- a/drivers/net/ethernet/freescale/fman/fman.h +++ b/drivers/net/ethernet/freescale/fman/fman.h @@ -134,14 +134,14 @@ enum fman_exceptions { struct fman_prs_result { u8 lpid; /* Logical port id */ u8 shimr; /* Shim header result */ - u16 l2r; /* Layer 2 result */ - u16 l3r; /* Layer 3 result */ + __be16 l2r; /* Layer 2 result */ + __be16 l3r; /* Layer 3 result */ u8 l4r; /* Layer 4 result */ u8 cplan; /* Classification plan id */ - u16 nxthdr; /* Next Header */ - u16 cksum; /* Running-sum */ + __be16 nxthdr; /* Next Header */ + __be16 cksum; /* Running-sum */ /* Flags&fragment-offset field of the last IP-header */ - u16 
flags_frag_off; + __be16 flags_frag_off; /* Routing type field of a IPV6 routing extension header */ u8 route_type; /* Routing Extension Header Present; last bit is IP valid */ diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index c88918c..98bba10 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -337,7 +337,7 @@ struct fman_mac { u8 mac_id; u32 exceptions; bool ptp_tsu_enabled; - bool en_tsu_err_exeption; + bool en_tsu_err_exception; struct dtsec_cfg *dtsec_drv_param; void *fm; struct fman_rev_info fm_rev_info; @@ -381,6 +381,9 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg, /* check RGMII support */ if (iface == PHY_INTERFACE_MODE_RGMII || + iface == PHY_INTERFACE_MODE_RGMII_ID || + iface == PHY_INTERFACE_MODE_RGMII_RXID || + iface == PHY_INTERFACE_MODE_RGMII_TXID || iface == PHY_INTERFACE_MODE_RMII) if (tmp & DTSEC_ID2_INT_REDUCED_OFF) return -EINVAL; @@ -390,7 +393,10 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg, if (tmp & DTSEC_ID2_INT_REDUCED_OFF) return -EINVAL; - is_rgmii = iface == PHY_INTERFACE_MODE_RGMII; + is_rgmii = iface == PHY_INTERFACE_MODE_RGMII || + iface == PHY_INTERFACE_MODE_RGMII_ID || + iface == PHY_INTERFACE_MODE_RGMII_RXID || + iface == PHY_INTERFACE_MODE_RGMII_TXID; is_sgmii = iface == PHY_INTERFACE_MODE_SGMII; is_qsgmii = iface == PHY_INTERFACE_MODE_QSGMII; @@ -1247,12 +1253,12 @@ int dtsec_set_exception(struct fman_mac *dtsec, switch (exception) { case FM_MAC_EX_1G_1588_TS_RX_ERR: if (enable) { - dtsec->en_tsu_err_exeption = true; + dtsec->en_tsu_err_exception = true; iowrite32be(ioread32be(®s->tmr_pemask) | TMR_PEMASK_TSREEN, ®s->tmr_pemask); } else { - dtsec->en_tsu_err_exeption = false; + dtsec->en_tsu_err_exception = false; iowrite32be(ioread32be(®s->tmr_pemask) & ~TMR_PEMASK_TSREEN, ®s->tmr_pemask); @@ -1420,7 +1426,7 @@ struct fman_mac *dtsec_config(struct fman_mac_params *params) dtsec->event_cb = params->event_cb; dtsec->dev_id = params->dev_id; dtsec->ptp_tsu_enabled = dtsec->dtsec_drv_param->ptp_tsu_en; - dtsec->en_tsu_err_exeption = dtsec->dtsec_drv_param->ptp_exception_en; + dtsec->en_tsu_err_exception = dtsec->dtsec_drv_param->ptp_exception_en; dtsec->fm = params->fm; dtsec->basex_if = params->basex_if; diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 71a5ded..c029688 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/io.h> #include <linux/phy.h> +#include <linux/phy_fixed.h> #include <linux/of_mdio.h> /* PCS registers */ @@ -442,7 +443,10 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, break; default: tmp |= IF_MODE_GMII; - if (phy_if == PHY_INTERFACE_MODE_RGMII) + if (phy_if == PHY_INTERFACE_MODE_RGMII || + phy_if == PHY_INTERFACE_MODE_RGMII_ID || + phy_if == PHY_INTERFACE_MODE_RGMII_RXID || + phy_if == PHY_INTERFACE_MODE_RGMII_TXID) tmp |= IF_MODE_RGMII | IF_MODE_RGMII_AUTO; } iowrite32be(tmp, ®s->if_mode); diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 9f3bb50..57bf44f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -62,6 +62,7 @@ #define BMI_PORT_REGS_OFFSET 0 #define QMI_PORT_REGS_OFFSET 0x400 +#define 
HWP_PORT_REGS_OFFSET 0x800 /* Default values */ #define DFLT_PORT_BUFFER_PREFIX_CONTEXT_DATA_ALIGN \ @@ -182,7 +183,7 @@ #define NIA_ENG_BMI 0x00500000 #define NIA_ENG_QMI_ENQ 0x00540000 #define NIA_ENG_QMI_DEQ 0x00580000 - +#define NIA_ENG_HWP 0x00440000 #define NIA_BMI_AC_ENQ_FRAME 0x00000002 #define NIA_BMI_AC_TX_RELEASE 0x000002C0 #define NIA_BMI_AC_RELEASE 0x000000C0 @@ -317,6 +318,19 @@ struct fman_port_qmi_regs { u32 fmqm_pndcc; /* PortID n Dequeue Confirm Counter */ }; +#define HWP_HXS_COUNT 16 +#define HWP_HXS_PHE_REPORT 0x00000800 +#define HWP_HXS_PCAC_PSTAT 0x00000100 +#define HWP_HXS_PCAC_PSTOP 0x00000001 +struct fman_port_hwp_regs { + struct { + u32 ssa; /* Soft Sequence Attachment */ + u32 lcv; /* Line-up Enable Confirmation Mask */ + } pmda[HWP_HXS_COUNT]; /* Parse Memory Direct Access Registers */ + u32 reserved080[(0x3f8 - 0x080) / 4]; /* (0x080-0x3f7) */ + u32 fmpr_pcac; /* Configuration Access Control */ +}; + /* QMI dequeue prefetch modes */ enum fman_port_deq_prefetch { FMAN_PORT_DEQ_NO_PREFETCH, /* No prefetch mode */ @@ -436,6 +450,7 @@ struct fman_port { union fman_port_bmi_regs __iomem *bmi_regs; struct fman_port_qmi_regs __iomem *qmi_regs; + struct fman_port_hwp_regs __iomem *hwp_regs; struct fman_sp_buffer_offsets buffer_offsets; @@ -521,9 +536,12 @@ static int init_bmi_rx(struct fman_port *port) /* NIA */ tmp = (u32)cfg->rx_fd_bits << BMI_NEXT_ENG_FD_BITS_SHIFT; - tmp |= NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME; + tmp |= NIA_ENG_HWP; iowrite32be(tmp, ®s->fmbm_rfne); + /* Parser Next Engine NIA */ + iowrite32be(NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME, ®s->fmbm_rfpne); + /* Enqueue NIA */ iowrite32be(NIA_ENG_QMI_ENQ | NIA_ORDER_RESTOR, ®s->fmbm_rfene); @@ -665,6 +683,50 @@ static int init_qmi(struct fman_port *port) return 0; } +static void stop_port_hwp(struct fman_port *port) +{ + struct fman_port_hwp_regs __iomem *regs = port->hwp_regs; + int cnt = 100; + + iowrite32be(HWP_HXS_PCAC_PSTOP, ®s->fmpr_pcac); + + while (cnt-- > 0 && + (ioread32be(®s->fmpr_pcac) & HWP_HXS_PCAC_PSTAT)) + udelay(10); + if (!cnt) + pr_err("Timeout stopping HW Parser\n"); +} + +static void start_port_hwp(struct fman_port *port) +{ + struct fman_port_hwp_regs __iomem *regs = port->hwp_regs; + int cnt = 100; + + iowrite32be(0, ®s->fmpr_pcac); + + while (cnt-- > 0 && + !(ioread32be(®s->fmpr_pcac) & HWP_HXS_PCAC_PSTAT)) + udelay(10); + if (!cnt) + pr_err("Timeout starting HW Parser\n"); +} + +static void init_hwp(struct fman_port *port) +{ + struct fman_port_hwp_regs __iomem *regs = port->hwp_regs; + int i; + + stop_port_hwp(port); + + for (i = 0; i < HWP_HXS_COUNT; i++) { + /* enable HXS error reporting into FD[STATUS] PHE */ + iowrite32be(0x00000000, ®s->pmda[i].ssa); + iowrite32be(0xffffffff, ®s->pmda[i].lcv); + } + + start_port_hwp(port); +} + static int init(struct fman_port *port) { int err; @@ -673,6 +735,8 @@ static int init(struct fman_port *port) switch (port->port_type) { case FMAN_PORT_TYPE_RX: err = init_bmi_rx(port); + if (!err) + init_hwp(port); break; case FMAN_PORT_TYPE_TX: err = init_bmi_tx(port); @@ -686,7 +750,8 @@ static int init(struct fman_port *port) /* Init QMI registers */ err = init_qmi(port); - return err; + if (err) + return err; return 0; } @@ -1247,7 +1312,7 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) /* Allocate the FM driver's parameters structure */ port->cfg = kzalloc(sizeof(*port->cfg), GFP_KERNEL); if (!port->cfg) - goto err_params; + return -EINVAL; /* Initialize FM port parameters which will be kept by the driver */ 
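stop_port_hwp() and start_port_hwp() above use the classic bounded-poll pattern. One observation a reviewer might flag: with `while (cnt-- > 0 && ...)`, cnt is -1 rather than 0 when the poll times out, and it is 0 when the final attempt succeeds, so the `if (!cnt)` test both misses real timeouts and can fire on a last-try success; testing `cnt < 0` avoids both. A standalone sketch of the corrected check, where parser_active() and delay_us() stand in for the ioread32be()/udelay() pair:

```c
#include <stdbool.h>
#include <stdio.h>

/* stand-in for ioread32be(&regs->fmpr_pcac) & HWP_HXS_PCAC_PSTAT;
 * here the status bit is already clear, so the poll succeeds at once */
static bool parser_active(void) { return false; }
static void delay_us(int us) { (void)us; }

/* returns 0 when the bit cleared in time, -1 on timeout */
static int poll_parser_stopped(int retries)
{
	while (retries-- > 0 && parser_active())
		delay_us(10);
	return retries < 0 ? -1 : 0;
}

int main(void)
{
	if (poll_parser_stopped(100) < 0)
		fprintf(stderr, "Timeout stopping HW Parser\n");
	return 0;
}
```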
port->port_type = port->dts_params.type; @@ -1276,6 +1341,7 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) /* set memory map pointers */ port->bmi_regs = base_addr + BMI_PORT_REGS_OFFSET; port->qmi_regs = base_addr + QMI_PORT_REGS_OFFSET; + port->hwp_regs = base_addr + HWP_PORT_REGS_OFFSET; port->max_frame_length = DFLT_PORT_MAX_FRAME_LENGTH; /* resource distribution. */ @@ -1327,8 +1393,6 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) err_port_cfg: kfree(port->cfg); -err_params: - kfree(port); return -EINVAL; } EXPORT_SYMBOL(fman_port_config); diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 81021f8..66d73d0 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -594,6 +594,7 @@ static const u16 phy2speed[] = { [PHY_INTERFACE_MODE_RGMII_RXID] = SPEED_1000, [PHY_INTERFACE_MODE_RGMII_TXID] = SPEED_1000, [PHY_INTERFACE_MODE_RTBI] = SPEED_1000, + [PHY_INTERFACE_MODE_QSGMII] = SPEED_1000, [PHY_INTERFACE_MODE_XGMII] = SPEED_10000 }; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig b/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig index 92118b7..b21d236 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig +++ b/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig @@ -20,6 +20,29 @@ config FSL_DPAA_CEETM help Enable QoS offloading support through the CEETM hardware block. +config FSL_DPAA_CEETM_CCS_THRESHOLD_1G + hex "CEETM egress congestion threshold on 1G ports" + depends on FSL_DPAA_CEETM + range 0x1000 0x10000000 + default "0x000a0000" + help + The size in bytes of the CEETM egress Class Congestion State threshold on 1G ports. + The threshold needs to be configured keeping in mind the following factors: + - A threshold too large will buffer frames for a long time in the TX queues, + when a small shaping rate is configured. This will cause buffer pool depletion + or out of memory errors. This in turn will cause frame loss on RX; + - A threshold too small will cause unnecessary frame loss by entering + congestion too often. + +config FSL_DPAA_CEETM_CCS_THRESHOLD_10G + hex "CEETM egress congestion threshold on 10G ports" + depends on FSL_DPAA_CEETM + range 0x1000 0x20000000 + default "0x00640000" + help + The size in bytes of the CEETM egress Class Congestion State threshold on 10G ports. + See FSL_DPAA_CEETM_CCS_THRESHOLD_1G for details. + config FSL_DPAA_OFFLINE_PORTS bool "Offline Ports support" depends on FSL_SDK_DPAA_ETH diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c index 7026f91..1f76aa3 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c @@ -775,6 +775,17 @@ static int dpa_private_netdev_init(struct net_device *net_dev) /* Advertise NETIF_F_HW_ACCEL_MQ to avoid Tx timeout warnings */ net_dev->features |= NETIF_F_HW_ACCEL_MQ; +#ifndef CONFIG_PPC + /* Due to the A010022 FMan errata, we can not use contig frames larger + * than 4K, nor S/G frames. We need to stop advertising S/G and GSO + * support. 
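The two CEETM Kconfig help texts above describe the congestion-threshold tradeoff only qualitatively. A back-of-envelope check shows both defaults correspond to roughly the same drain time at line rate, about 5.2 ms of buffering; the byte values below are copied from the Kconfig defaults, the millisecond figures are the illustrative point:

```c
#include <stdio.h>

int main(void)
{
	const struct { const char *port; double bytes; double gbps; } cfg[] = {
		{ "1G",  0x000a0000, 1.0 },  /* ..._CCS_THRESHOLD_1G  */
		{ "10G", 0x00640000, 10.0 }, /* ..._CCS_THRESHOLD_10G */
	};
	int i;

	for (i = 0; i < 2; i++) {
		double ms = cfg[i].bytes * 8.0 / (cfg[i].gbps * 1e9) * 1e3;

		/* 1G: 655360 bytes ~ 5.2 ms; 10G: 6553600 bytes ~ 5.2 ms */
		printf("%s: %.0f bytes ~ %.1f ms at line rate\n",
		       cfg[i].port, cfg[i].bytes, ms);
	}
	return 0;
}
```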
+ */ + if (unlikely(dpaa_errata_a010022)) { + net_dev->hw_features &= ~NETIF_F_SG; + net_dev->features &= ~NETIF_F_GSO; + } +#endif + return dpa_netdev_init(net_dev, mac_addr, tx_timeout); } @@ -863,7 +874,7 @@ static int dpa_priv_bp_create(struct net_device *net_dev, struct dpa_bp *dpa_bp, for (i = 0; i < count; i++) { int err; - err = dpa_bp_alloc(&dpa_bp[i]); + err = dpa_bp_alloc(&dpa_bp[i], net_dev->dev.parent); if (err < 0) { dpa_bp_free(priv); priv->dpa_bp = NULL; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h index b1703bc..57c9bef 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h @@ -514,17 +514,6 @@ dpa_fd_offset(const struct qm_fd *fd) return fd->offset; } -/* Verifies if the skb length is below the interface MTU */ -static inline int dpa_check_rx_mtu(struct sk_buff *skb, int mtu) -{ - if (unlikely(skb->len > mtu)) - if ((skb->protocol != htons(ETH_P_8021Q)) - || (skb->len > mtu + 4)) - return -1; - - return 0; -} - static inline uint16_t dpa_get_headroom(struct dpa_buffer_layout_s *bl) { uint16_t headroom; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_base.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_base.c index 507e77c..be6e4f8 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_base.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_base.c @@ -176,7 +176,7 @@ int dpa_bp_create(struct net_device *net_dev, struct dpa_bp *dpa_bp, for (i = 0; i < count; i++) { int err; - err = dpa_bp_alloc(&dpa_bp[i]); + err = dpa_bp_alloc(&dpa_bp[i], net_dev->dev.parent); if (err < 0) { dpa_bp_free(priv); priv->dpa_bp = NULL; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.c index cac613b..b090db1 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.c @@ -69,24 +69,27 @@ static void get_dcp_and_sp(struct net_device *dev, enum qm_dc_portal *dcp_id, static void ceetm_ern(struct qman_portal *portal, struct qman_fq *fq, const struct qm_mr_entry *msg) { - struct net_device *net_dev; - struct ceetm_class *cls; + struct dpa_percpu_priv_s *dpa_percpu_priv; struct ceetm_class_stats *cstats = NULL; const struct dpa_priv_s *dpa_priv; - struct dpa_percpu_priv_s *dpa_percpu_priv; - struct sk_buff *skb; struct qm_fd fd = msg->ern.fd; + struct net_device *net_dev; + struct ceetm_fq *ceetm_fq; + struct ceetm_class *cls; + struct sk_buff *skb; - net_dev = ((struct ceetm_fq *)fq)->net_dev; + ceetm_fq = container_of(fq, struct ceetm_fq, fq); + net_dev = ceetm_fq->net_dev; dpa_priv = netdev_priv(net_dev); dpa_percpu_priv = raw_cpu_ptr(dpa_priv->percpu_priv); /* Increment DPA counters */ dpa_percpu_priv->stats.tx_dropped++; dpa_percpu_priv->stats.tx_fifo_errors++; + count_ern(dpa_percpu_priv, msg); /* Increment CEETM counters */ - cls = ((struct ceetm_fq *)fq)->ceetm_cls; + cls = ceetm_fq->ceetm_cls; switch (cls->type) { case CEETM_PRIO: cstats = this_cpu_ptr(cls->prio.cstats); @@ -99,11 +102,15 @@ static void ceetm_ern(struct qman_portal *portal, struct qman_fq *fq, if (cstats) cstats->ern_drop_count++; + /* Release the buffers that were supposed to be recycled. */ if (fd.bpid != 0xff) { dpa_fd_release(net_dev, &fd); return; } + /* Release the frames that were supposed to return on the + * confirmation path. 
+ */ skb = _dpa_cleanup_tx_fd(dpa_priv, &fd); dev_kfree_skb_any(skb); } @@ -125,16 +132,16 @@ static void ceetm_cscn(struct qm_ceetm_ccg *ccg, void *cb_ctx, int congested) break; } + ceetm_fq->congested = congested; + if (congested) { dpa_priv->cgr_data.congestion_start_jiffies = jiffies; - netif_tx_stop_all_queues(dpa_priv->net_dev); dpa_priv->cgr_data.cgr_congested_count++; if (cstats) cstats->congested_count++; } else { dpa_priv->cgr_data.congested_jiffies += (jiffies - dpa_priv->cgr_data.congestion_start_jiffies); - netif_tx_wake_all_queues(dpa_priv->net_dev); } } @@ -148,6 +155,7 @@ static int ceetm_alloc_fq(struct ceetm_fq **fq, struct net_device *dev, (*fq)->net_dev = dev; (*fq)->ceetm_cls = cls; + (*fq)->congested = 0; return 0; } @@ -185,9 +193,9 @@ static int ceetm_config_ccg(struct qm_ceetm_ccg **ccg, /* Set the congestion state thresholds according to the link speed */ if (dpa_priv->mac_dev->if_support & SUPPORTED_10000baseT_Full) - cs_th = CONFIG_FSL_DPAA_CS_THRESHOLD_10G; + cs_th = CONFIG_FSL_DPAA_CEETM_CCS_THRESHOLD_10G; else - cs_th = CONFIG_FSL_DPAA_CS_THRESHOLD_1G; + cs_th = CONFIG_FSL_DPAA_CEETM_CCS_THRESHOLD_1G; qm_cgr_cs_thres_set64(&ccg_params.cs_thres_in, cs_th, 1); qm_cgr_cs_thres_set64(&ccg_params.cs_thres_out, @@ -1908,17 +1916,22 @@ static struct ceetm_class *ceetm_classify(struct sk_buff *skb, int __hot ceetm_tx(struct sk_buff *skb, struct net_device *net_dev) { - int ret; - bool act_drop = false; + const int queue_mapping = dpa_get_queue_mapping(skb); struct Qdisc *sch = net_dev->qdisc; - struct ceetm_class *cl; - struct dpa_priv_s *priv_dpa; - struct qman_fq *egress_fq, *conf_fq; - struct ceetm_qdisc *priv = qdisc_priv(sch); - struct ceetm_qdisc_stats *qstats = this_cpu_ptr(priv->root.qstats); struct ceetm_class_stats *cstats; - const int queue_mapping = dpa_get_queue_mapping(skb); - spinlock_t *root_lock = qdisc_lock(sch); + struct ceetm_qdisc_stats *qstats; + struct dpa_priv_s *priv_dpa; + struct ceetm_fq *ceetm_fq; + struct ceetm_qdisc *priv; + struct qman_fq *conf_fq; + struct ceetm_class *cl; + spinlock_t *root_lock; + bool act_drop = false; + int ret; + + root_lock = qdisc_lock(sch); + priv = qdisc_priv(sch); + qstats = this_cpu_ptr(priv->root.qstats); spin_lock(root_lock); cl = ceetm_classify(skb, sch, &ret, &act_drop); @@ -1945,11 +1958,11 @@ int __hot ceetm_tx(struct sk_buff *skb, struct net_device *net_dev) */ switch (cl->type) { case CEETM_PRIO: - egress_fq = &cl->prio.fq->fq; + ceetm_fq = cl->prio.fq; cstats = this_cpu_ptr(cl->prio.cstats); break; case CEETM_WBFS: - egress_fq = &cl->wbfs.fq->fq; + ceetm_fq = cl->wbfs.fq; cstats = this_cpu_ptr(cl->wbfs.cstats); break; default: @@ -1957,8 +1970,16 @@ int __hot ceetm_tx(struct sk_buff *skb, struct net_device *net_dev) goto drop; } + /* If the FQ is congested, avoid enqueuing the frame and dropping it + * when it returns on the ERN path. Drop it here directly instead. 
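The comment above, together with the congested check added to ceetm_tx() in the next hunk, trades a round trip through the ERN handler for an immediate drop. Reduced to its control flow, it looks like this; a sketch only, with struct and stats names abbreviated from the driver's:

```c
#include <stdbool.h>
#include <stdint.h>

struct ceetm_fq { bool congested; };
struct qdisc_stats { uint64_t drops; };

/* mirror of ceetm_cscn(): the notification just latches the state */
static void on_cscn(struct ceetm_fq *fq, bool congested)
{
	fq->congested = congested;
}

/* mirror of the new ceetm_tx() check: drop before enqueuing rather
 * than letting the hardware bounce the frame back on the ERN path */
static int ceetm_tx_sketch(struct ceetm_fq *fq, struct qdisc_stats *qstats)
{
	if (fq->congested) {
		qstats->drops++;
		return -1;	/* caller frees the skb */
	}
	/* ... enqueue to fq here ... */
	return 0;
}

int main(void)
{
	struct ceetm_fq fq = { .congested = false };
	struct qdisc_stats qs = { 0 };

	on_cscn(&fq, true);
	return ceetm_tx_sketch(&fq, &qs) == -1 && qs.drops == 1 ? 0 : 1;
}
```

This per-FQ flag is also why the cscn callback in the earlier hunk no longer calls netif_tx_stop_all_queues()/netif_tx_wake_all_queues(): the drop decision moved from the whole device to the individual congested FQ.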
+ */ + if (unlikely(ceetm_fq->congested)) { + qstats->drops++; + goto drop; + } + bstats_update(&cstats->bstats, skb); - return dpa_tx_extended(skb, net_dev, egress_fq, conf_fq); + return dpa_tx_extended(skb, net_dev, &ceetm_fq->fq, conf_fq); drop: dev_kfree_skb_any(skb); diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.h index 63cc3475..0ff3b9d 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.h +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_ceetm.h @@ -105,6 +105,7 @@ struct ceetm_fq { struct qman_fq fq; struct net_device *net_dev; struct ceetm_class *ceetm_cls; + int congested; /* Congestion status */ }; struct root_q { diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c index 19a8a3c..bbaf29c 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c @@ -469,14 +469,18 @@ int dpa_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) #ifdef CONFIG_FSL_DPAA_1588 struct dpa_priv_s *priv = netdev_priv(dev); #endif - int ret = 0; + int ret = -EINVAL; - /* at least one timestamping feature must be enabled */ -#ifdef CONFIG_FSL_DPAA_TS if (!netif_running(dev)) -#endif return -EINVAL; + if (cmd == SIOCGMIIREG) { + if (!dev->phydev) + ret = -EINVAL; + else + ret = phy_mii_ioctl(dev->phydev, rq, cmd); + } + #ifdef CONFIG_FSL_DPAA_TS if (cmd == SIOCSHWTSTAMP) return dpa_ts_ioctl(dev, rq, cmd); @@ -711,11 +715,10 @@ void dpa_set_buffers_layout(struct mac_device *mac_dev, EXPORT_SYMBOL(dpa_set_buffers_layout); int __attribute__((nonnull)) -dpa_bp_alloc(struct dpa_bp *dpa_bp) +dpa_bp_alloc(struct dpa_bp *dpa_bp, struct device *dev) { int err; struct bman_pool_params bp_params; - struct platform_device *pdev; if (dpa_bp->size == 0 || dpa_bp->config_count == 0) { pr_err("Buffer pool is not properly initialized! 
Missing size or initial number of buffers"); @@ -748,44 +751,25 @@ dpa_bp_alloc(struct dpa_bp *dpa_bp) dpa_bp->bpid = (uint8_t)bman_get_params(dpa_bp->pool)->bpid; - pdev = platform_device_register_simple("dpaa_eth_bpool", - dpa_bp->bpid, NULL, 0); - if (IS_ERR(pdev)) { - pr_err("platform_device_register_simple() failed\n"); - err = PTR_ERR(pdev); - goto pdev_register_failed; - } - { - struct dma_map_ops *ops = get_dma_ops(&pdev->dev); - ops->dma_supported = NULL; - } - err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); if (err) { pr_err("dma_coerce_mask_and_coherent() failed\n"); - goto pdev_mask_failed; + goto bman_free_pool; } -#ifdef CONFIG_FMAN_ARM - /* force coherency */ - pdev->dev.archdata.dma_coherent = true; - arch_setup_dma_ops(&pdev->dev, 0, 0, NULL, true); -#endif - dpa_bp->dev = &pdev->dev; + dpa_bp->dev = dev; if (dpa_bp->seed_cb) { err = dpa_bp->seed_cb(dpa_bp); if (err) - goto pool_seed_failed; + goto bman_free_pool; } dpa_bpid2pool_map(dpa_bp->bpid, dpa_bp); return 0; -pool_seed_failed: -pdev_mask_failed: - platform_device_unregister(pdev); -pdev_register_failed: +bman_free_pool: bman_free_pool(dpa_bp->pool); return err; @@ -847,9 +831,6 @@ _dpa_bp_free(struct dpa_bp *dpa_bp) dpa_bp_array[bp->bpid] = NULL; bman_free_pool(bp->pool); - - if (bp->dev) - platform_device_unregister(to_platform_device(bp->dev)); } void __cold __attribute__((nonnull)) diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.h index 41db430..b039328 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.h +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.h @@ -164,7 +164,7 @@ void dpa_set_rx_mode(struct net_device *net_dev); void dpa_set_buffers_layout(struct mac_device *mac_dev, struct dpa_buffer_layout_s *layout); int __attribute__((nonnull)) -dpa_bp_alloc(struct dpa_bp *dpa_bp); +dpa_bp_alloc(struct dpa_bp *dpa_bp, struct device *dev); void __cold __attribute__((nonnull)) dpa_bp_free(struct dpa_priv_s *priv); struct dpa_bp *dpa_bpid2pool(int bpid); diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c index 11b47e8..32e62e6 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c @@ -616,13 +616,6 @@ void __hot _dpa_rx(struct net_device *net_dev, (*count_ptr)--; skb->protocol = eth_type_trans(skb, net_dev); - /* IP Reassembled frames are allowed to be larger than MTU */ - if (unlikely(dpa_check_rx_mtu(skb, net_dev->mtu) && - !(fd_status & FM_FD_IPR))) { - percpu_stats->rx_dropped++; - goto drop_bad_frame; - } - skb_len = skb->len; #ifdef CONFIG_FSL_DPAA_DBG_LOOP @@ -655,10 +648,6 @@ void __hot _dpa_rx(struct net_device *net_dev, packet_dropped: return; -drop_bad_frame: - dev_kfree_skb(skb); - return; - _release_frame: dpa_fd_release(net_dev, fd); } diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/mac.c b/drivers/net/ethernet/freescale/sdk_dpaa/mac.c index 60133b0..a6bfcea 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/mac.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/mac.c @@ -76,7 +76,7 @@ static const char phy_str[][11] = { [PHY_INTERFACE_MODE_RGMII_TXID] = "rgmii-txid", [PHY_INTERFACE_MODE_RTBI] = "rtbi", [PHY_INTERFACE_MODE_XGMII] = "xgmii", - [PHY_INTERFACE_MODE_SGMII_2500] = "sgmii-2500", + [PHY_INTERFACE_MODE_2500SGMII] = "sgmii-2500", }; static phy_interface_t __pure 
__attribute__((nonnull)) str2phy(const char *str) @@ -103,7 +103,7 @@ static const uint16_t phy2speed[] = { [PHY_INTERFACE_MODE_RGMII_TXID] = SPEED_1000, [PHY_INTERFACE_MODE_RTBI] = SPEED_1000, [PHY_INTERFACE_MODE_XGMII] = SPEED_10000, - [PHY_INTERFACE_MODE_SGMII_2500] = SPEED_2500, + [PHY_INTERFACE_MODE_2500SGMII] = SPEED_2500, }; static struct mac_device * __cold diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/dtsec.c b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/dtsec.c index f853825..38948f9 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/dtsec.c +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/dtsec.c @@ -1389,6 +1389,7 @@ static void InitFmMacControllerDriver(t_FmMacControllerDriver *p_FmMacController p_FmMacControllerDriver->f_FM_MAC_ResetCounters = DtsecResetCounters; p_FmMacControllerDriver->f_FM_MAC_GetStatistics = DtsecGetStatistics; + p_FmMacControllerDriver->f_FM_MAC_GetFrameSizeCounters = NULL; p_FmMacControllerDriver->f_FM_MAC_ModifyMacAddr = DtsecModifyMacAddress; p_FmMacControllerDriver->f_FM_MAC_AddHashMacAddr = DtsecAddHashMacAddress; diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.c b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.c index 20bf150..caf3940 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.c +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.c @@ -461,6 +461,22 @@ t_Error FM_MAC_GetStatistics (t_Handle h_FmMac, t_FmMacStatistics *p_Statistics) /* ......................................................................... */ +t_Error FM_MAC_GetFrameSizeCounters(t_Handle h_FmMac, t_FmMacFrameSizeCounters *p_FrameSizeCounters, e_CommMode type) +{ + t_FmMacControllerDriver *p_FmMacControllerDriver = (t_FmMacControllerDriver *)h_FmMac; + + SANITY_CHECK_RETURN_ERROR(p_FmMacControllerDriver, E_INVALID_HANDLE); + + memset(p_FrameSizeCounters, 0, sizeof(t_FmMacFrameSizeCounters)); + + if (p_FmMacControllerDriver->f_FM_MAC_GetFrameSizeCounters) + return p_FmMacControllerDriver->f_FM_MAC_GetFrameSizeCounters(h_FmMac, p_FrameSizeCounters, type); + + RETURN_ERROR(MINOR, E_NOT_SUPPORTED, NO_MSG); +} + +/* ......................................................................... 
*/ + t_Error FM_MAC_ModifyMacAddr (t_Handle h_FmMac, t_EnetAddr *p_EnetAddr) { t_FmMacControllerDriver *p_FmMacControllerDriver = (t_FmMacControllerDriver *)h_FmMac; diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.h b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.h index 77b9a89..ba3b913 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.h +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fm_mac.h @@ -106,6 +106,7 @@ typedef struct { t_Error (*f_FM_MAC_ResetCounters) (t_Handle h_FmMac); t_Error (*f_FM_MAC_GetStatistics) (t_Handle h_FmMac, t_FmMacStatistics *p_Statistics); + t_Error (*f_FM_MAC_GetFrameSizeCounters) (t_Handle h_FmMac, t_FmMacFrameSizeCounters *p_FrameSizeCounters, e_CommMode type); t_Error (*f_FM_MAC_ModifyMacAddr) (t_Handle h_FmMac, t_EnetAddr *p_EnetAddr); t_Error (*f_FM_MAC_AddHashMacAddr) (t_Handle h_FmMac, t_EnetAddr *p_EnetAddr); diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fman_memac.c b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fman_memac.c index 00995a1..f31a92a 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fman_memac.c +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/fman_memac.c @@ -360,24 +360,45 @@ uint64_t fman_memac_get_counter(struct memac_regs *regs, case E_MEMAC_COUNTER_R64: ret_val = GET_MEMAC_CNTR_64(r64); break; + case E_MEMAC_COUNTER_T64: + ret_val = GET_MEMAC_CNTR_64(t64); + break; case E_MEMAC_COUNTER_R127: ret_val = GET_MEMAC_CNTR_64(r127); break; + case E_MEMAC_COUNTER_T127: + ret_val = GET_MEMAC_CNTR_64(t127); + break; case E_MEMAC_COUNTER_R255: ret_val = GET_MEMAC_CNTR_64(r255); break; + case E_MEMAC_COUNTER_T255: + ret_val = GET_MEMAC_CNTR_64(t255); + break; case E_MEMAC_COUNTER_R511: ret_val = GET_MEMAC_CNTR_64(r511); break; + case E_MEMAC_COUNTER_T511: + ret_val = GET_MEMAC_CNTR_64(t511); + break; case E_MEMAC_COUNTER_R1023: ret_val = GET_MEMAC_CNTR_64(r1023); break; + case E_MEMAC_COUNTER_T1023: + ret_val = GET_MEMAC_CNTR_64(t1023); + break; case E_MEMAC_COUNTER_R1518: ret_val = GET_MEMAC_CNTR_64(r1518); break; + case E_MEMAC_COUNTER_T1518: + ret_val = GET_MEMAC_CNTR_64(t1518); + break; case E_MEMAC_COUNTER_R1519X: ret_val = GET_MEMAC_CNTR_64(r1519x); break; + case E_MEMAC_COUNTER_T1519X: + ret_val = GET_MEMAC_CNTR_64(t1519x); + break; case E_MEMAC_COUNTER_RFRG: ret_val = GET_MEMAC_CNTR_64(rfrg); break; diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/memac.c b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/memac.c index 85426c5..0f299e7 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/memac.c +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/memac.c @@ -593,6 +593,62 @@ static t_Error MemacGetStatistics(t_Handle h_Memac, t_FmMacStatistics *p_Statist /* ......................................................................... 
*/ +static t_Error MemacGetFrameSizeCounters(t_Handle h_Memac, t_FmMacFrameSizeCounters *p_FrameSizeCounters, e_CommMode type) +{ + t_Memac *p_Memac = (t_Memac *)h_Memac; + + SANITY_CHECK_RETURN_ERROR(p_Memac, E_NULL_POINTER); + SANITY_CHECK_RETURN_ERROR(!p_Memac->p_MemacDriverParam, E_INVALID_STATE); + SANITY_CHECK_RETURN_ERROR(p_FrameSizeCounters, E_NULL_POINTER); + + switch (type) + { + case e_COMM_MODE_NONE: + break; + + case e_COMM_MODE_RX: + p_FrameSizeCounters->count_pkts_64 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R64); + p_FrameSizeCounters->count_pkts_65_to_127 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R127); + p_FrameSizeCounters->count_pkts_128_to_255 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R255); + p_FrameSizeCounters->count_pkts_256_to_511 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R511); + p_FrameSizeCounters->count_pkts_512_to_1023 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1023); + p_FrameSizeCounters->count_pkts_1024_to_1518 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1518); + p_FrameSizeCounters->count_pkts_1519_to_1522 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1519X); + break; + + case e_COMM_MODE_TX: + p_FrameSizeCounters->count_pkts_64 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T64); + p_FrameSizeCounters->count_pkts_65_to_127 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T127); + p_FrameSizeCounters->count_pkts_128_to_255 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T255); + p_FrameSizeCounters->count_pkts_256_to_511 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T511); + p_FrameSizeCounters->count_pkts_512_to_1023 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1023); + p_FrameSizeCounters->count_pkts_1024_to_1518 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1518); + p_FrameSizeCounters->count_pkts_1519_to_1522 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1519X); + break; + + case e_COMM_MODE_RX_AND_TX: + p_FrameSizeCounters->count_pkts_64 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R64) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T64); + p_FrameSizeCounters->count_pkts_65_to_127 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R127) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T127); + p_FrameSizeCounters->count_pkts_128_to_255 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R255) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T255); + p_FrameSizeCounters->count_pkts_256_to_511 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R511) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T511); + p_FrameSizeCounters->count_pkts_512_to_1023 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1023) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1023); + p_FrameSizeCounters->count_pkts_1024_to_1518 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1518) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1518); + p_FrameSizeCounters->count_pkts_1519_to_1522 = fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_R1519X) + + fman_memac_get_counter(p_Memac->p_MemMap, E_MEMAC_COUNTER_T1519X); + break; + } + + return E_OK; +} + +/* ......................................................................... 
*/ + static t_Error MemacModifyMacAddress (t_Handle h_Memac, t_EnetAddr *p_EnetAddr) { t_Memac *p_Memac = (t_Memac *)h_Memac; @@ -1025,6 +1081,7 @@ static void InitFmMacControllerDriver(t_FmMacControllerDriver *p_FmMacController p_FmMacControllerDriver->f_FM_MAC_ResetCounters = MemacResetCounters; p_FmMacControllerDriver->f_FM_MAC_GetStatistics = MemacGetStatistics; + p_FmMacControllerDriver->f_FM_MAC_GetFrameSizeCounters = MemacGetFrameSizeCounters; p_FmMacControllerDriver->f_FM_MAC_ModifyMacAddr = MemacModifyMacAddress; p_FmMacControllerDriver->f_FM_MAC_AddHashMacAddr = MemacAddHashMacAddress; diff --git a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/tgec.c b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/tgec.c index 9b136a6..eb00759 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/tgec.c +++ b/drivers/net/ethernet/freescale/sdk_fman/Peripherals/FM/MAC/tgec.c @@ -438,6 +438,47 @@ static t_Error TgecGetStatistics(t_Handle h_Tgec, t_FmMacStatistics *p_Statistic /* ......................................................................... */ +static t_Error TgecGetFrameSizeCounters(t_Handle h_Tgec, t_FmMacFrameSizeCounters *p_FrameSizeCounters, e_CommMode type) +{ + t_Tgec *p_Tgec = (t_Tgec *)h_Tgec; + struct tgec_regs *p_TgecMemMap; + + SANITY_CHECK_RETURN_ERROR(p_Tgec, E_NULL_POINTER); + SANITY_CHECK_RETURN_ERROR(!p_Tgec->p_TgecDriverParam, E_INVALID_STATE); + SANITY_CHECK_RETURN_ERROR(p_FrameSizeCounters, E_NULL_POINTER); + + p_TgecMemMap = p_Tgec->p_MemMap; + + switch (type) + { + case e_COMM_MODE_NONE: + break; + + case e_COMM_MODE_RX: + p_FrameSizeCounters->count_pkts_64 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R64); + p_FrameSizeCounters->count_pkts_65_to_127 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R127); + p_FrameSizeCounters->count_pkts_128_to_255 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R255); + p_FrameSizeCounters->count_pkts_256_to_511 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R511); + p_FrameSizeCounters->count_pkts_512_to_1023 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R1023); + p_FrameSizeCounters->count_pkts_1024_to_1518 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R1518); + p_FrameSizeCounters->count_pkts_1519_to_1522 = fman_tgec_get_counter(p_TgecMemMap, E_TGEC_COUNTER_R1519X); + break; + + case e_COMM_MODE_TX: + //Tx counters not supported + break; + + case e_COMM_MODE_RX_AND_TX: + //Tx counters not supported + break; + } + + return E_OK; +} + + +/* ......................................................................... 
*/ + static t_Error TgecEnable1588TimeStamp(t_Handle h_Tgec) { t_Tgec *p_Tgec = (t_Tgec *)h_Tgec; @@ -905,6 +946,7 @@ static void InitFmMacControllerDriver(t_FmMacControllerDriver *p_FmMacController p_FmMacControllerDriver->f_FM_MAC_ResetCounters = TgecResetCounters; p_FmMacControllerDriver->f_FM_MAC_GetStatistics = TgecGetStatistics; + p_FmMacControllerDriver->f_FM_MAC_GetFrameSizeCounters = TgecGetFrameSizeCounters; p_FmMacControllerDriver->f_FM_MAC_ModifyMacAddr = TgecModifyMacAddress; p_FmMacControllerDriver->f_FM_MAC_AddHashMacAddr = TgecAddHashMacAddress; diff --git a/drivers/net/ethernet/freescale/sdk_fman/inc/Peripherals/fm_mac_ext.h b/drivers/net/ethernet/freescale/sdk_fman/inc/Peripherals/fm_mac_ext.h index da7e046..be99b7c 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/inc/Peripherals/fm_mac_ext.h +++ b/drivers/net/ethernet/freescale/sdk_fman/inc/Peripherals/fm_mac_ext.h @@ -197,6 +197,19 @@ typedef struct t_FmMacStatistics { - Other */ } t_FmMacStatistics; +/**************************************************************************//** + @Description FM MAC Frame Size Counters +*//***************************************************************************/ +typedef struct t_FmMacFrameSizeCounters { + + uint64_t count_pkts_64; /**< 64 byte frame counter */ + uint64_t count_pkts_65_to_127; /**< 65 to 127 byte frame counter */ + uint64_t count_pkts_128_to_255; /**< 128 to 255 byte frame counter */ + uint64_t count_pkts_256_to_511; /**< 256 to 511 byte frame counter */ + uint64_t count_pkts_512_to_1023; /**< 512 to 1023 byte frame counter */ + uint64_t count_pkts_1024_to_1518; /**< 1024 to 1518 byte frame counter */ + uint64_t count_pkts_1519_to_1522; /**< 1519 to 1522 byte good frame count */ +} t_FmMacFrameSizeCounters; /**************************************************************************//** @Group FM_mac_init_grp FM MAC Initialization Unit @@ -654,6 +667,21 @@ t_Error FM_MAC_SetStatistics(t_Handle h_FmMac, e_FmMacStatisticsLevel statistics t_Error FM_MAC_GetStatistics(t_Handle h_FmMac, t_FmMacStatistics *p_Statistics); /**************************************************************************//** + @Function FM_MAC_GetFrameSizeCounters + + @Description Get MAC statistics counters for different frame sizes + + @Param[in] h_FmMac - A handle to a FM MAC Module. + @Param[out] p_FrameSizeCounters - Structure to be filled with the counters + @Param[in] type - Type of counters to be read + + @Return E_OK on success; Error code otherwise. + + @Cautions Allowed only following FM_Init().
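The fm_mac.c wrapper for the new operation is not part of this hunk; following the FM_MAC_ModifyMacAddr pattern above, it would only validate the handle and dispatch through the freshly added function pointer. A sketch under that assumption:

        t_Error FM_MAC_GetFrameSizeCounters(t_Handle h_FmMac,
                                            t_FmMacFrameSizeCounters *p_FrameSizeCounters,
                                            e_CommMode type)
        {
                t_FmMacControllerDriver *p_Drv = (t_FmMacControllerDriver *)h_FmMac;

                SANITY_CHECK_RETURN_ERROR(p_Drv, E_NULL_POINTER);

                /* memac, dtsec and tgec each install their own implementation */
                if (p_Drv->f_FM_MAC_GetFrameSizeCounters)
                        return p_Drv->f_FM_MAC_GetFrameSizeCounters(h_FmMac,
                                                                    p_FrameSizeCounters,
                                                                    type);

                RETURN_ERROR(MINOR, E_NOT_SUPPORTED, NO_MSG);
        }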
+*//***************************************************************************/ +t_Error FM_MAC_GetFrameSizeCounters(t_Handle h_FmMac, t_FmMacFrameSizeCounters *p_FrameSizeCounters, e_CommMode type); + +/**************************************************************************//** @Function FM_MAC_ModifyMacAddr @Description Replace the main MAC Address diff --git a/drivers/net/ethernet/freescale/sdk_fman/inc/flib/fsl_fman_memac.h b/drivers/net/ethernet/freescale/sdk_fman/inc/flib/fsl_fman_memac.h index 0dd8286..058da15 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/inc/flib/fsl_fman_memac.h +++ b/drivers/net/ethernet/freescale/sdk_fman/inc/flib/fsl_fman_memac.h @@ -146,12 +146,19 @@ _val &= ~TX_FIFO_SECTIONS_TX_EMPTY_MASK; \ enum memac_counters { E_MEMAC_COUNTER_R64, + E_MEMAC_COUNTER_T64, E_MEMAC_COUNTER_R127, + E_MEMAC_COUNTER_T127, E_MEMAC_COUNTER_R255, + E_MEMAC_COUNTER_T255, E_MEMAC_COUNTER_R511, + E_MEMAC_COUNTER_T511, E_MEMAC_COUNTER_R1023, + E_MEMAC_COUNTER_T1023, E_MEMAC_COUNTER_R1518, + E_MEMAC_COUNTER_T1518, E_MEMAC_COUNTER_R1519X, + E_MEMAC_COUNTER_T1519X, E_MEMAC_COUNTER_RFRG, E_MEMAC_COUNTER_RJBR, E_MEMAC_COUNTER_RDRP, diff --git a/drivers/net/ethernet/freescale/sdk_fman/src/inc/wrapper/lnxwrp_exp_sym.h b/drivers/net/ethernet/freescale/sdk_fman/src/inc/wrapper/lnxwrp_exp_sym.h index 0c0c6c1..dd0f03a 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/src/inc/wrapper/lnxwrp_exp_sym.h +++ b/drivers/net/ethernet/freescale/sdk_fman/src/inc/wrapper/lnxwrp_exp_sym.h @@ -123,6 +123,8 @@ EXPORT_SYMBOL(FM_PCD_SetAdvancedOffloadSupport); /* FMAN MAC exported routines */ EXPORT_SYMBOL(FM_MAC_GetStatistics); +EXPORT_SYMBOL(FM_MAC_GetFrameSizeCounters); + EXPORT_SYMBOL(FM_GetSpecialOperationCoding); #endif /* __LNXWRP_EXP_SYM_H */ diff --git a/drivers/net/ethernet/freescale/sdk_fman/src/wrapper/lnxwrp_ioctls_fm.c b/drivers/net/ethernet/freescale/sdk_fman/src/wrapper/lnxwrp_ioctls_fm.c index 1ddde85..06833ba 100644 --- a/drivers/net/ethernet/freescale/sdk_fman/src/wrapper/lnxwrp_ioctls_fm.c +++ b/drivers/net/ethernet/freescale/sdk_fman/src/wrapper/lnxwrp_ioctls_fm.c @@ -4608,6 +4608,47 @@ t_Error LnxwrpFmPortIOCTL(t_LnxWrpFmPortDev *p_LnxWrpFmPortDev, unsigned int cmd break; } + case FM_PORT_IOC_GET_MAC_FRAME_SIZE_COUNTERS: + { + t_LnxWrpFmDev *p_LnxWrpFmDev = + (t_LnxWrpFmDev *)p_LnxWrpFmPortDev->h_LnxWrpFmDev; + ioc_fm_port_mac_frame_size_counters_t param; + t_FmMacFrameSizeCounters frameSizeCounters; + int mac_id = p_LnxWrpFmPortDev->id; + + if (!p_LnxWrpFmDev) + RETURN_ERROR(MINOR, E_NOT_AVAILABLE, ("Port not initialized or other error!")); + + if (&p_LnxWrpFmDev->txPorts[mac_id] != p_LnxWrpFmPortDev && + &p_LnxWrpFmDev->rxPorts[mac_id] != p_LnxWrpFmPortDev) + mac_id += FM_MAX_NUM_OF_1G_MACS; /* 10G port */ + + if (!p_LnxWrpFmDev->macs[mac_id].h_Dev) + RETURN_ERROR(MINOR, E_NOT_AVAILABLE, ("Port not initialized or other error!")); + + if (copy_from_user(&param, (ioc_fm_port_mac_frame_size_counters_t *)arg, + sizeof(ioc_fm_port_mac_frame_size_counters_t))) + RETURN_ERROR(MINOR, E_WRITE_FAILED, NO_MSG); + + if (FM_MAC_GetFrameSizeCounters(p_LnxWrpFmDev->macs[mac_id].h_Dev, + &frameSizeCounters, param.type)) + RETURN_ERROR(MINOR, E_WRITE_FAILED, NO_MSG); + + param.count_pkts_64 = frameSizeCounters.count_pkts_64; + param.count_pkts_65_to_127 = frameSizeCounters.count_pkts_65_to_127; + param.count_pkts_128_to_255 = frameSizeCounters.count_pkts_128_to_255; + param.count_pkts_256_to_511 = frameSizeCounters.count_pkts_256_to_511; + param.count_pkts_512_to_1023 =
frameSizeCounters.count_pkts_512_to_1023; + param.count_pkts_1024_to_1518 = frameSizeCounters.count_pkts_1024_to_1518; + param.count_pkts_1519_to_1522 = frameSizeCounters.count_pkts_1519_to_1522; + + if (copy_to_user((ioc_fm_port_mac_frame_size_counters_t *)arg, &param, + sizeof(ioc_fm_port_mac_frame_size_counters_t))) + RETURN_ERROR(MINOR, E_WRITE_FAILED, NO_MSG); + + break; + } + case FM_PORT_IOC_GET_BMI_COUNTERS: { t_LnxWrpFmDev *p_LnxWrpFmDev = diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index b82be81..baac07a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -532,6 +532,7 @@ static const struct driver_info wwan_info = { #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 +#define TPLINK_VENDOR_ID 0x2357 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -733,6 +734,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, + /* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c9c8649..859d266 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -518,6 +518,7 @@ enum rtl8152_flags { #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -4495,6 +4496,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, + {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, {} }; diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 7ce32ff..2da7ce1 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -33,6 +33,8 @@ /* PEX Internal Configuration Registers */ #define PCIE_STRFMR1 0x71c /* Symbol Timer & Filter Mask Register1 */ +#define PCIE_ABSERR 0x8d0 /* Bridge Slave Error Response Register */ +#define PCIE_ABSERR_SETTING 0x9401 /* Forward error of non-posted request */ #define PCIE_DBI_RO_WR_EN 0x8bc /* DBI Read-Only Write Enable Register */ #define PCIE_IATU_NUM 6 @@ -154,6 +156,12 @@ static int ls_pcie_link_up(struct pcie_port *pp) return 1; } +/* Forward error response of outbound non-posted requests */ +static void ls_pcie_fix_error_response(struct ls_pcie *pcie) +{ + iowrite32(PCIE_ABSERR_SETTING, pcie->pp.dbi_base + PCIE_ABSERR); +} + static void ls_pcie_host_init(struct pcie_port *pp) { struct ls_pcie *pcie = to_ls_pcie(pp); @@ -165,6 +173,7 @@ static void ls_pcie_host_init(struct pcie_port *pp) iowrite32(0, pcie->pp.dbi_base + PCIE_DBI_RO_WR_EN); ls_pcie_disable_outbound_atus(pcie); + ls_pcie_fix_error_response(pcie); dw_pcie_setup_rc(pp); } @@ -242,6 +251,7 @@ static const struct of_device_id ls_pcie_of_match[] = { { .compatible = "fsl,ls2080a-pcie", .data = &ls2080_drvdata }, { .compatible = "fsl,ls2085a-pcie", .data = &ls2080_drvdata }, { .compatible = "fsl,ls2088a-pcie", .data = &ls2088_drvdata }, + { .compatible = "fsl,ls1088a-pcie", .data = &ls2088_drvdata }, { }, }; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e7d4048..9d840e7 100644 --- a/drivers/pci/pci.c +++
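From user space the FM_PORT_IOC_GET_MAC_FRAME_SIZE_COUNTERS case above is reached with a plain ioctl() on the FM port device node; a hedged sketch (the device path is hypothetical, and the ioc_fm_port_mac_frame_size_counters_t layout is assumed to mirror the fields copied above):

        #include <stdio.h>
        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/ioctl.h>

        ioc_fm_port_mac_frame_size_counters_t cnt = {
                .type = e_COMM_MODE_RX_AND_TX,  /* selector in, counters out */
        };
        int fd = open("/dev/fm0-port-rx0", O_RDWR);     /* hypothetical node */

        if (fd >= 0 && ioctl(fd, FM_PORT_IOC_GET_MAC_FRAME_SIZE_COUNTERS, &cnt) == 0)
                printf("1519-1522B good frames: %llu\n",
                       (unsigned long long)cnt.count_pkts_1519_to_1522);
        if (fd >= 0)
                close(fd);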
b/drivers/pci/pci.c @@ -454,7 +454,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, pci_bus_for_each_resource(bus, r, i) { if (!r) continue; - if (res->start && resource_contains(r, res)) { + if (resource_contains(r, res)) { /* * If the window is prefetchable but the BAR is diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 5d8151b..8eef457 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4634,3 +4634,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); + +/* Freescale PCIe doesn't support MSI in RC mode */ +static void quirk_fsl_no_msi(struct pci_dev *pdev) +{ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) + pdev->no_msi = 1; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, quirk_fsl_no_msi); diff --git a/drivers/soc/fsl/qbman/Kconfig b/drivers/soc/fsl/qbman/Kconfig index 757033c..fb4e6bf 100644 --- a/drivers/soc/fsl/qbman/Kconfig +++ b/drivers/soc/fsl/qbman/Kconfig @@ -1,6 +1,6 @@ menuconfig FSL_DPAA bool "Freescale DPAA 1.x support" - depends on FSL_SOC_BOOKE + depends on (FSL_SOC_BOOKE || ARCH_LAYERSCAPE) select GENERIC_ALLOCATOR help The Freescale Data Path Acceleration Architecture (DPAA) is a set of diff --git a/drivers/soc/fsl/qbman/bman.c b/drivers/soc/fsl/qbman/bman.c index ffa48fd..1fa9099 100644 --- a/drivers/soc/fsl/qbman/bman.c +++ b/drivers/soc/fsl/qbman/bman.c @@ -35,6 +35,27 @@ /* Portal register assists */ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +/* Cache-inhibited register offsets */ +#define BM_REG_RCR_PI_CINH 0x3000 +#define BM_REG_RCR_CI_CINH 0x3100 +#define BM_REG_RCR_ITR 0x3200 +#define BM_REG_CFG 0x3300 +#define BM_REG_SCN(n) (0x3400 + ((n) << 6)) +#define BM_REG_ISR 0x3e00 +#define BM_REG_IER 0x3e40 +#define BM_REG_ISDR 0x3e80 +#define BM_REG_IIR 0x3ec0 + +/* Cache-enabled register offsets */ +#define BM_CL_CR 0x0000 +#define BM_CL_RR0 0x0100 +#define BM_CL_RR1 0x0140 +#define BM_CL_RCR 0x1000 +#define BM_CL_RCR_PI_CENA 0x3000 +#define BM_CL_RCR_CI_CENA 0x3100 + +#else /* Cache-inhibited register offsets */ #define BM_REG_RCR_PI_CINH 0x0000 #define BM_REG_RCR_CI_CINH 0x0004 @@ -53,6 +74,7 @@ #define BM_CL_RCR 0x1000 #define BM_CL_RCR_PI_CENA 0x3000 #define BM_CL_RCR_CI_CENA 0x3100 +#endif /* * Portal modes. @@ -167,12 +189,12 @@ struct bm_portal { /* Cache-inhibited register access. 
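Two things change for the ARM parts in the qbman portal code that follows: the cache-inhibited registers move to a 0x3000-based block inside the portal, and every register access gains an explicit byte swap, because the QBMan block is big-endian even when the cores run little-endian. The accessor pattern in isolation (a generic sketch of what bm_in()/bm_out() below implement):

        /*
         * QBMan portal registers are big-endian on both PPC and Layerscape,
         * so a raw (non-swapping) MMIO access plus an explicit conversion
         * behaves identically on either host endianness.
         */
        static inline u32 qbman_reg_read(void __iomem *base, u32 offset)
        {
                return be32_to_cpu(__raw_readl(base + offset));
        }

        static inline void qbman_reg_write(void __iomem *base, u32 offset, u32 val)
        {
                __raw_writel(cpu_to_be32(val), base + offset);
        }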
*/ static inline u32 bm_in(struct bm_portal *p, u32 offset) { - return __raw_readl(p->addr.ci + offset); + return be32_to_cpu(__raw_readl(p->addr.ci + offset)); } static inline void bm_out(struct bm_portal *p, u32 offset, u32 val) { - __raw_writel(val, p->addr.ci + offset); + __raw_writel(cpu_to_be32(val), p->addr.ci + offset); } /* Cache Enabled Portal Access */ @@ -188,7 +210,7 @@ static inline void bm_cl_touch_ro(struct bm_portal *p, u32 offset) static inline u32 bm_ce_in(struct bm_portal *p, u32 offset) { - return __raw_readl(p->addr.ce + offset); + return be32_to_cpu(__raw_readl(p->addr.ce + offset)); } struct bman_portal { @@ -391,7 +413,7 @@ static void bm_rcr_finish(struct bm_portal *portal) i = bm_in(portal, BM_REG_RCR_PI_CINH) & (BM_RCR_SIZE - 1); if (i != rcr_ptr2idx(rcr->cursor)) - pr_crit("losing uncommited RCR entries\n"); + pr_crit("losing uncommitted RCR entries\n"); i = bm_in(portal, BM_REG_RCR_CI_CINH) & (BM_RCR_SIZE - 1); if (i != rcr->ci) @@ -607,7 +629,7 @@ int bman_p_irqsource_add(struct bman_portal *p, u32 bits) unsigned long irqflags; local_irq_save(irqflags); - set_bits(bits & BM_PIRQ_VISIBLE, &p->irq_sources); + p->irq_sources |= bits & BM_PIRQ_VISIBLE; bm_out(&p->p, BM_REG_IER, p->irq_sources); local_irq_restore(irqflags); return 0; diff --git a/drivers/soc/fsl/qbman/bman_ccsr.c b/drivers/soc/fsl/qbman/bman_ccsr.c index 9deb052..f126dbd 100644 --- a/drivers/soc/fsl/qbman/bman_ccsr.c +++ b/drivers/soc/fsl/qbman/bman_ccsr.c @@ -170,10 +170,11 @@ static int fsl_bman_probe(struct platform_device *pdev) { int ret, err_irq; struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; + struct device_node *mem_node, *node = dev->of_node; struct resource *res; u16 id, bm_pool_cnt; u8 major, minor; + u64 size; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -181,8 +182,7 @@ static int fsl_bman_probe(struct platform_device *pdev) node->full_name); return -ENXIO; } - bm_ccsr_start = devm_ioremap(dev, res->start, - res->end - res->start + 1); + bm_ccsr_start = devm_ioremap(dev, res->start, resource_size(res)); if (!bm_ccsr_start) return -ENXIO; @@ -202,6 +202,38 @@ static int fsl_bman_probe(struct platform_device *pdev) return -ENODEV; } + /* + * If FBPR memory wasn't defined using the qbman compatible string, + * try using the of_reserved_mem_device method + */ + if (!fbpr_a) { + ret = of_reserved_mem_device_init(dev); + if (ret) { + dev_err(dev, "of_reserved_mem_device_init() failed 0x%x\n", + ret); + return -ENODEV; + } + mem_node = of_parse_phandle(dev->of_node, "memory-region", 0); + if (mem_node) { + ret = of_property_read_u64(mem_node, "size", &size); + if (ret) { + dev_err(dev, "FBPR: of_address_to_resource fails 0x%x\n", + ret); + return -ENODEV; + } + fbpr_sz = size; + } else { + dev_err(dev, "No memory-region found for FBPR\n"); + return -ENODEV; + } + if (!dma_zalloc_coherent(dev, fbpr_sz, &fbpr_a, 0)) { + dev_err(dev, "Alloc FBPR memory failed\n"); + return -ENODEV; + } + } + + dev_dbg(dev, "Allocated FBPR 0x%llx 0x%zx\n", fbpr_a, fbpr_sz); + bm_set_memory(fbpr_a, fbpr_sz); err_irq = platform_get_irq(pdev, 0); diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c index 6579cc1..d37f563 100644 --- a/drivers/soc/fsl/qbman/bman_portal.c +++ b/drivers/soc/fsl/qbman/bman_portal.c @@ -53,58 +53,38 @@ static struct bman_portal *init_pcfg(struct bm_portal_config *pcfg) return p; } -static void bman_offline_cpu(unsigned int cpu) +static int bman_offline_cpu(unsigned int cpu) { struct bman_portal *p =
affine_bportals[cpu]; const struct bm_portal_config *pcfg; if (!p) - return; + return 0; pcfg = bman_get_bm_portal_config(p); if (!pcfg) - return; + return 0; irq_set_affinity(pcfg->irq, cpumask_of(0)); + return 0; } -static void bman_online_cpu(unsigned int cpu) +static int bman_online_cpu(unsigned int cpu) { struct bman_portal *p = affine_bportals[cpu]; const struct bm_portal_config *pcfg; if (!p) - return; + return 0; pcfg = bman_get_bm_portal_config(p); if (!pcfg) - return; + return 0; irq_set_affinity(pcfg->irq, cpumask_of(cpu)); + return 0; } -static int bman_hotplug_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - bman_online_cpu(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - bman_offline_cpu(cpu); - } - - return NOTIFY_OK; -} - -static struct notifier_block bman_hotplug_cpu_notifier = { - .notifier_call = bman_hotplug_cpu_callback, -}; - static int bman_portal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -145,16 +125,26 @@ static int bman_portal_probe(struct platform_device *pdev) } pcfg->irq = irq; - va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), 0); - if (!va) +#ifdef CONFIG_PPC + /* PPC requires a cacheable/non-coherent mapping of the portal */ + va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), + (pgprot_val(PAGE_KERNEL) & ~_PAGE_COHERENT)); +#else + /* For ARM we can use write combine mapping. */ + va = ioremap_wc(addr_phys[0]->start, resource_size(addr_phys[0])); +#endif + if (!va) { + dev_err(dev, "ioremap::CE failed\n"); goto err_ioremap1; + } pcfg->addr_virt[DPAA_PORTAL_CE] = va; - va = ioremap_prot(addr_phys[1]->start, resource_size(addr_phys[1]), - _PAGE_GUARDED | _PAGE_NO_CACHE); - if (!va) + va = ioremap(addr_phys[1]->start, resource_size(addr_phys[1])); + if (!va) { + dev_err(dev, "ioremap::CI failed\n"); goto err_ioremap2; + } pcfg->addr_virt[DPAA_PORTAL_CI] = va; @@ -170,8 +160,10 @@ static int bman_portal_probe(struct platform_device *pdev) spin_unlock(&bman_lock); pcfg->cpu = cpu; - if (!init_pcfg(pcfg)) - goto err_ioremap2; + if (!init_pcfg(pcfg)) { + dev_err(dev, "portal init failed\n"); + goto err_portal_init; + } /* clear irq affinity if assigned cpu is offline */ if (!cpu_online(cpu)) @@ -179,10 +171,11 @@ static int bman_portal_probe(struct platform_device *pdev) return 0; +err_portal_init: + iounmap(pcfg->addr_virt[DPAA_PORTAL_CI]); err_ioremap2: iounmap(pcfg->addr_virt[DPAA_PORTAL_CE]); err_ioremap1: - dev_err(dev, "ioremap failed\n"); return -ENXIO; } @@ -210,8 +203,14 @@ static int __init bman_portal_driver_register(struct platform_driver *drv) if (ret < 0) return ret; - register_hotcpu_notifier(&bman_hotplug_cpu_notifier); - + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "soc/qbman_portal:online", + bman_online_cpu, bman_offline_cpu); + if (ret < 0) { + pr_err("bman: failed to register hotplug callbacks.\n"); + platform_driver_unregister(drv); + return ret; + } return 0; } diff --git a/drivers/soc/fsl/qbman/bman_priv.h b/drivers/soc/fsl/qbman/bman_priv.h index f6896a2..765a4bf 100644 --- a/drivers/soc/fsl/qbman/bman_priv.h +++ b/drivers/soc/fsl/qbman/bman_priv.h @@ -33,6 +33,9 @@ #include "dpaa_sys.h" #include <soc/fsl/bman.h> +#include <linux/dma-contiguous.h> +#include <linux/of_address.h> +#include <linux/dma-mapping.h> /* Portal processing (interrupt) sources */ #define BM_PIRQ_RCRI 0x00000002 /* RCR Ring 
(below threshold) */ diff --git a/drivers/soc/fsl/qbman/dpaa_sys.h b/drivers/soc/fsl/qbman/dpaa_sys.h index b63fd72..6239e18 100644 --- a/drivers/soc/fsl/qbman/dpaa_sys.h +++ b/drivers/soc/fsl/qbman/dpaa_sys.h @@ -38,24 +38,23 @@ #include <linux/kthread.h> #include <linux/vmalloc.h> #include <linux/platform_device.h> +#include <linux/of.h> #include <linux/of_reserved_mem.h> #include <linux/prefetch.h> #include <linux/genalloc.h> #include <asm/cacheflush.h> +#include <linux/io.h> +#include <linux/delay.h> /* For 2-element tables related to cache-inhibited and cache-enabled mappings */ #define DPAA_PORTAL_CE 0 #define DPAA_PORTAL_CI 1 -#if (L1_CACHE_BYTES != 32) && (L1_CACHE_BYTES != 64) -#error "Unsupported Cacheline Size" -#endif - static inline void dpaa_flush(void *p) { #ifdef CONFIG_PPC flush_dcache_range((unsigned long)p, (unsigned long)p+64); -#elif defined(CONFIG_ARM32) +#elif defined(CONFIG_ARM) __cpuc_flush_dcache_area(p, 64); #elif defined(CONFIG_ARM64) __flush_dcache_area(p, 64); diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 119054b..aebcf16 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -41,6 +41,43 @@ /* Portal register assists */ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +/* Cache-inhibited register offsets */ +#define QM_REG_EQCR_PI_CINH 0x3000 +#define QM_REG_EQCR_CI_CINH 0x3040 +#define QM_REG_EQCR_ITR 0x3080 +#define QM_REG_DQRR_PI_CINH 0x3100 +#define QM_REG_DQRR_CI_CINH 0x3140 +#define QM_REG_DQRR_ITR 0x3180 +#define QM_REG_DQRR_DCAP 0x31C0 +#define QM_REG_DQRR_SDQCR 0x3200 +#define QM_REG_DQRR_VDQCR 0x3240 +#define QM_REG_DQRR_PDQCR 0x3280 +#define QM_REG_MR_PI_CINH 0x3300 +#define QM_REG_MR_CI_CINH 0x3340 +#define QM_REG_MR_ITR 0x3380 +#define QM_REG_CFG 0x3500 +#define QM_REG_ISR 0x3600 +#define QM_REG_IER 0x3640 +#define QM_REG_ISDR 0x3680 +#define QM_REG_IIR 0x36C0 +#define QM_REG_ITPR 0x3740 + +/* Cache-enabled register offsets */ +#define QM_CL_EQCR 0x0000 +#define QM_CL_DQRR 0x1000 +#define QM_CL_MR 0x2000 +#define QM_CL_EQCR_PI_CENA 0x3000 +#define QM_CL_EQCR_CI_CENA 0x3040 +#define QM_CL_DQRR_PI_CENA 0x3100 +#define QM_CL_DQRR_CI_CENA 0x3140 +#define QM_CL_MR_PI_CENA 0x3300 +#define QM_CL_MR_CI_CENA 0x3340 +#define QM_CL_CR 0x3800 +#define QM_CL_RR0 0x3900 +#define QM_CL_RR1 0x3940 + +#else /* Cache-inhibited register offsets */ #define QM_REG_EQCR_PI_CINH 0x0000 #define QM_REG_EQCR_CI_CINH 0x0004 @@ -75,6 +112,7 @@ #define QM_CL_CR 0x3800 #define QM_CL_RR0 0x3900 #define QM_CL_RR1 0x3940 +#endif /* * BTW, the drivers (and h/w programming model) already obtain the required @@ -140,10 +178,10 @@ enum qm_mr_cmode { /* matches QCSP_CFG::MM */ struct qm_eqcr_entry { u8 _ncw_verb; /* writes to this are non-coherent */ u8 dca; - u16 seqnum; - u32 orp; /* 24-bit */ - u32 fqid; /* 24-bit */ - u32 tag; + __be16 seqnum; + u8 __reserved[4]; + __be32 fqid; /* 24-bit */ + __be32 tag; struct qm_fd fd; u8 __reserved3[32]; } __packed; @@ -183,41 +221,22 @@ struct qm_mr { }; /* MC (Management Command) command */ -/* "Query FQ" */ -struct qm_mcc_queryfq { +/* "FQ" command layout */ +struct qm_mcc_fq { u8 _ncw_verb; u8 __reserved1[3]; - u32 fqid; /* 24-bit */ + __be32 fqid; /* 24-bit */ u8 __reserved2[56]; } __packed; -/* "Alter FQ State Commands " */ -struct qm_mcc_alterfq { - u8 _ncw_verb; - u8 __reserved1[3]; - u32 fqid; /* 24-bit */ - u8 __reserved2; - u8 count; /* number of consecutive FQID */ - u8 __reserved3[10]; - u32 context_b; /* frame queue context b */ - u8 __reserved4[40]; -} __packed; -/* 
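Annotating the hardware descriptor fields as __be16/__be32, as the qm_eqcr_entry change above does, lets sparse (make C=1) flag any access that forgets the conversion. A well-formed store then looks like this (helper name is illustrative):

        static void eqcr_fill(struct qm_eqcr_entry *eq, u16 seqnum, u32 fqid)
        {
                eq->seqnum = cpu_to_be16(seqnum);
                eq->fqid = cpu_to_be32(fqid & QM_FQID_MASK);    /* FQIDs are 24-bit */
        }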
"Query CGR" */ -struct qm_mcc_querycgr { +/* "CGR" command layout */ +struct qm_mcc_cgr { u8 _ncw_verb; u8 __reserved1[30]; u8 cgid; u8 __reserved2[32]; }; -struct qm_mcc_querywq { - u8 _ncw_verb; - u8 __reserved; - /* select channel if verb != QUERYWQ_DEDICATED */ - u16 channel_wq; /* ignores wq (3 lsbits): _res[0-2] */ - u8 __reserved2[60]; -} __packed; - #define QM_MCC_VERB_VBIT 0x80 #define QM_MCC_VERB_MASK 0x7f /* where the verb contains; */ #define QM_MCC_VERB_INITFQ_PARKED 0x40 @@ -243,12 +262,9 @@ union qm_mc_command { u8 __reserved[63]; }; struct qm_mcc_initfq initfq; - struct qm_mcc_queryfq queryfq; - struct qm_mcc_alterfq alterfq; struct qm_mcc_initcgr initcgr; - struct qm_mcc_querycgr querycgr; - struct qm_mcc_querywq querywq; - struct qm_mcc_queryfq_np queryfq_np; + struct qm_mcc_fq fq; + struct qm_mcc_cgr cgr; }; /* MC (Management Command) result */ @@ -343,12 +359,12 @@ struct qm_portal { /* Cache-inhibited register access. */ static inline u32 qm_in(struct qm_portal *p, u32 offset) { - return __raw_readl(p->addr.ci + offset); + return be32_to_cpu(__raw_readl(p->addr.ci + offset)); } static inline void qm_out(struct qm_portal *p, u32 offset, u32 val) { - __raw_writel(val, p->addr.ci + offset); + __raw_writel(cpu_to_be32(val), p->addr.ci + offset); } /* Cache Enabled Portal Access */ @@ -364,7 +380,7 @@ static inline void qm_cl_touch_ro(struct qm_portal *p, u32 offset) static inline u32 qm_ce_in(struct qm_portal *p, u32 offset) { - return __raw_readl(p->addr.ce + offset); + return be32_to_cpu(__raw_readl(p->addr.ce + offset)); } /* --- EQCR API --- */ @@ -443,7 +459,7 @@ static inline void qm_eqcr_finish(struct qm_portal *portal) DPAA_ASSERT(!eqcr->busy); if (pi != eqcr_ptr2idx(eqcr->cursor)) - pr_crit("losing uncommited EQCR entries\n"); + pr_crit("losing uncommitted EQCR entries\n"); if (ci != eqcr->ci) pr_crit("missing existing EQCR completions\n"); if (eqcr->ci != eqcr_ptr2idx(eqcr->cursor)) @@ -492,8 +508,7 @@ static inline struct qm_eqcr_entry *qm_eqcr_start_stash(struct qm_portal static inline void eqcr_commit_checks(struct qm_eqcr *eqcr) { DPAA_ASSERT(eqcr->busy); - DPAA_ASSERT(eqcr->cursor->orp == (eqcr->cursor->orp & 0x00ffffff)); - DPAA_ASSERT(eqcr->cursor->fqid == (eqcr->cursor->fqid & 0x00ffffff)); + DPAA_ASSERT(!(be32_to_cpu(eqcr->cursor->fqid) & ~QM_FQID_MASK)); DPAA_ASSERT(eqcr->available >= 1); } @@ -932,12 +947,12 @@ static inline int qm_mc_result_timeout(struct qm_portal *portal, static inline void fq_set(struct qman_fq *fq, u32 mask) { - set_bits(mask, &fq->flags); + fq->flags |= mask; } static inline void fq_clear(struct qman_fq *fq, u32 mask) { - clear_bits(mask, &fq->flags); + fq->flags &= ~mask; } static inline int fq_isset(struct qman_fq *fq, u32 mask) @@ -962,8 +977,6 @@ struct qman_portal { u32 sdqcr; /* probing time config params for cpu-affine portals */ const struct qm_portal_config *config; - /* needed for providing a non-NULL device to dma_map_***() */ - struct platform_device *pdev; /* 2-element array. cgrs[0] is mask, cgrs[1] is snapshot. */ struct qman_cgrs *cgrs; /* linked-list of CSCN handlers. 
*/ @@ -1133,7 +1146,6 @@ static int qman_create_portal(struct qman_portal *portal, const struct qman_cgrs *cgrs) { struct qm_portal *p; - char buf[16]; int ret; u32 isdr; @@ -1196,15 +1208,6 @@ static int qman_create_portal(struct qman_portal *portal, portal->sdqcr = QM_SDQCR_SOURCE_CHANNELS | QM_SDQCR_COUNT_UPTO3 | QM_SDQCR_DEDICATED_PRECEDENCE | QM_SDQCR_TYPE_PRIO_QOS | QM_SDQCR_TOKEN_SET(0xab) | QM_SDQCR_CHANNELS_DEDICATED; - sprintf(buf, "qportal-%d", c->channel); - portal->pdev = platform_device_alloc(buf, -1); - if (!portal->pdev) - goto fail_devalloc; - if (dma_set_mask(&portal->pdev->dev, DMA_BIT_MASK(40))) - goto fail_devadd; - ret = platform_device_add(portal->pdev); - if (ret) - goto fail_devadd; isdr = 0xffffffff; qm_out(p, QM_REG_ISDR, isdr); portal->irq_sources = 0; @@ -1239,8 +1242,8 @@ static int qman_create_portal(struct qman_portal *portal, /* special handling, drain just in case it's a few FQRNIs */ const union qm_mr_entry *e = qm_mr_current(p); - dev_err(c->dev, "MR dirty, VB 0x%x, rc 0x%x\n, addr 0x%x", - e->verb, e->ern.rc, e->ern.fd.addr_lo); + dev_err(c->dev, "MR dirty, VB 0x%x, rc 0x%x, addr 0x%llx\n", + e->verb, e->ern.rc, qm_fd_addr_get64(&e->ern.fd)); goto fail_dqrr_mr_empty; } /* Success */ @@ -1256,10 +1259,6 @@ fail_eqcr_empty: fail_affinity: free_irq(c->irq, portal); fail_irq: - platform_device_del(portal->pdev); -fail_devadd: - platform_device_put(portal->pdev); -fail_devalloc: kfree(portal->cgrs); fail_cgrs: qm_mc_finish(p); @@ -1321,9 +1320,6 @@ static void qman_destroy_portal(struct qman_portal *qm) qm_dqrr_finish(&qm->p); qm_eqcr_finish(&qm->p); - platform_device_del(qm->pdev); - platform_device_put(qm->pdev); - qm->config = NULL; } @@ -1386,6 +1382,7 @@ static void qm_congestion_task(struct work_struct *work) if (!qm_mc_result_timeout(&p->p, &mcr)) { spin_unlock(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); + qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; } /* mask out the ones I'm not interested in */ @@ -1400,6 +1397,7 @@ static void qm_congestion_task(struct work_struct *work) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); spin_unlock(&p->cgr_lock); + qman_p_irqsource_add(p, QM_PIRQ_CSCI); } static void qm_mr_process_task(struct work_struct *work) @@ -1428,7 +1426,7 @@ static void qm_mr_process_task(struct work_struct *work) case QM_MR_VERB_FQRN: case QM_MR_VERB_FQRL: /* Lookup in the retirement table */ - fq = fqid_to_fq(msg->fq.fqid); + fq = fqid_to_fq(qm_fqid_get(&msg->fq)); if (WARN_ON(!fq)) break; fq_state_change(p, fq, msg, verb); @@ -1437,7 +1435,7 @@ static void qm_mr_process_task(struct work_struct *work) break; case QM_MR_VERB_FQPN: /* Parked */ - fq = tag_to_fq(msg->fq.contextB); + fq = tag_to_fq(be32_to_cpu(msg->fq.context_b)); fq_state_change(p, fq, msg, verb); if (fq->cb.fqs) fq->cb.fqs(p, fq, msg); @@ -1451,7 +1449,7 @@ static void qm_mr_process_task(struct work_struct *work) } } else { /* Its a software ERN */ - fq = tag_to_fq(msg->ern.tag); + fq = tag_to_fq(be32_to_cpu(msg->ern.tag)); fq->cb.ern(p, fq, msg); } num++; @@ -1459,12 +1457,14 @@ static void qm_mr_process_task(struct work_struct *work) } qm_mr_cci_consume(&p->p, num); + qman_p_irqsource_add(p, QM_PIRQ_MRI); preempt_enable(); } static u32 __poll_portal_slow(struct qman_portal *p, u32 is) { if (is & QM_PIRQ_CSCI) { + qman_p_irqsource_remove(p, QM_PIRQ_CSCI); queue_work_on(smp_processor_id(), qm_portal_wq, &p->congestion_work); } @@ -1476,6 +1476,7 @@ static u32 __poll_portal_slow(struct qman_portal *p, 
u32 is) } if (is & QM_PIRQ_MRI) { + qman_p_irqsource_remove(p, QM_PIRQ_MRI); queue_work_on(smp_processor_id(), qm_portal_wq, &p->mr_work); } @@ -1536,7 +1537,7 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p, if (dq->stat & QM_DQRR_STAT_UNSCHEDULED) { /* - * VDQCR: don't trust contextB as the FQ may have + * VDQCR: don't trust context_b as the FQ may have * been configured for h/w consumption and we're * draining it post-retirement. */ @@ -1562,8 +1563,8 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p, if (dq->stat & QM_DQRR_STAT_DQCR_EXPIRED) clear_vdqcr(p, fq); } else { - /* SDQCR: contextB points to the FQ */ - fq = tag_to_fq(dq->contextB); + /* SDQCR: context_b points to the FQ */ + fq = tag_to_fq(be32_to_cpu(dq->context_b)); /* Now let the callback do its stuff */ res = fq->cb.dqrr(p, fq, dq); /* @@ -1603,7 +1604,7 @@ void qman_p_irqsource_add(struct qman_portal *p, u32 bits) unsigned long irqflags; local_irq_save(irqflags); - set_bits(bits & QM_PIRQ_VISIBLE, &p->irq_sources); + p->irq_sources |= bits & QM_PIRQ_VISIBLE; qm_out(&p->p, QM_REG_IER, p->irq_sources); local_irq_restore(irqflags); } @@ -1626,7 +1627,7 @@ void qman_p_irqsource_remove(struct qman_portal *p, u32 bits) */ local_irq_save(irqflags); bits &= QM_PIRQ_VISIBLE; - clear_bits(bits, &p->irq_sources); + p->irq_sources &= ~bits; qm_out(&p->p, QM_REG_IER, p->irq_sources); ier = qm_in(&p->p, QM_REG_IER); /* @@ -1780,9 +1781,9 @@ int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts) if (fq_isset(fq, QMAN_FQ_FLAG_NO_MODIFY)) return -EINVAL; #endif - if (opts && (opts->we_mask & QM_INITFQ_WE_OAC)) { + if (opts && (be16_to_cpu(opts->we_mask) & QM_INITFQ_WE_OAC)) { /* And can't be set at the same time as TDTHRESH */ - if (opts->we_mask & QM_INITFQ_WE_TDTHRESH) + if (be16_to_cpu(opts->we_mask) & QM_INITFQ_WE_TDTHRESH) return -EINVAL; } /* Issue an INITFQ_[PARKED|SCHED] management command */ @@ -1796,37 +1797,49 @@ int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts) mcc = qm_mc_start(&p->p); if (opts) mcc->initfq = *opts; - mcc->initfq.fqid = fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); mcc->initfq.count = 0; /* - * If the FQ does *not* have the TO_DCPORTAL flag, contextB is set as a + * If the FQ does *not* have the TO_DCPORTAL flag, context_b is set as a * demux pointer. Otherwise, the caller-provided value is allowed to * stand, don't overwrite it. */ if (fq_isclear(fq, QMAN_FQ_FLAG_TO_DCPORTAL)) { dma_addr_t phys_fq; - mcc->initfq.we_mask |= QM_INITFQ_WE_CONTEXTB; - mcc->initfq.fqd.context_b = fq_to_tag(fq); + mcc->initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CONTEXTB); + mcc->initfq.fqd.context_b = cpu_to_be32(fq_to_tag(fq)); /* * and the physical address - NB, if the user wasn't trying to * set CONTEXTA, clear the stashing settings. 
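The CSCI and MRI changes above implement a defer-and-re-arm discipline: __poll_portal_slow() masks the source before queueing the work item, and the work function unmasks it again once the backlog is drained, so the source cannot storm while processing is pending. The pattern reduced to its skeleton (names condensed from the code above):

        static void slow_irq_defer(struct qman_portal *p)
        {
                /* mask the source so it cannot re-fire while work is queued */
                qman_p_irqsource_remove(p, QM_PIRQ_CSCI);
                queue_work_on(smp_processor_id(), qm_portal_wq,
                              &p->congestion_work);
        }

        static void slow_work_done(struct qman_portal *p)
        {
                /* ... congestion snapshot processed ... */
                qman_p_irqsource_add(p, QM_PIRQ_CSCI);  /* re-arm */
        }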
*/ - if (!(mcc->initfq.we_mask & QM_INITFQ_WE_CONTEXTA)) { - mcc->initfq.we_mask |= QM_INITFQ_WE_CONTEXTA; + if (!(be16_to_cpu(mcc->initfq.we_mask) & + QM_INITFQ_WE_CONTEXTA)) { + mcc->initfq.we_mask |= + cpu_to_be16(QM_INITFQ_WE_CONTEXTA); memset(&mcc->initfq.fqd.context_a, 0, sizeof(mcc->initfq.fqd.context_a)); } else { - phys_fq = dma_map_single(&p->pdev->dev, fq, sizeof(*fq), - DMA_TO_DEVICE); + struct qman_portal *p = qman_dma_portal; + + phys_fq = dma_map_single(p->config->dev, fq, + sizeof(*fq), DMA_TO_DEVICE); + if (dma_mapping_error(p->config->dev, phys_fq)) { + dev_err(p->config->dev, "dma_mapping failed\n"); + ret = -EIO; + goto out; + } + qm_fqd_stashing_set64(&mcc->initfq.fqd, phys_fq); } } if (flags & QMAN_INITFQ_FLAG_LOCAL) { int wq = 0; - if (!(mcc->initfq.we_mask & QM_INITFQ_WE_DESTWQ)) { - mcc->initfq.we_mask |= QM_INITFQ_WE_DESTWQ; + if (!(be16_to_cpu(mcc->initfq.we_mask) & + QM_INITFQ_WE_DESTWQ)) { + mcc->initfq.we_mask |= + cpu_to_be16(QM_INITFQ_WE_DESTWQ); wq = 4; } qm_fqd_set_destwq(&mcc->initfq.fqd, p->config->channel, wq); @@ -1845,13 +1858,13 @@ int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts) goto out; } if (opts) { - if (opts->we_mask & QM_INITFQ_WE_FQCTRL) { - if (opts->fqd.fq_ctrl & QM_FQCTRL_CGE) + if (be16_to_cpu(opts->we_mask) & QM_INITFQ_WE_FQCTRL) { + if (be16_to_cpu(opts->fqd.fq_ctrl) & QM_FQCTRL_CGE) fq_set(fq, QMAN_FQ_STATE_CGR_EN); else fq_clear(fq, QMAN_FQ_STATE_CGR_EN); } - if (opts->we_mask & QM_INITFQ_WE_CGID) + if (be16_to_cpu(opts->we_mask) & QM_INITFQ_WE_CGID) fq->cgr_groupid = opts->fqd.cgid; } fq->state = (flags & QMAN_INITFQ_FLAG_SCHED) ? @@ -1884,7 +1897,7 @@ int qman_schedule_fq(struct qman_fq *fq) goto out; } mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_SCHED); if (!qm_mc_result_timeout(&p->p, &mcr)) { dev_err(p->config->dev, "ALTER_SCHED timeout\n"); @@ -1927,7 +1940,7 @@ int qman_retire_fq(struct qman_fq *fq, u32 *flags) goto out; } mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_RETIRE); if (!qm_mc_result_timeout(&p->p, &mcr)) { dev_crit(p->config->dev, "ALTER_RETIRE timeout\n"); @@ -1970,8 +1983,8 @@ int qman_retire_fq(struct qman_fq *fq, u32 *flags) msg.verb = QM_MR_VERB_FQRNI; msg.fq.fqs = mcr->alterfq.fqs; - msg.fq.fqid = fq->fqid; - msg.fq.contextB = fq_to_tag(fq); + qm_fqid_set(&msg.fq, fq->fqid); + msg.fq.context_b = cpu_to_be32(fq_to_tag(fq)); fq->cb.fqs(p, fq, &msg); } } else if (res == QM_MCR_RESULT_PENDING) { @@ -2006,7 +2019,7 @@ int qman_oos_fq(struct qman_fq *fq) goto out; } mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_OOS); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2032,7 +2045,7 @@ int qman_query_fq(struct qman_fq *fq, struct qm_fqd *fqd) int ret = 0; mcc = qm_mc_start(&p->p); - mcc->queryfq.fqid = fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2049,8 +2062,7 @@ out: return ret; } -static int qman_query_fq_np(struct qman_fq *fq, - struct qm_mcr_queryfq_np *np) +int qman_query_fq_np(struct qman_fq *fq, struct qm_mcr_queryfq_np *np) { union qm_mc_command *mcc; union qm_mc_result *mcr; @@ -2058,7 +2070,7 @@ static int qman_query_fq_np(struct qman_fq *fq, int ret = 0; mcc = qm_mc_start(&p->p); - mcc->queryfq.fqid = 
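Because we_mask and fq_ctrl are now carried big-endian, callers that build their own qm_mcc_initfq options must store converted values, exactly as qman_init_fq() itself does above. For example, a driver enabling congestion-group handling on an FQ might do (caller context assumed):

        struct qm_mcc_initfq opts = {};
        int err;

        opts.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_CGID);
        opts.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_CGE);
        opts.fqd.cgid = cgr->cgrid;             /* single byte, no swap */

        err = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);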
fq->fqid; + qm_fqid_set(&mcc->fq, fq->fqid); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ_NP); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2076,6 +2088,7 @@ out: put_affine_portal(); return ret; } +EXPORT_SYMBOL(qman_query_fq_np); static int qman_query_cgr(struct qman_cgr *cgr, struct qm_mcr_querycgr *cgrd) @@ -2086,7 +2099,7 @@ static int qman_query_cgr(struct qman_cgr *cgr, int ret = 0; mcc = qm_mc_start(&p->p); - mcc->querycgr.cgid = cgr->cgrid; + mcc->cgr.cgid = cgr->cgrid; qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCGR); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2239,8 +2252,8 @@ int qman_enqueue(struct qman_fq *fq, const struct qm_fd *fd) if (unlikely(!eq)) goto out; - eq->fqid = fq->fqid; - eq->tag = fq_to_tag(fq); + qm_fqid_set(eq, fq->fqid); + eq->tag = cpu_to_be32(fq_to_tag(fq)); eq->fd = *fd; qm_eqcr_pvb_commit(&p->p, QM_EQCR_VERB_CMD_ENQUEUE); @@ -2282,7 +2295,24 @@ out: } #define PORTAL_IDX(n) (n->config->channel - QM_CHANNEL_SWPORTAL0) -#define TARG_MASK(n) (BIT(31) >> PORTAL_IDX(n)) + +/* congestion state change notification target update control */ +static void qm_cgr_cscn_targ_set(struct __qm_mc_cgr *cgr, int pi, u32 val) +{ + if (qman_ip_rev >= QMAN_REV30) + cgr->cscn_targ_upd_ctrl = cpu_to_be16(pi | + QM_CGR_TARG_UDP_CTRL_WRITE_BIT); + else + cgr->cscn_targ = cpu_to_be32(val | QM_CGR_TARG_PORTAL(pi)); +} + +static void qm_cgr_cscn_targ_clear(struct __qm_mc_cgr *cgr, int pi, u32 val) +{ + if (qman_ip_rev >= QMAN_REV30) + cgr->cscn_targ_upd_ctrl = cpu_to_be16(pi); + else + cgr->cscn_targ = cpu_to_be32(val & ~QM_CGR_TARG_PORTAL(pi)); +} static u8 qman_cgr_cpus[CGR_NUM]; @@ -2305,7 +2335,6 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, struct qm_mcc_initcgr *opts) { struct qm_mcr_querycgr cgr_state; - struct qm_mcc_initcgr local_opts = {}; int ret; struct qman_portal *p; @@ -2327,22 +2356,18 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, spin_lock(&p->cgr_lock); if (opts) { + struct qm_mcc_initcgr local_opts = *opts; + ret = qman_query_cgr(cgr, &cgr_state); if (ret) goto out; - if (opts) - local_opts = *opts; - if ((qman_ip_rev & 0xFF00) >= QMAN_REV30) - local_opts.cgr.cscn_targ_upd_ctrl = - QM_CGR_TARG_UDP_CTRL_WRITE_BIT | PORTAL_IDX(p); - else - /* Overwrite TARG */ - local_opts.cgr.cscn_targ = cgr_state.cgr.cscn_targ | - TARG_MASK(p); - local_opts.we_mask |= QM_CGR_WE_CSCN_TARG; + + qm_cgr_cscn_targ_set(&local_opts.cgr, PORTAL_IDX(p), + be32_to_cpu(cgr_state.cgr.cscn_targ)); + local_opts.we_mask |= cpu_to_be16(QM_CGR_WE_CSCN_TARG); /* send init if flags indicate so */ - if (opts && (flags & QMAN_CGR_FLAG_USE_INIT)) + if (flags & QMAN_CGR_FLAG_USE_INIT) ret = qm_modify_cgr(cgr, QMAN_CGR_FLAG_USE_INIT, &local_opts); else @@ -2405,13 +2430,11 @@ int qman_delete_cgr(struct qman_cgr *cgr) list_add(&cgr->node, &p->cgr_cbs); goto release_lock; } - /* Overwrite TARG */ - local_opts.we_mask = QM_CGR_WE_CSCN_TARG; - if ((qman_ip_rev & 0xFF00) >= QMAN_REV30) - local_opts.cgr.cscn_targ_upd_ctrl = PORTAL_IDX(p); - else - local_opts.cgr.cscn_targ = cgr_state.cgr.cscn_targ & - ~(TARG_MASK(p)); + + local_opts.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_TARG); + qm_cgr_cscn_targ_clear(&local_opts.cgr, PORTAL_IDX(p), + be32_to_cpu(cgr_state.cgr.cscn_targ)); + ret = qm_modify_cgr(cgr, 0, &local_opts); if (ret) /* add back to the list */ @@ -2501,7 +2524,7 @@ static int _qm_dqrr_consume_and_match(struct qm_portal *p, u32 fqid, int s, } while (wait && !dqrr); while (dqrr) { - if (dqrr->fqid == fqid && (dqrr->stat & s)) + if (qm_fqid_get(dqrr) == fqid 
&& (dqrr->stat & s)) found = 1; qm_dqrr_cdc_consume_1ptr(p, dqrr, 0); qm_dqrr_pvb_update(p); @@ -2537,7 +2560,7 @@ static int qman_shutdown_fq(u32 fqid) dev = p->config->dev; /* Determine the state of the FQID */ mcc = qm_mc_start(&p->p); - mcc->queryfq_np.fqid = fqid; + qm_fqid_set(&mcc->fq, fqid); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ_NP); if (!qm_mc_result_timeout(&p->p, &mcr)) { dev_err(dev, "QUERYFQ_NP timeout\n"); @@ -2552,7 +2575,7 @@ static int qman_shutdown_fq(u32 fqid) /* Query which channel the FQ is using */ mcc = qm_mc_start(&p->p); - mcc->queryfq.fqid = fqid; + qm_fqid_set(&mcc->fq, fqid); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ); if (!qm_mc_result_timeout(&p->p, &mcr)) { dev_err(dev, "QUERYFQ timeout\n"); @@ -2572,7 +2595,7 @@ static int qman_shutdown_fq(u32 fqid) case QM_MCR_NP_STATE_PARKED: orl_empty = 0; mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fqid; + qm_fqid_set(&mcc->fq, fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_RETIRE); if (!qm_mc_result_timeout(&p->p, &mcr)) { dev_err(dev, "QUERYFQ_NP timeout\n"); @@ -2667,7 +2690,7 @@ static int qman_shutdown_fq(u32 fqid) cpu_relax(); } mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fqid; + qm_fqid_set(&mcc->fq, fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_OOS); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2687,7 +2710,7 @@ static int qman_shutdown_fq(u32 fqid) case QM_MCR_NP_STATE_RETIRED: /* Send OOS Command */ mcc = qm_mc_start(&p->p); - mcc->alterfq.fqid = fqid; + qm_fqid_set(&mcc->fq, fqid); qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_OOS); if (!qm_mc_result_timeout(&p->p, &mcr)) { ret = -ETIMEDOUT; @@ -2722,6 +2745,7 @@ const struct qm_portal_config *qman_get_qm_portal_config( { return portal->config; } +EXPORT_SYMBOL(qman_get_qm_portal_config); struct gen_pool *qm_fqalloc; /* FQID allocator */ struct gen_pool *qm_qpalloc; /* pool-channel allocator */ @@ -2789,15 +2813,18 @@ static int qpool_cleanup(u32 qp) struct qm_mcr_queryfq_np np; err = qman_query_fq_np(&fq, &np); - if (err) + if (err == -ERANGE) /* FQID range exceeded, found no problems */ return 0; + else if (WARN_ON(err)) + return err; + if ((np.state & QM_MCR_NP_STATE_MASK) != QM_MCR_NP_STATE_OOS) { struct qm_fqd fqd; err = qman_query_fq(&fq, &fqd); if (WARN_ON(err)) - return 0; + return err; if (qm_fqd_get_chan(&fqd) == qp) { /* The channel is the FQ's target, clean it */ err = qman_shutdown_fq(fq.fqid); @@ -2836,7 +2863,7 @@ static int cgr_cleanup(u32 cgrid) * error, looking for non-OOS FQDs whose CGR is the CGR being released */ struct qman_fq fq = { - .fqid = 1 + .fqid = QM_FQID_RANGE_START }; int err; @@ -2844,16 +2871,19 @@ static int cgr_cleanup(u32 cgrid) struct qm_mcr_queryfq_np np; err = qman_query_fq_np(&fq, &np); - if (err) + if (err == -ERANGE) /* FQID range exceeded, found no problems */ return 0; + else if (WARN_ON(err)) + return err; + if ((np.state & QM_MCR_NP_STATE_MASK) != QM_MCR_NP_STATE_OOS) { struct qm_fqd fqd; err = qman_query_fq(&fq, &fqd); if (WARN_ON(err)) - return 0; - if ((fqd.fq_ctrl & QM_FQCTRL_CGE) && + return err; + if (be16_to_cpu(fqd.fq_ctrl) & QM_FQCTRL_CGE && fqd.cgid == cgrid) { pr_err("CRGID 0x%x is being used by FQID 0x%x, CGR will be leaked\n", cgrid, fq.fqid); diff --git a/drivers/soc/fsl/qbman/qman_ccsr.c b/drivers/soc/fsl/qbman/qman_ccsr.c index 0cace9e..af66e63 100644 --- a/drivers/soc/fsl/qbman/qman_ccsr.c +++ b/drivers/soc/fsl/qbman/qman_ccsr.c @@ -34,6 +34,8 @@ u16 qman_ip_rev; EXPORT_SYMBOL(qman_ip_rev); u16 qm_channel_pool1 = QMAN_CHANNEL_POOL1; EXPORT_SYMBOL(qm_channel_pool1); +u16 
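Both cleanup walkers above now distinguish "end of scan" from a real failure: qman_query_fq_np() is expected to return -ERANGE once the FQID passes the end of the FQD table, and anything else is propagated. The scan idiom reduced to its skeleton:

        struct qman_fq fq = { .fqid = QM_FQID_RANGE_START };
        int err;

        while (1) {
                struct qm_mcr_queryfq_np np;

                err = qman_query_fq_np(&fq, &np);
                if (err == -ERANGE)     /* walked past the last FQID: done */
                        return 0;
                else if (WARN_ON(err))
                        return err;
                /* ... inspect np.state, shut down FQs that reference us ... */
                fq.fqid++;
        }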
qm_channel_caam = QMAN_CHANNEL_CAAM; +EXPORT_SYMBOL(qm_channel_caam); /* Register offsets */ #define REG_QCSP_LIO_CFG(n) (0x0000 + ((n) * 0x10)) @@ -399,21 +401,42 @@ static int qm_init_pfdr(struct device *dev, u32 pfdr_start, u32 num) } /* - * Ideally we would use the DMA API to turn rmem->base into a DMA address - * (especially if iommu translations ever get involved). Unfortunately, the - * DMA API currently does not allow mapping anything that is not backed with - * a struct page. + * QMan needs two global memory areas initialized at boot time: + * 1) FQD: Frame Queue Descriptors used to manage frame queues + * 2) PFDR: Packed Frame Queue Descriptor Records used to store frames + * Both areas are reserved using the device tree reserved memory framework, + * and the addresses and sizes are initialized when the QMan device is probed. */ static dma_addr_t fqd_a, pfdr_a; static size_t fqd_sz, pfdr_sz; +#ifdef CONFIG_PPC +/* + * Support for PPC Device Tree backward compatibility when the compatible + * string is set to fsl-qman-fqd and fsl-qman-pfdr + */ +static int zero_priv_mem(phys_addr_t addr, size_t sz) +{ + /* map as cacheable, non-guarded */ + void __iomem *tmpp = ioremap_prot(addr, sz, 0); + + if (!tmpp) + return -ENOMEM; + + memset_io(tmpp, 0, sz); + flush_dcache_range((unsigned long)tmpp, + (unsigned long)tmpp + sz); + iounmap(tmpp); + + return 0; +} + static int qman_fqd(struct reserved_mem *rmem) { fqd_a = rmem->base; fqd_sz = rmem->size; WARN_ON(!(fqd_a && fqd_sz)); - return 0; } RESERVEDMEM_OF_DECLARE(qman_fqd, "fsl,qman-fqd", qman_fqd); @@ -429,29 +452,13 @@ static int qman_pfdr(struct reserved_mem *rmem) } RESERVEDMEM_OF_DECLARE(qman_pfdr, "fsl,qman-pfdr", qman_pfdr); +#endif + static unsigned int qm_get_fqid_maxcnt(void) { return fqd_sz / 64; } -/* - * Flush this memory range from data cache so that QMAN originated - * transactions for this memory region could be marked non-coherent.
- */ -static int zero_priv_mem(struct device *dev, struct device_node *node, - phys_addr_t addr, size_t sz) -{ - /* map as cacheable, non-guarded */ - void __iomem *tmpp = ioremap_prot(addr, sz, 0); - - memset_io(tmpp, 0, sz); - flush_dcache_range((unsigned long)tmpp, - (unsigned long)tmpp + sz); - iounmap(tmpp); - - return 0; -} - static void log_edata_bits(struct device *dev, u32 bit_count) { u32 i, j, mask = 0xffffffff; @@ -682,11 +689,12 @@ static int qman_resource_init(struct device *dev) static int fsl_qman_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; + struct device_node *mem_node, *node = dev->of_node; struct resource *res; int ret, err_irq; u16 id; u8 major, minor; + u64 size; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -712,18 +720,95 @@ static int fsl_qman_probe(struct platform_device *pdev) qman_ip_rev = QMAN_REV30; else if (major == 3 && minor == 1) qman_ip_rev = QMAN_REV31; + else if (major == 3 && minor == 2) + qman_ip_rev = QMAN_REV32; else { dev_err(dev, "Unknown QMan version\n"); return -ENODEV; } - if ((qman_ip_rev & 0xff00) >= QMAN_REV30) + if ((qman_ip_rev & 0xff00) >= QMAN_REV30) { qm_channel_pool1 = QMAN_CHANNEL_POOL1_REV3; + qm_channel_caam = QMAN_CHANNEL_CAAM_REV3; + } - ret = zero_priv_mem(dev, node, fqd_a, fqd_sz); - WARN_ON(ret); - if (ret) - return -ENODEV; + if (fqd_a) { +#ifdef CONFIG_PPC + /* + * For PPC backward DT compatibility, + * FQD memory MUST be zero'd by software + */ + zero_priv_mem(fqd_a, fqd_sz); +#else + WARN(1, "Unexpected architecture using non shared-dma-mem reservations"); +#endif + } else { + /* + * The order of the memory regions is assumed to be FQD + * followed by PFDR; to ensure allocations come from the + * correct region, the driver initializes and then allocates + * each area in order + */ + ret = of_reserved_mem_device_init_by_idx(dev, dev->of_node, 0); + if (ret) { + dev_err(dev, "of_reserved_mem_device_init_by_idx(0) failed 0x%x\n", + ret); + return -ENODEV; + } + mem_node = of_parse_phandle(dev->of_node, "memory-region", 0); + if (mem_node) { + ret = of_property_read_u64(mem_node, "size", &size); + if (ret) { + dev_err(dev, "FQD: of_address_to_resource fails 0x%x\n", + ret); + return -ENODEV; + } + fqd_sz = size; + } else { + dev_err(dev, "No memory-region found for FQD\n"); + return -ENODEV; + } + if (!dma_zalloc_coherent(dev, fqd_sz, &fqd_a, 0)) { + dev_err(dev, "Alloc FQD memory failed\n"); + return -ENODEV; + } + + /* + * Disassociate the FQD reserved memory area from the device + * because a device can only have one DMA memory area.
This + * should be fine since the memory is allocated and initialized + * and only ever accessed by the QMan device from now on + */ + of_reserved_mem_device_release(dev); + } + dev_dbg(dev, "Allocated FQD 0x%llx 0x%zx\n", fqd_a, fqd_sz); + + if (!pfdr_a) { + /* Setup PFDR memory */ + ret = of_reserved_mem_device_init_by_idx(dev, dev->of_node, 1); + if (ret) { + dev_err(dev, "of_reserved_mem_device_init(1) failed 0x%x\n", + ret); + return -ENODEV; + } + mem_node = of_parse_phandle(dev->of_node, "memory-region", 1); + if (mem_node) { + ret = of_property_read_u64(mem_node, "size", &size); + if (ret) { + dev_err(dev, "PFDR: of_address_to_resource fails 0x%x\n", + ret); + return -ENODEV; + } + pfdr_sz = size; + } else { + dev_err(dev, "No memory-region found for PFDR\n"); + return -ENODEV; + } + if (!dma_zalloc_coherent(dev, pfdr_sz, &pfdr_a, 0)) { + dev_err(dev, "Alloc PFDR Failed size 0x%zx\n", pfdr_sz); + return -ENODEV; + } + } + dev_info(dev, "Allocated PFDR 0x%llx 0x%zx\n", pfdr_a, pfdr_sz); ret = qman_init_ccsr(dev); if (ret) { diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index 1486143..b5463e4 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -30,6 +30,9 @@ #include "qman_priv.h" +struct qman_portal *qman_dma_portal; +EXPORT_SYMBOL(qman_dma_portal); + /* Enable portal interupts (as opposed to polling mode) */ #define CONFIG_FSL_DPA_PIRQ_SLOW 1 #define CONFIG_FSL_DPA_PIRQ_FAST 1 @@ -150,6 +153,10 @@ static struct qman_portal *init_pcfg(struct qm_portal_config *pcfg) /* all assigned portals are initialized now */ qman_init_cgr_all(); } + + if (!qman_dma_portal) + qman_dma_portal = p; + spin_unlock(&qman_lock); dev_info(pcfg->dev, "Portal initialised, cpu %d\n", pcfg->cpu); @@ -179,7 +186,7 @@ static void qman_portal_update_sdest(const struct qm_portal_config *pcfg, qman_set_sdest(pcfg->channel, cpu); } -static void qman_offline_cpu(unsigned int cpu) +static int qman_offline_cpu(unsigned int cpu) { struct qman_portal *p; const struct qm_portal_config *pcfg; @@ -192,9 +199,10 @@ static void qman_offline_cpu(unsigned int cpu) qman_portal_update_sdest(pcfg, 0); } } + return 0; } -static void qman_online_cpu(unsigned int cpu) +static int qman_online_cpu(unsigned int cpu) { struct qman_portal *p; const struct qm_portal_config *pcfg; @@ -207,40 +215,18 @@ static void qman_online_cpu(unsigned int cpu) qman_portal_update_sdest(pcfg, cpu); } } + return 0; } -static int qman_hotplug_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - qman_online_cpu(cpu); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - qman_offline_cpu(cpu); - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block qman_hotplug_cpu_notifier = { - .notifier_call = qman_hotplug_cpu_callback, -}; - static int qman_portal_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = dev->of_node; struct qm_portal_config *pcfg; struct resource *addr_phys[2]; - const u32 *channel; void __iomem *va; - int irq, len, cpu; + int irq, cpu, err; + u32 val; pcfg = devm_kmalloc(dev, sizeof(*pcfg), GFP_KERNEL); if (!pcfg) @@ -264,13 +250,13 @@ static int qman_portal_probe(struct platform_device *pdev) return -ENXIO; } - channel = of_get_property(node, "cell-index", &len); - if (!channel || (len != 4)) { + err = of_property_read_u32(node, 
"cell-index", &val); + if (err) { dev_err(dev, "Can't get %s property 'cell-index'\n", node->full_name); - return -ENXIO; + return err; } - pcfg->channel = *channel; + pcfg->channel = val; pcfg->cpu = -1; irq = platform_get_irq(pdev, 0); if (irq <= 0) { @@ -279,16 +265,26 @@ static int qman_portal_probe(struct platform_device *pdev) } pcfg->irq = irq; - va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), 0); - if (!va) +#ifdef CONFIG_PPC + /* PPC requires a cacheable/non-coherent mapping of the portal */ + va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), + (pgprot_val(PAGE_KERNEL) & ~_PAGE_COHERENT)); +#else + /* For ARM we can use write combine mapping. */ + va = ioremap_wc(addr_phys[0]->start, resource_size(addr_phys[0])); +#endif + if (!va) { + dev_err(dev, "ioremap::CE failed\n"); goto err_ioremap1; + } pcfg->addr_virt[DPAA_PORTAL_CE] = va; - va = ioremap_prot(addr_phys[1]->start, resource_size(addr_phys[1]), - _PAGE_GUARDED | _PAGE_NO_CACHE); - if (!va) + va = ioremap(addr_phys[1]->start, resource_size(addr_phys[1])); + if (!va) { + dev_err(dev, "ioremap::CI failed\n"); goto err_ioremap2; + } pcfg->addr_virt[DPAA_PORTAL_CI] = va; @@ -306,8 +302,15 @@ static int qman_portal_probe(struct platform_device *pdev) spin_unlock(&qman_lock); pcfg->cpu = cpu; - if (!init_pcfg(pcfg)) - goto err_ioremap2; + if (dma_set_mask(dev, DMA_BIT_MASK(40))) { + dev_err(dev, "dma_set_mask() failed\n"); + goto err_portal_init; + } + + if (!init_pcfg(pcfg)) { + dev_err(dev, "portal init failed\n"); + goto err_portal_init; + } /* clear irq affinity if assigned cpu is offline */ if (!cpu_online(cpu)) @@ -315,10 +318,11 @@ static int qman_portal_probe(struct platform_device *pdev) return 0; +err_portal_init: + iounmap(pcfg->addr_virt[DPAA_PORTAL_CI]); err_ioremap2: iounmap(pcfg->addr_virt[DPAA_PORTAL_CE]); err_ioremap1: - dev_err(dev, "ioremap failed\n"); return -ENXIO; } @@ -346,8 +350,14 @@ static int __init qman_portal_driver_register(struct platform_driver *drv) if (ret < 0) return ret; - register_hotcpu_notifier(&qman_hotplug_cpu_notifier); - + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "soc/qman_portal:online", + qman_online_cpu, qman_offline_cpu); + if (ret < 0) { + pr_err("qman: failed to register hotplug callbacks.\n"); + platform_driver_unregister(drv); + return ret; + } return 0; } diff --git a/drivers/soc/fsl/qbman/qman_priv.h b/drivers/soc/fsl/qbman/qman_priv.h index 5cf821e..96f58c4 100644 --- a/drivers/soc/fsl/qbman/qman_priv.h +++ b/drivers/soc/fsl/qbman/qman_priv.h @@ -28,12 +28,12 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include "dpaa_sys.h" #include <soc/fsl/qman.h> #include <linux/iommu.h> +#include <linux/dma-contiguous.h> +#include <linux/of_address.h> #if defined(CONFIG_FSL_PAMU) #include <asm/fsl_pamu_stash.h> @@ -73,89 +73,22 @@ struct qm_mcr_querycgr { struct __qm_mc_cgr cgr; /* CGR fields */ u8 __reserved2[6]; u8 i_bcnt_hi; /* high 8-bits of 40-bit "Instant" */ - u32 i_bcnt_lo; /* low 32-bits of 40-bit */ + __be32 i_bcnt_lo; /* low 32-bits of 40-bit */ u8 __reserved3[3]; u8 a_bcnt_hi; /* high 8-bits of 40-bit "Average" */ - u32 a_bcnt_lo; /* low 32-bits of 40-bit */ - u32 cscn_targ_swp[4]; + __be32 a_bcnt_lo; /* low 32-bits of 40-bit */ + __be32 cscn_targ_swp[4]; } __packed; static inline u64 qm_mcr_querycgr_i_get64(const struct qm_mcr_querycgr *q) { - return ((u64)q->i_bcnt_hi << 32) | (u64)q->i_bcnt_lo; + return ((u64)q->i_bcnt_hi << 32) | be32_to_cpu(q->i_bcnt_lo); } static inline u64 qm_mcr_querycgr_a_get64(const struct qm_mcr_querycgr *q) { - return ((u64)q->a_bcnt_hi << 32) | (u64)q->a_bcnt_lo; + return ((u64)q->a_bcnt_hi << 32) | be32_to_cpu(q->a_bcnt_lo); } -/* "Query FQ Non-Programmable Fields" */ -struct qm_mcc_queryfq_np { - u8 _ncw_verb; - u8 __reserved1[3]; - u32 fqid; /* 24-bit */ - u8 __reserved2[56]; -} __packed; - -struct qm_mcr_queryfq_np { - u8 verb; - u8 result; - u8 __reserved1; - u8 state; /* QM_MCR_NP_STATE_*** */ - u32 fqd_link; /* 24-bit, _res2[24-31] */ - u16 odp_seq; /* 14-bit, _res3[14-15] */ - u16 orp_nesn; /* 14-bit, _res4[14-15] */ - u16 orp_ea_hseq; /* 15-bit, _res5[15] */ - u16 orp_ea_tseq; /* 15-bit, _res6[15] */ - u32 orp_ea_hptr; /* 24-bit, _res7[24-31] */ - u32 orp_ea_tptr; /* 24-bit, _res8[24-31] */ - u32 pfdr_hptr; /* 24-bit, _res9[24-31] */ - u32 pfdr_tptr; /* 24-bit, _res10[24-31] */ - u8 __reserved2[5]; - u8 is; /* 1-bit, _res12[1-7] */ - u16 ics_surp; - u32 byte_cnt; - u32 frm_cnt; /* 24-bit, _res13[24-31] */ - u32 __reserved3; - u16 ra1_sfdr; /* QM_MCR_NP_RA1_*** */ - u16 ra2_sfdr; /* QM_MCR_NP_RA2_*** */ - u16 __reserved4; - u16 od1_sfdr; /* QM_MCR_NP_OD1_*** */ - u16 od2_sfdr; /* QM_MCR_NP_OD2_*** */ - u16 od3_sfdr; /* QM_MCR_NP_OD3_*** */ -} __packed; - -#define QM_MCR_NP_STATE_FE 0x10 -#define QM_MCR_NP_STATE_R 0x08 -#define QM_MCR_NP_STATE_MASK 0x07 /* Reads FQD::STATE; */ -#define QM_MCR_NP_STATE_OOS 0x00 -#define QM_MCR_NP_STATE_RETIRED 0x01 -#define QM_MCR_NP_STATE_TEN_SCHED 0x02 -#define QM_MCR_NP_STATE_TRU_SCHED 0x03 -#define QM_MCR_NP_STATE_PARKED 0x04 -#define QM_MCR_NP_STATE_ACTIVE 0x05 -#define QM_MCR_NP_PTR_MASK 0x07ff /* for RA[12] & OD[123] */ -#define QM_MCR_NP_RA1_NRA(v) (((v) >> 14) & 0x3) /* FQD::NRA */ -#define QM_MCR_NP_RA2_IT(v) (((v) >> 14) & 0x1) /* FQD::IT */ -#define QM_MCR_NP_OD1_NOD(v) (((v) >> 14) & 0x3) /* FQD::NOD */ -#define QM_MCR_NP_OD3_NPC(v) (((v) >> 14) & 0x3) /* FQD::NPC */ - -enum qm_mcr_queryfq_np_masks { - qm_mcr_fqd_link_mask = BIT(24)-1, - qm_mcr_odp_seq_mask = BIT(14)-1, - qm_mcr_orp_nesn_mask = BIT(14)-1, - qm_mcr_orp_ea_hseq_mask = BIT(15)-1, - qm_mcr_orp_ea_tseq_mask = BIT(15)-1, - qm_mcr_orp_ea_hptr_mask = BIT(24)-1, - qm_mcr_orp_ea_tptr_mask = BIT(24)-1, - qm_mcr_pfdr_hptr_mask = BIT(24)-1, - qm_mcr_pfdr_tptr_mask = BIT(24)-1, - qm_mcr_is_mask = BIT(1)-1, - qm_mcr_frm_cnt_mask = BIT(24)-1, -}; -#define qm_mcr_np_get(np, field) \ - ((np)->field & (qm_mcr_##field##_mask)) - /* Congestion Groups */ /* @@ -253,6 +186,7 @@ struct qm_portal_config { #define QMAN_REV20 0x0200 #define QMAN_REV30 0x0300 #define QMAN_REV31 0x0301 +#define QMAN_REV32 0x0302 extern 
u16 qman_ip_rev; /* 0 if uninitialised, otherwise QMAN_REVx */ #define QM_FQID_RANGE_START 1 /* FQID 0 reserved for internal use */ @@ -277,42 +211,6 @@ const struct qm_portal_config *qman_destroy_affine_portal(void); */ int qman_query_fq(struct qman_fq *fq, struct qm_fqd *fqd); -/* - * For qman_volatile_dequeue(); Choose one PRECEDENCE. EXACT is optional. Use - * NUMFRAMES(n) (6-bit) or NUMFRAMES_TILLEMPTY to fill in the frame-count. Use - * FQID(n) to fill in the frame queue ID. - */ -#define QM_VDQCR_PRECEDENCE_VDQCR 0x0 -#define QM_VDQCR_PRECEDENCE_SDQCR 0x80000000 -#define QM_VDQCR_EXACT 0x40000000 -#define QM_VDQCR_NUMFRAMES_MASK 0x3f000000 -#define QM_VDQCR_NUMFRAMES_SET(n) (((n) & 0x3f) << 24) -#define QM_VDQCR_NUMFRAMES_GET(n) (((n) >> 24) & 0x3f) -#define QM_VDQCR_NUMFRAMES_TILLEMPTY QM_VDQCR_NUMFRAMES_SET(0) - -#define QMAN_VOLATILE_FLAG_WAIT 0x00000001 /* wait if VDQCR is in use */ -#define QMAN_VOLATILE_FLAG_WAIT_INT 0x00000002 /* if wait, interruptible? */ -#define QMAN_VOLATILE_FLAG_FINISH 0x00000004 /* wait till VDQCR completes */ - -/* - * qman_volatile_dequeue - Issue a volatile dequeue command - * @fq: the frame queue object to dequeue from - * @flags: a bit-mask of QMAN_VOLATILE_FLAG_*** options - * @vdqcr: bit mask of QM_VDQCR_*** options, as per qm_dqrr_vdqcr_set() - * - * Attempts to lock access to the portal's VDQCR volatile dequeue functionality. - * The function will block and sleep if QMAN_VOLATILE_FLAG_WAIT is specified and - * the VDQCR is already in use, otherwise returns non-zero for failure. If - * QMAN_VOLATILE_FLAG_FINISH is specified, the function will only return once - * the VDQCR command has finished executing (ie. once the callback for the last - * DQRR entry resulting from the VDQCR command has been called). If not using - * the FINISH flag, completion can be determined either by detecting the - * presence of the QM_DQRR_STAT_UNSCHEDULED and QM_DQRR_STAT_DQCR_EXPIRED bits - * in the "stat" parameter passed to the FQ's dequeue callback, or by waiting - * for the QMAN_FQ_STATE_VDQCR bit to disappear. 
- */ -int qman_volatile_dequeue(struct qman_fq *fq, u32 flags, u32 vdqcr); - int qman_alloc_fq_table(u32 num_fqids); /* QMan s/w corenet portal, low-level i/face */ @@ -367,5 +265,6 @@ int qman_alloc_fq_table(u32 num_fqids); #define QM_PIRQ_VISIBLE (QM_PIRQ_SLOW | QM_PIRQ_DQRI) extern struct qman_portal *affine_portals[NR_CPUS]; +extern struct qman_portal *qman_dma_portal; const struct qm_portal_config *qman_get_qm_portal_config( struct qman_portal *portal); diff --git a/drivers/soc/fsl/qbman/qman_test.h b/drivers/soc/fsl/qbman/qman_test.h index d5f8cb2..41bdbc48 100644 --- a/drivers/soc/fsl/qbman/qman_test.h +++ b/drivers/soc/fsl/qbman/qman_test.h @@ -30,7 +30,5 @@ #include "qman_priv.h" -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - int qman_test_stash(void); int qman_test_api(void); diff --git a/drivers/soc/fsl/qbman/qman_test_api.c b/drivers/soc/fsl/qbman/qman_test_api.c index 6880ff1..2895d06 100644 --- a/drivers/soc/fsl/qbman/qman_test_api.c +++ b/drivers/soc/fsl/qbman/qman_test_api.c @@ -65,7 +65,7 @@ static void fd_init(struct qm_fd *fd) { qm_fd_addr_set64(fd, 0xabdeadbeefLLU); qm_fd_set_contig_big(fd, 0x0000ffff); - fd->cmd = 0xfeedf00d; + fd->cmd = cpu_to_be32(0xfeedf00d); } static void fd_inc(struct qm_fd *fd) @@ -86,26 +86,19 @@ static void fd_inc(struct qm_fd *fd) len--; qm_fd_set_param(fd, fmt, off, len); - fd->cmd++; + fd->cmd = cpu_to_be32(be32_to_cpu(fd->cmd) + 1); } /* The only part of the 'fd' we can't memcmp() is the ppid */ -static int fd_cmp(const struct qm_fd *a, const struct qm_fd *b) +static bool fd_neq(const struct qm_fd *a, const struct qm_fd *b) { - int r = (qm_fd_addr_get64(a) == qm_fd_addr_get64(b)) ? 0 : -1; + bool neq = qm_fd_addr_get64(a) != qm_fd_addr_get64(b); - if (!r) { - enum qm_fd_format fmt_a, fmt_b; + neq |= qm_fd_get_format(a) != qm_fd_get_format(b); + neq |= a->cfg != b->cfg; + neq |= a->cmd != b->cmd; - fmt_a = qm_fd_get_format(a); - fmt_b = qm_fd_get_format(b); - r = fmt_a - fmt_b; - } - if (!r) - r = a->cfg - b->cfg; - if (!r) - r = a->cmd - b->cmd; - return r; + return neq; } /* test */ @@ -217,12 +210,12 @@ static enum qman_cb_dqrr_result cb_dqrr(struct qman_portal *p, struct qman_fq *fq, const struct qm_dqrr_entry *dq) { - if (WARN_ON(fd_cmp(&fd_dq, &dq->fd))) { + if (WARN_ON(fd_neq(&fd_dq, &dq->fd))) { pr_err("BADNESS: dequeued frame doesn't match;\n"); return qman_cb_dqrr_consume; } fd_inc(&fd_dq); - if (!(dq->stat & QM_DQRR_STAT_UNSCHEDULED) && !fd_cmp(&fd_dq, &fd)) { + if (!(dq->stat & QM_DQRR_STAT_UNSCHEDULED) && !fd_neq(&fd_dq, &fd)) { sdqcr_complete = 1; wake_up(&waitqueue); } diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c index 43cf66b..e87b654 100644 --- a/drivers/soc/fsl/qbman/qman_test_stash.c +++ b/drivers/soc/fsl/qbman/qman_test_stash.c @@ -175,7 +175,7 @@ static DEFINE_PER_CPU(struct hp_cpu, hp_cpus); /* links together the hp_cpu structs, in first-come first-serve order. 
*/ static LIST_HEAD(hp_cpu_list); -static spinlock_t hp_lock = __SPIN_LOCK_UNLOCKED(hp_lock); +static DEFINE_SPINLOCK(hp_lock); static unsigned int hp_cpu_list_length; @@ -191,6 +191,9 @@ static void *__frame_ptr; static u32 *frame_ptr; static dma_addr_t frame_dma; +/* needed for dma_map*() */ +static const struct qm_portal_config *pcfg; + /* the main function waits on this */ static DECLARE_WAIT_QUEUE_HEAD(queue); @@ -210,16 +213,14 @@ static int allocate_frame_data(void) { u32 lfsr = HP_FIRST_WORD; int loop; - struct platform_device *pdev = platform_device_alloc("foobar", -1); - if (!pdev) { - pr_crit("platform_device_alloc() failed"); - return -EIO; - } - if (platform_device_add(pdev)) { - pr_crit("platform_device_add() failed"); + if (!qman_dma_portal) { + pr_crit("portal not available\n"); return -EIO; } + + pcfg = qman_get_qm_portal_config(qman_dma_portal); + __frame_ptr = kmalloc(4 * HP_NUM_WORDS, GFP_KERNEL); if (!__frame_ptr) return -ENOMEM; @@ -229,15 +230,22 @@ static int allocate_frame_data(void) frame_ptr[loop] = lfsr; lfsr = do_lfsr(lfsr); } - frame_dma = dma_map_single(&pdev->dev, frame_ptr, 4 * HP_NUM_WORDS, + + frame_dma = dma_map_single(pcfg->dev, frame_ptr, 4 * HP_NUM_WORDS, DMA_BIDIRECTIONAL); - platform_device_del(pdev); - platform_device_put(pdev); + if (dma_mapping_error(pcfg->dev, frame_dma)) { + pr_crit("dma mapping failure\n"); + kfree(__frame_ptr); + return -EIO; + } + return 0; } static void deallocate_frame_data(void) { + dma_unmap_single(pcfg->dev, frame_dma, 4 * HP_NUM_WORDS, + DMA_BIDIRECTIONAL); kfree(__frame_ptr); } @@ -249,7 +257,8 @@ static inline int process_frame_data(struct hp_handler *handler, int loop; if (qm_fd_addr_get64(fd) != handler->addr) { - pr_crit("bad frame address"); + pr_crit("bad frame address, [%llX != %llX]\n", + qm_fd_addr_get64(fd), handler->addr); return -EIO; } for (loop = 0; loop < HP_NUM_WORDS; loop++, p++) { @@ -397,8 +406,9 @@ static int init_handler(void *h) goto failed; } memset(&opts, 0, sizeof(opts)); - opts.we_mask = QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_CONTEXTA; - opts.fqd.fq_ctrl = QM_FQCTRL_CTXASTASHING; + opts.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL | + QM_INITFQ_WE_CONTEXTA); + opts.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_CTXASTASHING); qm_fqd_set_stashing(&opts.fqd, 0, STASH_DATA_CL, STASH_CTX_CL); err = qman_init_fq(&handler->rx, QMAN_INITFQ_FLAG_SCHED | QMAN_INITFQ_FLAG_LOCAL, &opts); diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index f8e5486..6c69e3b 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -110,4 +110,6 @@ source "drivers/staging/vc04_services/Kconfig" source "drivers/staging/fsl_qbman/Kconfig" +source "drivers/staging/fsl_ppfe/Kconfig" + endif # STAGING diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index cbd7b08..ee817a5 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_KS7010) += ks7010/ obj-$(CONFIG_GREYBUS) += greybus/ obj-$(CONFIG_BCM2708_VCHIQ) += vc04_services/ obj-$(CONFIG_FSL_SDK_DPA) += fsl_qbman/ +obj-$(CONFIG_FSL_PPFE) += fsl_ppfe/ diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig index 8042d9c..3444727e 100644 --- a/drivers/staging/fsl-dpaa2/Kconfig +++ b/drivers/staging/fsl-dpaa2/Kconfig @@ -34,6 +34,16 @@ config FSL_DPAA2_ETH_DEBUGFS default n ---help--- Enable advanced statistics through debugfs interface. 
+ +config FSL_DPAA2_ETH_DCB + bool "Data Center Bridging (DCB) Support" + default n + depends on DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) features + (PFC) in the driver. + + If unsure, say N. endif source "drivers/staging/fsl-dpaa2/mac/Kconfig" diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth-debugfs.c b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth-debugfs.c index 445c5d1..933412e 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth-debugfs.c +++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth-debugfs.c @@ -110,9 +110,9 @@ static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset) int i, err; seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name); - seq_printf(file, "%s%16s%16s%16s%16s%16s\n", - "VFQID", "CPU", "Type", "Frames", "Pending frames", - "Congestion"); + seq_printf(file, "%s%16s%16s%16s%16s%16s%16s\n", + "VFQID", "CPU", "Traffic Class", "Type", "Frames", + "Pending frames", "Congestion"); for (i = 0; i < priv->num_fqs; i++) { fq = &priv->fq[i]; @@ -120,9 +120,10 @@ static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset) if (err) fcnt = 0; - seq_printf(file, "%5d%16d%16s%16llu%16u%16llu\n", + seq_printf(file, "%5d%16d%16d%16s%16llu%16u%16llu\n", fq->fqid, fq->target_cpu, + fq->tc, fq_type_to_str(fq), fq->stats.frames, fcnt, @@ -158,19 +159,20 @@ static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset) int i; seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name); - seq_printf(file, "%s%16s%16s%16s%16s%16s\n", + seq_printf(file, "%s%16s%16s%16s%16s%16s%16s\n", "CHID", "CPU", "Deq busy", "Frames", "CDANs", - "Avg frm/CDAN"); + "Avg frm/CDAN", "Buf count"); for (i = 0; i < priv->num_channels; i++) { ch = priv->channel[i]; - seq_printf(file, "%4d%16d%16llu%16llu%16llu%16llu\n", + seq_printf(file, "%4d%16d%16llu%16llu%16llu%16llu%16d\n", ch->ch_id, ch->nctx.desired_cpu, ch->stats.dequeue_portal_busy, ch->stats.frames, ch->stats.cdan, - ch->stats.frames / ch->stats.cdan); + ch->stats.frames / ch->stats.cdan, + ch->buf_count); } return 0; diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c index 452eca5..789a5e6 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c +++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.c @@ -137,6 +137,8 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_priv *priv, u16 fd_offset = dpaa2_fd_get_offset(fd); u32 fd_length = dpaa2_fd_get_len(fd); + ch->buf_count--; + skb = build_skb(fd_vaddr, DPAA2_ETH_SKB_SIZE); if (unlikely(!skb)) return NULL; @@ -144,8 +146,6 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_priv *priv, skb_reserve(skb, fd_offset); skb_put(skb, fd_length); - ch->buf_count--; - return skb; } @@ -183,7 +183,7 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, /* We build the skb around the first data buffer */ skb = build_skb(sg_vaddr, DPAA2_ETH_SKB_SIZE); if (unlikely(!skb)) - return NULL; + goto err_build; sg_offset = dpaa2_sg_get_offset(sge); skb_reserve(skb, sg_offset); @@ -214,6 +214,32 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, ch->buf_count -= i + 2; return skb; + +err_build: + /* We still need to subtract the buffers used by this FD from our + * software counter + */ + for (i = 0; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) + if (dpaa2_sg_is_final(&sgt[i])) + break; + ch->buf_count -= i + 2; + + return NULL; +} + +static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count) +{ + struct device *dev = 
priv->net_dev->dev.parent; + void *vaddr; + int i; + + for (i = 0; i < count; i++) { + /* Same logic as on regular Rx path */ + vaddr = dpaa2_eth_iova_to_virt(priv->iommu_domain, buf_array[i]); + dma_unmap_single(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE, + DMA_FROM_DEVICE); + put_page(virt_to_head_page(vaddr)); + } } /* Main Rx frame processing routine */ @@ -521,7 +547,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, dpaa2_fd_set_addr(fd, addr); dpaa2_fd_set_len(fd, skb->len); - fd->simple.ctrl = DPAA2_FD_CTRL_ASAL | FD_CTRL_PTA | FD_CTRL_PTV1; + fd->simple.ctrl = DPAA2_FD_CTRL_ASAL | FD_CTRL_PTA; if (priv->ts_tx_en && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) enable_tx_tstamp(fd, sgt_buf); @@ -578,7 +604,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, dpaa2_fd_set_len(fd, skb->len); dpaa2_fd_set_format(fd, dpaa2_fd_single); - fd->simple.ctrl = DPAA2_FD_CTRL_ASAL | FD_CTRL_PTA | FD_CTRL_PTV1; + fd->simple.ctrl = DPAA2_FD_CTRL_ASAL | FD_CTRL_PTA; if (priv->ts_tx_en && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) enable_tx_tstamp(fd, buffer_start); @@ -597,7 +623,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv, */ static void free_tx_fd(const struct dpaa2_eth_priv *priv, const struct dpaa2_fd *fd, - u32 *status) + u32 *status, bool in_napi) { struct device *dev = priv->net_dev->dev.parent; dma_addr_t fd_addr; @@ -676,7 +702,7 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv, kfree(skbh); /* Move on with skb release */ - dev_kfree_skb(skb); + napi_consume_skb(skb, in_napi); } static int dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) @@ -760,7 +786,7 @@ static int dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) if (unlikely(err < 0)) { percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ - free_tx_fd(priv, &fd, NULL); + free_tx_fd(priv, &fd, NULL, false); } else { percpu_stats->tx_packets++; percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd); @@ -813,7 +839,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, fd->simple.ctrl & DPAA2_FD_TX_ERR_MASK); } - free_tx_fd(priv, fd, check_fas_errors ? &status : NULL); + free_tx_fd(priv, fd, check_fas_errors ? &status : NULL, true); /* if there are no errors, we're done */ if (likely(!errors)) @@ -883,7 +909,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, u16 bpid) u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; void *buf; dma_addr_t addr; - int i; + int i, err; for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { /* Allocate buffer visible to WRIOP + skb shared info + @@ -910,22 +936,25 @@ static int add_bufs(struct dpaa2_eth_priv *priv, u16 bpid) } release_bufs: - /* In case the portal is busy, retry until successful. - * The buffer release function would only fail if the QBMan portal - * was busy, which implies portal contention (i.e. more CPUs than - * portals, i.e. GPPs w/o affine DPIOs). For all practical purposes, - * there is little we can realistically do, short of giving up - - * in which case we'd risk depleting the buffer pool and never again - * receiving the Rx interrupt which would kick-start the refill logic. - * So just keep retrying, at the risk of being moved to ksoftirqd. 
- */ - while (dpaa2_io_service_release(NULL, bpid, buf_array, i)) + /* In case the portal is busy, retry until successful */ + while ((err = dpaa2_io_service_release(NULL, bpid, + buf_array, i)) == -EBUSY) cpu_relax(); + + /* If release command failed, clean up and bail out; not much + * else we can do about it + */ + if (unlikely(err)) { + free_bufs(priv, buf_array, i); + return 0; + } + return i; err_map: put_page(virt_to_head_page(buf)); err_alloc: + /* If we managed to allocate at least some buffers, release them */ if (i) goto release_bufs; @@ -968,10 +997,8 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) */ static void drain_bufs(struct dpaa2_eth_priv *priv, int count) { - struct device *dev = priv->net_dev->dev.parent; u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; - void *vaddr; - int ret, i; + int ret; do { ret = dpaa2_io_service_acquire(NULL, priv->bpid, @@ -980,15 +1007,7 @@ static void drain_bufs(struct dpaa2_eth_priv *priv, int count) netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); return; } - for (i = 0; i < ret; i++) { - /* Same logic as on regular Rx path */ - vaddr = dpaa2_eth_iova_to_virt(priv->iommu_domain, - buf_array[i]); - dma_unmap_single(dev, buf_array[i], - DPAA2_ETH_RX_BUF_SIZE, - DMA_FROM_DEVICE); - put_page(virt_to_head_page(vaddr)); - } + free_bufs(priv, buf_array, ret); } while (ret); } @@ -1838,7 +1857,7 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) static void setup_fqs(struct dpaa2_eth_priv *priv) { - int i; + int i, j; /* We have one TxConf FQ per Tx flow. Tx queues MUST be at the * beginning of the queue array. @@ -1851,11 +1870,13 @@ static void setup_fqs(struct dpaa2_eth_priv *priv) priv->fq[priv->num_fqs++].flowid = (u16)i; } - for (i = 0; i < dpaa2_eth_queue_count(priv); i++) { - priv->fq[priv->num_fqs].type = DPAA2_RX_FQ; - priv->fq[priv->num_fqs].consume = dpaa2_eth_rx; - priv->fq[priv->num_fqs++].flowid = (u16)i; - } + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) + for (j = 0; j < dpaa2_eth_queue_count(priv); j++) { + priv->fq[priv->num_fqs].type = DPAA2_RX_FQ; + priv->fq[priv->num_fqs].consume = dpaa2_eth_rx; + priv->fq[priv->num_fqs].tc = (u8)i; + priv->fq[priv->num_fqs++].flowid = (u16)j; + } #ifdef CONFIG_FSL_DPAA2_ETH_USE_ERR_QUEUE /* We have exactly one Rx error queue per DPNI */ @@ -2098,9 +2119,6 @@ static int setup_dpni(struct fsl_mc_device *ls_dev) dev_warn(dev, "Tx data offset (%d) not a multiple of 64B", priv->tx_data_offset); - /* Accommodate software annotation space (SWA) */ - priv->tx_data_offset += DPAA2_ETH_SWA_SIZE; - /* Enable congestion notifications for Tx queues */ err = setup_tx_congestion(priv); if (err) @@ -2156,39 +2174,111 @@ static void free_dpni(struct dpaa2_eth_priv *priv) kfree(priv->cscn_unaligned); } -int setup_fqs_taildrop(struct dpaa2_eth_priv *priv, - bool enable) +static int set_queue_taildrop(struct dpaa2_eth_priv *priv, + struct dpni_taildrop *td) { struct device *dev = priv->net_dev->dev.parent; - struct dpni_taildrop td; - int err = 0, i; + int err, i; - td.enable = enable; - td.threshold = DPAA2_ETH_TAILDROP_THRESH; - - if (enable) { - priv->num_bufs = DPAA2_ETH_NUM_BUFS_TD; - priv->refill_thresh = DPAA2_ETH_REFILL_THRESH_TD; - } else { - priv->num_bufs = DPAA2_ETH_NUM_BUFS_FC / - priv->num_channels; - priv->refill_thresh = priv->num_bufs - DPAA2_ETH_BUFS_PER_CMD; - } for (i = 0; i < priv->num_fqs; i++) { if (priv->fq[i].type != DPAA2_RX_FQ) continue; err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token, - DPNI_CP_QUEUE, DPNI_QUEUE_RX, 0, - priv->fq[i].flowid, 
&td); + DPNI_CP_QUEUE, DPNI_QUEUE_RX, + priv->fq[i].tc, priv->fq[i].flowid, + td); if (err) { dev_err(dev, "dpni_set_taildrop() failed (%d)\n", err); - break; + return err; } } - return err; + return 0; +} + +static int set_group_taildrop(struct dpaa2_eth_priv *priv, + struct dpni_taildrop *td) +{ + struct device *dev = priv->net_dev->dev.parent; + struct dpni_taildrop disable_td, *tc_td; + int i, err; + + memset(&disable_td, 0, sizeof(struct dpni_taildrop)); + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + if (td->enable && dpaa2_eth_is_pfc_enabled(priv, i)) + /* Do not set taildrop thresholds for PFC-enabled + * traffic classes. We will enable congestion + * notifications for them. + */ + tc_td = &disable_td; + else + tc_td = td; + + err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token, + DPNI_CP_GROUP, DPNI_QUEUE_RX, + i, 0, tc_td); + if (err) { + dev_err(dev, "dpni_set_taildrop() failed (%d)\n", err); + return err; + } + } + return 0; +} + +/* Enable/disable Rx FQ taildrop + * + * Rx FQ taildrop is mutually exclusive with flow control and it only gets + * disabled when FC is active. Depending on FC status, we need to compute + * the maximum number of buffers in the pool differently, so use the + * opportunity to update max number of buffers as well. + */ +int set_rx_taildrop(struct dpaa2_eth_priv *priv) +{ + enum dpaa2_eth_td_cfg cfg = dpaa2_eth_get_td_type(priv); + struct dpni_taildrop td_queue, td_group; + int err = 0; + + switch (cfg) { + case DPAA2_ETH_TD_NONE: + memset(&td_queue, 0, sizeof(struct dpni_taildrop)); + memset(&td_group, 0, sizeof(struct dpni_taildrop)); + priv->num_bufs = DPAA2_ETH_NUM_BUFS_FC / + priv->num_channels; + break; + case DPAA2_ETH_TD_QUEUE: + memset(&td_group, 0, sizeof(struct dpni_taildrop)); + td_queue.enable = 1; + td_queue.units = DPNI_CONGESTION_UNIT_BYTES; + td_queue.threshold = DPAA2_ETH_TAILDROP_THRESH / + dpaa2_eth_tc_count(priv); + priv->num_bufs = DPAA2_ETH_NUM_BUFS_TD; + break; + case DPAA2_ETH_TD_GROUP: + memset(&td_queue, 0, sizeof(struct dpni_taildrop)); + td_group.enable = 1; + td_group.units = DPNI_CONGESTION_UNIT_FRAMES; + td_group.threshold = NAPI_POLL_WEIGHT * + dpaa2_eth_queue_count(priv); + priv->num_bufs = NAPI_POLL_WEIGHT * + dpaa2_eth_tc_count(priv); + break; + default: + break; + } + + err = set_queue_taildrop(priv, &td_queue); + if (err) + return err; + + err = set_group_taildrop(priv, &td_group); + if (err) + return err; + + priv->refill_thresh = priv->num_bufs - DPAA2_ETH_BUFS_PER_CMD; + + return 0; } static int setup_rx_flow(struct dpaa2_eth_priv *priv, @@ -2201,7 +2291,7 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv, int err; err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, - DPNI_QUEUE_RX, 0, fq->flowid, &q, &qid); + DPNI_QUEUE_RX, fq->tc, fq->flowid, &q, &qid); if (err) { dev_err(dev, "dpni_get_queue() failed (%d)\n", err); return err; @@ -2214,7 +2304,7 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv, q.destination.priority = 1; q.user_context = (u64)fq; err = dpni_set_queue(priv->mc_io, 0, priv->mc_token, - DPNI_QUEUE_RX, 0, fq->flowid, q_opt, &q); + DPNI_QUEUE_RX, fq->tc, fq->flowid, q_opt, &q); if (err) { dev_err(dev, "dpni_set_queue() failed (%d)\n", err); return err; @@ -2411,7 +2501,13 @@ static int set_hash(struct dpaa2_eth_priv *priv) dist_cfg.dist_mode = DPNI_DIST_MODE_HASH; } - err = dpni_set_rx_tc_dist(priv->mc_io, 0, priv->mc_token, 0, &dist_cfg); + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + err = dpni_set_rx_tc_dist(priv->mc_io, 0, priv->mc_token, i, + &dist_cfg); + if (err) 
+ break; + } + dma_unmap_single(dev, dist_cfg.key_cfg_iova, DPAA2_CLASSIFIER_DMA_SIZE, DMA_TO_DEVICE); if (err) @@ -2438,6 +2534,7 @@ static int bind_dpni(struct dpaa2_eth_priv *priv) pools_params.num_dpbp = 1; pools_params.pools[0].dpbp_id = priv->dpbp_dev->obj_desc.id; pools_params.pools[0].backup_pool = 0; + pools_params.pools[0].priority_mask = 0xff; pools_params.pools[0].buffer_size = DPAA2_ETH_RX_BUF_SIZE; err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params); if (err) { @@ -2923,6 +3020,264 @@ static void dpaa2_eth_sysfs_remove(struct device *dev) device_remove_file(dev, &dpaa2_eth_attrs[i]); } +#ifdef CONFIG_FSL_DPAA2_ETH_DCB +static int dpaa2_eth_dcbnl_ieee_getpfc(struct net_device *net_dev, + struct ieee_pfc *pfc) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + struct dpni_congestion_notification_cfg notification_cfg; + struct dpni_link_state state; + int err, i; + + pfc->pfc_cap = dpaa2_eth_tc_count(priv); + + err = dpni_get_link_state(priv->mc_io, 0, priv->mc_token, &state); + if (err) { + netdev_err(net_dev, "ERROR %d getting link state", err); + return err; + } + + if (!(state.options & DPNI_LINK_OPT_PFC_PAUSE)) + return 0; + + priv->pfc.pfc_en = 0; + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + err = dpni_get_congestion_notification(priv->mc_io, 0, + priv->mc_token, + DPNI_QUEUE_RX, + i, ¬ification_cfg); + if (err) { + netdev_err(net_dev, "Error %d getting congestion notif", + err); + return err; + } + + if (notification_cfg.threshold_entry) + priv->pfc.pfc_en |= 1 << i; + } + + pfc->pfc_en = priv->pfc.pfc_en; + pfc->mbc = priv->pfc.mbc; + pfc->delay = priv->pfc.delay; + + return 0; +} + +/* Configure ingress classification based on VLAN PCP */ +static int set_vlan_qos(struct dpaa2_eth_priv *priv) +{ + struct device *dev = priv->net_dev->dev.parent; + struct dpkg_profile_cfg kg_cfg = {0}; + struct dpni_qos_tbl_cfg qos_cfg = {0}; + struct dpni_rule_cfg key_params; + u8 *params_iova; + __be16 key, mask = cpu_to_be16(VLAN_PRIO_MASK); + int err = 0, i, j = 0; + + if (priv->vlan_clsf_set) + return 0; + + params_iova = kzalloc(DPAA2_CLASSIFIER_DMA_SIZE, GFP_KERNEL); + if (!params_iova) + return -ENOMEM; + + kg_cfg.num_extracts = 1; + kg_cfg.extracts[0].type = DPKG_EXTRACT_FROM_HDR; + kg_cfg.extracts[0].extract.from_hdr.prot = NET_PROT_VLAN; + kg_cfg.extracts[0].extract.from_hdr.type = DPKG_FULL_FIELD; + kg_cfg.extracts[0].extract.from_hdr.field = NH_FLD_VLAN_TCI; + + err = dpni_prepare_key_cfg(&kg_cfg, params_iova); + if (err) { + dev_err(dev, "dpkg_prepare_key_cfg failed: %d\n", err); + goto out_free; + } + + /* Set QoS table */ + qos_cfg.default_tc = 0; + qos_cfg.discard_on_miss = 0; + qos_cfg.key_cfg_iova = dma_map_single(dev, params_iova, + DPAA2_CLASSIFIER_DMA_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, qos_cfg.key_cfg_iova)) { + dev_err(dev, "%s: DMA mapping failed\n", __func__); + err = -ENOMEM; + goto out_free; + } + err = dpni_set_qos_table(priv->mc_io, 0, priv->mc_token, &qos_cfg); + dma_unmap_single(dev, qos_cfg.key_cfg_iova, + DPAA2_CLASSIFIER_DMA_SIZE, DMA_TO_DEVICE); + + if (err) { + dev_err(dev, "dpni_set_qos_table failed: %d\n", err); + goto out_free; + } + + key_params.key_size = sizeof(key); + + if (dpaa2_eth_fs_mask_enabled(priv)) { + key_params.mask_iova = dma_map_single(dev, &mask, sizeof(mask), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, key_params.mask_iova)) { + dev_err(dev, "DMA mapping failed %s\n", __func__); + err = -ENOMEM; + goto out_free; + } + } else { + key_params.mask_iova = 0; + } + + key_params.key_iova 
= dma_map_single(dev, &key, sizeof(key), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, key_params.key_iova)) { + dev_err(dev, "%s: DMA mapping failed\n", __func__); + err = -ENOMEM; + goto out_unmap_mask; + } + + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + key = cpu_to_be16(i << VLAN_PRIO_SHIFT); + dma_sync_single_for_device(dev, key_params.key_iova, + sizeof(key), DMA_TO_DEVICE); + + err = dpni_add_qos_entry(priv->mc_io, 0, priv->mc_token, + &key_params, i, j++); + if (err) { + dev_err(dev, "dpni_add_qos_entry failed: %d\n", err); + goto out_unmap; + } + } + + priv->vlan_clsf_set = true; + +out_unmap: + dma_unmap_single(dev, key_params.key_iova, sizeof(key), DMA_TO_DEVICE); +out_unmap_mask: + if (key_params.mask_iova) + dma_unmap_single(dev, key_params.mask_iova, sizeof(mask), + DMA_TO_DEVICE); +out_free: + kfree(params_iova); + return err; +} + +static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev, + struct ieee_pfc *pfc) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + struct dpni_congestion_notification_cfg notification_cfg = {0}; + struct dpni_link_state state = {0}; + struct dpni_link_cfg cfg = {0}; + int err = 0, i; + + if (priv->pfc.pfc_en == pfc->pfc_en) + /* Same enabled mask, nothing to be done */ + return 0; + + err = set_vlan_qos(priv); + if (err) + return err; + + err = dpni_get_link_state(priv->mc_io, 0, priv->mc_token, &state); + if (err) { + netdev_err(net_dev, "ERROR %d getting link state", err); + return err; + } + + cfg.rate = state.rate; + cfg.options = state.options; + if (pfc->pfc_en) + cfg.options |= DPNI_LINK_OPT_PFC_PAUSE; + else + cfg.options &= ~DPNI_LINK_OPT_PFC_PAUSE; + + err = dpni_set_link_cfg(priv->mc_io, 0, priv->mc_token, &cfg); + if (err) { + netdev_err(net_dev, "ERROR %d setting link cfg", err); + return err; + } + + memcpy(&priv->pfc, pfc, sizeof(priv->pfc)); + + err = set_rx_taildrop(priv); + if (err) + return err; + + /* configure congestion notifications */ + notification_cfg.notification_mode = DPNI_CONG_OPT_FLOW_CONTROL; + notification_cfg.units = DPNI_CONGESTION_UNIT_FRAMES; + notification_cfg.message_iova = 0ULL; + notification_cfg.message_ctx = 0ULL; + + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + if (dpaa2_eth_is_pfc_enabled(priv, i)) { + notification_cfg.threshold_entry = NAPI_POLL_WEIGHT; + notification_cfg.threshold_exit = NAPI_POLL_WEIGHT / 2; + } else { + notification_cfg.threshold_entry = 0; + notification_cfg.threshold_exit = 0; + } + + err = dpni_set_congestion_notification(priv->mc_io, 0, + priv->mc_token, + DPNI_QUEUE_RX, + i, ¬ification_cfg); + if (err) { + netdev_err(net_dev, "Error %d setting congestion notif", + err); + return err; + } + } + + return 0; +} + +static u8 dpaa2_eth_dcbnl_getdcbx(struct net_device *net_dev) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + return priv->dcbx_mode; +} + +static u8 dpaa2_eth_dcbnl_setdcbx(struct net_device *net_dev, u8 mode) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + priv->dcbx_mode = mode; + return 0; +} + +static u8 dpaa2_eth_dcbnl_getcap(struct net_device *net_dev, int capid, u8 *cap) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + + switch (capid) { + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << dpaa2_eth_tc_count(priv); + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx_mode; + break; + default: + *cap = false; + break; + } + + return 0; +} + +const struct dcbnl_rtnl_ops dpaa2_eth_dcbnl_ops = { + .ieee_getpfc = dpaa2_eth_dcbnl_ieee_getpfc, + .ieee_setpfc = 
dpaa2_eth_dcbnl_ieee_setpfc, + .getdcbx = dpaa2_eth_dcbnl_getdcbx, + .setdcbx = dpaa2_eth_dcbnl_setdcbx, + .getcap = dpaa2_eth_dcbnl_getcap, +}; +#endif + static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) { struct device *dev; @@ -2951,7 +3306,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, &priv->mc_io); if (err) { - dev_err(dev, "MC portal allocation failed\n"); + dev_dbg(dev, "MC portal allocation failed\n"); + err = -EPROBE_DEFER; goto err_portal_alloc; } @@ -2977,10 +3333,6 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) if (err) goto err_bind; - /* Add a NAPI context for each channel */ - add_ch_napi(priv); - enable_ch_napi(priv); - /* Percpu statistics */ priv->percpu_stats = alloc_percpu(*priv->percpu_stats); if (!priv->percpu_stats) { @@ -3023,6 +3375,14 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_alloc_rings; net_dev->ethtool_ops = &dpaa2_ethtool_ops; +#ifdef CONFIG_FSL_DPAA2_ETH_DCB + net_dev->dcbnl_ops = &dpaa2_eth_dcbnl_ops; + priv->dcbx_mode = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +#endif + + /* Add a NAPI context for each channel */ + add_ch_napi(priv); + enable_ch_napi(priv); err = setup_irqs(dpni_dev); if (err) { @@ -3086,6 +3446,9 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) #endif dpaa2_eth_sysfs_remove(&net_dev->dev); + disable_ch_napi(priv); + del_ch_napi(priv); + unregister_netdev(net_dev); dev_info(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); @@ -3097,9 +3460,6 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) free_rings(priv); free_percpu(priv->percpu_stats); free_percpu(priv->percpu_extras); - - disable_ch_napi(priv); - del_ch_napi(priv); free_dpbp(priv); free_dpio(priv); free_dpni(priv); diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h index 86cb12e..62de3c7 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h +++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h @@ -33,6 +33,7 @@ #define __DPAA2_ETH_H #include <linux/atomic.h> +#include <linux/dcbnl.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include "../../fsl-mc/include/dpaa2-io.h" @@ -96,7 +97,7 @@ #define DPAA2_ETH_RX_BUF_ALIGN 64 #define DPAA2_ETH_RX_BUF_ALIGN_V1 256 #define DPAA2_ETH_NEEDED_HEADROOM(p_priv) \ - ((p_priv)->tx_data_offset + DPAA2_ETH_TX_BUF_ALIGN) + ((p_priv)->tx_data_offset + DPAA2_ETH_TX_BUF_ALIGN - HH_DATA_MOD) /* rx_extra_head prevents reallocations in L3 processing. */ #define DPAA2_ETH_SKB_SIZE \ @@ -114,17 +115,19 @@ /* PTP nominal frequency 1GHz */ #define DPAA2_PTP_NOMINAL_FREQ_PERIOD_NS 1 -/* Leave enough extra space in the headroom to make sure the skb is - * not realloc'd in forwarding scenarios. - */ -#define DPAA2_ETH_RX_HEAD_ROOM 192 - /* We are accommodating a skb backpointer and some S/G info * in the frame's software annotation. The hardware * options are either 0 or 64, so we choose the latter. 
*/ #define DPAA2_ETH_SWA_SIZE 64 +/* Extra headroom space requested to hardware, in order to make sure there's + * no realloc'ing in forwarding scenarios + */ +#define DPAA2_ETH_RX_HEAD_ROOM \ + (DPAA2_ETH_TX_HWA_SIZE - DPAA2_ETH_RX_HWA_SIZE + \ + DPAA2_ETH_TX_BUF_ALIGN) + /* Must keep this struct smaller than DPAA2_ETH_SWA_SIZE */ struct dpaa2_eth_swa { struct sk_buff *skb; @@ -301,16 +304,17 @@ struct dpaa2_eth_ch_stats { __u64 pull_err; }; +#define DPAA2_ETH_MAX_DPCONS NR_CPUS +#define DPAA2_ETH_MAX_TCS 8 + /* Maximum number of queues associated with a DPNI */ -#define DPAA2_ETH_MAX_RX_QUEUES 16 -#define DPAA2_ETH_MAX_TX_QUEUES NR_CPUS +#define DPAA2_ETH_MAX_RX_QUEUES (DPNI_MAX_DIST_SIZE * DPAA2_ETH_MAX_TCS) +#define DPAA2_ETH_MAX_TX_QUEUES DPNI_MAX_SENDERS #define DPAA2_ETH_MAX_RX_ERR_QUEUES 1 #define DPAA2_ETH_MAX_QUEUES (DPAA2_ETH_MAX_RX_QUEUES + \ DPAA2_ETH_MAX_TX_QUEUES + \ DPAA2_ETH_MAX_RX_ERR_QUEUES) -#define DPAA2_ETH_MAX_DPCONS NR_CPUS - enum dpaa2_eth_fq_type { DPAA2_RX_FQ = 0, DPAA2_TX_CONF_FQ, @@ -323,6 +327,7 @@ struct dpaa2_eth_fq { u32 fqid; u32 tx_qdbin; u16 flowid; + u8 tc; int target_cpu; struct dpaa2_eth_channel *channel; enum dpaa2_eth_fq_type type; @@ -429,6 +434,10 @@ struct dpaa2_eth_priv { struct dpaa2_eth_cls_rule *cls_rule; struct dpni_tx_shaping_cfg shaping_cfg; + + u8 dcbx_mode; + struct ieee_pfc pfc; + bool vlan_clsf_set; }; #define dpaa2_eth_hash_enabled(priv) \ @@ -454,7 +463,37 @@ static inline int dpaa2_eth_queue_count(struct dpaa2_eth_priv *priv) return priv->dpni_attrs.num_queues; } +static inline int dpaa2_eth_tc_count(struct dpaa2_eth_priv *priv) +{ + return priv->dpni_attrs.num_tcs; +} + +static inline bool dpaa2_eth_is_pfc_enabled(struct dpaa2_eth_priv *priv, + int traffic_class) +{ + return priv->pfc.pfc_en & (1 << traffic_class); +} + +enum dpaa2_eth_td_cfg { + DPAA2_ETH_TD_NONE, + DPAA2_ETH_TD_QUEUE, + DPAA2_ETH_TD_GROUP +}; + +static inline enum dpaa2_eth_td_cfg +dpaa2_eth_get_td_type(struct dpaa2_eth_priv *priv) +{ + bool pfc_enabled = !!(priv->pfc.pfc_en); + + if (pfc_enabled) + return DPAA2_ETH_TD_GROUP; + else if (priv->tx_pause_frames) + return DPAA2_ETH_TD_NONE; + else + return DPAA2_ETH_TD_QUEUE; +} + void check_cls_support(struct dpaa2_eth_priv *priv); -int setup_fqs_taildrop(struct dpaa2_eth_priv *priv, bool enable); +int set_rx_taildrop(struct dpaa2_eth_priv *priv); #endif /* __DPAA2_H */ diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-ethtool.c b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-ethtool.c index 9859814..304a21c 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-ethtool.c +++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-ethtool.c @@ -230,7 +230,8 @@ static int dpaa2_eth_set_pauseparam(struct net_device *net_dev, if (current_tx_pause == pause->tx_pause) goto out; - err = setup_fqs_taildrop(priv, !pause->tx_pause); + priv->tx_pause_frames = pause->tx_pause; + err = set_rx_taildrop(priv); if (err) netdev_dbg(net_dev, "ERROR %d configuring taildrop", err); @@ -676,7 +677,7 @@ static int do_cls(struct net_device *net_dev, struct dpni_rule_cfg rule_cfg; struct dpni_fs_action_cfg fs_act = { 0 }; void *dma_mem; - int err = 0; + int err = 0, tc; if (!dpaa2_eth_fs_enabled(priv)) { netdev_err(net_dev, "dev does not support steering!\n"); @@ -719,12 +720,19 @@ static int do_cls(struct net_device *net_dev, else fs_act.flow_id = fs->ring_cookie; - if (add) - err = dpni_add_fs_entry(priv->mc_io, 0, priv->mc_token, - 0, fs->location, &rule_cfg, &fs_act); - else - err = dpni_remove_fs_entry(priv->mc_io, 0, priv->mc_token, - 0, 
&rule_cfg); + for (tc = 0; tc < dpaa2_eth_tc_count(priv); tc++) { + if (add) + err = dpni_add_fs_entry(priv->mc_io, 0, priv->mc_token, + tc, fs->location, &rule_cfg, + &fs_act); + else + err = dpni_remove_fs_entry(priv->mc_io, 0, + priv->mc_token, tc, + &rule_cfg); + + if (err) + break; + } dma_unmap_single(dev, rule_cfg.key_iova, rule_cfg.key_size * 2, DMA_TO_DEVICE); diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpni-cmd.h b/drivers/staging/fsl-dpaa2/ethernet/dpni-cmd.h index fa353d7..29f44df 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpni-cmd.h +++ b/drivers/staging/fsl-dpaa2/ethernet/dpni-cmd.h @@ -37,9 +37,11 @@ #define DPNI_VER_MAJOR 7 #define DPNI_VER_MINOR 0 #define DPNI_CMD_BASE_VERSION 1 +#define DPNI_CMD_2ND_VERSION 2 #define DPNI_CMD_ID_OFFSET 4 #define DPNI_CMD(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION) +#define DPNI_CMD_V2(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_2ND_VERSION) #define DPNI_CMDID_OPEN DPNI_CMD(0x801) #define DPNI_CMDID_CLOSE DPNI_CMD(0x800) @@ -62,7 +64,7 @@ #define DPNI_CMDID_GET_IRQ_STATUS DPNI_CMD(0x016) #define DPNI_CMDID_CLEAR_IRQ_STATUS DPNI_CMD(0x017) -#define DPNI_CMDID_SET_POOLS DPNI_CMD(0x200) +#define DPNI_CMDID_SET_POOLS DPNI_CMD_V2(0x200) #define DPNI_CMDID_SET_ERRORS_BEHAVIOR DPNI_CMD(0x20B) #define DPNI_CMDID_GET_QDID DPNI_CMD(0x210) @@ -85,6 +87,8 @@ #define DPNI_CMDID_SET_RX_TC_DIST DPNI_CMD(0x235) +#define DPNI_CMDID_SET_QOS_TBL DPNI_CMD(0x240) +#define DPNI_CMDID_ADD_QOS_ENT DPNI_CMD(0x241) #define DPNI_CMDID_ADD_FS_ENT DPNI_CMD(0x244) #define DPNI_CMDID_REMOVE_FS_ENT DPNI_CMD(0x245) #define DPNI_CMDID_CLR_FS_ENT DPNI_CMD(0x246) @@ -125,13 +129,14 @@ struct dpni_cmd_open { #define DPNI_BACKUP_POOL(val, order) (((val) & 0x1) << (order)) struct dpni_cmd_set_pools { - /* cmd word 0 */ u8 num_dpbp; u8 backup_pool_mask; __le16 pad; - /* cmd word 0..4 */ - __le32 dpbp_id[DPNI_MAX_DPBP]; - /* cmd word 4..6 */ + struct { + __le16 dpbp_id; + u8 priority_mask; + u8 pad; + } pool[DPNI_MAX_DPBP]; __le16 buffer_size[DPNI_MAX_DPBP]; }; @@ -510,6 +515,36 @@ struct dpni_cmd_set_queue { __le64 user_context; }; +#define DPNI_DISCARD_ON_MISS_SHIFT 0 +#define DPNI_DISCARD_ON_MISS_SIZE 1 + +struct dpni_cmd_set_qos_table { + u32 pad; + u8 default_tc; + /* only the LSB */ + u8 discard_on_miss; + u16 pad1[21]; + u64 key_cfg_iova; +}; + +struct dpni_cmd_add_qos_entry { + u16 pad; + u8 tc_id; + u8 key_size; + u16 index; + u16 pad2; + u64 key_iova; + u64 mask_iova; +}; + +struct dpni_cmd_remove_qos_entry { + u8 pad1[3]; + u8 key_size; + u32 pad2; + u64 key_iova; + u64 mask_iova; +}; + struct dpni_cmd_add_fs_entry { /* cmd word 0 */ u16 options; @@ -597,4 +632,27 @@ struct dpni_cmd_set_congestion_notification { u32 threshold_exit; }; +struct dpni_cmd_get_congestion_notification { + /* cmd word 0 */ + u8 qtype; + u8 tc; +}; + +struct dpni_rsp_get_congestion_notification { + /* cmd word 0 */ + u64 pad; + /* cmd word 1 */ + u32 dest_id; + u16 notification_mode; + u8 dest_priority; + /* from LSB: dest_type: 4 units:2 */ + u8 type_units; + /* cmd word 2 */ + u64 message_iova; + /* cmd word 3 */ + u64 message_ctx; + /* cmd word 4 */ + u32 threshold_entry; + u32 threshold_exit; +}; #endif /* _FSL_DPNI_CMD_H */ diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpni.c b/drivers/staging/fsl-dpaa2/ethernet/dpni.c index 3c23e4d..5c15ece 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpni.c +++ b/drivers/staging/fsl-dpaa2/ethernet/dpni.c @@ -198,7 +198,10 @@ int dpni_set_pools(struct fsl_mc_io *mc_io, cmd_params = (struct dpni_cmd_set_pools *)cmd.params; 
cmd_params->num_dpbp = cfg->num_dpbp; for (i = 0; i < DPNI_MAX_DPBP; i++) { - cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id); + cmd_params->pool[i].dpbp_id = + cpu_to_le16(cfg->pools[i].dpbp_id); + cmd_params->pool[i].priority_mask = + cfg->pools[i].priority_mask; cmd_params->buffer_size[i] = cpu_to_le16(cfg->pools[i].buffer_size); cmd_params->backup_pool_mask |= @@ -1374,6 +1377,82 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io, return mc_send_command(mc_io, &cmd); } +/** + * dpni_set_qos_table() - Set QoS mapping table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @cfg: QoS table configuration + * + * This function and all QoS-related functions require that + * 'max_tcs > 1' was set at DPNI creation. + * + * Warning: Before calling this function, call dpkg_prepare_key_cfg() to + * prepare the key_cfg_iova parameter. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpni_set_qos_table(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + const struct dpni_qos_tbl_cfg *cfg) +{ + struct dpni_cmd_set_qos_table *cmd_params; + struct mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_QOS_TBL, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_set_qos_table *)cmd.params; + cmd_params->default_tc = cfg->default_tc; + cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova); + dpni_set_field(cmd_params->discard_on_miss, + ENABLE, + cfg->discard_on_miss); + + /* send command to mc */ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpni_add_qos_entry() - Add QoS mapping entry (to select a traffic class) + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @cfg: QoS rule to add + * @tc_id: Traffic class selection (0-7) + * @index: Location in the QoS table where to insert the entry. + * Only relevant if MASKING is enabled for QoS classification on + * this DPNI, it is ignored for exact match. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpni_add_qos_entry(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + const struct dpni_rule_cfg *cfg, + u8 tc_id, + u16 index) +{ + struct dpni_cmd_add_qos_entry *cmd_params; + struct mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_QOS_ENT, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_add_qos_entry *)cmd.params; + cmd_params->tc_id = tc_id; + cmd_params->key_size = cfg->key_size; + cmd_params->index = cpu_to_le16(index); + cmd_params->key_iova = cpu_to_le64(cfg->key_iova); + cmd_params->mask_iova = cpu_to_le64(cfg->mask_iova); + + /* send command to mc */ + return mc_send_command(mc_io, &cmd); +} + /** * dpni_add_fs_entry() - Add Flow Steering entry for a specific traffic class * (to select a flow ID) @@ -1498,6 +1577,60 @@ int dpni_set_congestion_notification(struct fsl_mc_io *mc_io, } /** + * dpni_get_congestion_notification() - Get traffic class congestion + * notification configuration + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @qtype: Type of queue - Rx, Tx and Tx confirm types are supported + * @tc_id: Traffic class selection (0-7) + * @cfg: congestion notification configuration + * + * Return: '0' on Success; error code otherwise.
+ */ +int dpni_get_congestion_notification( + struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + enum dpni_queue_type qtype, + u8 tc_id, + struct dpni_congestion_notification_cfg *cfg) +{ + struct dpni_rsp_get_congestion_notification *rsp_params; + struct dpni_cmd_get_congestion_notification *cmd_params; + struct mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header( + DPNI_CMDID_GET_CONGESTION_NOTIFICATION, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_get_congestion_notification *)cmd.params; + cmd_params->qtype = qtype; + cmd_params->tc = tc_id; + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpni_rsp_get_congestion_notification *)cmd.params; + cfg->units = dpni_get_field(rsp_params->type_units, CONG_UNITS); + cfg->threshold_entry = le32_to_cpu(rsp_params->threshold_entry); + cfg->threshold_exit = le32_to_cpu(rsp_params->threshold_exit); + cfg->message_ctx = le64_to_cpu(rsp_params->message_ctx); + cfg->message_iova = le64_to_cpu(rsp_params->message_iova); + cfg->notification_mode = le16_to_cpu(rsp_params->notification_mode); + cfg->dest_cfg.dest_id = le32_to_cpu(rsp_params->dest_id); + cfg->dest_cfg.priority = rsp_params->dest_priority; + cfg->dest_cfg.dest_type = dpni_get_field(rsp_params->type_units, + DEST_TYPE); + + return 0; +} + +/** * dpni_set_queue() - Set queue parameters * @mc_io: Pointer to MC portal's I/O object * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpni.h b/drivers/staging/fsl-dpaa2/ethernet/dpni.h index 600c357..40128ed 100644 --- a/drivers/staging/fsl-dpaa2/ethernet/dpni.h +++ b/drivers/staging/fsl-dpaa2/ethernet/dpni.h @@ -52,6 +52,14 @@ struct fsl_mc_io; * Maximum number of buffer pools per DPNI */ #define DPNI_MAX_DPBP 8 +/** + * Maximum number of senders + */ +#define DPNI_MAX_SENDERS 8 +/** + * Maximum distribution size + */ +#define DPNI_MAX_DIST_SIZE 8 /** * All traffic classes considered; see dpni_set_queue() @@ -123,13 +131,15 @@ struct dpni_pools_cfg { /** * struct pools - Buffer pools parameters * @dpbp_id: DPBP object ID + * @priority_mask: priorities served by DPBP * @buffer_size: Buffer size * @backup_pool: Backup pool */ struct { - int dpbp_id; + u16 dpbp_id; + u8 priority_mask; u16 buffer_size; - int backup_pool; + u8 backup_pool; } pools[DPNI_MAX_DPBP]; }; @@ -509,6 +519,10 @@ int dpni_reset_statistics(struct fsl_mc_io *mc_io, * Enable a-symmetric pause frames */ #define DPNI_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL +/** + * Enable priority flow control pause frames + */ +#define DPNI_LINK_OPT_PFC_PAUSE 0x0000000000000010ULL /** * struct - Structure representing DPNI link configuration @@ -658,6 +672,26 @@ int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, u8 *key_cfg_buf); /** + * struct dpni_qos_tbl_cfg - Structure representing QOS table configuration + * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with + * key extractions to be used as the QoS criteria by calling + * dpkg_prepare_key_cfg() + * @discard_on_miss: Set to '1' to discard frames in case of no match (miss); + * '0' to use the 'default_tc' in such cases + * @default_tc: Used in case of no-match and 'discard_on_miss'= 0 + */ +struct dpni_qos_tbl_cfg { + u64 key_cfg_iova; + int discard_on_miss; + u8 default_tc; +}; + +int dpni_set_qos_table(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + const struct dpni_qos_tbl_cfg *cfg); + +/** * struct 
dpni_rx_tc_dist_cfg - Rx traffic class distribution configuration * @dist_size: Set the distribution size; * supported values: 1,2,3,4,6,7,8,12,14,16,24,28,32,48,56,64,96, @@ -850,6 +884,12 @@ struct dpni_dest_cfg { * sw-portal's DQRR, the DQRI interrupt is asserted immediately (if enabled) */ #define DPNI_CONG_OPT_INTR_COALESCING_DISABLED 0x00000020 +/** + * This congestion will trigger flow control or priority flow control. + * This will have effect only if flow control is enabled with + * dpni_set_link_cfg(). + */ +#define DPNI_CONG_OPT_FLOW_CONTROL 0x00000040 /** * struct dpni_congestion_notification_cfg - congestion notification @@ -883,6 +923,14 @@ int dpni_set_congestion_notification(struct fsl_mc_io *mc_io, u8 tc_id, const struct dpni_congestion_notification_cfg *cfg); +int dpni_get_congestion_notification( + struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + enum dpni_queue_type qtype, + u8 tc_id, + struct dpni_congestion_notification_cfg *cfg); + /** * struct dpni_taildrop - Structure representing the taildrop * @enable: Indicates whether the taildrop is active or not. @@ -929,6 +977,22 @@ struct dpni_rule_cfg { u8 key_size; }; +int dpni_add_qos_entry(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + const struct dpni_rule_cfg *cfg, + u8 tc_id, + u16 index); + +int dpni_remove_qos_entry(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + const struct dpni_rule_cfg *cfg); + +int dpni_clear_qos_table(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token); + /** * Discard matching traffic. If set, this takes precedence over any other * configuration and matching traffic is always discarded. diff --git a/drivers/staging/fsl-dpaa2/mac/mac.c b/drivers/staging/fsl-dpaa2/mac/mac.c index 3016963..3ddf344 100644 --- a/drivers/staging/fsl-dpaa2/mac/mac.c +++ b/drivers/staging/fsl-dpaa2/mac/mac.c @@ -119,15 +119,6 @@ static void dpaa2_mac_link_changed(struct net_device *netdev) dev_err(&priv->mc_dev->dev, "dpmac_set_link_state: %d\n", err); } -#ifdef CONFIG_FSL_DPAA2_MAC_NETDEVS -static netdev_tx_t dpaa2_mac_drop_frame(struct sk_buff *skb, - struct net_device *dev) -{ - /* we don't support I/O for now, drop the frame */ - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; -} - static int dpaa2_mac_open(struct net_device *netdev) { /* start PHY state machine */ @@ -152,6 +143,15 @@ done: return 0; } +#ifdef CONFIG_FSL_DPAA2_MAC_NETDEVS +static netdev_tx_t dpaa2_mac_drop_frame(struct sk_buff *skb, + struct net_device *dev) +{ + /* we don't support I/O for now, drop the frame */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + static int dpaa2_mac_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { @@ -312,9 +312,9 @@ static int dpaa2_mac_get_sset_count(struct net_device *dev, int sset) } static const struct net_device_ops dpaa2_mac_ndo_ops = { - .ndo_start_xmit = &dpaa2_mac_drop_frame, .ndo_open = &dpaa2_mac_open, .ndo_stop = &dpaa2_mac_stop, + .ndo_start_xmit = &dpaa2_mac_drop_frame, .ndo_get_stats64 = &dpaa2_mac_get_stats, }; @@ -542,10 +542,9 @@ static int dpaa2_mac_probe(struct fsl_mc_device *mc_dev) } #endif /* CONFIG_FSL_DPAA2_MAC_NETDEVS */ - /* probe the PHY as a fixed-link if the link type declared in DPC - * explicitly mandates this + /* probe the PHY as a fixed-link if there's a phy-handle defined + * in the device tree */ - phy_node = of_parse_phandle(dpmac_node, "phy-handle", 0); if (!phy_node) { goto probe_fixed_link; @@ -597,12 +596,8 @@ probe_fixed_link: dev_info(dev, "Registered fixed PHY.\n"); } - /* start PHY state machine */ -#ifdef 
CONFIG_FSL_DPAA2_MAC_NETDEVS dpaa2_mac_open(netdev); -#else /* CONFIG_FSL_DPAA2_MAC_NETDEVS */ - phy_start(netdev->phydev); -#endif /* CONFIG_FSL_DPAA2_MAC_NETDEVS */ + return 0; err_defer: @@ -626,6 +621,15 @@ static int dpaa2_mac_remove(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; struct dpaa2_mac_priv *priv = dev_get_drvdata(dev); + struct net_device *netdev = priv->netdev; + + dpaa2_mac_stop(netdev); + + if (phy_is_pseudo_fixed_link(netdev->phydev)) + fixed_phy_unregister(netdev->phydev); + else + phy_disconnect(netdev->phydev); + netdev->phydev = NULL; #ifdef CONFIG_FSL_DPAA2_MAC_NETDEVS unregister_netdev(priv->netdev); @@ -636,7 +640,6 @@ static int dpaa2_mac_remove(struct fsl_mc_device *mc_dev) free_netdev(priv->netdev); dev_set_drvdata(dev, NULL); - kfree(priv); return 0; } diff --git a/drivers/staging/fsl_ppfe/Kconfig b/drivers/staging/fsl_ppfe/Kconfig new file mode 100644 index 0000000..e409643 --- /dev/null +++ b/drivers/staging/fsl_ppfe/Kconfig @@ -0,0 +1,20 @@ +# +# Freescale Programmable Packet Forwarding Engine driver +# +config FSL_PPFE + bool "Freescale PPFE Driver" + default n + ---help--- + The Freescale LS1012A SoC has a Programmable Packet Forwarding Engine. + It provides two high-performance Ethernet interfaces. + This driver initializes, programs and controls the PPFE. + Use this driver to enable network connectivity on LS1012A platforms. + +if FSL_PPFE + +config FSL_PPFE_UTIL_DISABLED + bool "Disable PPFE UTIL Processor Engine" + ---help--- + The UTIL PE should be enabled only when it is required. + +endif # FSL_PPFE diff --git a/drivers/staging/fsl_ppfe/Makefile b/drivers/staging/fsl_ppfe/Makefile new file mode 100644 index 0000000..07cd351 --- /dev/null +++ b/drivers/staging/fsl_ppfe/Makefile @@ -0,0 +1,19 @@ +# +# Makefile for Freescale PPFE driver +# + +ccflags-y += -I$(src)/include -I$(src) + +obj-m += pfe.o + +pfe-y += pfe_mod.o \ + pfe_hw.o \ + pfe_firmware.o \ + pfe_ctrl.o \ + pfe_hif.o \ + pfe_hif_lib.o \ + pfe_eth.o \ + pfe_sysfs.o \ + pfe_debugfs.o \ + pfe_ls1012a_platform.o \ + pfe_hal.o diff --git a/drivers/staging/fsl_ppfe/TODO b/drivers/staging/fsl_ppfe/TODO new file mode 100644 index 0000000..43c48cc --- /dev/null +++ b/drivers/staging/fsl_ppfe/TODO @@ -0,0 +1,2 @@ +TODO: + - provide pfe pe monitoring support diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus.h b/drivers/staging/fsl_ppfe/include/pfe/cbus.h new file mode 100644 index 0000000..04503d2 --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus.h @@ -0,0 +1,78 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _CBUS_H_ +#define _CBUS_H_ + +#define EMAC1_BASE_ADDR (CBUS_BASE_ADDR + 0x200000) +#define EGPI1_BASE_ADDR (CBUS_BASE_ADDR + 0x210000) +#define EMAC2_BASE_ADDR (CBUS_BASE_ADDR + 0x220000) +#define EGPI2_BASE_ADDR (CBUS_BASE_ADDR + 0x230000) +#define BMU1_BASE_ADDR (CBUS_BASE_ADDR + 0x240000) +#define BMU2_BASE_ADDR (CBUS_BASE_ADDR + 0x250000) +#define ARB_BASE_ADDR (CBUS_BASE_ADDR + 0x260000) +#define DDR_CONFIG_BASE_ADDR (CBUS_BASE_ADDR + 0x270000) +#define HIF_BASE_ADDR (CBUS_BASE_ADDR + 0x280000) +#define HGPI_BASE_ADDR (CBUS_BASE_ADDR + 0x290000) +#define LMEM_BASE_ADDR (CBUS_BASE_ADDR + 0x300000) +#define LMEM_SIZE 0x10000 +#define LMEM_END (LMEM_BASE_ADDR + LMEM_SIZE) +#define TMU_CSR_BASE_ADDR (CBUS_BASE_ADDR + 0x310000) +#define CLASS_CSR_BASE_ADDR (CBUS_BASE_ADDR + 0x320000) +#define HIF_NOCPY_BASE_ADDR (CBUS_BASE_ADDR + 0x350000) +#define UTIL_CSR_BASE_ADDR (CBUS_BASE_ADDR + 0x360000) +#define CBUS_GPT_BASE_ADDR (CBUS_BASE_ADDR + 0x370000) + +/* + * XXX_MEM_ACCESS_ADDR: PE memory access through CSR. + * XXX_MEM_ACCESS_ADDR register bit definitions. + */ +#define PE_MEM_ACCESS_WRITE BIT(31) /* Internal Memory Write. */ +#define PE_MEM_ACCESS_IMEM BIT(15) +#define PE_MEM_ACCESS_DMEM BIT(16) + +/* Byte Enables of the Internal memory access. These are interpreted in BE */ +#define PE_MEM_ACCESS_BYTE_ENABLE(offset, size) \ + ({ typeof(size) size_ = (size); \ + (((BIT(size_) - 1) << (4 - (offset) - (size_))) & 0xf) << 24; }) + +#include "cbus/emac_mtip.h" +#include "cbus/gpi.h" +#include "cbus/bmu.h" +#include "cbus/hif.h" +#include "cbus/tmu_csr.h" +#include "cbus/class_csr.h" +#include "cbus/hif_nocpy.h" +#include "cbus/util_csr.h" + +/* PFE core states */ +#define CORE_DISABLE 0x00000000 +#define CORE_ENABLE 0x00000001 +#define CORE_SW_RESET 0x00000002 + +/* LMEM defines */ +#define LMEM_HDR_SIZE 0x0010 +#define LMEM_BUF_SIZE_LN2 0x7 +#define LMEM_BUF_SIZE BIT(LMEM_BUF_SIZE_LN2) + +/* DDR defines */ +#define DDR_HDR_SIZE 0x0100 +#define DDR_BUF_SIZE_LN2 0xb +#define DDR_BUF_SIZE BIT(DDR_BUF_SIZE_LN2) + +#endif /* _CBUS_H_ */ diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/bmu.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/bmu.h new file mode 100644 index 0000000..87738ca --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/bmu.h @@ -0,0 +1,55 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _BMU_H_ +#define _BMU_H_ + +#define BMU_VERSION 0x000 +#define BMU_CTRL 0x004 +#define BMU_UCAST_CONFIG 0x008 +#define BMU_UCAST_BASE_ADDR 0x00c +#define BMU_BUF_SIZE 0x010 +#define BMU_BUF_CNT 0x014 +#define BMU_THRES 0x018 +#define BMU_INT_SRC 0x020 +#define BMU_INT_ENABLE 0x024 +#define BMU_ALLOC_CTRL 0x030 +#define BMU_FREE_CTRL 0x034 +#define BMU_FREE_ERR_ADDR 0x038 +#define BMU_CURR_BUF_CNT 0x03c +#define BMU_MCAST_CNT 0x040 +#define BMU_MCAST_ALLOC_CTRL 0x044 +#define BMU_REM_BUF_CNT 0x048 +#define BMU_LOW_WATERMARK 0x050 +#define BMU_HIGH_WATERMARK 0x054 +#define BMU_INT_MEM_ACCESS 0x100 + +struct BMU_CFG { + unsigned long baseaddr; + u32 count; + u32 size; + u32 low_watermark; + u32 high_watermark; +}; + +#define BMU1_BUF_SIZE LMEM_BUF_SIZE_LN2 +#define BMU2_BUF_SIZE DDR_BUF_SIZE_LN2 + +#define BMU2_MCAST_ALLOC_CTRL (BMU2_BASE_ADDR + BMU_MCAST_ALLOC_CTRL) + +#endif /* _BMU_H_ */ diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/class_csr.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/class_csr.h new file mode 100644 index 0000000..e4dadff --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/class_csr.h @@ -0,0 +1,289 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _CLASS_CSR_H_ +#define _CLASS_CSR_H_ + +/* class_csr.h - block containing all the classifier control and status + * registers. Mapped on CBUS and accessible from all PE's and ARM. + */ +#define CLASS_VERSION (CLASS_CSR_BASE_ADDR + 0x000) +#define CLASS_TX_CTRL (CLASS_CSR_BASE_ADDR + 0x004) +#define CLASS_INQ_PKTPTR (CLASS_CSR_BASE_ADDR + 0x010) + +/* (ddr_hdr_size[24:16], lmem_hdr_size[5:0]) */ +#define CLASS_HDR_SIZE (CLASS_CSR_BASE_ADDR + 0x014) + +/* LMEM header size for the Classifier block. Data in the LMEM + * is written from this offset. + */ +#define CLASS_HDR_SIZE_LMEM(off) ((off) & 0x3f) + +/* DDR header size for the Classifier block. Data in the DDR + * is written from this offset. + */ +#define CLASS_HDR_SIZE_DDR(off) (((off) & 0x1ff) << 16) + +#define CLASS_PE0_QB_DM_ADDR0 (CLASS_CSR_BASE_ADDR + 0x020) + +/* DMEM address of first [15:0] and second [31:16] buffers on QB side. */ +#define CLASS_PE0_QB_DM_ADDR1 (CLASS_CSR_BASE_ADDR + 0x024) + +/* DMEM address of third [15:0] and fourth [31:16] buffers on QB side. */ +#define CLASS_PE0_RO_DM_ADDR0 (CLASS_CSR_BASE_ADDR + 0x060) + +/* DMEM address of first [15:0] and second [31:16] buffers on RO side. */ +#define CLASS_PE0_RO_DM_ADDR1 (CLASS_CSR_BASE_ADDR + 0x064) + +/* DMEM address of third [15:0] and fourth [31:16] buffers on RO side. */ + +/* Class PE memory access. Allows external PE's and HOST to + * read/write PMEM/DMEM memory ranges for each classifier PE. + */ /* {csr_pe_mem_cmd[31], csr_pe_mem_wren[27:24], csr_pe_mem_addr[23:0]}, + * See XXX_MEM_ACCESS_ADDR for details.
+ */ +#define CLASS_MEM_ACCESS_ADDR (CLASS_CSR_BASE_ADDR + 0x100) + +/* Internal Memory Access Write Data [31:0] */ +#define CLASS_MEM_ACCESS_WDATA (CLASS_CSR_BASE_ADDR + 0x104) + +/* Internal Memory Access Read Data [31:0] */ +#define CLASS_MEM_ACCESS_RDATA (CLASS_CSR_BASE_ADDR + 0x108) +#define CLASS_TM_INQ_ADDR (CLASS_CSR_BASE_ADDR + 0x114) +#define CLASS_PE_STATUS (CLASS_CSR_BASE_ADDR + 0x118) + +#define CLASS_PHY1_RX_PKTS (CLASS_CSR_BASE_ADDR + 0x11c) +#define CLASS_PHY1_TX_PKTS (CLASS_CSR_BASE_ADDR + 0x120) +#define CLASS_PHY1_LP_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x124) +#define CLASS_PHY1_INTF_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x128) +#define CLASS_PHY1_INTF_MATCH_PKTS (CLASS_CSR_BASE_ADDR + 0x12c) +#define CLASS_PHY1_L3_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x130) +#define CLASS_PHY1_V4_PKTS (CLASS_CSR_BASE_ADDR + 0x134) +#define CLASS_PHY1_V6_PKTS (CLASS_CSR_BASE_ADDR + 0x138) +#define CLASS_PHY1_CHKSUM_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x13c) +#define CLASS_PHY1_TTL_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x140) +#define CLASS_PHY2_RX_PKTS (CLASS_CSR_BASE_ADDR + 0x144) +#define CLASS_PHY2_TX_PKTS (CLASS_CSR_BASE_ADDR + 0x148) +#define CLASS_PHY2_LP_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x14c) +#define CLASS_PHY2_INTF_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x150) +#define CLASS_PHY2_INTF_MATCH_PKTS (CLASS_CSR_BASE_ADDR + 0x154) +#define CLASS_PHY2_L3_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x158) +#define CLASS_PHY2_V4_PKTS (CLASS_CSR_BASE_ADDR + 0x15c) +#define CLASS_PHY2_V6_PKTS (CLASS_CSR_BASE_ADDR + 0x160) +#define CLASS_PHY2_CHKSUM_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x164) +#define CLASS_PHY2_TTL_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x168) +#define CLASS_PHY3_RX_PKTS (CLASS_CSR_BASE_ADDR + 0x16c) +#define CLASS_PHY3_TX_PKTS (CLASS_CSR_BASE_ADDR + 0x170) +#define CLASS_PHY3_LP_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x174) +#define CLASS_PHY3_INTF_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x178) +#define CLASS_PHY3_INTF_MATCH_PKTS (CLASS_CSR_BASE_ADDR + 0x17c) +#define CLASS_PHY3_L3_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x180) +#define CLASS_PHY3_V4_PKTS (CLASS_CSR_BASE_ADDR + 0x184) +#define CLASS_PHY3_V6_PKTS (CLASS_CSR_BASE_ADDR + 0x188) +#define CLASS_PHY3_CHKSUM_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x18c) +#define CLASS_PHY3_TTL_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x190) +#define CLASS_PHY1_ICMP_PKTS (CLASS_CSR_BASE_ADDR + 0x194) +#define CLASS_PHY1_IGMP_PKTS (CLASS_CSR_BASE_ADDR + 0x198) +#define CLASS_PHY1_TCP_PKTS (CLASS_CSR_BASE_ADDR + 0x19c) +#define CLASS_PHY1_UDP_PKTS (CLASS_CSR_BASE_ADDR + 0x1a0) +#define CLASS_PHY2_ICMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1a4) +#define CLASS_PHY2_IGMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1a8) +#define CLASS_PHY2_TCP_PKTS (CLASS_CSR_BASE_ADDR + 0x1ac) +#define CLASS_PHY2_UDP_PKTS (CLASS_CSR_BASE_ADDR + 0x1b0) +#define CLASS_PHY3_ICMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1b4) +#define CLASS_PHY3_IGMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1b8) +#define CLASS_PHY3_TCP_PKTS (CLASS_CSR_BASE_ADDR + 0x1bc) +#define CLASS_PHY3_UDP_PKTS (CLASS_CSR_BASE_ADDR + 0x1c0) +#define CLASS_PHY4_ICMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1c4) +#define CLASS_PHY4_IGMP_PKTS (CLASS_CSR_BASE_ADDR + 0x1c8) +#define CLASS_PHY4_TCP_PKTS (CLASS_CSR_BASE_ADDR + 0x1cc) +#define CLASS_PHY4_UDP_PKTS (CLASS_CSR_BASE_ADDR + 0x1d0) +#define CLASS_PHY4_RX_PKTS (CLASS_CSR_BASE_ADDR + 0x1d4) +#define CLASS_PHY4_TX_PKTS (CLASS_CSR_BASE_ADDR + 0x1d8) +#define CLASS_PHY4_LP_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x1dc) +#define CLASS_PHY4_INTF_FAIL_PKTS (CLASS_CSR_BASE_ADDR + 0x1e0) +#define CLASS_PHY4_INTF_MATCH_PKTS (CLASS_CSR_BASE_ADDR + 0x1e4) +#define CLASS_PHY4_L3_FAIL_PKTS 
(CLASS_CSR_BASE_ADDR + 0x1e8) +#define CLASS_PHY4_V4_PKTS (CLASS_CSR_BASE_ADDR + 0x1ec) +#define CLASS_PHY4_V6_PKTS (CLASS_CSR_BASE_ADDR + 0x1f0) +#define CLASS_PHY4_CHKSUM_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x1f4) +#define CLASS_PHY4_TTL_ERR_PKTS (CLASS_CSR_BASE_ADDR + 0x1f8) + +#define CLASS_PE_SYS_CLK_RATIO (CLASS_CSR_BASE_ADDR + 0x200) +#define CLASS_AFULL_THRES (CLASS_CSR_BASE_ADDR + 0x204) +#define CLASS_GAP_BETWEEN_READS (CLASS_CSR_BASE_ADDR + 0x208) +#define CLASS_MAX_BUF_CNT (CLASS_CSR_BASE_ADDR + 0x20c) +#define CLASS_TSQ_FIFO_THRES (CLASS_CSR_BASE_ADDR + 0x210) +#define CLASS_TSQ_MAX_CNT (CLASS_CSR_BASE_ADDR + 0x214) +#define CLASS_IRAM_DATA_0 (CLASS_CSR_BASE_ADDR + 0x218) +#define CLASS_IRAM_DATA_1 (CLASS_CSR_BASE_ADDR + 0x21c) +#define CLASS_IRAM_DATA_2 (CLASS_CSR_BASE_ADDR + 0x220) +#define CLASS_IRAM_DATA_3 (CLASS_CSR_BASE_ADDR + 0x224) + +#define CLASS_BUS_ACCESS_ADDR (CLASS_CSR_BASE_ADDR + 0x228) + +#define CLASS_BUS_ACCESS_WDATA (CLASS_CSR_BASE_ADDR + 0x22c) +#define CLASS_BUS_ACCESS_RDATA (CLASS_CSR_BASE_ADDR + 0x230) + +/* (route_entry_size[9:0], route_hash_size[23:16] + * (this is actually ln2(size))) + */ +#define CLASS_ROUTE_HASH_ENTRY_SIZE (CLASS_CSR_BASE_ADDR + 0x234) + +#define CLASS_ROUTE_ENTRY_SIZE(size) ((size) & 0x1ff) +#define CLASS_ROUTE_HASH_SIZE(hash_bits) (((hash_bits) & 0xff) << 16) + +#define CLASS_ROUTE_TABLE_BASE (CLASS_CSR_BASE_ADDR + 0x238) + +#define CLASS_ROUTE_MULTI (CLASS_CSR_BASE_ADDR + 0x23c) +#define CLASS_SMEM_OFFSET (CLASS_CSR_BASE_ADDR + 0x240) +#define CLASS_LMEM_BUF_SIZE (CLASS_CSR_BASE_ADDR + 0x244) +#define CLASS_VLAN_ID (CLASS_CSR_BASE_ADDR + 0x248) +#define CLASS_BMU1_BUF_FREE (CLASS_CSR_BASE_ADDR + 0x24c) +#define CLASS_USE_TMU_INQ (CLASS_CSR_BASE_ADDR + 0x250) +#define CLASS_VLAN_ID1 (CLASS_CSR_BASE_ADDR + 0x254) + +#define CLASS_BUS_ACCESS_BASE (CLASS_CSR_BASE_ADDR + 0x258) +#define CLASS_BUS_ACCESS_BASE_MASK (0xFF000000) +/* bit 31:24 of PE peripheral address are stored in CLASS_BUS_ACCESS_BASE */ + +#define CLASS_HIF_PARSE (CLASS_CSR_BASE_ADDR + 0x25c) + +#define CLASS_HOST_PE0_GP (CLASS_CSR_BASE_ADDR + 0x260) +#define CLASS_PE0_GP (CLASS_CSR_BASE_ADDR + 0x264) +#define CLASS_HOST_PE1_GP (CLASS_CSR_BASE_ADDR + 0x268) +#define CLASS_PE1_GP (CLASS_CSR_BASE_ADDR + 0x26c) +#define CLASS_HOST_PE2_GP (CLASS_CSR_BASE_ADDR + 0x270) +#define CLASS_PE2_GP (CLASS_CSR_BASE_ADDR + 0x274) +#define CLASS_HOST_PE3_GP (CLASS_CSR_BASE_ADDR + 0x278) +#define CLASS_PE3_GP (CLASS_CSR_BASE_ADDR + 0x27c) +#define CLASS_HOST_PE4_GP (CLASS_CSR_BASE_ADDR + 0x280) +#define CLASS_PE4_GP (CLASS_CSR_BASE_ADDR + 0x284) +#define CLASS_HOST_PE5_GP (CLASS_CSR_BASE_ADDR + 0x288) +#define CLASS_PE5_GP (CLASS_CSR_BASE_ADDR + 0x28c) + +#define CLASS_PE_INT_SRC (CLASS_CSR_BASE_ADDR + 0x290) +#define CLASS_PE_INT_ENABLE (CLASS_CSR_BASE_ADDR + 0x294) + +#define CLASS_TPID0_TPID1 (CLASS_CSR_BASE_ADDR + 0x298) +#define CLASS_TPID2 (CLASS_CSR_BASE_ADDR + 0x29c) + +#define CLASS_L4_CHKSUM_ADDR (CLASS_CSR_BASE_ADDR + 0x2a0) + +#define CLASS_PE0_DEBUG (CLASS_CSR_BASE_ADDR + 0x2a4) +#define CLASS_PE1_DEBUG (CLASS_CSR_BASE_ADDR + 0x2a8) +#define CLASS_PE2_DEBUG (CLASS_CSR_BASE_ADDR + 0x2ac) +#define CLASS_PE3_DEBUG (CLASS_CSR_BASE_ADDR + 0x2b0) +#define CLASS_PE4_DEBUG (CLASS_CSR_BASE_ADDR + 0x2b4) +#define CLASS_PE5_DEBUG (CLASS_CSR_BASE_ADDR + 0x2b8) + +#define CLASS_STATE (CLASS_CSR_BASE_ADDR + 0x2bc) + +/* CLASS defines */ +#define CLASS_PBUF_SIZE 0x100 /* Fixed by hardware */ +#define CLASS_PBUF_HEADER_OFFSET 0x80 /* Can be configured */ + +/* Can be configured */ +#define 
CLASS_PBUF0_BASE_ADDR 0x000 +/* Can be configured */ +#define CLASS_PBUF1_BASE_ADDR (CLASS_PBUF0_BASE_ADDR + CLASS_PBUF_SIZE) +/* Can be configured */ +#define CLASS_PBUF2_BASE_ADDR (CLASS_PBUF1_BASE_ADDR + CLASS_PBUF_SIZE) +/* Can be configured */ +#define CLASS_PBUF3_BASE_ADDR (CLASS_PBUF2_BASE_ADDR + CLASS_PBUF_SIZE) + +#define CLASS_PBUF0_HEADER_BASE_ADDR (CLASS_PBUF0_BASE_ADDR + \ + CLASS_PBUF_HEADER_OFFSET) +#define CLASS_PBUF1_HEADER_BASE_ADDR (CLASS_PBUF1_BASE_ADDR + \ + CLASS_PBUF_HEADER_OFFSET) +#define CLASS_PBUF2_HEADER_BASE_ADDR (CLASS_PBUF2_BASE_ADDR + \ + CLASS_PBUF_HEADER_OFFSET) +#define CLASS_PBUF3_HEADER_BASE_ADDR (CLASS_PBUF3_BASE_ADDR + \ + CLASS_PBUF_HEADER_OFFSET) + +#define CLASS_PE0_RO_DM_ADDR0_VAL ((CLASS_PBUF1_BASE_ADDR << 16) | \ + CLASS_PBUF0_BASE_ADDR) +#define CLASS_PE0_RO_DM_ADDR1_VAL ((CLASS_PBUF3_BASE_ADDR << 16) | \ + CLASS_PBUF2_BASE_ADDR) + +#define CLASS_PE0_QB_DM_ADDR0_VAL ((CLASS_PBUF1_HEADER_BASE_ADDR << 16) |\ + CLASS_PBUF0_HEADER_BASE_ADDR) +#define CLASS_PE0_QB_DM_ADDR1_VAL ((CLASS_PBUF3_HEADER_BASE_ADDR << 16) |\ + CLASS_PBUF2_HEADER_BASE_ADDR) + +#define CLASS_ROUTE_SIZE 128 +#define CLASS_MAX_ROUTE_SIZE 256 +#define CLASS_ROUTE_HASH_BITS 20 +#define CLASS_ROUTE_HASH_MASK (BIT(CLASS_ROUTE_HASH_BITS) - 1) + +/* Can be configured */ +#define CLASS_ROUTE0_BASE_ADDR 0x400 +/* Can be configured */ +#define CLASS_ROUTE1_BASE_ADDR (CLASS_ROUTE0_BASE_ADDR + CLASS_ROUTE_SIZE) +/* Can be configured */ +#define CLASS_ROUTE2_BASE_ADDR (CLASS_ROUTE1_BASE_ADDR + CLASS_ROUTE_SIZE) +/* Can be configured */ +#define CLASS_ROUTE3_BASE_ADDR (CLASS_ROUTE2_BASE_ADDR + CLASS_ROUTE_SIZE) + +#define CLASS_SA_SIZE 128 +#define CLASS_IPSEC_SA0_BASE_ADDR 0x600 +/* not used */ +#define CLASS_IPSEC_SA1_BASE_ADDR (CLASS_IPSEC_SA0_BASE_ADDR + CLASS_SA_SIZE) +/* not used */ +#define CLASS_IPSEC_SA2_BASE_ADDR (CLASS_IPSEC_SA1_BASE_ADDR + CLASS_SA_SIZE) +/* not used */ +#define CLASS_IPSEC_SA3_BASE_ADDR (CLASS_IPSEC_SA2_BASE_ADDR + CLASS_SA_SIZE) + +/* generic purpose free dmem buffer, last portion of 2K dmem pbuf */ +#define CLASS_GP_DMEM_BUF_SIZE (2048 - (CLASS_PBUF_SIZE * 4) - \ + (CLASS_ROUTE_SIZE * 4) - (CLASS_SA_SIZE)) +#define CLASS_GP_DMEM_BUF ((void *)(CLASS_IPSEC_SA0_BASE_ADDR + \ + CLASS_SA_SIZE)) + +#define TWO_LEVEL_ROUTE BIT(0) +#define PHYNO_IN_HASH BIT(1) +#define HW_ROUTE_FETCH BIT(3) +#define HW_BRIDGE_FETCH BIT(5) +#define IP_ALIGNED BIT(6) +#define ARC_HIT_CHECK_EN BIT(7) +#define CLASS_TOE BIT(11) +#define HASH_NORMAL (0 << 12) +#define HASH_CRC_PORT BIT(12) +#define HASH_CRC_IP (2 << 12) +#define HASH_CRC_PORT_IP (3 << 12) +#define QB2BUS_LE BIT(15) + +#define TCP_CHKSUM_DROP BIT(0) +#define UDP_CHKSUM_DROP BIT(1) +#define IPV4_CHKSUM_DROP BIT(9) + +/*CLASS_HIF_PARSE bits*/ +#define HIF_PKT_CLASS_EN BIT(0) +#define HIF_PKT_OFFSET(ofst) (((ofst) & 0xF) << 1) + +struct class_cfg { + u32 toe_mode; + unsigned long route_table_baseaddr; + u32 route_table_hash_bits; + u32 pe_sys_clk_ratio; + u32 resume; +}; + +#endif /* _CLASS_CSR_H_ */ diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/emac_mtip.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/emac_mtip.h new file mode 100644 index 0000000..9c5d791 --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/emac_mtip.h @@ -0,0 +1,242 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. 
+ * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _EMAC_H_ +#define _EMAC_H_ + +#include <linux/ethtool.h> + +#define EMAC_IEVENT_REG 0x004 +#define EMAC_IMASK_REG 0x008 +#define EMAC_R_DES_ACTIVE_REG 0x010 +#define EMAC_X_DES_ACTIVE_REG 0x014 +#define EMAC_ECNTRL_REG 0x024 +#define EMAC_MII_DATA_REG 0x040 +#define EMAC_MII_CTRL_REG 0x044 +#define EMAC_MIB_CTRL_STS_REG 0x064 +#define EMAC_RCNTRL_REG 0x084 +#define EMAC_TCNTRL_REG 0x0C4 +#define EMAC_PHY_ADDR_LOW 0x0E4 +#define EMAC_PHY_ADDR_HIGH 0x0E8 +#define EMAC_GAUR 0x120 +#define EMAC_GALR 0x124 +#define EMAC_TFWR_STR_FWD 0x144 +#define EMAC_RX_SECTION_FULL 0x190 +#define EMAC_RX_SECTION_EMPTY 0x194 +#define EMAC_TX_SECTION_EMPTY 0x1A0 +#define EMAC_TRUNC_FL 0x1B0 + +#define RMON_T_DROP 0x200 /* Count of frames not cntd correctly */ +#define RMON_T_PACKETS 0x204 /* RMON TX packet count */ +#define RMON_T_BC_PKT 0x208 /* RMON TX broadcast pkts */ +#define RMON_T_MC_PKT 0x20c /* RMON TX multicast pkts */ +#define RMON_T_CRC_ALIGN 0x210 /* RMON TX pkts with CRC align err */ +#define RMON_T_UNDERSIZE 0x214 /* RMON TX pkts < 64 bytes, good CRC */ +#define RMON_T_OVERSIZE 0x218 /* RMON TX pkts > MAX_FL bytes good CRC */ +#define RMON_T_FRAG 0x21c /* RMON TX pkts < 64 bytes, bad CRC */ +#define RMON_T_JAB 0x220 /* RMON TX pkts > MAX_FL bytes, bad CRC */ +#define RMON_T_COL 0x224 /* RMON TX collision count */ +#define RMON_T_P64 0x228 /* RMON TX 64 byte pkts */ +#define RMON_T_P65TO127 0x22c /* RMON TX 65 to 127 byte pkts */ +#define RMON_T_P128TO255 0x230 /* RMON TX 128 to 255 byte pkts */ +#define RMON_T_P256TO511 0x234 /* RMON TX 256 to 511 byte pkts */ +#define RMON_T_P512TO1023 0x238 /* RMON TX 512 to 1023 byte pkts */ +#define RMON_T_P1024TO2047 0x23c /* RMON TX 1024 to 2047 byte pkts */ +#define RMON_T_P_GTE2048 0x240 /* RMON TX pkts > 2048 bytes */ +#define RMON_T_OCTETS 0x244 /* RMON TX octets */ +#define IEEE_T_DROP 0x248 /* Count of frames not counted crtly */ +#define IEEE_T_FRAME_OK 0x24c /* Frames tx'd OK */ +#define IEEE_T_1COL 0x250 /* Frames tx'd with single collision */ +#define IEEE_T_MCOL 0x254 /* Frames tx'd with multiple collision */ +#define IEEE_T_DEF 0x258 /* Frames tx'd after deferral delay */ +#define IEEE_T_LCOL 0x25c /* Frames tx'd with late collision */ +#define IEEE_T_EXCOL 0x260 /* Frames tx'd with excesv collisions */ +#define IEEE_T_MACERR 0x264 /* Frames tx'd with TX FIFO underrun */ +#define IEEE_T_CSERR 0x268 /* Frames tx'd with carrier sense err */ +#define IEEE_T_SQE 0x26c /* Frames tx'd with SQE err */ +#define IEEE_T_FDXFC 0x270 /* Flow control pause frames tx'd */ +#define IEEE_T_OCTETS_OK 0x274 /* Octet count for frames tx'd w/o err */ +#define RMON_R_PACKETS 0x284 /* RMON RX packet count */ +#define RMON_R_BC_PKT 0x288 /* RMON RX broadcast pkts */ +#define RMON_R_MC_PKT 0x28c /* RMON RX multicast pkts */ +#define RMON_R_CRC_ALIGN 0x290 /* RMON RX pkts with CRC 
alignment err */ +#define RMON_R_UNDERSIZE 0x294 /* RMON RX pkts < 64 bytes, good CRC */ +#define RMON_R_OVERSIZE 0x298 /* RMON RX pkts > MAX_FL bytes good CRC */ +#define RMON_R_FRAG 0x29c /* RMON RX pkts < 64 bytes, bad CRC */ +#define RMON_R_JAB 0x2a0 /* RMON RX pkts > MAX_FL bytes, bad CRC */ +#define RMON_R_RESVD_O 0x2a4 /* Reserved */ +#define RMON_R_P64 0x2a8 /* RMON RX 64 byte pkts */ +#define RMON_R_P65TO127 0x2ac /* RMON RX 65 to 127 byte pkts */ +#define RMON_R_P128TO255 0x2b0 /* RMON RX 128 to 255 byte pkts */ +#define RMON_R_P256TO511 0x2b4 /* RMON RX 256 to 511 byte pkts */ +#define RMON_R_P512TO1023 0x2b8 /* RMON RX 512 to 1023 byte pkts */ +#define RMON_R_P1024TO2047 0x2bc /* RMON RX 1024 to 2047 byte pkts */ +#define RMON_R_P_GTE2048 0x2c0 /* RMON RX pkts > 2048 bytes */ +#define RMON_R_OCTETS 0x2c4 /* RMON RX octets */ +#define IEEE_R_DROP 0x2c8 /* Count frames not counted correctly */ +#define IEEE_R_FRAME_OK 0x2cc /* Frames rx'd OK */ +#define IEEE_R_CRC 0x2d0 /* Frames rx'd with CRC err */ +#define IEEE_R_ALIGN 0x2d4 /* Frames rx'd with alignment err */ +#define IEEE_R_MACERR 0x2d8 /* Receive FIFO overflow count */ +#define IEEE_R_FDXFC 0x2dc /* Flow control pause frames rx'd */ +#define IEEE_R_OCTETS_OK 0x2e0 /* Octet cnt for frames rx'd w/o err */ + +#define EMAC_SMAC_0_0 0x500 /*Supplemental MAC Address 0 (RW).*/ +#define EMAC_SMAC_0_1 0x504 /*Supplemental MAC Address 0 (RW).*/ + +/* GEMAC definitions and settings */ + +#define EMAC_PORT_0 0 +#define EMAC_PORT_1 1 + +/* GEMAC Bit definitions */ +#define EMAC_IEVENT_HBERR 0x80000000 +#define EMAC_IEVENT_BABR 0x40000000 +#define EMAC_IEVENT_BABT 0x20000000 +#define EMAC_IEVENT_GRA 0x10000000 +#define EMAC_IEVENT_TXF 0x08000000 +#define EMAC_IEVENT_TXB 0x04000000 +#define EMAC_IEVENT_RXF 0x02000000 +#define EMAC_IEVENT_RXB 0x01000000 +#define EMAC_IEVENT_MII 0x00800000 +#define EMAC_IEVENT_EBERR 0x00400000 +#define EMAC_IEVENT_LC 0x00200000 +#define EMAC_IEVENT_RL 0x00100000 +#define EMAC_IEVENT_UN 0x00080000 + +#define EMAC_IMASK_HBERR 0x80000000 +#define EMAC_IMASK_BABR 0x40000000 +#define EMAC_IMASKT_BABT 0x20000000 +#define EMAC_IMASK_GRA 0x10000000 +#define EMAC_IMASKT_TXF 0x08000000 +#define EMAC_IMASK_TXB 0x04000000 +#define EMAC_IMASKT_RXF 0x02000000 +#define EMAC_IMASK_RXB 0x01000000 +#define EMAC_IMASK_MII 0x00800000 +#define EMAC_IMASK_EBERR 0x00400000 +#define EMAC_IMASK_LC 0x00200000 +#define EMAC_IMASKT_RL 0x00100000 +#define EMAC_IMASK_UN 0x00080000 + +#define EMAC_RCNTRL_MAX_FL_SHIFT 16 +#define EMAC_RCNTRL_LOOP 0x00000001 +#define EMAC_RCNTRL_DRT 0x00000002 +#define EMAC_RCNTRL_MII_MODE 0x00000004 +#define EMAC_RCNTRL_PROM 0x00000008 +#define EMAC_RCNTRL_BC_REJ 0x00000010 +#define EMAC_RCNTRL_FCE 0x00000020 +#define EMAC_RCNTRL_RGMII 0x00000040 +#define EMAC_RCNTRL_SGMII 0x00000080 +#define EMAC_RCNTRL_RMII 0x00000100 +#define EMAC_RCNTRL_RMII_10T 0x00000200 +#define EMAC_RCNTRL_CRC_FWD 0x00004000 + +#define EMAC_TCNTRL_GTS 0x00000001 +#define EMAC_TCNTRL_HBC 0x00000002 +#define EMAC_TCNTRL_FDEN 0x00000004 +#define EMAC_TCNTRL_TFC_PAUSE 0x00000008 +#define EMAC_TCNTRL_RFC_PAUSE 0x00000010 + +#define EMAC_ECNTRL_RESET 0x00000001 /* reset the EMAC */ +#define EMAC_ECNTRL_ETHER_EN 0x00000002 /* enable the EMAC */ +#define EMAC_ECNTRL_MAGIC_ENA 0x00000004 +#define EMAC_ECNTRL_SLEEP 0x00000008 +#define EMAC_ECNTRL_SPEED 0x00000020 +#define EMAC_ECNTRL_DBSWAP 0x00000100 + +#define EMAC_X_WMRK_STRFWD 0x00000100 + +#define EMAC_X_DES_ACTIVE_TDAR 0x01000000 +#define EMAC_R_DES_ACTIVE_RDAR 0x01000000 + +#define 
EMAC_RX_SECTION_EMPTY_V 0x00010006
+/*
+ * The possible operating speeds of the MAC, currently supporting 10, 100 and
+ * 1000 Mbps modes, with an additional entry for 1000 Mbps through the PCS.
+ */
+enum mac_speed {SPEED_10M, SPEED_100M, SPEED_1000M, SPEED_1000M_PCS};
+
+/* MII-related definitions */
+#define EMAC_MII_DATA_ST 0x40000000 /* Start of frame delimiter */
+#define EMAC_MII_DATA_OP_RD 0x20000000 /* Perform a read operation */
+#define EMAC_MII_DATA_OP_CL45_RD 0x30000000 /* Perform a Clause 45 read */
+#define EMAC_MII_DATA_OP_WR 0x10000000 /* Perform a write operation */
+#define EMAC_MII_DATA_OP_CL45_WR 0x10000000 /* Perform a Clause 45 write */
+#define EMAC_MII_DATA_PA_MSK 0x0f800000 /* PHY Address field mask */
+#define EMAC_MII_DATA_RA_MSK 0x007c0000 /* PHY Register field mask */
+#define EMAC_MII_DATA_TA 0x00020000 /* Turnaround */
+#define EMAC_MII_DATA_DATAMSK 0x0000ffff /* PHY data field */
+
+#define EMAC_MII_DATA_RA_SHIFT 18 /* MII Register address bits */
+#define EMAC_MII_DATA_RA_MASK 0x1F /* MII Register address mask */
+#define EMAC_MII_DATA_PA_SHIFT 23 /* MII PHY address bits */
+#define EMAC_MII_DATA_PA_MASK 0x1F /* MII PHY address mask */
+
+#define EMAC_MII_DATA_RA(v) (((v) & EMAC_MII_DATA_RA_MASK) << \
+ EMAC_MII_DATA_RA_SHIFT)
+#define EMAC_MII_DATA_PA(v) (((v) & EMAC_MII_DATA_PA_MASK) << \
+ EMAC_MII_DATA_PA_SHIFT)
+#define EMAC_MII_DATA(v) ((v) & 0xffff)
+
+#define EMAC_MII_SPEED_SHIFT 1
+#define EMAC_HOLDTIME_SHIFT 8
+#define EMAC_HOLDTIME_MASK 0x7
+#define EMAC_HOLDTIME(v) (((v) & EMAC_HOLDTIME_MASK) << \
+ EMAC_HOLDTIME_SHIFT)
+
+/*
+ * The address organisation for the MAC device. All addresses are split into
+ * two 32-bit register fields. The first one (bottom) is the lower 32 bits of
+ * the address and the other field holds the high-order bits - these may be
+ * 16 bits in the case of MAC addresses, or 32 bits for the hash address.
+ * In terms of memory storage, the first item (bottom) is assumed to be at a
+ * lower address location than 'top', i.e. top should be at the address of
+ * 'bottom' + 4 bytes.
+ */
+struct pfe_mac_addr {
+ u32 bottom; /* Lower 32 bits of address. */
+ u32 top; /* Upper 32 bits of address. */
+};
+
+/*
+ * The following is the organisation of the address filters section of the MAC
+ * registers. The Cadence MAC contains four possible specific address match
+ * addresses; if an incoming frame corresponds to any one of these four
+ * addresses then the frame will be copied to memory.
+ * It is not necessary for all four of the address match registers to be
+ * programmed; this is application dependent.
+ */
+struct spec_addr {
+ struct pfe_mac_addr one; /* Specific address register 1. */
+ struct pfe_mac_addr two; /* Specific address register 2. */
+ struct pfe_mac_addr three; /* Specific address register 3. */
+ struct pfe_mac_addr four; /* Specific address register 4. */
+};
+
+struct gemac_cfg {
+ u32 mode;
+ u32 speed;
+ u32 duplex;
+};
+
+/* EMAC Hash size */
+#define EMAC_HASH_REG_BITS 64
+
+#define EMAC_SPEC_ADDR_MAX 4
+
+#endif /* _EMAC_H_ */
diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/gpi.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/gpi.h new file mode 100644 index 0000000..7b29583 --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/gpi.h @@ -0,0 +1,86 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _GPI_H_
+#define _GPI_H_
+
+#define GPI_VERSION 0x00
+#define GPI_CTRL 0x04
+#define GPI_RX_CONFIG 0x08
+#define GPI_HDR_SIZE 0x0c
+#define GPI_BUF_SIZE 0x10
+#define GPI_LMEM_ALLOC_ADDR 0x14
+#define GPI_LMEM_FREE_ADDR 0x18
+#define GPI_DDR_ALLOC_ADDR 0x1c
+#define GPI_DDR_FREE_ADDR 0x20
+#define GPI_CLASS_ADDR 0x24
+#define GPI_DRX_FIFO 0x28
+#define GPI_TRX_FIFO 0x2c
+#define GPI_INQ_PKTPTR 0x30
+#define GPI_DDR_DATA_OFFSET 0x34
+#define GPI_LMEM_DATA_OFFSET 0x38
+#define GPI_TMLF_TX 0x4c
+#define GPI_DTX_ASEQ 0x50
+#define GPI_FIFO_STATUS 0x54
+#define GPI_FIFO_DEBUG 0x58
+#define GPI_TX_PAUSE_TIME 0x5c
+#define GPI_LMEM_SEC_BUF_DATA_OFFSET 0x60
+#define GPI_DDR_SEC_BUF_DATA_OFFSET 0x64
+#define GPI_TOE_CHKSUM_EN 0x68
+#define GPI_OVERRUN_DROPCNT 0x6c
+#define GPI_CSR_MTIP_PAUSE_REG 0x74
+#define GPI_CSR_MTIP_PAUSE_QUANTUM 0x78
+#define GPI_CSR_RX_CNT 0x7c
+#define GPI_CSR_TX_CNT 0x80
+#define GPI_CSR_DEBUG1 0x84
+#define GPI_CSR_DEBUG2 0x88
+
+struct gpi_cfg {
+ u32 lmem_rtry_cnt;
+ u32 tmlf_txthres;
+ u32 aseq_len;
+ u32 mtip_pause_reg;
+};
+
+/* GPI common defines */
+#define GPI_LMEM_BUF_EN 0x1
+#define GPI_DDR_BUF_EN 0x1
+
+/* EGPI 1 defines */
+#define EGPI1_LMEM_RTRY_CNT 0x40
+#define EGPI1_TMLF_TXTHRES 0xBC
+#define EGPI1_ASEQ_LEN 0x50
+
+/* EGPI 2 defines */
+#define EGPI2_LMEM_RTRY_CNT 0x40
+#define EGPI2_TMLF_TXTHRES 0xBC
+#define EGPI2_ASEQ_LEN 0x40
+
+/* EGPI 3 defines */
+#define EGPI3_LMEM_RTRY_CNT 0x40
+#define EGPI3_TMLF_TXTHRES 0xBC
+#define EGPI3_ASEQ_LEN 0x40
+
+/* HGPI defines */
+#define HGPI_LMEM_RTRY_CNT 0x40
+#define HGPI_TMLF_TXTHRES 0xBC
+#define HGPI_ASEQ_LEN 0x40
+
+#define EGPI_PAUSE_TIME 0x000007D0
+#define EGPI_PAUSE_ENABLE 0x40000000
+
+#endif /* _GPI_H_ */
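The per-GPI constants above plug straight into struct gpi_cfg. A minimal bring-up sketch for EGPI1 (annotation only, not part of the patch; gpi_init()/gpi_enable() are declared later in pfe.h, and egpi1_base plus the pause value are assumptions):

static void example_egpi1_bringup(void *egpi1_base)
{
	struct gpi_cfg cfg = {
		.lmem_rtry_cnt = EGPI1_LMEM_RTRY_CNT,
		.tmlf_txthres = EGPI1_TMLF_TXTHRES,
		.aseq_len = EGPI1_ASEQ_LEN,
		.mtip_pause_reg = 0,	/* platform-specific; assumed here */
	};

	gpi_init(egpi1_base, &cfg);
	gpi_enable(egpi1_base);
}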
diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/hif.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/hif.h new file mode 100644 index 0000000..71cf81a7 --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/hif.h @@ -0,0 +1,100 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _HIF_H_
+#define _HIF_H_
+
+/* @file hif.h.
+ * hif - PFE HIF block control and status registers.
+ * Mapped on CBUS and accessible from all PE's and ARM.
+ */
+#define HIF_VERSION (HIF_BASE_ADDR + 0x00)
+#define HIF_TX_CTRL (HIF_BASE_ADDR + 0x04)
+#define HIF_TX_CURR_BD_ADDR (HIF_BASE_ADDR + 0x08)
+#define HIF_TX_ALLOC (HIF_BASE_ADDR + 0x0c)
+#define HIF_TX_BDP_ADDR (HIF_BASE_ADDR + 0x10)
+#define HIF_TX_STATUS (HIF_BASE_ADDR + 0x14)
+#define HIF_RX_CTRL (HIF_BASE_ADDR + 0x20)
+#define HIF_RX_BDP_ADDR (HIF_BASE_ADDR + 0x24)
+#define HIF_RX_STATUS (HIF_BASE_ADDR + 0x30)
+#define HIF_INT_SRC (HIF_BASE_ADDR + 0x34)
+#define HIF_INT_ENABLE (HIF_BASE_ADDR + 0x38)
+#define HIF_POLL_CTRL (HIF_BASE_ADDR + 0x3c)
+#define HIF_RX_CURR_BD_ADDR (HIF_BASE_ADDR + 0x40)
+#define HIF_RX_ALLOC (HIF_BASE_ADDR + 0x44)
+#define HIF_TX_DMA_STATUS (HIF_BASE_ADDR + 0x48)
+#define HIF_RX_DMA_STATUS (HIF_BASE_ADDR + 0x4c)
+#define HIF_INT_COAL (HIF_BASE_ADDR + 0x50)
+
+/* HIF_INT_SRC / HIF_INT_ENABLE control bits */
+#define HIF_INT BIT(0)
+#define HIF_RXBD_INT BIT(1)
+#define HIF_RXPKT_INT BIT(2)
+#define HIF_TXBD_INT BIT(3)
+#define HIF_TXPKT_INT BIT(4)
+
+/* HIF_TX_CTRL bits */
+#define HIF_CTRL_DMA_EN BIT(0)
+#define HIF_CTRL_BDP_POLL_CTRL_EN BIT(1)
+#define HIF_CTRL_BDP_CH_START_WSTB BIT(2)
+
+/* HIF_RX_STATUS bits */
+#define BDP_CSR_RX_DMA_ACTV BIT(16)
+
+/* HIF_INT_ENABLE bits */
+#define HIF_INT_EN BIT(0)
+#define HIF_RXBD_INT_EN BIT(1)
+#define HIF_RXPKT_INT_EN BIT(2)
+#define HIF_TXBD_INT_EN BIT(3)
+#define HIF_TXPKT_INT_EN BIT(4)
+
+/* HIF_POLL_CTRL bits */
+#define HIF_RX_POLL_CTRL_CYCLE 0x0400
+#define HIF_TX_POLL_CTRL_CYCLE 0x0400
+
+/* HIF_INT_COAL bits */
+#define HIF_INT_COAL_ENABLE BIT(31)
+
+/* Buffer descriptor control bits */
+#define BD_CTRL_BUFLEN_MASK 0x3fff
+#define BD_BUF_LEN(x) ((x) & BD_CTRL_BUFLEN_MASK)
+#define BD_CTRL_CBD_INT_EN BIT(16)
+#define BD_CTRL_PKT_INT_EN BIT(17)
+#define BD_CTRL_LIFM BIT(18)
+#define BD_CTRL_LAST_BD BIT(19)
+#define BD_CTRL_DIR BIT(20)
+#define BD_CTRL_LMEM_CPY BIT(21) /* Valid only for HIF_NOCPY */
+#define BD_CTRL_PKT_XFER BIT(24)
+#define BD_CTRL_DESC_EN BIT(31)
+#define BD_CTRL_PARSE_DISABLE BIT(25)
+#define BD_CTRL_BRFETCH_DISABLE BIT(26)
+#define BD_CTRL_RTFETCH_DISABLE BIT(27)
+
+/* Buffer descriptor status bits */
+#define BD_STATUS_CONN_ID(x) ((x) & 0xffff)
+#define BD_STATUS_DIR_PROC_ID BIT(16)
+#define BD_STATUS_CONN_ID_EN BIT(17)
+#define BD_STATUS_PE2PROC_ID(x) (((x) & 7) << 18)
+#define BD_STATUS_LE_DATA BIT(21)
+#define BD_STATUS_CHKSUM_EN BIT(22)
+
+/* HIF Buffer descriptor status bits */
+#define DIR_PROC_ID BIT(16)
+#define PROC_ID(id) ((id) << 18)
+
+#endif /* _HIF_H_ */
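The buffer-descriptor bit helpers above compose the 32-bit ctrl word of a HIF descriptor. A hedged sketch (annotation only, not part of the patch; the descriptor layout and function name are assumptions, only the bit helpers come from hif.h):

struct example_hif_bd {
	u32 ctrl;
	u32 status;
	u32 data;	/* PFE-view buffer address */
	u32 next;	/* PFE-view address of the next descriptor */
};

static void example_fill_tx_bd(struct example_hif_bd *bd, u32 buf, u32 len)
{
	bd->data = buf;
	bd->status = 0;
	/* single fragment: last in frame, interrupt on completion,
	 * then hand the descriptor to hardware
	 */
	bd->ctrl = BD_CTRL_DESC_EN | BD_CTRL_LIFM | BD_CTRL_PKT_INT_EN |
		   BD_BUF_LEN(len);
}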
diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/hif_nocpy.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/hif_nocpy.h new file mode 100644 index 0000000..3d4d43c --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/hif_nocpy.h @@ -0,0 +1,50 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _HIF_NOCPY_H_
+#define _HIF_NOCPY_H_
+
+#define HIF_NOCPY_VERSION (HIF_NOCPY_BASE_ADDR + 0x00)
+#define HIF_NOCPY_TX_CTRL (HIF_NOCPY_BASE_ADDR + 0x04)
+#define HIF_NOCPY_TX_CURR_BD_ADDR (HIF_NOCPY_BASE_ADDR + 0x08)
+#define HIF_NOCPY_TX_ALLOC (HIF_NOCPY_BASE_ADDR + 0x0c)
+#define HIF_NOCPY_TX_BDP_ADDR (HIF_NOCPY_BASE_ADDR + 0x10)
+#define HIF_NOCPY_TX_STATUS (HIF_NOCPY_BASE_ADDR + 0x14)
+#define HIF_NOCPY_RX_CTRL (HIF_NOCPY_BASE_ADDR + 0x20)
+#define HIF_NOCPY_RX_BDP_ADDR (HIF_NOCPY_BASE_ADDR + 0x24)
+#define HIF_NOCPY_RX_STATUS (HIF_NOCPY_BASE_ADDR + 0x30)
+#define HIF_NOCPY_INT_SRC (HIF_NOCPY_BASE_ADDR + 0x34)
+#define HIF_NOCPY_INT_ENABLE (HIF_NOCPY_BASE_ADDR + 0x38)
+#define HIF_NOCPY_POLL_CTRL (HIF_NOCPY_BASE_ADDR + 0x3c)
+#define HIF_NOCPY_RX_CURR_BD_ADDR (HIF_NOCPY_BASE_ADDR + 0x40)
+#define HIF_NOCPY_RX_ALLOC (HIF_NOCPY_BASE_ADDR + 0x44)
+#define HIF_NOCPY_TX_DMA_STATUS (HIF_NOCPY_BASE_ADDR + 0x48)
+#define HIF_NOCPY_RX_DMA_STATUS (HIF_NOCPY_BASE_ADDR + 0x4c)
+#define HIF_NOCPY_RX_INQ0_PKTPTR (HIF_NOCPY_BASE_ADDR + 0x50)
+#define HIF_NOCPY_RX_INQ1_PKTPTR (HIF_NOCPY_BASE_ADDR + 0x54)
+#define HIF_NOCPY_TX_PORT_NO (HIF_NOCPY_BASE_ADDR + 0x60)
+#define HIF_NOCPY_LMEM_ALLOC_ADDR (HIF_NOCPY_BASE_ADDR + 0x64)
+#define HIF_NOCPY_CLASS_ADDR (HIF_NOCPY_BASE_ADDR + 0x68)
+#define HIF_NOCPY_TMU_PORT0_ADDR (HIF_NOCPY_BASE_ADDR + 0x70)
+#define HIF_NOCPY_TMU_PORT1_ADDR (HIF_NOCPY_BASE_ADDR + 0x74)
+#define HIF_NOCPY_TMU_PORT2_ADDR (HIF_NOCPY_BASE_ADDR + 0x7c)
+#define HIF_NOCPY_TMU_PORT3_ADDR (HIF_NOCPY_BASE_ADDR + 0x80)
+#define HIF_NOCPY_TMU_PORT4_ADDR (HIF_NOCPY_BASE_ADDR + 0x84)
+#define HIF_NOCPY_INT_COAL (HIF_NOCPY_BASE_ADDR + 0x90)
+
+#endif /* _HIF_NOCPY_H_ */
diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/tmu_csr.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/tmu_csr.h new file mode 100644 index 0000000..05f3d68 --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/tmu_csr.h @@ -0,0 +1,168 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _TMU_CSR_H_ +#define _TMU_CSR_H_ + +#define TMU_VERSION (TMU_CSR_BASE_ADDR + 0x000) +#define TMU_INQ_WATERMARK (TMU_CSR_BASE_ADDR + 0x004) +#define TMU_PHY_INQ_PKTPTR (TMU_CSR_BASE_ADDR + 0x008) +#define TMU_PHY_INQ_PKTINFO (TMU_CSR_BASE_ADDR + 0x00c) +#define TMU_PHY_INQ_FIFO_CNT (TMU_CSR_BASE_ADDR + 0x010) +#define TMU_SYS_GENERIC_CONTROL (TMU_CSR_BASE_ADDR + 0x014) +#define TMU_SYS_GENERIC_STATUS (TMU_CSR_BASE_ADDR + 0x018) +#define TMU_SYS_GEN_CON0 (TMU_CSR_BASE_ADDR + 0x01c) +#define TMU_SYS_GEN_CON1 (TMU_CSR_BASE_ADDR + 0x020) +#define TMU_SYS_GEN_CON2 (TMU_CSR_BASE_ADDR + 0x024) +#define TMU_SYS_GEN_CON3 (TMU_CSR_BASE_ADDR + 0x028) +#define TMU_SYS_GEN_CON4 (TMU_CSR_BASE_ADDR + 0x02c) +#define TMU_TEQ_DISABLE_DROPCHK (TMU_CSR_BASE_ADDR + 0x030) +#define TMU_TEQ_CTRL (TMU_CSR_BASE_ADDR + 0x034) +#define TMU_TEQ_QCFG (TMU_CSR_BASE_ADDR + 0x038) +#define TMU_TEQ_DROP_STAT (TMU_CSR_BASE_ADDR + 0x03c) +#define TMU_TEQ_QAVG (TMU_CSR_BASE_ADDR + 0x040) +#define TMU_TEQ_WREG_PROB (TMU_CSR_BASE_ADDR + 0x044) +#define TMU_TEQ_TRANS_STAT (TMU_CSR_BASE_ADDR + 0x048) +#define TMU_TEQ_HW_PROB_CFG0 (TMU_CSR_BASE_ADDR + 0x04c) +#define TMU_TEQ_HW_PROB_CFG1 (TMU_CSR_BASE_ADDR + 0x050) +#define TMU_TEQ_HW_PROB_CFG2 (TMU_CSR_BASE_ADDR + 0x054) +#define TMU_TEQ_HW_PROB_CFG3 (TMU_CSR_BASE_ADDR + 0x058) +#define TMU_TEQ_HW_PROB_CFG4 (TMU_CSR_BASE_ADDR + 0x05c) +#define TMU_TEQ_HW_PROB_CFG5 (TMU_CSR_BASE_ADDR + 0x060) +#define TMU_TEQ_HW_PROB_CFG6 (TMU_CSR_BASE_ADDR + 0x064) +#define TMU_TEQ_HW_PROB_CFG7 (TMU_CSR_BASE_ADDR + 0x068) +#define TMU_TEQ_HW_PROB_CFG8 (TMU_CSR_BASE_ADDR + 0x06c) +#define TMU_TEQ_HW_PROB_CFG9 (TMU_CSR_BASE_ADDR + 0x070) +#define TMU_TEQ_HW_PROB_CFG10 (TMU_CSR_BASE_ADDR + 0x074) +#define TMU_TEQ_HW_PROB_CFG11 (TMU_CSR_BASE_ADDR + 0x078) +#define TMU_TEQ_HW_PROB_CFG12 (TMU_CSR_BASE_ADDR + 0x07c) +#define TMU_TEQ_HW_PROB_CFG13 (TMU_CSR_BASE_ADDR + 0x080) +#define TMU_TEQ_HW_PROB_CFG14 (TMU_CSR_BASE_ADDR + 0x084) +#define TMU_TEQ_HW_PROB_CFG15 (TMU_CSR_BASE_ADDR + 0x088) +#define TMU_TEQ_HW_PROB_CFG16 (TMU_CSR_BASE_ADDR + 0x08c) +#define TMU_TEQ_HW_PROB_CFG17 (TMU_CSR_BASE_ADDR + 0x090) +#define TMU_TEQ_HW_PROB_CFG18 (TMU_CSR_BASE_ADDR + 0x094) +#define TMU_TEQ_HW_PROB_CFG19 (TMU_CSR_BASE_ADDR + 0x098) +#define TMU_TEQ_HW_PROB_CFG20 (TMU_CSR_BASE_ADDR + 0x09c) +#define TMU_TEQ_HW_PROB_CFG21 (TMU_CSR_BASE_ADDR + 0x0a0) +#define TMU_TEQ_HW_PROB_CFG22 (TMU_CSR_BASE_ADDR + 0x0a4) +#define TMU_TEQ_HW_PROB_CFG23 (TMU_CSR_BASE_ADDR + 0x0a8) +#define TMU_TEQ_HW_PROB_CFG24 (TMU_CSR_BASE_ADDR + 0x0ac) +#define TMU_TEQ_HW_PROB_CFG25 (TMU_CSR_BASE_ADDR + 0x0b0) +#define TMU_TDQ_IIFG_CFG (TMU_CSR_BASE_ADDR + 0x0b4) +/* [9:0] Scheduler Enable for each of the scheduler in the TDQ. + * This is a global Enable for all schedulers in PHY0 + */ +#define TMU_TDQ0_SCH_CTRL (TMU_CSR_BASE_ADDR + 0x0b8) + +#define TMU_LLM_CTRL (TMU_CSR_BASE_ADDR + 0x0bc) +#define TMU_LLM_BASE_ADDR (TMU_CSR_BASE_ADDR + 0x0c0) +#define TMU_LLM_QUE_LEN (TMU_CSR_BASE_ADDR + 0x0c4) +#define TMU_LLM_QUE_HEADPTR (TMU_CSR_BASE_ADDR + 0x0c8) +#define TMU_LLM_QUE_TAILPTR (TMU_CSR_BASE_ADDR + 0x0cc) +#define TMU_LLM_QUE_DROPCNT (TMU_CSR_BASE_ADDR + 0x0d0) +#define TMU_INT_EN (TMU_CSR_BASE_ADDR + 0x0d4) +#define TMU_INT_SRC (TMU_CSR_BASE_ADDR + 0x0d8) +#define TMU_INQ_STAT (TMU_CSR_BASE_ADDR + 0x0dc) +#define TMU_CTRL (TMU_CSR_BASE_ADDR + 0x0e0) + +/* [31] Mem Access Command. 
0 = Internal Memory Read, 1 = Internal memory + * Write [27:24] Byte Enables of the Internal memory access [23:0] Address of + * the internal memory. This address is used to access both the PM and DM of + * all the PE's + */ +#define TMU_MEM_ACCESS_ADDR (TMU_CSR_BASE_ADDR + 0x0e4) + +/* Internal Memory Access Write Data */ +#define TMU_MEM_ACCESS_WDATA (TMU_CSR_BASE_ADDR + 0x0e8) +/* Internal Memory Access Read Data. The commands are blocked + * at the mem_access only + */ +#define TMU_MEM_ACCESS_RDATA (TMU_CSR_BASE_ADDR + 0x0ec) + +/* [31:0] PHY0 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY0_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x0f0) +/* [31:0] PHY1 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY1_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x0f4) +/* [31:0] PHY2 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY2_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x0f8) +/* [31:0] PHY3 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY3_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x0fc) +#define TMU_BMU_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x100) +#define TMU_TX_CTRL (TMU_CSR_BASE_ADDR + 0x104) + +#define TMU_BUS_ACCESS_WDATA (TMU_CSR_BASE_ADDR + 0x108) +#define TMU_BUS_ACCESS (TMU_CSR_BASE_ADDR + 0x10c) +#define TMU_BUS_ACCESS_RDATA (TMU_CSR_BASE_ADDR + 0x110) + +#define TMU_PE_SYS_CLK_RATIO (TMU_CSR_BASE_ADDR + 0x114) +#define TMU_PE_STATUS (TMU_CSR_BASE_ADDR + 0x118) +#define TMU_TEQ_MAX_THRESHOLD (TMU_CSR_BASE_ADDR + 0x11c) +/* [31:0] PHY4 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY4_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x134) +/* [9:0] Scheduler Enable for each of the scheduler in the TDQ. + * This is a global Enable for all schedulers in PHY1 + */ +#define TMU_TDQ1_SCH_CTRL (TMU_CSR_BASE_ADDR + 0x138) +/* [9:0] Scheduler Enable for each of the scheduler in the TDQ. + * This is a global Enable for all schedulers in PHY2 + */ +#define TMU_TDQ2_SCH_CTRL (TMU_CSR_BASE_ADDR + 0x13c) +/* [9:0] Scheduler Enable for each of the scheduler in the TDQ. + * This is a global Enable for all schedulers in PHY3 + */ +#define TMU_TDQ3_SCH_CTRL (TMU_CSR_BASE_ADDR + 0x140) +#define TMU_BMU_BUF_SIZE (TMU_CSR_BASE_ADDR + 0x144) +/* [31:0] PHY5 in queue address (must be initialized with one of the + * xxx_INQ_PKTPTR cbus addresses) + */ +#define TMU_PHY5_INQ_ADDR (TMU_CSR_BASE_ADDR + 0x148) + +#define SW_RESET BIT(0) /* Global software reset */ +#define INQ_RESET BIT(2) +#define TEQ_RESET BIT(3) +#define TDQ_RESET BIT(4) +#define PE_RESET BIT(5) +#define MEM_INIT BIT(6) +#define MEM_INIT_DONE BIT(7) +#define LLM_INIT BIT(8) +#define LLM_INIT_DONE BIT(9) +#define ECC_MEM_INIT_DONE BIT(10) + +struct tmu_cfg { + u32 pe_sys_clk_ratio; + unsigned long llm_base_addr; + u32 llm_queue_len; +}; + +/* Not HW related for pfe_ctrl / pfe common defines */ +#define DEFAULT_MAX_QDEPTH 80 +#define DEFAULT_Q0_QDEPTH 511 /*We keep one large queue for host tx qos */ +#define DEFAULT_TMU3_QDEPTH 127 + +#endif /* _TMU_CSR_H_ */ diff --git a/drivers/staging/fsl_ppfe/include/pfe/cbus/util_csr.h b/drivers/staging/fsl_ppfe/include/pfe/cbus/util_csr.h new file mode 100644 index 0000000..ae623cd --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/cbus/util_csr.h @@ -0,0 +1,61 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. 
+ * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _UTIL_CSR_H_ +#define _UTIL_CSR_H_ + +#define UTIL_VERSION (UTIL_CSR_BASE_ADDR + 0x000) +#define UTIL_TX_CTRL (UTIL_CSR_BASE_ADDR + 0x004) +#define UTIL_INQ_PKTPTR (UTIL_CSR_BASE_ADDR + 0x010) + +#define UTIL_HDR_SIZE (UTIL_CSR_BASE_ADDR + 0x014) + +#define UTIL_PE0_QB_DM_ADDR0 (UTIL_CSR_BASE_ADDR + 0x020) +#define UTIL_PE0_QB_DM_ADDR1 (UTIL_CSR_BASE_ADDR + 0x024) +#define UTIL_PE0_RO_DM_ADDR0 (UTIL_CSR_BASE_ADDR + 0x060) +#define UTIL_PE0_RO_DM_ADDR1 (UTIL_CSR_BASE_ADDR + 0x064) + +#define UTIL_MEM_ACCESS_ADDR (UTIL_CSR_BASE_ADDR + 0x100) +#define UTIL_MEM_ACCESS_WDATA (UTIL_CSR_BASE_ADDR + 0x104) +#define UTIL_MEM_ACCESS_RDATA (UTIL_CSR_BASE_ADDR + 0x108) + +#define UTIL_TM_INQ_ADDR (UTIL_CSR_BASE_ADDR + 0x114) +#define UTIL_PE_STATUS (UTIL_CSR_BASE_ADDR + 0x118) + +#define UTIL_PE_SYS_CLK_RATIO (UTIL_CSR_BASE_ADDR + 0x200) +#define UTIL_AFULL_THRES (UTIL_CSR_BASE_ADDR + 0x204) +#define UTIL_GAP_BETWEEN_READS (UTIL_CSR_BASE_ADDR + 0x208) +#define UTIL_MAX_BUF_CNT (UTIL_CSR_BASE_ADDR + 0x20c) +#define UTIL_TSQ_FIFO_THRES (UTIL_CSR_BASE_ADDR + 0x210) +#define UTIL_TSQ_MAX_CNT (UTIL_CSR_BASE_ADDR + 0x214) +#define UTIL_IRAM_DATA_0 (UTIL_CSR_BASE_ADDR + 0x218) +#define UTIL_IRAM_DATA_1 (UTIL_CSR_BASE_ADDR + 0x21c) +#define UTIL_IRAM_DATA_2 (UTIL_CSR_BASE_ADDR + 0x220) +#define UTIL_IRAM_DATA_3 (UTIL_CSR_BASE_ADDR + 0x224) + +#define UTIL_BUS_ACCESS_ADDR (UTIL_CSR_BASE_ADDR + 0x228) +#define UTIL_BUS_ACCESS_WDATA (UTIL_CSR_BASE_ADDR + 0x22c) +#define UTIL_BUS_ACCESS_RDATA (UTIL_CSR_BASE_ADDR + 0x230) + +#define UTIL_INQ_AFULL_THRES (UTIL_CSR_BASE_ADDR + 0x234) + +struct util_cfg { + u32 pe_sys_clk_ratio; +}; + +#endif /* _UTIL_CSR_H_ */ diff --git a/drivers/staging/fsl_ppfe/include/pfe/pfe.h b/drivers/staging/fsl_ppfe/include/pfe/pfe.h new file mode 100644 index 0000000..d93ae4c --- /dev/null +++ b/drivers/staging/fsl_ppfe/include/pfe/pfe.h @@ -0,0 +1,372 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#ifndef _PFE_H_
+#define _PFE_H_
+
+#include "cbus.h"
+
+#define CLASS_DMEM_BASE_ADDR(i) (0x00000000 | ((i) << 20))
+/*
+ * Only valid for mem access register interface
+ */
+#define CLASS_IMEM_BASE_ADDR(i) (0x00000000 | ((i) << 20))
+#define CLASS_DMEM_SIZE 0x00002000
+#define CLASS_IMEM_SIZE 0x00008000
+
+#define TMU_DMEM_BASE_ADDR(i) (0x00000000 + ((i) << 20))
+/*
+ * Only valid for mem access register interface
+ */
+#define TMU_IMEM_BASE_ADDR(i) (0x00000000 + ((i) << 20))
+#define TMU_DMEM_SIZE 0x00000800
+#define TMU_IMEM_SIZE 0x00002000
+
+#define UTIL_DMEM_BASE_ADDR 0x00000000
+#define UTIL_DMEM_SIZE 0x00002000
+
+#define PE_LMEM_BASE_ADDR 0xc3010000
+#define PE_LMEM_SIZE 0x8000
+#define PE_LMEM_END (PE_LMEM_BASE_ADDR + PE_LMEM_SIZE)
+
+#define DMEM_BASE_ADDR 0x00000000
+#define DMEM_SIZE 0x2000 /* TMU has less... */
+#define DMEM_END (DMEM_BASE_ADDR + DMEM_SIZE)
+
+#define PMEM_BASE_ADDR 0x00010000
+#define PMEM_SIZE 0x8000 /* TMU has less... */
+#define PMEM_END (PMEM_BASE_ADDR + PMEM_SIZE)
+
+/* These check memory ranges from the PE point of view/memory map */
+#define IS_DMEM(addr, len) \
+ ({ typeof(addr) addr_ = (addr); \
+ ((unsigned long)(addr_) >= DMEM_BASE_ADDR) && \
+ (((unsigned long)(addr_) + (len)) <= DMEM_END); })
+
+#define IS_PMEM(addr, len) \
+ ({ typeof(addr) addr_ = (addr); \
+ ((unsigned long)(addr_) >= PMEM_BASE_ADDR) && \
+ (((unsigned long)(addr_) + (len)) <= PMEM_END); })
+
+#define IS_PE_LMEM(addr, len) \
+ ({ typeof(addr) addr_ = (addr); \
+ ((unsigned long)(addr_) >= \
+ PE_LMEM_BASE_ADDR) && \
+ (((unsigned long)(addr_) + \
+ (len)) <= PE_LMEM_END); })
+
+#define IS_PFE_LMEM(addr, len) \
+ ({ typeof(addr) addr_ = (addr); \
+ ((unsigned long)(addr_) >= \
+ CBUS_VIRT_TO_PFE(LMEM_BASE_ADDR)) && \
+ (((unsigned long)(addr_) + (len)) <= \
+ CBUS_VIRT_TO_PFE(LMEM_END)); })
+
+#define __IS_PHYS_DDR(addr, len) \
+ ({ typeof(addr) addr_ = (addr); \
+ ((unsigned long)(addr_) >= \
+ DDR_PHYS_BASE_ADDR) && \
+ (((unsigned long)(addr_) + (len)) <= \
+ DDR_PHYS_END); })
+
+#define IS_PHYS_DDR(addr, len) __IS_PHYS_DDR(DDR_PFE_TO_PHYS(addr), len)
+
+/*
+ * If using a run-time virtual address for the cbus base address use this code
+ */
+extern void *cbus_base_addr;
+extern void *ddr_base_addr;
+extern unsigned long ddr_phys_base_addr;
+extern unsigned int ddr_size;
+
+#define CBUS_BASE_ADDR cbus_base_addr
+#define DDR_PHYS_BASE_ADDR ddr_phys_base_addr
+#define DDR_BASE_ADDR ddr_base_addr
+#define DDR_SIZE ddr_size
+
+#define DDR_PHYS_END (DDR_PHYS_BASE_ADDR + DDR_SIZE)
+
+#define LS1012A_PFE_RESET_WA /*
+ * PFE doesn't have a global reset, so
+ * re-init must take care of a few things
+ * to make PFE functional after reset
+ */
+#define PFE_CBUS_PHYS_BASE_ADDR 0xc0000000 /* CBUS physical base address
+ * as seen by PE's.
+ */
+
+/* CBUS physical base address as seen by PE's. */
+#define PFE_CBUS_PHYS_BASE_ADDR_FROM_PFE 0xc0000000
+
+#define DDR_PHYS_TO_PFE(p) (((unsigned long int)(p)) & 0x7FFFFFFF)
+#define DDR_PFE_TO_PHYS(p) (((unsigned long int)(p)) | 0x80000000)
+/* Translates to the PFE address map */
+#define CBUS_PHYS_TO_PFE(p) (((p) - PFE_CBUS_PHYS_BASE_ADDR) + \
+ PFE_CBUS_PHYS_BASE_ADDR_FROM_PFE)
+
+#define DDR_PHYS_TO_VIRT(p) (((p) - DDR_PHYS_BASE_ADDR) + DDR_BASE_ADDR)
+#define DDR_VIRT_TO_PHYS(v) (((v) - DDR_BASE_ADDR) + DDR_PHYS_BASE_ADDR)
+#define DDR_VIRT_TO_PFE(p) (DDR_PHYS_TO_PFE(DDR_VIRT_TO_PHYS(p)))
+
+#define CBUS_VIRT_TO_PFE(v) (((v) - CBUS_BASE_ADDR) + \
+ PFE_CBUS_PHYS_BASE_ADDR)
+#define CBUS_PFE_TO_VIRT(p) (((unsigned long int)(p) - \
+ PFE_CBUS_PHYS_BASE_ADDR) + CBUS_BASE_ADDR)
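The translation macros above are pure bit arithmetic, so they are easy to sanity-check. A short annotation (not part of the patch; the sample addresses are made up):

/*
 * DDR_PHYS_TO_PFE(0x83400000) == 0x03400000  (bit 31 cleared)
 * DDR_PFE_TO_PHYS(0x03400000) == 0x83400000  (bit 31 set again)
 *
 * Host virtual DDR addresses go through the physical map first:
 */
static inline unsigned long example_ddr_virt_to_pfe(void *vaddr)
{
	/* valid only for vaddr in [DDR_BASE_ADDR, DDR_BASE_ADDR + DDR_SIZE) */
	return DDR_VIRT_TO_PFE(vaddr);
}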
+/* The code below is used by the QoS control driver on the host */
+#define TMU_APB_BASE_ADDR 0xc1000000 /* TMU base address as seen by
+ * the PE's
+ */
+
+enum {
+ CLASS0_ID = 0,
+ CLASS1_ID,
+ CLASS2_ID,
+ CLASS3_ID,
+ CLASS4_ID,
+ CLASS5_ID,
+ TMU0_ID,
+ TMU1_ID,
+ TMU2_ID,
+ TMU3_ID,
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+ UTIL_ID,
+#endif
+ MAX_PE
+};
+
+#define CLASS_MASK (BIT(CLASS0_ID) | BIT(CLASS1_ID) |\
+ BIT(CLASS2_ID) | BIT(CLASS3_ID) |\
+ BIT(CLASS4_ID) | BIT(CLASS5_ID))
+#define CLASS_MAX_ID CLASS5_ID
+
+#define TMU_MASK (BIT(TMU0_ID) | BIT(TMU1_ID) |\
+ BIT(TMU3_ID))
+
+#define TMU_MAX_ID TMU3_ID
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+#define UTIL_MASK BIT(UTIL_ID)
+#endif
+
+struct pe_status {
+ u32 cpu_state;
+ u32 activity_counter;
+ u32 rx;
+ union {
+ u32 tx;
+ u32 tmu_qstatus;
+ };
+ u32 drop;
+#if defined(CFG_PE_DEBUG)
+ u32 debug_indicator;
+ u32 debug[16];
+#endif
+} __aligned(16);
+
+struct pe_sync_mailbox {
+ u32 stop;
+ u32 stopped;
+};
+
+/* Drop counter definitions */
+
+#define CLASS_NUM_DROP_COUNTERS 13
+#define UTIL_NUM_DROP_COUNTERS 8
+
+/* PE information.
+ * Structure containing PE-specific information. It is used to create
+ * generic C functions common to all PE's.
+ * Before using the library functions this structure needs to be initialized
+ * with the different register virtual addresses
+ * (according to the ARM MMU mapping). The default initialization supports a
+ * virtual == physical mapping.
+ */
+struct pe_info {
+ u32 dmem_base_addr; /* PE's dmem base address */
+ u32 pmem_base_addr; /* PE's pmem base address */
+ u32 pmem_size; /* PE's pmem size */
+
+ void *mem_access_wdata; /* PE's _MEM_ACCESS_WDATA register
+ * address
+ */
+ void *mem_access_addr; /* PE's _MEM_ACCESS_ADDR register
+ * address
+ */
+ void *mem_access_rdata; /* PE's _MEM_ACCESS_RDATA register
+ * address
+ */
+};
+
+void pe_lmem_read(u32 *dst, u32 len, u32 offset);
+void pe_lmem_write(u32 *src, u32 len, u32 offset);
+
+void pe_dmem_memcpy_to32(int id, u32 dst, const void *src, unsigned int len);
+void pe_pmem_memcpy_to32(int id, u32 dst, const void *src, unsigned int len);
+
+u32 pe_pmem_read(int id, u32 addr, u8 size);
+
+void pe_dmem_write(int id, u32 val, u32 addr, u8 size);
+u32 pe_dmem_read(int id, u32 addr, u8 size);
+void class_pe_lmem_memcpy_to32(u32 dst, const void *src, unsigned int len);
+void class_pe_lmem_memset(u32 dst, int val, unsigned int len);
+void class_bus_write(u32 val, u32 addr, u8 size);
+u32 class_bus_read(u32 addr, u8 size);
+
+#define class_bus_readl(addr) class_bus_read(addr, 4)
+#define class_bus_readw(addr) class_bus_read(addr, 2)
+#define class_bus_readb(addr) class_bus_read(addr, 1)
+
+#define class_bus_writel(val, addr) class_bus_write(val, addr, 4)
+#define class_bus_writew(val, addr) class_bus_write(val, addr, 2)
+#define class_bus_writeb(val, addr) class_bus_write(val, addr, 1)
+
+#define pe_dmem_readl(id, addr) pe_dmem_read(id, addr, 4)
+#define pe_dmem_readw(id, addr) pe_dmem_read(id, addr, 2)
+#define pe_dmem_readb(id, addr) pe_dmem_read(id, addr, 1)
+
+#define pe_dmem_writel(id, val, addr) pe_dmem_write(id, val, addr, 4)
+#define pe_dmem_writew(id, val, addr) pe_dmem_write(id, val, addr, 2)
+#define pe_dmem_writeb(id, val, addr) pe_dmem_write(id, val, addr, 1)
+
+int pe_load_elf_section(int id, const void *data, struct elf32_shdr *shdr,
+ struct device *dev);
+
+void pfe_lib_init(void *cbus_base, void *ddr_base, unsigned long ddr_phys_base,
+ unsigned int ddr_size);
+void bmu_init(void *base, struct BMU_CFG *cfg);
+void bmu_reset(void *base);
+void bmu_enable(void *base);
+void bmu_disable(void *base);
+void bmu_set_config(void *base, struct BMU_CFG *cfg);
+
+/*
+ * An enumerated type for loopback values. This can be one of three values:
+ * no loopback (normal operation), local loopback through the MAC's internal
+ * loopback module, or PHY loopback through the external PHY.
+ */ +#ifndef __MAC_LOOP_ENUM__ +#define __MAC_LOOP_ENUM__ +enum mac_loop {LB_NONE, LB_EXT, LB_LOCAL}; +#endif + +void gemac_init(void *base, void *config); +void gemac_disable_rx_checksum_offload(void *base); +void gemac_enable_rx_checksum_offload(void *base); +void gemac_set_mdc_div(void *base, int mdc_div); +void gemac_set_speed(void *base, enum mac_speed gem_speed); +void gemac_set_duplex(void *base, int duplex); +void gemac_set_mode(void *base, int mode); +void gemac_enable(void *base); +void gemac_tx_disable(void *base); +void gemac_tx_enable(void *base); +void gemac_disable(void *base); +void gemac_reset(void *base); +void gemac_set_address(void *base, struct spec_addr *addr); +struct spec_addr gemac_get_address(void *base); +void gemac_set_loop(void *base, enum mac_loop gem_loop); +void gemac_set_laddr1(void *base, struct pfe_mac_addr *address); +void gemac_set_laddr2(void *base, struct pfe_mac_addr *address); +void gemac_set_laddr3(void *base, struct pfe_mac_addr *address); +void gemac_set_laddr4(void *base, struct pfe_mac_addr *address); +void gemac_set_laddrN(void *base, struct pfe_mac_addr *address, + unsigned int entry_index); +void gemac_clear_laddr1(void *base); +void gemac_clear_laddr2(void *base); +void gemac_clear_laddr3(void *base); +void gemac_clear_laddr4(void *base); +void gemac_clear_laddrN(void *base, unsigned int entry_index); +struct pfe_mac_addr gemac_get_hash(void *base); +void gemac_set_hash(void *base, struct pfe_mac_addr *hash); +struct pfe_mac_addr gem_get_laddr1(void *base); +struct pfe_mac_addr gem_get_laddr2(void *base); +struct pfe_mac_addr gem_get_laddr3(void *base); +struct pfe_mac_addr gem_get_laddr4(void *base); +struct pfe_mac_addr gem_get_laddrN(void *base, unsigned int entry_index); +void gemac_set_config(void *base, struct gemac_cfg *cfg); +void gemac_allow_broadcast(void *base); +void gemac_no_broadcast(void *base); +void gemac_enable_1536_rx(void *base); +void gemac_disable_1536_rx(void *base); +void gemac_enable_rx_jmb(void *base); +void gemac_disable_rx_jmb(void *base); +void gemac_enable_stacked_vlan(void *base); +void gemac_disable_stacked_vlan(void *base); +void gemac_enable_pause_rx(void *base); +void gemac_disable_pause_rx(void *base); +void gemac_enable_copy_all(void *base); +void gemac_disable_copy_all(void *base); +void gemac_set_bus_width(void *base, int width); +void gemac_set_wol(void *base, u32 wol_conf); + +void gpi_init(void *base, struct gpi_cfg *cfg); +void gpi_reset(void *base); +void gpi_enable(void *base); +void gpi_disable(void *base); +void gpi_set_config(void *base, struct gpi_cfg *cfg); + +void class_init(struct class_cfg *cfg); +void class_reset(void); +void class_enable(void); +void class_disable(void); +void class_set_config(struct class_cfg *cfg); + +void tmu_reset(void); +void tmu_init(struct tmu_cfg *cfg); +void tmu_enable(u32 pe_mask); +void tmu_disable(u32 pe_mask); +u32 tmu_qstatus(u32 if_id); +u32 tmu_pkts_processed(u32 if_id); + +void util_init(struct util_cfg *cfg); +void util_reset(void); +void util_enable(void); +void util_disable(void); + +void hif_init(void); +void hif_tx_enable(void); +void hif_tx_disable(void); +void hif_rx_enable(void); +void hif_rx_disable(void); + +/* Get Chip Revision level + * + */ +static inline unsigned int CHIP_REVISION(void) +{ + /*For LS1012A return always 1 */ + return 1; +} + +/* Start HIF rx DMA + * + */ +static inline void hif_rx_dma_start(void) +{ + writel(HIF_CTRL_DMA_EN | HIF_CTRL_BDP_CH_START_WSTB, HIF_RX_CTRL); +} + +/* Start HIF tx DMA + * + */ +static inline void 
hif_tx_dma_start(void)
+{
+ writel(HIF_CTRL_DMA_EN | HIF_CTRL_BDP_CH_START_WSTB, HIF_TX_CTRL);
+}
+
+#endif /* _PFE_H_ */
diff --git a/drivers/staging/fsl_ppfe/pfe_ctrl.c b/drivers/staging/fsl_ppfe/pfe_ctrl.c new file mode 100644 index 0000000..dfa8547 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_ctrl.c @@ -0,0 +1,238 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+
+#include "pfe_mod.h"
+#include "pfe_ctrl.h"
+
+#define TIMEOUT_MS 1000
+
+/* Polling helper: returns 0 while within the timeout window (rescheduling
+ * when needed) and -1 once TIMEOUT_MS has elapsed past @end.
+ */
+int relax(unsigned long end)
+{
+ if (time_after(jiffies, end)) {
+ if (time_after(jiffies, end + (TIMEOUT_MS * HZ) / 1000))
+ return -1;
+
+ if (need_resched())
+ schedule();
+ }
+
+ return 0;
+}
+
+void pfe_ctrl_suspend(struct pfe_ctrl *ctrl)
+{
+ int id;
+
+ mutex_lock(&ctrl->mutex);
+
+ for (id = CLASS0_ID; id <= CLASS_MAX_ID; id++)
+ pe_dmem_write(id, cpu_to_be32(0x1), CLASS_DM_RESUME, 4);
+
+ for (id = TMU0_ID; id <= TMU_MAX_ID; id++) {
+ if (id == TMU2_ID)
+ continue;
+ pe_dmem_write(id, cpu_to_be32(0x1), TMU_DM_RESUME, 4);
+ }
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+ pe_dmem_write(UTIL_ID, cpu_to_be32(0x1), UTIL_DM_RESUME, 4);
+#endif
+ mutex_unlock(&ctrl->mutex);
+}
+
+void pfe_ctrl_resume(struct pfe_ctrl *ctrl)
+{
+ int pe_mask = CLASS_MASK | TMU_MASK;
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+ pe_mask |= UTIL_MASK;
+#endif
+ mutex_lock(&ctrl->mutex);
+ pe_start(&pfe->ctrl, pe_mask);
+ mutex_unlock(&ctrl->mutex);
+}
+
+/* PE sync stop.
+ * Stops packet processing for a list of PE's (specified using a bitmask).
+ * The caller must hold ctrl->mutex.
+ *
+ * @param ctrl Control context
+ * @param pe_mask Mask of PE ids to stop
+ *
+ */
+int pe_sync_stop(struct pfe_ctrl *ctrl, int pe_mask)
+{
+ struct pe_sync_mailbox *mbox;
+ int pe_stopped = 0;
+ unsigned long end = jiffies + 2;
+ int i;
+
+ pe_mask &= 0x2FF; /* exclude UTIL and TMU2 */
+
+ for (i = 0; i < MAX_PE; i++)
+ if (pe_mask & (1 << i)) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ pe_dmem_write(i, cpu_to_be32(0x1),
+ (unsigned long)&mbox->stop, 4);
+ }
+
+ while (pe_stopped != pe_mask) {
+ for (i = 0; i < MAX_PE; i++)
+ if ((pe_mask & (1 << i)) && !(pe_stopped & (1 << i))) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ if (pe_dmem_read(i,
+ (unsigned long)&mbox->stopped, 4) &
+ cpu_to_be32(0x1))
+ pe_stopped |= (1 << i);
+ }
+
+ if (relax(end) < 0)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ pr_err("%s: timeout, %x %x\n", __func__, pe_mask, pe_stopped);
+
+ for (i = 0; i < MAX_PE; i++)
+ if (pe_mask & (1 << i)) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ pe_dmem_write(i, cpu_to_be32(0x0),
+ (unsigned long)&mbox->stop, 4);
+ }
+
+ return -EIO;
+}
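Usage sketch (annotation only, not part of the patch): a caller quiesces a set of PEs around a reconfiguration, mirroring the locking that pfe_ctrl_suspend()/pfe_ctrl_resume() use. The function name is made up; pe_sync_stop(), pe_start() and the masks come from this patch:

static int example_reconfigure(struct pfe_ctrl *ctrl)
{
	int ret;

	mutex_lock(&ctrl->mutex);	/* both helpers require the mutex */

	ret = pe_sync_stop(ctrl, CLASS_MASK);
	if (!ret) {
		/* ... safely update shared DMEM/DDR state here ... */
		pe_start(ctrl, CLASS_MASK);
	}

	mutex_unlock(&ctrl->mutex);
	return ret;
}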
+
+/* PE start.
+ * Starts packet processing for a list of PE's (specified using a bitmask).
+ * The caller must hold ctrl->mutex.
+ *
+ * @param ctrl Control context
+ * @param pe_mask Mask of PE ids to start
+ *
+ */
+void pe_start(struct pfe_ctrl *ctrl, int pe_mask)
+{
+ struct pe_sync_mailbox *mbox;
+ int i;
+
+ for (i = 0; i < MAX_PE; i++)
+ if (pe_mask & (1 << i)) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ pe_dmem_write(i, cpu_to_be32(0x0),
+ (unsigned long)&mbox->stop, 4);
+ }
+}
+
+/* This function ensures all PEs are put into an idle state */
+int pe_reset_all(struct pfe_ctrl *ctrl)
+{
+ struct pe_sync_mailbox *mbox;
+ int pe_stopped = 0;
+ unsigned long end = jiffies + 2;
+ int i;
+ int pe_mask = CLASS_MASK | TMU_MASK;
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+ pe_mask |= UTIL_MASK;
+#endif
+
+ for (i = 0; i < MAX_PE; i++)
+ if (pe_mask & (1 << i)) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ pe_dmem_write(i, cpu_to_be32(0x2),
+ (unsigned long)&mbox->stop, 4);
+ }
+
+ while (pe_stopped != pe_mask) {
+ for (i = 0; i < MAX_PE; i++)
+ if ((pe_mask & (1 << i)) && !(pe_stopped & (1 << i))) {
+ mbox = (void *)ctrl->sync_mailbox_baseaddr[i];
+
+ if (pe_dmem_read(i,
+ (unsigned long)&mbox->stopped, 4) &
+ cpu_to_be32(0x1))
+ pe_stopped |= (1 << i);
+ }
+
+ if (relax(end) < 0)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ pr_err("%s: timeout, %x %x\n", __func__, pe_mask, pe_stopped);
+ return -EIO;
+}
+
+int pfe_ctrl_init(struct pfe *pfe)
+{
+ struct pfe_ctrl *ctrl = &pfe->ctrl;
+ int id;
+
+ pr_info("%s\n", __func__);
+
+ mutex_init(&ctrl->mutex);
+ spin_lock_init(&ctrl->lock);
+
+ for (id = CLASS0_ID; id <= CLASS_MAX_ID; id++) {
+ ctrl->sync_mailbox_baseaddr[id] = CLASS_DM_SYNC_MBOX;
+ ctrl->msg_mailbox_baseaddr[id] = CLASS_DM_MSG_MBOX;
+ }
+
+ for (id = TMU0_ID; id <= TMU_MAX_ID; id++) {
+ if (id == TMU2_ID)
+ continue;
+ ctrl->sync_mailbox_baseaddr[id] = TMU_DM_SYNC_MBOX;
+ ctrl->msg_mailbox_baseaddr[id] = TMU_DM_MSG_MBOX;
+ }
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+ ctrl->sync_mailbox_baseaddr[UTIL_ID] = UTIL_DM_SYNC_MBOX;
+ ctrl->msg_mailbox_baseaddr[UTIL_ID] = UTIL_DM_MSG_MBOX;
+#endif
+
+ ctrl->hash_array_baseaddr = pfe->ddr_baseaddr + ROUTE_TABLE_BASEADDR;
+ ctrl->hash_array_phys_baseaddr = pfe->ddr_phys_baseaddr +
+ ROUTE_TABLE_BASEADDR;
+
+ ctrl->dev = pfe->dev;
+
+ pr_info("%s finished\n", __func__);
+
+ return 0;
+}
+
+void pfe_ctrl_exit(struct pfe *pfe)
+{
+ pr_info("%s\n", __func__);
+}
+ */ + +#ifndef _PFE_CTRL_H_ +#define _PFE_CTRL_H_ + +#include <linux/dmapool.h> + +#include "pfe_mod.h" +#include "pfe/pfe.h" + +#define DMA_BUF_SIZE_128 0x80 /* enough for 1 conntracks */ +#define DMA_BUF_SIZE_256 0x100 +/* enough for 2 conntracks, 1 bridge entry or 1 multicast entry */ +#define DMA_BUF_SIZE_512 0x200 +/* 512bytes dma allocated buffers used by rtp relay feature */ +#define DMA_BUF_MIN_ALIGNMENT 8 +#define DMA_BUF_BOUNDARY (4 * 1024) +/* bursts can not cross 4k boundary */ + +#define CMD_TX_ENABLE 0x0501 +#define CMD_TX_DISABLE 0x0502 + +#define CMD_RX_LRO 0x0011 +#define CMD_PKTCAP_ENABLE 0x0d01 +#define CMD_QM_EXPT_RATE 0x020c + +#define CLASS_DM_SH_STATIC (0x800) +#define CLASS_DM_CPU_TICKS (CLASS_DM_SH_STATIC) +#define CLASS_DM_SYNC_MBOX (0x808) +#define CLASS_DM_MSG_MBOX (0x810) +#define CLASS_DM_DROP_CNTR (0x820) +#define CLASS_DM_RESUME (0x854) +#define CLASS_DM_PESTATUS (0x860) + +#define TMU_DM_SH_STATIC (0x80) +#define TMU_DM_CPU_TICKS (TMU_DM_SH_STATIC) +#define TMU_DM_SYNC_MBOX (0x88) +#define TMU_DM_MSG_MBOX (0x90) +#define TMU_DM_RESUME (0xA0) +#define TMU_DM_PESTATUS (0xB0) +#define TMU_DM_CONTEXT (0x300) +#define TMU_DM_TX_TRANS (0x480) + +#define UTIL_DM_SH_STATIC (0x0) +#define UTIL_DM_CPU_TICKS (UTIL_DM_SH_STATIC) +#define UTIL_DM_SYNC_MBOX (0x8) +#define UTIL_DM_MSG_MBOX (0x10) +#define UTIL_DM_DROP_CNTR (0x20) +#define UTIL_DM_RESUME (0x40) +#define UTIL_DM_PESTATUS (0x50) + +struct pfe_ctrl { + struct mutex mutex; /* to serialize pfe control access */ + spinlock_t lock; + + void *dma_pool; + void *dma_pool_512; + void *dma_pool_128; + + struct device *dev; + + void *hash_array_baseaddr; /* + * Virtual base address of + * the conntrack hash array + */ + unsigned long hash_array_phys_baseaddr; /* + * Physical base address of + * the conntrack hash array + */ + + int (*event_cb)(u16, u16, u16*); + + unsigned long sync_mailbox_baseaddr[MAX_PE]; /* + * Sync mailbox PFE + * internal address, + * initialized + * when parsing elf images + */ + unsigned long msg_mailbox_baseaddr[MAX_PE]; /* + * Msg mailbox PFE internal + * address, initialized + * when parsing elf images + */ + unsigned int sys_clk; /* AXI clock value, in KHz */ +}; + +int pfe_ctrl_init(struct pfe *pfe); +void pfe_ctrl_exit(struct pfe *pfe); +int pe_sync_stop(struct pfe_ctrl *ctrl, int pe_mask); +void pe_start(struct pfe_ctrl *ctrl, int pe_mask); +int pe_reset_all(struct pfe_ctrl *ctrl); +void pfe_ctrl_suspend(struct pfe_ctrl *ctrl); +void pfe_ctrl_resume(struct pfe_ctrl *ctrl); +int relax(unsigned long end); + +#endif /* _PFE_CTRL_H_ */ diff --git a/drivers/staging/fsl_ppfe/pfe_debugfs.c b/drivers/staging/fsl_ppfe/pfe_debugfs.c new file mode 100644 index 0000000..4156610 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_debugfs.c @@ -0,0 +1,111 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/platform_device.h> + +#include "pfe_mod.h" + +static int dmem_show(struct seq_file *s, void *unused) +{ + u32 dmem_addr, val; + int id = (long int)s->private; + int i; + + for (dmem_addr = 0; dmem_addr < CLASS_DMEM_SIZE; dmem_addr += 8 * 4) { + seq_printf(s, "%04x:", dmem_addr); + + for (i = 0; i < 8; i++) { + val = pe_dmem_read(id, dmem_addr + i * 4, 4); + seq_printf(s, " %02x %02x %02x %02x", val & 0xff, + (val >> 8) & 0xff, (val >> 16) & 0xff, + (val >> 24) & 0xff); + } + + seq_puts(s, "\n"); + } + + return 0; +} + +static int dmem_open(struct inode *inode, struct file *file) +{ + return single_open(file, dmem_show, inode->i_private); +} + +static const struct file_operations dmem_fops = { + .open = dmem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int pfe_debugfs_init(struct pfe *pfe) +{ + struct dentry *d; + + pr_info("%s\n", __func__); + + pfe->dentry = debugfs_create_dir("pfe", NULL); + if (IS_ERR_OR_NULL(pfe->dentry)) + goto err_dir; + + d = debugfs_create_file("pe0_dmem", 0444, pfe->dentry, (void *)0, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + d = debugfs_create_file("pe1_dmem", 0444, pfe->dentry, (void *)1, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + d = debugfs_create_file("pe2_dmem", 0444, pfe->dentry, (void *)2, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + d = debugfs_create_file("pe3_dmem", 0444, pfe->dentry, (void *)3, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + d = debugfs_create_file("pe4_dmem", 0444, pfe->dentry, (void *)4, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + d = debugfs_create_file("pe5_dmem", 0444, pfe->dentry, (void *)5, + &dmem_fops); + if (IS_ERR_OR_NULL(d)) + goto err_pe; + + return 0; + +err_pe: + debugfs_remove_recursive(pfe->dentry); + +err_dir: + return -1; +} + +void pfe_debugfs_exit(struct pfe *pfe) +{ + debugfs_remove_recursive(pfe->dentry); +} diff --git a/drivers/staging/fsl_ppfe/pfe_debugfs.h b/drivers/staging/fsl_ppfe/pfe_debugfs.h new file mode 100644 index 0000000..301d9fc --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_debugfs.h @@ -0,0 +1,25 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _PFE_DEBUGFS_H_ +#define _PFE_DEBUGFS_H_ + +int pfe_debugfs_init(struct pfe *pfe); +void pfe_debugfs_exit(struct pfe *pfe); + +#endif /* _PFE_DEBUGFS_H_ */ diff --git a/drivers/staging/fsl_ppfe/pfe_eth.c b/drivers/staging/fsl_ppfe/pfe_eth.c new file mode 100644 index 0000000..7d4493f --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_eth.c @@ -0,0 +1,2474 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. 
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* @pfe_eth.c.
+ * Ethernet driver to handle the exception path for PFE.
+ *  - uses HIF functions to send/receive packets.
+ *  - uses ctrl functions to start/stop interfaces.
+ *  - uses direct register accesses to control phy operation.
+ */
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/timer.h>
+#include <linux/hrtimer.h>
+#include <linux/platform_device.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <linux/io.h>
+#include <asm/irq.h>
+#include <linux/delay.h>
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+#include "pfe_mod.h"
+#include "pfe_eth.h"
+
+static void *cbus_emac_base[3];
+static void *cbus_gpi_base[3];
+
+/* Forward Declarations */
+static void pfe_eth_exit_one(struct pfe_eth_priv_s *priv);
+static void pfe_eth_flush_tx(struct pfe_eth_priv_s *priv);
+static void pfe_eth_flush_txQ(struct pfe_eth_priv_s *priv, int tx_q_num, int
+			      from_tx, int n_desc);
+
+unsigned int gemac_regs[] = {
+	0x0004, /* Interrupt event */
+	0x0008, /* Interrupt mask */
+	0x0024, /* Ethernet control */
+	0x0064, /* MIB Control/Status */
+	0x0084, /* Receive control/status */
+	0x00C4, /* Transmit control */
+	0x00E4, /* Physical address low */
+	0x00E8, /* Physical address high */
+	0x0144, /* Transmit FIFO Watermark and Store and Forward Control */
+	0x0190, /* Receive FIFO Section Full Threshold */
+	0x01A0, /* Transmit FIFO Section Empty Threshold */
+	0x01B0, /* Frame Truncation Length */
+};
+
+/********************************************************************/
+/*                       SYSFS INTERFACE                            */
+/********************************************************************/
+
+#ifdef PFE_ETH_NAPI_STATS
+/*
+ * pfe_eth_show_napi_stats
+ */
+static ssize_t pfe_eth_show_napi_stats(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	ssize_t len = 0;
+
+	len += sprintf(buf + len, "sched: %u\n",
+		       priv->napi_counters[NAPI_SCHED_COUNT]);
+	len += sprintf(buf + len, "poll: %u\n",
+		       priv->napi_counters[NAPI_POLL_COUNT]);
+	len += sprintf(buf + len, "packet: %u\n",
+		       priv->napi_counters[NAPI_PACKET_COUNT]);
+	len += sprintf(buf + len, "budget: %u\n",
+		       priv->napi_counters[NAPI_FULL_BUDGET_COUNT]);
+	len += sprintf(buf + len, "desc: %u\n",
+		       priv->napi_counters[NAPI_DESC_COUNT]);
+
+	return len;
+}
+
+/*
+ * pfe_eth_set_napi_stats
+ */
+static ssize_t pfe_eth_set_napi_stats(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+
+	memset(priv->napi_counters, 0, sizeof(priv->napi_counters));
+
+	return count;
+}
+#endif
+#ifdef PFE_ETH_TX_STATS
+/* pfe_eth_show_tx_stats
+ *
+ */
+static ssize_t pfe_eth_show_tx_stats(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	ssize_t len = 0;
+	int i;
+
+	len += sprintf(buf + len, "TX queues stats:\n");
+
+	for (i = 0; i < emac_txq_cnt; i++) {
+		struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev,
+								    i);
+
+		len += sprintf(buf + len, "\n");
+		__netif_tx_lock_bh(tx_queue);
+
+		hif_tx_lock(&pfe->hif);
+		len += sprintf(buf + len,
+			       "Queue %2d : credits = %10d\n"
+			       , i, hif_lib_tx_credit_avail(pfe, priv->id, i));
+		len += sprintf(buf + len,
+			       " tx packets = %10d\n"
+			       , pfe->tmu_credit.tx_packets[priv->id][i]);
+		hif_tx_unlock(&pfe->hif);
+
+		/* Don't output additional stats if the queue was never used */
+		if (!pfe->tmu_credit.tx_packets[priv->id][i])
+			goto skip;
+
+		len += sprintf(buf + len,
+			       " clean_fail = %10d\n"
+			       , priv->clean_fail[i]);
+		len += sprintf(buf + len,
+			       " stop_queue = %10d\n"
+			       , priv->stop_queue_total[i]);
+		len += sprintf(buf + len,
+			       " stop_queue_hif = %10d\n"
+			       , priv->stop_queue_hif[i]);
+		len += sprintf(buf + len,
+			       " stop_queue_hif_client = %10d\n"
+			       , priv->stop_queue_hif_client[i]);
+		len += sprintf(buf + len,
+			       " stop_queue_credit = %10d\n"
+			       , priv->stop_queue_credit[i]);
+skip:
+		__netif_tx_unlock_bh(tx_queue);
+	}
+	return len;
+}
+
+/* pfe_eth_set_tx_stats
+ *
+ */
+static ssize_t pfe_eth_set_tx_stats(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	int i;
+
+	for (i = 0; i < emac_txq_cnt; i++) {
+		struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev,
+								    i);
+
+		__netif_tx_lock_bh(tx_queue);
+		priv->clean_fail[i] = 0;
+		priv->stop_queue_total[i] = 0;
+		priv->stop_queue_hif[i] = 0;
+		priv->stop_queue_hif_client[i] = 0;
+		priv->stop_queue_credit[i] = 0;
+		__netif_tx_unlock_bh(tx_queue);
+	}
+
+	return count;
+}
+#endif
+/* pfe_eth_show_txavail
+ *
+ */
+static ssize_t pfe_eth_show_txavail(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; i < emac_txq_cnt; i++) {
+		struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev,
+								    i);
+
+		__netif_tx_lock_bh(tx_queue);
+
+		len += sprintf(buf + len, "%d",
+			       hif_lib_tx_avail(&priv->client, i));
+
+		__netif_tx_unlock_bh(tx_queue);
+
+		if (i == (emac_txq_cnt - 1))
+			len += sprintf(buf + len, "\n");
+		else
+			len += sprintf(buf + len, " ");
+	}
+
+	return len;
+}
+
+/* pfe_eth_show_default_priority
+ *
+ */
+static ssize_t pfe_eth_show_default_priority(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	rc = sprintf(buf, "%d\n", priv->default_priority);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return rc;
+}
+
+/* pfe_eth_set_default_priority
+ *
+ */
+
+static ssize_t pfe_eth_set_default_priority(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(to_net_dev(dev));
+	unsigned long flags, prio;
+
+	/* kstrtoul() returns an error code, not the parsed value; the value
+	 * must be taken through the third argument
+	 */
+	if (kstrtoul(buf, 0, &prio))
+		return -EINVAL;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	priv->default_priority = prio;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return count;
+}
+
+static DEVICE_ATTR(txavail, 0444, pfe_eth_show_txavail, NULL);
+static DEVICE_ATTR(default_priority, 0644, pfe_eth_show_default_priority,
+		   pfe_eth_set_default_priority);
+
+#ifdef PFE_ETH_NAPI_STATS
+static DEVICE_ATTR(napi_stats, 0644, pfe_eth_show_napi_stats,
+		   pfe_eth_set_napi_stats);
+#endif
+
+#ifdef PFE_ETH_TX_STATS
+static DEVICE_ATTR(tx_stats, 0644, pfe_eth_show_tx_stats,
+		   pfe_eth_set_tx_stats);
+#endif
+
+/*
+ * pfe_eth_sysfs_init
+ *
+ */
+static int pfe_eth_sysfs_init(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	int err;
+
+	/* Initialize the default values */
+
+	/*
+	 * By default, packets without conntrack will use this default low
+	 * priority queue
+	 */
+	priv->default_priority = 0;
+
+	/* Create our sysfs files */
+	err = device_create_file(&ndev->dev, &dev_attr_default_priority);
+	if (err) {
+		netdev_err(ndev,
+			   "failed to create default_priority sysfs files\n");
+		goto err_priority;
+	}
+
+	err = device_create_file(&ndev->dev, &dev_attr_txavail);
+	if (err) {
+		netdev_err(ndev,
+			   "failed to create txavail sysfs files\n");
+		goto err_txavail;
+	}
+
+#ifdef PFE_ETH_NAPI_STATS
+	err = device_create_file(&ndev->dev, &dev_attr_napi_stats);
+	if (err) {
+		netdev_err(ndev, "failed to create napi stats sysfs files\n");
+		goto err_napi;
+	}
+#endif
+
+#ifdef PFE_ETH_TX_STATS
+	err = device_create_file(&ndev->dev, &dev_attr_tx_stats);
+	if (err) {
+		netdev_err(ndev, "failed to create tx stats sysfs files\n");
+		goto err_tx;
+	}
+#endif
+
+	return 0;
+
+#ifdef PFE_ETH_TX_STATS
+err_tx:
+#endif
+#ifdef PFE_ETH_NAPI_STATS
+	device_remove_file(&ndev->dev, &dev_attr_napi_stats);
+
+err_napi:
+#endif
+	device_remove_file(&ndev->dev, &dev_attr_txavail);
+
+err_txavail:
+	device_remove_file(&ndev->dev, &dev_attr_default_priority);
+
+err_priority:
+	return -1;
+}
+
+/* pfe_eth_sysfs_exit
+ *
+ */
+void pfe_eth_sysfs_exit(struct net_device *ndev)
+{
+#ifdef PFE_ETH_TX_STATS
+	device_remove_file(&ndev->dev, &dev_attr_tx_stats);
+#endif
+
+#ifdef PFE_ETH_NAPI_STATS
+	device_remove_file(&ndev->dev, &dev_attr_napi_stats);
+#endif
+	device_remove_file(&ndev->dev, &dev_attr_txavail);
+	device_remove_file(&ndev->dev, &dev_attr_default_priority);
+}
+
+/*************************************************************************/
+/*                        ETHTOOL INTERFACE                              */
+/*************************************************************************/
+
+/* MTIP GEMAC */
+static const struct fec_stat {
+	char name[ETH_GSTRING_LEN];
+	u16 offset;
+} fec_stats[] = {
+	/* RMON TX */
+	{ "tx_dropped", RMON_T_DROP },
+	{ "tx_packets", RMON_T_PACKETS },
+	{ "tx_broadcast", RMON_T_BC_PKT },
+	{ "tx_multicast", RMON_T_MC_PKT },
+	{ "tx_crc_errors", RMON_T_CRC_ALIGN },
+	{ "tx_undersize", RMON_T_UNDERSIZE },
+	{ "tx_oversize", RMON_T_OVERSIZE },
+	{ "tx_fragment", RMON_T_FRAG },
+	{ "tx_jabber", RMON_T_JAB },
+	{ "tx_collision", RMON_T_COL },
+	{ "tx_64byte", RMON_T_P64 },
+	{ "tx_65to127byte", RMON_T_P65TO127 },
+	{ "tx_128to255byte", RMON_T_P128TO255 },
+	{ "tx_256to511byte", RMON_T_P256TO511 },
+	{ "tx_512to1023byte", RMON_T_P512TO1023 },
+	{ "tx_1024to2047byte", RMON_T_P1024TO2047 },
+	{ "tx_GTE2048byte", RMON_T_P_GTE2048 },
+	{ "tx_octets", RMON_T_OCTETS },
+
+	/* IEEE TX */
+	{ "IEEE_tx_drop", IEEE_T_DROP },
+	{ "IEEE_tx_frame_ok", IEEE_T_FRAME_OK },
+	{ "IEEE_tx_1col", IEEE_T_1COL },
+	{ "IEEE_tx_mcol", IEEE_T_MCOL },
+	{ "IEEE_tx_def", IEEE_T_DEF },
+	{ "IEEE_tx_lcol", IEEE_T_LCOL },
+	{ "IEEE_tx_excol", IEEE_T_EXCOL },
+	{ "IEEE_tx_macerr",
IEEE_T_MACERR }, + { "IEEE_tx_cserr", IEEE_T_CSERR }, + { "IEEE_tx_sqe", IEEE_T_SQE }, + { "IEEE_tx_fdxfc", IEEE_T_FDXFC }, + { "IEEE_tx_octets_ok", IEEE_T_OCTETS_OK }, + + /* RMON RX */ + { "rx_packets", RMON_R_PACKETS }, + { "rx_broadcast", RMON_R_BC_PKT }, + { "rx_multicast", RMON_R_MC_PKT }, + { "rx_crc_errors", RMON_R_CRC_ALIGN }, + { "rx_undersize", RMON_R_UNDERSIZE }, + { "rx_oversize", RMON_R_OVERSIZE }, + { "rx_fragment", RMON_R_FRAG }, + { "rx_jabber", RMON_R_JAB }, + { "rx_64byte", RMON_R_P64 }, + { "rx_65to127byte", RMON_R_P65TO127 }, + { "rx_128to255byte", RMON_R_P128TO255 }, + { "rx_256to511byte", RMON_R_P256TO511 }, + { "rx_512to1023byte", RMON_R_P512TO1023 }, + { "rx_1024to2047byte", RMON_R_P1024TO2047 }, + { "rx_GTE2048byte", RMON_R_P_GTE2048 }, + { "rx_octets", RMON_R_OCTETS }, + + /* IEEE RX */ + { "IEEE_rx_drop", IEEE_R_DROP }, + { "IEEE_rx_frame_ok", IEEE_R_FRAME_OK }, + { "IEEE_rx_crc", IEEE_R_CRC }, + { "IEEE_rx_align", IEEE_R_ALIGN }, + { "IEEE_rx_macerr", IEEE_R_MACERR }, + { "IEEE_rx_fdxfc", IEEE_R_FDXFC }, + { "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK }, +}; + +static void pfe_eth_fill_stats(struct net_device *ndev, struct ethtool_stats + *stats, u64 *data) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + int i; + + for (i = 0; i < ARRAY_SIZE(fec_stats); i++) + data[i] = readl(priv->EMAC_baseaddr + fec_stats[i].offset); +} + +static void pfe_eth_gstrings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(fec_stats); i++) + memcpy(data + i * ETH_GSTRING_LEN, + fec_stats[i].name, ETH_GSTRING_LEN); + break; + } +} + +static int pfe_eth_stats_count(struct net_device *ndev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(fec_stats); + default: + return -EOPNOTSUPP; + } +} + +/* + * pfe_eth_gemac_reglen - Return the length of the register structure. + * + */ +static int pfe_eth_gemac_reglen(struct net_device *ndev) +{ + pr_info("%s()\n", __func__); + return (sizeof(gemac_regs) / sizeof(u32)); +} + +/* + * pfe_eth_gemac_get_regs - Return the gemac register structure. + * + */ +static void pfe_eth_gemac_get_regs(struct net_device *ndev, struct ethtool_regs + *regs, void *regbuf) +{ + int i; + + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + u32 *buf = (u32 *)regbuf; + + pr_info("%s()\n", __func__); + for (i = 0; i < sizeof(gemac_regs) / sizeof(u32); i++) + buf[i] = readl(priv->EMAC_baseaddr + gemac_regs[i]); +} + +/* + * pfe_eth_set_wol - Set the magic packet option, in WoL register. + * + */ +static int pfe_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + + if (wol->wolopts & ~WAKE_MAGIC) + return -EOPNOTSUPP; + + /* for MTIP we store wol->wolopts */ + priv->wol = wol->wolopts; + + device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC); + + return 0; +} + +/* + * + * pfe_eth_get_wol - Get the WoL options. 
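+ *
+ * Only WAKE_MAGIC is ever reported, mirroring pfe_eth_set_wol() above.
+ * As a hedged illustration (interface name hypothetical, not from this
+ * patch), the pair can be exercised from user space with the standard
+ * ethtool commands:
+ *
+ *	ethtool -s eth0 wol g	(request magic-packet wake-up)
+ *	ethtool eth0		(the "Wake-on:" line reflects priv->wol)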
+ *
+ */
+static void pfe_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo
+			    *wol)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	wol->supported = WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	if (priv->wol & WAKE_MAGIC)
+		wol->wolopts = WAKE_MAGIC;
+
+	memset(&wol->sopass, 0, sizeof(wol->sopass));
+}
+
+/*
+ * pfe_eth_get_drvinfo - Fills in the drvinfo structure with some basic info
+ *
+ */
+static void pfe_eth_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo
+				*drvinfo)
+{
+	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+	strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
+	strlcpy(drvinfo->bus_info, "N/A", sizeof(drvinfo->bus_info));
+}
+
+/*
+ * pfe_eth_set_settings - Used to send commands to the PHY.
+ *
+ */
+static int pfe_eth_set_settings(struct net_device *ndev,
+				const struct ethtool_link_ksettings *cmd)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	struct phy_device *phydev = priv->phydev;
+
+	if (!phydev)
+		return -ENODEV;
+
+	return phy_ethtool_ksettings_set(phydev, cmd);
+}
+
+/*
+ * pfe_eth_get_settings - Return the current settings in the
+ * ethtool_link_ksettings structure.
+ *
+ */
+static int pfe_eth_get_settings(struct net_device *ndev,
+				struct ethtool_link_ksettings *cmd)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	struct phy_device *phydev = priv->phydev;
+
+	if (!phydev)
+		return -ENODEV;
+
+	phy_ethtool_ksettings_get(phydev, cmd);
+
+	return 0;
+}
+
+/*
+ * pfe_eth_get_msglevel - Gets the debug message mask.
+ *
+ */
+static uint32_t pfe_eth_get_msglevel(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	return priv->msg_enable;
+}
+
+/*
+ * pfe_eth_set_msglevel - Sets the debug message mask.
+ *
+ */
+static void pfe_eth_set_msglevel(struct net_device *ndev, uint32_t data)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	priv->msg_enable = data;
+}
+
+#define HIF_RX_COAL_MAX_CLKS		(~(1 << 31))
+#define HIF_RX_COAL_CLKS_PER_USEC	(pfe->ctrl.sys_clk / 1000)
+#define HIF_RX_COAL_MAX_USECS		(HIF_RX_COAL_MAX_CLKS / \
+					 HIF_RX_COAL_CLKS_PER_USEC)
+
+/*
+ * pfe_eth_set_coalesce - Sets the rx interrupt coalescing timer.
+ *
+ */
+static int pfe_eth_set_coalesce(struct net_device *ndev,
+				struct ethtool_coalesce *ec)
+{
+	if (ec->rx_coalesce_usecs > HIF_RX_COAL_MAX_USECS)
+		return -EINVAL;
+
+	if (!ec->rx_coalesce_usecs) {
+		writel(0, HIF_INT_COAL);
+		return 0;
+	}
+
+	writel((ec->rx_coalesce_usecs * HIF_RX_COAL_CLKS_PER_USEC) |
+	       HIF_INT_COAL_ENABLE, HIF_INT_COAL);
+
+	return 0;
+}
+
+/*
+ * pfe_eth_get_coalesce - Gets the rx interrupt coalescing timer value.
+ *
+ */
+static int pfe_eth_get_coalesce(struct net_device *ndev,
+				struct ethtool_coalesce *ec)
+{
+	int reg_val = readl(HIF_INT_COAL);
+
+	if (reg_val & HIF_INT_COAL_ENABLE)
+		ec->rx_coalesce_usecs = (reg_val & HIF_RX_COAL_MAX_CLKS) /
+					HIF_RX_COAL_CLKS_PER_USEC;
+	else
+		ec->rx_coalesce_usecs = 0;
+
+	return 0;
+}
+
+/*
+ * pfe_eth_set_pauseparam - Sets pause parameters
+ *
+ */
+static int pfe_eth_set_pauseparam(struct net_device *ndev,
+				  struct ethtool_pauseparam *epause)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	if (epause->tx_pause != epause->rx_pause) {
+		netdev_info(ndev,
+			    "the hardware only supports enabling/disabling tx and rx pause together\n");
+		return -EINVAL;
+	}
+
+	priv->pause_flag = 0;
+	priv->pause_flag |= epause->rx_pause ? PFE_PAUSE_FLAG_ENABLE : 0;
+	priv->pause_flag |= epause->autoneg ?
PFE_PAUSE_FLAG_AUTONEG : 0; + + if (epause->rx_pause || epause->autoneg) { + gemac_enable_pause_rx(priv->EMAC_baseaddr); + writel((readl(priv->GPI_baseaddr + GPI_TX_PAUSE_TIME) | + EGPI_PAUSE_ENABLE), + priv->GPI_baseaddr + GPI_TX_PAUSE_TIME); + if (priv->phydev) { + priv->phydev->supported |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + priv->phydev->advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + } + } else { + gemac_disable_pause_rx(priv->EMAC_baseaddr); + writel((readl(priv->GPI_baseaddr + GPI_TX_PAUSE_TIME) & + ~EGPI_PAUSE_ENABLE), + priv->GPI_baseaddr + GPI_TX_PAUSE_TIME); + if (priv->phydev) { + priv->phydev->supported &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + priv->phydev->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + } + } + + return 0; +} + +/* + * pfe_eth_get_pauseparam - Gets pause parameters + * + */ +static void pfe_eth_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *epause) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + + epause->autoneg = (priv->pause_flag & PFE_PAUSE_FLAG_AUTONEG) != 0; + epause->tx_pause = (priv->pause_flag & PFE_PAUSE_FLAG_ENABLE) != 0; + epause->rx_pause = epause->tx_pause; +} + +/* + * pfe_eth_get_hash + */ +#define PFE_HASH_BITS 6 /* #bits in hash */ +#define CRC32_POLY 0xEDB88320 + +static int pfe_eth_get_hash(u8 *addr) +{ + unsigned int i, bit, data, crc, hash; + + /* calculate crc32 value of mac address */ + crc = 0xffffffff; + + for (i = 0; i < 6; i++) { + data = addr[i]; + for (bit = 0; bit < 8; bit++, data >>= 1) { + crc = (crc >> 1) ^ + (((crc ^ data) & 1) ? CRC32_POLY : 0); + } + } + + /* + * only upper 6 bits (PFE_HASH_BITS) are used + * which point to specific bit in the hash registers + */ + hash = (crc >> (32 - PFE_HASH_BITS)) & 0x3f; + + return hash; +} + +const struct ethtool_ops pfe_ethtool_ops = { + .get_drvinfo = pfe_eth_get_drvinfo, + .get_regs_len = pfe_eth_gemac_reglen, + .get_regs = pfe_eth_gemac_get_regs, + .get_link = ethtool_op_get_link, + .get_wol = pfe_eth_get_wol, + .set_wol = pfe_eth_set_wol, + .set_pauseparam = pfe_eth_set_pauseparam, + .get_pauseparam = pfe_eth_get_pauseparam, + .get_strings = pfe_eth_gstrings, + .get_sset_count = pfe_eth_stats_count, + .get_ethtool_stats = pfe_eth_fill_stats, + .get_msglevel = pfe_eth_get_msglevel, + .set_msglevel = pfe_eth_set_msglevel, + .set_coalesce = pfe_eth_set_coalesce, + .get_coalesce = pfe_eth_get_coalesce, + .get_link_ksettings = pfe_eth_get_settings, + .set_link_ksettings = pfe_eth_set_settings, +}; + +/* pfe_eth_mdio_reset + */ +int pfe_eth_mdio_reset(struct mii_bus *bus) +{ + struct pfe_eth_priv_s *priv = (struct pfe_eth_priv_s *)bus->priv; + u32 phy_speed; + + netif_info(priv, hw, priv->ndev, "%s\n", __func__); + + mutex_lock(&bus->mdio_lock); + + /* + * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed) + * + * The formula for FEC MDC is 'ref_freq / (MII_SPEED x 2)' while + * for ENET-MAC is 'ref_freq / ((MII_SPEED + 1) x 2)'. 
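+ *
+ * Worked example (illustrative figures, not taken from this patch): with
+ * pfe->ctrl.sys_clk = 250000 (kHz, i.e. a 250 MHz ref_freq), the
+ * DIV_ROUND_UP(250000 * 1000, 4000000) below yields MII_SPEED = 63, so
+ * the FEC formula gives MDC = 250 MHz / (63 * 2) ~= 1.98 MHz, safely at
+ * or below the 2.5 MHz target.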
+ */ + phy_speed = (DIV_ROUND_UP((pfe->ctrl.sys_clk * 1000), 4000000) + << EMAC_MII_SPEED_SHIFT); + phy_speed |= EMAC_HOLDTIME(0x5); + __raw_writel(phy_speed, priv->PHY_baseaddr + EMAC_MII_CTRL_REG); + + mutex_unlock(&bus->mdio_lock); + + return 0; +} + +/* pfe_eth_gemac_phy_timeout + * + */ +static int pfe_eth_gemac_phy_timeout(struct pfe_eth_priv_s *priv, int timeout) +{ + while (!(__raw_readl(priv->PHY_baseaddr + EMAC_IEVENT_REG) & + EMAC_IEVENT_MII)) { + if (timeout-- <= 0) + return -1; + usleep_range(10, 20); + } + __raw_writel(EMAC_IEVENT_MII, priv->PHY_baseaddr + EMAC_IEVENT_REG); + return 0; +} + +static int pfe_eth_mdio_mux(u8 muxval) +{ + struct i2c_adapter *a; + struct i2c_msg msg; + unsigned char buf[2]; + int ret; + + a = i2c_get_adapter(0); + if (!a) + return -ENODEV; + + /* set bit 1 (the second bit) of chip at 0x09, register 0x13 */ + buf[0] = 0x54; /* reg number */ + buf[1] = (muxval << 6) | 0x3; /* data */ + msg.addr = 0x66; + msg.buf = buf; + msg.len = 2; + msg.flags = 0; + ret = i2c_transfer(a, &msg, 1); + i2c_put_adapter(a); + if (ret != 1) + return -ENODEV; + return 0; +} + +static int pfe_eth_mdio_write_addr(struct mii_bus *bus, int mii_id, + int dev_addr, int regnum) +{ + struct pfe_eth_priv_s *priv = (struct pfe_eth_priv_s *)bus->priv; + + __raw_writel(EMAC_MII_DATA_PA(mii_id) | + EMAC_MII_DATA_RA(dev_addr) | + EMAC_MII_DATA_TA | EMAC_MII_DATA(regnum), + priv->PHY_baseaddr + EMAC_MII_DATA_REG); + + if (pfe_eth_gemac_phy_timeout(priv, EMAC_MDIO_TIMEOUT)) { + netdev_err(priv->ndev, "%s: phy MDIO address write timeout\n", + __func__); + return -1; + } + + return 0; +} + +static int pfe_eth_mdio_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) +{ + struct pfe_eth_priv_s *priv = (struct pfe_eth_priv_s *)bus->priv; + + /*To access external PHYs on QDS board mux needs to be configured*/ + if ((mii_id) && (pfe->mdio_muxval[mii_id])) + pfe_eth_mdio_mux(pfe->mdio_muxval[mii_id]); + + if (regnum & MII_ADDR_C45) { + pfe_eth_mdio_write_addr(bus, mii_id, (regnum >> 16) & 0x1f, + regnum & 0xffff); + __raw_writel(EMAC_MII_DATA_OP_CL45_WR | + EMAC_MII_DATA_PA(mii_id) | + EMAC_MII_DATA_RA((regnum >> 16) & 0x1f) | + EMAC_MII_DATA_TA | EMAC_MII_DATA(value), + priv->PHY_baseaddr + EMAC_MII_DATA_REG); + } else { + /* start a write op */ + __raw_writel(EMAC_MII_DATA_ST | EMAC_MII_DATA_OP_WR | + EMAC_MII_DATA_PA(mii_id) | + EMAC_MII_DATA_RA(regnum) | + EMAC_MII_DATA_TA | EMAC_MII_DATA(value), + priv->PHY_baseaddr + EMAC_MII_DATA_REG); + } + + if (pfe_eth_gemac_phy_timeout(priv, EMAC_MDIO_TIMEOUT)) { + netdev_err(priv->ndev, "%s: phy MDIO write timeout\n", + __func__); + return -1; + } + netif_info(priv, hw, priv->ndev, "%s: phy %x reg %x val %x\n", __func__, + mii_id, regnum, value); + + return 0; +} + +static int pfe_eth_mdio_read(struct mii_bus *bus, int mii_id, int regnum) +{ + struct pfe_eth_priv_s *priv = (struct pfe_eth_priv_s *)bus->priv; + u16 value = 0; + + /*To access external PHYs on QDS board mux needs to be configured*/ + if ((mii_id) && (pfe->mdio_muxval[mii_id])) + pfe_eth_mdio_mux(pfe->mdio_muxval[mii_id]); + + if (regnum & MII_ADDR_C45) { + pfe_eth_mdio_write_addr(bus, mii_id, (regnum >> 16) & 0x1f, + regnum & 0xffff); + __raw_writel(EMAC_MII_DATA_OP_CL45_RD | + EMAC_MII_DATA_PA(mii_id) | + EMAC_MII_DATA_RA((regnum >> 16) & 0x1f) | + EMAC_MII_DATA_TA, + priv->PHY_baseaddr + EMAC_MII_DATA_REG); + } else { + /* start a read op */ + __raw_writel(EMAC_MII_DATA_ST | EMAC_MII_DATA_OP_RD | + EMAC_MII_DATA_PA(mii_id) | + EMAC_MII_DATA_RA(regnum) | + 
EMAC_MII_DATA_TA, priv->PHY_baseaddr + + EMAC_MII_DATA_REG); + } + + if (pfe_eth_gemac_phy_timeout(priv, EMAC_MDIO_TIMEOUT)) { + netdev_err(priv->ndev, "%s: phy MDIO read timeout\n", __func__); + return -1; + } + + value = EMAC_MII_DATA(__raw_readl(priv->PHY_baseaddr + + EMAC_MII_DATA_REG)); + netif_info(priv, hw, priv->ndev, "%s: phy %x reg %x val %x\n", __func__, + mii_id, regnum, value); + return value; +} + +static int pfe_eth_mdio_init(struct pfe_eth_priv_s *priv, + struct ls1012a_mdio_platform_data *minfo) +{ + struct mii_bus *bus; + int rc, ii; + struct phy_device *phydev; + + netif_info(priv, drv, priv->ndev, "%s\n", __func__); + pr_info("%s\n", __func__); + + bus = mdiobus_alloc(); + if (!bus) { + netdev_err(priv->ndev, "mdiobus_alloc() failed\n"); + rc = -ENOMEM; + goto err0; + } + + bus->name = "ls1012a MDIO Bus"; + bus->read = &pfe_eth_mdio_read; + bus->write = &pfe_eth_mdio_write; + bus->reset = &pfe_eth_mdio_reset; + snprintf(bus->id, MII_BUS_ID_SIZE, "ls1012a-%x", priv->id); + bus->priv = priv; + + bus->phy_mask = minfo->phy_mask; + priv->mdc_div = minfo->mdc_div; + + if (!priv->mdc_div) + priv->mdc_div = 64; + + bus->irq[0] = minfo->irq[0]; + + bus->parent = priv->pfe->dev; + + netif_info(priv, drv, priv->ndev, "%s: mdc_div: %d, phy_mask: %x\n", + __func__, priv->mdc_div, bus->phy_mask); + rc = mdiobus_register(bus); + if (rc) { + netdev_err(priv->ndev, "mdiobus_register(%s) failed\n", + bus->name); + goto err1; + } + + priv->mii_bus = bus; + + /* For clause 45 we need to call get_phy_device() with it's + * 3rd argument as true and then register the phy device + * via phy_device_register() + */ + + if (priv->einfo->mii_config == PHY_INTERFACE_MODE_2500SGMII) { + for (ii = 0; ii < NUM_GEMAC_SUPPORT; ii++) { + phydev = get_phy_device(priv->mii_bus, + priv->einfo->phy_id + ii, true); + if (!phydev || IS_ERR(phydev)) { + rc = -EIO; + netdev_err(priv->ndev, "fail to get device\n"); + goto err1; + } + rc = phy_device_register(phydev); + if (rc) { + phy_device_free(phydev); + netdev_err(priv->ndev, + "phy_device_register() failed\n"); + goto err1; + } + } + } + + pfe_eth_mdio_reset(bus); + + return 0; + +err1: + mdiobus_free(bus); +err0: + return rc; +} + +/* pfe_eth_mdio_exit + */ +static void pfe_eth_mdio_exit(struct mii_bus *bus) +{ + if (!bus) + return; + + netif_info((struct pfe_eth_priv_s *)bus->priv, drv, ((struct + pfe_eth_priv_s *)(bus->priv))->ndev, "%s\n", __func__); + + mdiobus_unregister(bus); + mdiobus_free(bus); +} + +/* pfe_get_phydev_speed + */ +static int pfe_get_phydev_speed(struct phy_device *phydev) +{ + switch (phydev->speed) { + case 10: + return SPEED_10M; + case 100: + return SPEED_100M; + case 1000: + default: + return SPEED_1000M; + } +} + +/* pfe_set_rgmii_speed + */ +#define RGMIIPCR 0x434 +/* RGMIIPCR bit definitions*/ +#define SCFG_RGMIIPCR_EN_AUTO (0x00000008) +#define SCFG_RGMIIPCR_SETSP_1000M (0x00000004) +#define SCFG_RGMIIPCR_SETSP_100M (0x00000000) +#define SCFG_RGMIIPCR_SETSP_10M (0x00000002) +#define SCFG_RGMIIPCR_SETFD (0x00000001) + +static void pfe_set_rgmii_speed(struct phy_device *phydev) +{ + u32 rgmii_pcr; + + regmap_read(pfe->scfg, RGMIIPCR, &rgmii_pcr); + rgmii_pcr &= ~(SCFG_RGMIIPCR_SETSP_1000M | SCFG_RGMIIPCR_SETSP_10M); + + switch (phydev->speed) { + case 10: + rgmii_pcr |= SCFG_RGMIIPCR_SETSP_10M; + break; + case 1000: + rgmii_pcr |= SCFG_RGMIIPCR_SETSP_1000M; + break; + case 100: + default: + /* Default is 100M */ + break; + } + regmap_write(pfe->scfg, RGMIIPCR, rgmii_pcr); +} + +/* pfe_get_phydev_duplex + */ +static int 
pfe_get_phydev_duplex(struct phy_device *phydev)
+{
+	/* return (phydev->duplex == DUPLEX_HALF) ? DUP_HALF : DUP_FULL; */
+	return DUPLEX_FULL;
+}
+
+/* pfe_eth_adjust_link
+ */
+static void pfe_eth_adjust_link(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	unsigned long flags;
+	struct phy_device *phydev = priv->phydev;
+	int new_state = 0;
+
+	netif_info(priv, drv, ndev, "%s\n", __func__);
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (phydev->link) {
+		/*
+		 * Now we make sure that we can be in full duplex mode.
+		 * If not, we operate in half-duplex mode.
+		 */
+		if (phydev->duplex != priv->oldduplex) {
+			new_state = 1;
+			gemac_set_duplex(priv->EMAC_baseaddr,
+					 pfe_get_phydev_duplex(phydev));
+			priv->oldduplex = phydev->duplex;
+		}
+
+		if (phydev->speed != priv->oldspeed) {
+			new_state = 1;
+			gemac_set_speed(priv->EMAC_baseaddr,
+					pfe_get_phydev_speed(phydev));
+			if (priv->einfo->mii_config == PHY_INTERFACE_MODE_RGMII_TXID)
+				pfe_set_rgmii_speed(phydev);
+			priv->oldspeed = phydev->speed;
+		}
+
+		if (!priv->oldlink) {
+			new_state = 1;
+			priv->oldlink = 1;
+		}
+
+	} else if (priv->oldlink) {
+		new_state = 1;
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
+	}
+
+	if (new_state && netif_msg_link(priv))
+		phy_print_status(phydev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/* pfe_phy_exit
+ */
+static void pfe_phy_exit(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	netif_info(priv, drv, ndev, "%s\n", __func__);
+
+	phy_disconnect(priv->phydev);
+	priv->phydev = NULL;
+}
+
+/* pfe_eth_stop
+ */
+static void pfe_eth_stop(struct net_device *ndev, int wake)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	netif_info(priv, drv, ndev, "%s\n", __func__);
+
+	if (wake) {
+		gemac_tx_disable(priv->EMAC_baseaddr);
+	} else {
+		gemac_disable(priv->EMAC_baseaddr);
+		gpi_disable(priv->GPI_baseaddr);
+
+		if (priv->phydev)
+			phy_stop(priv->phydev);
+	}
+}
+
+/* pfe_eth_start
+ */
+static int pfe_eth_start(struct pfe_eth_priv_s *priv)
+{
+	netif_info(priv, drv, priv->ndev, "%s\n", __func__);
+
+	if (priv->phydev)
+		phy_start(priv->phydev);
+
+	gpi_enable(priv->GPI_baseaddr);
+	gemac_enable(priv->EMAC_baseaddr);
+
+	return 0;
+}
+
+/*
+ * Configure the on-chip serdes through mdio
+ */
+static void ls1012a_configure_serdes(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = pfe->eth.eth_priv[0];
+	int sgmii_2500 = 0;
+	struct mii_bus *bus = priv->mii_bus;
+	u16 value = 0;
+
+	if (priv->einfo->mii_config == PHY_INTERFACE_MODE_2500SGMII)
+		sgmii_2500 = 1;
+
+	netif_info(priv, drv, ndev, "%s\n", __func__);
+	/* PCS configuration is done through the corresponding GEMAC */
+
+	pfe_eth_mdio_read(bus, 0, 0);
+	pfe_eth_mdio_read(bus, 0, 1);
+
+	/* These settings were taken from the validation team */
+	pfe_eth_mdio_write(bus, 0, 0x0, 0x8000);
+	if (sgmii_2500) {
+		pfe_eth_mdio_write(bus, 0, 0x14, 0x9);
+		pfe_eth_mdio_write(bus, 0, 0x4, 0x4001);
+		pfe_eth_mdio_write(bus, 0, 0x12, 0xa120);
+		pfe_eth_mdio_write(bus, 0, 0x13, 0x7);
+		/* Auto-negotiation needs to be disabled for 2.5G SGMII mode */
+		value = 0x0140;
+		pfe_eth_mdio_write(bus, 0, 0x0, value);
+	} else {
+		pfe_eth_mdio_write(bus, 0, 0x14, 0xb);
+		pfe_eth_mdio_write(bus, 0, 0x4, 0x1a1);
+		pfe_eth_mdio_write(bus, 0, 0x12, 0x400);
+		pfe_eth_mdio_write(bus, 0, 0x13, 0x0);
+		pfe_eth_mdio_write(bus, 0, 0x0, 0x1140);
+	}
+}
+
+/*
+ * pfe_phy_init
+ *
+ */
+static int pfe_phy_init(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	struct phy_device
*phydev; + char phy_id[MII_BUS_ID_SIZE + 3]; + char bus_id[MII_BUS_ID_SIZE]; + phy_interface_t interface; + + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + + snprintf(bus_id, MII_BUS_ID_SIZE, "ls1012a-%d", 0); + snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, + priv->einfo->phy_id); + + netif_info(priv, drv, ndev, "%s: %s\n", __func__, phy_id); + interface = priv->einfo->mii_config; + if ((interface == PHY_INTERFACE_MODE_SGMII) || + (interface == PHY_INTERFACE_MODE_2500SGMII)) { + /*Configure SGMII PCS */ + if (pfe->scfg) { + /*Config MDIO from serdes */ + regmap_write(pfe->scfg, 0x484, 0x00000000); + } + ls1012a_configure_serdes(ndev); + } + + if (pfe->scfg) { + /*Config MDIO from PAD */ + regmap_write(pfe->scfg, 0x484, 0x80000000); + } + + priv->oldlink = 0; + priv->oldspeed = 0; + priv->oldduplex = -1; + pr_info("%s interface %x\n", __func__, interface); + phydev = phy_connect(ndev, phy_id, &pfe_eth_adjust_link, interface); + + if (IS_ERR(phydev)) { + netdev_err(ndev, "phy_connect() failed\n"); + return PTR_ERR(phydev); + } + + priv->phydev = phydev; + phydev->irq = PHY_POLL; + + return 0; +} + +/* pfe_gemac_init + */ +static int pfe_gemac_init(struct pfe_eth_priv_s *priv) +{ + struct gemac_cfg cfg; + + netif_info(priv, ifup, priv->ndev, "%s\n", __func__); + + cfg.speed = SPEED_1000M; + cfg.duplex = DUPLEX_FULL; + + gemac_set_config(priv->EMAC_baseaddr, &cfg); + gemac_allow_broadcast(priv->EMAC_baseaddr); + gemac_enable_1536_rx(priv->EMAC_baseaddr); + gemac_enable_rx_jmb(priv->EMAC_baseaddr); + gemac_enable_stacked_vlan(priv->EMAC_baseaddr); + gemac_enable_pause_rx(priv->EMAC_baseaddr); + gemac_set_bus_width(priv->EMAC_baseaddr, 64); + + /*GEM will perform checksum verifications*/ + if (priv->ndev->features & NETIF_F_RXCSUM) + gemac_enable_rx_checksum_offload(priv->EMAC_baseaddr); + else + gemac_disable_rx_checksum_offload(priv->EMAC_baseaddr); + + return 0; +} + +/* pfe_eth_event_handler + */ +static int pfe_eth_event_handler(void *data, int event, int qno) +{ + struct pfe_eth_priv_s *priv = data; + + switch (event) { + case EVENT_RX_PKT_IND: + + if (qno == 0) { + if (napi_schedule_prep(&priv->high_napi)) { + netif_info(priv, intr, priv->ndev, + "%s: schedule high prio poll\n" + , __func__); + +#ifdef PFE_ETH_NAPI_STATS + priv->napi_counters[NAPI_SCHED_COUNT]++; +#endif + + __napi_schedule(&priv->high_napi); + } + } else if (qno == 1) { + if (napi_schedule_prep(&priv->low_napi)) { + netif_info(priv, intr, priv->ndev, + "%s: schedule low prio poll\n" + , __func__); + +#ifdef PFE_ETH_NAPI_STATS + priv->napi_counters[NAPI_SCHED_COUNT]++; +#endif + __napi_schedule(&priv->low_napi); + } + } else if (qno == 2) { + if (napi_schedule_prep(&priv->lro_napi)) { + netif_info(priv, intr, priv->ndev, + "%s: schedule lro prio poll\n" + , __func__); + +#ifdef PFE_ETH_NAPI_STATS + priv->napi_counters[NAPI_SCHED_COUNT]++; +#endif + __napi_schedule(&priv->lro_napi); + } + } + + break; + + case EVENT_TXDONE_IND: + pfe_eth_flush_tx(priv); + hif_lib_event_handler_start(&priv->client, EVENT_TXDONE_IND, 0); + break; + case EVENT_HIGH_RX_WM: + default: + break; + } + + return 0; +} + +/* pfe_eth_open + */ +static int pfe_eth_open(struct net_device *ndev) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + struct hif_client_s *client; + int rc; + + netif_info(priv, ifup, ndev, "%s\n", __func__); + + /* Register client driver with HIF */ + client = &priv->client; + memset(client, 0, sizeof(*client)); + client->id = PFE_CL_GEM0 + priv->id; + client->tx_qn = emac_txq_cnt; + 
client->rx_qn = EMAC_RXQ_CNT; + client->priv = priv; + client->pfe = priv->pfe; + client->event_handler = pfe_eth_event_handler; + + client->tx_qsize = EMAC_TXQ_DEPTH; + client->rx_qsize = EMAC_RXQ_DEPTH; + + rc = hif_lib_client_register(client); + if (rc) { + netdev_err(ndev, "%s: hif_lib_client_register(%d) failed\n", + __func__, client->id); + goto err0; + } + + netif_info(priv, drv, ndev, "%s: registered client: %p\n", __func__, + client); + + pfe_gemac_init(priv); + + if (!is_valid_ether_addr(ndev->dev_addr)) { + netdev_err(ndev, "%s: invalid MAC address\n", __func__); + rc = -EADDRNOTAVAIL; + goto err1; + } + + gemac_set_laddrN(priv->EMAC_baseaddr, + (struct pfe_mac_addr *)ndev->dev_addr, 1); + + napi_enable(&priv->high_napi); + napi_enable(&priv->low_napi); + napi_enable(&priv->lro_napi); + + rc = pfe_eth_start(priv); + + netif_tx_wake_all_queues(ndev); + + return rc; + +err1: + hif_lib_client_unregister(&priv->client); + +err0: + return rc; +} + +/* + * pfe_eth_shutdown + */ +int pfe_eth_shutdown(struct net_device *ndev, int wake) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + int i, qstatus; + unsigned long next_poll = jiffies + 1, end = jiffies + + (TX_POLL_TIMEOUT_MS * HZ) / 1000; + int tx_pkts, prv_tx_pkts; + + netif_info(priv, ifdown, ndev, "%s\n", __func__); + + for (i = 0; i < emac_txq_cnt; i++) + hrtimer_cancel(&priv->fast_tx_timeout[i].timer); + + netif_tx_stop_all_queues(ndev); + + do { + tx_pkts = 0; + pfe_eth_flush_tx(priv); + + for (i = 0; i < emac_txq_cnt; i++) + tx_pkts += hif_lib_tx_pending(&priv->client, i); + + if (tx_pkts) { + /*Don't wait forever, break if we cross max timeout */ + if (time_after(jiffies, end)) { + pr_err( + "(%s)Tx is not complete after %dmsec\n", + ndev->name, TX_POLL_TIMEOUT_MS); + break; + } + + pr_info("%s : (%s) Waiting for tx packets to free. 
Pending tx pkts = %d.\n"
+				, __func__, ndev->name, tx_pkts);
+			if (need_resched())
+				schedule();
+		}
+
+	} while (tx_pkts);
+
+	end = jiffies + (TX_POLL_TIMEOUT_MS * HZ) / 1000;
+
+	prv_tx_pkts = tmu_pkts_processed(priv->id);
+	/*
+	 * Wait till the TMU transmits all pending packets:
+	 * poll tmu_qstatus and the TMU processed-packet count every 10ms,
+	 * and consider the TMU busy while its queue is pending or it is
+	 * still processing packets
+	 */
+	while (1) {
+		if (time_after(jiffies, next_poll)) {
+			tx_pkts = tmu_pkts_processed(priv->id);
+			qstatus = tmu_qstatus(priv->id) & 0x7ffff;
+
+			if (!qstatus && (tx_pkts == prv_tx_pkts))
+				break;
+			/* Don't wait forever, break if we cross the max
+			 * timeout (TX_POLL_TIMEOUT_MS)
+			 */
+			if (time_after(jiffies, end)) {
+				pr_err("TMU%d is busy after %dmsec\n",
+				       priv->id, TX_POLL_TIMEOUT_MS);
+				break;
+			}
+			prv_tx_pkts = tx_pkts;
+			next_poll++;
+		}
+		if (need_resched())
+			schedule();
+	}
+	/* Wait a little longer for any in-flight packet to finish transmitting */
+	next_poll = jiffies + 1;
+	while (1) {
+		if (time_after(jiffies, next_poll))
+			break;
+		if (need_resched())
+			schedule();
+	}
+
+	pfe_eth_stop(ndev, wake);
+
+	napi_disable(&priv->lro_napi);
+	napi_disable(&priv->low_napi);
+	napi_disable(&priv->high_napi);
+
+	hif_lib_client_unregister(&priv->client);
+
+	return 0;
+}
+
+/* pfe_eth_close
+ *
+ */
+static int pfe_eth_close(struct net_device *ndev)
+{
+	pfe_eth_shutdown(ndev, 0);
+
+	return 0;
+}
+
+/* pfe_eth_suspend
+ *
+ * return value : 1 if the netdevice is configured to wake up the system
+ *                0 otherwise
+ */
+int pfe_eth_suspend(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+	int retval = 0;
+
+	if (priv->wol) {
+		gemac_set_wol(priv->EMAC_baseaddr, priv->wol);
+		retval = 1;
+	}
+	pfe_eth_shutdown(ndev, priv->wol);
+
+	return retval;
+}
+
+/* pfe_eth_resume
+ *
+ */
+int pfe_eth_resume(struct net_device *ndev)
+{
+	struct pfe_eth_priv_s *priv = netdev_priv(ndev);
+
+	if (priv->wol)
+		gemac_set_wol(priv->EMAC_baseaddr, 0);
+	gemac_tx_enable(priv->EMAC_baseaddr);
+
+	return pfe_eth_open(ndev);
+}
+
+/* pfe_eth_get_queuenum
+ */
+static int pfe_eth_get_queuenum(struct pfe_eth_priv_s *priv, struct sk_buff
+				*skb)
+{
+	int queuenum = 0;
+	unsigned long flags;
+
+	/* Get the Fast Path queue number */
+	/*
+	 * Use the conntrack mark (if conntrack exists), then the packet
+	 * mark (if any), then fall back to the default
+	 */
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_MARK)
+	if (skb->nfct) {
+		enum ip_conntrack_info cinfo;
+		struct nf_conn *ct;
+
+		ct = nf_ct_get(skb, &cinfo);
+
+		if (ct) {
+			u32 connmark;
+
+			connmark = ct->mark;
+
+			if ((connmark & 0x80000000) && priv->id != 0)
+				connmark >>= 16;
+
+			queuenum = connmark & EMAC_QUEUENUM_MASK;
+		}
+	} else {/* continued after #endif ...
*/ +#endif + if (skb->mark) { + queuenum = skb->mark & EMAC_QUEUENUM_MASK; + } else { + spin_lock_irqsave(&priv->lock, flags); + queuenum = priv->default_priority & EMAC_QUEUENUM_MASK; + spin_unlock_irqrestore(&priv->lock, flags); + } +#if defined(CONFIG_IP_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_MARK) + } +#endif + return queuenum; +} + +/* pfe_eth_might_stop_tx + * + */ +static int pfe_eth_might_stop_tx(struct pfe_eth_priv_s *priv, int queuenum, + struct netdev_queue *tx_queue, + unsigned int n_desc, + unsigned int n_segs) +{ + ktime_t kt; + int tried = 0; + +try_again: + if (unlikely((__hif_tx_avail(&pfe->hif) < n_desc) || + (hif_lib_tx_avail(&priv->client, queuenum) < n_desc) || + (hif_lib_tx_credit_avail(pfe, priv->id, queuenum) < n_segs))) { + if (!tried) { + __hif_lib_update_credit(&priv->client, queuenum); + tried = 1; + goto try_again; + } +#ifdef PFE_ETH_TX_STATS + if (__hif_tx_avail(&pfe->hif) < n_desc) { + priv->stop_queue_hif[queuenum]++; + } else if (hif_lib_tx_avail(&priv->client, queuenum) < n_desc) { + priv->stop_queue_hif_client[queuenum]++; + } else if (hif_lib_tx_credit_avail(pfe, priv->id, queuenum) < + n_segs) { + priv->stop_queue_credit[queuenum]++; + } + priv->stop_queue_total[queuenum]++; +#endif + netif_tx_stop_queue(tx_queue); + + kt = ktime_set(0, LS1012A_TX_FAST_RECOVERY_TIMEOUT_MS * + NSEC_PER_MSEC); + hrtimer_start(&priv->fast_tx_timeout[queuenum].timer, kt, + HRTIMER_MODE_REL); + return -1; + } else { + return 0; + } +} + +#define SA_MAX_OP 2 +/* pfe_hif_send_packet + * + * At this level if TX fails we drop the packet + */ +static void pfe_hif_send_packet(struct sk_buff *skb, struct pfe_eth_priv_s + *priv, int queuenum) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int nr_frags; + u32 ctrl = 0; + + netif_info(priv, tx_queued, priv->ndev, "%s\n", __func__); + + if (skb_is_gso(skb)) { + priv->stats.tx_dropped++; + return; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) + ctrl = HIF_CTRL_TX_CHECKSUM; + + nr_frags = sh->nr_frags; + + if (nr_frags) { + skb_frag_t *f; + int i; + + __hif_lib_xmit_pkt(&priv->client, queuenum, skb->data, + skb_headlen(skb), ctrl, HIF_FIRST_BUFFER, + skb); + + for (i = 0; i < nr_frags - 1; i++) { + f = &sh->frags[i]; + __hif_lib_xmit_pkt(&priv->client, queuenum, + skb_frag_address(f), + skb_frag_size(f), + 0x0, 0x0, skb); + } + + f = &sh->frags[i]; + + __hif_lib_xmit_pkt(&priv->client, queuenum, + skb_frag_address(f), skb_frag_size(f), + 0x0, HIF_LAST_BUFFER | HIF_DATA_VALID, + skb); + + netif_info(priv, tx_queued, priv->ndev, + "%s: pkt sent successfully skb:%p nr_frags:%d len:%d\n", + __func__, skb, nr_frags, skb->len); + } else { + __hif_lib_xmit_pkt(&priv->client, queuenum, skb->data, + skb->len, ctrl, HIF_FIRST_BUFFER | + HIF_LAST_BUFFER | HIF_DATA_VALID, + skb); + netif_info(priv, tx_queued, priv->ndev, + "%s: pkt sent successfully skb:%p len:%d\n", + __func__, skb, skb->len); + } + hif_tx_dma_start(); + priv->stats.tx_packets++; + priv->stats.tx_bytes += skb->len; + hif_lib_tx_credit_use(pfe, priv->id, queuenum, 1); +} + +/* pfe_eth_flush_txQ + */ +static void pfe_eth_flush_txQ(struct pfe_eth_priv_s *priv, int tx_q_num, int + from_tx, int n_desc) +{ + struct sk_buff *skb; + struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev, + tx_q_num); + unsigned int flags; + + netif_info(priv, tx_done, priv->ndev, "%s\n", __func__); + + if (!from_tx) + __netif_tx_lock_bh(tx_queue); + + /* Clean HIF and client queue */ + while ((skb = hif_lib_tx_get_next_complete(&priv->client, + tx_q_num, &flags, + 
HIF_TX_DESC_NT))) { + if (flags & HIF_DATA_VALID) + dev_kfree_skb_any(skb); + } + if (!from_tx) + __netif_tx_unlock_bh(tx_queue); +} + +/* pfe_eth_flush_tx + */ +static void pfe_eth_flush_tx(struct pfe_eth_priv_s *priv) +{ + int ii; + + netif_info(priv, tx_done, priv->ndev, "%s\n", __func__); + + for (ii = 0; ii < emac_txq_cnt; ii++) { + pfe_eth_flush_txQ(priv, ii, 0, 0); + __hif_lib_update_credit(&priv->client, ii); + } +} + +void pfe_tx_get_req_desc(struct sk_buff *skb, unsigned int *n_desc, unsigned int + *n_segs) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + + /* Scattered data */ + if (sh->nr_frags) { + *n_desc = sh->nr_frags + 1; + *n_segs = 1; + /* Regular case */ + } else { + *n_desc = 1; + *n_segs = 1; + } +} + +/* pfe_eth_send_packet + */ +static int pfe_eth_send_packet(struct sk_buff *skb, struct net_device *ndev) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + int tx_q_num = skb_get_queue_mapping(skb); + int n_desc, n_segs; + struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev, + tx_q_num); + + netif_info(priv, tx_queued, ndev, "%s\n", __func__); + + if ((!skb_is_gso(skb)) && (skb_headroom(skb) < (PFE_PKT_HEADER_SZ + + sizeof(unsigned long)))) { + netif_warn(priv, tx_err, priv->ndev, "%s: copying skb\n", + __func__); + + if (pskb_expand_head(skb, (PFE_PKT_HEADER_SZ + sizeof(unsigned + long)), 0, GFP_ATOMIC)) { + /* No need to re-transmit, no way to recover*/ + kfree_skb(skb); + priv->stats.tx_dropped++; + return NETDEV_TX_OK; + } + } + + pfe_tx_get_req_desc(skb, &n_desc, &n_segs); + + hif_tx_lock(&pfe->hif); + if (unlikely(pfe_eth_might_stop_tx(priv, tx_q_num, tx_queue, n_desc, + n_segs))) { +#ifdef PFE_ETH_TX_STATS + if (priv->was_stopped[tx_q_num]) { + priv->clean_fail[tx_q_num]++; + priv->was_stopped[tx_q_num] = 0; + } +#endif + hif_tx_unlock(&pfe->hif); + return NETDEV_TX_BUSY; + } + + pfe_hif_send_packet(skb, priv, tx_q_num); + + hif_tx_unlock(&pfe->hif); + + tx_queue->trans_start = jiffies; + +#ifdef PFE_ETH_TX_STATS + priv->was_stopped[tx_q_num] = 0; +#endif + + return NETDEV_TX_OK; +} + +/* pfe_eth_select_queue + * + */ +static u16 pfe_eth_select_queue(struct net_device *ndev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + + return pfe_eth_get_queuenum(priv, skb); +} + +/* pfe_eth_get_stats + */ +static struct net_device_stats *pfe_eth_get_stats(struct net_device *ndev) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + + netif_info(priv, drv, ndev, "%s\n", __func__); + + return &priv->stats; +} + +/* pfe_eth_set_mac_address + */ +static int pfe_eth_set_mac_address(struct net_device *ndev, void *addr) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + struct sockaddr *sa = addr; + + netif_info(priv, drv, ndev, "%s\n", __func__); + + if (!is_valid_ether_addr(sa->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(ndev->dev_addr, sa->sa_data, ETH_ALEN); + + gemac_set_laddrN(priv->EMAC_baseaddr, + (struct pfe_mac_addr *)ndev->dev_addr, 1); + + return 0; +} + +/* pfe_eth_enet_addr_byte_mac + */ +int pfe_eth_enet_addr_byte_mac(u8 *enet_byte_addr, + struct pfe_mac_addr *enet_addr) +{ + if (!enet_byte_addr || !enet_addr) { + return -1; + + } else { + enet_addr->bottom = enet_byte_addr[0] | + (enet_byte_addr[1] << 8) | + (enet_byte_addr[2] << 16) | + (enet_byte_addr[3] << 24); + enet_addr->top = enet_byte_addr[4] | + (enet_byte_addr[5] << 8); + return 0; + } +} + +/* pfe_eth_set_multi + */ +static void pfe_eth_set_multi(struct net_device *ndev) +{ + struct 
pfe_eth_priv_s *priv = netdev_priv(ndev); + struct pfe_mac_addr hash_addr; /* hash register structure */ + /* specific mac address register structure */ + struct pfe_mac_addr spec_addr; + int result; /* index into hash register to set.. */ + int uc_count = 0; + struct netdev_hw_addr *ha; + + if (ndev->flags & IFF_PROMISC) { + netif_info(priv, drv, ndev, "entering promiscuous mode\n"); + + priv->promisc = 1; + gemac_enable_copy_all(priv->EMAC_baseaddr); + } else { + priv->promisc = 0; + gemac_disable_copy_all(priv->EMAC_baseaddr); + } + + /* Enable broadcast frame reception if required. */ + if (ndev->flags & IFF_BROADCAST) { + gemac_allow_broadcast(priv->EMAC_baseaddr); + } else { + netif_info(priv, drv, ndev, + "disabling broadcast frame reception\n"); + + gemac_no_broadcast(priv->EMAC_baseaddr); + } + + if (ndev->flags & IFF_ALLMULTI) { + /* Set the hash to rx all multicast frames */ + hash_addr.bottom = 0xFFFFFFFF; + hash_addr.top = 0xFFFFFFFF; + gemac_set_hash(priv->EMAC_baseaddr, &hash_addr); + netdev_for_each_uc_addr(ha, ndev) { + if (uc_count >= MAX_UC_SPEC_ADDR_REG) + break; + pfe_eth_enet_addr_byte_mac(ha->addr, &spec_addr); + gemac_set_laddrN(priv->EMAC_baseaddr, &spec_addr, + uc_count + 2); + uc_count++; + } + } else if ((netdev_mc_count(ndev) > 0) || (netdev_uc_count(ndev))) { + u8 *addr; + + hash_addr.bottom = 0; + hash_addr.top = 0; + + netdev_for_each_mc_addr(ha, ndev) { + addr = ha->addr; + + netif_info(priv, drv, ndev, + "adding multicast address %X:%X:%X:%X:%X:%X to gem filter\n", + addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); + + result = pfe_eth_get_hash(addr); + + if (result < EMAC_HASH_REG_BITS) { + if (result < 32) + hash_addr.bottom |= (1 << result); + else + hash_addr.top |= (1 << (result - 32)); + } else { + break; + } + } + + uc_count = -1; + netdev_for_each_uc_addr(ha, ndev) { + addr = ha->addr; + + if (++uc_count < MAX_UC_SPEC_ADDR_REG) { + netdev_info(ndev, + "adding unicast address %02x:%02x:%02x:%02x:%02x:%02x to gem filter\n", + addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); + pfe_eth_enet_addr_byte_mac(addr, &spec_addr); + gemac_set_laddrN(priv->EMAC_baseaddr, + &spec_addr, uc_count + 2); + } else { + netif_info(priv, drv, ndev, + "adding unicast address %02x:%02x:%02x:%02x:%02x:%02x to gem hash\n", + addr[0], addr[1], addr[2], + addr[3], addr[4], addr[5]); + + result = pfe_eth_get_hash(addr); + if (result >= EMAC_HASH_REG_BITS) { + break; + + } else { + if (result < 32) + hash_addr.bottom |= (1 << + result); + else + hash_addr.top |= (1 << + (result - 32)); + } + } + } + + gemac_set_hash(priv->EMAC_baseaddr, &hash_addr); + } + + if (!(netdev_uc_count(ndev) >= MAX_UC_SPEC_ADDR_REG)) { + /* + * Check if there are any specific address HW registers that + * need to be flushed + */ + for (uc_count = netdev_uc_count(ndev); uc_count < + MAX_UC_SPEC_ADDR_REG; uc_count++) + gemac_clear_laddrN(priv->EMAC_baseaddr, uc_count + 2); + } + + if (ndev->flags & IFF_LOOPBACK) + gemac_set_loop(priv->EMAC_baseaddr, LB_LOCAL); +} + +/* pfe_eth_set_features + */ +static int pfe_eth_set_features(struct net_device *ndev, netdev_features_t + features) +{ + struct pfe_eth_priv_s *priv = netdev_priv(ndev); + int rc = 0; + + if (features & NETIF_F_RXCSUM) + gemac_enable_rx_checksum_offload(priv->EMAC_baseaddr); + else + gemac_disable_rx_checksum_offload(priv->EMAC_baseaddr); + return rc; +} + +/* pfe_eth_fast_tx_timeout + */ +static enum hrtimer_restart pfe_eth_fast_tx_timeout(struct hrtimer *timer) +{ + struct pfe_eth_fast_timer *fast_tx_timeout = 
container_of(timer, struct + pfe_eth_fast_timer, + timer); + struct pfe_eth_priv_s *priv = container_of(fast_tx_timeout->base, + struct pfe_eth_priv_s, + fast_tx_timeout); + struct netdev_queue *tx_queue = netdev_get_tx_queue(priv->ndev, + fast_tx_timeout->queuenum); + + if (netif_tx_queue_stopped(tx_queue)) { +#ifdef PFE_ETH_TX_STATS + priv->was_stopped[fast_tx_timeout->queuenum] = 1; +#endif + netif_tx_wake_queue(tx_queue); + } + + return HRTIMER_NORESTART; +} + +/* pfe_eth_fast_tx_timeout_init + */ +static void pfe_eth_fast_tx_timeout_init(struct pfe_eth_priv_s *priv) +{ + int i; + + for (i = 0; i < emac_txq_cnt; i++) { + priv->fast_tx_timeout[i].queuenum = i; + hrtimer_init(&priv->fast_tx_timeout[i].timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + priv->fast_tx_timeout[i].timer.function = + pfe_eth_fast_tx_timeout; + priv->fast_tx_timeout[i].base = priv->fast_tx_timeout; + } +} + +static struct sk_buff *pfe_eth_rx_skb(struct net_device *ndev, + struct pfe_eth_priv_s *priv, + unsigned int qno) +{ + void *buf_addr; + unsigned int rx_ctrl; + unsigned int desc_ctrl = 0; + struct hif_ipsec_hdr *ipsec_hdr = NULL; + struct sk_buff *skb; + struct sk_buff *skb_frag, *skb_frag_last = NULL; + int length = 0, offset; + + skb = priv->skb_inflight[qno]; + + if (skb) { + skb_frag_last = skb_shinfo(skb)->frag_list; + if (skb_frag_last) { + while (skb_frag_last->next) + skb_frag_last = skb_frag_last->next; + } + } + + while (!(desc_ctrl & CL_DESC_LAST)) { + buf_addr = hif_lib_receive_pkt(&priv->client, qno, &length, + &offset, &rx_ctrl, &desc_ctrl, + (void **)&ipsec_hdr); + if (!buf_addr) + goto incomplete; + +#ifdef PFE_ETH_NAPI_STATS + priv->napi_counters[NAPI_DESC_COUNT]++; +#endif + + /* First frag */ + if (desc_ctrl & CL_DESC_FIRST) { + skb = build_skb(buf_addr, 0); + if (unlikely(!skb)) + goto pkt_drop; + + skb_reserve(skb, offset); + skb_put(skb, length); + skb->dev = ndev; + + if ((ndev->features & NETIF_F_RXCSUM) && (rx_ctrl & + HIF_CTRL_RX_CHECKSUMMED)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + + } else { + /* Next frags */ + if (unlikely(!skb)) { + pr_err("%s: NULL skb_inflight\n", + __func__); + goto pkt_drop; + } + + skb_frag = build_skb(buf_addr, 0); + + if (unlikely(!skb_frag)) { + kfree(buf_addr); + goto pkt_drop; + } + + skb_reserve(skb_frag, offset); + skb_put(skb_frag, length); + + skb_frag->dev = ndev; + + if (skb_shinfo(skb)->frag_list) + skb_frag_last->next = skb_frag; + else + skb_shinfo(skb)->frag_list = skb_frag; + + skb->truesize += skb_frag->truesize; + skb->data_len += length; + skb->len += length; + skb_frag_last = skb_frag; + } + } + + priv->skb_inflight[qno] = NULL; + return skb; + +incomplete: + priv->skb_inflight[qno] = skb; + return NULL; + +pkt_drop: + priv->skb_inflight[qno] = NULL; + + if (skb) + kfree_skb(skb); + else + kfree(buf_addr); + + priv->stats.rx_errors++; + + return NULL; +} + +/* pfe_eth_poll + */ +static int pfe_eth_poll(struct pfe_eth_priv_s *priv, struct napi_struct *napi, + unsigned int qno, int budget) +{ + struct net_device *ndev = priv->ndev; + struct sk_buff *skb; + int work_done = 0; + unsigned int len; + + netif_info(priv, intr, priv->ndev, "%s\n", __func__); + +#ifdef PFE_ETH_NAPI_STATS + priv->napi_counters[NAPI_POLL_COUNT]++; +#endif + + do { + skb = pfe_eth_rx_skb(ndev, priv, qno); + + if (!skb) + break; + + len = skb->len; + + /* Packet will be processed */ + skb->protocol = eth_type_trans(skb, ndev); + + netif_receive_skb(skb); + + priv->stats.rx_packets++; + priv->stats.rx_bytes += len; + + 
work_done++;
+
+#ifdef PFE_ETH_NAPI_STATS
+		priv->napi_counters[NAPI_PACKET_COUNT]++;
+#endif
+
+	} while (work_done < budget);
+
+	/*
+	 * If no Rx receive nor cleanup work was done, exit polling mode.
+	 * No further netif_running(dev) check is required here, as this is
+	 * checked in net/core/dev.c.
+	 */
+	if (work_done < budget) {
+		napi_complete(napi);
+
+		hif_lib_event_handler_start(&priv->client, EVENT_RX_PKT_IND,
+					    qno);
+	}
+#ifdef PFE_ETH_NAPI_STATS
+	else
+		priv->napi_counters[NAPI_FULL_BUDGET_COUNT]++;
+#endif
+
+	return work_done;
+}
+
+/*
+ * pfe_eth_lro_poll
+ */
+static int pfe_eth_lro_poll(struct napi_struct *napi, int budget)
+{
+	struct pfe_eth_priv_s *priv = container_of(napi, struct pfe_eth_priv_s,
+						   lro_napi);
+
+	netif_info(priv, intr, priv->ndev, "%s\n", __func__);
+
+	return pfe_eth_poll(priv, napi, 2, budget);
+}
+
+/* pfe_eth_low_poll
+ */
+static int pfe_eth_low_poll(struct napi_struct *napi, int budget)
+{
+	struct pfe_eth_priv_s *priv = container_of(napi, struct pfe_eth_priv_s,
+						   low_napi);
+
+	netif_info(priv, intr, priv->ndev, "%s\n", __func__);
+
+	return pfe_eth_poll(priv, napi, 1, budget);
+}
+
+/* pfe_eth_high_poll
+ */
+static int pfe_eth_high_poll(struct napi_struct *napi, int budget)
+{
+	struct pfe_eth_priv_s *priv = container_of(napi, struct pfe_eth_priv_s,
+						   high_napi);
+
+	netif_info(priv, intr, priv->ndev, "%s\n", __func__);
+
+	return pfe_eth_poll(priv, napi, 0, budget);
+}
+
+static const struct net_device_ops pfe_netdev_ops = {
+	.ndo_open = pfe_eth_open,
+	.ndo_stop = pfe_eth_close,
+	.ndo_start_xmit = pfe_eth_send_packet,
+	.ndo_select_queue = pfe_eth_select_queue,
+	.ndo_get_stats = pfe_eth_get_stats,
+	.ndo_set_mac_address = pfe_eth_set_mac_address,
+	.ndo_set_rx_mode = pfe_eth_set_multi,
+	.ndo_set_features = pfe_eth_set_features,
+	.ndo_validate_addr = eth_validate_addr,
+};
+
+/* pfe_eth_init_one
+ */
+static int pfe_eth_init_one(struct pfe *pfe, int id)
+{
+	struct net_device *ndev = NULL;
+	struct pfe_eth_priv_s *priv = NULL;
+	struct ls1012a_eth_platform_data *einfo;
+	struct ls1012a_mdio_platform_data *minfo;
+	struct ls1012a_pfe_platform_data *pfe_info;
+	int err;
+
+	/* Extract platform data */
+	pfe_info = (struct ls1012a_pfe_platform_data *)
+					pfe->dev->platform_data;
+	if (!pfe_info) {
+		pr_err("%s: pfe missing additional platform data\n",
+		       __func__);
+		err = -ENODEV;
+		goto err0;
+	}
+
+	einfo = (struct ls1012a_eth_platform_data *)
+					pfe_info->ls1012a_eth_pdata;
+
+	/* einfo should never be NULL, but no harm in having this check */
+	if (!einfo) {
+		pr_err("%s: pfe missing additional gemacs platform data\n",
+		       __func__);
+		err = -ENODEV;
+		goto err0;
+	}
+
+	minfo = (struct ls1012a_mdio_platform_data *)
+					pfe_info->ls1012a_mdio_pdata;
+
+	/* minfo should never be NULL, but no harm in having this check */
+	if (!minfo) {
+		pr_err("%s: pfe missing additional mdios platform data\n",
+		       __func__);
+		err = -ENODEV;
+		goto err0;
+	}
+
+	/* Create an ethernet device instance */
+	ndev = alloc_etherdev_mq(sizeof(*priv), emac_txq_cnt);
+
+	if (!ndev) {
+		pr_err("%s: gemac %d device allocation failed\n",
+		       __func__, einfo[id].gem_id);
+		err = -ENOMEM;
+		goto err0;
+	}
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	priv->id = einfo[id].gem_id;
+	priv->pfe = pfe;
+
+	SET_NETDEV_DEV(priv->ndev, priv->pfe->dev);
+
+	pfe->eth.eth_priv[id] = priv;
+
+	/* Set the info in the priv to the current info */
+	priv->einfo = &einfo[id];
+	priv->EMAC_baseaddr = cbus_emac_base[id];
+	priv->PHY_baseaddr = cbus_emac_base[0];
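+	/*
+	 * Note: PHY/MDIO access for every GEMAC goes through the first
+	 * EMAC's register block, hence cbus_emac_base[0] rather than
+	 * cbus_emac_base[id].
+	 */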
+	priv->GPI_baseaddr = cbus_gpi_base[id];
+
+#define HIF_GEMAC_TMUQ_BASE	6
+	priv->low_tmu_q = HIF_GEMAC_TMUQ_BASE + (id * 2);
+	priv->high_tmu_q = priv->low_tmu_q + 1;
+
+	spin_lock_init(&priv->lock);
+
+	pfe_eth_fast_tx_timeout_init(priv);
+
+	/* Copy the station address into the dev structure */
+	memcpy(ndev->dev_addr, einfo[id].mac_addr, ETH_ALEN);
+
+	/* Initialize mdio */
+	if (minfo[id].enabled) {
+		err = pfe_eth_mdio_init(priv, &minfo[id]);
+		if (err) {
+			netdev_err(ndev, "%s: pfe_eth_mdio_init() failed\n",
+				   __func__);
+			goto err2;
+		}
+	}
+
+	ndev->mtu = 1500;
+
+	/* Set MTU limits */
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = JUMBO_FRAME_SIZE;
+
+	/* Supported features; checksum offloads have been validated */
+	ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
+			    NETIF_F_IPV6_CSUM | NETIF_F_SG;
+
+	/* enabled by default */
+	ndev->features = ndev->hw_features;
+
+	priv->usr_features = ndev->features;
+
+	ndev->netdev_ops = &pfe_netdev_ops;
+
+	ndev->ethtool_ops = &pfe_ethtool_ops;
+
+	/* Enable basic messages by default */
+	priv->msg_enable = NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_LINK |
+			   NETIF_MSG_PROBE;
+
+	netif_napi_add(ndev, &priv->low_napi, pfe_eth_low_poll,
+		       HIF_RX_POLL_WEIGHT - 16);
+	netif_napi_add(ndev, &priv->high_napi, pfe_eth_high_poll,
+		       HIF_RX_POLL_WEIGHT - 16);
+	netif_napi_add(ndev, &priv->lro_napi, pfe_eth_lro_poll,
+		       HIF_RX_POLL_WEIGHT - 16);
+
+	err = register_netdev(ndev);
+
+	if (err) {
+		netdev_err(ndev, "register_netdev() failed\n");
+		goto err3;
+	}
+	device_init_wakeup(&ndev->dev, WAKE_MAGIC);
+
+	if (!(priv->einfo->phy_flags & GEMAC_NO_PHY)) {
+		err = pfe_phy_init(ndev);
+		if (err) {
+			netdev_err(ndev, "%s: pfe_phy_init() failed\n",
+				   __func__);
+			goto err4;
+		}
+	}
+
+	netif_carrier_on(ndev);
+
+	/* Create all the sysfs files */
+	if (pfe_eth_sysfs_init(ndev))
+		goto err4;
+
+	netif_info(priv, probe, ndev, "%s: created interface, baseaddr: %p\n",
+		   __func__, priv->EMAC_baseaddr);
+
+	return 0;
+err4:
+	unregister_netdev(ndev);
+err3:
+	pfe_eth_mdio_exit(priv->mii_bus);
+err2:
+	free_netdev(priv->ndev);
+err0:
+	return err;
+}
+
+/* pfe_eth_init
+ */
+int pfe_eth_init(struct pfe *pfe)
+{
+	int ii = 0;
+	int err;
+
+	pr_info("%s\n", __func__);
+
+	cbus_emac_base[0] = EMAC1_BASE_ADDR;
+	cbus_emac_base[1] = EMAC2_BASE_ADDR;
+
+	cbus_gpi_base[0] = EGPI1_BASE_ADDR;
+	cbus_gpi_base[1] = EGPI2_BASE_ADDR;
+
+	/* Register one network device in the kernel per supported GEMAC */
+	for (ii = 0; ii < NUM_GEMAC_SUPPORT; ii++) {
+		err = pfe_eth_init_one(pfe, ii);
+		if (err)
+			goto err0;
+	}
+
+	return 0;
+
+err0:
+	while (ii--)
+		pfe_eth_exit_one(pfe->eth.eth_priv[ii]);
+
+	return err;
+}
+
+/* pfe_eth_exit_one
+ */
+static void pfe_eth_exit_one(struct pfe_eth_priv_s *priv)
+{
+	netif_info(priv, probe, priv->ndev, "%s\n", __func__);
+
+	pfe_eth_sysfs_exit(priv->ndev);
+
+	unregister_netdev(priv->ndev);
+
+	if (!(priv->einfo->phy_flags & GEMAC_NO_PHY))
+		pfe_phy_exit(priv->ndev);
+
+	if (priv->mii_bus)
+		pfe_eth_mdio_exit(priv->mii_bus);
+
+	free_netdev(priv->ndev);
+}
+
+/* pfe_eth_exit
+ */
+void pfe_eth_exit(struct pfe *pfe)
+{
+	int ii;
+
+	pr_info("%s\n", __func__);
+
+	for (ii = NUM_GEMAC_SUPPORT - 1; ii >= 0; ii--)
+		pfe_eth_exit_one(pfe->eth.eth_priv[ii]);
+}
diff --git a/drivers/staging/fsl_ppfe/pfe_eth.h b/drivers/staging/fsl_ppfe/pfe_eth.h
new file mode 100644
index 0000000..721bef3
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_eth.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor,
Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _PFE_ETH_H_ +#define _PFE_ETH_H_ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/phy.h> +#include <linux/clk.h> +#include <linux/interrupt.h> +#include <linux/time.h> + +#define PFE_ETH_NAPI_STATS +#define PFE_ETH_TX_STATS + +#define PFE_ETH_FRAGS_MAX (65536 / HIF_RX_PKT_MIN_SIZE) +#define LRO_LEN_COUNT_MAX 32 +#define LRO_NB_COUNT_MAX 32 + +#define PFE_PAUSE_FLAG_ENABLE 1 +#define PFE_PAUSE_FLAG_AUTONEG 2 + +/* GEMAC configured by SW */ +/* GEMAC configured by phy lines (not for MII/GMII) */ + +#define GEMAC_SW_FULL_DUPLEX BIT(9) +#define GEMAC_SW_SPEED_10M (0 << 12) +#define GEMAC_SW_SPEED_100M BIT(12) +#define GEMAC_SW_SPEED_1G (2 << 12) + +#define GEMAC_NO_PHY BIT(0) + +struct ls1012a_eth_platform_data { + /* device specific information */ + u32 device_flags; + char name[16]; + + /* board specific information */ + u32 mii_config; + u32 phy_flags; + u32 gem_id; + u32 bus_id; + u32 phy_id; + u32 mdio_muxval; + u8 mac_addr[ETH_ALEN]; +}; + +struct ls1012a_mdio_platform_data { + int enabled; + int irq[32]; + u32 phy_mask; + int mdc_div; +}; + +struct ls1012a_pfe_platform_data { + struct ls1012a_eth_platform_data ls1012a_eth_pdata[3]; + struct ls1012a_mdio_platform_data ls1012a_mdio_pdata[3]; +}; + +#define NUM_GEMAC_SUPPORT 2 +#define DRV_NAME "pfe-eth" +#define DRV_VERSION "1.0" + +#define LS1012A_TX_FAST_RECOVERY_TIMEOUT_MS 3 +#define TX_POLL_TIMEOUT_MS 1000 + +#define EMAC_TXQ_CNT 16 +#define EMAC_TXQ_DEPTH (HIF_TX_DESC_NT) + +#define JUMBO_FRAME_SIZE 10258 +/* + * Client Tx queue threshold, for txQ flush condition. + * It must be smaller than the queue size (in case we ever change it in the + * future). + */ +#define HIF_CL_TX_FLUSH_MARK 32 + +/* + * Max number of TX resources (HIF descriptors or skbs) that will be released + * in a single go during batch recycling. + * Should be lower than the flush mark so the SW can provide the HW with a + * continuous stream of packets instead of bursts. 
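+ * For instance, with HIF_CL_TX_FLUSH_MARK at 32, releasing at most 16
+ * resources per pass leaves roughly half of the flush window in flight.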
+ */ +#define TX_FREE_MAX_COUNT 16 +#define EMAC_RXQ_CNT 3 +#define EMAC_RXQ_DEPTH HIF_RX_DESC_NT +/* make sure clients can receive a full burst of packets */ +#define EMAC_RMON_TXBYTES_POS 0x00 +#define EMAC_RMON_RXBYTES_POS 0x14 + +#define EMAC_QUEUENUM_MASK (emac_txq_cnt - 1) +#define EMAC_MDIO_TIMEOUT 1000 +#define MAX_UC_SPEC_ADDR_REG 31 + +struct pfe_eth_fast_timer { + int queuenum; + struct hrtimer timer; + void *base; +}; + +struct pfe_eth_priv_s { + struct pfe *pfe; + struct hif_client_s client; + struct napi_struct lro_napi; + struct napi_struct low_napi; + struct napi_struct high_napi; + int low_tmu_q; + int high_tmu_q; + struct net_device_stats stats; + struct net_device *ndev; + int id; + int promisc; + unsigned int msg_enable; + unsigned int usr_features; + + spinlock_t lock; /* protect member variables */ + unsigned int event_status; + int irq; + void *EMAC_baseaddr; + /* This points to the EMAC base from where we access PHY */ + void *PHY_baseaddr; + void *GPI_baseaddr; + /* PHY stuff */ + struct phy_device *phydev; + int oldspeed; + int oldduplex; + int oldlink; + /* mdio info */ + int mdc_div; + struct mii_bus *mii_bus; + struct clk *gemtx_clk; + int wol; + int pause_flag; + + int default_priority; + struct pfe_eth_fast_timer fast_tx_timeout[EMAC_TXQ_CNT]; + + struct ls1012a_eth_platform_data *einfo; + struct sk_buff *skb_inflight[EMAC_RXQ_CNT + 6]; + +#ifdef PFE_ETH_TX_STATS + unsigned int stop_queue_total[EMAC_TXQ_CNT]; + unsigned int stop_queue_hif[EMAC_TXQ_CNT]; + unsigned int stop_queue_hif_client[EMAC_TXQ_CNT]; + unsigned int stop_queue_credit[EMAC_TXQ_CNT]; + unsigned int clean_fail[EMAC_TXQ_CNT]; + unsigned int was_stopped[EMAC_TXQ_CNT]; +#endif + +#ifdef PFE_ETH_NAPI_STATS + unsigned int napi_counters[NAPI_MAX_COUNT]; +#endif + unsigned int frags_inflight[EMAC_RXQ_CNT + 6]; +}; + +struct pfe_eth { + struct pfe_eth_priv_s *eth_priv[3]; +}; + +int pfe_eth_init(struct pfe *pfe); +void pfe_eth_exit(struct pfe *pfe); +int pfe_eth_suspend(struct net_device *dev); +int pfe_eth_resume(struct net_device *dev); +int pfe_eth_mdio_reset(struct mii_bus *bus); + +#endif /* _PFE_ETH_H_ */ diff --git a/drivers/staging/fsl_ppfe/pfe_firmware.c b/drivers/staging/fsl_ppfe/pfe_firmware.c new file mode 100644 index 0000000..47462b9 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_firmware.c @@ -0,0 +1,314 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * @file + * Contains all the functions to handle parsing and loading of PE firmware + * files. 
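+ * The images are big-endian ELF32 executables, so every section header
+ * field is byte-swapped with the be32/be16 helpers before use on the host.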
+ */
+#include <linux/firmware.h>
+
+#include "pfe_mod.h"
+#include "pfe_firmware.h"
+#include "pfe/pfe.h"
+
+static struct elf32_shdr *get_elf_section_header(const struct firmware *fw,
+						 const char *section)
+{
+	struct elf32_hdr *elf_hdr = (struct elf32_hdr *)fw->data;
+	struct elf32_shdr *shdr;
+	struct elf32_shdr *shdr_shstr;
+	Elf32_Off e_shoff = be32_to_cpu(elf_hdr->e_shoff);
+	Elf32_Half e_shentsize = be16_to_cpu(elf_hdr->e_shentsize);
+	Elf32_Half e_shnum = be16_to_cpu(elf_hdr->e_shnum);
+	Elf32_Half e_shstrndx = be16_to_cpu(elf_hdr->e_shstrndx);
+	Elf32_Off shstr_offset;
+	Elf32_Word sh_name;
+	const char *name;
+	int i;
+
+	/* Section header strings */
+	shdr_shstr = (struct elf32_shdr *)(fw->data + e_shoff + e_shstrndx *
+					   e_shentsize);
+	shstr_offset = be32_to_cpu(shdr_shstr->sh_offset);
+
+	for (i = 0; i < e_shnum; i++) {
+		shdr = (struct elf32_shdr *)(fw->data + e_shoff +
+					     i * e_shentsize);
+
+		sh_name = be32_to_cpu(shdr->sh_name);
+
+		name = (const char *)(fw->data + shstr_offset + sh_name);
+
+		if (!strcmp(name, section))
+			return shdr;
+	}
+
+	pr_err("%s: didn't find section %s\n", __func__, section);
+
+	return NULL;
+}
+
+#if defined(CFG_DIAGS)
+static int pfe_get_diags_info(const struct firmware *fw, struct pfe_diags_info
+				*diags_info)
+{
+	struct elf32_shdr *shdr;
+	unsigned long offset, size;
+
+	shdr = get_elf_section_header(fw, ".pfe_diags_str");
+	if (shdr) {
+		offset = be32_to_cpu(shdr->sh_offset);
+		size = be32_to_cpu(shdr->sh_size);
+		diags_info->diags_str_base = be32_to_cpu(shdr->sh_addr);
+		diags_info->diags_str_size = size;
+		diags_info->diags_str_array = kmalloc(size, GFP_KERNEL);
+		if (!diags_info->diags_str_array)
+			return -ENOMEM;
+		memcpy(diags_info->diags_str_array, fw->data + offset, size);
+
+		return 0;
+	} else {
+		return -1;
+	}
+}
+#endif
+
+static void pfe_check_version_info(const struct firmware *fw)
+{
+	static char *version;
+
+	struct elf32_shdr *shdr = get_elf_section_header(fw, ".version");
+
+	if (shdr) {
+		if (!version) {
+			/*
+			 * this is the first fw we load, use its version
+			 * string as reference (whatever it is)
+			 */
+			version = (char *)(fw->data +
+					   be32_to_cpu(shdr->sh_offset));
+
+			pr_info("PFE binary version: %s\n", version);
+		} else {
+			/*
+			 * at least one firmware has already been loaded, so
+			 * the version check can run now
+			 */
+			if (strcmp(version, (char *)(fw->data +
+				   be32_to_cpu(shdr->sh_offset)))) {
+				pr_info(
+				"WARNING: PFE firmware binaries from incompatible version\n");
+			}
+		}
+	} else {
+		/*
+		 * version cannot be verified, a potential issue that should
+		 * be reported
+		 */
+		pr_info(
+		"WARNING: PFE firmware version cannot be verified\n");
+	}
+}
+
+/* PFE elf firmware loader.
+ * Loads an elf firmware image into a list of PE's (specified using a bitmask)
+ *
+ * @param pe_mask	Mask of PE id's to load firmware to
+ * @param fw		Pointer to the firmware image
+ *
+ * @return		0 on success, a negative value on error
+ *
+ */
+int pfe_load_elf(int pe_mask, const struct firmware *fw, struct pfe *pfe)
+{
+	struct elf32_hdr *elf_hdr = (struct elf32_hdr *)fw->data;
+	Elf32_Half sections = be16_to_cpu(elf_hdr->e_shnum);
+	struct elf32_shdr *shdr = (struct elf32_shdr *)(fw->data +
+					be32_to_cpu(elf_hdr->e_shoff));
+	int id, section;
+	int rc;
+
+	pr_info("%s\n", __func__);
+
+	/* Some sanity checks */
+	if (strncmp(&elf_hdr->e_ident[EI_MAG0], ELFMAG, SELFMAG)) {
+		pr_err("%s: incorrect elf magic number\n", __func__);
+		return -EINVAL;
+	}
+
+	if (elf_hdr->e_ident[EI_CLASS] != ELFCLASS32) {
+		pr_err("%s: incorrect elf class(%x)\n", __func__,
+		       elf_hdr->e_ident[EI_CLASS]);
+		return -EINVAL;
+	}
+
+	if (elf_hdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+		pr_err("%s: incorrect elf data(%x)\n", __func__,
+		       elf_hdr->e_ident[EI_DATA]);
+		return -EINVAL;
+	}
+
+	if (be16_to_cpu(elf_hdr->e_type) != ET_EXEC) {
+		pr_err("%s: incorrect elf file type(%x)\n", __func__,
+		       be16_to_cpu(elf_hdr->e_type));
+		return -EINVAL;
+	}
+
+	for (section = 0; section < sections; section++, shdr++) {
+		if (!(be32_to_cpu(shdr->sh_flags) & (SHF_WRITE | SHF_ALLOC |
+		      SHF_EXECINSTR)))
+			continue;
+
+		for (id = 0; id < MAX_PE; id++)
+			if (pe_mask & (1 << id)) {
+				rc = pe_load_elf_section(id, fw->data, shdr,
+							 pfe->dev);
+				if (rc < 0)
+					goto err;
+			}
+	}
+
+	pfe_check_version_info(fw);
+
+	return 0;
+
+err:
+	return rc;
+}
+
+/* PFE firmware initialization.
+ * Loads different firmware files from filesystem.
+ * Initializes PE IMEM/DMEM and UTIL-PE DDR
+ * Initializes control path symbol addresses (by looking them up in the elf
+ * firmware files)
+ * Takes PE's out of reset
+ *
+ * @return	0 on success, a negative value on error
+ *
+ */
+int pfe_firmware_init(struct pfe *pfe)
+{
+	const struct firmware *class_fw, *tmu_fw;
+	int rc = 0;
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	const char *util_fw_name;
+	const struct firmware *util_fw;
+#endif
+
+	pr_info("%s\n", __func__);
+
+	if (request_firmware(&class_fw, CLASS_FIRMWARE_FILENAME, pfe->dev)) {
+		pr_err("%s: request firmware %s failed\n", __func__,
+		       CLASS_FIRMWARE_FILENAME);
+		rc = -ETIMEDOUT;
+		goto err0;
+	}
+
+	if (request_firmware(&tmu_fw, TMU_FIRMWARE_FILENAME, pfe->dev)) {
+		pr_err("%s: request firmware %s failed\n", __func__,
+		       TMU_FIRMWARE_FILENAME);
+		rc = -ETIMEDOUT;
+		goto err1;
+	}
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	util_fw_name = UTIL_FIRMWARE_FILENAME;
+
+	if (request_firmware(&util_fw, util_fw_name, pfe->dev)) {
+		pr_err("%s: request firmware %s failed\n", __func__,
+		       util_fw_name);
+		rc = -ETIMEDOUT;
+		goto err2;
+	}
+#endif
+	rc = pfe_load_elf(CLASS_MASK, class_fw, pfe);
+	if (rc < 0) {
+		pr_err("%s: class firmware load failed\n", __func__);
+		goto err3;
+	}
+
+#if defined(CFG_DIAGS)
+	rc = pfe_get_diags_info(class_fw, &pfe->diags.class_diags_info);
+	if (rc < 0) {
+		pr_warn("PFE diags won't be available for class PEs\n");
+		rc = 0;
+	}
+#endif
+
+	rc = pfe_load_elf(TMU_MASK, tmu_fw, pfe);
+	if (rc < 0) {
+		pr_err("%s: tmu firmware load failed\n", __func__);
+		goto err3;
+	}
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	rc = pfe_load_elf(UTIL_MASK, util_fw, pfe);
+	if (rc < 0) {
+		pr_err("%s: util firmware load failed\n", __func__);
+		goto err3;
+	}
+
+#if defined(CFG_DIAGS)
+	rc = pfe_get_diags_info(util_fw,
&pfe->diags.util_diags_info); + if (rc < 0) { + pr_warn( + "PFE diags won't be available for util PE\n"); + rc = 0; + } +#endif + + util_enable(); +#endif + + tmu_enable(0xf); + class_enable(); + +err3: +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + release_firmware(util_fw); + +err2: +#endif + release_firmware(tmu_fw); + +err1: + release_firmware(class_fw); + +err0: + return rc; +} + +/* PFE firmware cleanup + * Puts PE's in reset + * + * + */ +void pfe_firmware_exit(struct pfe *pfe) +{ + pr_info("%s\n", __func__); + + if (pe_reset_all(&pfe->ctrl) != 0) + pr_err("Error: Failed to stop PEs, PFE reload may not work correctly\n"); + + class_disable(); + tmu_disable(0xf); +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + util_disable(); +#endif +} diff --git a/drivers/staging/fsl_ppfe/pfe_firmware.h b/drivers/staging/fsl_ppfe/pfe_firmware.h new file mode 100644 index 0000000..5ade848 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_firmware.h @@ -0,0 +1,32 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _PFE_FIRMWARE_H_ +#define _PFE_FIRMWARE_H_ + +#define CLASS_FIRMWARE_FILENAME "ppfe_class_ls1012a.elf" +#define TMU_FIRMWARE_FILENAME "ppfe_tmu_ls1012a.elf" + +#define PFE_FW_CHECK_PASS 0 +#define PFE_FW_CHECK_FAIL 1 +#define NUM_PFE_FW 3 + +int pfe_firmware_init(struct pfe *pfe); +void pfe_firmware_exit(struct pfe *pfe); + +#endif /* _PFE_FIRMWARE_H_ */ diff --git a/drivers/staging/fsl_ppfe/pfe_hal.c b/drivers/staging/fsl_ppfe/pfe_hal.c new file mode 100644 index 0000000..0915034 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_hal.c @@ -0,0 +1,1516 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "pfe_mod.h" +#include "pfe/pfe.h" + +void *cbus_base_addr; +void *ddr_base_addr; +unsigned long ddr_phys_base_addr; +unsigned int ddr_size; + +static struct pe_info pe[MAX_PE]; + +/* Initializes the PFE library. + * Must be called before using any of the library functions. 
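+ *
+ * A minimal probe-time sketch (the four argument names here are
+ * hypothetical placeholders for the platform's mappings):
+ *
+ *	pfe_lib_init(cbus_vaddr, ddr_vaddr, ddr_paddr, ddr_size);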
+ * + * @param[in] cbus_base CBUS virtual base address (as mapped in + * the host CPU address space) + * @param[in] ddr_base PFE DDR range virtual base address (as + * mapped in the host CPU address space) + * @param[in] ddr_phys_base PFE DDR range physical base address (as + * mapped in platform) + * @param[in] size PFE DDR range size (as defined by the host + * software) + */ +void pfe_lib_init(void *cbus_base, void *ddr_base, unsigned long ddr_phys_base, + unsigned int size) +{ + cbus_base_addr = cbus_base; + ddr_base_addr = ddr_base; + ddr_phys_base_addr = ddr_phys_base; + ddr_size = size; + + pe[CLASS0_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(0); + pe[CLASS0_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(0); + pe[CLASS0_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS0_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS0_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS0_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[CLASS1_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(1); + pe[CLASS1_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(1); + pe[CLASS1_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS1_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS1_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS1_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[CLASS2_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(2); + pe[CLASS2_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(2); + pe[CLASS2_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS2_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS2_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS2_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[CLASS3_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(3); + pe[CLASS3_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(3); + pe[CLASS3_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS3_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS3_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS3_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[CLASS4_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(4); + pe[CLASS4_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(4); + pe[CLASS4_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS4_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS4_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS4_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[CLASS5_ID].dmem_base_addr = CLASS_DMEM_BASE_ADDR(5); + pe[CLASS5_ID].pmem_base_addr = CLASS_IMEM_BASE_ADDR(5); + pe[CLASS5_ID].pmem_size = CLASS_IMEM_SIZE; + pe[CLASS5_ID].mem_access_wdata = CLASS_MEM_ACCESS_WDATA; + pe[CLASS5_ID].mem_access_addr = CLASS_MEM_ACCESS_ADDR; + pe[CLASS5_ID].mem_access_rdata = CLASS_MEM_ACCESS_RDATA; + + pe[TMU0_ID].dmem_base_addr = TMU_DMEM_BASE_ADDR(0); + pe[TMU0_ID].pmem_base_addr = TMU_IMEM_BASE_ADDR(0); + pe[TMU0_ID].pmem_size = TMU_IMEM_SIZE; + pe[TMU0_ID].mem_access_wdata = TMU_MEM_ACCESS_WDATA; + pe[TMU0_ID].mem_access_addr = TMU_MEM_ACCESS_ADDR; + pe[TMU0_ID].mem_access_rdata = TMU_MEM_ACCESS_RDATA; + + pe[TMU1_ID].dmem_base_addr = TMU_DMEM_BASE_ADDR(1); + pe[TMU1_ID].pmem_base_addr = TMU_IMEM_BASE_ADDR(1); + pe[TMU1_ID].pmem_size = TMU_IMEM_SIZE; + pe[TMU1_ID].mem_access_wdata = TMU_MEM_ACCESS_WDATA; + pe[TMU1_ID].mem_access_addr = TMU_MEM_ACCESS_ADDR; + pe[TMU1_ID].mem_access_rdata = TMU_MEM_ACCESS_RDATA; + + pe[TMU3_ID].dmem_base_addr = TMU_DMEM_BASE_ADDR(3); + pe[TMU3_ID].pmem_base_addr = TMU_IMEM_BASE_ADDR(3); + pe[TMU3_ID].pmem_size = TMU_IMEM_SIZE; + pe[TMU3_ID].mem_access_wdata = TMU_MEM_ACCESS_WDATA; + pe[TMU3_ID].mem_access_addr = TMU_MEM_ACCESS_ADDR; + pe[TMU3_ID].mem_access_rdata = 
TMU_MEM_ACCESS_RDATA; + +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + pe[UTIL_ID].dmem_base_addr = UTIL_DMEM_BASE_ADDR; + pe[UTIL_ID].mem_access_wdata = UTIL_MEM_ACCESS_WDATA; + pe[UTIL_ID].mem_access_addr = UTIL_MEM_ACCESS_ADDR; + pe[UTIL_ID].mem_access_rdata = UTIL_MEM_ACCESS_RDATA; +#endif +} + +/* Writes a buffer to PE internal memory from the host + * through indirect access registers. + * + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] src Buffer source address + * @param[in] mem_access_addr DMEM destination address (must be 32bit + * aligned) + * @param[in] len Number of bytes to copy + */ +void pe_mem_memcpy_to32(int id, u32 mem_access_addr, const void *src, unsigned +int len) +{ + u32 offset = 0, val, addr; + unsigned int len32 = len >> 2; + int i; + + addr = mem_access_addr | PE_MEM_ACCESS_WRITE | + PE_MEM_ACCESS_BYTE_ENABLE(0, 4); + + for (i = 0; i < len32; i++, offset += 4, src += 4) { + val = *(u32 *)src; + writel(cpu_to_be32(val), pe[id].mem_access_wdata); + writel(addr + offset, pe[id].mem_access_addr); + } + + len = (len & 0x3); + if (len) { + val = 0; + + addr = (mem_access_addr | PE_MEM_ACCESS_WRITE | + PE_MEM_ACCESS_BYTE_ENABLE(0, len)) + offset; + + for (i = 0; i < len; i++, src++) + val |= (*(u8 *)src) << (8 * i); + + writel(cpu_to_be32(val), pe[id].mem_access_wdata); + writel(addr, pe[id].mem_access_addr); + } +} + +/* Writes a buffer to PE internal data memory (DMEM) from the host + * through indirect access registers. + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] src Buffer source address + * @param[in] dst DMEM destination address (must be 32bit + * aligned) + * @param[in] len Number of bytes to copy + */ +void pe_dmem_memcpy_to32(int id, u32 dst, const void *src, unsigned int len) +{ + pe_mem_memcpy_to32(id, pe[id].dmem_base_addr | dst | + PE_MEM_ACCESS_DMEM, src, len); +} + +/* Writes a buffer to PE internal program memory (PMEM) from the host + * through indirect access registers. + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., TMU3_ID) + * @param[in] src Buffer source address + * @param[in] dst PMEM destination address (must be 32bit + * aligned) + * @param[in] len Number of bytes to copy + */ +void pe_pmem_memcpy_to32(int id, u32 dst, const void *src, unsigned int len) +{ + pe_mem_memcpy_to32(id, pe[id].pmem_base_addr | (dst & (pe[id].pmem_size + - 1)) | PE_MEM_ACCESS_IMEM, src, len); +} + +/* Reads PE internal program memory (IMEM) from the host + * through indirect access registers. + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., TMU3_ID) + * @param[in] addr PMEM read address (must be aligned on size) + * @param[in] size Number of bytes to read (maximum 4, must not + * cross 32bit boundaries) + * @return the data read (in PE endianness, i.e BE). + */ +u32 pe_pmem_read(int id, u32 addr, u8 size) +{ + u32 offset = addr & 0x3; + u32 mask = 0xffffffff >> ((4 - size) << 3); + u32 val; + + addr = pe[id].pmem_base_addr | ((addr & ~0x3) & (pe[id].pmem_size - 1)) + | PE_MEM_ACCESS_IMEM | PE_MEM_ACCESS_BYTE_ENABLE(offset, size); + + writel(addr, pe[id].mem_access_addr); + val = be32_to_cpu(readl(pe[id].mem_access_rdata)); + + return (val >> (offset << 3)) & mask; +} + +/* Writes PE internal data memory (DMEM) from the host + * through indirect access registers. 
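+ * For example, pe_dmem_write(CLASS0_ID, 0, addr, 4) zeroes one 32bit DMEM
+ * word of class PE 0 (hypothetical caller; addr must be suitably aligned).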
+ * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] addr DMEM write address (must be aligned on size) + * @param[in] val Value to write (in PE endianness, i.e BE) + * @param[in] size Number of bytes to write (maximum 4, must not + * cross 32bit boundaries) + */ +void pe_dmem_write(int id, u32 val, u32 addr, u8 size) +{ + u32 offset = addr & 0x3; + + addr = pe[id].dmem_base_addr | (addr & ~0x3) | PE_MEM_ACCESS_WRITE | + PE_MEM_ACCESS_DMEM | PE_MEM_ACCESS_BYTE_ENABLE(offset, size); + + /* Indirect access interface is byte swapping data being written */ + writel(cpu_to_be32(val << (offset << 3)), pe[id].mem_access_wdata); + writel(addr, pe[id].mem_access_addr); +} + +/* Reads PE internal data memory (DMEM) from the host + * through indirect access registers. + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] addr DMEM read address (must be aligned on size) + * @param[in] size Number of bytes to read (maximum 4, must not + * cross 32bit boundaries) + * @return the data read (in PE endianness, i.e BE). + */ +u32 pe_dmem_read(int id, u32 addr, u8 size) +{ + u32 offset = addr & 0x3; + u32 mask = 0xffffffff >> ((4 - size) << 3); + u32 val; + + addr = pe[id].dmem_base_addr | (addr & ~0x3) | PE_MEM_ACCESS_DMEM | + PE_MEM_ACCESS_BYTE_ENABLE(offset, size); + + writel(addr, pe[id].mem_access_addr); + + /* Indirect access interface is byte swapping data being read */ + val = be32_to_cpu(readl(pe[id].mem_access_rdata)); + + return (val >> (offset << 3)) & mask; +} + +/* This function is used to write to CLASS internal bus peripherals (ccu, + * pe-lem) from the host + * through indirect access registers. + * @param[in] val value to write + * @param[in] addr Address to write to (must be aligned on size) + * @param[in] size Number of bytes to write (1, 2 or 4) + * + */ +void class_bus_write(u32 val, u32 addr, u8 size) +{ + u32 offset = addr & 0x3; + + writel((addr & CLASS_BUS_ACCESS_BASE_MASK), CLASS_BUS_ACCESS_BASE); + + addr = (addr & ~CLASS_BUS_ACCESS_BASE_MASK) | PE_MEM_ACCESS_WRITE | + (size << 24); + + writel(cpu_to_be32(val << (offset << 3)), CLASS_BUS_ACCESS_WDATA); + writel(addr, CLASS_BUS_ACCESS_ADDR); +} + +/* Reads from CLASS internal bus peripherals (ccu, pe-lem) from the host + * through indirect access registers. 
+ * @param[in] addr Address to read from (must be aligned on size) + * @param[in] size Number of bytes to read (1, 2 or 4) + * @return the read data + * + */ +u32 class_bus_read(u32 addr, u8 size) +{ + u32 offset = addr & 0x3; + u32 mask = 0xffffffff >> ((4 - size) << 3); + u32 val; + + writel((addr & CLASS_BUS_ACCESS_BASE_MASK), CLASS_BUS_ACCESS_BASE); + + addr = (addr & ~CLASS_BUS_ACCESS_BASE_MASK) | (size << 24); + + writel(addr, CLASS_BUS_ACCESS_ADDR); + val = be32_to_cpu(readl(CLASS_BUS_ACCESS_RDATA)); + + return (val >> (offset << 3)) & mask; +} + +/* Writes data to the cluster memory (PE_LMEM) + * @param[in] dst PE LMEM destination address (must be 32bit aligned) + * @param[in] src Buffer source address + * @param[in] len Number of bytes to copy + */ +void class_pe_lmem_memcpy_to32(u32 dst, const void *src, unsigned int len) +{ + u32 len32 = len >> 2; + int i; + + for (i = 0; i < len32; i++, src += 4, dst += 4) + class_bus_write(*(u32 *)src, dst, 4); + + if (len & 0x2) { + class_bus_write(*(u16 *)src, dst, 2); + src += 2; + dst += 2; + } + + if (len & 0x1) { + class_bus_write(*(u8 *)src, dst, 1); + src++; + dst++; + } +} + +/* Writes value to the cluster memory (PE_LMEM) + * @param[in] dst PE LMEM destination address (must be 32bit aligned) + * @param[in] val Value to write + * @param[in] len Number of bytes to write + */ +void class_pe_lmem_memset(u32 dst, int val, unsigned int len) +{ + u32 len32 = len >> 2; + int i; + + val = val | (val << 8) | (val << 16) | (val << 24); + + for (i = 0; i < len32; i++, dst += 4) + class_bus_write(val, dst, 4); + + if (len & 0x2) { + class_bus_write(val, dst, 2); + dst += 2; + } + + if (len & 0x1) { + class_bus_write(val, dst, 1); + dst++; + } +} + +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + +/* Writes UTIL program memory (DDR) from the host. + * + * @param[in] addr Address to write (virtual, must be aligned on size) + * @param[in] val Value to write (in PE endianness, i.e BE) + * @param[in] size Number of bytes to write (2 or 4) + */ +static void util_pmem_write(u32 val, void *addr, u8 size) +{ + void *addr64 = (void *)((unsigned long)addr & ~0x7); + unsigned long off = 8 - ((unsigned long)addr & 0x7) - size; + + /* + * IMEM should be loaded as a 64bit swapped value in a 64bit aligned + * location + */ + if (size == 4) + writel(be32_to_cpu(val), addr64 + off); + else + writew(be16_to_cpu((u16)val), addr64 + off); +} + +/* Writes a buffer to UTIL program memory (DDR) from the host. 
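+ * Any leading 16bit misalignment is written first, then whole 32bit words,
+ * then a possible 16bit tail, so only 2-byte alignment is required overall.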
+ * + * @param[in] dst Address to write (virtual, must be at least 16bit + * aligned) + * @param[in] src Buffer to write (in PE endianness, i.e BE, must have + * same alignment as dst) + * @param[in] len Number of bytes to write (must be at least 16bit + * aligned) + */ +static void util_pmem_memcpy(void *dst, const void *src, unsigned int len) +{ + unsigned int len32; + int i; + + if ((unsigned long)src & 0x2) { + util_pmem_write(*(u16 *)src, dst, 2); + src += 2; + dst += 2; + len -= 2; + } + + len32 = len >> 2; + + for (i = 0; i < len32; i++, dst += 4, src += 4) + util_pmem_write(*(u32 *)src, dst, 4); + + if (len & 0x2) + util_pmem_write(*(u16 *)src, dst, len & 0x2); +} +#endif + +/* Loads an elf section into pmem + * Code needs to be at least 16bit aligned and only PROGBITS sections are + * supported + * + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, ..., + * TMU3_ID) + * @param[in] data pointer to the elf firmware + * @param[in] shdr pointer to the elf section header + * + */ +static int pe_load_pmem_section(int id, const void *data, + struct elf32_shdr *shdr) +{ + u32 offset = be32_to_cpu(shdr->sh_offset); + u32 addr = be32_to_cpu(shdr->sh_addr); + u32 size = be32_to_cpu(shdr->sh_size); + u32 type = be32_to_cpu(shdr->sh_type); + +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + if (id == UTIL_ID) { + pr_err("%s: unsupported pmem section for UTIL\n", + __func__); + return -EINVAL; + } +#endif + + if (((unsigned long)(data + offset) & 0x3) != (addr & 0x3)) { + pr_err( + "%s: load address(%x) and elf file address(%lx) don't have the same alignment\n" + , __func__, addr, (unsigned long)data + offset); + + return -EINVAL; + } + + if (addr & 0x1) { + pr_err("%s: load address(%x) is not 16bit aligned\n", + __func__, addr); + return -EINVAL; + } + + if (size & 0x1) { + pr_err("%s: load size(%x) is not 16bit aligned\n", + __func__, size); + return -EINVAL; + } + + switch (type) { + case SHT_PROGBITS: + pe_pmem_memcpy_to32(id, addr, data + offset, size); + + break; + + default: + pr_err("%s: unsupported section type(%x)\n", __func__, + type); + return -EINVAL; + } + + return 0; +} + +/* Loads an elf section into dmem + * Data needs to be at least 32bit aligned, NOBITS sections are correctly + * initialized to 0 + * + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] data pointer to the elf firmware + * @param[in] shdr pointer to the elf section header + * + */ +static int pe_load_dmem_section(int id, const void *data, + struct elf32_shdr *shdr) +{ + u32 offset = be32_to_cpu(shdr->sh_offset); + u32 addr = be32_to_cpu(shdr->sh_addr); + u32 size = be32_to_cpu(shdr->sh_size); + u32 type = be32_to_cpu(shdr->sh_type); + u32 size32 = size >> 2; + int i; + + if (((unsigned long)(data + offset) & 0x3) != (addr & 0x3)) { + pr_err( + "%s: load address(%x) and elf file address(%lx) don't have the same alignment\n", + __func__, addr, (unsigned long)data + offset); + + return -EINVAL; + } + + if (addr & 0x3) { + pr_err("%s: load address(%x) is not 32bit aligned\n", + __func__, addr); + return -EINVAL; + } + + switch (type) { + case SHT_PROGBITS: + pe_dmem_memcpy_to32(id, addr, data + offset, size); + break; + + case SHT_NOBITS: + for (i = 0; i < size32; i++, addr += 4) + pe_dmem_write(id, 0, addr, 4); + + if (size & 0x3) + pe_dmem_write(id, 0, addr, size & 0x3); + + break; + + default: + pr_err("%s: unsupported section type(%x)\n", __func__, + type); + return -EINVAL; + } + + return 0; +} + +/* Loads an elf section into DDR + * Data needs to be at least 
32bit aligned, NOBITS sections are correctly + * initialized to 0 + * + * @param[in] id PE identification (CLASS0_ID, ..., TMU0_ID, + * ..., UTIL_ID) + * @param[in] data pointer to the elf firmware + * @param[in] shdr pointer to the elf section header + * + */ +static int pe_load_ddr_section(int id, const void *data, + struct elf32_shdr *shdr, + struct device *dev) { + u32 offset = be32_to_cpu(shdr->sh_offset); + u32 addr = be32_to_cpu(shdr->sh_addr); + u32 size = be32_to_cpu(shdr->sh_size); + u32 type = be32_to_cpu(shdr->sh_type); + u32 flags = be32_to_cpu(shdr->sh_flags); + + switch (type) { + case SHT_PROGBITS: + if (flags & SHF_EXECINSTR) { + if (id <= CLASS_MAX_ID) { + /* DO the loading only once in DDR */ + if (id == CLASS0_ID) { + pr_err( + "%s: load address(%x) and elf file address(%lx) rcvd\n", + __func__, addr, + (unsigned long)data + offset); + if (((unsigned long)(data + offset) + & 0x3) != (addr & 0x3)) { + pr_err( + "%s: load address(%x) and elf file address(%lx) don't have the same alignment\n" + , __func__, addr, + (unsigned long)data + offset); + + return -EINVAL; + } + + if (addr & 0x1) { + pr_err( + "%s: load address(%x) is not 16bit aligned\n" + , __func__, addr); + return -EINVAL; + } + + if (size & 0x1) { + pr_err( + "%s: load length(%x) is not 16bit aligned\n" + , __func__, size); + return -EINVAL; + } + memcpy(DDR_PHYS_TO_VIRT( + DDR_PFE_TO_PHYS(addr)), + data + offset, size); + } +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + } else if (id == UTIL_ID) { + if (((unsigned long)(data + offset) & 0x3) + != (addr & 0x3)) { + pr_err( + "%s: load address(%x) and elf file address(%lx) don't have the same alignment\n" + , __func__, addr, + (unsigned long)data + offset); + + return -EINVAL; + } + + if (addr & 0x1) { + pr_err( + "%s: load address(%x) is not 16bit aligned\n" + , __func__, addr); + return -EINVAL; + } + + if (size & 0x1) { + pr_err( + "%s: load length(%x) is not 16bit aligned\n" + , __func__, size); + return -EINVAL; + } + + util_pmem_memcpy(DDR_PHYS_TO_VIRT( + DDR_PFE_TO_PHYS(addr)), + data + offset, size); + } +#endif + } else { + pr_err( + "%s: unsupported ddr section type(%x) for PE(%d)\n" + , __func__, type, id); + return -EINVAL; + } + + } else { + memcpy(DDR_PHYS_TO_VIRT(DDR_PFE_TO_PHYS(addr)), data + + offset, size); + } + + break; + + case SHT_NOBITS: + memset(DDR_PHYS_TO_VIRT(DDR_PFE_TO_PHYS(addr)), 0, size); + + break; + + default: + pr_err("%s: unsupported section type(%x)\n", __func__, + type); + return -EINVAL; + } + + return 0; +} + +/* Loads an elf section into pe lmem + * Data needs to be at least 32bit aligned, NOBITS sections are correctly + * initialized to 0 + * + * @param[in] id PE identification (CLASS0_ID,..., CLASS5_ID) + * @param[in] data pointer to the elf firmware + * @param[in] shdr pointer to the elf section header + * + */ +static int pe_load_pe_lmem_section(int id, const void *data, + struct elf32_shdr *shdr) +{ + u32 offset = be32_to_cpu(shdr->sh_offset); + u32 addr = be32_to_cpu(shdr->sh_addr); + u32 size = be32_to_cpu(shdr->sh_size); + u32 type = be32_to_cpu(shdr->sh_type); + + if (id > CLASS_MAX_ID) { + pr_err( + "%s: unsupported pe-lmem section type(%x) for PE(%d)\n", + __func__, type, id); + return -EINVAL; + } + + if (((unsigned long)(data + offset) & 0x3) != (addr & 0x3)) { + pr_err( + "%s: load address(%x) and elf file address(%lx) don't have the same alignment\n", + __func__, addr, (unsigned long)data + offset); + + return -EINVAL; + } + + if (addr & 0x3) { + pr_err("%s: load address(%x) is not 32bit aligned\n", + 
__func__, addr);
+		return -EINVAL;
+	}
+
+	switch (type) {
+	case SHT_PROGBITS:
+		class_pe_lmem_memcpy_to32(addr, data + offset, size);
+		break;
+
+	case SHT_NOBITS:
+		class_pe_lmem_memset(addr, 0, size);
+		break;
+
+	default:
+		pr_err("%s: unsupported section type(%x)\n", __func__,
+		       type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Loads an elf section into a PE
+ * For now only supports loading a section to dmem (all PE's), pmem (class and
+ * tmu PE's),
+ * DDR (util PE code)
+ *
+ * @param[in] id	PE identification (CLASS0_ID, ..., TMU0_ID,
+ *			..., UTIL_ID)
+ * @param[in] data	pointer to the elf firmware
+ * @param[in] shdr	pointer to the elf section header
+ *
+ */
+int pe_load_elf_section(int id, const void *data, struct elf32_shdr *shdr,
+			struct device *dev) {
+	u32 addr = be32_to_cpu(shdr->sh_addr);
+	u32 size = be32_to_cpu(shdr->sh_size);
+
+	if (IS_DMEM(addr, size))
+		return pe_load_dmem_section(id, data, shdr);
+	else if (IS_PMEM(addr, size))
+		return pe_load_pmem_section(id, data, shdr);
+	else if (IS_PFE_LMEM(addr, size))
+		return 0;
+	else if (IS_PHYS_DDR(addr, size))
+		return pe_load_ddr_section(id, data, shdr, dev);
+	else if (IS_PE_LMEM(addr, size))
+		return pe_load_pe_lmem_section(id, data, shdr);
+
+	pr_err("%s: unsupported memory range(%x)\n", __func__,
+	       addr);
+	return 0;
+}
+
+/**************************** BMU ***************************/
+
+/* Initializes a BMU block.
+ * @param[in] base	BMU block base address
+ * @param[in] cfg	BMU configuration
+ */
+void bmu_init(void *base, struct BMU_CFG *cfg)
+{
+	bmu_disable(base);
+
+	bmu_set_config(base, cfg);
+
+	bmu_reset(base);
+}
+
+/* Resets a BMU block.
+ * @param[in] base	BMU block base address
+ */
+void bmu_reset(void *base)
+{
+	writel(CORE_SW_RESET, base + BMU_CTRL);
+
+	/* Wait for self clear */
+	while (readl(base + BMU_CTRL) & CORE_SW_RESET)
+		;
+}
+
+/* Enables a BMU block.
+ * @param[in] base	BMU block base address
+ */
+void bmu_enable(void *base)
+{
+	writel(CORE_ENABLE, base + BMU_CTRL);
+}
+
+/* Disables a BMU block.
+ * @param[in] base	BMU block base address
+ */
+void bmu_disable(void *base)
+{
+	writel(CORE_DISABLE, base + BMU_CTRL);
+}
+
+/* Sets the configuration of a BMU block.
+ * @param[in] base	BMU block base address
+ * @param[in] cfg	BMU configuration
+ */
+void bmu_set_config(void *base, struct BMU_CFG *cfg)
+{
+	writel(cfg->baseaddr, base + BMU_UCAST_BASE_ADDR);
+	writel(cfg->count & 0xffff, base + BMU_UCAST_CONFIG);
+	writel(cfg->size & 0xffff, base + BMU_BUF_SIZE);
+
+	/* Interrupts are never used */
+	writel(cfg->low_watermark, base + BMU_LOW_WATERMARK);
+	writel(cfg->high_watermark, base + BMU_HIGH_WATERMARK);
+	writel(0x0, base + BMU_INT_ENABLE);
+}
+
+/**************************** MTIP GEMAC ***************************/
+
+/* Enable Rx Checksum Engine. With this enabled, frames with bad IP,
+ * TCP or UDP checksums are discarded
+ *
+ * @param[in] base	GEMAC base address.
+ */
+void gemac_enable_rx_checksum_offload(void *base)
+{
+	/* No GEMAC configuration found for this, so it is a no-op */
+}
+
+/* Disable Rx Checksum Engine.
+ *
+ * @param[in] base	GEMAC base address.
+ */
+void gemac_disable_rx_checksum_offload(void *base)
+{
+	/* No GEMAC configuration found for this, so it is a no-op */
+}
+
+/* GEMAC set speed.
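+ * A hedged usage sketch (the caller is assumed to hold a mapped GEMAC
+ * base, as pfe_eth_priv_s does):
+ *
+ *	gemac_set_speed(priv->EMAC_baseaddr, SPEED_1000M);
+ *	gemac_set_duplex(priv->EMAC_baseaddr, DUPLEX_FULL);
+ *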
+ * @param[in] base	GEMAC base address
+ * @param[in] speed	GEMAC speed (10, 100 or 1000 Mbps)
+ */
+void gemac_set_speed(void *base, enum mac_speed gem_speed)
+{
+	u32 ecr = readl(base + EMAC_ECNTRL_REG) & ~EMAC_ECNTRL_SPEED;
+	u32 rcr = readl(base + EMAC_RCNTRL_REG) & ~EMAC_RCNTRL_RMII_10T;
+
+	switch (gem_speed) {
+	case SPEED_10M:
+		rcr |= EMAC_RCNTRL_RMII_10T;
+		break;
+
+	case SPEED_1000M:
+		ecr |= EMAC_ECNTRL_SPEED;
+		break;
+
+	case SPEED_100M:
+	default:
+		/* Already in 100M mode */
+		break;
+	}
+	writel(ecr, (base + EMAC_ECNTRL_REG));
+	writel(rcr, (base + EMAC_RCNTRL_REG));
+}
+
+/* GEMAC set duplex.
+ * @param[in] base	GEMAC base address
+ * @param[in] duplex	GEMAC duplex mode (Full, Half)
+ */
+void gemac_set_duplex(void *base, int duplex)
+{
+	if (duplex == DUPLEX_HALF) {
+		writel(readl(base + EMAC_TCNTRL_REG) & ~EMAC_TCNTRL_FDEN, base
+		       + EMAC_TCNTRL_REG);
+		writel(readl(base + EMAC_RCNTRL_REG) | EMAC_RCNTRL_DRT, (base
+		       + EMAC_RCNTRL_REG));
+	} else {
+		writel(readl(base + EMAC_TCNTRL_REG) | EMAC_TCNTRL_FDEN, base
+		       + EMAC_TCNTRL_REG);
+		writel(readl(base + EMAC_RCNTRL_REG) & ~EMAC_RCNTRL_DRT, (base
+		       + EMAC_RCNTRL_REG));
+	}
+}
+
+/* GEMAC set mode.
+ * @param[in] base	GEMAC base address
+ * @param[in] mode	GEMAC operation mode (MII, RMII, RGMII, SGMII)
+ */
+void gemac_set_mode(void *base, int mode)
+{
+	u32 val = readl(base + EMAC_RCNTRL_REG);
+
+	/* Remove loopback */
+	val &= ~EMAC_RCNTRL_LOOP;
+
+	/* Enable flow control and MII mode */
+	val |= (EMAC_RCNTRL_FCE | EMAC_RCNTRL_MII_MODE);
+
+	writel(val, base + EMAC_RCNTRL_REG);
+}
+
+/* GEMAC enable function.
+ * @param[in] base	GEMAC base address
+ */
+void gemac_enable(void *base)
+{
+	writel(readl(base + EMAC_ECNTRL_REG) | EMAC_ECNTRL_ETHER_EN, base +
+	       EMAC_ECNTRL_REG);
+}
+
+/* GEMAC disable function.
+ * @param[in] base	GEMAC base address
+ */
+void gemac_disable(void *base)
+{
+	writel(readl(base + EMAC_ECNTRL_REG) & ~EMAC_ECNTRL_ETHER_EN, base +
+	       EMAC_ECNTRL_REG);
+}
+
+/* GEMAC TX disable function.
+ * @param[in] base	GEMAC base address
+ */
+void gemac_tx_disable(void *base)
+{
+	writel(readl(base + EMAC_TCNTRL_REG) | EMAC_TCNTRL_GTS, base +
+	       EMAC_TCNTRL_REG);
+}
+
+void gemac_tx_enable(void *base)
+{
+	writel(readl(base + EMAC_TCNTRL_REG) & ~EMAC_TCNTRL_GTS, base +
+	       EMAC_TCNTRL_REG);
+}
+
+/* Sets the hash register of the MAC.
+ * This register is used for matching unicast and multicast frames.
+ *
+ * @param[in] base	GEMAC base address.
+ * @param[in] hash	64-bit hash to be configured.
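+ *
+ * A minimal all-multicast sketch, as pfe_eth_set_multi() does for
+ * IFF_ALLMULTI:
+ *
+ *	hash.bottom = 0xFFFFFFFF;
+ *	hash.top = 0xFFFFFFFF;
+ *	gemac_set_hash(base, &hash);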
+ */ +void gemac_set_hash(void *base, struct pfe_mac_addr *hash) +{ + writel(hash->bottom, base + EMAC_GALR); + writel(hash->top, base + EMAC_GAUR); +} + +void gemac_set_laddrN(void *base, struct pfe_mac_addr *address, + unsigned int entry_index) +{ + if ((entry_index < 1) || (entry_index > EMAC_SPEC_ADDR_MAX)) + return; + + entry_index = entry_index - 1; + if (entry_index < 1) { + writel(htonl(address->bottom), base + EMAC_PHY_ADDR_LOW); + writel((htonl(address->top) | 0x8808), base + + EMAC_PHY_ADDR_HIGH); + } else { + writel(htonl(address->bottom), base + ((entry_index - 1) * 8) + + EMAC_SMAC_0_0); + writel((htonl(address->top) | 0x8808), base + ((entry_index - + 1) * 8) + EMAC_SMAC_0_1); + } +} + +void gemac_clear_laddrN(void *base, unsigned int entry_index) +{ + if ((entry_index < 1) || (entry_index > EMAC_SPEC_ADDR_MAX)) + return; + + entry_index = entry_index - 1; + if (entry_index < 1) { + writel(0, base + EMAC_PHY_ADDR_LOW); + writel(0, base + EMAC_PHY_ADDR_HIGH); + } else { + writel(0, base + ((entry_index - 1) * 8) + EMAC_SMAC_0_0); + writel(0, base + ((entry_index - 1) * 8) + EMAC_SMAC_0_1); + } +} + +/* Set the loopback mode of the MAC. This can be either no loopback for + * normal operation, local loopback through MAC internal loopback module or PHY + * loopback for external loopback through a PHY. This asserts the external + * loop pin. + * + * @param[in] base GEMAC base address. + * @param[in] gem_loop Loopback mode to be enabled. LB_LOCAL - MAC + * Loopback, + * LB_EXT - PHY Loopback. + */ +void gemac_set_loop(void *base, enum mac_loop gem_loop) +{ + pr_info("%s()\n", __func__); + writel(readl(base + EMAC_RCNTRL_REG) | EMAC_RCNTRL_LOOP, (base + + EMAC_RCNTRL_REG)); +} + +/* GEMAC allow frames + * @param[in] base GEMAC base address + */ +void gemac_enable_copy_all(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) | EMAC_RCNTRL_PROM, (base + + EMAC_RCNTRL_REG)); +} + +/* GEMAC do not allow frames + * @param[in] base GEMAC base address + */ +void gemac_disable_copy_all(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) & ~EMAC_RCNTRL_PROM, (base + + EMAC_RCNTRL_REG)); +} + +/* GEMAC allow broadcast function. + * @param[in] base GEMAC base address + */ +void gemac_allow_broadcast(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) & ~EMAC_RCNTRL_BC_REJ, base + + EMAC_RCNTRL_REG); +} + +/* GEMAC no broadcast function. + * @param[in] base GEMAC base address + */ +void gemac_no_broadcast(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) | EMAC_RCNTRL_BC_REJ, base + + EMAC_RCNTRL_REG); +} + +/* GEMAC enable 1536 rx function. + * @param[in] base GEMAC base address + */ +void gemac_enable_1536_rx(void *base) +{ + /* Set 1536 as Maximum frame length */ + writel(readl(base + EMAC_RCNTRL_REG) | (1536 << 16), base + + EMAC_RCNTRL_REG); +} + +/* GEMAC enable jumbo function. + * @param[in] base GEMAC base address + */ +void gemac_enable_rx_jmb(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) | (JUMBO_FRAME_SIZE << 16), base + + EMAC_RCNTRL_REG); +} + +/* GEMAC enable stacked vlan function. + * @param[in] base GEMAC base address + */ +void gemac_enable_stacked_vlan(void *base) +{ + /* MTIP doesn't support stacked vlan */ +} + +/* GEMAC enable pause rx function. + * @param[in] base GEMAC base address + */ +void gemac_enable_pause_rx(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) | EMAC_RCNTRL_FCE, + base + EMAC_RCNTRL_REG); +} + +/* GEMAC disable pause rx function. 
+ * @param[in] base GEMAC base address + */ +void gemac_disable_pause_rx(void *base) +{ + writel(readl(base + EMAC_RCNTRL_REG) & ~EMAC_RCNTRL_FCE, + base + EMAC_RCNTRL_REG); +} + +/* GEMAC enable pause tx function. + * @param[in] base GEMAC base address + */ +void gemac_enable_pause_tx(void *base) +{ + writel(EMAC_RX_SECTION_EMPTY_V, base + EMAC_RX_SECTION_EMPTY); +} + +/* GEMAC disable pause tx function. + * @param[in] base GEMAC base address + */ +void gemac_disable_pause_tx(void *base) +{ + writel(0x0, base + EMAC_RX_SECTION_EMPTY); +} + +/* GEMAC wol configuration + * @param[in] base GEMAC base address + * @param[in] wol_conf WoL register configuration + */ +void gemac_set_wol(void *base, u32 wol_conf) +{ + u32 val = readl(base + EMAC_ECNTRL_REG); + + if (wol_conf) + val |= (EMAC_ECNTRL_MAGIC_ENA | EMAC_ECNTRL_SLEEP); + else + val &= ~(EMAC_ECNTRL_MAGIC_ENA | EMAC_ECNTRL_SLEEP); + writel(val, base + EMAC_ECNTRL_REG); +} + +/* Sets Gemac bus width to 64bit + * @param[in] base GEMAC base address + * @param[in] width gemac bus width to be set possible values are 32/64/128 + */ +void gemac_set_bus_width(void *base, int width) +{ +} + +/* Sets Gemac configuration. + * @param[in] base GEMAC base address + * @param[in] cfg GEMAC configuration + */ +void gemac_set_config(void *base, struct gemac_cfg *cfg) +{ + /*GEMAC config taken from VLSI */ + writel(0x00000004, base + EMAC_TFWR_STR_FWD); + writel(0x00000005, base + EMAC_RX_SECTION_FULL); + writel(0x00003fff, base + EMAC_TRUNC_FL); + writel(0x00000030, base + EMAC_TX_SECTION_EMPTY); + writel(0x00000000, base + EMAC_MIB_CTRL_STS_REG); + + gemac_set_mode(base, cfg->mode); + + gemac_set_speed(base, cfg->speed); + + gemac_set_duplex(base, cfg->duplex); +} + +/**************************** GPI ***************************/ + +/* Initializes a GPI block. + * @param[in] base GPI base address + * @param[in] cfg GPI configuration + */ +void gpi_init(void *base, struct gpi_cfg *cfg) +{ + gpi_reset(base); + + gpi_disable(base); + + gpi_set_config(base, cfg); +} + +/* Resets a GPI block. + * @param[in] base GPI base address + */ +void gpi_reset(void *base) +{ + writel(CORE_SW_RESET, base + GPI_CTRL); +} + +/* Enables a GPI block. + * @param[in] base GPI base address + */ +void gpi_enable(void *base) +{ + writel(CORE_ENABLE, base + GPI_CTRL); +} + +/* Disables a GPI block. + * @param[in] base GPI base address + */ +void gpi_disable(void *base) +{ + writel(CORE_DISABLE, base + GPI_CTRL); +} + +/* Sets the configuration of a GPI block. 
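+ * Normally reached through gpi_init(), i.e. after gpi_reset() and
+ * gpi_disable() have quiesced the block.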
+ * @param[in] base GPI base address + * @param[in] cfg GPI configuration + */ +void gpi_set_config(void *base, struct gpi_cfg *cfg) +{ + writel(CBUS_VIRT_TO_PFE(BMU1_BASE_ADDR + BMU_ALLOC_CTRL), base + + GPI_LMEM_ALLOC_ADDR); + writel(CBUS_VIRT_TO_PFE(BMU1_BASE_ADDR + BMU_FREE_CTRL), base + + GPI_LMEM_FREE_ADDR); + writel(CBUS_VIRT_TO_PFE(BMU2_BASE_ADDR + BMU_ALLOC_CTRL), base + + GPI_DDR_ALLOC_ADDR); + writel(CBUS_VIRT_TO_PFE(BMU2_BASE_ADDR + BMU_FREE_CTRL), base + + GPI_DDR_FREE_ADDR); + writel(CBUS_VIRT_TO_PFE(CLASS_INQ_PKTPTR), base + GPI_CLASS_ADDR); + writel(DDR_HDR_SIZE, base + GPI_DDR_DATA_OFFSET); + writel(LMEM_HDR_SIZE, base + GPI_LMEM_DATA_OFFSET); + writel(0, base + GPI_LMEM_SEC_BUF_DATA_OFFSET); + writel(0, base + GPI_DDR_SEC_BUF_DATA_OFFSET); + writel((DDR_HDR_SIZE << 16) | LMEM_HDR_SIZE, base + GPI_HDR_SIZE); + writel((DDR_BUF_SIZE << 16) | LMEM_BUF_SIZE, base + GPI_BUF_SIZE); + + writel(((cfg->lmem_rtry_cnt << 16) | (GPI_DDR_BUF_EN << 1) | + GPI_LMEM_BUF_EN), base + GPI_RX_CONFIG); + writel(cfg->tmlf_txthres, base + GPI_TMLF_TX); + writel(cfg->aseq_len, base + GPI_DTX_ASEQ); + writel(1, base + GPI_TOE_CHKSUM_EN); + + if (cfg->mtip_pause_reg) { + writel(cfg->mtip_pause_reg, base + GPI_CSR_MTIP_PAUSE_REG); + writel(EGPI_PAUSE_TIME, base + GPI_TX_PAUSE_TIME); + } +} + +/**************************** CLASSIFIER ***************************/ + +/* Initializes CLASSIFIER block. + * @param[in] cfg CLASSIFIER configuration + */ +void class_init(struct class_cfg *cfg) +{ + class_reset(); + + class_disable(); + + class_set_config(cfg); +} + +/* Resets CLASSIFIER block. + * + */ +void class_reset(void) +{ + writel(CORE_SW_RESET, CLASS_TX_CTRL); +} + +/* Enables all CLASS-PE's cores. + * + */ +void class_enable(void) +{ + writel(CORE_ENABLE, CLASS_TX_CTRL); +} + +/* Disables all CLASS-PE's cores. + * + */ +void class_disable(void) +{ + writel(CORE_DISABLE, CLASS_TX_CTRL); +} + +/* + * Sets the configuration of the CLASSIFIER block. + * @param[in] cfg CLASSIFIER configuration + */ +void class_set_config(struct class_cfg *cfg) +{ + u32 val; + + /* Initialize route table */ + if (!cfg->resume) + memset(DDR_PHYS_TO_VIRT(cfg->route_table_baseaddr), 0, (1 << + cfg->route_table_hash_bits) * CLASS_ROUTE_SIZE); + +#if !defined(LS1012A_PFE_RESET_WA) + writel(cfg->pe_sys_clk_ratio, CLASS_PE_SYS_CLK_RATIO); +#endif + + writel((DDR_HDR_SIZE << 16) | LMEM_HDR_SIZE, CLASS_HDR_SIZE); + writel(LMEM_BUF_SIZE, CLASS_LMEM_BUF_SIZE); + writel(CLASS_ROUTE_ENTRY_SIZE(CLASS_ROUTE_SIZE) | + CLASS_ROUTE_HASH_SIZE(cfg->route_table_hash_bits), + CLASS_ROUTE_HASH_ENTRY_SIZE); + writel(HIF_PKT_CLASS_EN | HIF_PKT_OFFSET(sizeof(struct hif_hdr)), + CLASS_HIF_PARSE); + + val = HASH_CRC_PORT_IP | QB2BUS_LE; + +#if defined(CONFIG_IP_ALIGNED) + val |= IP_ALIGNED; +#endif + + /* + * Class PE packet steering will only work if TOE mode, bridge fetch or + * route fetch are enabled (see class/qb_fet.v). Route fetch would + * trigger additional memory copies (likely from DDR because of hash + * table size, which cannot be reduced because PE software still + * relies on hash value computed in HW), so when not in TOE mode we + * simply enable HW bridge fetch even though we don't use it. 
+ */ + if (cfg->toe_mode) + val |= CLASS_TOE; + else + val |= HW_BRIDGE_FETCH; + + writel(val, CLASS_ROUTE_MULTI); + + writel(DDR_PHYS_TO_PFE(cfg->route_table_baseaddr), + CLASS_ROUTE_TABLE_BASE); + writel(CLASS_PE0_RO_DM_ADDR0_VAL, CLASS_PE0_RO_DM_ADDR0); + writel(CLASS_PE0_RO_DM_ADDR1_VAL, CLASS_PE0_RO_DM_ADDR1); + writel(CLASS_PE0_QB_DM_ADDR0_VAL, CLASS_PE0_QB_DM_ADDR0); + writel(CLASS_PE0_QB_DM_ADDR1_VAL, CLASS_PE0_QB_DM_ADDR1); + writel(CBUS_VIRT_TO_PFE(TMU_PHY_INQ_PKTPTR), CLASS_TM_INQ_ADDR); + + writel(23, CLASS_AFULL_THRES); + writel(23, CLASS_TSQ_FIFO_THRES); + + writel(24, CLASS_MAX_BUF_CNT); + writel(24, CLASS_TSQ_MAX_CNT); +} + +/**************************** TMU ***************************/ + +void tmu_reset(void) +{ + writel(SW_RESET, TMU_CTRL); +} + +/* Initializes TMU block. + * @param[in] cfg TMU configuration + */ +void tmu_init(struct tmu_cfg *cfg) +{ + int q, phyno; + + tmu_disable(0xF); + mdelay(10); + +#if !defined(LS1012A_PFE_RESET_WA) + /* keep in soft reset */ + writel(SW_RESET, TMU_CTRL); +#endif + writel(0x3, TMU_SYS_GENERIC_CONTROL); + writel(750, TMU_INQ_WATERMARK); + writel(CBUS_VIRT_TO_PFE(EGPI1_BASE_ADDR + + GPI_INQ_PKTPTR), TMU_PHY0_INQ_ADDR); + writel(CBUS_VIRT_TO_PFE(EGPI2_BASE_ADDR + + GPI_INQ_PKTPTR), TMU_PHY1_INQ_ADDR); + writel(CBUS_VIRT_TO_PFE(HGPI_BASE_ADDR + + GPI_INQ_PKTPTR), TMU_PHY3_INQ_ADDR); + writel(CBUS_VIRT_TO_PFE(HIF_NOCPY_RX_INQ0_PKTPTR), TMU_PHY4_INQ_ADDR); + writel(CBUS_VIRT_TO_PFE(UTIL_INQ_PKTPTR), TMU_PHY5_INQ_ADDR); + writel(CBUS_VIRT_TO_PFE(BMU2_BASE_ADDR + BMU_FREE_CTRL), + TMU_BMU_INQ_ADDR); + + writel(0x3FF, TMU_TDQ0_SCH_CTRL); /* + * enabling all 10 + * schedulers [9:0] of each TDQ + */ + writel(0x3FF, TMU_TDQ1_SCH_CTRL); + writel(0x3FF, TMU_TDQ3_SCH_CTRL); + +#if !defined(LS1012A_PFE_RESET_WA) + writel(cfg->pe_sys_clk_ratio, TMU_PE_SYS_CLK_RATIO); +#endif + +#if !defined(LS1012A_PFE_RESET_WA) + writel(DDR_PHYS_TO_PFE(cfg->llm_base_addr), TMU_LLM_BASE_ADDR); + /* Extra packet pointers will be stored from this address onwards */ + + writel(cfg->llm_queue_len, TMU_LLM_QUE_LEN); + writel(5, TMU_TDQ_IIFG_CFG); + writel(DDR_BUF_SIZE, TMU_BMU_BUF_SIZE); + + writel(0x0, TMU_CTRL); + + /* MEM init */ + pr_info("%s: mem init\n", __func__); + writel(MEM_INIT, TMU_CTRL); + + while (!(readl(TMU_CTRL) & MEM_INIT_DONE)) + ; + + /* LLM init */ + pr_info("%s: lmem init\n", __func__); + writel(LLM_INIT, TMU_CTRL); + + while (!(readl(TMU_CTRL) & LLM_INIT_DONE)) + ; +#endif + /* set up each queue for tail drop */ + for (phyno = 0; phyno < 4; phyno++) { + if (phyno == 2) + continue; + for (q = 0; q < 16; q++) { + u32 qdepth; + + writel((phyno << 8) | q, TMU_TEQ_CTRL); + writel(1 << 22, TMU_TEQ_QCFG); /*Enable tail drop */ + + if (phyno == 3) + qdepth = DEFAULT_TMU3_QDEPTH; + else + qdepth = (q == 0) ? DEFAULT_Q0_QDEPTH : + DEFAULT_MAX_QDEPTH; + + /* LOG: 68855 */ + /* + * The following is a workaround for the reordered + * packet and BMU2 buffer leakage issue. + */ + if (CHIP_REVISION() == 0) + qdepth = 31; + + writel(qdepth << 18, TMU_TEQ_HW_PROB_CFG2); + writel(qdepth >> 14, TMU_TEQ_HW_PROB_CFG3); + } + } + +#ifdef CFG_LRO + /* Set TMU-3 queue 5 (LRO) in no-drop mode */ + writel((3 << 8) | TMU_QUEUE_LRO, TMU_TEQ_CTRL); + writel(0, TMU_TEQ_QCFG); +#endif + + writel(0x05, TMU_TEQ_DISABLE_DROPCHK); + + writel(0x0, TMU_CTRL); +} + +/* Enables TMU-PE cores. + * @param[in] pe_mask TMU PE mask + */ +void tmu_enable(u32 pe_mask) +{ + writel(readl(TMU_TX_CTRL) | (pe_mask & 0xF), TMU_TX_CTRL); +} + +/* Disables TMU cores. 
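+ * For example, tmu_disable(0xf) gates all four TMU PEs, exactly as
+ * tmu_init() does before reconfiguring the block.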
+ * @param[in] pe_mask TMU PE mask + */ +void tmu_disable(u32 pe_mask) +{ + writel(readl(TMU_TX_CTRL) & ~(pe_mask & 0xF), TMU_TX_CTRL); +} + +/* This will return the tmu queue status + * @param[in] if_id gem interface id or TMU index + * @return returns the bit mask of busy queues, zero means all + * queues are empty + */ +u32 tmu_qstatus(u32 if_id) +{ + return cpu_to_be32(pe_dmem_read(TMU0_ID + if_id, TMU_DM_PESTATUS + + offsetof(struct pe_status, tmu_qstatus), 4)); +} + +u32 tmu_pkts_processed(u32 if_id) +{ + return cpu_to_be32(pe_dmem_read(TMU0_ID + if_id, TMU_DM_PESTATUS + + offsetof(struct pe_status, rx), 4)); +} + +/**************************** UTIL ***************************/ + +/* Resets UTIL block. + */ +void util_reset(void) +{ + writel(CORE_SW_RESET, UTIL_TX_CTRL); +} + +/* Initializes UTIL block. + * @param[in] cfg UTIL configuration + */ +void util_init(struct util_cfg *cfg) +{ + writel(cfg->pe_sys_clk_ratio, UTIL_PE_SYS_CLK_RATIO); +} + +/* Enables UTIL-PE core. + * + */ +void util_enable(void) +{ + writel(CORE_ENABLE, UTIL_TX_CTRL); +} + +/* Disables UTIL-PE core. + * + */ +void util_disable(void) +{ + writel(CORE_DISABLE, UTIL_TX_CTRL); +} + +/**************************** HIF ***************************/ +/* Initializes HIF copy block. + * + */ +void hif_init(void) +{ + /*Initialize HIF registers*/ + writel((HIF_RX_POLL_CTRL_CYCLE << 16) | HIF_TX_POLL_CTRL_CYCLE, + HIF_POLL_CTRL); +} + +/* Enable hif tx DMA and interrupt + * + */ +void hif_tx_enable(void) +{ + writel(HIF_CTRL_DMA_EN, HIF_TX_CTRL); + writel((readl(HIF_INT_ENABLE) | HIF_INT_EN | HIF_TXPKT_INT_EN), + HIF_INT_ENABLE); +} + +/* Disable hif tx DMA and interrupt + * + */ +void hif_tx_disable(void) +{ + u32 hif_int; + + writel(0, HIF_TX_CTRL); + + hif_int = readl(HIF_INT_ENABLE); + hif_int &= HIF_TXPKT_INT_EN; + writel(hif_int, HIF_INT_ENABLE); +} + +/* Enable hif rx DMA and interrupt + * + */ +void hif_rx_enable(void) +{ + hif_rx_dma_start(); + writel((readl(HIF_INT_ENABLE) | HIF_INT_EN | HIF_RXPKT_INT_EN), + HIF_INT_ENABLE); +} + +/* Disable hif rx DMA and interrupt + * + */ +void hif_rx_disable(void) +{ + u32 hif_int; + + writel(0, HIF_RX_CTRL); + + hif_int = readl(HIF_INT_ENABLE); + hif_int &= HIF_RXPKT_INT_EN; + writel(hif_int, HIF_INT_ENABLE); +} diff --git a/drivers/staging/fsl_ppfe/pfe_hif.c b/drivers/staging/fsl_ppfe/pfe_hif.c new file mode 100644 index 0000000..6835e14 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_hif.c @@ -0,0 +1,1072 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/kthread.h> +#include <linux/slab.h> + +#include <linux/io.h> +#include <asm/irq.h> + +#include "pfe_mod.h" + +#define HIF_INT_MASK (HIF_INT | HIF_RXPKT_INT | HIF_TXPKT_INT) + +unsigned char napi_first_batch; + +static void pfe_tx_do_cleanup(unsigned long data); + +static int pfe_hif_alloc_descr(struct pfe_hif *hif) +{ + void *addr; + dma_addr_t dma_addr; + int err = 0; + + pr_info("%s\n", __func__); + addr = dma_alloc_coherent(pfe->dev, + HIF_RX_DESC_NT * sizeof(struct hif_desc) + + HIF_TX_DESC_NT * sizeof(struct hif_desc), + &dma_addr, GFP_KERNEL); + + if (!addr) { + pr_err("%s: Could not allocate buffer descriptors!\n" + , __func__); + err = -ENOMEM; + goto err0; + } + + hif->descr_baseaddr_p = dma_addr; + hif->descr_baseaddr_v = addr; + hif->rx_ring_size = HIF_RX_DESC_NT; + hif->tx_ring_size = HIF_TX_DESC_NT; + + return 0; + +err0: + return err; +} + +#if defined(LS1012A_PFE_RESET_WA) +static void pfe_hif_disable_rx_desc(struct pfe_hif *hif) +{ + int ii; + struct hif_desc *desc = hif->rx_base; + + /*Mark all descriptors as LAST_BD */ + for (ii = 0; ii < hif->rx_ring_size; ii++) { + desc->ctrl |= BD_CTRL_LAST_BD; + desc++; + } +} + +struct class_rx_hdr_t { + u32 next_ptr; /* ptr to the start of the first DDR buffer */ + u16 length; /* total packet length */ + u16 phyno; /* input physical port number */ + u32 status; /* gemac status bits */ + u32 status2; /* reserved for software usage */ +}; + +/* STATUS_BAD_FRAME_ERR is set for all errors (including checksums if enabled) + * except overflow + */ +#define STATUS_BAD_FRAME_ERR BIT(16) +#define STATUS_LENGTH_ERR BIT(17) +#define STATUS_CRC_ERR BIT(18) +#define STATUS_TOO_SHORT_ERR BIT(19) +#define STATUS_TOO_LONG_ERR BIT(20) +#define STATUS_CODE_ERR BIT(21) +#define STATUS_MC_HASH_MATCH BIT(22) +#define STATUS_CUMULATIVE_ARC_HIT BIT(23) +#define STATUS_UNICAST_HASH_MATCH BIT(24) +#define STATUS_IP_CHECKSUM_CORRECT BIT(25) +#define STATUS_TCP_CHECKSUM_CORRECT BIT(26) +#define STATUS_UDP_CHECKSUM_CORRECT BIT(27) +#define STATUS_OVERFLOW_ERR BIT(28) /* GPI error */ +#define MIN_PKT_SIZE 64 + +static inline void copy_to_lmem(u32 *dst, u32 *src, int len) +{ + int i; + + for (i = 0; i < len; i += sizeof(u32)) { + *dst = htonl(*src); + dst++; src++; + } +} + +static void send_dummy_pkt_to_hif(void) +{ + void *lmem_ptr, *ddr_ptr, *lmem_virt_addr; + u32 physaddr; + struct class_rx_hdr_t local_hdr; + static u32 dummy_pkt[] = { + 0x33221100, 0x2b785544, 0xd73093cb, 0x01000608, + 0x04060008, 0x2b780200, 0xd73093cb, 0x0a01a8c0, + 0x33221100, 0xa8c05544, 0x00000301, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0xbe86c51f }; + + ddr_ptr = (void *)((u64)readl(BMU2_BASE_ADDR + BMU_ALLOC_CTRL)); + if (!ddr_ptr) + return; + + lmem_ptr = (void *)((u64)readl(BMU1_BASE_ADDR + BMU_ALLOC_CTRL)); + if (!lmem_ptr) + return; + + pr_info("Sending a dummy pkt to HIF %p %p\n", ddr_ptr, lmem_ptr); + physaddr = (u32)DDR_VIRT_TO_PFE(ddr_ptr); + + lmem_virt_addr = (void *)CBUS_PFE_TO_VIRT((unsigned long int)lmem_ptr); + + local_hdr.phyno = htons(0); /* RX_PHY_0 */ + local_hdr.length = htons(MIN_PKT_SIZE); + + local_hdr.next_ptr = htonl((u32)physaddr); + /*Mark checksum is correct */ + local_hdr.status = htonl((STATUS_IP_CHECKSUM_CORRECT | + STATUS_UDP_CHECKSUM_CORRECT | + STATUS_TCP_CHECKSUM_CORRECT | + STATUS_UNICAST_HASH_MATCH | + 
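				/*
				 * (Editorial note, not part of the original
				 * patch.) Every "checksum correct" status bit
				 * is set so the classifier forwards this dummy
				 * frame untouched; the frame exists only to
				 * kick the HIF Rx BDP out of its busy state,
				 * see pfe_hif_rx_idle() below.
				 */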
STATUS_CUMULATIVE_ARC_HIT)); + copy_to_lmem((u32 *)lmem_virt_addr, (u32 *)&local_hdr, + sizeof(local_hdr)); + + copy_to_lmem((u32 *)(lmem_virt_addr + LMEM_HDR_SIZE), (u32 *)dummy_pkt, + 0x40); + + writel((unsigned long int)lmem_ptr, CLASS_INQ_PKTPTR); +} + +void pfe_hif_rx_idle(struct pfe_hif *hif) +{ + int hif_stop_loop = 10; + u32 rx_status; + + pfe_hif_disable_rx_desc(hif); + pr_info("Bringing hif to idle state..."); + writel(0, HIF_INT_ENABLE); + /*If HIF Rx BDP is busy send a dummy packet */ + do { + rx_status = readl(HIF_RX_STATUS); + if (rx_status & BDP_CSR_RX_DMA_ACTV) + send_dummy_pkt_to_hif(); + + usleep_range(100, 150); + } while (--hif_stop_loop); + + if (readl(HIF_RX_STATUS) & BDP_CSR_RX_DMA_ACTV) + pr_info("Failed\n"); + else + pr_info("Done\n"); +} +#endif + +static void pfe_hif_free_descr(struct pfe_hif *hif) +{ + pr_info("%s\n", __func__); + + dma_free_coherent(pfe->dev, + hif->rx_ring_size * sizeof(struct hif_desc) + + hif->tx_ring_size * sizeof(struct hif_desc), + hif->descr_baseaddr_v, hif->descr_baseaddr_p); +} + +void pfe_hif_desc_dump(struct pfe_hif *hif) +{ + struct hif_desc *desc; + unsigned long desc_p; + int ii = 0; + + pr_info("%s\n", __func__); + + desc = hif->rx_base; + desc_p = (u32)((u64)desc - (u64)hif->descr_baseaddr_v + + hif->descr_baseaddr_p); + + pr_info("HIF Rx desc base %p physical %x\n", desc, (u32)desc_p); + for (ii = 0; ii < hif->rx_ring_size; ii++) { + pr_info("status: %08x, ctrl: %08x, data: %08x, next: %x\n", + readl(&desc->status), readl(&desc->ctrl), + readl(&desc->data), readl(&desc->next)); + desc++; + } + + desc = hif->tx_base; + desc_p = ((u64)desc - (u64)hif->descr_baseaddr_v + + hif->descr_baseaddr_p); + + pr_info("HIF Tx desc base %p physical %x\n", desc, (u32)desc_p); + for (ii = 0; ii < hif->tx_ring_size; ii++) { + pr_info("status: %08x, ctrl: %08x, data: %08x, next: %x\n", + readl(&desc->status), readl(&desc->ctrl), + readl(&desc->data), readl(&desc->next)); + desc++; + } +} + +/* pfe_hif_release_buffers */ +static void pfe_hif_release_buffers(struct pfe_hif *hif) +{ + struct hif_desc *desc; + int i = 0; + + hif->rx_base = hif->descr_baseaddr_v; + + pr_info("%s\n", __func__); + + /*Free Rx buffers */ + desc = hif->rx_base; + for (i = 0; i < hif->rx_ring_size; i++) { + if (readl(&desc->data)) { + if ((i < hif->shm->rx_buf_pool_cnt) && + (!hif->shm->rx_buf_pool[i])) { + /* + * dma_unmap_single(hif->dev, desc->data, + * hif->rx_buf_len[i], DMA_FROM_DEVICE); + */ + dma_unmap_single(hif->dev, + DDR_PFE_TO_PHYS( + readl(&desc->data)), + hif->rx_buf_len[i], + DMA_FROM_DEVICE); + hif->shm->rx_buf_pool[i] = hif->rx_buf_addr[i]; + } else { + pr_err("%s: buffer pool already full\n" + , __func__); + } + } + + writel(0, &desc->data); + writel(0, &desc->status); + writel(0, &desc->ctrl); + desc++; + } +} + +/* + * pfe_hif_init_buffers + * This function initializes the HIF Rx/Tx ring descriptors and + * initialize Rx queue with buffers. 
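 *
 * (Editorial note, not part of the original patch.) Ring and queue indices
 * in this driver advance with a power-of-two mask instead of a modulo,
 * which relies on HIF_RX_DESC_NT (256), HIF_TX_DESC_NT (2048) and the
 * client queue sizes being powers of two:
 *
 *	idx = (idx + 1) & (ring_size - 1);	// wraps to 0 at ring_size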
+ */ +static int pfe_hif_init_buffers(struct pfe_hif *hif) +{ + struct hif_desc *desc, *first_desc_p; + u32 data; + int i = 0; + + pr_info("%s\n", __func__); + + /* Check enough Rx buffers available in the shared memory */ + if (hif->shm->rx_buf_pool_cnt < hif->rx_ring_size) + return -ENOMEM; + + hif->rx_base = hif->descr_baseaddr_v; + memset(hif->rx_base, 0, hif->rx_ring_size * sizeof(struct hif_desc)); + + /*Initialize Rx descriptors */ + desc = hif->rx_base; + first_desc_p = (struct hif_desc *)hif->descr_baseaddr_p; + + for (i = 0; i < hif->rx_ring_size; i++) { + /* Initialize Rx buffers from the shared memory */ + + data = (u32)dma_map_single(hif->dev, hif->shm->rx_buf_pool[i], + pfe_pkt_size, DMA_FROM_DEVICE); + hif->rx_buf_addr[i] = hif->shm->rx_buf_pool[i]; + hif->rx_buf_len[i] = pfe_pkt_size; + hif->shm->rx_buf_pool[i] = NULL; + + if (likely(dma_mapping_error(hif->dev, data) == 0)) { + writel(DDR_PHYS_TO_PFE(data), &desc->data); + } else { + pr_err("%s : low on mem\n", __func__); + + goto err; + } + + writel(0, &desc->status); + + /* + * Ensure everything else is written to DDR before + * writing bd->ctrl + */ + wmb(); + + writel((BD_CTRL_PKT_INT_EN | BD_CTRL_LIFM + | BD_CTRL_DIR | BD_CTRL_DESC_EN + | BD_BUF_LEN(pfe_pkt_size)), &desc->ctrl); + + /* Chain descriptors */ + writel((u32)DDR_PHYS_TO_PFE(first_desc_p + i + 1), &desc->next); + desc++; + } + + /* Overwrite last descriptor to chain it to first one*/ + desc--; + writel((u32)DDR_PHYS_TO_PFE(first_desc_p), &desc->next); + + hif->rxtoclean_index = 0; + + /*Initialize Rx buffer descriptor ring base address */ + writel(DDR_PHYS_TO_PFE(hif->descr_baseaddr_p), HIF_RX_BDP_ADDR); + + hif->tx_base = hif->rx_base + hif->rx_ring_size; + first_desc_p = (struct hif_desc *)hif->descr_baseaddr_p + + hif->rx_ring_size; + memset(hif->tx_base, 0, hif->tx_ring_size * sizeof(struct hif_desc)); + + /*Initialize tx descriptors */ + desc = hif->tx_base; + + for (i = 0; i < hif->tx_ring_size; i++) { + /* Chain descriptors */ + writel((u32)DDR_PHYS_TO_PFE(first_desc_p + i + 1), &desc->next); + writel(0, &desc->ctrl); + desc++; + } + + /* Overwrite last descriptor to chain it to first one */ + desc--; + writel((u32)DDR_PHYS_TO_PFE(first_desc_p), &desc->next); + hif->txavail = hif->tx_ring_size; + hif->txtosend = 0; + hif->txtoclean = 0; + hif->txtoflush = 0; + + /*Initialize Tx buffer descriptor ring base address */ + writel((u32)DDR_PHYS_TO_PFE(first_desc_p), HIF_TX_BDP_ADDR); + + return 0; + +err: + pfe_hif_release_buffers(hif); + return -ENOMEM; +} + +/* + * pfe_hif_client_register + * + * This function used to register a client driver with the HIF driver. 
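 *
 * (Editorial note, not part of the original patch.) The queue counts arrive
 * packed into client_shm->ctrl, eight bits each, exactly as the client side
 * encodes them in hif_lib_client_register():
 *
 *	ctrl = (tx_qn << CLIENT_CTRL_TX_Q_CNT_OFST) |
 *	       (rx_qn << CLIENT_CTRL_RX_Q_CNT_OFST);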
+ * + * Return value: + * 0 - on Successful registration + */ +static int pfe_hif_client_register(struct pfe_hif *hif, u32 client_id, + struct hif_client_shm *client_shm) +{ + struct hif_client *client = &hif->client[client_id]; + u32 i, cnt; + struct rx_queue_desc *rx_qbase; + struct tx_queue_desc *tx_qbase; + struct hif_rx_queue *rx_queue; + struct hif_tx_queue *tx_queue; + int err = 0; + + pr_info("%s\n", __func__); + + spin_lock_bh(&hif->tx_lock); + + if (test_bit(client_id, &hif->shm->g_client_status[0])) { + pr_err("%s: client %d already registered\n", + __func__, client_id); + err = -1; + goto unlock; + } + + memset(client, 0, sizeof(struct hif_client)); + + /* Initialize client Rx queues baseaddr, size */ + + cnt = CLIENT_CTRL_RX_Q_CNT(client_shm->ctrl); + /* Check if client is requesting for more queues than supported */ + if (cnt > HIF_CLIENT_QUEUES_MAX) + cnt = HIF_CLIENT_QUEUES_MAX; + + client->rx_qn = cnt; + rx_qbase = (struct rx_queue_desc *)client_shm->rx_qbase; + for (i = 0; i < cnt; i++) { + rx_queue = &client->rx_q[i]; + rx_queue->base = rx_qbase + i * client_shm->rx_qsize; + rx_queue->size = client_shm->rx_qsize; + rx_queue->write_idx = 0; + } + + /* Initialize client Tx queues baseaddr, size */ + cnt = CLIENT_CTRL_TX_Q_CNT(client_shm->ctrl); + + /* Check if client is requesting for more queues than supported */ + if (cnt > HIF_CLIENT_QUEUES_MAX) + cnt = HIF_CLIENT_QUEUES_MAX; + + client->tx_qn = cnt; + tx_qbase = (struct tx_queue_desc *)client_shm->tx_qbase; + for (i = 0; i < cnt; i++) { + tx_queue = &client->tx_q[i]; + tx_queue->base = tx_qbase + i * client_shm->tx_qsize; + tx_queue->size = client_shm->tx_qsize; + tx_queue->ack_idx = 0; + } + + set_bit(client_id, &hif->shm->g_client_status[0]); + +unlock: + spin_unlock_bh(&hif->tx_lock); + + return err; +} + +/* + * pfe_hif_client_unregister + * + * This function used to unregister a client from the HIF driver. + * + */ +static void pfe_hif_client_unregister(struct pfe_hif *hif, u32 client_id) +{ + pr_info("%s\n", __func__); + + /* + * Mark client as no longer available (which prevents further packet + * receive for this client) + */ + spin_lock_bh(&hif->tx_lock); + + if (!test_bit(client_id, &hif->shm->g_client_status[0])) { + pr_err("%s: client %d not registered\n", __func__, + client_id); + + spin_unlock_bh(&hif->tx_lock); + return; + } + + clear_bit(client_id, &hif->shm->g_client_status[0]); + + spin_unlock_bh(&hif->tx_lock); +} + +/* + * client_put_rxpacket- + * This functions puts the Rx pkt in the given client Rx queue. + * It actually swap the Rx pkt in the client Rx descriptor buffer + * and returns the free buffer from it. + * + * If the function returns NULL means client Rx queue is full and + * packet couldn't send to client queue. 
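 *
 * (Editorial example, not part of the original patch.) In page mode the
 * replacement buffer is carved from the remainder of the current page:
 * buffers are rounded up to HIF_RX_PKT_MIN_SIZE (2 KiB), so with 4 KiB
 * pages a fresh page yields two slots before __get_free_page() must be
 * called again.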
+ */ +static void *client_put_rxpacket(struct hif_rx_queue *queue, void *pkt, u32 len, + u32 flags, u32 client_ctrl, u32 *rem_len) +{ + void *free_pkt = NULL; + struct rx_queue_desc *desc = queue->base + queue->write_idx; + + if (readl(&desc->ctrl) & CL_DESC_OWN) { + if (page_mode) { + int rem_page_size = PAGE_SIZE - + PRESENT_OFST_IN_PAGE(pkt); + int cur_pkt_size = ROUND_MIN_RX_SIZE(len + + pfe_pkt_headroom); + *rem_len = (rem_page_size - cur_pkt_size); + if (*rem_len) { + free_pkt = pkt + cur_pkt_size; + get_page(virt_to_page(free_pkt)); + } else { + free_pkt = (void + *)__get_free_page(GFP_ATOMIC | GFP_DMA_PFE); + *rem_len = pfe_pkt_size; + } + } else { + free_pkt = kmalloc(PFE_BUF_SIZE, GFP_ATOMIC | + GFP_DMA_PFE); + *rem_len = PFE_BUF_SIZE - pfe_pkt_headroom; + } + + if (free_pkt) { + desc->data = pkt; + desc->client_ctrl = client_ctrl; + /* + * Ensure everything else is written to DDR before + * writing bd->ctrl + */ + smp_wmb(); + writel(CL_DESC_BUF_LEN(len) | flags, &desc->ctrl); + queue->write_idx = (queue->write_idx + 1) + & (queue->size - 1); + + free_pkt += pfe_pkt_headroom; + } + } + + return free_pkt; +} + +/* + * pfe_hif_rx_process- + * This function does pfe hif rx queue processing. + * Dequeue packet from Rx queue and send it to corresponding client queue + */ +static int pfe_hif_rx_process(struct pfe_hif *hif, int budget) +{ + struct hif_desc *desc; + struct hif_hdr *pkt_hdr; + struct __hif_hdr hif_hdr; + void *free_buf; + int rtc, len, rx_processed = 0; + struct __hif_desc local_desc; + int flags; + unsigned int desc_p; + unsigned int buf_size = 0; + + spin_lock_bh(&hif->lock); + + rtc = hif->rxtoclean_index; + + while (rx_processed < budget) { + desc = hif->rx_base + rtc; + + __memcpy12(&local_desc, desc); + + /* ACK pending Rx interrupt */ + if (local_desc.ctrl & BD_CTRL_DESC_EN) { + writel(HIF_INT | HIF_RXPKT_INT, HIF_INT_SRC); + + if (rx_processed == 0) { + if (napi_first_batch == 1) { + desc_p = hif->descr_baseaddr_p + + ((unsigned long int)(desc) - + (unsigned long + int)hif->descr_baseaddr_v); + napi_first_batch = 0; + } + } + + __memcpy12(&local_desc, desc); + + if (local_desc.ctrl & BD_CTRL_DESC_EN) + break; + } + + napi_first_batch = 0; + +#ifdef HIF_NAPI_STATS + hif->napi_counters[NAPI_DESC_COUNT]++; +#endif + len = BD_BUF_LEN(local_desc.ctrl); + /* + * dma_unmap_single(hif->dev, DDR_PFE_TO_PHYS(local_desc.data), + * hif->rx_buf_len[rtc], DMA_FROM_DEVICE); + */ + dma_unmap_single(hif->dev, DDR_PFE_TO_PHYS(local_desc.data), + hif->rx_buf_len[rtc], DMA_FROM_DEVICE); + + pkt_hdr = (struct hif_hdr *)hif->rx_buf_addr[rtc]; + + /* Track last HIF header received */ + if (!hif->started) { + hif->started = 1; + + __memcpy8(&hif_hdr, pkt_hdr); + + hif->qno = hif_hdr.hdr.q_num; + hif->client_id = hif_hdr.hdr.client_id; + hif->client_ctrl = (hif_hdr.hdr.client_ctrl1 << 16) | + hif_hdr.hdr.client_ctrl; + flags = CL_DESC_FIRST; + + } else { + flags = 0; + } + + if (local_desc.ctrl & BD_CTRL_LIFM) + flags |= CL_DESC_LAST; + + /* Check for valid client id and still registered */ + if ((hif->client_id >= HIF_CLIENTS_MAX) || + !(test_bit(hif->client_id, + &hif->shm->g_client_status[0]))) { + printk_ratelimited("%s: packet with invalid client id %d q_num %d\n", + __func__, + hif->client_id, + hif->qno); + + free_buf = pkt_hdr; + + goto pkt_drop; + } + + /* Check to valid queue number */ + if (hif->client[hif->client_id].rx_qn <= hif->qno) { + pr_info("%s: packet with invalid queue: %d\n" + , __func__, hif->qno); + hif->qno = 0; + } + + free_buf = + 
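		/*
		 * (Editorial note, not part of the original patch.)
		 * client_put_rxpacket() swaps the filled buffer into the
		 * client queue and returns a fresh one; NULL means the client
		 * queue was full, in which case the code below consumes the
		 * whole budget and retries this descriptor on the next NAPI
		 * poll.
		 */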
client_put_rxpacket(&hif->client[hif->client_id].rx_q[hif->qno], + (void *)pkt_hdr, len, flags, + hif->client_ctrl, &buf_size); + + hif_lib_indicate_client(hif->client_id, EVENT_RX_PKT_IND, + hif->qno); + + if (unlikely(!free_buf)) { +#ifdef HIF_NAPI_STATS + hif->napi_counters[NAPI_CLIENT_FULL_COUNT]++; +#endif + /* + * If we want to keep in polling mode to retry later, + * we need to tell napi that we consumed + * the full budget or we will hit a livelock scenario. + * The core code keeps this napi instance + * at the head of the list and none of the other + * instances get to run + */ + rx_processed = budget; + + if (flags & CL_DESC_FIRST) + hif->started = 0; + + break; + } + +pkt_drop: + /*Fill free buffer in the descriptor */ + hif->rx_buf_addr[rtc] = free_buf; + hif->rx_buf_len[rtc] = min(pfe_pkt_size, buf_size); + writel((DDR_PHYS_TO_PFE + ((u32)dma_map_single(hif->dev, + free_buf, hif->rx_buf_len[rtc], DMA_FROM_DEVICE))), + &desc->data); + /* + * Ensure everything else is written to DDR before + * writing bd->ctrl + */ + wmb(); + writel((BD_CTRL_PKT_INT_EN | BD_CTRL_LIFM | BD_CTRL_DIR | + BD_CTRL_DESC_EN | BD_BUF_LEN(hif->rx_buf_len[rtc])), + &desc->ctrl); + + rtc = (rtc + 1) & (hif->rx_ring_size - 1); + + if (local_desc.ctrl & BD_CTRL_LIFM) { + if (!(hif->client_ctrl & HIF_CTRL_RX_CONTINUED)) { + rx_processed++; + +#ifdef HIF_NAPI_STATS + hif->napi_counters[NAPI_PACKET_COUNT]++; +#endif + } + hif->started = 0; + } + } + + hif->rxtoclean_index = rtc; + spin_unlock_bh(&hif->lock); + + /* we made some progress, re-start rx dma in case it stopped */ + hif_rx_dma_start(); + + return rx_processed; +} + +/* + * client_ack_txpacket- + * This function ack the Tx packet in the give client Tx queue by resetting + * ownership bit in the descriptor. + */ +static int client_ack_txpacket(struct pfe_hif *hif, unsigned int client_id, + unsigned int q_no) +{ + struct hif_tx_queue *queue = &hif->client[client_id].tx_q[q_no]; + struct tx_queue_desc *desc = queue->base + queue->ack_idx; + + if (readl(&desc->ctrl) & CL_DESC_OWN) { + writel((readl(&desc->ctrl) & ~CL_DESC_OWN), &desc->ctrl); + queue->ack_idx = (queue->ack_idx + 1) & (queue->size - 1); + + return 0; + + } else { + /*This should not happen */ + pr_err("%s: %d %d %d %d %d %p %d\n", __func__, + hif->txtosend, hif->txtoclean, hif->txavail, + client_id, q_no, queue, queue->ack_idx); + WARN(1, "%s: doesn't own this descriptor", __func__); + return 1; + } +} + +void __hif_tx_done_process(struct pfe_hif *hif, int count) +{ + struct hif_desc *desc; + struct hif_desc_sw *desc_sw; + int ttc, tx_avl; + int pkts_done[HIF_CLIENTS_MAX] = {0, 0}; + + ttc = hif->txtoclean; + tx_avl = hif->txavail; + + while ((tx_avl < hif->tx_ring_size) && count--) { + desc = hif->tx_base + ttc; + + if (readl(&desc->ctrl) & BD_CTRL_DESC_EN) + break; + + desc_sw = &hif->tx_sw_queue[ttc]; + + if (desc_sw->data) { + /* + * dmap_unmap_single(hif->dev, desc_sw->data, + * desc_sw->len, DMA_TO_DEVICE); + */ + dma_unmap_single(hif->dev, desc_sw->data, + desc_sw->len, DMA_TO_DEVICE); + } + + if (desc_sw->client_id > HIF_CLIENTS_MAX) + pr_err("Invalid cl id %d\n", desc_sw->client_id); + + pkts_done[desc_sw->client_id]++; + + client_ack_txpacket(hif, desc_sw->client_id, desc_sw->q_no); + + ttc = (ttc + 1) & (hif->tx_ring_size - 1); + tx_avl++; + } + + if (pkts_done[0]) + hif_lib_indicate_client(0, EVENT_TXDONE_IND, 0); + if (pkts_done[1]) + hif_lib_indicate_client(1, EVENT_TXDONE_IND, 0); + + hif->txtoclean = ttc; + hif->txavail = tx_avl; + + if (!count) { + 
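		/*
		 * (Editorial note, not part of the original patch.) Budget
		 * exhausted with descriptors still pending: defer the rest to
		 * the cleanup tasklet instead of re-enabling the Tx-done
		 * interrupt.
		 */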
tasklet_schedule(&hif->tx_cleanup_tasklet); + } else { + /*Enable Tx done interrupt */ + writel(readl_relaxed(HIF_INT_ENABLE) | HIF_TXPKT_INT, + HIF_INT_ENABLE); + } +} + +static void pfe_tx_do_cleanup(unsigned long data) +{ + struct pfe_hif *hif = (struct pfe_hif *)data; + + writel(HIF_INT | HIF_TXPKT_INT, HIF_INT_SRC); + + hif_tx_done_process(hif, 64); +} + +/* + * __hif_xmit_pkt - + * This function puts one packet in the HIF Tx queue + */ +void __hif_xmit_pkt(struct pfe_hif *hif, unsigned int client_id, unsigned int + q_no, void *data, u32 len, unsigned int flags) +{ + struct hif_desc *desc; + struct hif_desc_sw *desc_sw; + + desc = hif->tx_base + hif->txtosend; + desc_sw = &hif->tx_sw_queue[hif->txtosend]; + + desc_sw->len = len; + desc_sw->client_id = client_id; + desc_sw->q_no = q_no; + desc_sw->flags = flags; + + if (flags & HIF_DONT_DMA_MAP) { + desc_sw->data = 0; + writel((u32)DDR_PHYS_TO_PFE(data), &desc->data); + } else { + desc_sw->data = dma_map_single(hif->dev, data, len, + DMA_TO_DEVICE); + writel((u32)DDR_PHYS_TO_PFE(desc_sw->data), &desc->data); + } + + hif->txtosend = (hif->txtosend + 1) & (hif->tx_ring_size - 1); + hif->txavail--; + + if ((!((flags & HIF_DATA_VALID) && (flags & + HIF_LAST_BUFFER)))) + goto skip_tx; + + /* + * Ensure everything else is written to DDR before + * writing bd->ctrl + */ + wmb(); + + do { + desc_sw = &hif->tx_sw_queue[hif->txtoflush]; + desc = hif->tx_base + hif->txtoflush; + + if (desc_sw->flags & HIF_LAST_BUFFER) { + writel((BD_CTRL_LIFM | + BD_CTRL_BRFETCH_DISABLE | BD_CTRL_RTFETCH_DISABLE + | BD_CTRL_PARSE_DISABLE | BD_CTRL_DESC_EN | + BD_CTRL_PKT_INT_EN | BD_BUF_LEN(desc_sw->len)), + &desc->ctrl); + } else { + writel((BD_CTRL_DESC_EN | + BD_BUF_LEN(desc_sw->len)), &desc->ctrl); + } + hif->txtoflush = (hif->txtoflush + 1) & (hif->tx_ring_size - 1); + } + while (hif->txtoflush != hif->txtosend) + ; + +skip_tx: + return; +} + +static irqreturn_t wol_isr(int irq, void *dev_id) +{ + pr_info("WoL\n"); + gemac_set_wol(EMAC1_BASE_ADDR, 0); + gemac_set_wol(EMAC2_BASE_ADDR, 0); + return IRQ_HANDLED; +} + +/* + * hif_isr- + * This ISR routine processes Rx/Tx done interrupts from the HIF hardware block + */ +static irqreturn_t hif_isr(int irq, void *dev_id) +{ + struct pfe_hif *hif = (struct pfe_hif *)dev_id; + int int_status; + int int_enable_mask; + + /*Read hif interrupt source register */ + int_status = readl_relaxed(HIF_INT_SRC); + int_enable_mask = readl_relaxed(HIF_INT_ENABLE); + + if ((int_status & HIF_INT) == 0) + return IRQ_NONE; + + int_status &= ~(HIF_INT); + + if (int_status & HIF_RXPKT_INT) { + int_status &= ~(HIF_RXPKT_INT); + int_enable_mask &= ~(HIF_RXPKT_INT); + + napi_first_batch = 1; + + if (napi_schedule_prep(&hif->napi)) { +#ifdef HIF_NAPI_STATS + hif->napi_counters[NAPI_SCHED_COUNT]++; +#endif + __napi_schedule(&hif->napi); + } + } + + if (int_status & HIF_TXPKT_INT) { + int_status &= ~(HIF_TXPKT_INT); + int_enable_mask &= ~(HIF_TXPKT_INT); + /*Schedule tx cleanup tassklet */ + tasklet_schedule(&hif->tx_cleanup_tasklet); + } + + /*Disable interrupts, they will be enabled after they are serviced */ + writel_relaxed(int_enable_mask, HIF_INT_ENABLE); + + if (int_status) { + pr_info("%s : Invalid interrupt : %d\n", __func__, + int_status); + writel(int_status, HIF_INT_SRC); + } + + return IRQ_HANDLED; +} + +void hif_process_client_req(struct pfe_hif *hif, int req, int data1, int data2) +{ + unsigned int client_id = data1; + + if (client_id >= HIF_CLIENTS_MAX) { + pr_err("%s: client id %d out of bounds\n", __func__, + client_id); + 
return; + } + + switch (req) { + case REQUEST_CL_REGISTER: + /* Request for register a client */ + pr_info("%s: register client_id %d\n", + __func__, client_id); + pfe_hif_client_register(hif, client_id, (struct + hif_client_shm *)&hif->shm->client[client_id]); + break; + + case REQUEST_CL_UNREGISTER: + pr_info("%s: unregister client_id %d\n", + __func__, client_id); + + /* Request for unregister a client */ + pfe_hif_client_unregister(hif, client_id); + + break; + + default: + pr_err("%s: unsupported request %d\n", + __func__, req); + break; + } + + /* + * Process client Tx queues + * Currently we don't have checking for tx pending + */ +} + +/* + * pfe_hif_rx_poll + * This function is NAPI poll function to process HIF Rx queue. + */ +static int pfe_hif_rx_poll(struct napi_struct *napi, int budget) +{ + struct pfe_hif *hif = container_of(napi, struct pfe_hif, napi); + int work_done; + +#ifdef HIF_NAPI_STATS + hif->napi_counters[NAPI_POLL_COUNT]++; +#endif + + work_done = pfe_hif_rx_process(hif, budget); + + if (work_done < budget) { + napi_complete(napi); + writel(readl_relaxed(HIF_INT_ENABLE) | HIF_RXPKT_INT, + HIF_INT_ENABLE); + } +#ifdef HIF_NAPI_STATS + else + hif->napi_counters[NAPI_FULL_BUDGET_COUNT]++; +#endif + + return work_done; +} + +/* + * pfe_hif_init + * This function initializes the baseaddresses and irq, etc. + */ +int pfe_hif_init(struct pfe *pfe) +{ + struct pfe_hif *hif = &pfe->hif; + int err; + + pr_info("%s\n", __func__); + + hif->dev = pfe->dev; + hif->irq = pfe->hif_irq; + + err = pfe_hif_alloc_descr(hif); + if (err) + goto err0; + + if (pfe_hif_init_buffers(hif)) { + pr_err("%s: Could not initialize buffer descriptors\n" + , __func__); + err = -ENOMEM; + goto err1; + } + + /* Initialize NAPI for Rx processing */ + init_dummy_netdev(&hif->dummy_dev); + netif_napi_add(&hif->dummy_dev, &hif->napi, pfe_hif_rx_poll, + HIF_RX_POLL_WEIGHT); + napi_enable(&hif->napi); + + spin_lock_init(&hif->tx_lock); + spin_lock_init(&hif->lock); + + hif_init(); + hif_rx_enable(); + hif_tx_enable(); + + /* Disable tx done interrupt */ + writel(HIF_INT_MASK, HIF_INT_ENABLE); + + gpi_enable(HGPI_BASE_ADDR); + + err = request_irq(hif->irq, hif_isr, 0, "pfe_hif", hif); + if (err) { + pr_err("%s: failed to get the hif IRQ = %d\n", + __func__, hif->irq); + goto err1; + } + + err = request_irq(pfe->wol_irq, wol_isr, 0, "pfe_wol", pfe); + if (err) { + pr_err("%s: failed to get the wol IRQ = %d\n", + __func__, pfe->wol_irq); + goto err1; + } + + tasklet_init(&hif->tx_cleanup_tasklet, + (void(*)(unsigned long))pfe_tx_do_cleanup, + (unsigned long)hif); + + return 0; +err1: + pfe_hif_free_descr(hif); +err0: + return err; +} + +/* pfe_hif_exit- */ +void pfe_hif_exit(struct pfe *pfe) +{ + struct pfe_hif *hif = &pfe->hif; + + pr_info("%s\n", __func__); + + tasklet_kill(&hif->tx_cleanup_tasklet); + + spin_lock_bh(&hif->lock); + hif->shm->g_client_status[0] = 0; + /* Make sure all clients are disabled*/ + hif->shm->g_client_status[1] = 0; + + spin_unlock_bh(&hif->lock); + + /*Disable Rx/Tx */ + gpi_disable(HGPI_BASE_ADDR); + hif_rx_disable(); + hif_tx_disable(); + + napi_disable(&hif->napi); + netif_napi_del(&hif->napi); + + free_irq(pfe->wol_irq, pfe); + free_irq(hif->irq, hif); + + pfe_hif_release_buffers(hif); + pfe_hif_free_descr(hif); +} diff --git a/drivers/staging/fsl_ppfe/pfe_hif.h b/drivers/staging/fsl_ppfe/pfe_hif.h new file mode 100644 index 0000000..6e36f0c --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_hif.h @@ -0,0 +1,211 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. 
+ * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _PFE_HIF_H_ +#define _PFE_HIF_H_ + +#include <linux/netdevice.h> + +#define HIF_NAPI_STATS + +#define HIF_CLIENT_QUEUES_MAX 16 +#define HIF_RX_POLL_WEIGHT 64 + +#define HIF_RX_PKT_MIN_SIZE 0x800 /* 2KB */ +#define HIF_RX_PKT_MIN_SIZE_MASK ~(HIF_RX_PKT_MIN_SIZE - 1) +#define ROUND_MIN_RX_SIZE(_sz) (((_sz) + (HIF_RX_PKT_MIN_SIZE - 1)) \ + & HIF_RX_PKT_MIN_SIZE_MASK) +#define PRESENT_OFST_IN_PAGE(_buf) (((unsigned long int)(_buf) & (PAGE_SIZE \ + - 1)) & HIF_RX_PKT_MIN_SIZE_MASK) + +enum { + NAPI_SCHED_COUNT = 0, + NAPI_POLL_COUNT, + NAPI_PACKET_COUNT, + NAPI_DESC_COUNT, + NAPI_FULL_BUDGET_COUNT, + NAPI_CLIENT_FULL_COUNT, + NAPI_MAX_COUNT +}; + +/* + * HIF_TX_DESC_NT value should be always greter than 4, + * Otherwise HIF_TX_POLL_MARK will become zero. + */ +#define HIF_RX_DESC_NT 256 +#define HIF_TX_DESC_NT 2048 + +#define HIF_FIRST_BUFFER BIT(0) +#define HIF_LAST_BUFFER BIT(1) +#define HIF_DONT_DMA_MAP BIT(2) +#define HIF_DATA_VALID BIT(3) +#define HIF_TSO BIT(4) + +enum { + PFE_CL_GEM0 = 0, + PFE_CL_GEM1, + HIF_CLIENTS_MAX +}; + +/*structure to store client queue info */ +struct hif_rx_queue { + struct rx_queue_desc *base; + u32 size; + u32 write_idx; +}; + +struct hif_tx_queue { + struct tx_queue_desc *base; + u32 size; + u32 ack_idx; +}; + +/*Structure to store the client info */ +struct hif_client { + int rx_qn; + struct hif_rx_queue rx_q[HIF_CLIENT_QUEUES_MAX]; + int tx_qn; + struct hif_tx_queue tx_q[HIF_CLIENT_QUEUES_MAX]; +}; + +/*HIF hardware buffer descriptor */ +struct hif_desc { + u32 ctrl; + u32 status; + u32 data; + u32 next; +}; + +struct __hif_desc { + u32 ctrl; + u32 status; + u32 data; +}; + +struct hif_desc_sw { + dma_addr_t data; + u16 len; + u8 client_id; + u8 q_no; + u16 flags; +}; + +struct hif_hdr { + u8 client_id; + u8 q_num; + u16 client_ctrl; + u16 client_ctrl1; +}; + +struct __hif_hdr { + union { + struct hif_hdr hdr; + u32 word[2]; + }; +}; + +struct hif_ipsec_hdr { + u16 sa_handle[2]; +} __packed; + +/* HIF_CTRL_TX... defines */ +#define HIF_CTRL_TX_CHECKSUM BIT(2) + +/* HIF_CTRL_RX... 
defines */ +#define HIF_CTRL_RX_OFFSET_OFST (24) +#define HIF_CTRL_RX_CHECKSUMMED BIT(2) +#define HIF_CTRL_RX_CONTINUED BIT(1) + +struct pfe_hif { + /* To store registered clients in hif layer */ + struct hif_client client[HIF_CLIENTS_MAX]; + struct hif_shm *shm; + int irq; + + void *descr_baseaddr_v; + unsigned long descr_baseaddr_p; + + struct hif_desc *rx_base; + u32 rx_ring_size; + u32 rxtoclean_index; + void *rx_buf_addr[HIF_RX_DESC_NT]; + int rx_buf_len[HIF_RX_DESC_NT]; + unsigned int qno; + unsigned int client_id; + unsigned int client_ctrl; + unsigned int started; + + struct hif_desc *tx_base; + u32 tx_ring_size; + u32 txtosend; + u32 txtoclean; + u32 txavail; + u32 txtoflush; + struct hif_desc_sw tx_sw_queue[HIF_TX_DESC_NT]; + +/* tx_lock synchronizes hif packet tx as well as pfe_hif structure access */ + spinlock_t tx_lock; +/* lock synchronizes hif rx queue processing */ + spinlock_t lock; + struct net_device dummy_dev; + struct napi_struct napi; + struct device *dev; + +#ifdef HIF_NAPI_STATS + unsigned int napi_counters[NAPI_MAX_COUNT]; +#endif + struct tasklet_struct tx_cleanup_tasklet; +}; + +void __hif_xmit_pkt(struct pfe_hif *hif, unsigned int client_id, unsigned int + q_no, void *data, u32 len, unsigned int flags); +int hif_xmit_pkt(struct pfe_hif *hif, unsigned int client_id, unsigned int q_no, + void *data, unsigned int len); +void __hif_tx_done_process(struct pfe_hif *hif, int count); +void hif_process_client_req(struct pfe_hif *hif, int req, int data1, int + data2); +int pfe_hif_init(struct pfe *pfe); +void pfe_hif_exit(struct pfe *pfe); +void pfe_hif_rx_idle(struct pfe_hif *hif); +static inline void hif_tx_done_process(struct pfe_hif *hif, int count) +{ + spin_lock_bh(&hif->tx_lock); + __hif_tx_done_process(hif, count); + spin_unlock_bh(&hif->tx_lock); +} + +static inline void hif_tx_lock(struct pfe_hif *hif) +{ + spin_lock_bh(&hif->tx_lock); +} + +static inline void hif_tx_unlock(struct pfe_hif *hif) +{ + spin_unlock_bh(&hif->tx_lock); +} + +static inline int __hif_tx_avail(struct pfe_hif *hif) +{ + return hif->txavail; +} + +#define __memcpy8(dst, src) memcpy(dst, src, 8) +#define __memcpy12(dst, src) memcpy(dst, src, 12) +#define __memcpy(dst, src, len) memcpy(dst, src, len) + +#endif /* _PFE_HIF_H_ */ diff --git a/drivers/staging/fsl_ppfe/pfe_hif_lib.c b/drivers/staging/fsl_ppfe/pfe_hif_lib.c new file mode 100644 index 0000000..eeab4ff --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_hif_lib.c @@ -0,0 +1,637 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/moduleparam.h> +#include <linux/cpu.h> + +#include "pfe_mod.h" +#include "pfe_hif.h" +#include "pfe_hif_lib.h" + +unsigned int lro_mode; +unsigned int page_mode; +unsigned int tx_qos = 1; +module_param(tx_qos, uint, 0444); +MODULE_PARM_DESC(tx_qos, "0: disable ,\n" + "1: enable (default), guarantee no packet drop at TMU level\n"); +unsigned int pfe_pkt_size; +unsigned int pfe_pkt_headroom; +unsigned int emac_txq_cnt; + +/* + * @pfe_hal_lib.c. + * Common functions used by HIF client drivers + */ + +/*HIF shared memory Global variable */ +struct hif_shm ghif_shm; + +/* Cleanup the HIF shared memory, release HIF rx_buffer_pool. + * This function should be called after pfe_hif_exit + * + * @param[in] hif_shm Shared memory address location in DDR + */ +static void pfe_hif_shm_clean(struct hif_shm *hif_shm) +{ + int i; + void *pkt; + + for (i = 0; i < hif_shm->rx_buf_pool_cnt; i++) { + pkt = hif_shm->rx_buf_pool[i]; + if (pkt) { + hif_shm->rx_buf_pool[i] = NULL; + pkt -= pfe_pkt_headroom; + + if (page_mode) + put_page(virt_to_page(pkt)); + else + kfree(pkt); + } + } +} + +/* Initialize shared memory used between HIF driver and clients, + * allocate rx_buffer_pool required for HIF Rx descriptors. + * This function should be called before initializing HIF driver. + * + * @param[in] hif_shm Shared memory address location in DDR + * @rerurn 0 - on succes, <0 on fail to initialize + */ +static int pfe_hif_shm_init(struct hif_shm *hif_shm) +{ + int i; + void *pkt; + + memset(hif_shm, 0, sizeof(struct hif_shm)); + hif_shm->rx_buf_pool_cnt = HIF_RX_DESC_NT; + + for (i = 0; i < hif_shm->rx_buf_pool_cnt; i++) { + if (page_mode) { + pkt = (void *)__get_free_page(GFP_KERNEL | + GFP_DMA_PFE); + } else { + pkt = kmalloc(PFE_BUF_SIZE, GFP_KERNEL | GFP_DMA_PFE); + } + + if (pkt) + hif_shm->rx_buf_pool[i] = pkt + pfe_pkt_headroom; + else + goto err0; + } + + return 0; + +err0: + pr_err("%s Low memory\n", __func__); + pfe_hif_shm_clean(hif_shm); + return -ENOMEM; +} + +/*This function sends indication to HIF driver + * + * @param[in] hif hif context + */ +static void hif_lib_indicate_hif(struct pfe_hif *hif, int req, int data1, int + data2) +{ + hif_process_client_req(hif, req, data1, data2); +} + +void hif_lib_indicate_client(int client_id, int event_type, int qno) +{ + struct hif_client_s *client = pfe->hif_client[client_id]; + + if (!client || (event_type >= HIF_EVENT_MAX) || (qno >= + HIF_CLIENT_QUEUES_MAX)) + return; + + if (!test_and_set_bit(qno, &client->queue_mask[event_type])) + client->event_handler(client->priv, event_type, qno); +} + +/*This function releases Rx queue descriptors memory and pre-filled buffers + * + * @param[in] client hif_client context + */ +static void hif_lib_client_release_rx_buffers(struct hif_client_s *client) +{ + struct rx_queue_desc *desc; + int qno, ii; + void *buf; + + for (qno = 0; qno < client->rx_qn; qno++) { + desc = client->rx_q[qno].base; + + for (ii = 0; ii < client->rx_q[qno].size; ii++) { + buf = (void *)desc->data; + if (buf) { + buf -= pfe_pkt_headroom; + + if (page_mode) + free_page((unsigned long)buf); + else + kfree(buf); + + desc->ctrl = 0; + } + + desc++; + } + } + + kfree(client->rx_qbase); +} + +/*This function allocates memory for the rxq descriptors and pre-fill rx queues + * with 
buffers. + * @param[in] client client context + * @param[in] q_size size of the rxQ, all queues are of same size + */ +static int hif_lib_client_init_rx_buffers(struct hif_client_s *client, int + q_size) +{ + struct rx_queue_desc *desc; + struct hif_client_rx_queue *queue; + int ii, qno; + + /*Allocate memory for the client queues */ + client->rx_qbase = kzalloc(client->rx_qn * q_size * sizeof(struct + rx_queue_desc), GFP_KERNEL); + if (!client->rx_qbase) + goto err; + + for (qno = 0; qno < client->rx_qn; qno++) { + queue = &client->rx_q[qno]; + + queue->base = client->rx_qbase + qno * q_size * sizeof(struct + rx_queue_desc); + queue->size = q_size; + queue->read_idx = 0; + queue->write_idx = 0; + + pr_debug("rx queue: %d, base: %p, size: %d\n", qno, + queue->base, queue->size); + } + + for (qno = 0; qno < client->rx_qn; qno++) { + queue = &client->rx_q[qno]; + desc = queue->base; + + for (ii = 0; ii < queue->size; ii++) { + desc->ctrl = CL_DESC_BUF_LEN(pfe_pkt_size) | + CL_DESC_OWN; + desc++; + } + } + + return 0; + +err: + return 1; +} + + +static void hif_lib_client_cleanup_tx_queue(struct hif_client_tx_queue *queue) +{ + pr_debug("%s\n", __func__); + + /* + * Check if there are any pending packets. Client must flush the tx + * queues before unregistering, by calling by calling + * hif_lib_tx_get_next_complete() + * + * Hif no longer calls since we are no longer registered + */ + if (queue->tx_pending) + pr_err("%s: pending transmit packets\n", __func__); +} + +static void hif_lib_client_release_tx_buffers(struct hif_client_s *client) +{ + int qno; + + pr_debug("%s\n", __func__); + + for (qno = 0; qno < client->tx_qn; qno++) + hif_lib_client_cleanup_tx_queue(&client->tx_q[qno]); + + kfree(client->tx_qbase); +} + +static int hif_lib_client_init_tx_buffers(struct hif_client_s *client, int + q_size) +{ + struct hif_client_tx_queue *queue; + int qno; + + client->tx_qbase = kzalloc(client->tx_qn * q_size * sizeof(struct + tx_queue_desc), GFP_KERNEL); + if (!client->tx_qbase) + return 1; + + for (qno = 0; qno < client->tx_qn; qno++) { + queue = &client->tx_q[qno]; + + queue->base = client->tx_qbase + qno * q_size * sizeof(struct + tx_queue_desc); + queue->size = q_size; + queue->read_idx = 0; + queue->write_idx = 0; + queue->tx_pending = 0; + queue->nocpy_flag = 0; + queue->prev_tmu_tx_pkts = 0; + queue->done_tmu_tx_pkts = 0; + + pr_debug("tx queue: %d, base: %p, size: %d\n", qno, + queue->base, queue->size); + } + + return 0; +} + +static int hif_lib_event_dummy(void *priv, int event_type, int qno) +{ + return 0; +} + +int hif_lib_client_register(struct hif_client_s *client) +{ + struct hif_shm *hif_shm; + struct hif_client_shm *client_shm; + int err, i; + /* int loop_cnt = 0; */ + + pr_debug("%s\n", __func__); + + /*Allocate memory before spin_lock*/ + if (hif_lib_client_init_rx_buffers(client, client->rx_qsize)) { + err = -ENOMEM; + goto err_rx; + } + + if (hif_lib_client_init_tx_buffers(client, client->tx_qsize)) { + err = -ENOMEM; + goto err_tx; + } + + spin_lock_bh(&pfe->hif.lock); + if (!(client->pfe) || (client->id >= HIF_CLIENTS_MAX) || + (pfe->hif_client[client->id])) { + err = -EINVAL; + goto err; + } + + hif_shm = client->pfe->hif.shm; + + if (!client->event_handler) + client->event_handler = hif_lib_event_dummy; + + /*Initialize client specific shared memory */ + client_shm = (struct hif_client_shm *)&hif_shm->client[client->id]; + client_shm->rx_qbase = (unsigned long int)client->rx_qbase; + client_shm->rx_qsize = client->rx_qsize; + client_shm->tx_qbase = (unsigned long 
int)client->tx_qbase; + client_shm->tx_qsize = client->tx_qsize; + client_shm->ctrl = (client->tx_qn << CLIENT_CTRL_TX_Q_CNT_OFST) | + (client->rx_qn << CLIENT_CTRL_RX_Q_CNT_OFST); + /* spin_lock_init(&client->rx_lock); */ + + for (i = 0; i < HIF_EVENT_MAX; i++) { + client->queue_mask[i] = 0; /* + * By default all events are + * unmasked + */ + } + + /*Indicate to HIF driver*/ + hif_lib_indicate_hif(&pfe->hif, REQUEST_CL_REGISTER, client->id, 0); + + pr_debug("%s: client: %p, client_id: %d, tx_qsize: %d, rx_qsize: %d\n", + __func__, client, client->id, client->tx_qsize, + client->rx_qsize); + + client->cpu_id = -1; + + pfe->hif_client[client->id] = client; + spin_unlock_bh(&pfe->hif.lock); + + return 0; + +err: + spin_unlock_bh(&pfe->hif.lock); + hif_lib_client_release_tx_buffers(client); + +err_tx: + hif_lib_client_release_rx_buffers(client); + +err_rx: + return err; +} + +int hif_lib_client_unregister(struct hif_client_s *client) +{ + struct pfe *pfe = client->pfe; + u32 client_id = client->id; + + pr_info( + "%s : client: %p, client_id: %d, txQ_depth: %d, rxQ_depth: %d\n" + , __func__, client, client->id, client->tx_qsize, + client->rx_qsize); + + spin_lock_bh(&pfe->hif.lock); + hif_lib_indicate_hif(&pfe->hif, REQUEST_CL_UNREGISTER, client->id, 0); + + hif_lib_client_release_tx_buffers(client); + hif_lib_client_release_rx_buffers(client); + pfe->hif_client[client_id] = NULL; + spin_unlock_bh(&pfe->hif.lock); + + return 0; +} + +int hif_lib_event_handler_start(struct hif_client_s *client, int event, + int qno) +{ + struct hif_client_rx_queue *queue = &client->rx_q[qno]; + struct rx_queue_desc *desc = queue->base + queue->read_idx; + + if ((event >= HIF_EVENT_MAX) || (qno >= HIF_CLIENT_QUEUES_MAX)) { + pr_debug("%s: Unsupported event : %d queue number : %d\n", + __func__, event, qno); + return -1; + } + + test_and_clear_bit(qno, &client->queue_mask[event]); + + switch (event) { + case EVENT_RX_PKT_IND: + if (!(desc->ctrl & CL_DESC_OWN)) + hif_lib_indicate_client(client->id, + EVENT_RX_PKT_IND, qno); + break; + + case EVENT_HIGH_RX_WM: + case EVENT_TXDONE_IND: + default: + break; + } + + return 0; +} + +/* + * This function gets one packet from the specified client queue + * It also refill the rx buffer + */ +void *hif_lib_receive_pkt(struct hif_client_s *client, int qno, int *len, int + *ofst, unsigned int *rx_ctrl, + unsigned int *desc_ctrl, void **priv_data) +{ + struct hif_client_rx_queue *queue = &client->rx_q[qno]; + struct rx_queue_desc *desc; + void *pkt = NULL; + + /* + * Following lock is to protect rx queue access from, + * hif_lib_event_handler_start. + * In general below lock is not required, because hif_lib_xmit_pkt and + * hif_lib_event_handler_start are called from napi poll and which is + * not re-entrant. But if some client use in different way this lock is + * required. 
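 *
 * (Editorial sketch, not part of the original patch.) A typical consumer
 * loop over one queue, assuming a registered client; process() is a
 * hypothetical callback:
 *
 *	int len, ofst;
 *	unsigned int rx_ctrl, desc_ctrl;
 *	void *priv, *pkt;
 *
 *	while ((pkt = hif_lib_receive_pkt(client, qno, &len, &ofst,
 *					  &rx_ctrl, &desc_ctrl, &priv)))
 *		process(pkt + ofst, len);	// hypothetical consumer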
+ */ + /*spin_lock_irqsave(&client->rx_lock, flags); */ + desc = queue->base + queue->read_idx; + if (!(desc->ctrl & CL_DESC_OWN)) { + pkt = desc->data - pfe_pkt_headroom; + + *rx_ctrl = desc->client_ctrl; + *desc_ctrl = desc->ctrl; + + if (desc->ctrl & CL_DESC_FIRST) { + u16 size = *rx_ctrl >> HIF_CTRL_RX_OFFSET_OFST; + + if (size) { + *len = CL_DESC_BUF_LEN(desc->ctrl) - + PFE_PKT_HEADER_SZ - size; + *ofst = pfe_pkt_headroom + PFE_PKT_HEADER_SZ + + size; + *priv_data = desc->data + PFE_PKT_HEADER_SZ; + } else { + *len = CL_DESC_BUF_LEN(desc->ctrl) - + PFE_PKT_HEADER_SZ; + *ofst = pfe_pkt_headroom + PFE_PKT_HEADER_SZ; + *priv_data = NULL; + } + + } else { + *len = CL_DESC_BUF_LEN(desc->ctrl); + *ofst = pfe_pkt_headroom; + } + + /* + * Needed so we don't free a buffer/page + * twice on module_exit + */ + desc->data = NULL; + + /* + * Ensure everything else is written to DDR before + * writing bd->ctrl + */ + smp_wmb(); + + desc->ctrl = CL_DESC_BUF_LEN(pfe_pkt_size) | CL_DESC_OWN; + queue->read_idx = (queue->read_idx + 1) & (queue->size - 1); + } + + /*spin_unlock_irqrestore(&client->rx_lock, flags); */ + return pkt; +} + +static inline void hif_hdr_write(struct hif_hdr *pkt_hdr, unsigned int + client_id, unsigned int qno, + u32 client_ctrl) +{ + /* Optimize the write since the destinaton may be non-cacheable */ + if (!((unsigned long)pkt_hdr & 0x3)) { + ((u32 *)pkt_hdr)[0] = (client_ctrl << 16) | (qno << 8) | + client_id; + } else { + ((u16 *)pkt_hdr)[0] = (qno << 8) | (client_id & 0xFF); + ((u16 *)pkt_hdr)[1] = (client_ctrl & 0xFFFF); + } +} + +/*This function puts the given packet in the specific client queue */ +void __hif_lib_xmit_pkt(struct hif_client_s *client, unsigned int qno, void + *data, unsigned int len, u32 client_ctrl, + unsigned int flags, void *client_data) +{ + struct hif_client_tx_queue *queue = &client->tx_q[qno]; + struct tx_queue_desc *desc = queue->base + queue->write_idx; + + /* First buffer */ + if (flags & HIF_FIRST_BUFFER) { + data -= sizeof(struct hif_hdr); + len += sizeof(struct hif_hdr); + + hif_hdr_write(data, client->id, qno, client_ctrl); + } + + desc->data = client_data; + desc->ctrl = CL_DESC_OWN | CL_DESC_FLAGS(flags); + + __hif_xmit_pkt(&pfe->hif, client->id, qno, data, len, flags); + + queue->write_idx = (queue->write_idx + 1) & (queue->size - 1); + queue->tx_pending++; + queue->jiffies_last_packet = jiffies; +} + +void *hif_lib_tx_get_next_complete(struct hif_client_s *client, int qno, + unsigned int *flags, int count) +{ + struct hif_client_tx_queue *queue = &client->tx_q[qno]; + struct tx_queue_desc *desc = queue->base + queue->read_idx; + + pr_debug("%s: qno : %d rd_indx: %d pending:%d\n", __func__, qno, + queue->read_idx, queue->tx_pending); + + if (!queue->tx_pending) + return NULL; + + if (queue->nocpy_flag && !queue->done_tmu_tx_pkts) { + u32 tmu_tx_pkts = be32_to_cpu(pe_dmem_read(TMU0_ID + + client->id, TMU_DM_TX_TRANS, 4)); + + if (queue->prev_tmu_tx_pkts > tmu_tx_pkts) + queue->done_tmu_tx_pkts = UINT_MAX - + queue->prev_tmu_tx_pkts + tmu_tx_pkts; + else + queue->done_tmu_tx_pkts = tmu_tx_pkts - + queue->prev_tmu_tx_pkts; + + queue->prev_tmu_tx_pkts = tmu_tx_pkts; + + if (!queue->done_tmu_tx_pkts) + return NULL; + } + + if (desc->ctrl & CL_DESC_OWN) + return NULL; + + queue->read_idx = (queue->read_idx + 1) & (queue->size - 1); + queue->tx_pending--; + + *flags = CL_DESC_GET_FLAGS(desc->ctrl); + + if (queue->done_tmu_tx_pkts && (*flags & HIF_LAST_BUFFER)) + queue->done_tmu_tx_pkts--; + + return desc->data; +} + +static void 
hif_lib_tmu_credit_init(struct pfe *pfe) +{ + int i, q; + + for (i = 0; i < NUM_GEMAC_SUPPORT; i++) + for (q = 0; q < emac_txq_cnt; q++) { + pfe->tmu_credit.tx_credit_max[i][q] = (q == 0) ? + DEFAULT_Q0_QDEPTH : DEFAULT_MAX_QDEPTH; + pfe->tmu_credit.tx_credit[i][q] = + pfe->tmu_credit.tx_credit_max[i][q]; + } +} + +/* __hif_lib_update_credit + * + * @param[in] client hif client context + * @param[in] queue queue number in match with TMU + */ +void __hif_lib_update_credit(struct hif_client_s *client, unsigned int queue) +{ + unsigned int tmu_tx_packets, tmp; + + if (tx_qos) { + tmu_tx_packets = be32_to_cpu(pe_dmem_read(TMU0_ID + + client->id, (TMU_DM_TX_TRANS + (queue * 4)), 4)); + + /* tx_packets counter overflowed */ + if (tmu_tx_packets > + pfe->tmu_credit.tx_packets[client->id][queue]) { + tmp = UINT_MAX - tmu_tx_packets + + pfe->tmu_credit.tx_packets[client->id][queue]; + + pfe->tmu_credit.tx_credit[client->id][queue] = + pfe->tmu_credit.tx_credit_max[client->id][queue] - tmp; + } else { + /* TMU tx <= pfe_eth tx, normal case or both OF since + * last time + */ + pfe->tmu_credit.tx_credit[client->id][queue] = + pfe->tmu_credit.tx_credit_max[client->id][queue] - + (pfe->tmu_credit.tx_packets[client->id][queue] - + tmu_tx_packets); + } + } +} + +int pfe_hif_lib_init(struct pfe *pfe) +{ + int rc; + + pr_info("%s\n", __func__); + + if (lro_mode) { + page_mode = 1; + pfe_pkt_size = min(PAGE_SIZE, MAX_PFE_PKT_SIZE); + pfe_pkt_headroom = 0; + } else { + page_mode = 0; + pfe_pkt_size = PFE_PKT_SIZE; + pfe_pkt_headroom = PFE_PKT_HEADROOM; + } + + if (tx_qos) + emac_txq_cnt = EMAC_TXQ_CNT / 2; + else + emac_txq_cnt = EMAC_TXQ_CNT; + + hif_lib_tmu_credit_init(pfe); + pfe->hif.shm = &ghif_shm; + rc = pfe_hif_shm_init(pfe->hif.shm); + + return rc; +} + +void pfe_hif_lib_exit(struct pfe *pfe) +{ + pr_info("%s\n", __func__); + + pfe_hif_shm_clean(pfe->hif.shm); +} diff --git a/drivers/staging/fsl_ppfe/pfe_hif_lib.h b/drivers/staging/fsl_ppfe/pfe_hif_lib.h new file mode 100644 index 0000000..d48eb14 --- /dev/null +++ b/drivers/staging/fsl_ppfe/pfe_hif_lib.h @@ -0,0 +1,240 @@ +/* + * Copyright 2015-2016 Freescale Semiconductor, Inc. + * Copyright 2017 NXP + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#ifndef _PFE_HIF_LIB_H_
+#define _PFE_HIF_LIB_H_
+
+#include "pfe_hif.h"
+
+#define HIF_CL_REQ_TIMEOUT 10
+#define GFP_DMA_PFE 0
+
+enum {
+	REQUEST_CL_REGISTER = 0,
+	REQUEST_CL_UNREGISTER,
+	HIF_REQUEST_MAX
+};
+
+enum {
+	/* Event to indicate that the client rx queue has reached the water
+	 * mark level
+	 */
+	EVENT_HIGH_RX_WM = 0,
+	/* Event to indicate that a packet was received for the client */
+	EVENT_RX_PKT_IND,
+	/* Event to indicate packet tx done for the client */
+	EVENT_TXDONE_IND,
+	HIF_EVENT_MAX
+};
+
+/* Structure to store client queue info */
+struct hif_client_rx_queue {
+	struct rx_queue_desc *base;
+	u32 size;
+	u32 read_idx;
+	u32 write_idx;
+};
+
+struct hif_client_tx_queue {
+	struct tx_queue_desc *base;
+	u32 size;
+	u32 read_idx;
+	u32 write_idx;
+	u32 tx_pending;
+	unsigned long jiffies_last_packet;
+	u32 nocpy_flag;
+	u32 prev_tmu_tx_pkts;
+	u32 done_tmu_tx_pkts;
+};
+
+struct hif_client_s {
+	int id;
+	int tx_qn;
+	int rx_qn;
+	void *rx_qbase;
+	void *tx_qbase;
+	int tx_qsize;
+	int rx_qsize;
+	int cpu_id;
+	struct hif_client_tx_queue tx_q[HIF_CLIENT_QUEUES_MAX];
+	struct hif_client_rx_queue rx_q[HIF_CLIENT_QUEUES_MAX];
+	int (*event_handler)(void *priv, int event, int data);
+	unsigned long queue_mask[HIF_EVENT_MAX];
+	struct pfe *pfe;
+	void *priv;
+};
+
+/*
+ * Client specific shared memory
+ * It contains the number of Rx/Tx queues, base addresses and queue sizes
+ */
+struct hif_client_shm {
+	u32 ctrl; /* 0-7: number of Rx queues, 8-15: number of tx queues */
+	unsigned long rx_qbase; /* Rx queue base address */
+	u32 rx_qsize; /* each Rx queue size, all Rx queues are of same size */
+	unsigned long tx_qbase; /* Tx queue base address */
+	u32 tx_qsize; /* each Tx queue size, all Tx queues are of same size */
+};
+
+/* Client shared memory ctrl bit description */
+#define CLIENT_CTRL_RX_Q_CNT_OFST 0
+#define CLIENT_CTRL_TX_Q_CNT_OFST 8
+#define CLIENT_CTRL_RX_Q_CNT(ctrl) (((ctrl) >> CLIENT_CTRL_RX_Q_CNT_OFST) \
+					& 0xFF)
+#define CLIENT_CTRL_TX_Q_CNT(ctrl) (((ctrl) >> CLIENT_CTRL_TX_Q_CNT_OFST) \
+					& 0xFF)
+
+/*
+ * Shared memory used to communicate between HIF driver and host/client drivers
+ * Before starting the hif driver, rx_buf_pool and rx_buf_pool_cnt should be
+ * initialized with host buffers and the buffer count in the pool.
+ * rx_buf_pool_cnt should be >= HIF_RX_DESC_NT.
+ */
+struct hif_shm {
+	u32 rx_buf_pool_cnt; /* Number of rx buffers available */
+	/* Rx buffers required to initialize HIF rx descriptors */
+	void *rx_buf_pool[HIF_RX_DESC_NT];
+	unsigned long g_client_status[2]; /* Global client status bit mask */
+	/* Client specific shared memory */
+	struct hif_client_shm client[HIF_CLIENTS_MAX];
+};
+
+#define CL_DESC_OWN BIT(31)
+/* This sets ownership to the HIF driver */
+#define CL_DESC_LAST BIT(30)
+/* This indicates the last packet for multi-buffer handling */
+#define CL_DESC_FIRST BIT(29)
+/* This indicates the first packet for multi-buffer handling */
+
+#define CL_DESC_BUF_LEN(x) ((x) & 0xFFFF)
+#define CL_DESC_FLAGS(x) (((x) & 0xF) << 16)
+#define CL_DESC_GET_FLAGS(x) (((x) >> 16) & 0xF)
+
+struct rx_queue_desc {
+	void *data;
+	u32 ctrl; /* 0-15bit len, 16-20bit flags, 31bit owner */
+	u32 client_ctrl;
+};
+
+struct tx_queue_desc {
+	void *data;
+	u32 ctrl; /* 0-15bit len, 16-20bit flags, 31bit owner */
+};
+
+/* HIF Rx is not working properly for 2-byte aligned buffers and
+ * ip_header should be 4-byte aligned for better performance.
+ * "ip_header = 64 + 6(hif_header) + 14 (MAC Header)" will be 4-byte aligned.
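 *
 * (Editorial example, not part of the original patch.) With the defaults
 * below, the usable packet area per buffer is
 * PFE_BUF_SIZE - PFE_PKT_HEADROOM - SKB_SHARED_INFO_SIZE, i.e.
 * 2048 - 128 - sizeof(struct skb_shared_info) bytes.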
+ */
+#define PFE_PKT_HEADER_SZ sizeof(struct hif_hdr)
+/* must be big enough for headroom, pkt size and skb shared info */
+#define PFE_BUF_SIZE 2048
+#define PFE_PKT_HEADROOM 128
+
+#define SKB_SHARED_INFO_SIZE (sizeof(struct skb_shared_info))
+#define PFE_PKT_SIZE (PFE_BUF_SIZE - PFE_PKT_HEADROOM \
+			- SKB_SHARED_INFO_SIZE)
+#define MAX_L2_HDR_SIZE 14	/* Not correct for VLAN/PPPoE */
+#define MAX_L3_HDR_SIZE 20	/* Not correct for IPv6 */
+#define MAX_L4_HDR_SIZE 60	/* TCP with maximum options */
+#define MAX_HDR_SIZE (MAX_L2_HDR_SIZE + MAX_L3_HDR_SIZE \
+			+ MAX_L4_HDR_SIZE)
+/* Used in page mode to clamp the packet size to the maximum supported by the
+ * hif hw interface (<16KiB)
+ */
+#define MAX_PFE_PKT_SIZE 16380UL
+
+extern unsigned int pfe_pkt_size;
+extern unsigned int pfe_pkt_headroom;
+extern unsigned int page_mode;
+extern unsigned int lro_mode;
+extern unsigned int tx_qos;
+extern unsigned int emac_txq_cnt;
+
+int pfe_hif_lib_init(struct pfe *pfe);
+void pfe_hif_lib_exit(struct pfe *pfe);
+int hif_lib_client_register(struct hif_client_s *client);
+int hif_lib_client_unregister(struct hif_client_s *client);
+void __hif_lib_xmit_pkt(struct hif_client_s *client, unsigned int qno, void
+			*data, unsigned int len, u32 client_ctrl,
+			unsigned int flags, void *client_data);
+int hif_lib_xmit_pkt(struct hif_client_s *client, unsigned int qno, void *data,
+		     unsigned int len, u32 client_ctrl, void *client_data);
+void hif_lib_indicate_client(int cl_id, int event, int data);
+int hif_lib_event_handler_start(struct hif_client_s *client, int event, int
+				data);
+int hif_lib_tmu_queue_start(struct hif_client_s *client, int qno);
+int hif_lib_tmu_queue_stop(struct hif_client_s *client, int qno);
+void *hif_lib_tx_get_next_complete(struct hif_client_s *client, int qno,
+				   unsigned int *flags, int count);
+void *hif_lib_receive_pkt(struct hif_client_s *client, int qno, int *len, int
+			  *ofst, unsigned int *rx_ctrl,
+			  unsigned int *desc_ctrl, void **priv_data);
+void __hif_lib_update_credit(struct hif_client_s *client, unsigned int queue);
+void hif_lib_set_rx_cpu_affinity(struct hif_client_s *client, int cpu_id);
+void hif_lib_set_tx_queue_nocpy(struct hif_client_s *client, int qno, int
+				enable);
+static inline int hif_lib_tx_avail(struct hif_client_s *client, unsigned int
+				   qno)
+{
+	struct hif_client_tx_queue *queue = &client->tx_q[qno];
+
+	return (queue->size - queue->tx_pending);
+}
+
+static inline int hif_lib_get_tx_wr_index(struct hif_client_s *client, unsigned
+					  int qno)
+{
+	struct hif_client_tx_queue *queue = &client->tx_q[qno];
+
+	return queue->write_idx;
+}
+
+static inline int hif_lib_tx_pending(struct hif_client_s *client, unsigned int
+				     qno)
+{
+	struct hif_client_tx_queue *queue = &client->tx_q[qno];
+
+	return queue->tx_pending;
+}
+
+#define hif_lib_tx_credit_avail(pfe, id, qno) \
+	((pfe)->tmu_credit.tx_credit[id][qno])
+
+#define hif_lib_tx_credit_max(pfe, id, qno) \
+	((pfe)->tmu_credit.tx_credit_max[id][qno])
+
+#define hif_lib_tx_credit_use(pfe, id, qno, credit) \
+	({ typeof(pfe) pfe_ = pfe; \
+	typeof(id) id_ = id; \
+	typeof(qno) qno_ = qno; \
+	typeof(credit) credit_ = credit; \
+	do { \
+		if (tx_qos) { \
+			(pfe_)->tmu_credit.tx_credit[id_][qno_]\
+			-= credit_; \
+			(pfe_)->tmu_credit.tx_packets[id_][qno_]\
+			+= credit_; \
+		} \
+	} while (0); \
+	})
+
+#endif /* _PFE_HIF_LIB_H_ */
diff --git a/drivers/staging/fsl_ppfe/pfe_hw.c b/drivers/staging/fsl_ppfe/pfe_hw.c
new file mode 100644
index 0000000..16ea2c6
--- /dev/null
+++ 
diff --git a/drivers/staging/fsl_ppfe/pfe_hw.c b/drivers/staging/fsl_ppfe/pfe_hw.c
new file mode 100644
index 0000000..16ea2c6
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_hw.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pfe_mod.h"
+#include "pfe_hw.h"
+
+/* Functions to handle most of the PFE hw register initialization */
+int pfe_hw_init(struct pfe *pfe, int resume)
+{
+	struct class_cfg class_cfg = {
+		.pe_sys_clk_ratio = PE_SYS_CLK_RATIO,
+		.route_table_baseaddr = pfe->ddr_phys_baseaddr +
+					ROUTE_TABLE_BASEADDR,
+		.route_table_hash_bits = ROUTE_TABLE_HASH_BITS,
+	};
+
+	struct tmu_cfg tmu_cfg = {
+		.pe_sys_clk_ratio = PE_SYS_CLK_RATIO,
+		.llm_base_addr = pfe->ddr_phys_baseaddr + TMU_LLM_BASEADDR,
+		.llm_queue_len = TMU_LLM_QUEUE_LEN,
+	};
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	struct util_cfg util_cfg = {
+		.pe_sys_clk_ratio = PE_SYS_CLK_RATIO,
+	};
+#endif
+
+	struct BMU_CFG bmu1_cfg = {
+		.baseaddr = CBUS_VIRT_TO_PFE(LMEM_BASE_ADDR +
+					     BMU1_LMEM_BASEADDR),
+		.count = BMU1_BUF_COUNT,
+		.size = BMU1_BUF_SIZE,
+		.low_watermark = 10,
+		.high_watermark = 15,
+	};
+
+	struct BMU_CFG bmu2_cfg = {
+		.baseaddr = DDR_PHYS_TO_PFE(pfe->ddr_phys_baseaddr +
+					    BMU2_DDR_BASEADDR),
+		.count = BMU2_BUF_COUNT,
+		.size = BMU2_BUF_SIZE,
+		.low_watermark = 250,
+		.high_watermark = 253,
+	};
+
+	struct gpi_cfg egpi1_cfg = {
+		.lmem_rtry_cnt = EGPI1_LMEM_RTRY_CNT,
+		.tmlf_txthres = EGPI1_TMLF_TXTHRES,
+		.aseq_len = EGPI1_ASEQ_LEN,
+		.mtip_pause_reg = CBUS_VIRT_TO_PFE(EMAC1_BASE_ADDR +
+						   EMAC_TCNTRL_REG),
+	};
+
+	struct gpi_cfg egpi2_cfg = {
+		.lmem_rtry_cnt = EGPI2_LMEM_RTRY_CNT,
+		.tmlf_txthres = EGPI2_TMLF_TXTHRES,
+		.aseq_len = EGPI2_ASEQ_LEN,
+		.mtip_pause_reg = CBUS_VIRT_TO_PFE(EMAC2_BASE_ADDR +
+						   EMAC_TCNTRL_REG),
+	};
+
+	struct gpi_cfg hgpi_cfg = {
+		.lmem_rtry_cnt = HGPI_LMEM_RTRY_CNT,
+		.tmlf_txthres = HGPI_TMLF_TXTHRES,
+		.aseq_len = HGPI_ASEQ_LEN,
+		.mtip_pause_reg = 0,
+	};
+
+	pr_info("%s\n", __func__);
+
+#if !defined(LS1012A_PFE_RESET_WA)
+	/* LS1012A needs this to make the PEs work correctly */
+	writel(0x3, CLASS_PE_SYS_CLK_RATIO);
+	writel(0x3, TMU_PE_SYS_CLK_RATIO);
+	writel(0x3, UTIL_PE_SYS_CLK_RATIO);
+	usleep_range(10, 20);
+#endif
+
+	pr_info("CLASS version: %x\n", readl(CLASS_VERSION));
+	pr_info("TMU version: %x\n", readl(TMU_VERSION));
+
+	pr_info("BMU1 version: %x\n", readl(BMU1_BASE_ADDR + BMU_VERSION));
+	pr_info("BMU2 version: %x\n", readl(BMU2_BASE_ADDR + BMU_VERSION));
+
+	pr_info("EGPI1 version: %x\n", readl(EGPI1_BASE_ADDR + GPI_VERSION));
+	pr_info("EGPI2 version: %x\n", readl(EGPI2_BASE_ADDR + GPI_VERSION));
+	pr_info("HGPI version: %x\n", readl(HGPI_BASE_ADDR + GPI_VERSION));
+
+	pr_info("HIF version: %x\n", readl(HIF_VERSION));
+	pr_info("HIF NOCPY version: %x\n", readl(HIF_NOCPY_VERSION));
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	pr_info("UTIL version: %x\n", readl(UTIL_VERSION));
+#endif
+	while (!(readl(TMU_CTRL) & ECC_MEM_INIT_DONE))
+		;
+
+	hif_rx_disable();
+	hif_tx_disable();
+
+	bmu_init(BMU1_BASE_ADDR, &bmu1_cfg);
+
+	pr_info("bmu_init(1) done\n");
+
+	bmu_init(BMU2_BASE_ADDR, &bmu2_cfg);
+
+	pr_info("bmu_init(2) done\n");
+
+	class_cfg.resume = resume ? 1 : 0;
+
+	class_init(&class_cfg);
+
+	pr_info("class_init() done\n");
+
+	tmu_init(&tmu_cfg);
+
+	pr_info("tmu_init() done\n");
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	util_init(&util_cfg);
+
+	pr_info("util_init() done\n");
+#endif
+	gpi_init(EGPI1_BASE_ADDR, &egpi1_cfg);
+
+	pr_info("gpi_init(1) done\n");
+
+	gpi_init(EGPI2_BASE_ADDR, &egpi2_cfg);
+
+	pr_info("gpi_init(2) done\n");
+
+	gpi_init(HGPI_BASE_ADDR, &hgpi_cfg);
+
+	pr_info("gpi_init(hif) done\n");
+
+	bmu_enable(BMU1_BASE_ADDR);
+
+	pr_info("bmu_enable(1) done\n");
+
+	bmu_enable(BMU2_BASE_ADDR);
+
+	pr_info("bmu_enable(2) done\n");
+
+	return 0;
+}
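+
+/*
+ * Editor's note: the ECC_MEM_INIT_DONE wait above spins forever if the
+ * bit never asserts. A bounded variant (sketch only, assuming
+ * <linux/iopoll.h> is available in this tree) would be:
+ *
+ *	u32 val;
+ *	int err;
+ *
+ *	err = readl_poll_timeout(TMU_CTRL, val,
+ *				 val & ECC_MEM_INIT_DONE,
+ *				 10, 100000);
+ *	if (err)
+ *		return err;
+ */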
+
+/* pfe_hw_exit - disable and reset both buffer-management units */
+void pfe_hw_exit(struct pfe *pfe)
+{
+	pr_info("%s\n", __func__);
+
+	bmu_disable(BMU1_BASE_ADDR);
+	bmu_reset(BMU1_BASE_ADDR);
+
+	bmu_disable(BMU2_BASE_ADDR);
+	bmu_reset(BMU2_BASE_ADDR);
+}
diff --git a/drivers/staging/fsl_ppfe/pfe_hw.h b/drivers/staging/fsl_ppfe/pfe_hw.h
new file mode 100644
index 0000000..53b5fe1
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_hw.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PFE_HW_H_
+#define _PFE_HW_H_
+
+#define PE_SYS_CLK_RATIO	1	/* SYS/AXI = 250MHz, HFE = 500MHz */
+
+int pfe_hw_init(struct pfe *pfe, int resume);
+void pfe_hw_exit(struct pfe *pfe);
+
+#endif /* _PFE_HW_H_ */
diff --git a/drivers/staging/fsl_ppfe/pfe_ls1012a_platform.c b/drivers/staging/fsl_ppfe/pfe_ls1012a_platform.c
new file mode 100644
index 0000000..c579eb5
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_ls1012a_platform.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/of_net.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#include "pfe_mod.h"
+
+struct ls1012a_pfe_platform_data pfe_platform_data;
+
+static int pfe_get_gemac_if_properties(struct device_node *parent, int port,
+				       int if_cnt,
+				       struct ls1012a_pfe_platform_data *pdata)
+{
+	struct device_node *gem = NULL, *phy = NULL;
+	int size;
+	int ii = 0, phy_id = 0;
+	const u32 *addr;
+	const void *mac_addr;
+
+	for (ii = 0; ii < if_cnt; ii++) {
+		gem = of_get_next_child(parent, gem);
+		if (!gem)
+			goto err;
+		addr = of_get_property(gem, "reg", &size);
+		if (addr && (be32_to_cpup(addr) == port))
+			break;
+	}
+
+	if (ii >= if_cnt) {
+		pr_err("%s:%d Failed to find interface %d\n",
+		       __func__, __LINE__, port);
+		goto err;
+	}
+
+	pdata->ls1012a_eth_pdata[port].gem_id = port;
+
+	mac_addr = of_get_mac_address(gem);
+
+	if (mac_addr) {
+		memcpy(pdata->ls1012a_eth_pdata[port].mac_addr, mac_addr,
+		       ETH_ALEN);
+	}
+
+	pdata->ls1012a_eth_pdata[port].mii_config = of_get_phy_mode(gem);
+
+	if ((pdata->ls1012a_eth_pdata[port].mii_config) < 0)
+		pr_err("%s:%d Incorrect PHY mode\n", __func__, __LINE__);
+
+	addr = of_get_property(gem, "fsl,gemac-bus-id", &size);
+	if (!addr)
+		pr_err("%s:%d Invalid gemac-bus-id\n", __func__, __LINE__);
+	else
+		pdata->ls1012a_eth_pdata[port].bus_id = be32_to_cpup(addr);
+
+	addr = of_get_property(gem, "fsl,gemac-phy-id", &size);
+	if (!addr) {
+		pr_err("%s:%d Invalid gemac-phy-id\n", __func__, __LINE__);
+	} else {
+		phy_id = be32_to_cpup(addr);
+		pdata->ls1012a_eth_pdata[port].phy_id = phy_id;
+		pdata->ls1012a_mdio_pdata[0].phy_mask &= ~(1 << phy_id);
+	}
+
+	addr = of_get_property(gem, "fsl,mdio-mux-val", &size);
+	if (!addr)
+		pr_err("%s: Invalid mdio-mux-val\n", __func__);
+	else
+		phy_id = be32_to_cpup(addr);
+	pdata->ls1012a_eth_pdata[port].mdio_muxval = phy_id;
+
+	if (pdata->ls1012a_eth_pdata[port].phy_id < 32)
+		pfe->mdio_muxval[pdata->ls1012a_eth_pdata[port].phy_id] =
+			pdata->ls1012a_eth_pdata[port].mdio_muxval;
+
+	addr = of_get_property(gem, "fsl,pfe-phy-if-flags", &size);
+	if (!addr)
+		pr_err("%s:%d Invalid pfe-phy-if-flags\n",
+		       __func__, __LINE__);
+	else
+		pdata->ls1012a_eth_pdata[port].phy_flags = be32_to_cpup(addr);
+
+	/* If there is no PHY, skip the MDIO properties */
+	if (pdata->ls1012a_eth_pdata[port].phy_flags & GEMAC_NO_PHY)
+		goto done;
+
+	phy = of_get_next_child(gem, NULL);
+
+	addr = of_get_property(phy, "reg", &size);
+
+	if (!addr)
+		pr_err("%s:%d Invalid phy enable flag\n",
+		       __func__, __LINE__);
+	else
+		pdata->ls1012a_mdio_pdata[port].enabled = be32_to_cpup(addr);
+
+	pdata->ls1012a_mdio_pdata[port].irq[0] = PHY_POLL;
+
+done:
+
+	return 0;
+
+err:
+	return -1;
+}
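+
+/*
+ * Editor's note: the of_get_property()/be32_to_cpup() pairs above can be
+ * written more compactly with the typed OF helpers. A sketch for one
+ * property (same semantics assumed, of_property_read_u32() returns
+ * non-zero when the property is absent or malformed):
+ *
+ *	u32 bus_id;
+ *
+ *	if (of_property_read_u32(gem, "fsl,gemac-bus-id", &bus_id))
+ *		pr_err("%s: Invalid gemac-bus-id\n", __func__);
+ *	else
+ *		pdata->ls1012a_eth_pdata[port].bus_id = bus_id;
+ */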
+
+/*
+ * pfe_platform_probe - probe the LS1012A PFE platform device
+ */
+static int pfe_platform_probe(struct platform_device *pdev)
+{
+	struct resource res;
+	int ii, rc, interface_count = 0, size = 0;
+	const u32 *prop;
+	struct device_node *np;
+	struct clk *pfe_clk;
+
+	np = pdev->dev.of_node;
+
+	if (!np) {
+		pr_err("Invalid device node\n");
+		return -EINVAL;
+	}
+
+	pfe = kzalloc(sizeof(*pfe), GFP_KERNEL);
+	if (!pfe) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+
+	platform_set_drvdata(pdev, pfe);
+
+	dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+
+	if (of_address_to_resource(np, 1, &res)) {
+		rc = -ENOMEM;
+		pr_err("failed to get ddr resource\n");
+		goto err_ddr;
+	}
+
+	pfe->ddr_phys_baseaddr = res.start;
+	pfe->ddr_size = resource_size(&res);
+
+	pfe->ddr_baseaddr = phys_to_virt(res.start);
+	if (!pfe->ddr_baseaddr) {
+		pr_err("failed to map ddr\n");
+		rc = -ENOMEM;
+		goto err_ddr;
+	}
+
+	pfe->scfg =
+		syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						"fsl,pfe-scfg");
+	if (IS_ERR(pfe->scfg)) {
+		dev_err(&pdev->dev, "No syscfg phandle specified\n");
+		rc = PTR_ERR(pfe->scfg);
+		goto err_axi;
+	}
+
+	pfe->cbus_baseaddr = of_iomap(np, 0);
+	if (!pfe->cbus_baseaddr) {
+		rc = -ENOMEM;
+		pr_err("failed to get axi resource\n");
+		goto err_axi;
+	}
+
+	pfe->hif_irq = platform_get_irq(pdev, 0);
+	if (pfe->hif_irq < 0) {
+		pr_err("platform_get_irq for hif failed\n");
+		rc = pfe->hif_irq;
+		goto err_hif_irq;
+	}
+
+	pfe->wol_irq = platform_get_irq(pdev, 2);
+	if (pfe->wol_irq < 0) {
+		pr_err("platform_get_irq for WoL failed\n");
+		rc = pfe->wol_irq;
+		goto err_hif_irq;
+	}
+
+	/* Read interface count */
+	prop = of_get_property(np, "fsl,pfe-num-interfaces", &size);
+	if (!prop) {
+		pr_err("Failed to read number of interfaces\n");
+		rc = -ENXIO;
+		goto err_prop;
+	}
+
+	interface_count = be32_to_cpup(prop);
+	if (interface_count <= 0) {
+		pr_err("Invalid ethernet interface count: %d\n",
+		       interface_count);
+		rc = -ENXIO;
+		goto err_prop;
+	}
+
+	pfe_platform_data.ls1012a_mdio_pdata[0].phy_mask = 0xffffffff;
+
+	for (ii = 0; ii < interface_count; ii++) {
+		pfe_get_gemac_if_properties(np, ii, interface_count,
+					    &pfe_platform_data);
+	}
+
+	pfe->dev = &pdev->dev;
+
+	pfe->dev->platform_data = &pfe_platform_data;
+
+	/* declare WoL capabilities */
+	device_init_wakeup(&pdev->dev, true);
+
+	/* find the clocks */
+	pfe_clk = devm_clk_get(pfe->dev, "pfe");
+	if (IS_ERR(pfe_clk)) {
+		rc = PTR_ERR(pfe_clk);
+		goto err_hif_irq;
+	}
+
+	/* PFE clock is (platform clock / 2) */
+	/* save sys_clk value as KHz */
+	pfe->ctrl.sys_clk = clk_get_rate(pfe_clk) / (2 * 1000);
+
+	rc = pfe_probe(pfe);
+	if (rc < 0)
+		goto err_probe;
+
+	return 0;
+
+err_probe:
+err_prop:
+err_hif_irq:
+	iounmap(pfe->cbus_baseaddr);
+
+err_axi:
+	iounmap(pfe->ddr_baseaddr);
+
+err_ddr:
+	platform_set_drvdata(pdev, NULL);
+
+	kfree(pfe);
+
+err_alloc:
+	return rc;
+}
+
+/*
+ * pfe_platform_remove - unwind pfe_platform_probe
+ */
+static int pfe_platform_remove(struct platform_device *pdev)
+{
+	struct pfe *pfe = platform_get_drvdata(pdev);
+	int rc;
+
+	pr_info("%s\n", __func__);
+
+	rc = pfe_remove(pfe);
+
+	iounmap(pfe->cbus_baseaddr);
+	iounmap(pfe->ddr_baseaddr);
+
+	platform_set_drvdata(pdev, NULL);
+
+	kfree(pfe);
+
+	return rc;
+}
+
+#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
+static int pfe_platform_suspend(struct device *dev)
+{
+	struct pfe *pfe = platform_get_drvdata(to_platform_device(dev));
+	struct net_device *netdev;
+	int i;
+
+	pfe->wake = 0;
+
+	for (i = 0; i < NUM_GEMAC_SUPPORT; i++) {
+		netdev = pfe->eth.eth_priv[i]->ndev;
+
+		netif_device_detach(netdev);
+
+		if (netif_running(netdev))
+			if (pfe_eth_suspend(netdev))
+				pfe->wake = 1;
+	}
+
+	/* Shutdown PFE only if we're not waking up the system */
+	if (!pfe->wake) {
+#if defined(LS1012A_PFE_RESET_WA)
+		pfe_hif_rx_idle(&pfe->hif);
+#endif
+		pfe_ctrl_suspend(&pfe->ctrl);
+		pfe_firmware_exit(pfe);
+
+		pfe_hif_exit(pfe);
+		pfe_hif_lib_exit(pfe);
+
+		pfe_hw_exit(pfe);
+	}
+
+	return 0;
+}
+
+static int pfe_platform_resume(struct device *dev)
+{
+	struct pfe *pfe = platform_get_drvdata(to_platform_device(dev));
+	struct net_device *netdev;
+	int i;
+
+	if (!pfe->wake) {
+		pfe_hw_init(pfe, 1);
+		pfe_hif_lib_init(pfe);
+		pfe_hif_init(pfe);
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+		util_enable();
+#endif
+		tmu_enable(0xf);
+		class_enable();
+		pfe_ctrl_resume(&pfe->ctrl);
+	}
+
+	for (i = 0; i < NUM_GEMAC_SUPPORT; i++) {
+		netdev = pfe->eth.eth_priv[i]->ndev;
+
+		if (pfe->eth.eth_priv[i]->mii_bus)
+			pfe_eth_mdio_reset(pfe->eth.eth_priv[i]->mii_bus);
+
+		if (netif_running(netdev))
+			pfe_eth_resume(netdev);
+
+		netif_device_attach(netdev);
+	}
+	return 0;
+}
+#else
+#define pfe_platform_suspend NULL
+#define pfe_platform_resume NULL
+#endif
+
+static const struct dev_pm_ops pfe_platform_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(pfe_platform_suspend, pfe_platform_resume)
+};
+#endif
+
+static const struct of_device_id pfe_match[] = {
+	{
+		.compatible = "fsl,pfe",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, pfe_match);
+
+static struct platform_driver pfe_platform_driver = {
+	.probe = pfe_platform_probe,
+	.remove = pfe_platform_remove,
+	.driver = {
+		.name = "pfe",
+		.of_match_table = pfe_match,
+#ifdef CONFIG_PM
+		.pm = &pfe_platform_pm_ops,
+#endif
+	},
+};
+
+module_platform_driver(pfe_platform_driver);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PFE Ethernet driver");
+MODULE_AUTHOR("NXP DNCPE");
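For reference, module_platform_driver() above is the standard kernel shorthand
for the register/unregister boilerplate; it expands to the equivalent of
(names of the init/exit functions are generated, shown here descriptively):

	static int __init pfe_platform_driver_init(void)
	{
		return platform_driver_register(&pfe_platform_driver);
	}
	module_init(pfe_platform_driver_init);

	static void __exit pfe_platform_driver_exit(void)
	{
		platform_driver_unregister(&pfe_platform_driver);
	}
	module_exit(pfe_platform_driver_exit);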
diff --git a/drivers/staging/fsl_ppfe/pfe_mod.c b/drivers/staging/fsl_ppfe/pfe_mod.c
new file mode 100644
index 0000000..d5ba56a
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_mod.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/dma-mapping.h>
+#include "pfe_mod.h"
+
+struct pfe *pfe;
+
+/*
+ * pfe_probe - initialize the PFE hardware and all driver subsystems
+ */
+int pfe_probe(struct pfe *pfe)
+{
+	int rc;
+
+	if (pfe->ddr_size < DDR_MAX_SIZE) {
+		pr_err("%s: not enough DDR memory: need %x, have %x\n",
+		       __func__, (unsigned int)DDR_MAX_SIZE, pfe->ddr_size);
+		rc = -ENOMEM;
+		goto err_hw;
+	}
+
+	if ((pfe->ddr_phys_baseaddr + BMU2_DDR_BASEADDR) &
+	    (8 * SZ_1M - 1)) {
+		pr_err("%s: BMU2 base address (0x%lx) must be aligned on an 8MB boundary\n",
+		       __func__, pfe->ddr_phys_baseaddr + BMU2_DDR_BASEADDR);
+		rc = -ENOMEM;
+		goto err_hw;
+	}
+
+	pr_info("cbus_baseaddr: %lx, ddr_baseaddr: %lx, ddr_phys_baseaddr: %lx, ddr_size: %x\n",
+		(unsigned long)pfe->cbus_baseaddr,
+		(unsigned long)pfe->ddr_baseaddr,
+		pfe->ddr_phys_baseaddr, pfe->ddr_size);
+
+	pfe_lib_init(pfe->cbus_baseaddr, pfe->ddr_baseaddr,
+		     pfe->ddr_phys_baseaddr, pfe->ddr_size);
+
+	rc = pfe_hw_init(pfe, 0);
+	if (rc < 0)
+		goto err_hw;
+
+	rc = pfe_hif_lib_init(pfe);
+	if (rc < 0)
+		goto err_hif_lib;
+
+	rc = pfe_hif_init(pfe);
+	if (rc < 0)
+		goto err_hif;
+
+	rc = pfe_firmware_init(pfe);
+	if (rc < 0)
+		goto err_firmware;
+
+	rc = pfe_ctrl_init(pfe);
+	if (rc < 0)
+		goto err_ctrl;
+
+	rc = pfe_eth_init(pfe);
+	if (rc < 0)
+		goto err_eth;
+
+	rc = pfe_sysfs_init(pfe);
+	if (rc < 0)
+		goto err_sysfs;
+
+	rc = pfe_debugfs_init(pfe);
+	if (rc < 0)
+		goto err_debugfs;
+
+	return 0;
+
+err_debugfs:
+	pfe_sysfs_exit(pfe);
+
+err_sysfs:
+	pfe_eth_exit(pfe);
+
+err_eth:
+	pfe_ctrl_exit(pfe);
+
+err_ctrl:
+	pfe_firmware_exit(pfe);
+
+err_firmware:
+	pfe_hif_exit(pfe);
+
+err_hif:
+	pfe_hif_lib_exit(pfe);
+
+err_hif_lib:
+	pfe_hw_exit(pfe);
+
+err_hw:
+	return rc;
+}
+
+/*
+ * pfe_remove - tear down all subsystems in the reverse of pfe_probe order
+ */
+int pfe_remove(struct pfe *pfe)
+{
+	pr_info("%s\n", __func__);
+
+	pfe_debugfs_exit(pfe);
+
+	pfe_sysfs_exit(pfe);
+
+	pfe_eth_exit(pfe);
+
+	pfe_ctrl_exit(pfe);
+
+#if defined(LS1012A_PFE_RESET_WA)
+	pfe_hif_rx_idle(&pfe->hif);
+#endif
+	pfe_firmware_exit(pfe);
+
+	pfe_hif_exit(pfe);
+
+	pfe_hif_lib_exit(pfe);
+
+	pfe_hw_exit(pfe);
+
+	return 0;
+}
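The BMU2 alignment check in pfe_probe() is an open-coded power-of-two mask
test; the same condition can be expressed with the kernel's IS_ALIGNED()
helper from <linux/kernel.h> (editor's sketch, identical semantics):

	if (!IS_ALIGNED(pfe->ddr_phys_baseaddr + BMU2_DDR_BASEADDR,
			8 * SZ_1M)) {
		pr_err("%s: BMU2 base must be 8MB aligned\n", __func__);
		return -EINVAL;
	}

IS_ALIGNED() avoids the truncating integer cast the original code carried and
keeps the full width of the physical address on 64-bit targets.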
diff --git a/drivers/staging/fsl_ppfe/pfe_mod.h b/drivers/staging/fsl_ppfe/pfe_mod.h
new file mode 100644
index 0000000..3012f17
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_mod.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PFE_MOD_H_
+#define _PFE_MOD_H_
+
+#include <linux/device.h>
+#include <linux/elf.h>
+
+struct pfe;
+
+#include "pfe_hw.h"
+#include "pfe_firmware.h"
+#include "pfe_ctrl.h"
+#include "pfe_hif.h"
+#include "pfe_hif_lib.h"
+#include "pfe_eth.h"
+#include "pfe_sysfs.h"
+#include "pfe_perfmon.h"
+#include "pfe_debugfs.h"
+
+#define PHYID_MAX_VAL 32
+
+struct pfe_tmu_credit {
+	/* Number of allowed TX packets in-flight, matches TMU queue size */
+	unsigned int tx_credit[NUM_GEMAC_SUPPORT][EMAC_TXQ_CNT];
+	unsigned int tx_credit_max[NUM_GEMAC_SUPPORT][EMAC_TXQ_CNT];
+	unsigned int tx_packets[NUM_GEMAC_SUPPORT][EMAC_TXQ_CNT];
+};
+
+struct pfe {
+	struct regmap *scfg;
+	unsigned long ddr_phys_baseaddr;
+	void *ddr_baseaddr;
+	unsigned int ddr_size;
+	void *cbus_baseaddr;
+	void *apb_baseaddr;
+	unsigned long iram_phys_baseaddr;
+	void *iram_baseaddr;
+	unsigned long ipsec_phys_baseaddr;
+	void *ipsec_baseaddr;
+	int hif_irq;
+	int wol_irq;
+	int hif_client_irq;
+	struct device *dev;
+	struct dentry *dentry;
+	struct pfe_ctrl ctrl;
+	struct pfe_hif hif;
+	struct pfe_eth eth;
+	struct hif_client_s *hif_client[HIF_CLIENTS_MAX];
+#if defined(CFG_DIAGS)
+	struct pfe_diags diags;
+#endif
+	struct pfe_tmu_credit tmu_credit;
+	struct pfe_cpumon cpumon;
+	struct pfe_memmon memmon;
+	int wake;
+	int mdio_muxval[PHYID_MAX_VAL];
+	struct clk *hfe_clock;
+};
+
+extern struct pfe *pfe;
+
+int pfe_probe(struct pfe *pfe);
+int pfe_remove(struct pfe *pfe);
+
+/* DDR mapping in reserved memory */
+#define ROUTE_TABLE_BASEADDR	0
+#define ROUTE_TABLE_HASH_BITS	15	/* 32K entries */
+#define ROUTE_TABLE_SIZE	((1 << ROUTE_TABLE_HASH_BITS) \
+				 * CLASS_ROUTE_SIZE)
+#define BMU2_DDR_BASEADDR	(ROUTE_TABLE_BASEADDR + ROUTE_TABLE_SIZE)
+#define BMU2_BUF_COUNT		(4096 - 256)
+/* This is to get a total DDR size of 12MiB */
+#define BMU2_DDR_SIZE		(DDR_BUF_SIZE * BMU2_BUF_COUNT)
+#define UTIL_CODE_BASEADDR	(BMU2_DDR_BASEADDR + BMU2_DDR_SIZE)
+#define UTIL_CODE_SIZE		(128 * SZ_1K)
+#define UTIL_DDR_DATA_BASEADDR	(UTIL_CODE_BASEADDR + UTIL_CODE_SIZE)
+#define UTIL_DDR_DATA_SIZE	(64 * SZ_1K)
+#define CLASS_DDR_DATA_BASEADDR	(UTIL_DDR_DATA_BASEADDR + UTIL_DDR_DATA_SIZE)
+#define CLASS_DDR_DATA_SIZE	(32 * SZ_1K)
+#define TMU_DDR_DATA_BASEADDR	(CLASS_DDR_DATA_BASEADDR + CLASS_DDR_DATA_SIZE)
+#define TMU_DDR_DATA_SIZE	(32 * SZ_1K)
+/* Must be a power of two and at least 16 * 8 = 128 bytes */
+#define TMU_LLM_BASEADDR	(TMU_DDR_DATA_BASEADDR + TMU_DDR_DATA_SIZE)
+#define TMU_LLM_QUEUE_LEN	(8 * 512)
+/* 4 TMUs x 16 queues x queue_len */
+#define TMU_LLM_SIZE		(4 * 16 * TMU_LLM_QUEUE_LEN)
+
+#define DDR_MAX_SIZE		(TMU_LLM_BASEADDR + TMU_LLM_SIZE)
+
+/* LMEM mapping */
+#define BMU1_LMEM_BASEADDR	0
+#define BMU1_BUF_COUNT		256
+#define BMU1_LMEM_SIZE		(LMEM_BUF_SIZE * BMU1_BUF_COUNT)
+
+#endif /* _PFE_MOD_H_ */
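The "total DDR size of 12MiB" comment in the map above checks out. With the
values defined elsewhere in this series (assumed here: CLASS_ROUTE_SIZE = 128
and DDR_BUF_SIZE = 2048), the regions add up exactly:

	/* Worked sizes under the stated assumptions:
	 * ROUTE_TABLE_SIZE = (1 << 15) * 128     = 4 MiB    (4194304)
	 * BMU2_DDR_SIZE    = 2048 * (4096 - 256) = 7.5 MiB  (7864320)
	 * UTIL code + data = 128K + 64K          = 192 KiB  (196608)
	 * CLASS + TMU data = 32K + 32K           = 64 KiB   (65536)
	 * TMU_LLM_SIZE     = 4 * 16 * (8 * 512)  = 256 KiB  (262144)
	 */
	_Static_assert(4194304 + 7864320 + 196608 + 65536 + 262144 ==
		       12 * 1024 * 1024, "PFE reserved DDR map is 12MiB");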
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PFE_PERFMON_H_
+#define _PFE_PERFMON_H_
+
+#include "pfe/pfe.h"
+
+#define CT_CPUMON_INTERVAL	(1 * TIMER_TICKS_PER_SEC)
+
+struct pfe_cpumon {
+	u32 cpu_usage_pct[MAX_PE];
+	u32 class_usage_pct;
+};
+
+struct pfe_memmon {
+	u32 kernel_memory_allocated;
+};
+
+int pfe_perfmon_init(struct pfe *pfe);
+void pfe_perfmon_exit(struct pfe *pfe);
+
+#endif /* _PFE_PERFMON_H_ */
diff --git a/drivers/staging/fsl_ppfe/pfe_sysfs.c b/drivers/staging/fsl_ppfe/pfe_sysfs.c
new file mode 100644
index 0000000..2a76384
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_sysfs.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "pfe_mod.h"
+
+#define PE_EXCEPTION_DUMP_ADDRESS 0x1fa8
+#define NUM_QUEUES		16
+
+static char register_name[20][5] = {
+	"EPC", "ECAS", "EID", "ED",
+	"r0", "r1", "r2", "r3",
+	"r4", "r5", "r6", "r7",
+	"r8", "r9", "r10", "r11",
+	"r12", "r13", "r14", "r15",
+};
+
+static char exception_name[14][20] = {
+	"Reset",
+	"HardwareFailure",
+	"NMI",
+	"InstBreakpoint",
+	"DataBreakpoint",
+	"Unsupported",
+	"PrivilegeViolation",
+	"InstBusError",
+	"DataBusError",
+	"AlignmentError",
+	"ArithmeticError",
+	"SystemCall",
+	"MemoryManagement",
+	"Interrupt",
+};
+
+static unsigned long class_do_clear;
+static unsigned long tmu_do_clear;
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+static unsigned long util_do_clear;
+#endif
+
+static ssize_t display_pe_status(char *buf, int id, u32 dmem_addr,
+				 unsigned long do_clear)
+{
+	ssize_t len = 0;
+	u32 val;
+	char statebuf[5];
+	struct pfe_cpumon *cpumon = &pfe->cpumon;
+	u32 debug_indicator;
+	u32 debug[20];
+
+	/* The first word of the PE status area is a 4-character state
+	 * string (e.g. "DEAD" when the PE hit an exception).
+	 */
+	*(u32 *)statebuf = pe_dmem_read(id, dmem_addr, 4);
+	dmem_addr += 4;
+
+	statebuf[4] = '\0';
+	len += sprintf(buf + len, "state=%4s ", statebuf);
+
+	val = pe_dmem_read(id, dmem_addr, 4);
+	dmem_addr += 4;
+	len += sprintf(buf + len, "ctr=%08x ", cpu_to_be32(val));
+
+	val = pe_dmem_read(id, dmem_addr, 4);
+	if (do_clear && val)
+		pe_dmem_write(id, 0, dmem_addr, 4);
+	dmem_addr += 4;
+	len += sprintf(buf + len, "rx=%u ", cpu_to_be32(val));
+
+	val = pe_dmem_read(id, dmem_addr, 4);
+	if (do_clear && val)
+		pe_dmem_write(id, 0, dmem_addr, 4);
+	dmem_addr += 4;
+	if (id >= TMU0_ID && id <= TMU_MAX_ID)
+		len += sprintf(buf + len, "qstatus=%x", cpu_to_be32(val));
+	else
+		len += sprintf(buf + len, "tx=%u", cpu_to_be32(val));
+
+	val = pe_dmem_read(id, dmem_addr, 4);
+	if (do_clear && val)
+		pe_dmem_write(id, 0, dmem_addr, 4);
+	dmem_addr += 4;
+	if (val)
+		len += sprintf(buf + len, " drop=%u", cpu_to_be32(val));
+
+	len += sprintf(buf + len, " load=%d%%", cpumon->cpu_usage_pct[id]);
+
+	len += sprintf(buf + len, "\n");
+
+	debug_indicator = pe_dmem_read(id, dmem_addr, 4);
+	dmem_addr += 4;
+	if (!strncmp((char *)&debug_indicator, "DBUG", 4)) {
+		int j, last = 0;
+
+		for (j = 0; j < 16; j++) {
+			debug[j] = pe_dmem_read(id, dmem_addr, 4);
+			if (debug[j]) {
+				if (do_clear)
+					pe_dmem_write(id, 0, dmem_addr, 4);
+				last = j + 1;
+			}
+			dmem_addr += 4;
+		}
+		for (j = 0; j < last; j++) {
+			len += sprintf(buf + len, "%08x%s",
+				       cpu_to_be32(debug[j]),
+				       (j & 0x7) == 0x7 || j == last - 1 ? "\n" : " ");
+		}
+	}
+
+	if (!strncmp(statebuf, "DEAD", 4)) {
+		u32 i, dump = PE_EXCEPTION_DUMP_ADDRESS;
+
+		len += sprintf(buf + len, "Exception details:\n");
+		for (i = 0; i < 20; i++) {
+			debug[i] = pe_dmem_read(id, dump, 4);
+			dump += 4;
+			if (i == 2)
+				len += sprintf(buf + len, "%4s = %08x (=%s) ",
+					       register_name[i], cpu_to_be32(debug[i]),
+					       exception_name[min((u32)
+					       cpu_to_be32(debug[i]), (u32)13)]);
+			else
+				len += sprintf(buf + len, "%4s = %08x%s",
+					       register_name[i], cpu_to_be32(debug[i]),
+					       (i & 0x3) == 0x3 || i == 19 ? "\n" : " ");
+		}
+	}
+
+	return len;
+}
+
+static ssize_t class_phy_stats(char *buf, int phy)
+{
+	ssize_t len = 0;
+	int off1 = phy * 0x28;
+	int off2 = phy * 0x10;
+
+	if (phy == 3)
+		off1 = CLASS_PHY4_RX_PKTS - CLASS_PHY1_RX_PKTS;
+
+	len += sprintf(buf + len, "phy: %d\n", phy);
+	len += sprintf(buf + len,
+		       "  rx: %10u, tx: %10u, intf: %10u, ipv4: %10u, ipv6: %10u\n",
+		       readl(CLASS_PHY1_RX_PKTS + off1),
+		       readl(CLASS_PHY1_TX_PKTS + off1),
+		       readl(CLASS_PHY1_INTF_MATCH_PKTS + off1),
+		       readl(CLASS_PHY1_V4_PKTS + off1),
+		       readl(CLASS_PHY1_V6_PKTS + off1));
+
+	len += sprintf(buf + len,
+		       "  icmp: %10u, igmp: %10u, tcp: %10u, udp: %10u\n",
+		       readl(CLASS_PHY1_ICMP_PKTS + off2),
+		       readl(CLASS_PHY1_IGMP_PKTS + off2),
+		       readl(CLASS_PHY1_TCP_PKTS + off2),
+		       readl(CLASS_PHY1_UDP_PKTS + off2));
+
+	len += sprintf(buf + len, "  err\n");
+	len += sprintf(buf + len,
+		       "  lp: %10u, intf: %10u, l3: %10u, chksum: %10u, ttl: %10u\n",
+		       readl(CLASS_PHY1_LP_FAIL_PKTS + off1),
+		       readl(CLASS_PHY1_INTF_FAIL_PKTS + off1),
+		       readl(CLASS_PHY1_L3_FAIL_PKTS + off1),
+		       readl(CLASS_PHY1_CHKSUM_ERR_PKTS + off1),
+		       readl(CLASS_PHY1_TTL_ERR_PKTS + off1));
+
+	return len;
+}
+
+/* qm_read_drop_stat
+ * This function is used to read the drop statistics from the TMU
+ * hw drop counter. Since the hw counter is always cleared after
+ * reading, this function maintains the previous drop count and
+ * adds the new value to it. That value can be retrieved by
+ * passing a pointer to it with the total_drops arg.
+ *
+ * @param tmu		TMU number (0 - 3)
+ * @param queue		queue number (0 - 15)
+ * @param total_drops	pointer to location to store total drops (or NULL)
+ * @param do_reset	if TRUE, clear total drops after updating
+ */
+u32 qm_read_drop_stat(u32 tmu, u32 queue, u32 *total_drops, int do_reset)
+{
+	static u32 qtotal[TMU_MAX_ID + 1][NUM_QUEUES];
+	u32 val;
+
+	writel((tmu << 8) | queue, TMU_TEQ_CTRL);
+	writel((tmu << 8) | queue, TMU_LLM_CTRL);
+	val = readl(TMU_TEQ_DROP_STAT);
+	qtotal[tmu][queue] += val;
+	if (total_drops)
+		*total_drops = qtotal[tmu][queue];
+	if (do_reset)
+		qtotal[tmu][queue] = 0;
+	return val;
+}
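+
+/*
+ * Editor's example (illustrative only): accumulating drops for TMU0,
+ * queue 5 across two reads, then clearing the running total:
+ *
+ *	u32 total;
+ *
+ *	qm_read_drop_stat(0, 5, &total, 0);	read; total keeps growing
+ *	qm_read_drop_stat(0, 5, &total, 1);	read again, then clear total
+ *
+ * Each call also returns the per-call hw delta, which is what
+ * tmu_queue_stats() below prints.
+ */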
"pc=1%04x ", val & 0xffff); + + len += display_pe_status(buf + len, id, CLASS_DM_PESTATUS, + class_do_clear); + } + len += sprintf(buf + len, "aggregate load=%d%%\n\n", + cpumon->class_usage_pct); + + len += sprintf(buf + len, "pe status: 0x%x\n", + readl(CLASS_PE_STATUS)); + len += sprintf(buf + len, "max buf cnt: 0x%x afull thres: 0x%x\n", + readl(CLASS_MAX_BUF_CNT), readl(CLASS_AFULL_THRES)); + len += sprintf(buf + len, "tsq max cnt: 0x%x tsq fifo thres: 0x%x\n", + readl(CLASS_TSQ_MAX_CNT), readl(CLASS_TSQ_FIFO_THRES)); + len += sprintf(buf + len, "state: 0x%x\n", readl(CLASS_STATE)); + + len += class_phy_stats(buf + len, 0); + len += class_phy_stats(buf + len, 1); + len += class_phy_stats(buf + len, 2); + len += class_phy_stats(buf + len, 3); + + return len; +} + +static ssize_t pfe_set_tmu(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + tmu_do_clear = kstrtoul(buf, 0, 0); + return count; +} + +static ssize_t pfe_show_tmu(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ssize_t len = 0; + int id; + u32 val; + + len += block_version(buf + len, TMU_VERSION); + + for (id = TMU0_ID; id <= TMU_MAX_ID; id++) { + if (id == TMU2_ID) + continue; + len += sprintf(buf + len, "%d: ", id - TMU0_ID); + + len += display_pe_status(buf + len, id, TMU_DM_PESTATUS, + tmu_do_clear); + } + + len += sprintf(buf + len, "pe status: %x\n", readl(TMU_PE_STATUS)); + len += sprintf(buf + len, "inq fifo cnt: %x\n", + readl(TMU_PHY_INQ_FIFO_CNT)); + val = readl(TMU_INQ_STAT); + len += sprintf(buf + len, "inq wr ptr: %x\n", val & 0x3ff); + len += sprintf(buf + len, "inq rd ptr: %x\n", val >> 10); + + return len; +} + +static unsigned long drops_do_clear; +static u32 class_drop_counter[CLASS_NUM_DROP_COUNTERS]; +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) +static u32 util_drop_counter[UTIL_NUM_DROP_COUNTERS]; +#endif + +char *class_drop_description[CLASS_NUM_DROP_COUNTERS] = { + "ICC", + "Host Pkt Error", + "Rx Error", + "IPsec Outbound", + "IPsec Inbound", + "EXPT IPsec Error", + "Reassembly", + "Fragmenter", + "NAT-T", + "Socket", + "Multicast", + "NAT-PT", + "Tx Disabled", +}; + +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) +char *util_drop_description[UTIL_NUM_DROP_COUNTERS] = { + "IPsec Outbound", + "IPsec Inbound", + "IPsec Rate Limiter", + "Fragmenter", + "Socket", + "Tx Disabled", + "Rx Error", +}; +#endif + +static ssize_t pfe_set_drops(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + drops_do_clear = kstrtoul(buf, 0, 0); + return count; +} + +static u32 tmu_drops[4][16]; +static ssize_t pfe_show_drops(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ssize_t len = 0; + int id, dropnum; + int tmu, queue; + u32 val; + u32 dmem_addr; + int num_class_drops = 0, num_tmu_drops = 0, num_util_drops = 0; + struct pfe_ctrl *ctrl = &pfe->ctrl; + + memset(class_drop_counter, 0, sizeof(class_drop_counter)); + for (id = CLASS0_ID; id <= CLASS_MAX_ID; id++) { + if (drops_do_clear) + pe_sync_stop(ctrl, (1 << id)); + for (dropnum = 0; dropnum < CLASS_NUM_DROP_COUNTERS; + dropnum++) { + dmem_addr = CLASS_DM_DROP_CNTR; + val = be32_to_cpu(pe_dmem_read(id, dmem_addr, 4)); + class_drop_counter[dropnum] += val; + num_class_drops += val; + if (drops_do_clear) + pe_dmem_write(id, 0, dmem_addr, 4); + } + if (drops_do_clear) + pe_start(ctrl, (1 << id)); + } + +#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED) + if (drops_do_clear) + pe_sync_stop(ctrl, (1 << UTIL_ID)); + for (dropnum = 0; dropnum < UTIL_NUM_DROP_COUNTERS; 
+
+static ssize_t pfe_set_drops(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	if (kstrtoul(buf, 0, &drops_do_clear))
+		return -EINVAL;
+	return count;
+}
+
+static u32 tmu_drops[4][16];
+static ssize_t pfe_show_drops(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	ssize_t len = 0;
+	int id, dropnum;
+	int tmu, queue;
+	u32 val;
+	u32 dmem_addr;
+	int num_class_drops = 0, num_tmu_drops = 0, num_util_drops = 0;
+	struct pfe_ctrl *ctrl = &pfe->ctrl;
+
+	memset(class_drop_counter, 0, sizeof(class_drop_counter));
+	for (id = CLASS0_ID; id <= CLASS_MAX_ID; id++) {
+		if (drops_do_clear)
+			pe_sync_stop(ctrl, (1 << id));
+		for (dropnum = 0; dropnum < CLASS_NUM_DROP_COUNTERS;
+		     dropnum++) {
+			dmem_addr = CLASS_DM_DROP_CNTR + dropnum * 4;
+			val = be32_to_cpu(pe_dmem_read(id, dmem_addr, 4));
+			class_drop_counter[dropnum] += val;
+			num_class_drops += val;
+			if (drops_do_clear)
+				pe_dmem_write(id, 0, dmem_addr, 4);
+		}
+		if (drops_do_clear)
+			pe_start(ctrl, (1 << id));
+	}
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	if (drops_do_clear)
+		pe_sync_stop(ctrl, (1 << UTIL_ID));
+	for (dropnum = 0; dropnum < UTIL_NUM_DROP_COUNTERS; dropnum++) {
+		dmem_addr = UTIL_DM_DROP_CNTR + dropnum * 4;
+		val = be32_to_cpu(pe_dmem_read(UTIL_ID, dmem_addr, 4));
+		util_drop_counter[dropnum] = val;
+		num_util_drops += val;
+		if (drops_do_clear)
+			pe_dmem_write(UTIL_ID, 0, dmem_addr, 4);
+	}
+	if (drops_do_clear)
+		pe_start(ctrl, (1 << UTIL_ID));
+#endif
+	for (tmu = 0; tmu < 4; tmu++) {
+		for (queue = 0; queue < 16; queue++) {
+			qm_read_drop_stat(tmu, queue, &tmu_drops[tmu][queue],
+					  drops_do_clear);
+			num_tmu_drops += tmu_drops[tmu][queue];
+		}
+	}
+
+	if (num_class_drops == 0 && num_util_drops == 0 && num_tmu_drops == 0)
+		len += sprintf(buf + len, "No PE drops\n\n");
+
+	if (num_class_drops > 0) {
+		len += sprintf(buf + len, "Class PE drops --\n");
+		for (dropnum = 0; dropnum < CLASS_NUM_DROP_COUNTERS;
+		     dropnum++) {
+			if (class_drop_counter[dropnum] > 0)
+				len += sprintf(buf + len, "  %s: %d\n",
+					       class_drop_description[dropnum],
+					       class_drop_counter[dropnum]);
+		}
+		len += sprintf(buf + len, "\n");
+	}
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	if (num_util_drops > 0) {
+		len += sprintf(buf + len, "Util PE drops --\n");
+		for (dropnum = 0; dropnum < UTIL_NUM_DROP_COUNTERS; dropnum++) {
+			if (util_drop_counter[dropnum] > 0)
+				len += sprintf(buf + len, "  %s: %d\n",
+					       util_drop_description[dropnum],
+					       util_drop_counter[dropnum]);
+		}
+		len += sprintf(buf + len, "\n");
+	}
+#endif
+	if (num_tmu_drops > 0) {
+		len += sprintf(buf + len, "TMU drops --\n");
+		for (tmu = 0; tmu < 4; tmu++) {
+			for (queue = 0; queue < 16; queue++) {
+				if (tmu_drops[tmu][queue] > 0)
+					len += sprintf(buf + len,
+						       "  TMU%d-Q%d: %d\n",
+						       tmu, queue,
+						       tmu_drops[tmu][queue]);
+			}
+		}
+		len += sprintf(buf + len, "\n");
+	}
+
+	return len;
+}
+
+static ssize_t pfe_show_tmu0_queues(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return tmu_queues(buf, 0);
+}
+
+static ssize_t pfe_show_tmu1_queues(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return tmu_queues(buf, 1);
+}
+
+static ssize_t pfe_show_tmu2_queues(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return tmu_queues(buf, 2);
+}
+
+static ssize_t pfe_show_tmu3_queues(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return tmu_queues(buf, 3);
+}
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+static ssize_t pfe_set_util(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	if (kstrtoul(buf, 0, &util_do_clear))
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t pfe_show_util(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ssize_t len = 0;
+	struct pfe_ctrl *ctrl = &pfe->ctrl;
+
+	len += block_version(buf + len, UTIL_VERSION);
+
+	pe_sync_stop(ctrl, (1 << UTIL_ID));
+	len += display_pe_status(buf + len, UTIL_ID, UTIL_DM_PESTATUS,
+				 util_do_clear);
+	pe_start(ctrl, (1 << UTIL_ID));
+
+	len += sprintf(buf + len, "pe status:   %x\n", readl(UTIL_PE_STATUS));
+	len += sprintf(buf + len, "max buf cnt: %x\n",
+		       readl(UTIL_MAX_BUF_CNT));
+	len += sprintf(buf + len, "tsq max cnt: %x\n",
+		       readl(UTIL_TSQ_MAX_CNT));
+
+	return len;
+}
+#endif
+
+static ssize_t pfe_show_bmu(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	ssize_t len = 0;
+
+	len += bmu(buf + len, 1, BMU1_BASE_ADDR);
+	len += bmu(buf + len, 2, BMU2_BASE_ADDR);
+
+	return len;
+}
+
+static ssize_t pfe_show_hif(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	ssize_t len = 0;
+
+	len += sprintf(buf + len, "hif:\n  ");
+	len += block_version(buf + len, HIF_VERSION);
+
+	len += sprintf(buf + len, "  tx curr bd:    %x\n",
+		       readl(HIF_TX_CURR_BD_ADDR));
+	len += sprintf(buf + len, "  tx status:     %x\n",
+		       readl(HIF_TX_STATUS));
+	len += sprintf(buf + len, "  tx dma status: %x\n",
+		       readl(HIF_TX_DMA_STATUS));
+
+	len += sprintf(buf + len, "  rx curr bd:    %x\n",
+		       readl(HIF_RX_CURR_BD_ADDR));
+	len += sprintf(buf + len, "  rx status:     %x\n",
+		       readl(HIF_RX_STATUS));
+	len += sprintf(buf + len, "  rx dma status: %x\n",
+		       readl(HIF_RX_DMA_STATUS));
+
+	len += sprintf(buf + len, "hif nocopy:\n  ");
+	len += block_version(buf + len, HIF_NOCPY_VERSION);
+
+	len += sprintf(buf + len, "  tx curr bd:    %x\n",
+		       readl(HIF_NOCPY_TX_CURR_BD_ADDR));
+	len += sprintf(buf + len, "  tx status:     %x\n",
+		       readl(HIF_NOCPY_TX_STATUS));
+	len += sprintf(buf + len, "  tx dma status: %x\n",
+		       readl(HIF_NOCPY_TX_DMA_STATUS));
+
+	len += sprintf(buf + len, "  rx curr bd:    %x\n",
+		       readl(HIF_NOCPY_RX_CURR_BD_ADDR));
+	len += sprintf(buf + len, "  rx status:     %x\n",
+		       readl(HIF_NOCPY_RX_STATUS));
+	len += sprintf(buf + len, "  rx dma status: %x\n",
+		       readl(HIF_NOCPY_RX_DMA_STATUS));
+
+	return len;
+}
+
+static ssize_t pfe_show_gpi(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	ssize_t len = 0;
+
+	len += gpi(buf + len, 0, EGPI1_BASE_ADDR);
+	len += gpi(buf + len, 1, EGPI2_BASE_ADDR);
+	len += gpi(buf + len, 3, HGPI_BASE_ADDR);
+
+	return len;
+}
+
+static ssize_t pfe_show_pfemem(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	ssize_t len = 0;
+	struct pfe_memmon *memmon = &pfe->memmon;
+
+	len += sprintf(buf + len, "Kernel Memory: %d Bytes (%d KB)\n",
+		       memmon->kernel_memory_allocated,
+		       DIV_ROUND_UP(memmon->kernel_memory_allocated, 1024));
+
+	return len;
+}
+
+#ifdef HIF_NAPI_STATS
+static ssize_t pfe_show_hif_napi_stats(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pfe *pfe = platform_get_drvdata(pdev);
+	ssize_t len = 0;
+
+	len += sprintf(buf + len, "sched:  %u\n",
+		       pfe->hif.napi_counters[NAPI_SCHED_COUNT]);
+	len += sprintf(buf + len, "poll:   %u\n",
+		       pfe->hif.napi_counters[NAPI_POLL_COUNT]);
+	len += sprintf(buf + len, "packet: %u\n",
+		       pfe->hif.napi_counters[NAPI_PACKET_COUNT]);
+	len += sprintf(buf + len, "budget: %u\n",
+		       pfe->hif.napi_counters[NAPI_FULL_BUDGET_COUNT]);
+	len += sprintf(buf + len, "desc:   %u\n",
+		       pfe->hif.napi_counters[NAPI_DESC_COUNT]);
+	len += sprintf(buf + len, "full:   %u\n",
+		       pfe->hif.napi_counters[NAPI_CLIENT_FULL_COUNT]);
+
+	return len;
+}
+
+static ssize_t pfe_set_hif_napi_stats(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct pfe *pfe = platform_get_drvdata(pdev);
+
+	memset(pfe->hif.napi_counters, 0, sizeof(pfe->hif.napi_counters));
+
+	return count;
+}
+
+static DEVICE_ATTR(hif_napi_stats, 0644, pfe_show_hif_napi_stats,
+		   pfe_set_hif_napi_stats);
+#endif
+
+static DEVICE_ATTR(class, 0644, pfe_show_class, pfe_set_class);
+static DEVICE_ATTR(tmu, 0644, pfe_show_tmu, pfe_set_tmu);
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+static DEVICE_ATTR(util, 0644, pfe_show_util, pfe_set_util);
+#endif
+static DEVICE_ATTR(bmu, 0444, pfe_show_bmu, NULL);
+static DEVICE_ATTR(hif, 0444, pfe_show_hif, NULL);
+static DEVICE_ATTR(gpi, 0444, pfe_show_gpi, NULL);
+static DEVICE_ATTR(drops, 0644, pfe_show_drops, pfe_set_drops);
+static DEVICE_ATTR(tmu0_queues, 0444, pfe_show_tmu0_queues, NULL);
+static DEVICE_ATTR(tmu1_queues, 0444, pfe_show_tmu1_queues, NULL);
+static DEVICE_ATTR(tmu2_queues, 0444, pfe_show_tmu2_queues, NULL);
+static DEVICE_ATTR(tmu3_queues, 0444, pfe_show_tmu3_queues, NULL);
+static DEVICE_ATTR(pfemem, 0444, pfe_show_pfemem, NULL);
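+
+/*
+ * Editor's note: the create/remove ladder below is correct but verbose.
+ * An attribute group would collapse it to one call (sketch only; the
+ * conditional util and hif_napi_stats attributes would still need
+ * separate handling):
+ *
+ *	static struct attribute *pfe_attrs[] = {
+ *		&dev_attr_class.attr,
+ *		&dev_attr_tmu.attr,
+ *		&dev_attr_bmu.attr,
+ *		&dev_attr_hif.attr,
+ *		&dev_attr_gpi.attr,
+ *		&dev_attr_drops.attr,
+ *		&dev_attr_tmu0_queues.attr,
+ *		&dev_attr_tmu1_queues.attr,
+ *		&dev_attr_tmu2_queues.attr,
+ *		&dev_attr_tmu3_queues.attr,
+ *		&dev_attr_pfemem.attr,
+ *		NULL,
+ *	};
+ *
+ *	static const struct attribute_group pfe_attr_group = {
+ *		.attrs = pfe_attrs,
+ *	};
+ *
+ *	in pfe_sysfs_init():
+ *	return sysfs_create_group(&pfe->dev->kobj, &pfe_attr_group);
+ */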
+
+int pfe_sysfs_init(struct pfe *pfe)
+{
+	if (device_create_file(pfe->dev, &dev_attr_class))
+		goto err_class;
+
+	if (device_create_file(pfe->dev, &dev_attr_tmu))
+		goto err_tmu;
+
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	if (device_create_file(pfe->dev, &dev_attr_util))
+		goto err_util;
+#endif
+
+	if (device_create_file(pfe->dev, &dev_attr_bmu))
+		goto err_bmu;
+
+	if (device_create_file(pfe->dev, &dev_attr_hif))
+		goto err_hif;
+
+	if (device_create_file(pfe->dev, &dev_attr_gpi))
+		goto err_gpi;
+
+	if (device_create_file(pfe->dev, &dev_attr_drops))
+		goto err_drops;
+
+	if (device_create_file(pfe->dev, &dev_attr_tmu0_queues))
+		goto err_tmu0_queues;
+
+	if (device_create_file(pfe->dev, &dev_attr_tmu1_queues))
+		goto err_tmu1_queues;
+
+	if (device_create_file(pfe->dev, &dev_attr_tmu2_queues))
+		goto err_tmu2_queues;
+
+	if (device_create_file(pfe->dev, &dev_attr_tmu3_queues))
+		goto err_tmu3_queues;
+
+	if (device_create_file(pfe->dev, &dev_attr_pfemem))
+		goto err_pfemem;
+
+#ifdef HIF_NAPI_STATS
+	if (device_create_file(pfe->dev, &dev_attr_hif_napi_stats))
+		goto err_hif_napi_stats;
+#endif
+
+	return 0;
+
+	/* Unwind in the reverse order of creation */
+#ifdef HIF_NAPI_STATS
+err_hif_napi_stats:
+	device_remove_file(pfe->dev, &dev_attr_pfemem);
+#endif
+
+err_pfemem:
+	device_remove_file(pfe->dev, &dev_attr_tmu3_queues);
+
+err_tmu3_queues:
+	device_remove_file(pfe->dev, &dev_attr_tmu2_queues);
+
+err_tmu2_queues:
+	device_remove_file(pfe->dev, &dev_attr_tmu1_queues);
+
+err_tmu1_queues:
+	device_remove_file(pfe->dev, &dev_attr_tmu0_queues);
+
+err_tmu0_queues:
+	device_remove_file(pfe->dev, &dev_attr_drops);
+
+err_drops:
+	device_remove_file(pfe->dev, &dev_attr_gpi);
+
+err_gpi:
+	device_remove_file(pfe->dev, &dev_attr_hif);
+
+err_hif:
+	device_remove_file(pfe->dev, &dev_attr_bmu);
+
+err_bmu:
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	device_remove_file(pfe->dev, &dev_attr_util);
+
+err_util:
+#endif
+	device_remove_file(pfe->dev, &dev_attr_tmu);
+
+err_tmu:
+	device_remove_file(pfe->dev, &dev_attr_class);
+
+err_class:
+	return -1;
+}
+
+void pfe_sysfs_exit(struct pfe *pfe)
+{
+#ifdef HIF_NAPI_STATS
+	device_remove_file(pfe->dev, &dev_attr_hif_napi_stats);
+#endif
+	device_remove_file(pfe->dev, &dev_attr_pfemem);
+	device_remove_file(pfe->dev, &dev_attr_tmu3_queues);
+	device_remove_file(pfe->dev, &dev_attr_tmu2_queues);
+	device_remove_file(pfe->dev, &dev_attr_tmu1_queues);
+	device_remove_file(pfe->dev, &dev_attr_tmu0_queues);
+	device_remove_file(pfe->dev, &dev_attr_drops);
+	device_remove_file(pfe->dev, &dev_attr_gpi);
+	device_remove_file(pfe->dev, &dev_attr_hif);
+	device_remove_file(pfe->dev, &dev_attr_bmu);
+#if !defined(CONFIG_FSL_PPFE_UTIL_DISABLED)
+	device_remove_file(pfe->dev, &dev_attr_util);
+#endif
+	device_remove_file(pfe->dev, &dev_attr_tmu);
+	device_remove_file(pfe->dev, &dev_attr_class);
+}
diff --git a/drivers/staging/fsl_ppfe/pfe_sysfs.h b/drivers/staging/fsl_ppfe/pfe_sysfs.h
new file mode 100644
index 0000000..4fb39c9
--- /dev/null
+++ b/drivers/staging/fsl_ppfe/pfe_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PFE_SYSFS_H_
+#define _PFE_SYSFS_H_
+
+#include <linux/proc_fs.h>
+
+u32 qm_read_drop_stat(u32 tmu, u32 queue, u32 *total_drops, int do_reset);
+
+int pfe_sysfs_init(struct pfe *pfe);
+void pfe_sysfs_exit(struct pfe *pfe);
+
+#endif /* _PFE_SYSFS_H_ */
diff --git a/drivers/staging/fsl_qbman/fsl_usdpaa.c b/drivers/staging/fsl_qbman/fsl_usdpaa.c
index 3a6d3722..ec5ea6e 100644
--- a/drivers/staging/fsl_qbman/fsl_usdpaa.c
+++ b/drivers/staging/fsl_qbman/fsl_usdpaa.c
@@ -371,6 +371,16 @@ static int usdpaa_open(struct inode *inode, struct file *filp)
 
 #define DQRR_MAXFILL 15
 
+/* Invalidate a portal */
+void dcbi_portal(void *addr)
+{
+	int i;
+
+	for (i = 0; i < 0x4000; i += 64)
+		dcbi(addr + i);
+}
+
 /* Reset a QMan portal to its default state */
 static int init_qm_portal(struct qm_portal_config *config,
 			  struct qm_portal *portal)
@@ -384,6 +394,13 @@ static int init_qm_portal(struct qm_portal_config *config,
 	/* Make sure interrupts are inhibited */
 	qm_out(IIR, 1);
 
+	/*
+	 * Invalidate the entire CE portal area to ensure no stale
+	 * cachelines are present. This should be done on all
+	 * cores as the portal is mapped as M=0 (non-coherent).
+	 */
+	on_each_cpu(dcbi_portal, portal->addr.addr_ce, 1);
+
 	/* Initialize the DQRR. This will stop any dequeue
 	   commands that are in progress */
 	if (qm_dqrr_init(portal, config, qm_dqrr_dpush, qm_dqrr_pvb,
@@ -435,6 +452,13 @@ static int init_bm_portal(struct bm_portal_config *config,
 	portal->addr.addr_ce = config->addr_virt[DPA_PORTAL_CE];
 	portal->addr.addr_ci = config->addr_virt[DPA_PORTAL_CI];
 
+	/*
+	 * Invalidate the entire CE portal area to ensure no stale
+	 * cachelines are present. This should be done on all
+	 * cores as the portal is mapped as M=0 (non-coherent).
+	 */
+	on_each_cpu(dcbi_portal, portal->addr.addr_ce, 1);
+
 	if (bm_rcr_init(portal, bm_rcr_pvb, bm_rcr_cce)) {
 		pr_err("Bman RCR initialisation failed\n");
 		return 1;
diff --git a/drivers/staging/fsl_qbman/qman_low.h b/drivers/staging/fsl_qbman/qman_low.h
index 547b5fa..e43f9ea 100644
--- a/drivers/staging/fsl_qbman/qman_low.h
+++ b/drivers/staging/fsl_qbman/qman_low.h
@@ -1095,11 +1095,26 @@ static inline void qm_mr_set_ithresh(struct qm_portal *portal, u8 ithresh)
 
 static inline int qm_mc_init(struct qm_portal *portal)
 {
+	u8 rr0, rr1;
 	register struct qm_mc *mc = &portal->mc;
+
 	mc->cr = portal->addr.addr_ce + QM_CL_CR;
 	mc->rr = portal->addr.addr_ce + QM_CL_RR0;
-	mc->rridx = (__raw_readb(&mc->cr->__dont_write_directly__verb) &
-			QM_MCC_VERB_VBIT) ?  0 : 1;
+
+	/*
+	 * The expected valid bit polarity for the next CR command is 0
+	 * if RR1 contains a valid response, and is 1 if RR0 contains a
+	 * valid response. If both RRs contain all 0, no command has been
+	 * executed since reset, in which case the expected valid bit
+	 * polarity is also 1.
+	 */
+	rr0 = __raw_readb(&mc->rr->verb);
+	rr1 = __raw_readb(&(mc->rr+1)->verb);
+	if ((rr0 == 0 && rr1 == 0) || rr0 != 0)
+		mc->rridx = 1;
+	else
+		mc->rridx = 0;
+
+	mc->vbit = mc->rridx ? QM_MCC_VERB_VBIT : 0;
 #ifdef CONFIG_FSL_DPA_CHECKING
 	mc->state = qman_mc_idle;
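The valid-bit recovery above reduces to a small truth table; a compact
restatement of the logic just added (rr0/rr1 are the verb bytes of the two
management-command response registers):

	/* rr0 verb | rr1 verb | meaning                 | rridx | vbit
	 * ---------+----------+-------------------------+-------+------
	 *     0    |    0     | nothing run since reset |   1   |  1
	 *   != 0   |   any    | RR0 holds last response |   1   |  1
	 *     0    |  != 0    | RR1 holds last response |   0   |  0
	 */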
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index e34ef90..885d8e5 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1115,6 +1115,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
 	dwc->tx_de_emphasis_quirk = device_property_read_bool(dev,
 				"snps,tx_de_emphasis_quirk");
+	dwc->disable_devinit_u1u2_quirk = device_property_read_bool(dev,
+				"snps,disable_devinit_u1u2");
 	device_property_read_u8(dev, "snps,tx_de_emphasis",
 				&tx_de_emphasis);
 	device_property_read_string(dev, "snps,hsphy_interface",
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 9151eef..2549339 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -860,6 +860,7 @@ struct dwc3_scratchpad_array {
  *	1	- -3.5dB de-emphasis
  *	2	- No de-emphasis
  *	3	- Reserved
+ * @disable_devinit_u1u2_quirk: disable device-initiated U1/U2 requests.
  */
 struct dwc3 {
 	struct usb_ctrlrequest	*ctrl_req;
@@ -1016,6 +1017,7 @@ struct dwc3 {
 	unsigned		tx_de_emphasis_quirk:1;
 	unsigned		tx_de_emphasis:2;
+	unsigned		disable_devinit_u1u2_quirk:1;
 
 	u16			imod_interval;
 };
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 2331469..4e6c1bb 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -360,9 +360,9 @@ static int dwc3_ep0_handle_status(struct dwc3 *dwc,
 		if ((dwc->speed == DWC3_DSTS_SUPERSPEED) ||
 		    (dwc->speed == DWC3_DSTS_SUPERSPEED_PLUS)) {
 			reg = dwc3_readl(dwc->regs, DWC3_DCTL);
-			if (reg & DWC3_DCTL_INITU1ENA)
+			if ((reg & DWC3_DCTL_INITU1ENA) && !dwc->disable_devinit_u1u2_quirk)
 				usb_status |= 1 << USB_DEV_STAT_U1_ENABLED;
-			if (reg & DWC3_DCTL_INITU2ENA)
+			if ((reg & DWC3_DCTL_INITU2ENA) && !dwc->disable_devinit_u1u2_quirk)
 				usb_status |= 1 << USB_DEV_STAT_U2_ENABLED;
 		}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c61ddbf..51c86aa 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2930,6 +2930,7 @@ static irqreturn_t dwc3_interrupt(int irq, void *_evt)
 int dwc3_gadget_init(struct dwc3 *dwc)
 {
 	int ret, irq;
+	u32 reg;
 	struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
 
 	irq = platform_get_irq_byname(dwc3_pdev, "peripheral");
@@ -3044,6 +3045,12 @@ int dwc3_gadget_init(struct dwc3 *dwc)
 		goto err5;
 	}
 
+	if (dwc->disable_devinit_u1u2_quirk) {
+		reg = dwc3_readl(dwc->regs, DWC3_DCTL);
+		reg &= ~(DWC3_DCTL_INITU1ENA | DWC3_DCTL_INITU2ENA);
+		dwc3_writel(dwc->regs, DWC3_DCTL, reg);
+	}
+
 	return 0;
 
 err5:
diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index cd16860..d24b92d 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -36,6 +36,7 @@
 #include <linux/platform_device.h>
 #include <linux/fsl_devices.h>
 #include <linux/of_platform.h>
+#include <linux/io.h>
 
 #ifdef CONFIG_PPC
 #include <asm/fsl_pm.h>
@@ -82,13 +83,23 @@ struct ccsr_usb_phy {
 #define DRV_NAME "ehci-fsl"
 
 static struct hc_driver __read_mostly fsl_ehci_hc_driver;
+
 struct ehci_fsl {
-	/* store current hcd state for otg;
-	 * have_hcd is true when host drv al already part of otg framework,
-	 * otherwise false;
-	 * hcd_add is true when otg framework wants to add host
-	 * drv as part of otg;flase when it wants to remove it
-	 */
+	struct ehci_hcd ehci;
+
+#ifdef CONFIG_PM
+struct ehci_regs saved_regs;
+struct ccsr_usb_phy saved_phy_regs;
+/* Saved USB PHY settings, need to restore after deep sleep. */
+u32 usb_ctrl;
+#endif
+	/*
+	 * store current hcd state for otg;
+	 * have_hcd is true when the host drv is already part of the otg
+	 * framework, otherwise false;
+	 * hcd_add is true when the otg framework wants to add the host
+	 * drv as part of otg; false when it wants to remove it
+	 */
 	unsigned have_hcd:1;
 	unsigned hcd_add:1;
 };
@@ -115,7 +126,7 @@ int retval;
 	/* host, gadget and otg share same int line */
 	retval = usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
 	if (retval == 0)
-	ehci_fsl->have_hcd = 1;
+		ehci_fsl->have_hcd = 1;
 	} else if (!ehci_fsl->hcd_add && ehci_fsl->have_hcd) {
 		usb_remove_hcd(hcd);
 		ehci_fsl->have_hcd = 0;
@@ -123,53 +134,26 @@
 }
 #endif
 
-struct ehci_fsl {
-	struct ehci_hcd ehci;
-
-#ifdef CONFIG_PM
-struct ehci_regs saved_regs;
-struct ccsr_usb_phy saved_phy_regs;
-/* Saved USB PHY settings, need to restore after deep sleep. */
-u32 usb_ctrl;
-#endif
-	/*
-	 * store current hcd state for otg;
-	 * have_hcd is true when host drv al already part of otg framework,
-	 * otherwise false;
-	 * hcd_add is true when otg framework wants to add host
-	 * drv as part of otg;flase when it wants to remove it
-	 */
-unsigned have_hcd:1;
-unsigned hcd_add:1;
-};
-
-static strut ehci_fsl *hcd_to_ehci_fsl(struct usb_hcd *hcd)
-{
-struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-
-return container_of(ehci, struct ehci_fsl, ehci);
-}
-
 #if defined(CONFIG_FSL_USB2_OTG) || defined(CONFIG_FSL_USB2_OTG_MODULE)
 static void do_change_hcd(struct work_struct *work)
 {
-struct ehci_hcd *ehci = container_of(work, struct ehci_hcd,
-change_hcd_work);
-struct usb_hcd *hcd = ehci_to_hcd(ehci);
-struct ehci_fsl *ehci_fsl = hcd_to_ehci_fsl(hcd);
-void __iomem *non_ehci = hcd->regs;
-int retval;
+	struct ehci_hcd *ehci = container_of(work, struct ehci_hcd,
+					     change_hcd_work);
+	struct usb_hcd *hcd = ehci_to_hcd(ehci);
+	struct ehci_fsl *ehci_fsl = hcd_to_ehci_fsl(hcd);
+	void __iomem *non_ehci = hcd->regs;
+	int retval;
 
-if (ehci_fsl->hcd_add && !ehci_fsl->have_hcd) {
-writel(USBMODE_CM_HOST, non_ehci + FSL_SOC_USB_USBMODE);
-/* host, gadget and otg share same int line */
-retval = usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
-if (retval == 0)
-ehci_fsl->have_hcd = 1;
-} else if (!ehci_fsl->hcd_add && ehci_fsl->have_hcd) {
-	usb_remove_hcd(hcd);
-ehci_fsl->have_hcd = 0;
-}
+	if (ehci_fsl->hcd_add && !ehci_fsl->have_hcd) {
+		writel(USBMODE_CM_HOST, non_ehci + FSL_SOC_USB_USBMODE);
+		/* host, gadget and otg share same int line */
+		retval = usb_add_hcd(hcd, hcd->irq, IRQF_SHARED);
+		if (retval == 0)
+			ehci_fsl->have_hcd = 1;
+	} else if (!ehci_fsl->hcd_add && ehci_fsl->have_hcd) {
+		usb_remove_hcd(hcd);
+		ehci_fsl->have_hcd = 0;
+	}
 }
 #endif
 
@@ -324,13 +308,12 @@ static int fsl_ehci_drv_probe(struct platform_device *pdev)
 	return retval;
 }
 
-static bool usb_phy_clk_valid(struct usb_hcd *hcd,
-			enum fsl_usb2_phy_modes phy_mode)
+static bool usb_phy_clk_valid(struct usb_hcd *hcd)
 {
 	void __iomem *non_ehci = hcd->regs;
 	bool ret = true;
 
-	if (!(in_be32(non_ehci + FSL_SOC_USB_CTRL) & PHY_CLK_VALID))
+	if (!(ioread32be(non_ehci + FSL_SOC_USB_CTRL) & PHY_CLK_VALID))
 		ret = false;
 
 	return ret;
@@ -456,6 +439,9 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci)
 	if (pdata->has_fsl_erratum_a005275 == 1)
 		ehci->has_fsl_hs_errata = 1;
 
+	if (pdata->has_fsl_erratum_a005697 == 1)
+		ehci->has_fsl_susp_errata = 1;
+
 	if ((pdata->operating_mode == FSL_USB2_DR_HOST) ||
 	    (pdata->operating_mode == FSL_USB2_DR_OTG))
 		if (ehci_fsl_setup_phy(hcd, pdata->phy_mode, 0))
@@ -752,7 +738,9 @@ static int ehci_fsl_drv_suspend(struct device *dev)
 #ifdef CONFIG_PPC
 	suspend_state_t pm_state;
 
-pm_state = pm_suspend_state();
+/* FIXME: need to port fsl_pm.h before enabling the code below. */
+/*pm_state = pm_suspend_state();*/
+pm_state = PM_SUSPEND_MEM;
 
 	if (pm_state == PM_SUSPEND_MEM)
 		ehci_fsl_save_context(hcd);
@@ -797,7 +785,9 @@ static int ehci_fsl_drv_resume(struct device *dev)
 #ifdef CONFIG_PPC
 	suspend_state_t pm_state;
 
-pm_state = pm_suspend_state();
+/* FIXME: need to port fsl_pm.h before enabling the code below. */
+/*pm_state = pm_suspend_state();*/
+pm_state = PM_SUSPEND_MEM;
 
 	if (pm_state == PM_SUSPEND_MEM)
 		ehci_fsl_restore_context(hcd);
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index d4a8ec5..6d82575 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -278,6 +278,8 @@ static int ehci_bus_suspend (struct usb_hcd *hcd)
 		else if ((t1 & PORT_PE) && !(t1 & PORT_SUSPEND)) {
 			t2 |= PORT_SUSPEND;
 			set_bit(port, &ehci->bus_suspended);
+			if (ehci_has_fsl_susp_errata(ehci))
+				usleep_range(10000, 20000);
 		}
 
 		/* enable remote wakeup on all ports, if told to do so */
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index c4bd983..492efbb 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -222,6 +222,7 @@ struct ehci_hcd {			/* one per controller */
 	unsigned		no_selective_suspend:1;
 	unsigned		has_fsl_port_bug:1; /* FreeScale */
 	unsigned		has_fsl_hs_errata:1; /* Freescale HS quirk */
+	unsigned		has_fsl_susp_errata:1; /* Freescale SUSP quirk */
 	unsigned		big_endian_mmio:1;
 	unsigned		big_endian_desc:1;
 	unsigned		big_endian_capbase:1;
@@ -706,6 +707,9 @@ ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc)
 #if defined(CONFIG_PPC_85xx)
 /* Some Freescale processors have an erratum (USB A-005275) in which
  * incoming packets get corrupted in HS mode
+ * Some Freescale processors have an erratum (USB A-005697) in which
+ * we need to wait for 10ms for the bus to go into suspend mode after
+ * setting the SUSP bit
  */
 #define ehci_has_fsl_hs_errata(e)	((e)->has_fsl_hs_errata)
+#define ehci_has_fsl_susp_errata(e)	((e)->has_fsl_susp_errata)