diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b9c020a05fb8..89c29b40d61c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
-static void igb_xmit_flush(struct net_device *netdev, u16 queue);
 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 					  struct rtnl_link_stats64 *stats);
 static int igb_change_mtu(struct net_device *, int);
@@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open		= igb_open,
 	.ndo_stop		= igb_close,
 	.ndo_start_xmit		= igb_xmit_frame,
-	.ndo_xmit_flush		= igb_xmit_flush,
 	.ndo_get_stats64	= igb_get_stats64,
 	.ndo_set_rx_mode	= igb_set_rx_mode,
 	.ndo_set_mac_address	= igb_set_mac,
@@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 
 	tx_ring->next_to_use = i;
 
+	if (!skb->xmit_more) {
+		writel(i, tx_ring->tail);
+
+		/* We need this if more than one processor can write to our
+		 * tail at a time; it synchronizes IO on IA64/Altix systems.
+		 */
+		mmiowb();
+	}
 	return;
 
 dma_error:
@@ -5052,20 +5058,17 @@ out_drop:
 	return NETDEV_TX_OK;
 }
 
-static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx)
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+						    struct sk_buff *skb)
 {
+	unsigned int r_idx = skb->queue_mapping;
+
 	if (r_idx >= adapter->num_tx_queues)
 		r_idx = r_idx % adapter->num_tx_queues;
 
 	return adapter->tx_ring[r_idx];
 }
 
-static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
-						    struct sk_buff *skb)
-{
-	return __igb_tx_queue_mapping(adapter, skb->queue_mapping);
-}
-
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
@@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
 }
 
-static void igb_xmit_flush(struct net_device *netdev, u16 queue)
-{
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct igb_ring *tx_ring;
-
-	tx_ring = __igb_tx_queue_mapping(adapter, queue);
-
-	writel(tx_ring->next_to_use, tx_ring->tail);
-
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it synchronizes IO on IA64/Altix systems
-	 */
-	mmiowb();
-}
-
 /**
  *  igb_tx_timeout - Respond to a Tx Hang
  *  @netdev: network interface device structure
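
With ndo_xmit_flush gone, igb rings the doorbell from igb_tx_map() itself,
gated on the new bit: the writel() to the tail register is skipped whenever
skb->xmit_more is set, on the assumption that a later skb in the batch will
arrive with the bit clear and flush everything at once. As a minimal sketch
of the resulting shape (foo_ring and its members are hypothetical stand-ins
for a driver's real descriptor-ring type):

	/* Deferred-doorbell pattern: post descriptors unconditionally,
	 * but only make them visible to the device (the expensive
	 * uncached MMIO write) on the final skb of a batch.
	 */
	static void foo_tx_map(struct foo_ring *ring, struct sk_buff *skb)
	{
		/* ... fill descriptors, advance ring->next_to_use ... */

		if (!skb->xmit_more)
			writel(ring->next_to_use, ring->tail);
	}

The doorbell write is the costly step, so eliding it for all but the last
skb of a batch is where the win comes from.
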
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 62421086d3e6..f0c2824f5e0f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -953,17 +953,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
+	if (!skb->xmit_more)
+		virtqueue_kick(sq->vq);
+
 	return NETDEV_TX_OK;
 }
 
-static void xmit_flush(struct net_device *dev, u16 qnum)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-	struct send_queue *sq = &vi->sq[qnum];
-
-	virtqueue_kick(sq->vq);
-}
-
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
 	.ndo_start_xmit      = start_xmit,
-	.ndo_xmit_flush      = xmit_flush,
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_set_mac_address = virtnet_set_mac_address,
 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
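
virtio_net gets the same treatment, with virtqueue_kick() as the
device-notification primitive; a kick typically means a VM exit, so
batching kicks matters even more than batching MMIO writes. Nothing in this
patch sets xmit_more yet, but a batching caller would mark every skb except
the final one, roughly as follows (hypothetical helper, error handling
elided):

	/* Sketch only: a real caller must handle NETDEV_TX_BUSY and
	 * stopped queues; this just shows where the bit is set.
	 */
	static void send_batch(struct net_device *dev,
			       struct sk_buff **skbs, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			skbs[i]->xmit_more = (i != n - 1);
			dev->netdev_ops->ndo_start_xmit(skbs[i], dev);
		}
	}
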
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 220c50984688..039b23786c22 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
 *	Required; cannot be NULL.
  *
- * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
- *	A driver implements this function when it wishes to support
- *	deferred TX queue flushing.  The idea is that the expensive
- *	operation to trigger TX queue processing can be done after
- *	N calls to ndo_start_xmit rather than being done every single
- *	time.  In this regime ndo_start_xmit will be called one or more
- *	times, and then a final ndo_xmit_flush call will be made to
- *	have the driver tell the device about the new pending TX queue
- *	entries.  The kernel keeps track of which queues need flushing
- *	by monitoring skb->queue_mapping of the packets it submits to
- *	ndo_start_xmit.  This is the queue value that will be passed
- *	to ndo_xmit_flush.
- *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
 *	Called to decide which queue to use when device supports multiple
@@ -1018,7 +1005,6 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
-	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3447,15 +3433,8 @@ int __init dev_proc_init(void);
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 					      struct sk_buff *skb, struct net_device *dev)
 {
-	netdev_tx_t ret;
-	u16 q;
-
-	q = skb->queue_mapping;
-	ret = ops->ndo_start_xmit(skb, dev);
-	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
-		ops->ndo_xmit_flush(dev, q);
-
-	return ret;
+	skb->xmit_more = 0;
+	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
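
Note that __netdev_start_xmit() clears the bit unconditionally, so every
existing caller keeps the old semantics: one skb, one flush. Enabling
batching later is then purely additive; one plausible extension (not part
of this patch) is a variant that lets the caller pass the hint through:

	/* Hypothetical: let a batching caller indicate that more skbs
	 * for the same queue will follow immediately.
	 */
	static inline netdev_tx_t __netdev_start_xmit_more(const struct net_device_ops *ops,
							   struct sk_buff *skb,
							   struct net_device *dev,
							   bool more)
	{
		skb->xmit_more = more;
		return ops->ndo_start_xmit(skb, dev);
	}
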
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 18ddf9684a27..9b3802a197a8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@tc_verd: traffic control verdict
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
+ *	@xmit_more: More SKBs are pending for this queue
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -558,6 +559,7 @@ struct sk_buff {
 
 	__u16			queue_mapping;
 	kmemcheck_bitfield_begin(flags2);
+	__u8			xmit_more:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
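
One hazard to keep in mind with the skb->xmit_more contract: if the driver
stops the TX queue (ring full) while the bit is still set, no later skb
will arrive with the bit clear, and the posted descriptors would sit
unflushed forever. Drivers adopting the bit will likely want to force the
doorbell in that case; a hypothetical guard (not in this patch) would be:

	/* Never defer the doorbell once the queue has been stopped,
	 * otherwise the pending descriptors are never handed to the
	 * device; nq is the struct netdev_queue backing this ring.
	 */
	if (!skb->xmit_more || netif_xmit_stopped(nq))
		writel(ring->next_to_use, ring->tail);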