diff --git a/sound/usb/card.h b/sound/usb/card.h
index a00caa1db37e..87f042d06ce0 100644
--- a/sound/usb/card.h
+++ b/sound/usb/card.h
@@ -74,8 +74,9 @@ struct snd_usb_endpoint {
 
 	atomic_t state;		/* running state */
 
-	void (*prepare_data_urb) (struct snd_usb_substream *subs,
-				  struct urb *urb);
+	int (*prepare_data_urb) (struct snd_usb_substream *subs,
+				 struct urb *urb,
+				 bool in_stream_lock);
 	void (*retire_data_urb) (struct snd_usb_substream *subs,
 				 struct urb *urb);
 
@@ -94,7 +95,6 @@ struct snd_usb_endpoint {
 	struct list_head ready_playback_urbs; /* playback URB FIFO for implicit fb */
 
 	unsigned int nurbs;		/* # urbs */
-	unsigned int nominal_queue_size; /* total buffer sizes in URBs */
 	unsigned long active_mask;	/* bitmask of active urbs */
 	unsigned long unlink_mask;	/* bitmask of unlinked urbs */
 	atomic_t submitted_urbs;	/* currently submitted urbs */
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index c32022672319..0b336876e36d 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -307,8 +307,9 @@ static void prepare_silent_urb(struct snd_usb_endpoint *ep,
 /*
  * Prepare a PLAYBACK urb for submission to the bus.
  */
-static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
-				 struct snd_urb_ctx *ctx)
+static int prepare_outbound_urb(struct snd_usb_endpoint *ep,
+				struct snd_urb_ctx *ctx,
+				bool in_stream_lock)
 {
 	struct urb *urb = ctx->urb;
 	unsigned char *cp = urb->transfer_buffer;
@@ -320,9 +321,9 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
 	case SND_USB_ENDPOINT_TYPE_DATA:
 		data_subs = READ_ONCE(ep->data_subs);
 		if (data_subs && ep->prepare_data_urb)
-			ep->prepare_data_urb(data_subs, urb);
-		else /* no data provider, so send silence */
-			prepare_silent_urb(ep, ctx);
+			return ep->prepare_data_urb(data_subs, urb, in_stream_lock);
+		/* no data provider, so send silence */
+		prepare_silent_urb(ep, ctx);
 		break;
 
 	case SND_USB_ENDPOINT_TYPE_SYNC:
@@ -351,13 +352,14 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep,
 
 		break;
 	}
+	return 0;
 }
 
 /*
  * Prepare a CAPTURE or SYNC urb for submission to the bus.
  */
-static inline void prepare_inbound_urb(struct snd_usb_endpoint *ep,
-				       struct snd_urb_ctx *urb_ctx)
+static int prepare_inbound_urb(struct snd_usb_endpoint *ep,
+			       struct snd_urb_ctx *urb_ctx)
 {
 	int i, offs;
 	struct urb *urb = urb_ctx->urb;
@@ -382,6 +384,7 @@ static inline void prepare_inbound_urb(struct snd_usb_endpoint *ep,
 		urb->iso_frame_desc[0].offset = 0;
 		break;
 	}
+	return 0;
 }
 
 /* notify an error as XRUN to the assigned PCM data substream */
@@ -417,6 +420,16 @@ next_packet_fifo_dequeue(struct snd_usb_endpoint *ep)
 	return p;
 }
 
+static void push_back_to_ready_list(struct snd_usb_endpoint *ep,
+				    struct snd_urb_ctx *ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ep->lock, flags);
+	list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
+	spin_unlock_irqrestore(&ep->lock, flags);
+}
+
 /*
  * Send output urbs that have been prepared previously. URBs are dequeued
  * from ep->ready_playback_urbs and in case there aren't any available
@@ -427,12 +440,14 @@ next_packet_fifo_dequeue(struct snd_usb_endpoint *ep)
  * is that host controllers don't guarantee the order in which they return
  * inbound and outbound packets to their submitters.
  *
- * This function is only used for implicit feedback endpoints. For endpoints
- * driven by dedicated sync endpoints, URBs are immediately re-submitted
- * from their completion handler.
+ * This function is used both for implicit feedback endpoints and in low-
+ * latency playback mode.
  */
-static void queue_pending_output_urbs(struct snd_usb_endpoint *ep)
+void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+				       bool in_stream_lock)
 {
+	bool implicit_fb = snd_usb_endpoint_implicit_feedback_sink(ep);
+
 	while (ep_state_running(ep)) {
 
 		unsigned long flags;
@@ -441,14 +456,14 @@ static void queue_pending_output_urbs(struct snd_usb_endpoint *ep)
 		int err, i;
 
 		spin_lock_irqsave(&ep->lock, flags);
-		if (ep->next_packet_queued > 0 &&
+		if ((!implicit_fb || ep->next_packet_queued > 0) &&
 		    !list_empty(&ep->ready_playback_urbs)) {
 			/* take URB out of FIFO */
 			ctx = list_first_entry(&ep->ready_playback_urbs,
 					       struct snd_urb_ctx, ready_list);
 			list_del_init(&ctx->ready_list);
-
-			packet = next_packet_fifo_dequeue(ep);
+			if (implicit_fb)
+				packet = next_packet_fifo_dequeue(ep);
 		}
 		spin_unlock_irqrestore(&ep->lock, flags);
 
@@ -456,11 +471,24 @@ static void queue_pending_output_urbs(struct snd_usb_endpoint *ep)
 			return;
 
 		/* copy over the length information */
-		for (i = 0; i < packet->packets; i++)
-			ctx->packet_size[i] = packet->packet_size[i];
+		if (implicit_fb) {
+			for (i = 0; i < packet->packets; i++)
+				ctx->packet_size[i] = packet->packet_size[i];
+		}
 
 		/* call the data handler to fill in playback data */
-		prepare_outbound_urb(ep, ctx);
+		err = prepare_outbound_urb(ep, ctx, in_stream_lock);
+		/* can be stopped during prepare callback */
+		if (unlikely(!ep_state_running(ep)))
+			break;
+		if (err < 0) {
+			/* push back to ready list again for -EAGAIN */
+			if (err == -EAGAIN)
+				push_back_to_ready_list(ep, ctx);
+			else
+				notify_xrun(ep);
+			return;
+		}
 
 		err = usb_submit_urb(ctx->urb, GFP_ATOMIC);
 		if (err < 0) {
@@ -483,7 +511,6 @@ static void snd_complete_urb(struct urb *urb)
 {
 	struct snd_urb_ctx *ctx = urb->context;
 	struct snd_usb_endpoint *ep = ctx->ep;
-	unsigned long flags;
 	int err;
 
 	if (unlikely(urb->status == -ENOENT ||		/* unlinked */
@@ -504,17 +531,20 @@ static void snd_complete_urb(struct urb *urb)
 		if (unlikely(!ep_state_running(ep)))
 			goto exit_clear;
 
-		if (snd_usb_endpoint_implicit_feedback_sink(ep)) {
-			spin_lock_irqsave(&ep->lock, flags);
-			list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
+		/* in low-latency and implicit-feedback modes, push back the
+		 * URB to ready list at first, then process as much as possible
+		 */
+		if (ep->lowlatency_playback ||
+		     snd_usb_endpoint_implicit_feedback_sink(ep)) {
+			push_back_to_ready_list(ep, ctx);
 			clear_bit(ctx->index, &ep->active_mask);
-			spin_unlock_irqrestore(&ep->lock, flags);
-			queue_pending_output_urbs(ep);
+			snd_usb_queue_pending_output_urbs(ep, false);
 			atomic_dec(&ep->submitted_urbs); /* decrement at last */
 			return;
 		}
 
-		prepare_outbound_urb(ep, ctx);
+		/* in non-lowlatency mode, no error handling for prepare */
+		prepare_outbound_urb(ep, ctx, false);
 		/* can be stopped during prepare callback */
 		if (unlikely(!ep_state_running(ep)))
 			goto exit_clear;
@@ -807,8 +837,9 @@ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip,
  * Pass NULL to deactivate each callback.
  */
 void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep,
-				   void (*prepare)(struct snd_usb_substream *subs,
-						   struct urb *urb),
+				   int (*prepare)(struct snd_usb_substream *subs,
+						  struct urb *urb,
+						  bool in_stream_lock),
 				   void (*retire)(struct snd_usb_substream *subs,
 						  struct urb *urb),
 				   struct snd_usb_substream *data_subs)
@@ -1166,10 +1197,6 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep)
 		INIT_LIST_HEAD(&u->ready_list);
 	}
 
-	/* total buffer bytes of all URBs plus the next queue;
-	 * referred in pcm.c
-	 */
-	ep->nominal_queue_size = maxsize * urb_packs * (ep->nurbs + 1);
 	return 0;
 
 out_of_memory:
@@ -1408,6 +1435,7 @@ int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock)
  */
 int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 {
+	bool is_playback = usb_pipeout(ep->pipe);
 	int err;
 	unsigned int i;
 
@@ -1444,13 +1472,9 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 
 	if (snd_usb_endpoint_implicit_feedback_sink(ep) &&
 	    !(ep->chip->quirk_flags & QUIRK_FLAG_PLAYBACK_FIRST)) {
-		for (i = 0; i < ep->nurbs; i++) {
-			struct snd_urb_ctx *ctx = ep->urb + i;
-			list_add_tail(&ctx->ready_list, &ep->ready_playback_urbs);
-		}
-
 		usb_audio_dbg(ep->chip, "No URB submission due to implicit fb sync\n");
-		return 0;
+		i = 0;
+		goto fill_rest;
 	}
 
 	for (i = 0; i < ep->nurbs; i++) {
@@ -1459,10 +1483,18 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 		if (snd_BUG_ON(!urb))
 			goto __error;
 
-		if (usb_pipeout(ep->pipe)) {
-			prepare_outbound_urb(ep, urb->context);
-		} else {
-			prepare_inbound_urb(ep, urb->context);
+		if (is_playback)
+			err = prepare_outbound_urb(ep, urb->context, true);
+		else
+			err = prepare_inbound_urb(ep, urb->context);
+		if (err < 0) {
+			/* stop filling at applptr */
+			if (err == -EAGAIN)
+				break;
+			usb_audio_dbg(ep->chip,
+				      "EP 0x%x: failed to prepare urb: %d\n",
+				      ep->ep_num, err);
+			goto __error;
 		}
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -1476,8 +1508,22 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
 		atomic_inc(&ep->submitted_urbs);
 	}
 
+	if (!i) {
+		usb_audio_dbg(ep->chip, "XRUN at starting EP 0x%x\n",
+			      ep->ep_num);
+		goto __error;
+	}
+
 	usb_audio_dbg(ep->chip, "%d URBs submitted for EP 0x%x\n",
-		      ep->nurbs, ep->ep_num);
+		      i, ep->ep_num);
+
+ fill_rest:
+	/* put the remaining URBs to ready list */
+	if (is_playback) {
+		for (; i < ep->nurbs; i++)
+			push_back_to_ready_list(ep, ep->urb + i);
+	}
+
 	return 0;
 
 __error:
@@ -1629,7 +1675,7 @@ static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep,
 		}
 
 		spin_unlock_irqrestore(&ep->lock, flags);
-		queue_pending_output_urbs(ep);
+		snd_usb_queue_pending_output_urbs(ep, false);
 
 		return;
 	}
diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h
index 1f1a72535a64..6895d50d14d1 100644
--- a/sound/usb/endpoint.h
+++ b/sound/usb/endpoint.h
@@ -30,8 +30,9 @@ void snd_usb_endpoint_set_sync(struct snd_usb_audio *chip,
 			       struct snd_usb_endpoint *data_ep,
 			       struct snd_usb_endpoint *sync_ep);
 void snd_usb_endpoint_set_callback(struct snd_usb_endpoint *ep,
-				   void (*prepare)(struct snd_usb_substream *subs,
-						   struct urb *urb),
+				   int (*prepare)(struct snd_usb_substream *subs,
+						  struct urb *urb,
+						  bool in_stream_lock),
 				   void (*retire)(struct snd_usb_substream *subs,
 						  struct urb *urb),
 				   struct snd_usb_substream *data_subs);
@@ -48,5 +49,7 @@ int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep);
 int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep,
 				      struct snd_urb_ctx *ctx, int idx,
 				      unsigned int avail);
+void snd_usb_queue_pending_output_urbs(struct snd_usb_endpoint *ep,
+				       bool in_stream_lock);
 
 #endif /* __USBAUDIO_ENDPOINT_H */
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 8ad48c35c559..d5a14e5b9ad3 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -598,9 +598,6 @@ static int lowlatency_playback_available(struct snd_pcm_runtime *runtime,
 	/* implicit feedback mode has own operation mode */
 	if (snd_usb_endpoint_implicit_feedback_sink(subs->data_endpoint))
 		return false;
-	/* too short periods? */
-	if (subs->data_endpoint->nominal_queue_size >= subs->buffer_bytes)
-		return false;
 	return true;
 }
 
@@ -1095,6 +1092,10 @@ static int snd_usb_pcm_open(struct snd_pcm_substream *substream)
 	int ret;
 
 	runtime->hw = snd_usb_hardware;
+	/* need an explicit sync to catch applptr update in low-latency mode */
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK &&
+	    as->chip->lowlatency)
+		runtime->hw.info |= SNDRV_PCM_INFO_EXPLICIT_SYNC;
 	runtime->private_data = subs;
 	subs->pcm_substream = substream;
 	/* runtime PM is also done there */
@@ -1347,44 +1348,66 @@ static unsigned int copy_to_urb_quirk(struct snd_usb_substream *subs,
 	return bytes;
 }
 
-static void prepare_playback_urb(struct snd_usb_substream *subs,
-				 struct urb *urb)
+static int prepare_playback_urb(struct snd_usb_substream *subs,
+				struct urb *urb,
+				bool in_stream_lock)
 {
 	struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
 	struct snd_usb_endpoint *ep = subs->data_endpoint;
 	struct snd_urb_ctx *ctx = urb->context;
-	unsigned int counts, frames, bytes;
+	unsigned int frames, bytes;
+	int counts;
+	unsigned int transfer_done, frame_limit, avail = 0;
 	int i, stride, period_elapsed = 0;
 	unsigned long flags;
+	int err = 0;
 
 	stride = ep->stride;
 
 	frames = 0;
 	ctx->queued = 0;
 	urb->number_of_packets = 0;
+
 	spin_lock_irqsave(&subs->lock, flags);
-	subs->frame_limit += ep->max_urb_frames;
+	frame_limit = subs->frame_limit + ep->max_urb_frames;
+	transfer_done = subs->transfer_done;
+
+	if (subs->lowlatency_playback &&
+	    runtime->status->state != SNDRV_PCM_STATE_DRAINING) {
+		unsigned int hwptr = subs->hwptr_done / stride;
+
+		/* calculate the byte offset-in-buffer of the appl_ptr */
+		avail = (runtime->control->appl_ptr - runtime->hw_ptr_base)
+			% runtime->buffer_size;
+		if (avail <= hwptr)
+			avail += runtime->buffer_size;
+		avail -= hwptr;
+	}
+
 	for (i = 0; i < ctx->packets; i++) {
-		counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, 0);
+		counts = snd_usb_endpoint_next_packet_size(ep, ctx, i, avail);
+		if (counts < 0)
+			break;
 		/* set up descriptor */
 		urb->iso_frame_desc[i].offset = frames * stride;
 		urb->iso_frame_desc[i].length = counts * stride;
 		frames += counts;
+		avail -= counts;
 		urb->number_of_packets++;
-		subs->transfer_done += counts;
-		if (subs->transfer_done >= runtime->period_size) {
-			subs->transfer_done -= runtime->period_size;
-			subs->frame_limit = 0;
+		transfer_done += counts;
+		if (transfer_done >= runtime->period_size) {
+			transfer_done -= runtime->period_size;
+			frame_limit = 0;
 			period_elapsed = 1;
 			if (subs->fmt_type == UAC_FORMAT_TYPE_II) {
-				if (subs->transfer_done > 0) {
+				if (transfer_done > 0) {
 					/* FIXME: fill-max mode is not
 					 * supported yet */
-					frames -= subs->transfer_done;
-					counts -= subs->transfer_done;
+					frames -= transfer_done;
+					counts -= transfer_done;
 					urb->iso_frame_desc[i].length =
 						counts * stride;
-					subs->transfer_done = 0;
+					transfer_done = 0;
 				}
 				i++;
 				if (i < ctx->packets) {
@@ -1398,13 +1421,19 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
 			}
 		}
 		/* finish at the period boundary or after enough frames */
-		if ((period_elapsed ||
-				subs->transfer_done >= subs->frame_limit) &&
+		if ((period_elapsed || transfer_done >= frame_limit) &&
 		    !snd_usb_endpoint_implicit_feedback_sink(ep))
 			break;
 	}
-	bytes = frames * stride;
 
+	if (!frames) {
+		err = -EAGAIN;
+		goto unlock;
+	}
+
+	bytes = frames * stride;
+	subs->transfer_done = transfer_done;
+	subs->frame_limit = frame_limit;
 	if (unlikely(ep->cur_format == SNDRV_PCM_FORMAT_DSD_U16_LE &&
 		     subs->cur_audiofmt->dsd_dop)) {
 		fill_playback_urb_dsd_dop(subs, urb, bytes);
@@ -1434,10 +1463,19 @@ static void prepare_playback_urb(struct snd_usb_substream *subs,
 		subs->period_elapsed_pending = 1;
 		period_elapsed = 0;
 	}
+
+ unlock:
 	spin_unlock_irqrestore(&subs->lock, flags);
+	if (err < 0)
+		return err;
 	urb->transfer_buffer_length = bytes;
-	if (period_elapsed)
-		snd_pcm_period_elapsed(subs->pcm_substream);
+	if (period_elapsed) {
+		if (in_stream_lock)
+			snd_pcm_period_elapsed_under_stream_lock(subs->pcm_substream);
+		else
+			snd_pcm_period_elapsed(subs->pcm_substream);
+	}
+	return 0;
 }
 
 /*
@@ -1469,6 +1507,27 @@ static void retire_playback_urb(struct snd_usb_substream *subs,
 		snd_pcm_period_elapsed(subs->pcm_substream);
 }
 
+/* PCM ack callback for the playback stream;
+ * this plays a role only when the stream is running in low-latency mode.
+ */
+static int snd_usb_pcm_playback_ack(struct snd_pcm_substream *substream)
+{
+	struct snd_usb_substream *subs = substream->runtime->private_data;
+	struct snd_usb_endpoint *ep;
+
+	if (!subs->lowlatency_playback || !subs->running)
+		return 0;
+	ep = subs->data_endpoint;
+	if (!ep)
+		return 0;
+	/* When no more in-flight URBs available, try to process the pending
+	 * outputs here
+	 */
+	if (!ep->active_mask)
+		snd_usb_queue_pending_output_urbs(ep, true);
+	return 0;
+}
+
 static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substream,
 					      int cmd)
 {
@@ -1572,6 +1631,7 @@ static const struct snd_pcm_ops snd_usb_playback_ops = {
 	.trigger =	snd_usb_substream_playback_trigger,
 	.sync_stop =	snd_usb_pcm_sync_stop,
 	.pointer =	snd_usb_pcm_pointer,
+	.ack =		snd_usb_pcm_playback_ack,
 };
 
 static const struct snd_pcm_ops snd_usb_capture_ops = {