linux/arch/powerpc/platforms/cell/spufs/sched.c
Arnd Bergmann a33a7d7309 [PATCH] spufs: implement mfc access for PPE-side DMA
This patch adds a new file called 'mfc' to each spufs directory.
The file accepts DMA commands that are a subset of what would
be legal DMA commands for problem state register access. Upon
reading the file, a bitmask is returned with the completed
tag groups set.

The file is meant to be used from an abstraction in libspe
that is added by a different patch.

From the kernel perspective, this means a process can now
offload a memory copy from or into an SPE local store
without having to run code on the SPE itself.

The transfer will only be performed while the SPE is owned
by one thread that is waiting in the spu_run system call
and the data will be transferred into that thread's
address space, independent of which thread started the
transfer.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-03-27 14:48:26 +11:00

464 lines
11 KiB
C

/* sched.c - SPU scheduler.
*
* Copyright (C) IBM 2005
* Author: Mark Nutter <mnutter@us.ibm.com>
*
* SPU scheduler, based on Linux thread priority. For now use
* a simple "cooperative" yield model with no preemption. SPU
* scheduling will eventually be preemptive: When a thread with
* a higher static priority gets ready to run, then an active SPU
* context will be preempted and returned to the waitq.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#undef DEBUG
#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include "spufs.h"
#define SPU_MIN_TIMESLICE (100 * HZ / 1000)
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
atomic_t nr_blocked;
unsigned long bitmap[SPU_BITMAP_SIZE];
wait_queue_head_t waitq[MAX_PRIO];
};
/* spu_runqueue - This is the main runqueue data structure for SPUs. */
struct spu_runqueue {
struct semaphore sem;
unsigned long nr_active;
unsigned long nr_idle;
unsigned long nr_switches;
struct list_head active_list;
struct list_head idle_list;
struct spu_prio_array prio;
};
static struct spu_runqueue *spu_runqueues = NULL;
static inline struct spu_runqueue *spu_rq(void)
{
/* Future: make this a per-NODE array,
* and use cpu_to_node(smp_processor_id())
*/
return spu_runqueues;
}
static inline struct spu *del_idle(struct spu_runqueue *rq)
{
struct spu *spu;
BUG_ON(rq->nr_idle <= 0);
BUG_ON(list_empty(&rq->idle_list));
/* Future: Move SPU out of low-power SRI state. */
spu = list_entry(rq->idle_list.next, struct spu, sched_list);
list_del_init(&spu->sched_list);
rq->nr_idle--;
return spu;
}
static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
{
BUG_ON(rq->nr_active <= 0);
BUG_ON(list_empty(&rq->active_list));
list_del_init(&spu->sched_list);
rq->nr_active--;
}
static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
{
/* Future: Put SPU into low-power SRI state. */
list_add_tail(&spu->sched_list, &rq->idle_list);
rq->nr_idle++;
}
static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
{
rq->nr_active++;
rq->nr_switches++;
list_add_tail(&spu->sched_list, &rq->active_list);
}
static void prio_wakeup(struct spu_runqueue *rq)
{
if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
int best = sched_find_first_bit(rq->prio.bitmap);
if (best < MAX_PRIO) {
wait_queue_head_t *wq = &rq->prio.waitq[best];
wake_up_interruptible_nr(wq, 1);
}
}
}
static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
u64 flags)
{
int prio = current->prio;
wait_queue_head_t *wq = &rq->prio.waitq[prio];
DEFINE_WAIT(wait);
__set_bit(prio, rq->prio.bitmap);
atomic_inc(&rq->prio.nr_blocked);
prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
if (!signal_pending(current)) {
up(&rq->sem);
up_write(&ctx->state_sema);
pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
current->pid, current->prio);
schedule();
down_write(&ctx->state_sema);
down(&rq->sem);
}
finish_wait(wq, &wait);
atomic_dec(&rq->prio.nr_blocked);
if (!waitqueue_active(wq))
__clear_bit(prio, rq->prio.bitmap);
}
static inline int is_best_prio(struct spu_runqueue *rq)
{
int best_prio;
best_prio = sched_find_first_bit(rq->prio.bitmap);
return (current->prio < best_prio) ? 1 : 0;
}
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
/* Global TLBIE broadcast required with SPEs. */
#if (NR_CPUS > 1)
__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
#else
__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
#endif
}
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
spu->number);
spu->ctx = ctx;
spu->flags = 0;
ctx->flags = 0;
ctx->spu = spu;
ctx->ops = &spu_hw_ops;
spu->pid = current->pid;
spu->prio = current->prio;
spu->mm = ctx->owner;
mm_needs_global_tlbie(spu->mm);
spu->ibox_callback = spufs_ibox_callback;
spu->wbox_callback = spufs_wbox_callback;
spu->stop_callback = spufs_stop_callback;
spu->mfc_callback = spufs_mfc_callback;
mb();
spu_unmap_mappings(ctx);
spu_restore(&ctx->csa, spu);
spu->timestamp = jiffies;
}
static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
spu->pid, spu->number);
spu_unmap_mappings(ctx);
spu_save(&ctx->csa, spu);
spu->timestamp = jiffies;
ctx->state = SPU_STATE_SAVED;
spu->ibox_callback = NULL;
spu->wbox_callback = NULL;
spu->stop_callback = NULL;
spu->mfc_callback = NULL;
spu->mm = NULL;
spu->pid = 0;
spu->prio = MAX_PRIO;
ctx->ops = &spu_backing_ops;
ctx->spu = NULL;
ctx->flags = 0;
spu->flags = 0;
spu->ctx = NULL;
}
static void spu_reaper(void *data)
{
struct spu_context *ctx = data;
struct spu *spu;
down_write(&ctx->state_sema);
spu = ctx->spu;
if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
if (atomic_read(&spu->rq->prio.nr_blocked)) {
pr_debug("%s: spu=%d\n", __func__, spu->number);
ctx->ops->runcntl_stop(ctx);
spu_deactivate(ctx);
wake_up_all(&ctx->stop_wq);
} else {
clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
}
}
up_write(&ctx->state_sema);
put_spu_context(ctx);
}
static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
{
struct spu_context *ctx = get_spu_context(spu->ctx);
unsigned long now = jiffies;
unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
if (time_after(now, expire))
schedule_work(&ctx->reap_work);
else
schedule_delayed_work(&ctx->reap_work, expire - now);
}
static void check_preempt_active(struct spu_runqueue *rq)
{
struct list_head *p;
struct spu *worst = NULL;
list_for_each(p, &rq->active_list) {
struct spu *spu = list_entry(p, struct spu, sched_list);
struct spu_context *ctx = spu->ctx;
if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
if (!worst || (spu->prio > worst->prio)) {
worst = spu;
}
}
}
if (worst && (current->prio < worst->prio))
schedule_spu_reaper(rq, worst);
}
static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
{
struct spu_runqueue *rq;
struct spu *spu = NULL;
rq = spu_rq();
down(&rq->sem);
for (;;) {
if (rq->nr_idle > 0) {
if (is_best_prio(rq)) {
/* Fall through. */
spu = del_idle(rq);
break;
} else {
prio_wakeup(rq);
up(&rq->sem);
yield();
if (signal_pending(current)) {
return NULL;
}
rq = spu_rq();
down(&rq->sem);
continue;
}
} else {
check_preempt_active(rq);
prio_wait(rq, ctx, flags);
if (signal_pending(current)) {
prio_wakeup(rq);
spu = NULL;
break;
}
continue;
}
}
up(&rq->sem);
return spu;
}
static void put_idle_spu(struct spu *spu)
{
struct spu_runqueue *rq = spu->rq;
down(&rq->sem);
add_idle(rq, spu);
prio_wakeup(rq);
up(&rq->sem);
}
static int get_active_spu(struct spu *spu)
{
struct spu_runqueue *rq = spu->rq;
struct list_head *p;
struct spu *tmp;
int rc = 0;
down(&rq->sem);
list_for_each(p, &rq->active_list) {
tmp = list_entry(p, struct spu, sched_list);
if (tmp == spu) {
del_active(rq, spu);
rc = 1;
break;
}
}
up(&rq->sem);
return rc;
}
static void put_active_spu(struct spu *spu)
{
struct spu_runqueue *rq = spu->rq;
down(&rq->sem);
add_active(rq, spu);
up(&rq->sem);
}
/* Lock order:
* spu_activate() & spu_deactivate() require the
* caller to have down_write(&ctx->state_sema).
*
* The rq->sem is breifly held (inside or outside a
* given ctx lock) for list management, but is never
* held during save/restore.
*/
int spu_activate(struct spu_context *ctx, u64 flags)
{
struct spu *spu;
if (ctx->spu)
return 0;
spu = get_idle_spu(ctx, flags);
if (!spu)
return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
bind_context(spu, ctx);
/*
* We're likely to wait for interrupts on the same
* CPU that we are now on, so send them here.
*/
spu_irq_setaffinity(spu, raw_smp_processor_id());
put_active_spu(spu);
return 0;
}
void spu_deactivate(struct spu_context *ctx)
{
struct spu *spu;
int needs_idle;
spu = ctx->spu;
if (!spu)
return;
needs_idle = get_active_spu(spu);
unbind_context(spu, ctx);
if (needs_idle)
put_idle_spu(spu);
}
void spu_yield(struct spu_context *ctx)
{
struct spu *spu;
int need_yield = 0;
down_write(&ctx->state_sema);
spu = ctx->spu;
if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
spu_deactivate(ctx);
ctx->state = SPU_STATE_SAVED;
need_yield = 1;
} else if (spu) {
spu->prio = MAX_PRIO;
}
up_write(&ctx->state_sema);
if (unlikely(need_yield))
yield();
}
int __init spu_sched_init(void)
{
struct spu_runqueue *rq;
struct spu *spu;
int i;
rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
if (!rq) {
printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
__FUNCTION__);
return 1;
}
memset(rq, 0, sizeof(struct spu_runqueue));
init_MUTEX(&rq->sem);
INIT_LIST_HEAD(&rq->active_list);
INIT_LIST_HEAD(&rq->idle_list);
rq->nr_active = 0;
rq->nr_idle = 0;
rq->nr_switches = 0;
atomic_set(&rq->prio.nr_blocked, 0);
for (i = 0; i < MAX_PRIO; i++) {
init_waitqueue_head(&rq->prio.waitq[i]);
__clear_bit(i, rq->prio.bitmap);
}
__set_bit(MAX_PRIO, rq->prio.bitmap);
for (;;) {
spu = spu_alloc();
if (!spu)
break;
pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
add_idle(rq, spu);
spu->rq = rq;
spu->timestamp = jiffies;
}
if (!rq->nr_idle) {
printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
kfree(rq);
return 1;
}
return 0;
}
void __exit spu_sched_exit(void)
{
struct spu_runqueue *rq = spu_rq();
struct spu *spu;
if (!rq) {
printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
return;
}
while (rq->nr_idle > 0) {
spu = del_idle(rq);
if (!spu)
break;
spu_free(spu);
}
kfree(rq);
}