/*
 mbuffer -- buffer module
 Copyright (c) 2005,2006 Hitachi,Ltd.,
 Created by Satoru Moriya <s-moriya@sdl.hitachi.co.jp>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <linux/module.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <asm/fcntl.h>
#include "mbuffer.h"
#include "mcontrol.h"
#include "ctr.h"
#include "kstrax_ioc.h"

/* pid value that marks a record as a base-time record (see set_basetime) */
#define BASETIME        (-1)
/* wakeup_flag value: no reader is waiting / reader may proceed */
#define KSTRAX_WAKEUP   1
/* wakeup_flag value: a reader is sleeping until wakeup_index is reached */
#define KSTRAX_SLEEP    0
/* marks an unused slot in kstrax_status.trace_pid[] */
#define INIT_PID        (-1)

/*
 * One trace record.  The same layout is used for system-call entry records,
 * system-call return records and the base-time record, and it is copied to
 * user space as-is by kstrax_read() and kstrax_read_basetime().
 */
typedef struct sys_call_info {
	pid_t pid;		/* current->pid, or BASETIME for a base-time record */
	short sys_call_number;	/* orig_eax on entry, -orig_eax on return */
	long time;		/* trace record: 2nd rdtsc() operand; base time: tv_sec */
	long utime;		/* trace record: 1st rdtsc() operand; base time: tv_usec */
	union {
		long arg_1;		/* entry record: ebx; base time: 2nd rdtsc() operand */
		long return_value;	/* return record: eax */
	};
	long arg_2;		/* entry: ecx; return: ebx; base time: 1st rdtsc() operand */
	long arg_3;		/* entry: edx; base time: cpu_khz */
	long arg_4;		/* entry: esi */
	long arg_5;		/* entry: edi */
	long arg_6;		/* entry: ebp */
	int  serial; /* value of atomic_inc_return(&kstrax_serial) for this record */
	int  cpu;		/* id of the CPU that wrote the record */
} sys_call_t;

/*
 * Header of one per-CPU ring buffer of trace records.
 */
typedef struct kbuffer_head {
	sys_call_t *buffer;	/* vmalloc()ed array of 'size' records */
	int size;		/* number of records in 'buffer' */
	int w_index;		/* slot the next record will be written to */
	int r_index;		/* slot kstrax_read() will copy out next */
	int wakeup_index;	/* w_index value at which a sleeping reader is woken; -1 after init */
	int wakeup_flag;	/* KSTRAX_SLEEP while a reader waits, else KSTRAX_WAKEUP */
	wait_queue_head_t wq_head;	/* queue the reader sleeps on */
	struct tasklet_struct kst_tasklet;	/* runs kstrax_wake_up_tasklet() on wq_head */
} buf_head_t;

/*
 *  Global variables
 */
/* counter that supplies the 'serial' member of every trace record */
static atomic_t kstrax_serial = ATOMIC_INIT(0);
/* tracer status (cpu count, trace type, pid filter, buffer size) */
trace_stat_t kstrax_status;

/* One ring-buffer header per CPU; 'buffer' is NULL until init_mbuffer() */
static DEFINE_PER_CPU(buf_head_t, kernel_buffer_head) = { NULL };

/* tasklet function */
static void kstrax_wake_up_tasklet(unsigned long addr)
{
	wake_up_interruptible((wait_queue_head_t *)addr);
}

/*--------------------------------------------------------------------------
 * module interface
 */
int init_mbuffer(int entry)
{
	__u32 cpu;
	
	if (entry <= 0) {
		printk("invalid parameter(kbuffer_entry)\n");
		return -EINVAL;
	}

        /* initialize buffer header and tasklet for each cpu */
        for_each_online_cpu(cpu) {
		buf_head_t *head;

                head = &per_cpu(kernel_buffer_head, cpu);
     	
		head->buffer=(sys_call_t*)vmalloc(sizeof(sys_call_t) * entry);
		if (head->buffer == NULL) {
			return -ENOMEM;
		}
		head->size = entry;
		head->w_index = 0;
		head->r_index = 0;
		head->wakeup_index = -1;
		head->wakeup_flag = KSTRAX_WAKEUP;
		init_waitqueue_head(&head->wq_head);
		tasklet_init(&head->kst_tasklet, kstrax_wake_up_tasklet,
			     (unsigned long)&head->wq_head);
	}	
	return 0;
}

/*
 * clear_mbuffer - release the ring buffer of every online CPU.
 *
 * Each buffer pointer is reset to NULL after it is freed, so calling this
 * function twice (or after a failed init) never frees the same memory again.
 * vfree() ignores a NULL pointer, so no explicit check is needed.
 */
void clear_mbuffer(void)
{
	__u32 cpu;
	buf_head_t *head;

	for_each_online_cpu(cpu) {
		head = &per_cpu(kernel_buffer_head, cpu);
		vfree(head->buffer);
		head->buffer = NULL;
	}
}

/*
 * kstrax_init_status - reset the global tracer status.
 * @nr_entry: buffer size (in records) to report in the status.
 *
 * Records the online CPU count, selects TRACE_ALL, and empties the pid
 * filter table.  Always returns 0.
 */
int kstrax_init_status(int nr_entry)
{
	int slot = 0;

	kstrax_status.nr_buf_entry = nr_entry;
	kstrax_status.trace_type = TRACE_ALL;
	kstrax_status.nr_cpu = num_online_cpus();

	/* INIT_PID in every slot means "no pid registered" */
	while (slot < NR_TRACE_PROCESS_MAX)
		kstrax_status.trace_pid[slot++] = INIT_PID;

	return 0;
}

/*--------------------------------------------------------------------------------
 *  called by ioctl function
 *
 *  Forward declarations of the file-local helpers defined further below.
 */
static void set_basetime(sys_call_t *);	/* fill a base-time record */
static int register_trace_pid(pid_t);	/* add a pid to the filter table */
static int unregister_trace_pid(pid_t);	/* remove a pid from the filter table */
static int unregister_trace_pid_all(void); 

/* reset read index */
int kstrax_reset_index(void)
{
	__u32 cpu;
	
	for_each_online_cpu(cpu) {
		buf_head_t *head;
		head = &per_cpu(kernel_buffer_head, cpu);

		head->r_index = head->w_index;
	}
	return 0;
}

/* user_buf is not pointer to int but sys_call_t */
int kstrax_read_basetime(int *buf)
{
	sys_call_t tm_info;
	set_basetime(&tm_info);
	return (copy_to_user((sys_call_t *)buf, &tm_info, sizeof(sys_call_t)));
}

/*
 * set_basetime - fill @kst_time with a base-time record.
 *
 * Samples the wall clock and the time-stamp counter back to back, then
 * stores: seconds/microseconds in time/utime, the two rdtsc() operands in
 * arg_2/arg_1, cpu_khz in arg_3 and BASETIME in pid.  No other member is
 * written.
 */
static void set_basetime(sys_call_t *kst_time)
{
	struct timeval now;

	/* take both clock samples first, wall clock before tsc */
	do_gettimeofday(&now);
	rdtsc(kst_time->arg_2, kst_time->arg_1);

	kst_time->pid = BASETIME;
	kst_time->arg_3 = cpu_khz;
	kst_time->utime = now.tv_usec;
	kst_time->time = now.tv_sec;
}

/*
 * kstrax_copy_status - copy the global tracer status to user space.
 * @status: user-space destination for one trace_stat_t.
 *
 * Returns 0 on success; logs an error and returns -EINVAL if the copy fails.
 */
int kstrax_copy_status(trace_stat_t *status)
{
	unsigned long not_copied;

	not_copied = copy_to_user((trace_stat_t *)status, &kstrax_status,
				  sizeof(trace_stat_t));
	if (not_copied == 0)
		return 0;

	printk(KERN_ERR "error(ioctl) : cannot copy date\n");
	return -EINVAL;
}

/*
 * kstrax_pid_spec - toggle a pid in the trace filter table.
 * @pid: pid to toggle, or -1 to empty the whole table.
 *
 * A pid already in the table is removed; any other pid is added.
 * Returns -EINVAL for pid < -1, otherwise the result of the helper that
 * performed the change.
 */
int kstrax_pid_spec(pid_t pid)
{
	int slot = 0;

	if (pid < -1)
		return -EINVAL;
	if (pid == -1)
		return unregister_trace_pid_all();

	/* already registered -> this call removes it */
	while (slot < NR_TRACE_PROCESS_MAX) {
		if (kstrax_status.trace_pid[slot] == pid)
			return unregister_trace_pid(pid);
		slot++;
	}

	return register_trace_pid(pid);
}

/*
 * register_trace_pid - store @pid in the first free filter slot.
 *
 * Returns 0 on success, or -1 (after logging) when no slot holds INIT_PID.
 */
static int register_trace_pid(pid_t pid)
{
	int slot;

	for (slot = 0; slot < NR_TRACE_PROCESS_MAX; slot++) {
		if (kstrax_status.trace_pid[slot] != INIT_PID)
			continue;

		kstrax_status.trace_pid[slot] = pid;
		return 0;
	}

	printk("error:cannot register pid(too much pid registered)\n");
	return -1;
}

/*
 * unregister_trace_pid - remove @pid from the filter table.
 *
 * For every slot holding @pid, the entry just before the next INIT_PID slot
 * (or before the end of the table) is moved into that slot and its old
 * position is set to INIT_PID.  When that entry is the matching slot itself
 * this simply clears the slot.  Always returns 0.
 */
static int unregister_trace_pid(pid_t pid)
{
	int slot, tail;

	for (slot = 0; slot < NR_TRACE_PROCESS_MAX; slot++) {
		if (kstrax_status.trace_pid[slot] != pid)
			continue;

		/* advance 'tail' to the slot just before the next free one */
		tail = slot;
		while (tail + 1 < NR_TRACE_PROCESS_MAX &&
		       kstrax_status.trace_pid[tail + 1] != INIT_PID)
			tail++;

		/* with tail == slot this is a self-copy followed by a clear */
		kstrax_status.trace_pid[slot] = kstrax_status.trace_pid[tail];
		kstrax_status.trace_pid[tail] = INIT_PID;
	}

	return 0;
}

/*
 * unregister_trace_pid_all - empty the pid filter table.
 *
 * Sets every slot to INIT_PID, lowest index first.  Always returns 0.
 */
static int unregister_trace_pid_all(void)
{
	int slot = 0;

	while (slot < NR_TRACE_PROCESS_MAX) {
		kstrax_status.trace_pid[slot] = INIT_PID;
		slot++;
	}

	return 0;
}

/*
 * kstrax_syscall_spec_wrapper - select which system calls are traced.
 * @type: trace type; must be below TRACE_TYPE_LIMIT.
 *
 * Passes @type to kstrax_syscall_spec() and, when that returns 0, records
 * it in kstrax_status.trace_type.  Returns -EINVAL for an out-of-range
 * @type, otherwise whatever kstrax_syscall_spec() returned.
 *
 * NOTE(review): a negative @type is not rejected here; confirm that
 * kstrax_syscall_spec() handles it.
 */
int kstrax_syscall_spec_wrapper(int type)
{
	int err;

	if (type >= TRACE_TYPE_LIMIT) {
		printk(KERN_ERR "error(syscall_spec):bad argument\n");
		return -EINVAL;
	}

	err = kstrax_syscall_spec(type);
	if (err != 0)
		return err;

	kstrax_status.trace_type = type;
	return 0;
}

/*-------------------------------------------------------------------------------
 *  write to buffer
 *  call entry
 */
void pre_sys_call_record(const struct pt_regs regs)
{
	int i;
	buf_head_t *head;
	__u32 cpu;  /* unsigned int */
	struct list_head *w_list;
	sys_call_t *buf;

	/* check tracing pid or not */
	if (kstrax_status.trace_pid[0] != INIT_PID) {
		for (i = 0; i < NR_TRACE_PROCESS_MAX; i++) {
			if (current->pid == kstrax_status.trace_pid[i]) {
				break;
			} else if (kstrax_status.trace_pid[i] == INIT_PID) {
				return ;
			}
		}
	}

	/* buffer header of current cpu */
	cpu = get_cpu();
	head = &per_cpu(kernel_buffer_head, cpu);
	
	buf = &head->buffer[head->w_index];
	w_list = &head->wq_head.task_list;

	/* update index */	
	head->w_index = (head->w_index + 1) % head->size;
	if (head->w_index == head->r_index) {
		head->r_index = (head->r_index + 1) % head->size;
	}
	
	buf->pid = current->pid;
	rdtsc(buf->utime, buf->time);
	buf->sys_call_number = regs.orig_eax;
	buf->arg_1 = regs.ebx;
	buf->arg_2 = regs.ecx;
	buf->arg_3 = regs.edx;
	buf->arg_4 = regs.esi;
	buf->arg_5 = regs.edi;
	buf->arg_6 = regs.ebp;
	buf->serial = atomic_inc_return(&kstrax_serial); 
	buf->cpu = cpu;

	if (head->wakeup_flag == KSTRAX_SLEEP && 
	    (head->wakeup_index == head->w_index ||
	     kstrax_force_flag == 1)) {
		head->wakeup_flag = KSTRAX_WAKEUP;

		if (!list_empty(w_list)) {
			tasklet_schedule(&head->kst_tasklet);
		}
	}
	put_cpu();
}

/*
 * post_sys_call_record - log a system-call return in the current CPU's buffer.
 * @regs: register values at system-call return (passed by value).
 *
 * When the pid filter table is non-empty, only processes listed in it are
 * recorded.  The record stores pid, the rdtsc() sample, the negated orig_eax
 * as the system-call number, eax as the return value, ebx in arg_2, a global
 * serial number and the CPU id; arg_3..arg_6 of the slot are left as they
 * were.  If the write index runs into the read index, the oldest unread
 * record is dropped.  A sleeping reader is woken through the tasklet once
 * the write index reaches wakeup_index or kstrax_force_flag is 1.
 */
void post_sys_call_record(const struct pt_regs regs)
{
	int i;
	buf_head_t *head;
	__u32 cpu;  /* unsigned int */
	struct list_head *w_list;
	sys_call_t *buf;

	/* check tracing pid or not */
	if (kstrax_status.trace_pid[0] != INIT_PID) {
		for (i = 0; i < NR_TRACE_PROCESS_MAX; i++) {
			if (current->pid == kstrax_status.trace_pid[i])
				break;
			if (kstrax_status.trace_pid[i] == INIT_PID)
				return;
		}
		/*
		 * Every slot is occupied and none matched: the process is
		 * not traced.  Without this check a full table would let
		 * every process through.
		 */
		if (i == NR_TRACE_PROCESS_MAX)
			return;
	}

	/* buffer header of current cpu */
	cpu = get_cpu();
	head = &per_cpu(kernel_buffer_head, cpu);

	buf = &head->buffer[head->w_index];
	w_list = &head->wq_head.task_list;

	/* update index; on overrun push the read index past the oldest record */
	head->w_index = (head->w_index + 1) % head->size;
	if (head->w_index == head->r_index) {
		head->r_index = (head->r_index + 1) % head->size;
	}

	buf->pid = current->pid;
	rdtsc(buf->utime, buf->time);
	buf->sys_call_number = -regs.orig_eax;
	buf->return_value = regs.eax;
	buf->arg_2 = regs.ebx;  /* for ipc & socketcall */
	buf->serial = atomic_inc_return(&kstrax_serial);
	buf->cpu = cpu;

	/* a reader is asleep and its wake-up condition is met */
	if (head->wakeup_flag == KSTRAX_SLEEP &&
	    (head->wakeup_index == head->w_index ||
	     kstrax_force_flag == 1)) {
		head->wakeup_flag = KSTRAX_WAKEUP;

		if (!list_empty(w_list)) {
			tasklet_schedule(&head->kst_tasklet);
		}
	}
	put_cpu();
}

/*----------------------------------------------------------------------------------
 *  read
 */
/*
 * kstrax_read - copy trace records of the current CPU's buffer to user space.
 * @filp:  open file; O_NONBLOCK in f_flags disables sleeping.
 * @buf:   user-space destination.
 * @count: size of @buf in bytes; only whole sys_call_t records are copied.
 * @f_pos: file position, advanced by the number of bytes copied.
 *
 * Only the owner (current->tgid == kstrax_owner) may read.  Records are
 * copied in contiguous runs; when fewer records than requested are
 * available the caller sleeps until the writer has produced enough, unless
 * O_NONBLOCK is set.
 *
 * Returns the number of bytes actually copied (a multiple of
 * sizeof(sys_call_t), possibly 0), -EINVAL for a NULL argument, -EPERM for
 * a non-owner, and, only when nothing was copied, -EFAULT for a bad user
 * buffer or -ERESTARTSYS when a signal interrupted the sleep.
 */
ssize_t kstrax_read(struct file *filp, char __user *buf, 
		     size_t count, loff_t *f_pos)
{
	int request;		/* records still wanted */
	int copied = 0;		/* records delivered so far */
	int start = 0, end = 0;
	int csize;
	int err = 0;
	__u32 cpu;  /* unsigned int */
	buf_head_t *head;

	if (filp == NULL || buf == NULL || f_pos == NULL)
		return -EINVAL;

	if (current->tgid != kstrax_owner)
		return -EPERM;

	/* buffer header of current cpu */
	cpu = smp_processor_id();
	head = &per_cpu(kernel_buffer_head, cpu);

	/* translate byte into index */
	request = count / sizeof(sys_call_t);

 retry:
	start = head->r_index;
	end = head->w_index;

	/* data wraps around: first copy only up to the end of the array */
	if (start > end)
		end = head->size;

	csize = end - start;
	if (request < csize) {
		end = (start + request) % head->size;
		csize = end - start;
	}
	/* copy */
	if (copy_to_user(buf, (head->buffer + start),
			 sizeof(sys_call_t) * csize)) {
		printk("Error at copy_to_user, cpu:%d\n", smp_processor_id());
		err = -EFAULT;
		goto out;
	}

	request -= csize;
	copied += csize;
	head->r_index = end % head->size;
	buf += csize * sizeof(sys_call_t);
	*f_pos += csize * sizeof(sys_call_t);

	/* if need one more copy */
	if (0 < request) {
		int r_to_w, r_to_up;

		/* calculate distance from r_index to wake up */
		r_to_up = head->size / 2;
		if (r_to_up > request)
			r_to_up = request;

		if (head->w_index >= head->r_index) {
			r_to_w = head->w_index - head->r_index;
		} else {
			r_to_w = head->size - head->r_index;
			r_to_w += head->w_index;
		}

		/* sleep */
		if (r_to_w < r_to_up) {
			/* termination */
			if (filp->f_flags & O_NONBLOCK) {
				if (head->r_index == head->w_index)
					goto out;
				goto retry;
			}

			head->wakeup_index = (head->r_index + r_to_up) % head->size;
			head->wakeup_flag = KSTRAX_SLEEP;

			/*
			 * A non-zero result means a signal is pending.
			 * Retrying would return here at once and spin, so
			 * stop and report what has been read so far.
			 */
			if (wait_event_interruptible(head->wq_head,
					head->wakeup_flag == KSTRAX_WAKEUP)) {
				/* nobody is sleeping any more */
				head->wakeup_flag = KSTRAX_WAKEUP;
				err = -ERESTARTSYS;
				goto out;
			}
		}
		goto retry;
	}

 out:
	/* report an error only when no data was delivered */
	if (copied == 0 && err)
		return err;

	return (ssize_t)(copied * sizeof(sys_call_t));
}
