/***********************************************************************
 *
 *	file: mty.c
 *
 *	"Maa, matte-ya." (MTY)
 *
 *	$Id: mty.c 244 2010-03-16 13:54:51Z chapuni $
 *
 */

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <sys/timeb.h>

#if defined(WIN32)

#include <windows.h>
#include <process.h>

#elif defined(__GNUC__)

#include <sys/time.h>

#endif

#include "desconst.h"
#include "expr_parse.h"
#include "hit.h"
#include "key.h"
#include "log.h"
#include "scoreboard.h"
#include "synth.h"
#include "util.h"

/* Mutex held around srand()/key_init()/key_reset() calls and around the
   registration of a worker thread's loop_cpu[] slot (see the users below). */
static HANDLE mutex_key;

/* CRYPT64 descriptors; only entry [0] is referenced in this file. */
static
struct CRYPT64_DESC const *const crypt64_descs[] =
{
  &crypt64_desc,
};

/* Speed measurement:
   n_cpus counts the worker threads that have claimed a slot, and
   loop_cpu[k] is the running work counter of worker k (summed by main). */
static int n_cpus;
static uint64_t loop_cpu[1024];

#define USEC_SEC 1000	/* ticks per second reported by usec(); 1000 => milliseconds */

/*
 * Return the current wall-clock time in units of 1/USEC_SEC seconds
 * (milliseconds with the default USEC_SEC).
 *
 * The seconds part is kept in 64 bits so that a time_t beyond the
 * 32-bit range is not truncated before the multiplication.
 */
static
uint64_t
usec(void)
{
  uint64_t sec;
  uint32_t frac;	/* sub-second part, already scaled to USEC_SEC */

#if !defined(WIN32)
  struct timeval tv;
  gettimeofday(&tv, NULL);
  sec = (uint64_t)tv.tv_sec;
  frac = (uint32_t)(tv.tv_usec / (1000000 / USEC_SEC));
#else
  struct timeb tm;
  ftime(&tm);
  sec = (uint64_t)tm.time;
  frac = tm.millitm / (1000 / USEC_SEC);
#endif

  return (uint64_t)USEC_SEC * sec + frac;
}

/***************************************************************
 *
 *	CPU capability query
 *
 *	cpuid(n,a,b,c,d) executes CPUID with leaf n in EAX and stores
 *	the resulting EAX, EBX, ECX, EDX into the lvalues a, b, c, d.
 *
 *	a[4] = {EAX,EBX,ECX,EDX}
 *
 *	NOTE(review): no check is made that the CPUID instruction itself
 *	exists; very old processors appear to be out of scope.
 *	When neither __GNUC__ nor WIN32 is defined, no cpuid macro is
 *	provided at all.
 *
 */

#if defined(__GNUC__)

/* GCC flavour: inline assembly, the four registers are bound as outputs. */
#define cpuid(n,a,b,c,d)	\
	asm("cpuid"		\
		: "=a"(a), "=b"(b), "=c"(c), "=d"(d)	\
		: "a"(n))

#elif defined(WIN32)

/* Win32 flavour: __cpuid() fills r[0..3], which are then copied out. */
#define cpuid(n,a,b,c,d)	\
	do {int r[4]; __cpuid(r,n);	\
	(a) = r[0]; (b) = r[1]; (c) = r[2]; (d) = r[3];} while (0)

#endif

/*
 * Return the EDX value reported by CPUID leaf 1.
 * The other three result registers are read and discarded.
 */
static
unsigned
cpuid_getfflags(void)
{
  unsigned r_eax, r_ebx, r_ecx, r_edx;

  cpuid(1, r_eax, r_ebx, r_ecx, r_edx);

  return r_edx;
}

/*
 * Return 1 when every bit of REQUIRED_CAPS is set in the flags reported
 * by cpuid_getfflags(), 0 when at least one required bit is missing.
 */
static
int
cpuid_issupported(void)
{
  unsigned const required = REQUIRED_CAPS;
  unsigned const missing = ~cpuid_getfflags() & required;

  return missing == 0;
}

/***************************************************************
 *
 *	Packets for batch processing
 *
 */

/*
 * Allocate and initialise an array of n PACKET_CRYPT64, aligned to
 * 128 bytes.
 *
 *   n        number of packets (>= 1)
 *   tn       number of param64.hit[] entries the LAST packet must hold;
 *            the allocation ends at param64.hit[tn] of that packet
 *   ini_key  8 bytes copied into each packet's uk.key[] and fed to
 *            key_set64() as the initial key
 *
 * Returns the aligned array.  The raw calloc() pointer is not kept, so
 * the array cannot be freed; it is meant to live for the whole process.
 * On allocation failure a message is printed and the process exits,
 * because no caller in this file can continue without the packets.
 */
static
struct PACKET_CRYPT64 *
packet_create(int n,	/* number of packets */
			  int tn,	/* hit[] entries required in the last packet */
			  uint8_t const *ini_key)
{
  int i;
  size_t siz;
  void *p;
  uintptr_t const a = 128;	/* required alignment (power of two) */
  struct PACKET_CRYPT64 *pkts;
  assert(IS_POWER2(sizeof(struct PACKET_CRYPT64)));
  assert(n >= 1);

  /* a - 1 bytes of slack for aligning, n - 1 full packets, and the
	 last packet truncated right after hit[tn - 1]. */
  siz = (a - 1
		 + (size_t)(n - 1) * sizeof(struct PACKET_CRYPT64)
		 + offsetof(struct PACKET_CRYPT64, param64.hit[tn]));
  p = calloc(siz, 1);
  if (p == NULL)
	{
	  fprintf(stderr,
			  "packet_create: out of memory (%lu bytes)\n",
			  (unsigned long)siz);
	  exit(1);
	}

  /* Round the start address up to the next multiple of a. */
  pkts = (struct PACKET_CRYPT64 *)(((uintptr_t)p
									+ a - 1)
								   & ~(a - 1));
#if DEBUG>=1
  fprintf(stderr,
		  "packet(n=%d,tn=%d) %lu allocated; %p aligned to %p\n",
		  n, tn,
		  (unsigned long)siz, p, (void *)pkts);
#endif

  /* Initialise every packet in place, member by member.
	 Whole-struct copies are avoided: the last packet is only allocated
	 up to hit[tn], so copying a full struct into it would overrun. */
  for (i = 0; i < n; i++)
	{
	  int j, k;

	  /* t[T_INV] is filled with all-one bits. */
	  memset(&pkts[i].param64.t[T_INV], -1, sizeof(SLICE));

	  /* Set up the fixed part of the key. */
	  key_init_sk(&pkts[i].key64);

	  /* Store the key schedule as byte offsets (sizeof(WS_T) * ks_ls[j]).
		 NOTE(review): presumably consumed by the crypt64 code - confirm. */
	  for (j = 0; j < 28; j++)
		for (k = 0; k < N_ALU; k++)
		  pkts[i].key64.ks[j].a[k] = sizeof(WS_T) * ks_ls[j];

	  /* Load the initial key and remember it in uk.key[]. */
	  for (j = 0; j < 8; j++)
		key_set64(&pkts[i].key64, j, pkts[i].uk.key[j] = ini_key[j], 0, 0x7F);
	}

  return pkts;
}

/***************************************************************
 *
 *	thread
 *
 */

#if defined(__GNUC__)

/*
 * Atomic primitives for GCC-compatible compilers on x86, built on
 * LOCK-prefixed instructions.
 *
 *   LOCK_INC(p)        atomically increment the ATOMWORD_T at *p
 *   LOCK_DEC(p)        atomically decrement the ATOMWORD_T at *p
 *   LOCK_CAS(pd,s,r)   if *pd == r then *pd = s; yields the old *pd
 *   LOCK_CASP(pd,s,r)  same as LOCK_CAS for a pointer-sized object
 *
 * The memory operand is declared read-write ("+m") because every one
 * of these instructions both reads and writes it.  The result temporary
 * carries a trailing underscore so it cannot capture a caller variable.
 */
typedef int32_t ATOMWORD_T;

#define LOCK_INC(p)	\
__asm__ __volatile__ ("lock incl %0"	\
			  : "+m"(*(p))	\
			  : /*nil*/		\
			  : "memory", "cc")

#define LOCK_DEC(p)	\
__asm__ __volatile__ ("lock decl %0"	\
			  : "+m"(*(p))	\
			  : /*nil*/		\
			  : "memory", "cc")

#define LOCK_CAS(pd,s,r)	\
({	ATOMWORD_T old_;						\
	__asm__ __volatile__ ("lock cmpxchg %2,%1"	\
				  : "=a"(old_), "+m"(*(pd))	\
				  : "r"((ATOMWORD_T)(s)), "0"((ATOMWORD_T)(r))	\
				  : "memory", "cc");old_;})

#define LOCK_CASP(pd,s,r)	\
({	void *old_;								\
	__asm__ __volatile__ ("lock cmpxchg %2,%1"	\
				  : "=a"(old_), "+m"(*(pd))	\
				  : "r"((void *)(s)), "0"((void *)(r))	\
				  : "memory", "cc");old_;})

#elif defined(WIN32)

/* Win32: map the same four operations onto the Interlocked* API. */
typedef LONG ATOMWORD_T;

#define LOCK_INC(p) InterlockedIncrement((LONG *)(p))
#define LOCK_DEC(p) InterlockedDecrement((LONG *)(p))
#define LOCK_CAS(pd,s,r) InterlockedCompareExchange((LONG *)(pd), (s), (r))
#define LOCK_CASP(pd,s,r) InterlockedCompareExchangePointer((PVOID *)(pd), (PVOID)(s), (PVOID)(r))

#else
#error "configuration not implemented"
#endif

#if defined(WIN32)

/* Thread abstraction, Win32 flavour. */

typedef DWORD THREAD_TIMEOUT_T;

#define THREAD_INFINITE INFINITE

typedef HANDLE THREAD_TH_T;

#define thread_sleep(n) Sleep(n)
/* Wrapped in do/while(0) so the macro behaves as one statement. */
#define thread_create(th, proc, arg) do {(th) = (HANDLE)_beginthread(proc, 8192, arg);} while (0)
#define thread_get_tid()	GetCurrentThread()
#define thread_set_priority(tid,n)	SetThreadPriority(tid, n)
/* m is a CPU index, converted here to a single-bit affinity mask. */
#define thread_set_affinity(tid,m)	SetThreadAffinityMask(tid, (DWORD_PTR)1 << (m))

#elif defined(_POSIX_SOURCE)

/* Thread abstraction, POSIX flavour. */

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <unistd.h>

typedef int THREAD_TIMEOUT_T;

#define THREAD_INFINITE	INT_MAX

/* Refuse to build if the platform already defines the priority names
   that are given private values just below. */
#if defined(THREAD_PRIORITY_BELOW_NORMAL) || defined(THREAD_PRIORITY_IDLE)
#error "unsupported implementation"
#endif

#define THREAD_PRIORITY_NORMAL	14
#define THREAD_PRIORITY_BELOW_NORMAL	15
#define THREAD_PRIORITY_IDLE	16

typedef pthread_t THREAD_TH_T;

/* Sleep n milliseconds.  usleep() reports failure as -1 with errno set;
   only when it rejects the interval (EINVAL) fall back to whole-second
   sleep(). */
#define thread_sleep(n) (usleep(1000 * (n)) == 0 || errno != EINVAL || sleep((n) / 1000))
#define thread_create(th, proc, arg) thread_create_p(&(th), proc, arg)

/*
 * Start a thread with default attributes that runs proc(param) and
 * store its handle in *th.
 *
 * proc is cast to the void *(*)(void *) signature pthread_create()
 * expects.  A failure of pthread_create() is reported on stderr; *th is
 * then left as pthread_create() left it.
 */
static
void
thread_create_p(pthread_t *th, NORETURN (*proc)(void *), void *param)
{
  int err = pthread_create(th, NULL, (void *(*)(void *))proc, param);
  if (err != 0)
	fprintf(stderr, "pthread_create failed (error %d)\n", err);
}

#if defined(__linux__)

/* Linux: only thread-id lookup and CPU affinity are provided here.
   thread_set_priority is deliberately not defined in this branch, so the
   priority-related code guarded by #ifdef thread_set_priority is compiled
   out.
   NOTE(review): the original remark appears to say that priorities and
   idle threads gain nothing under the default scheduling policy and are
   left as future work - confirm. */

#include <linux/unistd.h>
_syscall0(pid_t,gettid)

#define thread_get_tid() gettid()

/*
 * Restrict thread tid to the single CPU numbered i.
 * Returns the result of sched_setaffinity().
 */
static
int thread_set_affinity(pid_t tid, int i)
{
  cpu_set_t only_cpu;
  int rc;

  CPU_ZERO(&only_cpu);
  CPU_SET(i, &only_cpu);

  rc = sched_setaffinity(tid, sizeof(only_cpu), &only_cpu);
  return rc;
}

#else

/* Other POSIX systems: no per-thread scheduling control is provided, so
   neither thread_get_tid nor thread_set_affinity is defined here. */

#endif

#else
#error "configuration not supported"
#endif

/* Arguments handed to each worker thread (thread_crypt64_new). */
struct THREAD_PARAM
{
  /* Copies of values shared by all workers */
  CODE_T *code;		/* base of the mapped generated code */
  off_t code_cmp;	/* offset from code to the comparison routine */
  unsigned seed;	/* mixed into the worker's srand() argument */

  /* Per-thread values */
#ifdef thread_set_priority
  int pri;			/* priority the worker applies to itself */
#endif
};

/*
 * Return a bit mask of the CPUs this process may run on
 * (bit k set => CPU k usable).
 *
 * Falls back to 0x1 (CPU 0 only) when multithreading is compiled out,
 * when the affinity query fails, or on platforms without a query.
 */
static
uint64_t
thread_avail(void)
{
#if !USE_MT

  /* Built without multithreading. */
  return 0x1U;

#elif defined(WIN32)	/* Win32 API */
  DWORD_PTR proc_aff, sys_aff;

  if (!GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)
	  || proc_aff == 0
	  || sys_aff == 0)
	{
	  return 0x1U;
	}
#if DEBUG>=1
  fprintf(stderr,
		  "m=%08X s=%08X\n",
		 (unsigned)proc_aff,
		 (unsigned)sys_aff);
#endif
  /* Nothing is printed on a single-CPU system; otherwise tell the user
	 whether all CPUs or only a fraction of them will be used. */
  if (popcnt64(sys_aff) != 1)
	{
	  if (proc_aff == sys_aff)
		fprintf(stderr,
				"ʏ%d{Ƃ͂悭̂łB\n",
				popcnt64(proc_aff));
	  else
		fprintf(stderr,
				"ō͂%g{̗͂łĂƂɂ΂B\n",
				(double)popcnt64(proc_aff) / popcnt64(sys_aff));
	}
  return proc_aff;

#elif defined(__linux__)	/* uses sched.h */

  cpu_set_t allowed;
  uint64_t bits = 0;
  int cpu;

  if (sched_getaffinity(getpid(), sizeof(allowed), &allowed) < 0)
	return 0x1U;

  /* Only the first 64 CPUs fit into the returned mask. */
  for (cpu = 0; cpu < 64 && cpu < CPU_SETSIZE; cpu++)
	{
	  if (CPU_ISSET(cpu, &allowed))
		bits |= 1ULL << cpu;
	}

  return bits;
#else

  /* XXX no way to enumerate processors here */
  return 0x01U;

#endif
}

/*
 * Worker thread entry point; never returns.
 *
 * a_param points to a struct THREAD_PARAM supplying the generated code,
 * the offset of its comparison routine, a seed and (where supported)
 * the priority this thread gives itself.
 *
 * The thread claims one slot in loop_cpu[], draws a key with key_init(),
 * and then loops forever: load the key into its packet, run the crypt
 * and compare code, pass the result to check_hit(), and add
 * N_ALU * ALU_BITS to its counter.  When both key_inc() calls return 0
 * the key is re-drawn with key_reset() under mutex_key.
 */
static
NORETURN
thread_crypt64_new(void *a_param)
{
	struct THREAD_PARAM *param = a_param;
	CODE_T *code = param->code;
	CODE_T *cmp = code + param->code_cmp;
	struct KS_KEY key;
	struct PACKET_CRYPT64 *pkt;
	uint64_t *ploop;
	THREAD_TH_T th = thread_get_tid();

	/* packet_create() reads key.key[] before key_init() has run, so give
	   it defined contents.  The real key is loaded in the loop below as a
	   delta against pkt->uk.key[]. */
	memset(&key, 0, sizeof(key));
	pkt = packet_create(16, 1024, key.key);

	WaitForSingleObject(mutex_key, INFINITE);

	/* Claim this thread's progress counter. */
	assert((size_t)n_cpus < sizeof(loop_cpu) / sizeof(loop_cpu[0]));
	ploop = &loop_cpu[n_cpus++];

	srand(usec() ^ param->seed ^ (unsigned)(uintptr_t)th);
	key_init(&key);
	ReleaseMutex(mutex_key);

#ifdef thread_set_priority
	thread_set_priority(th, param->pri);
#endif

	for (;;)
	{
		do
		{
			int j;
			/* Update the packet's key; the 4th argument is the XOR of the
			   new and the previously loaded key byte. */
			for (j = 0; j < 8; j++)
			{
				key_set64(&pkt->key64, j, key.key[j], key.key[j] ^ pkt->uk.key[j], 0);
				pkt->uk.key[j] = key.key[j];
			}
			CALL_CRYPT64(code,
						 &pkt->key64,
						 &pkt->param64);
			CALL_CMP64(cmp,
					   pkt->param64.hit,
					   pkt->param64.lr);
			check_hit(pkt, pkt->param64.hit);
			*ploop += N_ALU * ALU_BITS;
		}
		while (key_inc(&key, 6, 8) || key_inc(&key, KEY_SHUFFLE_POS, 8));

		/* Both increments reported 0: draw a new key. */
		WaitForSingleObject(mutex_key, INFINITE);
		key_reset(&key, 0);
		ReleaseMutex(mutex_key);
	}

	/* notreached */
}

/***************************************************************
 *
 *	Main loop and related setup
 *
 */

/*
 * Program entry point.
 *
 *  1. Verify the CPU offers REQUIRED_CAPS.
 *  2. Parse "target.txt" and write the crypt and compare code into the
 *     scoreboard file, then map it.
 *  3. Draw a key, apply the salt, open "log.txt" (returns 1 on failure).
 *  4. Start two worker threads per usable CPU: one pinned to that CPU
 *     with priority THREAD_PRIORITY_LOWEST, one allowed on all CPUs with
 *     priority THREAD_PRIORITY_IDLE.
 *  5. Loop forever, periodically printing the search rate to stderr.
 *
 * argc and argv are not used.
 */
int
main(int argc, char *argv[])
{
  int i;
  CODE_T *code = NULL;
  off_t code_cmp;
  FILE *sfp;	/* scoreboard */
  struct ITREE *root_expr;
  uint64_t proc_mask;
  struct THREAD_PARAM *threads = NULL;
  int nthreads;
  int tn;

  /* Key drawn once here; its bytes are passed to set_salt(). */
  struct KS_KEY key;

#define UPDATE_INTERVAL 8	/* factor applied to the per-second rate to cap upd_int */
  struct status {
    uint64_t startTime;	/* usec() value when measuring started */
    uint64_t lastTime;	/* usec() value at the last report */
    uint64_t loop;		/* total work counted so far */
    uint64_t lastloop;	/* value of loop at the last report */
  } status;
  uint64_t curTime;
  uint32_t upd_int = 0;	/* minimum increase of loop before the next report */
/*
 NOTE(review): the rate b and the cap c = UPDATE_INTERVAL * b below are
 plain ints, so the arithmetic overflows once the per-second rate times
 UPDATE_INTERVAL no longer fits.
 */

  if (!cpuid_issupported())
	{
	  fprintf(stderr, "̊ő点邱Ƃz肳Ă܂B\n");
	  exit(1);
	}

  assert((1 << N_STRIDE) == N_ALU * ALU_BITS);

  mutex_key = CreateMutex(NULL, FALSE, NULL);

  /* Read the search targets. */
  root_expr = expr_parse("target.txt");

  /* Generate the code into the scoreboard file:
	 first the part of the descriptor between pro and cmp_pro. */
  sfp = scoreboard_open();
  fwrite(crypt64_descs[0]->pro, 1, crypt64_descs[0]->cmp_pro - crypt64_descs[0]->pro, sfp);	/* prologue & core loop */
  proc_mask = thread_avail();

#if 0
  if (0&&proc_mask == 1U)
	{
	  /* single */
	  code_cmp = 0;
	}
  else
#endif
	{
	  /* multi */
	  fwrite(crypt64_descs[0]->ep, 1, crypt64_descs[0]->ep_end - crypt64_descs[0]->ep, sfp);	/* epilogue */

	  /* Start the stand-alone comparison routine on a 64-byte boundary
		 and remember its offset. */
	  code_cmp = ftell(sfp);
	  fseek(sfp, (-code_cmp) & 63, SEEK_CUR);
	  code_cmp = ftell(sfp);
	  fwrite(crypt64_descs[0]->pro, 1, crypt64_descs[0]->crypt - crypt64_descs[0]->pro, sfp);	/* prologue */
	}

  /* Emit the comparator: its prologue, the synthesized body, its epilogue. */
  fwrite(crypt64_descs[0]->cmp_pro, 1, crypt64_descs[0]->cmp_ep - crypt64_descs[0]->cmp_pro, sfp);	/* comparator setup */
  tn = synth_synthesize(sfp, root_expr);
  fwrite(crypt64_descs[0]->cmp_ep, 1, crypt64_descs[0]->ep_end - crypt64_descs[0]->cmp_ep, sfp);	/* epilogue */

  /* Map the generated code into memory. */
  code = scoreboard_map(sfp);

  /* Draw the initial key and apply the salt derived from it. */
  WaitForSingleObject(mutex_key, INFINITE);
  srand(usec());
  key_init(&key);
  ReleaseMutex(mutex_key);
  set_salt(code, crypt64_descs[0], key.key);

  if (log_open("log.txt") != 0) return 1;

  /* Hold the mutex while spawning, so no worker gets past its own
	 WaitForSingleObject() until all of them have been created. */
  WaitForSingleObject(mutex_key, INFINITE);

  /* Spawn the workers. */
  nthreads = 0;
  if (code_cmp)
	{
	  THREAD_TH_T h;
	  int ots = -1;	/* CPU index reserved for this thread; -1 = none */
	  threads = calloc(2 * popcnt64(proc_mask), sizeof(*threads));
	  if (threads == NULL)
		{
		  fprintf(stderr, "main: cannot allocate thread parameters\n");
		  return 1;
		}
	  for (i = 0; i < 64; i++)
		if (proc_mask & (1ULL << i))
		  {
			if (0&&ots < 0)
			  {
				/* Disabled: reserve the first CPU for this thread and
				   lower the priority of the whole process. */
#ifdef WIN32
				h = GetCurrentProcess();
				SetPriorityClass(h, BELOW_NORMAL_PRIORITY_CLASS);
#endif
#if defined(thread_set_priority)
				/* Fill the gaps with an idle-priority worker. */
				threads[nthreads].code = code;
				threads[nthreads].code_cmp = code_cmp;
				threads[nthreads].seed = rand();
				threads[nthreads].pri = THREAD_PRIORITY_IDLE;
				thread_create(h, thread_crypt64_new, &threads[nthreads]);
				nthreads++;
#endif
				if (!code_cmp)
				  break;

				/* The rest of this thread's setup is done after the loop. */
				ots = i;
			  }
			else
			  {
				/* Worker pinned to CPU i, at lowered priority. */
				threads[nthreads].code = code;
				threads[nthreads].code_cmp = code_cmp;
				threads[nthreads].seed = rand();
#ifdef thread_set_priority
				//threads[nthreads].pri = THREAD_PRIORITY_BELOW_NORMAL;
				threads[nthreads].pri = THREAD_PRIORITY_LOWEST;
#endif
				thread_create(h, thread_crypt64_new, &threads[nthreads]);
#ifdef thread_get_tid
				thread_set_affinity(h, i);
#endif
				nthreads++;
#if 1
				/* Second worker at idle priority, allowed on every usable CPU. */
				threads[nthreads].code = code;
				threads[nthreads].code_cmp = code_cmp;
				threads[nthreads].seed = rand();
#ifdef thread_set_priority
				threads[nthreads].pri = THREAD_PRIORITY_IDLE;
#endif
				thread_create(h, thread_crypt64_new, &threads[nthreads]);
#ifdef thread_get_tid
				SetThreadAffinityMask(h, proc_mask);
#endif
				nthreads++;
#endif
			  }
		  }
#ifdef thread_get_tid
	  /* Pin this thread only if a CPU was actually reserved; the sentinel
		 -1 must never be used as a shift count. */
	  if (ots >= 0)
		thread_set_affinity(thread_get_tid(), ots);
#endif
	}

  fprintf(stderr, "Jn!\n");
  ReleaseMutex(mutex_key);

  memset( &status, 0, sizeof( struct status ) );
  status.startTime = status.lastTime = usec();

  /* Reporting loop */
  for (;;)
	{
	  Sleep(5000);

	  /* Sum the per-worker counters. */
	  status.loop = 0;
	  for (i = 0; i < n_cpus; i++) status.loop += loop_cpu[i];

	  /* Report only after enough new work and a changed clock value
		 (the latter keeps the interval divisor non-zero). */
	  if (status.loop >= status.lastloop + upd_int
		  && (curTime = usec()) != status.lastTime)
		{
		  uint64_t diffTime;
		  int a, b, c;
		  /* Overall average since start (unit: ktrips/sec) */
		  diffTime = curTime - status.startTime;
		  a = status.loop / ((1000 / USEC_SEC) * diffTime);

		  /* Rate over the last interval (unit: trips/sec) */
		  diffTime = curTime - status.lastTime;
		  b = USEC_SEC * (status.loop - status.lastloop) / diffTime;

		  /* Upper bound for upd_int */
		  c = UPDATE_INTERVAL * b;

		  /* Grow upd_int by one second's worth of work (b) per report,
			 never beyond c. */
		  upd_int = (upd_int + b < c
					 ? upd_int + b
					 : c);

		  status.lastTime = curTime;
		  status.lastloop = status.loop;
#if DEBUG>=1
		  /* Guard the percentage against an empty sample, as is done
			 for nap_total below. */
		  if (nblk_total)
			fprintf(stderr,
					"%5d/%5d(%3d%%)",
					nblk_hit, nblk_total, 100 * nblk_hit / nblk_total);
		  else
			fprintf(stderr,
					"-----/-----(---%%)");
		  nblk_hit = nblk_total = 0;
		  if (nap_total)
			fprintf(stderr,
				"  %5d/%5d(%3d%%)",
				nap_hit, nap_total, 100 * nap_hit / nap_total);
		  else
			fprintf(stderr,
					"  -----/-----(---%%)");
		  nap_hit = nap_total = 0;
#endif
		  fprintf(stderr,
				  "%4d.%03dMtrips/s [%4d.%06dMtrips/s]\r",
				  a / 1000, a % 1000,
				  b / 1000000, b % 1000000);
		}
	}

  return 0;
}

/*
 *	Local Variables:
 *		tab-width:	4
 *	End:
 *
 * EOF */
