/* Creates a compressed image, given a file as an argument.
 * (c)1999 Paul `Rusty' Russell.  GPL.
 *
 * CHANGELOG:
 *
 * * Mon, 29 Aug 2005 15:20:10 CEST 2005 Eduard Bloch <blade@debian.org>
 * - a complete overhaul, rewrote most parts using STL and C++ features,
 *   portable data types etc.pp.
 * - using multiple threads, input/output buffers, various strategies for
 *   temporary data storage, options parsing with getopt, and: network enabled
 *   server/client modes, distributing the compression jobs to many nodes
 *
 * * Mon Feb 28 20:45:14 CET 2005 Sebastian Schmidt <yath@yath.eu.org>
 * - Added -t command line option to use a temporary file for the compressed
 *   blocks instead of saving it in cb_list.
 * - Added information message before the write of the compressed image.
 * * Sat Feb 14 22:49:20 CEST 2004 Christian Leber
 * - Changed to work with the advancecomp packages, so that its
 *   better algorithms may be used
 * * Sun Oct 26 01:05:29 CEST 2003 Klaus Knopper
 * - Changed format of index pointers to network byte order
 * * Sat Sep 29 2001 Klaus Knopper <knopper@knopper.net>
 * - changed compression to Z_BEST_COMPRESSION,
 * * Sat Jun 17 2000 Klaus Knopper <knopper@knopper.net>
 * - Support for reading file from stdin,
 * - Changed Preamble.
 * * Sat Jul 28 2001 Klaus Knopper <knopper@knopper.net>
 * - cleanup and gcc 2.96 / glibc checking
 */


#define _FILE_OFFSET_BITS 64

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <pthread.h>
#include <time.h>
#include <fcntl.h>
#include <zlib.h>
#include "compressed_loop.h"
#include "portable.h"
#include "pngex.h"
//#include "utility.h"
#include "compress.h"
#include "siglock.h"

#ifndef __OPTIMIZE__
#define __OPTIMIZE__
#endif

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
#if defined(__linux__)
#include <sys/socketvar.h>
#endif
#include "lib/mng.h"
#include "lib/endianrw.h"

// for server
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/wait.h>
#include <signal.h>

#include <iostream>
#include <iomanip>
#include <cstdio>
#include <string>
#include <list>
#include <map>
#include <vector>
#include <queue>

using namespace std;

#define DEBUG(x)
//#define DEBUG(x) cerr << x << endl;

// Parenthesized so the shift cannot re-associate with operators at the use site
#define MAX_KMALLOC_SIZE (2L<<17)

#define CLOOP_PREAMBLE "#!/bin/sh\n" "#V2.0 Format\n" "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n" "exit $?\n"

#define MAXLEN(blocksize) ((blocksize) + (blocksize)/1000 + 12)

# if defined(linux) || defined(__linux__)
#include <asm/byteorder.h>
#define ENSURE64UINT(x) __cpu_to_be64(x)
#endif

#if defined(__CYGWIN__)

static __inline __uint64_t
__bswap64(__uint64_t _x)
{
	/*
	 * Reverse the byte order of a 64-bit value: peel bytes off the low end
	 * of the input and push them onto the low end of the result, so the
	 * first byte taken ends up in the most significant position.
	 */
	__uint64_t _swapped = 0;
	int _byte;

	for (_byte = 0; _byte < 8; _byte++) {
		_swapped = (_swapped << 8) | (_x & 0xff);
		_x >>= 8;
	}
	return _swapped;
}
#define ENSURE64UINT(x) __bswap64(x)
#endif

// FIXME: also use __bswap64 on BSD with additional checks, see
// extract_compressed_fs.c

#ifndef ENSURE64UINT
#warning System not Linux, endian correction macro not available
#warning Assuming the system is Big-Endian (eg. PowerPC)
#define ENSURE64UINT(x) (uint64_t) x
#endif

#define die(msg) { cerr << "ERROR: " << msg << ". Exiting..."<<endl;  exit(1); }

#ifndef MSG_WAITALL
#define MSG_WAITALL 0
#endif

#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL 0
#endif

// some globals, easier option passing
int defport=3103;                 // default TCP port, overridden by -p
unsigned long blocksize=0;        // uncompressed block size (-B); main() falls back to 65536
unsigned long expected_blocks=0;  // block count derived from the input size in main()
unsigned long numblocks=0;        // block count that main() writes into the header
int method=Z_BEST_COMPRESSION;    // compression level handed to the workers (-L, -b)
const int maxalg=11;              // number of algorithm slots; the last one is used for 7zip
bool be_verbose(false), be_quiet(false);
unsigned int MaxBacklog=500;      // workers pause while backlog exceeds this (-a)
unsigned int backlog=0;           // compressed blocks submitted but not yet fetched by the writer

bool use_memory(false), reuse_tempfile(false);

int workThreads=2;                // number of compressing threads (-t)
vector<char *> hostpool;          // remote hosts given after INFILE/OUTFILE on the command line

// inputLock/inputCond guard inputQ, outputLock/outputCond guard outMap and backlog
pthread_mutex_t inputLock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t outputLock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t inputCond = PTHREAD_COND_INITIALIZER;
pthread_cond_t outputCond = PTHREAD_COND_INITIALIZER;

bool terminateAll=false;          // set once the output thread has been joined
int IndicateLastPos=-10;          // id of the final block, set by the input thread; -10 = not known yet
int IndicateError=-10;

time_t starttime, curtime;        // used for the progress/speed messages

// job size
unsigned long jobsize = 32;       // number of blocks moved between queues per batch (-j)

FILE *datafh=NULL, *in=NULL;      // destination of the compressed blocks / input stream

int start_server(int port);
int setup_connection(char *peer);

// kludge: output splitting (-S), implemented elsewhere
FILE * split_fopen( char *path, char *mode);
int split_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream);
uint64_t chunk_size=0;
char *realname, *out_mode=NULL;
FILE *realfh=NULL;

class compressItem {
    public:

        int nodeid;
        int best;
        int maxlen;
        unsigned long compLen, uncompLen;

        char *uncompBuf, *compBuf;

        compressItem (int id, int size) {
            uncompLen=size;
            maxlen=MAXLEN(uncompLen);
            compBuf = new char[maxlen];
            uncompBuf = new char[size];
            nodeid=id;
            memset(uncompBuf,0,uncompLen);
        }

        ~compressItem() {
            // deleted by compress methods or by others
            //if(uncompBuf) delete[] uncompBuf;
            //if(compBuf) delete[] compBuf;
        }

        bool doRemoteCompression(int method, int con) {
            DEBUG("sending data");
            if(send(con, uncompBuf, uncompLen, MSG_NOSIGNAL) == -1) {
                DEBUG("data sending failed");
                return false;
            }
            DEBUG("awaiting compressed data, should come back before TCP timeouts");
            uint32_t rhead[2];
            int l=recv(con, rhead, sizeof(rhead), MSG_WAITALL | MSG_NOSIGNAL);
            if(l<1) return false;
            
            compLen=ntohl(rhead[0]);
            best=ntohl(rhead[1]);
            DEBUG("Receiving: " << compLen << " bytes\n");
            // Cygwin does not know MSG_WAITALL and splits large blobs :(
            char * ptr = compBuf;
            int rest = compLen;
            while(rest>0) {
                l=recv(con, ptr, rest, MSG_WAITALL | MSG_NOSIGNAL);
                if(l<1) return false;
                ptr+=l;
                rest-=l;
            }
            DEBUG("### Received\n");
            return true;
        }

        bool doLocalCompression(int method=0) {
            const int maxalg=11;
            int z_error;

            DEBUG("Compressing " << nodeid);

            if(method<0)
            {
                int j=0;
                // force 7zip if requested
                if(method==-1) j=maxalg-1;
                    
                compLen=maxlen+1; // !!

                char *tmpBuf=new char[maxlen];
                unsigned int tmpLen=maxlen;

                for(; j<maxalg; j++) {

                    memset(tmpBuf,0,maxlen);
                    tmpLen = maxlen; // important!

                    if(j==maxalg-1) { // try 7zip as the last one
                        if(!compress_zlib(shrink_extreme, (unsigned char *) tmpBuf, tmpLen, (unsigned char *)uncompBuf, uncompLen))
                        {
                            fprintf(stderr, "*** Error %d compressing block %d with 7ZIP!\n", nodeid, j);
                            return false;
                        }
                    }
                    else
                    {
                        if((z_error=compress2((Bytef*)tmpBuf, (uLongf*)&tmpLen, (Bytef*)uncompBuf, uncompLen, j)) != Z_OK)
                        {
                            fprintf(stderr, "*** Error %d compressing block %d, algo: %d!\n", z_error, nodeid, j);
                            return false;
                        }
                    }

                    if(tmpLen<compLen) { // a new winner found, swap tmpBuf and compLen
                        best=j;
                        compLen=tmpLen;
                        char *t = tmpBuf;
                        tmpBuf=compBuf;
                        compBuf=t;
                    }
                }
            }
            else
            {
                compLen=maxlen; // damn, that is important, see zlib API docs
                best=method;
                z_error=compress2((Bytef*) compBuf, (uLongf*) & compLen, (Bytef*)uncompBuf, uncompLen, method);
                if(z_error != Z_OK)
                {
                    cerr << "**** Error " << z_error << " compressing block " << nodeid << ", maxlen: " << maxlen << endl;
                    return false;
                }
            }

            DEBUG("done");
            return true;
        }
};

deque<compressItem*> inputQ;
map<int, compressItem *> outMap;
typedef map<int, compressItem *>::iterator outMapIter;

/*
 * Worker thread body. `ptr` points to an int holding the worker id.
 *
 * Repeatedly takes up to `jobsize` items from inputQ (fewer if the item
 * marked by IndicateLastPos shows up), compresses them locally or through
 * a remote node, then publishes them in outMap for the output thread.
 * Blocks while the output thread is more than MaxBacklog items behind.
 *
 * When hosts are configured, the worker starts with
 * hostpool[id % hostpool.size()]; if that connection cannot be made at
 * all it compresses locally. After a failed remote job it reconnects to
 * the same host and, if that fails, to a randomly chosen different host;
 * the program exits when that fails too.
 */
void *compressingLoop(void *ptr)
{
    int id = * ( (int*) ptr);

    vector<compressItem*> localQ;
    vector<compressItem *>::iterator it;
    char *peer = NULL;

    int con=-1;
    if(hostpool.size()) {
        peer = hostpool[id % hostpool.size()];
        con=setup_connection(peer);
        if(con<0)
            cerr << "Unable to connect, compressing locally\n";
    }

    while(!terminateAll)
    {
        // collect one batch of work
        localQ.clear();
        pthread_mutex_lock( &inputLock );
        for(;;) {
            while(inputQ.empty()) {
                pthread_cond_wait(&inputCond, &inputLock);
            }
            compressItem *item = inputQ.front();
            inputQ.pop_front();
            localQ.push_back(item);
            if(localQ.size() >= jobsize || item->nodeid == IndicateLastPos)
                break;
        }
        pthread_cond_broadcast(&inputCond);
        pthread_mutex_unlock( &inputLock );

        // FIXME: may retry the connection to the original host here if
        // neccessary, or maybe not...

        for(it=localQ.begin();it!=localQ.end();it++) {
            // con is also the flag for local/remote mode. Either all threads
            // local or all remote! using "localhost" and a local daemon has
            // also some advantages, eg. beeing able to renice it
            if(con<0) {
                if (! (*it)->doLocalCompression(method) )
                    die("Compression failed");
            }
            else {
                // on failures, try to jump over to other hosts if possible.
                // Idealy localhost should be one of them.
                //
                // FIXME: cannot clearly distinguish between compression
                // failures and network failures etc. Not really important but
                // can end up in a inf. loop on systematic program errors.
                while(! (*it)->doRemoteCompression(method, con) )
                {
                    cerr << "Remote compression failed, trying to reconnect...\n";
                    close(con); // the broken socket is of no further use
                    con=setup_connection(peer);
                    if(con>=0)
                        continue; // ok, was able to reconnect

                    cerr << "Failed. ";
                    if(hostpool.size()<=1)
                        die("Failed, no more hosts available");

                    // choose one but not the same
                    size_t r;
                    do {
                        r=rand() % hostpool.size();
                    } while(hostpool[r] == peer);

                    peer = hostpool[r];
                    cerr << "Trying another host: "<<peer<<endl;
                    con=setup_connection(peer);
                    if(con<0)
                        die("unable to continue");
                }
            }

            // the input data is not needed any longer
            delete[] (*it)->uncompBuf;
            (*it)->uncompBuf=NULL;
        }

        DEBUG("Calc: submitting results");
        pthread_mutex_lock( &outputLock );

        for(it=localQ.begin();it!=localQ.end();it++) {
            DEBUG("C: " << (*it)->nodeid);
            outMap[ (*it)->nodeid ] = (*it);
            DEBUG("ADD: " << (*it)->nodeid << ",size: " << outMap.size());
        }

        backlog+=localQ.size();

        // and notify output thread and maybe brothers
        pthread_cond_broadcast(&outputCond);
        
        while(backlog>MaxBacklog) // stop if output writer does not keep up
            pthread_cond_wait(&outputCond, &outputLock);

        pthread_mutex_unlock( &outputLock );
        DEBUG("Calc: submitted");
    }
    // no lock is held here, each iteration releases what it acquires
    return(NULL); // g++ shut up
}

void *outputFetch(void *ptr) { 
    
    //int id = * ( (int*) ptr);

    DEBUG("Fetcher thread created");
    unsigned long long total_uncompressed=0,total_compressed=0;
    unsigned int took[maxalg];
    for(int i=0; i<maxalg; i++) took[i]=0;

    int nextId=0;
    vector<compressItem *> localQ;
    vector<compressItem *>::iterator it;

    while(true) { // escape by goto

        localQ.clear();

        pthread_mutex_lock( &outputLock );

get_another_one_to_write:

        DEBUG("Writer wants:" <<nextId);

        while(outMap.find(nextId) == outMap.end() && nextId != IndicateLastPos+1) {
            DEBUG("Writer sleeping, " << nextId << ", " << IndicateLastPos);
            pthread_cond_wait( &outputCond, &outputLock );
            DEBUG("Writer awaken, " << nextId << ", " << IndicateLastPos);
        }
        if(nextId != IndicateLastPos+1) {
            DEBUG("Backlog: " << backlog << ", nextToWrite: " <<nextId);
            if(backlog > MaxBacklog) // may need to awaken workers later
                pthread_cond_broadcast(&outputCond);

            compressItem *item = outMap[nextId];
            nextId++;
            localQ.push_back(item);
            if(localQ.size() < jobsize)
                goto get_another_one_to_write;
        }

        backlog-=localQ.size();
        pthread_mutex_unlock( &outputLock );
        
        for(it=localQ.begin();it!=localQ.end();it++) {
            total_uncompressed += (*it)->uncompLen;
            total_compressed += (*it)->compLen;
            DEBUG("Dumping: " << (*it)->nodeid << ",size: " << outMap.size());
            
            // process contents now
            ++took[(*it)->best];

            if(!use_memory) {
                split_fwrite((*it)->compBuf, 1, (*it)->compLen, datafh);
                delete[] (*it)->compBuf;
            }

            /* Print status  */
            if(be_verbose)
               fprintf(stderr, 
               "[%2d] Block# %5d size %6lu -> %6lu [compression ratio %3lu%%, overall: %3Lu%%]\n",
               (*it)->best, (*it)->nodeid, (*it)->uncompLen, (*it)->compLen,
               ((*it)->uncompLen>0) ? (((*it)->compLen*100)/(*it)->uncompLen) : 100,
               (total_uncompressed>0) ? ((total_compressed*100)/total_uncompressed) : 100
               );
            if((*it)->nodeid==IndicateLastPos) {
                goto print_stats;
            }
        }
        
        time_t t=time(NULL);
        if(t>curtime+5) {
            curtime=t;
            // print short summary
            if(!be_quiet) {
                float deltaT=(curtime-starttime);
                cerr<< "Input: " << total_uncompressed << " bytes, avg. speed: "
                    <<(int) (total_uncompressed/deltaT) << " b/s, "
                    <<(int)((float)expected_blocks/(float)nextId*deltaT-deltaT)
                    << "s remaining" <<endl;
            }
        }


    }
    
    return((void *) 1);

print_stats:
    if(!be_quiet) {
        cerr << "Done. Input: " << total_uncompressed << " bytes, avg. speed: " << 
            (total_uncompressed/(1+curtime-starttime)) << " b/s"<<endl;
        fprintf(stderr,"\nStatistics:\n");
        for(int j=0; j<maxalg-1; j++) 
            fprintf(stderr,"gzip(%d): %5d (%5.2g%%)\n", j, took[j], 100.0F*(float)took[j]/(float)outMap.size());
        fprintf(stderr,"7zip: %5d (%5.2g%%)\n\n", took[maxalg-1], 100.0F*(float)took[maxalg-1]/(float)outMap.size());
    }
    return(NULL); 
}

void *inputFeed(void *ptr) {
    
    //int id = * ( (int*) ptr);
    
    DEBUG("Input thread created");

    vector<compressItem *> localQ;
    vector<compressItem *>::iterator it;
    compressItem *item;

    for(unsigned int rpos=0; rpos<expected_blocks;rpos++) {

        item = new compressItem(rpos, blocksize);
        if(!item) die("Memory exhausted!");
    
        DEBUG("Next block...");
        //bool broken_read=false;
        ssize_t len=fread(item->uncompBuf, sizeof(char), blocksize, in);
        DEBUG("Got " << len << " bytes, eof? "<<feof(in));
        if(!len && feof(in)) { // work around the final zero-len block
            rpos--;
        }
        else {
            localQ.push_back(item);
            // don't jump on the last block, wouldn't get to the block below ever
            if(localQ.size() < jobsize && rpos != expected_blocks-1) continue;
        }
        
        // okay, local queue full, submit
        pthread_mutex_lock( &inputLock );
        while(inputQ.size() > jobsize*3) // overfull now, wait
            pthread_cond_wait( &inputCond, &inputLock );

        for(it=localQ.begin();it!=localQ.end();it++) {
            inputQ.push_back(*it);
        }

        if(feof(in) || rpos == expected_blocks-1) {
            // hier noch eins lesen und feof testen?!
            IndicateLastPos=rpos;
        }
        
        pthread_cond_broadcast(&inputCond); // notify workers
        pthread_mutex_unlock( &inputLock );

        localQ.clear();
        if(feof(in)) break;
    }
    return(NULL); 
}

int create_compressed_blocks_mt() {
    int threadId=0;
    pthread_t output_thread;

#if 0
    if(hostpool.size()) {
        int n=0;
        for(int sid=0; sid < workThreads ; sid++) {
try_another_connection:
            char *peer = hostpool[n++ % hostpool.size()];
            int con=-1;
            con=setup_connection(peer);
            if(con<0) {
                cerr << "Connection to " <<peer <<" failed. Wrong port?"<<endl;
                if(n>sid+5) die("Too many connection failures on ");
                goto try_another_connection;
            }
            DEBUG("got con: " << con << " for " <<sid);
            conpool.push_back(con);
        }
    }
#endif

    for(; threadId < workThreads ; threadId++)
        pthread_create(new pthread_t, NULL, compressingLoop, (void *) new int(threadId));

    pthread_create(new pthread_t, NULL, inputFeed, (void *) new int(threadId++));

    pthread_create(&output_thread, NULL, outputFetch, (void *) new int(threadId));

    int ret;
    pthread_join(output_thread, (void **) &ret);

    terminateAll=true;

    return ret;
};

#define OPTIONS "bB:mrp:lt:hs:f:j:a:vqS:L:"
        
/*
 * Prints the command line help to stdout.
 *
 * progname is accepted for the callers' convenience but not used, the
 * program name in the text is fixed.
 *
 * Always returns 1.
 */
int usage(char *progname)
{
    (void) progname;

    cout << "Usage: advfs [options] INFILE OUTFILE [HOSTS...]" << endl;
    cout << "Options:" << endl;
    cout << "  -b     Try all and choose best compression method, see -L" << endl;
    cout << "  -B N   Set the block size to N" << endl;
    cout << "  -m     Use memory for temporary data storage (NOT recommended)" << endl;
    cout << "  -r     Reuse output file as temporary file (NOT recommended)"   << endl;
    cout << "  -p N   Set the default port number to N (can be modified in host names)" <<endl;
    cout << "  -l     Listening mode (network node)" <<endl;
    cout << "  -t T   Number of compressing threads" <<endl;
    cout << "  -s Q   Expected data size from the input, see below" <<endl;
    cout << "  -f S   Temporary file S"<<endl;
    cout << "  -q     Don't print normal status messages on the console" << endl;
    cout << "  -v     Verbose mode, print extra statistics" <<endl;
    cout << "  -h     Help of the program" << endl;
    // the option parser handles splitting under -S; -s is the input size
    cout << "  -S X   Experimental option to split output into chunks of size X" <<endl;
    cout << "Performance tuning options:"<<endl;
    cout << "  -j T   Jobsize, number of blocks passed to each working thread per tick"<<endl;
    cout << "  -a U   Max. backlog, size limit of the resync queue of the output thread" <<endl;
    cout << "  -L V   Compression level (-2..9); 9: zlib's best (default setting), 0: none,\n"
            "         -1: 7zip, -2: try all and choose the best one" << endl;
    cout <<endl;
    cout << "To use standard input/output - can be used as INFILE/OUTFILE. However, this will\n"
        "require additional memory (or diskspace) for data size calculation and header\n"
        "update (after compression). Passing the INPUT data size with -s may help.\n";
    cout << "The size numbers can be declared with a suffix which acts as multiplier\n"
        "(K: KiB, k: KB, i: iso9660 block (2KiB); M: MiB; m: MB; G: GiB; g: GB)." <<endl;
    return(1);
}

/*
 * Converts a number with an optional one-letter multiplier suffix
 * (k, K, m, M, g, G, i) into its value.
 *
 * text is modified in place: a recognised suffix is cut off. NULL and the
 * empty string yield 0. A trailing character above '9' that is no known
 * suffix terminates the program. Negative numbers are passed through
 * atoll() and come back converted to uint64_t; callers that store the
 * result in an int get the negative value back.
 */
uint64_t getsize(char *text) {
    
    // the empty string has no last character to inspect
    if(!text || !*text) return 0;
    
    static const struct { char suffix; uint64_t factor; } units[] = {
        {'k', 1000}, {'K', 1024}, {'m', 1000000}, {'M', 1048576},
        {'g', 1000000000}, {'G', 1073741824}, {'i', 2048}
    };
    char *mod = text + strlen(text)-1;
    uint64_t fac=1;
    if(*mod > '9') {
        fac=0;
        for(size_t i=0; i<sizeof(units)/sizeof(units[0]); i++)
            if(units[i].suffix==*mod)
                fac=units[i].factor;
        if(!fac) die("Unknown factor " << mod << " or bad size " << text);
        *mod=0x0;
    }
    
    return fac*atoll(text);
}

/*
 * Tells whether text is a non-empty string of decimal digits only.
 * NULL and the empty string are not numbers.
 */
inline bool is_pos_number(char *text) {
	if(!text || !*text)
		return false;
	for(char *p=text;*p;p++)
		// isdigit() needs an unsigned char value, plain char may be negative
		if(!isdigit((unsigned char) *p))
			return false;
	return true;
}

/*
 * Program entry: parses the options, opens input and output, runs the
 * threaded compression and finally writes the cloop header, the block
 * index and - depending on the temporary storage strategy - the data.
 *
 * Storage strategies for the compressed blocks:
 *   default  data goes straight to its final position in the output file,
 *            which requires the number of blocks to be known in advance
 *   -m       data is kept in memory and written after the index
 *   -f FILE  data is written to FILE and copied behind the index
 *   -r       data is written to the start of the output file and shifted
 *            back afterwards to make room for header and index
 *
 * Returns 0 on success and 1 when fewer blocks than announced were found
 * (dummy index entries are added then). Fatal problems exit via die().
 */
int main(int argc, char **argv)
{
    int ret=0;
    struct cloop_head head;
    unsigned long long bytes_so_far;
    char *tempfile(NULL);
    FILE *tempfh(NULL), *out(NULL);
    uint64_t datasize=0;
    int c;

    while (1)
    {
        int option_index = 0;

#ifdef HAVE_GETOPT_LONG
        static struct option long_options[] =
        {
            {"best", 0, 0, 'b'},
            {0, 0, 0, 0}
        };
        c = getopt_long (argc, argv, OPTIONS,
                long_options, &option_index);
#else
        (void) option_index;
        c = getopt(argc, argv, OPTIONS);
#endif
        if (c == -1) 
            break;

        switch (c)
        {
            case 'B':
                blocksize=getsize(optarg);
                if(blocksize > 1<<20) {
                    blocksize = 1<<20;
                    cerr << "Block size is too big. Adjusting to " << blocksize<<endl;
                }
                if(blocksize < 512) {
                    blocksize = 512;
                    cerr << "Block size is too small. Adjusting to " << blocksize<<endl;
                }
                if(blocksize%512) {
                    blocksize = blocksize-blocksize%512;
                    cerr << "Block size not multiple of 512. Adjusting to " << blocksize << endl;
                }
                break;

            case 'm':
                use_memory=true;
                MaxBacklog=INT_MAX; // nothing is written while compressing, never throttle
                break;

            case 'b':
                method=-2;
                break;

            case 'L':
                method=getsize(optarg);
                break;

            case 'f':
                tempfile=optarg;
                break;

            case 'v':
                be_verbose=true;
                break;

            case 'q':
                be_quiet=true;
                break;

            case 'r':
                reuse_tempfile=true;
                break;

            case 'S':
                chunk_size=getsize(optarg);
                if(chunk_size%512) {
                    chunk_size = chunk_size-chunk_size%512;
                    cerr << "Block size not multiple of 512. Adjusting to " << chunk_size << endl;
                }
                break;

            case 's':
                datasize=getsize(optarg);
                break;

            case 'a':
                MaxBacklog=getsize(optarg);
                break;

            case 'j':
                jobsize=getsize(optarg);
                break;

            case 'p':
                defport=getsize(optarg);
                if(defport>65535) die("Invalid port");
                break;

            case 'l':
                start_server(defport);
                exit(0);
                break;

            case 't':
                workThreads=getsize(optarg);
                if(!workThreads) {
#ifdef _SC_NPROCESSORS_ONLN
                        workThreads=sysconf(_SC_NPROCESSORS_ONLN);
                        cerr << workThreads << " processor(s) detected\n";
#else
                        workThreads=3;
                        cerr << "Invalid number of work threads, using 3\n";
#endif
                }
                break;

            default:
                usage(argv[0]);
                exit(1);
        }

    }

    char *fromfile=NULL, *tofile=NULL;

    if(optind > argc-2) {
        usage(argv[0]);
        die("\nInfile and outfile must be specified");
    }
    // positional arguments: INFILE OUTFILE [HOSTS...], plus the old syntax
    // where a plain number is the block size
    while(optind!=argc) {
        if( 0==hostpool.size() && is_pos_number(argv[optind]) )
        {
            int val=getsize(argv[optind]);
            cerr << "Warning, number as file string found. Assuming old command syntax and\n"
                "choosing compatible parameters (-m -B " << val <<"). See the usage info (-h)\n"
                "for better/correct parameters."<<endl;
            blocksize=val;
            use_memory=true;
        }
        else if(!fromfile) fromfile=argv[optind];
        else if(!tofile) tofile = argv[optind];
        else hostpool.push_back(argv[optind]);
        optind++;
    }
    if(!blocksize)  blocksize=65536;

    if( fromfile && strcmp(fromfile, "-")) {
        struct stat buf;
        if(!datasize) {
            // on failure datasize stays 0 and is rejected right below
            if(0 == stat(fromfile, &buf))
                datasize=buf.st_size;
        }
        if(datasize < 8000)
            die("Unknown or suspicious input data size. Use -s to specify a real value");

        in=fopen(fromfile, "r");
    }
    else
    {
        if(! (datasize || use_memory || reuse_tempfile || tempfile) )
            die("\nUnknown input data size and no tempdata storage strategy has been choosen.\nOne of: -s, -m, -f or -r required");

        in=stdin;
    }

    if(!in) die("Error opening input");

    // protect the kludge...
    if(chunk_size && (reuse_tempfile || use_memory || tempfile))
        die("Output splitting with temporary storage not implemented yet");

    expected_blocks=datasize/blocksize;
    if(datasize%blocksize) expected_blocks++;

    if( tofile && strcmp(tofile, "-")) {
        out = split_fopen(tofile, "w+");
        if(!out)
            die("Error opening output file for writing");
    }
    else {
        out=stdout;
        if(reuse_tempfile) {
            die("Cannot reuse any file for tempdata, - specified");
        }
        else if(use_memory) {
            cerr << "Warning, going to allocate " << datasize << " bytes of system memory.\n";
        }
        else
            die("Unrewindable output, please choose the tempdata storage strategy option.\nOne of: -s, -m or -r required");
    }

    if(tempfile && use_memory) die("Either -f or -m is allowed");
    if(tempfile && reuse_tempfile) die("-r with another tempfile does not make sense");

    if(tempfile) {
        tempfh=fopen(tempfile, "w+");
        if(!tempfh)
            die("Error opening temporary file");
    }

    // unknown input size: let the input thread run until EOF
    if(!expected_blocks) expected_blocks=INT_MAX;

    // expected values including additional pointer to store the initial offset
    bytes_so_far = sizeof(head) + sizeof(uint64_t) * (expected_blocks+1);
    if(!be_quiet) 
        cerr << "Block size "<< blocksize << ", expected number of blocks: " << expected_blocks <<endl;

    // phase 1, prepare to data compression

    if(!reuse_tempfile) 
        // seek to the beginn of the data area but only if the position is
        // known for sure
        fseeko(out, bytes_so_far, SEEK_SET);

    datafh=tempfile ? tempfh : out;

    // go, go, go
    curtime=starttime=time(NULL);
    if(create_compressed_blocks_mt()) {
        cerr << "An error was detected while compressing, exiting..." << endl;
        exit(1);
    }
    fclose(in);
    fflush(datafh);
    if(realfh) fflush(realfh);

    // in tempdata modes choose real values rather than precalculated
    if(reuse_tempfile || tempfile || use_memory) {
        numblocks=outMap.size();
        bytes_so_far = sizeof(head) + sizeof(uint64_t) * (1+outMap.size());
    }
    else
        numblocks=expected_blocks;

    // stretch the temp/target file, shifting data to make space for the header
    if(reuse_tempfile) {
        cerr << "Shifting data..."<<endl;

        // The data is moved towards the end of the file by `gap` bytes, in
        // pieces of `gap` bytes. Writing runs one piece behind reading, so
        // a piece must stay queued until the piece it would overwrite has
        // been read as well.
        const size_t gap=bytes_so_far;
        // something about the MaxBacklog memory should be a good buffer size
        size_t maxqlen=(uint64_t)blocksize*MaxBacklog/bytes_so_far;
        if(maxqlen<2) maxqlen=2; // with less than two pieces nothing could ever be written

        queue< pair<char *, size_t> > Q; // buffer and number of valid bytes
        uint64_t offpos=bytes_so_far, inpos=0;
        bool exhausted=false;

        while(!exhausted || !Q.empty()) {
            fseeko(out, inpos, SEEK_SET);
            DEBUG(inpos);
            while(!exhausted && Q.size() < maxqlen) {
                char *buf = new char[gap];
                size_t got=fread(buf, sizeof(char), gap, out);
                if(got<gap) {
                    if(ferror(out))
                        die("Cannot stretch the output file, read error");
                    exhausted=true;
                }
                if(got>0) {
                    Q.push(make_pair(buf, got));
                    inpos+=got;
                }
                else
                    delete[] buf;
                DEBUG(inpos);
            }
            fseeko(out, offpos, SEEK_SET);
            // all but the _one_, unless everything has been read already
            const size_t keep = exhausted ? 0 : 1;
            while(Q.size() > keep) {
                if(Q.front().second != fwrite(Q.front().first, sizeof(char), Q.front().second, out))
                    die("Cannot stretch the output file");
                offpos+=Q.front().second;
                delete[] Q.front().first;
                Q.pop();
            }
        } 
    }
                
    // seek back
    fseeko(out, 0, SEEK_SET);

    /* Update the head... */

    memset(head.preamble, 0, sizeof(head.preamble));
    memcpy(head.preamble, CLOOP_PREAMBLE, sizeof(CLOOP_PREAMBLE));
    head.block_size = htonl(blocksize);
    head.num_blocks = htonl(numblocks);

    /* Write out head... */

    if(1 != fwrite(&head, sizeof(head), 1, out))
        die("Cannot write the header");

    if(!be_quiet) cerr << "Writing index for " << outMap.size() << " block(s)...\n";

    if(!outMap.size()) die("Compressed data lost");

    /* Write offsets, then data */

    // the first entry is the start of the data area, every following one
    // the end offset of the respective block
    uint64_t tmp;
    tmp = ENSURE64UINT(bytes_so_far);
    fwrite(&tmp, sizeof(tmp), 1, out);

    outMapIter it = outMap.begin();
    for(;it!=outMap.end();it++) {
        bytes_so_far += it->second->compLen;
        tmp = ENSURE64UINT(bytes_so_far);
        fwrite(&tmp, sizeof(tmp), 1, out);
    }
    if(expected_blocks != INT_MAX && expected_blocks > outMap.size()) {
        int diff=expected_blocks-outMap.size();
        cerr << "Warning, expected size > available data.\nFile will be invalid without a modification of the cloop driver.\nAdding " << diff << " dummy index offsets...\n";
        while(diff--)
            fwrite(&tmp, sizeof(tmp), 1, out);
        ret=1;
    }
    
    if(!be_quiet) cerr << "Writing compressed data...\n";
    if(use_memory) {
        for(it=outMap.begin();it!=outMap.end(); it++) {
            DEBUG("Dumping contents of " << it->second->nodeid);
            fwrite(it->second->compBuf,it->second->compLen, 1, out);
        }
    }
    else if(tempfh) {
        DEBUG("Copy back temp data");
        fseeko(tempfh, 0, SEEK_SET);
        size_t maxlen=10*MAXLEN(blocksize);
        char *buf = new char[maxlen];
        size_t len;
        // use the whole buffer per round
        while((len=fread(buf, sizeof(char), maxlen, tempfh)) > 0) {
            if(len != fwrite(buf, sizeof(char), len, out))
                die("File write problem");
        }
        delete[] buf;
        fclose(tempfh);
        unlink(tempfile);
    }
    if(out) fclose(out);
    return ret;
}

// server code

#define PENDING 10     // how many pending connections queue will hold

/*
 * SIGCHLD handler: collects every child that has already terminated.
 *
 * Uses the non-blocking form of waitpid so the handler returns at once
 * when the remaining children are still running, and restores errno
 * because the interrupted code may be about to inspect it.
 */
void sigchld_handler(int s)
{
    (void) s;
    int saved_errno = errno;
    while(waitpid(-1, NULL, WNOHANG) > 0);
    errno = saved_errno;
}

/*
 * Run the compression server on the given TCP port.
 *
 * Listens on all local addresses and forks one child per accepted
 * connection. Each child speaks this protocol:
 *   1. receive two uint32_t in network byte order: block size and method
 *   2. repeat until the peer goes away:
 *        - receive one block of exactly <block size> bytes
 *        - compress it with item.doLocalCompression(method)
 *        - send two uint32_t (compressed length, item.best), then the
 *          compressed bytes
 *
 * port: TCP port to listen on.
 *
 * The accept loop never terminates, so this function does not return to
 * its caller; setup failures end the process with exit(1).
 */
int start_server(int port)
{
    int sockfd, new_fd;  // listen on sock_fd, new connection on new_fd
    struct sockaddr_in my_addr;    // my address information
    struct sockaddr_in their_addr; // connector's address information
    int yes=1;

    if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("socket");
        exit(1);
    }

    if (setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)) == -1) {
        perror("setsockopt");
        exit(1);
    }
    // best effort only: failure to disable Nagle is not fatal
    setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes));

    memset(&my_addr, 0, sizeof(my_addr));        // also zeroes sin_zero
    my_addr.sin_family = AF_INET;                // host byte order
    my_addr.sin_port = htons(port);              // short, network byte order
    my_addr.sin_addr.s_addr = htonl(INADDR_ANY); // any local address

    if (bind(sockfd, (struct sockaddr *)&my_addr, sizeof(my_addr)) == -1) {
        perror("bind");
        exit(1);
    }

    if (listen(sockfd, PENDING) == -1) {
        perror("listen");
        exit(1);
    }

    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = sigchld_handler; // reap all dead processes
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART;
    if (sigaction(SIGCHLD, &sa, NULL) == -1) {
        perror("sigaction");
        exit(1);
    }

    while(1) {  // main accept() loop
        socklen_t sin_size = sizeof(their_addr);
        new_fd = accept(sockfd, (struct sockaddr *)&their_addr, &sin_size);
        if(-1 == new_fd) {
            perror("accept");
            continue;
        }
        printf("server: got connection from %s\n",
                inet_ntoa(their_addr.sin_addr));

        pid_t pid = fork();
        if (pid == -1) {
            // could not spawn a worker: drop this client, keep serving
            perror("fork");
            close(new_fd);
            continue;
        }
        if (pid == 0) { // this is the child process
            close(sockfd); // child doesn't need the listener
            const uint32_t limit=1048576; // largest accepted block size
            uint32_t head[2];
            int l=recv(new_fd, head, sizeof(head), MSG_WAITALL);
            // test l<0 first: comparing -1 against the unsigned sizeof
            // would otherwise let a recv() error slip through
            if(l<0 || (size_t)l<sizeof(head)) { // not OK
                close(new_fd);
                exit(1);
            }
            uint32_t size=ntohl(head[0]);
            uint32_t method=ntohl(head[1]);
            // validate the converted value, not the raw network-order word
            if( !size || size>limit) {
                cerr << "Bad blocksize\n";
                close(new_fd);
                exit(1);
            }
            // nodeid is irrelevant, use the node as permanent buffer
            compressItem item(0, size);
            while(true) {
                char * ptr = item.uncompBuf;
                int rest = size;
                while(rest>0) {
                    l=recv(new_fd, ptr, rest, MSG_WAITALL | MSG_NOSIGNAL);
                    if(l<1) {
                        // peer closed the connection (0) or recv failed (<0);
                        // the child must end here and never fall back into
                        // the code of start_server()'s caller
                        if(l<0) perror("recv");
                        close(new_fd);
                        exit(l<0 ? 1 : 0);
                    }
                    ptr+=l;
                    rest-=l;
                }
                uint32_t rhead[2];
                if(item.doLocalCompression(method))
                    rhead[0]=htonl(item.compLen);
                else {
                    close(new_fd);
                    exit(0);
                }
                rhead[1]=htonl(item.best);
                if (send(new_fd, rhead, sizeof(rhead), 0) == -1) {
                    // without the header the client cannot parse the
                    // payload, so give up on this connection
                    perror("Unable to return data");
                    close(new_fd);
                    exit(1);
                }
                DEBUG("Sende: " << item.compLen << " bytes");
                if (send(new_fd, item.compBuf, item.compLen, 0) == -1) {
                    perror("Unable to return data");
                    close(new_fd);
                    exit(1);
                }
            }
            // not reached: the loop above only leaves via exit()
        }
        close(new_fd);  // parent doesn't need this
    }

    // not reached: the accept loop above never terminates
    return 0;
}

/*
 * Connect to a compression server and send the session parameters.
 *
 * hostname: "host" or "host:port". The string is modified in place: the
 *           ':' is overwritten with a NUL so that only the host part
 *           remains. Without a port suffix, defport is used.
 *
 * After connecting, two uint32_t values in network byte order are sent:
 * the global blocksize and the global method.
 *
 * Returns the connected socket descriptor on success,
 *         -2 if the host name cannot be resolved to an IPv4 address,
 *         -1 on any socket/connect/send failure (the socket is closed).
 */
int setup_connection(char *hostname)
{
    int port;
    char *szPort=strchr(hostname, ':');
    if(szPort) {
        *szPort++ = 0x0;
        port=getsize(szPort);
    }
    else port=defport;

    struct sockaddr_in sa ;
    int s=-1;

    memset(&sa, 0, sizeof(sa)) ;
    in_addr_t addr = inet_addr(hostname);
    if (addr != INADDR_NONE) {
        /* is Internet addr in octet notation */
        memcpy(&sa.sin_addr, &addr, sizeof(addr)) ; /* set address */
        sa.sin_family = AF_INET ;
    } else {
        struct hostent *hp = gethostbyname(hostname);
        if (hp == NULL)
            return -2 ;
        /* sa is a sockaddr_in: copying anything but a 4-byte IPv4 address
         * would overflow sin_addr, so refuse other address families */
        if (hp->h_addrtype != AF_INET
                || hp->h_length != (int) sizeof(sa.sin_addr))
            return -2 ;
        memcpy(&sa.sin_addr, hp->h_addr, sizeof(sa.sin_addr)) ;
        sa.sin_family = AF_INET ;
    }
    sa.sin_port = htons((u_short) port) ;
    DEBUG("s1");
    if ((s = socket(sa.sin_family, SOCK_STREAM, 0)) < 0)
        return -1 ;
    DEBUG("s2");

    /* best effort only: failure to disable Nagle is not fatal */
    int yes=1;
    setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes));

    if (connect(s, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
        close(s) ;
        return -1 ;
    }
    DEBUG("s3:"<<s);
    // init the compression parameters
    uint32_t head[2];
    head[0] = htonl(blocksize);
    head[1] = htonl(method);
    if(send(s, head, sizeof(head), MSG_NOSIGNAL) == -1) {
        close(s) ;  // do not leak the descriptor on a failed handshake
        return -1;
    }
    return s ;
}

FILE * split_fopen( char *path, char *mode) {
    if(chunk_size) {
        string fname=string(path)+".a";
        realname=strdup(fname.c_str());
        out_mode=mode;
        return (realfh=fopen(realname, mode));
    }
    else
        return fopen(path, mode);
}

// Kludge: fwrite() replacement for split output. Limited to one file handle,
// which is managed separately in realfh; the stream argument is only used
// when splitting is disabled (chunk_size unset).
//
// Writes size*nmemb bytes from ptr. In split mode no chunk file grows beyond
// chunk_size bytes: when the current chunk is full, the last character of
// realname is incremented (".a" -> ".b" -> ...) and the next chunk is opened
// with out_mode. One call may span any number of chunks.
//
// Returns the number of complete items written, like fwrite(): nmemb on
// success, less on a write error.
//
// NOTE(review): a filled chunk's handle is only flushed, not closed, as
// before. Presumably the caller still holds (and later closes) the FILE*
// returned by split_fopen(). Confirm before adding an fclose() here.
int split_fwrite(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    if(!chunk_size)
        return fwrite(ptr, size, nmemb, stream);

    if(!size || !nmemb)
        return 0;

    const char *src = (const char *) ptr;
    size_t remaining = size*nmemb;  // bytes still to be written
    size_t written = 0;             // bytes written so far

    while(remaining > 0) {
        off_t curpos = ftello(realfh);
        if(curpos < 0)
            break;                  // position unknown, cannot split safely

        off_t room = (off_t) chunk_size - curpos;
        if(room <= 0) {
            // current chunk is full: move on to the next file name
            if(fflush(realfh) != 0)
                break;
            realname[strlen(realname)-1]++;
            realfh=fopen(realname, out_mode);
            // die() is assumed not to return -- TODO confirm
            if(!realfh) die("Problems creating "<< realname);
            continue;
        }

        size_t want = remaining;
        if((size_t) room < want)
            want = (size_t) room;

        size_t got = fwrite(src+written, 1, want, realfh);
        written += got;
        remaining -= got;
        if(got != want)
            break;                  // short write: report what went through
    }
    return (int) (written / size);
}
