/* qf - quickly find strings in files */

#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define MAX(a,b) ((a)>(b)?(a):(b))
#define CHUNKSIZE (8192)

int main (int argc, char **argv) {
    unsigned char *haystack, *needle, *chunk1, *chunk2;
    int i, j, fd, stdin_done;
    size_t occ[UCHAR_MAX+1];
    size_t hlen, nlen, rlen, hpos, npos;
    unsigned long long fpos, ccnt;

    if (argc < 3) {
        printf ("Usage: %s search files...\n", argv[0]);   
        exit (2);
    }
    needle = (unsigned char*) argv[1];
    nlen = strlen ((char*) needle);

    assert (2*nlen < CHUNKSIZE);

    haystack = chunk1 = calloc (CHUNKSIZE, 2);
    if (haystack == NULL) exit (2);
    chunk2 = chunk1 + CHUNKSIZE;

    /* preprocess needle */
    for (i=0; i<UCHAR_MAX+1; ++i) occ[i] = nlen;
    for (i=0, j=nlen; i<nlen; ++i) occ[needle[i]] = --j;

    /* loop over input files */
    stdin_done = 0;
    for (i=2; i<argc; ++i) {
        /* only search stdin once, no matter how many times - is specified */
        if (argv[i][0] == '-' && argv[i][1] == '\0' && !stdin_done) {
            fd = STDIN_FILENO;
            stdin_done = 1;
        }
        else {
            fd = open (argv[i], O_RDONLY, 0);
            if (fd == -1) {
                perror (argv[i]);
                continue;
            }
        }

        /* stream the file through the chunk1/chunk2 buffers */
        rlen = read (fd, chunk1, 2*CHUNKSIZE);
        hlen = rlen;
        hpos = nlen - 1;
        fpos = 0;
        ccnt = 0;
        while (!fpos && hpos < hlen) {
            if (hpos > CHUNKSIZE + nlen) {
                /* we're well inside chunk2 now, so flip the buffers */
                memcpy (chunk1, chunk2, CHUNKSIZE);
                memset (chunk2, 0, CHUNKSIZE);
                hpos -= CHUNKSIZE;
                hlen -= CHUNKSIZE;
                ++ ccnt;
                /* read the next chunk */
                rlen = read (fd, chunk2, CHUNKSIZE); 
                if (rlen < 0) {
                    perror ("read");
                    fpos = -1;
                    continue;
                }
                hlen += rlen;
            }
            /* searching the current chunk of haystack */
            npos = nlen - 1;
            while (haystack[hpos] == needle[npos]) {
                if (npos == 0)  {
                    fpos = ccnt * CHUNKSIZE + hpos;
                    printf ("%s:%llu\n", argv[i], fpos);
                    break;
                }
                -- hpos;
                -- npos;
            }
            hpos += MAX (nlen - npos, occ[haystack[hpos]]);
        }
        close (fd);
    }
    free (haystack);
    return 0;
}

