// File: fix2.c


#include <memory.h>
#include <stdio.h>
#include <stdlib.h>

// Size in bytes of each of the three working buffers (25 MiB). Parenthesized
// so that the expansion stays a single operand in any expression
#define BUFFER_CAPACITY     (1024 * 1024 * 25)

// Number of '|'-separated fields a line must have to be processed
#define INDICES_EXPECTED    32
// Upper bound on the number of field start offsets recorded per line
#define INDICES_MAXIMUM     40
// Number of fields copied to the output for each processed line
#define INDICES_WRITE       16

// A progress message is printed every 2^PRINT_FREQUENCY written lines
#define PRINT_FREQUENCY     16

int     main (int argc, char* argv[])
{
    unsigned char*  bufferInput;
    unsigned char*  bufferLast;
    unsigned char*  bufferPrev;
    unsigned long   bufferSize;
    unsigned char*  bufferSwap;
    unsigned long   indicesArray[INDICES_MAXIMUM + 1];
    unsigned long   indicesCount;
    unsigned long   indicesWrite[INDICES_WRITE] = {0, 1, 14, 7, 8, 16, 10, 12, 13, 11, 9, 25, 26, 27, 17, 18};
    FILE*           streamWriter;
    FILE*           streamReader;
    unsigned long   block;
    unsigned long   dupes;
    unsigned long   lines;
    unsigned long   size;
    unsigned long   i;
    unsigned long   j;

    if (argc < 3)
    {
        printf ("usage: %s <input> <output>\n", argv[0]);

        return 0;
    }

    streamReader = fopen (argv[1], "rb");

    if (!streamReader)
    {
        fprintf (stderr, "cannot open input file \"%s\" for reading\n", argv[1]);

        return 1;
    }

    streamWriter = fopen (argv[2], "wb");

    if (!streamWriter)
    {
        fprintf (stderr, "cannot open output file \"%s\" for writing\n", argv[2]);
        fclose (streamReader);

        return 1;
    }

    bufferInput = malloc (sizeof (*bufferInput) * BUFFER_CAPACITY * 3);
    bufferLast = bufferInput + BUFFER_CAPACITY;
    bufferPrev = bufferLast + BUFFER_CAPACITY;
    bufferSize = 0;
    indicesArray[0] = 0;
    indicesCount = 1;
    dupes = 0;
    lines = 0;

    printf ("starting process...\n");

    for (i = 0; i < bufferSize || !feof (streamReader); ++i)
    {
        // End of buffer reached: content must be shifted to the left before
        // buffer is populated with incoming data from input stream
        if (i == bufferSize)
        {
            i = bufferSize - indicesArray[0];

            memmove (bufferInput, bufferInput + indicesArray[0], i * sizeof (*bufferInput));

            bufferSize = i + fread (bufferInput + i, sizeof (*bufferInput), BUFFER_CAPACITY - i, streamReader);

            for (j = indicesCount; j--; )
                indicesArray[j] -= indicesArray[0];

            if (i >= bufferSize)
                break;
        }

        // End of item found: save starting index of the next one
        if (bufferInput[i] == '|')
        {
            if (indicesCount < INDICES_MAXIMUM)
                indicesArray[indicesCount++] = i + 1;
        }

        // End of line found: write required indices and flush list
        else if (bufferInput[i] < ' ')
        {
            if (indicesCount == INDICES_EXPECTED)
            {
                indicesArray[indicesCount] = i + 1;
                size = 0;

                for (j = 0; j + 1 < INDICES_WRITE; ++j)
                {
                    block = indicesArray[indicesWrite[j] + 1] - indicesArray[indicesWrite[j]] - 1;

                    memcpy (bufferLast + size, bufferInput + indicesArray[indicesWrite[j]], block * sizeof (*bufferInput));
                    size += block;

                    bufferLast[size++] = '|';
                }

                if (j < INDICES_WRITE)
                {
                    block = indicesArray[indicesWrite[j] + 1] - indicesArray[indicesWrite[j]] - 1;

                    memcpy (bufferLast + size, bufferInput + indicesArray[indicesWrite[j]], block * sizeof (*bufferInput));
                    size += block;

                    bufferLast[size++] = 0;
                }

                if (size > 0)
                {
                    if (memcmp (bufferLast, bufferPrev, size * sizeof (*bufferLast)) != 0)
                    {
                        if ((++lines & (((unsigned)1 << PRINT_FREQUENCY) - 1)) == 0)
                            printf ("writing line %lu (%lu duplicate(s))...\n", lines, dupes);

                        fwrite (bufferLast, sizeof (*bufferLast), size - 1, streamWriter);

                        bufferSwap = bufferLast;
                        bufferLast = bufferPrev;
                        bufferPrev = bufferSwap;
                    }
                    else
                        ++dupes;
                }
            }

            fwrite (bufferInput + i, sizeof (*bufferInput), 1, streamWriter);

            indicesArray[0] = i + 1;
            indicesCount = 1;
        }
    }

    if ((lines & (((unsigned)1 << PRINT_FREQUENCY) - 1)) != 0)
        printf ("writing line %lu (%lu duplicate(s))...\n", lines, dupes);

    printf ("done.\n");

    fclose (streamWriter);
    fclose (streamReader);
    free (bufferInput);

    return 0;
}