// 4-Tap Lanczos-2 filter for picture scaling with nify error distribution.
// Written by Nils Liaaen Corneliusen 2013.
// License: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license
// Read the 2023 article here: https://www.ignorantus.com/pages/neon_scaler/
// Read the 2018 article here: https://www.ignorantus.com/pages/image_transformation/

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>

#include "coeffs.h"

// Convert float->int and make sure they sum to COEFFS_SUM.
static void convert( float *vf, int *vi )
{
    vi[0] = lroundf( vf[0]*COEFFS_SUM );
    vi[1] = lroundf( vf[1]*COEFFS_SUM );
    vi[2] = lroundf( vf[2]*COEFFS_SUM );
    vi[3] = lroundf( vf[3]*COEFFS_SUM );

    int error = (vi[0]+vi[1]+vi[2]+vi[3])-COEFFS_SUM;

//    printf( "%9.6f,%9.6f,%9.6f,%9.6f", vf[0], vf[1], vf[2], vf[3] );
//    printf( " %02d %02d %02d %02d ", vi[0], vi[1], vi[2], vi[3] );
//    printf( ": %2d : ", error );

    int dir = error > 0 ? -1 : 1;

    while( error ) {

        int pos = 0;
        float low_err = 9999.0f;

        for( int i = 0; i < 4; i++ ) {

            float pos_err  = fabs( vf[i]*(float)COEFFS_SUM - (vi[i] + dir) );

            if( pos_err < low_err ) {
                low_err = pos_err;
                pos = i;
            }

        }

        vi[pos] += dir;
        error   += dir;

    }

//    printf( "%02d %02d %02d %02d", vi[0], vi[1], vi[2], vi[3] );
//    printf( "\n" );

}

// Lanczos-2 filter.

static float sinc( float x )
{
    if( x == 0.0f ) return 1.0f;

    return sinf(M_PI*x)/(M_PI*x);
}

static float L( float x )
{
    return sinc(x)*sinc(x/2.0f);
}


void coeffs_get( float fac, uint32_t *co )
{
    if( fac > 1.0f ) fac = 1.0f;

    float lx = -2.0f*fac;

    for( int i = 0; i < COEFFS_PHASES; i++ ) {

        float phase = i * fac / COEFFS_PHASES;

        float vf[4];
        vf[0] = L( lx   + phase       );
        vf[1] = L( lx   + phase + fac );
        vf[2] = L( 0.0f - phase       );
        vf[3] = L( 0.0f - phase - fac );

        float sum = vf[0]+vf[1]+vf[2]+vf[3];
        vf[0] /= sum;
        vf[1] /= sum;
        vf[2] /= sum;
        vf[3] /= sum;

        int vi[4];
        convert( vf, vi );

        // Stuff values in table
        co[i] = (vi[0]<<24) | ((vi[1]&0xff)<<16) | ((vi[2]&0xff)<<8) | (vi[3]&0xff);

    }

}
