#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include "hopcroft.h"
#include "cdaig.h"


/* Builds an Istring: a sequence of Icharacters read from the data file and
*  joined by single spaces.
*
*   Param:
*       · pos: position (row * m + column) of the first Icharacter
*       · is_first: orientation flag of the first Icharacter, forwarded to
*                   get_Icharacter (true = vertical read, false = horizontal read)
*       · n_Icharacter: number of Icharacters in the Istring
*       · length: number of symbols of the first Icharacter
*       · data: data file
*       · n: number of rows of the file
*       · m: number of columns of the file
*
*   For every Icharacter after the first one the orientation flag is toggled.
*   When it becomes true the symbol count grows by one and the position moves
*   with the "vertical" formula; otherwise the position moves with the
*   "horizontal" formula.
*
*   Returns: a heap-allocated, NUL-terminated Istring. The caller owns it and
*            must free() it. The process exits on allocation failure.
*/
char* get_Istring(int pos, bool is_first, int n_Icharacter, int length, FILE *data, int n, int m){
    char *istring, *icharacter, *tmp;
    int i, position = pos, ilength = length;
    bool first = is_first;
    size_t used = 0;       /* characters currently stored, excluding '\0' */
    size_t piece_len;

    /* Start from an empty string so the result is valid even when
     * n_Icharacter <= 0 */
    istring = malloc(1);
    if(!istring){
        printf("Error allocating memory for the istring\n");
        exit(EXIT_FAILURE);
    }
    istring[0] = '\0';

    for(i = 0; i < n_Icharacter; i++){
        if(i != 0){
            /* Toggle the orientation, then update length and position */
            first = !first;
            if(first){ /* Vertical: one symbol longer than the previous pair */
                ilength += 1;
                position += ilength - 1 - (ilength - 1) * m;
            }else{ /* Horizontal */
                position += ilength * m - (ilength - 1);
            }
        }

        /* Get next Icharacter (owned by us until freed below) */
        icharacter = get_Icharacter(position, first, ilength, data, n, m);
        piece_len = strlen(icharacter);

        /* Room for: current text + separator (not before the first piece)
         * + new piece + '\0' */
        tmp = realloc(istring, used + (i == 0 ? 0 : 1) + piece_len + 1);
        if(!tmp){
            printf("Error allocating memory for the istring\n");
            free(istring);
            free(icharacter);
            exit(EXIT_FAILURE);
        }
        istring = tmp;

        if(i != 0){
            istring[used++] = ' ';
        }
        /* Copy the piece together with its terminator */
        memcpy(istring + used, icharacter, piece_len + 1);
        used += piece_len;

        free(icharacter);
    }

    return istring;
}

/* Reads one Icharacter from the data file.
*
*   Param:
*       · pos: starting position of the Icharacter, used directly as a byte
*              offset into the file (row = pos / m, column = pos % m)
*       · is_first: true  -> vertical Icharacter (one symbol every m bytes)
*                   false -> horizontal Icharacter (length consecutive bytes)
*       · length: number of symbols of the Icharacter
*       · data: data file
*       · n: number of rows
*       · m: number of columns
*
*   Returns: a heap-allocated, NUL-terminated string that the caller must
*            free(). A horizontal request with pos >= n*m yields the
*            one-symbol string "$".
*
*   The process exits if the Icharacter would cross the right edge
*   (horizontal) or the bottom edge (vertical) of the n x m grid, or on any
*   allocation, seek or read failure.
*/
char* get_Icharacter(int pos, bool is_first, int length, FILE* data, int n, int m){
    char* ichar;
    int k, row = pos / m, col = pos % m;
    size_t read_count;

    /* Horizontal Icharacter past the end of the data: final marker */
    if(!is_first && pos >= n*m){
        ichar = malloc(2 * sizeof(char));
        if(!ichar){
            printf("Error allocating memory\n");
            exit(EXIT_FAILURE);
        }
        ichar[0] = '$';
        ichar[1] = '\0';
        return ichar;
    }

    ichar = malloc((length + 1)*sizeof(char));
    if(!ichar){
        printf("Error allocating memory for the ichar element\n");
        exit(EXIT_FAILURE);
    }

    if(!is_first){ /*Horizontal Icharacter*/
        /*Length check: the symbols must fit in the current row*/
        if(col + length > m){
            printf("Wrong length (i)\n");
            free(ichar);
            exit(EXIT_FAILURE);
        }
        /*Move the pointer*/
        if(fseek(data, pos, SEEK_SET) != 0){
            printf("Error reading data file\n");
            free(ichar);
            exit(EXIT_FAILURE);
        }
        /* Read and store the Icharacter */
        read_count = fread(ichar, sizeof(char), (size_t)length, data);
        if(read_count != (size_t)length){
            printf("Error reading data file\n");
            free(ichar);
            exit(EXIT_FAILURE);
        }
    }else{  /*Vertical Icharacter*/
        /*Length check: the symbols must fit in the current column*/
        if(row + length > n){
            printf("Wrong length (j)\n");
            free(ichar);
            exit(EXIT_FAILURE);
        }
        for(k = 0; k < length; k++){
            /* Same column, k rows further down */
            if(fseek(data, pos + k * m, SEEK_SET) != 0){
                printf("Error reading data file\n");
                free(ichar);
                exit(EXIT_FAILURE);
            }
            read_count = fread(&ichar[k], sizeof(char), 1, data);
            if(read_count != 1){
                printf("Error reading data file\n");
                free(ichar);
                exit(EXIT_FAILURE);
            }
        }
    }

    ichar[length] = '\0';
    return ichar;
}


/* Compares two Istrings Icharacter by Icharacter.
*
*   An Istring is a sequence of Icharacters separated by single spaces. The
*   comparison walks both sequences in parallel, one Icharacter at a time,
*   and works in place: no memory is allocated.
*
*   Param:
*       · seq1: one of the sequences we want to compare
*       · seq2: the other sequence we want to compare
*
*   Returns:
*       · -1 if the two sequences are equal
*       · otherwise the length, in characters, of the common prefix made of
*         whole matching Icharacters. A separating space is counted only
*         when both sequences continue after it. The value is 0 when not
*         even the first Icharacter matches.
*/
int32_t istrcmp(char *seq1, char *seq2){
    size_t len1 = strlen(seq1), len2 = strlen(seq2);
    size_t p1 = 0, p2 = 0;          /* start of the current Icharacter */
    size_t tok1, tok2;              /* length of the current Icharacter */
    int32_t prefix = 0;

    while(p1 < len1 && p2 < len2){
        /* The Icharacter ends at the next space or at the end of the string */
        tok1 = strcspn(seq1 + p1, " ");
        tok2 = strcspn(seq2 + p2, " ");

        /* Different length or different symbols: the common prefix ends here */
        if(tok1 != tok2 || memcmp(seq1 + p1, seq2 + p2, tok1) != 0){
            return prefix;
        }

        prefix += (int32_t)tok1;
        p1 += tok1;
        p2 += tok2;

        /* A position still inside the string is necessarily a space, because
         * that is where the scan above stopped */
        if(p1 < len1 && p2 < len2){
            prefix++;  /*Counting the space*/
            p1++;
            p2++;
        }else{
            break;  /*One of the sequences is over*/
        }
    }

    /* Every compared Icharacter matched: equal total lengths mean equal
     * sequences, otherwise one sequence is a proper prefix of the other */
    if(len1 == len2){
        return -1;
    }
    return prefix;
}

/* Finds the outgoing edge of node q whose first Icharacter equals the first
 * Icharacter of the given Isuffix.
 *
 *  cdaig   Pointer to the cdaig nodes array.
 *  q       Current node index.
 *  isuffix Isuffix we want to add to the cdaig; only its first Icharacter
 *          (the text up to the first space) is used.
 *  data    File pointer to the text data.
 *  n       Number of rows.
 *  m       Number of columns.
 *
 *  Returns: index of the first matching edge of node q, or -1 if no edge
 *           matches. The process exits on allocation failure.
 */
int32_t find_edge(struct node* cdaig, uint32_t q, char* isuffix, FILE* data, int n, int m) {

    uint32_t j;
    int32_t found = -1;
    size_t first_len = strcspn(isuffix, " ");   /* length of the first Icharacter */
    char *ichar_og, *cmp;

    /* Get the first Icharacter of the Isuffix */
    ichar_og = malloc((first_len + 1) * sizeof(char));
    if(!ichar_og){
        printf("Memory allocation error\n");
        exit(EXIT_FAILURE);
    }
    memcpy(ichar_og, isuffix, first_len);
    ichar_og[first_len] = '\0';

    /* Iterate through the edges of node q until one matches */
    for (j = 0; j < cdaig[q].n_edges && found == -1; j++) {
        /* Get the first Icharacter of the edge */
        cmp = get_Icharacter(cdaig[q].edges[j].beginpos, cdaig[q].edges[j].is_first, cdaig[q].edges[j].length, data, n, m);

        /* istrcmp returns -1 when both Icharacters are equal */
        if (istrcmp(ichar_og, cmp) == -1) {
            found = (int32_t)j;
        }
        free(cmp);
    }

    free(ichar_og);
    return found;
}

/* Implements the SlowFind function to locate the longest matching prefix in the CDAIG.
 *
 *  p: Index of the starting node.
 *  data: File pointer to the input text data.
 *  e: File position from which to start matching.
 *  cdaig: Pointer to the array of CDAIG nodes.
 *  n: Number of rows.
 *  m: Number of columns.
 *
 * The Isuffix starting at e is built with 2*k - 1 Icharacters, where k is the
 * smaller of the distances from e to the bottom and to the right border.
 * Then, starting at node p, the function repeatedly:
 *   1. looks for an edge whose first Icharacter matches the remaining Isuffix
 *      (find_edge);
 *   2. reads the whole label of that edge (get_Istring) and compares it with
 *      the remaining Isuffix (istrcmp);
 *   3. if the label was matched completely, moves to the destination node,
 *      drops the matched head from the Isuffix and continues; otherwise it
 *      stops in the middle of the edge and returns.
 *
 * Returns: An edge_partition struct containing the resulting node (q), the
 * index of the edge (i), the number of Icharacters matched on that edge
 * (gamma), and the description of the unmatched tail (ilength, length, first,
 * pos) plus the accumulated path counter, i.e. all the information required
 * to perform a partition on the edge.
 *
 * All temporary strings are released before returning. The process exits on
 * allocation failure.
 */

struct edge_partition slow_find(uint32_t p, FILE* data, uint32_t e, struct node* cdaig, int n, int m) {
    struct edge_partition result;
    int k, i2, j2 = 0, n_ichar1, n_ichar2, end, start, bottom_bound, right_bound;
    int32_t rho, cmp;
    uint32_t ilength, fe_counter = 0, last_last_node = 1, last_node = 1, last_last_edge, last_edge;
    result.q = p;
    result.gamma = 0;
    result.ilength = 0;
    result.i = 0;
    result.pos = e;
    result.first = true;
    result.length = 1;
    result.path = 0;
    char *isuffix, *iedge, *head, *tail;
    size_t head_len, tail_len, i;

    /* Computation of the number of Icharacters*/

    bottom_bound = n - (e / m);
    right_bound = m - (e % m);

    if(right_bound < bottom_bound){
        k = right_bound;
    }else{
        k = bottom_bound;
    }

    ilength = 2 * k -1;

    /* Build the suffix */
    isuffix = get_Istring(e, true, ilength, 1, data, n, m);

    result.ilength = ilength;
    /* Find the first matching edge from node p */
    rho = find_edge(cdaig, p, isuffix, data, n, m);
    fe_counter ++;
    last_last_edge = rho;
    last_edge = rho;
    if(rho != -1){
        result.q = p;
        result.gamma = 0;
        result.ilength = 0;
        result.i = 0;
        result.pos = 0;
        result.first = true;
        result.length = 1;
    }
    while (rho != -1) {
        last_edge = result.i;
        result.i = rho;

        /* Get the label */
        iedge = get_Istring(cdaig[result.q].edges[rho].beginpos, cdaig[result.q].edges[rho].is_first, cdaig[result.q].edges[rho].ilength, cdaig[result.q].edges[rho].length, data, n, m);
        /* Compare the two string */
        cmp = istrcmp(isuffix, iedge);

        /* cmp == -1: both strings are equal; cmp == strlen(iedge): the whole
         * label is a prefix of the suffix */
        if(cmp == -1 || (size_t)cmp == strlen(iedge)){ /* A node have been arrived */
            /* Compute path: count how many times the toggled orientation flag
             * becomes false along the edge taken two steps ago */
            if(fe_counter > 2){
                bool first = cdaig[last_last_node].edges[last_last_edge].is_first;
                uint32_t j;
                for(j=0; j<cdaig[last_last_node].edges[last_last_edge].ilength; j++){
                    first = !first;
                    if(first == false){
                        result.path ++;
                    }
                }
            }
            /* Update last_node variables */
            if(fe_counter > 2){
                last_last_node = last_node;
                last_last_edge = last_edge;
            }
            last_node = result.q;
            /* Update result */
            result.q = cdaig[result.q].edges[rho].dest;


            /* Split the suffix into the matched head and the remaining tail */
            head_len = strlen(iedge);
            tail_len = strlen(isuffix) - head_len;

            head = malloc((head_len + 1)*sizeof(char));
            tail = malloc((tail_len + 1)*sizeof(char));
            if(head == NULL || tail == NULL){
                fprintf(stderr, "Error: malloc failed\n");
                exit(1);
            }

            /* head_len <= strlen(isuffix) holds in this branch, so both
             * copies stay inside isuffix */
            memcpy(head, isuffix, head_len);
            head[head_len] = '\0';

            memcpy(tail, isuffix + head_len, tail_len);
            tail[tail_len] = '\0';

            /* Erasing the first space if there is one */
            if (tail[0] == ' ') {
                size_t len = strlen(tail);
                memmove(tail, tail + 1, len);   /* moves the '\0' too */
                char *tmp = realloc(tail, len);
                if (tmp == NULL) {
                    fprintf(stderr, "Error: realloc failed\n");
                    free(tail);
                    exit(1);
                }
                tail = tmp;
            }


            /* For loop to count icharacters */
            tail_len = strlen(tail);
            result.ilength = 0;
            if(tail_len > 1){
                for(i = 0; i < tail_len; i++){
                    if((tail[i+1] == ' ' || tail[i+1] == '\0' ) && tail[i] != ' '){
                        result.ilength ++;
                    }
                }
            }

            /* The suffix was consumed completely: reset the result fields */
            if(cmp == -1){
                result.first = true;
                result.gamma = 0;
                result.pos = 0;
                result.i = 0;
            }

            /* Get the last Icharacter of the head */
            end = strlen(head) - 1;
            while(end >=0 && head[end] == ' ') end--;
            start = end;
            while(start >= 0 && head[start] != ' ') start--;

            j2 = 0;
            /* Get the first Icharacter of the tail */
            if(tail[0] == ' ') j2++;
            for(i=j2; i<strlen(tail); i++){
                if(tail[i]==' '){
                    break;
                }
            }
            i2 = j2;
            j2 = i;
            /* Count the number of elements on the Icharacter */
            n_ichar1 = end - start;
            n_ichar2 = j2 - i2;

            /* Same length as the previous Icharacter: second one of its pair */
            if(n_ichar1 == n_ichar2){
                result.first = false;
            }else{
                result.first = true;
            }

            /* isuffix = tail */
            free(isuffix);
            isuffix = tail;
            tail = NULL;
            free (head);

            /* Update length */
            result.length = n_ichar2;

            /* Update position */
            if(result.first == true){
                result.pos = e + result.length - 1;
            }else{
                result.pos = e + result.length * m;
            }
            free(iedge);
            rho = find_edge(cdaig, result.q, isuffix, data, n, m);
            fe_counter ++;
        }else{  /* The head ends at the middle of an edge */
            result.ilength = 0;
            /* Compute path */
            if(fe_counter > 1){
                bool first = cdaig[last_last_node].edges[last_last_edge].is_first;
                uint32_t j;
                for(j=0; j<cdaig[last_last_node].edges[last_last_edge].ilength; j++){
                    first = !first;
                    if(first == false){
                        result.path ++;
                    }
                }
            }
            /* Get if it is the first or not */
            /* Split the suffix into the matched head (cmp characters) and the tail */
            j2=0;
            head = malloc((cmp + 1) * sizeof(char));
            if(head == NULL){
                fprintf(stderr, "Error: malloc failed\n");
                exit(1);
            }
            memcpy(head, isuffix, cmp);
            head[cmp] = '\0';
            head_len = strlen(head);
            tail_len = strlen(isuffix) - cmp;
            tail = malloc((tail_len + 1) * sizeof(char));
            if(tail == NULL){
                fprintf(stderr, "Error: malloc failed\n");
                exit(1);
            }
            memcpy(tail, isuffix + cmp, tail_len);
            tail[tail_len] = '\0';

            /* Erasing the first space if there is one */
            if (tail[0] == ' ') {
                size_t len = strlen(tail);
                memmove(tail, tail + 1, len);   /* moves the '\0' too */
                char *tmp = realloc(tail, len);
                if (tmp == NULL) {
                    fprintf(stderr, "Error: realloc failed\n");
                    free(tail);
                    exit(1);
                }
                tail = tmp;
                tail_len = len;
            }

            /* Get the last Icharacter of the head */
            /* NOTE(review): unlike the branch above, the scan starts at
             * strlen(head) (the terminator), not at strlen(head) - 1.
             * Kept as is; confirm this is intended. */
            end = strlen(head);
            while(end >=0 && head[end] == ' ') end--;
            start = end;
            while(start >= 0 && head[start] != ' ') start--;

            /* Get the first Icharacter of the tail */
            for(i=j2; i<strlen(tail); i++){
                if(tail[i]==' '){
                    break;
                }
            }
            i2 = j2;
            j2 = i;
            /* Count the number of elements on the Icharacter */
            n_ichar1 = end - start;
            n_ichar2 = j2 - i2;
            if(n_ichar1 == n_ichar2){
                result.first = false;
            }else{
                result.first = true;
            }
            /* Get the length of the first icharacter of the tail */
            result.length = n_ichar2;

            /* gamma: number of Icharacters in the matched head */
            for(i = 0; i < head_len ; i++){
                if(head[i+1] == ' ' && head[i] != ' ' ){
                    result.gamma ++;
                }else{
                    if(head[i+1] == '\0' && head[i] != ' '){
                        result.gamma ++;
                    }
                }
            }
            /* ilength: number of Icharacters left in the tail */
            if(tail_len > 1){
                for(i = 0; i < tail_len; i++){
                    if((tail[i+1] == ' ' || tail[i+1] == '\0' ) && tail[i] != ' '){
                        result.ilength ++;
                    }
                }
            }

            /* Update result */
            result.i = rho;

            /* Update position */
            if(result.first == true){
                result.pos = e + result.length - 1;
            }else{
                result.pos = e + result.length * m;
            }
            /* The whole suffix was matched inside the edge */
            if((size_t)cmp == strlen(isuffix)){
                result.pos = n * m;
            }

            /* Make the necessary frees (isuffix included: it is no longer
             * needed once we return) */
            free(head);
            free(tail);
            free(iedge);
            free(isuffix);
            return result;
        }

    }
    free(isuffix);
    return result;
}

/* Appends one empty node to an existing DFA.
 *  Param:
 *      · cdaig: address of the pointer to the node array (DFA)
 *      · v: address of the number of nodes; incremented by one on return
 *
 *  The node array is grown in place with realloc, so the existing nodes and
 *  their edge arrays are kept untouched instead of being copied one by one.
 *  The new node (index *v before the call) has no edges, s = 0 and every
 *  other byte set to zero.
 *
 *  Returns nothing. *cdaig may point to a new memory block afterwards, so
 *  any pointer into the old array must be considered invalid.
 *  The process exits if the memory cannot be allocated.
 */
void add_node(struct node** cdaig, uint32_t* v) {
    struct node* grown = realloc(*cdaig, ((size_t)*v + 1) * sizeof(struct node));
    if (grown == NULL) {
        printf("No memory allocated\n");
        exit(1);
    }

    /* realloc leaves the new slot uninitialised: clear it first */
    memset(&grown[*v], 0, sizeof(struct node));
    grown[*v].edges = NULL;
    grown[*v].s = 0;
    grown[*v].n_edges = 0;

    *cdaig = grown;
    (*v)++;
}

/*Function that adds an edge to an existing node
 * Param:
 *      · cdaig: DFA
 *      · e: the output of slow_find
 *      · dest: destination node of the new edge
 * 
 *  The function does not return anything. Instead, it updates the pointer cdaig[src]->edges
 */
void add_edge(struct node* cdaig, struct edge_partition e, uint32_t dest) {

    struct edge* new_edges = (struct edge*)calloc(cdaig[e.q].n_edges + 1, sizeof(struct edge));
    if (new_edges == NULL) {
        printf("No memory allocated\n");
        exit(1);
    }

    for (uint32_t i = 0; i < cdaig[e.q].n_edges; ++i) {
        new_edges[i] = cdaig[e.q].edges[i];
    }

    new_edges[cdaig[e.q].n_edges].beginpos = e.pos;
    new_edges[cdaig[e.q].n_edges].dest = dest;
    new_edges[cdaig[e.q].n_edges].ilength = e.ilength;
    new_edges[cdaig[e.q].n_edges].is_first = e.first;
    new_edges[cdaig[e.q].n_edges].length = e.length;

    free(cdaig[e.q].edges);
    cdaig[e.q].edges = new_edges;
    cdaig[e.q].n_edges++;
}

/* Generates a Compact Direct Acyclic Image Graph (cdaig)
*   Param:
*       ·cdaig: Address of the pointer that will receive the node array.
*               The array is allocated here; any previous value is overwritten.
*       ·data: data file containing the source
*       ·n: number of rows of the source
*       ·m: number of columns of the source
*       ·suff: counter incremented once for every position whose slow_find
*              result ends on a node other than node 0
*
*   The graph starts with two nodes (0 and 1). For every position i in
*   [0, n*m) the Isuffix starting at i is searched from node 1 with
*   slow_find, and then either a new edge to node 0 is added at the node
*   reached, or the edge where the search stopped is split by a new node.
*   Finally hopcroft() is called on the result.
*
*   Returns:
*       ·Number of nodes (value of v after the hopcroft() call)
*       ·cdaig given by parameter
*
*/
uint32_t cdaig_generation(struct node** cdaig, FILE* data, int n, int m, int* suff) {
    uint32_t p, v, j;
    struct edge_partition sf_edge, edge_tail;
    int i;


    p = 1;   /* every search starts at node 1 */
    i = 0;   /* current start position in the data */
    v = 2;   /* current number of nodes */

    *cdaig = (struct node*)calloc(v, sizeof(struct node));
    if (*cdaig == NULL) {
        printf("No memory allocated\n");
        exit(1);
    }
    for (j = 0; j < v; ++j) {
        (*cdaig)[j].edges = NULL;
        (*cdaig)[j].s = 0;
        (*cdaig)[j].n_edges = 0;
    }

    /* Both initial nodes start without edges and with s = 1 */
    (*cdaig)[0].n_edges = 0;
    (*cdaig)[1].n_edges = 0;
    (*cdaig)[1].s = 1;
    (*cdaig)[0].s = 1;
    
    
    while (i < n*m) {
        /*printf("i=%d\n", i);
        if (i % 1000 == 0) {
            printf("I'm on suffix %d\n", i);
        }*/
        
        /* Locate where the Isuffix starting at i leaves the graph */
        sf_edge = slow_find(p, data, i, *cdaig, n, m);
        
        if (sf_edge.q != 0) {
            (*suff) ++;

            
            if (sf_edge.gamma == 0) {
                /* The search stopped exactly on a node: hang the rest of the
                 * suffix from it as a new edge to node 0 */

                add_edge(*cdaig, sf_edge, 0);
                
                
            }
            else {   /*The locus is in between the edge*/
                /*Make a struct edge_partition variable with the information from the tail of the edge we are going to partition (edge tail)*/

            
                /* The tail keeps the Icharacters that come after the first gamma ones */
                edge_tail.ilength = (*cdaig)[sf_edge.q].edges[sf_edge.i].ilength - sf_edge.gamma;
                /* v is the index the new node will get from add_node below */
                edge_tail.q = v;
                edge_tail.i = sf_edge.i;
                edge_tail.gamma = sf_edge.gamma;
                /* This first assignment is overwritten by the next line */
                edge_tail.length = sf_edge.length;
                edge_tail.length = (*cdaig)[sf_edge.q].edges[sf_edge.i].length;
                /* Walk gamma Icharacters along the edge: the orientation flag
                 * toggles at each step and the symbol count grows every time
                 * the flag becomes true */
                bool is_first = (*cdaig)[sf_edge.q].edges[sf_edge.i].is_first;
                for(j = 0; j < sf_edge.gamma; j++){
                    is_first = !is_first;
                    if(is_first){
                        edge_tail.length ++;
                    }
                }
                edge_tail.first = is_first;

                /* Start position of the tail, relative to the start of the split edge */
                if(edge_tail.first == true){
                    edge_tail.pos = (*cdaig)[sf_edge.q].edges[sf_edge.i].beginpos + edge_tail.length - 1 - sf_edge.path * m;
                }else{
                    edge_tail.pos = (*cdaig)[sf_edge.q].edges[sf_edge.i].beginpos + edge_tail.length * m;
                }
                
                
                /* Create the splitting node. add_node replaces *cdaig, so the
                 * array is always accessed through *cdaig from here on */
                add_node(cdaig, &v);
                uint32_t old_sf_node = sf_edge.q;
                sf_edge.q = v - 1;   /* index of the node just created */
                
                /* New node -> node 0: the unmatched rest of the suffix */
                add_edge(*cdaig, sf_edge, 0);
                /* add_edge already stores e.ilength and e.first in the new
                 * edge; these two lines write the same values again */
                (*cdaig)[sf_edge.q].edges[(*cdaig)[sf_edge.q].n_edges - 1].ilength = sf_edge.ilength;
                (*cdaig)[sf_edge.q].edges[(*cdaig)[sf_edge.q].n_edges - 1].is_first = sf_edge.first;
                
                
                /* New node -> old destination: the tail of the split edge */
                add_edge(*cdaig, edge_tail, (*cdaig)[old_sf_node].edges[sf_edge.i].dest);
                
                (*cdaig)[edge_tail.q].edges[(*cdaig)[edge_tail.q].n_edges - 1].ilength = edge_tail.ilength;
                (*cdaig)[edge_tail.q].edges[(*cdaig)[edge_tail.q].n_edges - 1].is_first = edge_tail.first;

                /* The original edge now ends at the new node and keeps only
                 * its first gamma Icharacters */
                (*cdaig)[old_sf_node].edges[sf_edge.i].dest = v - 1;
                (*cdaig)[old_sf_node].edges[sf_edge.i].ilength = sf_edge.gamma;
                
                
                
                
                
            }
        }

        p = 1;
        i++;
    }
    
    
    /*Node comparison and minimization*/

    hopcroft(cdaig, &v, data, n, m);



    return v;
}