#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include "hopcroft.h"
#include "cdaig.h"


/* Function that tries to break a set S into two sets of "distinguishable" states
*   Param:
*        cdaig: The DFA
*        v: number of nodes of cdaig
*        s: set we want to partition
*        p: the set of all partitions
*        np: number of sets in p
*        data: data file containing the original string
*        n: number of rows
*        m: number of columns
*
*   Returns:
*       The set with the partition made (if needed)
*/
struct partition_vect* split(struct node* cdaig, struct partition s, struct partition* p, uint32_t np, FILE* data, int n, int m) {
    uint32_t index, count_t = 0, count_f = 0, hits = 0;
    uint32_t i = 0, j, k, read, ipos, jpos, ni, nj, length;
    char* ibuffer;
    char* jbuffer;
    int* vect;  /* binary vector */
    struct partition s_new;
    struct partition* aux;

    /* Allocate memory for the result vector */
    struct partition_vect* results = calloc(1, sizeof(struct partition_vect));
    results->n_result = 1;
    results->result = calloc(1, sizeof(struct partition));
    if (!results->result) {
        printf("Error in the allocation of memory for the result (split function)\n");
        return results;
    }

    /* Copy S into S_new */
    s_new.n_elem = s.n_elem;
    s_new.elem = calloc(s_new.n_elem, sizeof(uint32_t));

    for (i = 0; i < s_new.n_elem; i++) {
        s_new.elem[i] = s.elem[i];
    }

    /* Copy S into the result variable */
    results->result->n_elem = s.n_elem;
    results->result->elem = calloc(results->result->n_elem, sizeof(uint32_t));
    for (i = 0; i < results->result->n_elem; i++) {
        results->result->elem[i] = s.elem[i];
    }

    /* Compare all the elements on the set with the first one.*/
    /* Case dissertion */
    i = 0;
    /* Allocate memory for the boolean vector */
    vect = calloc(s_new.n_elem, sizeof(int));
    vect[i] = 1;

    length = 0;


    for (j = i + 1; j < s_new.n_elem; j++) {
        hits = 0;
        /* Check if the nodes have the same number of outgoing edges */
        if (cdaig[s_new.elem[i]].n_edges == cdaig[s_new.elem[j]].n_edges) {
            for (index = 0; index < cdaig[s_new.elem[i]].n_edges; index++) {
                for (k = 0; k < cdaig[s_new.elem[j]].n_edges; k++) {

                    /* Get the label of the edge index */           /*Aqui esta el problema (no mira per a 2D, nomes per a 1D)*/
                    if ((cdaig[s_new.elem[i]].edges[index].length == cdaig[s_new.elem[j]].edges[k].length) && (cdaig[s_new.elem[i]].edges[index].ilength == cdaig[s_new.elem[j]].edges[k].ilength)) {
                        /*fseek(data, cdaig[s_new.elem[i]].edges[index].beginpos, SEEK_SET);
                        ibuffer = calloc((cdaig[s_new.elem[i]].edges[index].length + 1), sizeof(char));
                        read = fread(ibuffer, sizeof(char), cdaig[s_new.elem[i]].edges[index].length, data);
                        ibuffer[read] = '\0';*/
                        ibuffer = get_Istring(cdaig[s_new.elem[i]].edges[index].beginpos, cdaig[s_new.elem[i]].edges[index].is_first, cdaig[s_new.elem[i]].edges[index].ilength, cdaig[s_new.elem[i]].edges[index].length, data, n, m);
                        /* Get the label of the edge k */
                        /*fseek(data, cdaig[s_new.elem[j]].edges[k].beginpos, SEEK_SET);
                        jbuffer = calloc((cdaig[s_new.elem[j]].edges[k].length + 1), sizeof(char));
                        read = fread(jbuffer, sizeof(char), cdaig[s_new.elem[j]].edges[k].length, data);
                        jbuffer[read] = '\0';*/
                        jbuffer = get_Istring(cdaig[s_new.elem[j]].edges[k].beginpos, cdaig[s_new.elem[j]].edges[k].is_first, cdaig[s_new.elem[j]].edges[k].ilength, cdaig[s_new.elem[j]].edges[k].length, data, n, m);
                        /* Check if there is an edge with the same label */
                        if (istrcmp(ibuffer, jbuffer) == -1) {
                            /* Check the destination of the edges */
                            /* Get the set where the node cdaig[s.elem[i]].edges[index].dest belongs */
                            for (ni = 0; ni < np; ni++) {
                                for (ipos = 0; ipos < p[ni].n_elem; ipos++) {
                                    if (p[ni].elem[ipos] == cdaig[s_new.elem[i]].edges[index].dest) {
                                        break;
                                    }
                                }
                                if (ipos != p[ni].n_elem) {
                                    break;
                                }
                            }

                            /* Get the set where the node cdaig[s.elem[j]].edges[k].dest belongs */
                            for (nj = 0; nj < np; nj++) {
                                for (jpos = 0; jpos < p[nj].n_elem; jpos++) {
                                    if (p[nj].elem[jpos] == cdaig[s_new.elem[j]].edges[k].dest) {
                                        break;
                                    }
                                }
                                if (jpos != p[nj].n_elem) {
                                    break;
                                }
                            }

                            if (ni == nj) { /* If the destination nodes are in the same set */
                                /* Update Boolean vector */
                                hits++;
                            }
                        }
                        free(ibuffer);
                        free(jbuffer);
                    }
                }
            }
            if (hits == cdaig[s_new.elem[i]].n_edges) {
                vect[j] = 1;
            }
        }
        else {
            vect[j] = 0;
        }
    }



    /* Get the number of diferent elements on the new set */
    for (k = 0; k < s_new.n_elem; k++) {
        length += vect[k];
    }
    /* Divide the set */
    if (length != s_new.n_elem) {
        aux = calloc(2, sizeof(struct partition));
        aux[0].n_elem = length;     /* vector containing the nodes with vect[node]=1 */
        aux[0].elem = calloc(aux[0].n_elem, sizeof(uint32_t));

        aux[1].n_elem = s_new.n_elem - length;  /* vector containing the nodes with vect[node]=0 */
        aux[1].elem = calloc(aux[1].n_elem, sizeof(uint32_t));

        /* Store the nodes on the set they correspond */
        for (i = 0; i < s_new.n_elem; i++) {
            if (vect[i] == 0) {
                aux[1].elem[count_f] = s_new.elem[i];
                count_f++;
            }
            else {
                aux[0].elem[count_t] = s_new.elem[i];
                count_t++;
            }
        }
        /* Store the sets on results variable */
        results->n_result = 2;
        free(results->result->elem);
        free(results->result);

        
        results->result = aux;
        aux = NULL;

    }
    /* Frees */
    free(vect);

    free(s_new.elem);


    return results;
}

/* Union of two sets replacing one of the elements of one set
*   Param:
*       set_of_sets: set containing all the sets. 
*       n_set_of_sets: number of sets in set_of_sets
*       new_set: set we want to add to the set_of_sets. 
*       n_new_set: number of sets in new_set
*       final_set: the result of the union. 
*       n_final_set: number of sets in final_set
*       index: the index of the element we want to erase. If index = -1, we do not erase any element
*
*   Returns:
*       Nothing (the set is returned as a parameter).
*/
void set_union(struct partition* set_of_sets, uint32_t n_set_of_sets, struct partition* new_set, uint32_t n_new_set, struct partition* final_set, uint32_t n_final_set, int32_t index) {
    uint32_t k, skip;
    int32_t j;

    /* Compute the number of elements of the final set (The elements we had before except the set we want to delete + the number of new sets) */
    if (index == -1) {

        /* Copy all the elements of the old set */
        for (j = 0; j < n_set_of_sets; j++) {
            final_set[j].n_elem = set_of_sets[j].n_elem;
            final_set[j].elem = calloc(final_set[j].n_elem, sizeof(uint32_t));
            for (k = 0; k < final_set[j].n_elem; k++) {
                final_set[j].elem[k] = set_of_sets[j].elem[k];
            }
        }

        /* Add the new elements */
        for (j = 0; j < n_new_set; j++) {
            final_set[j + n_set_of_sets].n_elem = new_set[j].n_elem;
            final_set[j + n_set_of_sets].elem = calloc(final_set[j + n_set_of_sets].n_elem, sizeof(uint32_t));
            for (k = 0; k < new_set[j].n_elem; k++) {
                final_set[j + n_set_of_sets].elem[k] = new_set[j].elem[k];
            }
        }
    }
    else {
        skip = 1;


        /* Copy all the elements of the old set */
        for (j = 0; j < n_set_of_sets - skip; j++) {

            if (j < index) {
                final_set[j].n_elem = set_of_sets[j].n_elem;
                final_set[j].elem = calloc(final_set[j].n_elem, sizeof(uint32_t));
                for (k = 0; k < final_set[j].n_elem; k++) {
                    final_set[j].elem[k] = set_of_sets[j].elem[k];
                }
            }
            else {
                final_set[j].n_elem = set_of_sets[j + skip].n_elem;
                final_set[j].elem = calloc(final_set[j].n_elem, sizeof(uint32_t));
                for (k = 0; k < final_set[j].n_elem; k++) {
                    final_set[j].elem[k] = set_of_sets[j + skip].elem[k];
                }
            }
        }

        /* Add the new elements */
        for (j = 0; j < n_new_set; j++) {

            final_set[j + n_set_of_sets - 1].n_elem = new_set[j].n_elem;
            final_set[j + n_set_of_sets - 1].elem = calloc(final_set[j + n_set_of_sets - 1].n_elem, sizeof(uint32_t));
            for (k = 0; k < new_set[j].n_elem; k++) {
                final_set[j + n_set_of_sets - 1].elem[k] = new_set[j].elem[k];
            }
        }
    }

    return;
}

/* Three-way comparison of two uint32_t values, usable as a qsort callback.
*
*   Param:
*       a: pointer to the first uint32_t
*       b: pointer to the second uint32_t
*
*   Return:
*       -1 if *a < *b, 1 if *a > *b, 0 if both are equal
*/
int compare_uint32(const void* a, const void* b) {
    const uint32_t* lhs = a;
    const uint32_t* rhs = b;

    if (*lhs < *rhs) {
        return -1;
    }
    if (*lhs > *rhs) {
        return 1;
    }
    return 0;
}

/*Compare two sets (after ordering internally)
*
*   Both sets are copied, the copies are sorted with qsort/compare_uint32 and
*   then compared byte by byte; a and b themselves are not modified.
*
*   Param:
*       a: A set we want to compare
*       b: The other set to compare
*
*   Return:
*       True: if a and b are equal (two empty sets are equal)
*       False: if a and b are different, or if the temporary buffer could
*              not be allocated
*/
bool same_set(struct partition* a, struct partition* b) {
    size_t count;
    uint32_t* sorted_a;
    uint32_t* sorted_b;
    bool equal;

    if (a->n_elem != b->n_elem) {
        return false;
    }

    /* Two empty sets are trivially equal. Returning here also avoids
     * calloc(0, ...), whose result may legitimately be NULL and would
     * otherwise be mistaken for an allocation failure. */
    count = a->n_elem;
    if (count == 0) {
        return true;
    }

    /* One buffer holds both sorted copies: first half for a, second for b.
     * calloc checks the count * size product for overflow. */
    sorted_a = calloc(count, 2 * sizeof(uint32_t));
    if (!sorted_a) {
        return false;
    }
    sorted_b = sorted_a + count;

    memcpy(sorted_a, a->elem, count * sizeof(uint32_t));
    memcpy(sorted_b, b->elem, count * sizeof(uint32_t));
    qsort(sorted_a, count, sizeof(uint32_t), compare_uint32);
    qsort(sorted_b, count, sizeof(uint32_t), compare_uint32);

    equal = memcmp(sorted_a, sorted_b, count * sizeof(uint32_t)) == 0;

    free(sorted_a);
    return equal;
}

/*Compare two set of sets
*
*   Each set of A is paired with a distinct, not yet paired set of B that
*   same_set() reports as equal; the order of the sets does not matter.
*
*   Param:
*       A: one of the sets of sets
*       len_A: number of sets in A
*       B: the other set of sets
*       len_B: number of sets in B
*
*   Return:
*       True: If both contains the same sets (two empty arrays are equal)
*       False: If there is one set in one set of sets which is not in the other,
*              or if the bookkeeping array could not be allocated
*/
bool same_partition_array(struct partition* A, size_t len_A, struct partition* B, size_t len_B) {
    size_t ia, ib;
    bool* used_B;      /* used_B[ib] is true once B[ib] has been paired */
    bool all_found = true;

    if (len_A != len_B) {
        return false;
    }

    /* Nothing to compare. Returning here also avoids calloc(0, ...), which
     * may return NULL without that being an allocation failure. */
    if (len_A == 0) {
        return true;
    }

    used_B = calloc(len_B, sizeof *used_B);
    if (!used_B) {
        return false;
    }

    for (ia = 0; ia < len_A && all_found; ia++) {
        all_found = false;
        for (ib = 0; ib < len_B; ib++) {
            if (!used_B[ib] && same_set(&A[ia], &B[ib])) {
                used_B[ib] = true;
                all_found = true;
                break;
            }
        }
    }

    free(used_B);
    return all_found;
}



/* Hopcroft's Algorithm to compute the DFA minimization
*   Param:
*       cdaig: Deterministic Finite State Automaton
*       v: number of nodes of the automaton
*       data: file containing the values of the edges' labels
*
*   Returns:
*       Does not return nothing.
*       Changes the variable cdaig to a minimized variable.
*/
void hopcroft(struct node** cdaig, uint32_t* v, FILE* data, int n, int m) {
    struct partition* current, * p, * new_current; 
    uint32_t i, n_current = 2, n_p = 0, j, new_n_current, k, l, n_new = 0;
    struct partition_vect* new_set;
    struct node* new_machine;

    /* Current = {F, Q-F} */
    current = calloc(2, sizeof(struct partition));

    /* Current[0]=F */
    current[0].n_elem = 1;
    current[0].elem = calloc(current[0].n_elem, sizeof(uint32_t));
    current[0].elem[0] = 0;

    /* Current[1]=Q-F */
    current[1].n_elem = *v - 1;
    current[1].elem = calloc(current[1].n_elem, sizeof(uint32_t));

    /* The only final state is node 0, so the other v-1 are non terminal states */
    for (i = 0; i < current[1].n_elem; i++) {
        current[1].elem[i] = i + 1;
    }
    p = calloc(1, sizeof(struct partition));
    p->n_elem = 0;
    p->elem = calloc(1, sizeof(uint32_t));
    n_p = 1;

    /* Repeat until P and Current are the same */
    do {
        /* Free p to reallocate the pointer */
        for (i = 0; i < n_p; i++) {
            free(p[i].elem);
        }
        free(p);
        /* P = Current & Current = P */
        p = calloc(n_current, sizeof(struct partition));
        n_p = n_current;
        for (i = 0; i < n_current; i++) {
            p[i].n_elem = current[i].n_elem;
            p[i].elem = calloc(p[i].n_elem, sizeof(uint32_t));
            memcpy(p[i].elem, current[i].elem, p[i].n_elem * sizeof(uint32_t));
        }

        /* For each set S in P */
        for (i = 0; i < n_p; i++) {
            /* Current = Current U Split(S) */
            new_set = split(*cdaig, p[i], current, n_current, data, n, m);

            /* new_current memory allocation */
            new_n_current = n_current - 1 + new_set->n_result;
            new_current = calloc(new_n_current, sizeof(struct partition));


            /* Compute the union*/
            set_union(current, n_current, new_set->result, new_set->n_result, new_current, new_n_current, 0);

            /* Update current and n_current */
            for (j = 0; j < n_current; j++) {
                free(current[j].elem);
            }
            free(current);

            n_current = new_n_current;
            current = new_current;
            new_current = NULL;

            for (j = 0; j < new_set->n_result; j++) {
                free(new_set->result[j].elem);
            }
            free(new_set->result);
            free(new_set);
        }

    } while (same_partition_array(p, n_p, current, n_current) == false);

    /* Updating the DFA */
    for (i = 0; i < n_current; i++) {
        /* Look for sets in current with more than one element */
        if (current[i].n_elem > 1) {
            /* For every element on the set (excluding the first one) */
            for (j = 1; j < current[i].n_elem; j++) {
                /* Search for nodes adjacent to them */
                for (k = 0; k < *v; k++) {
                    for (l = 0; l < (*cdaig)[k].n_edges; l++) {
                        /* If there is one */
                        if ((*cdaig)[k].edges[l].dest == current[i].elem[j]) {
                            /* Do a redirection of the edge to the first element on the set */
                            (*cdaig)[k].edges[l].dest = current[i].elem[0];
                        }
                    }
                }
            }
        }
    }


    /* Create a new value n_new with the number of sets in 'current' */
    n_new = n_current;
    /* Allocate memory for the new minimized DFA */
    new_machine = calloc(n_new, sizeof(struct node));
    /* Copy each representative node into the new machine */
    for (i = 0; i < n_new; i++) {
        uint32_t repr = current[i].elem[0];
        new_machine[i].n_edges = (*cdaig)[repr].n_edges;

        if (new_machine[i].n_edges > 0) {
            /* Allocate memory for the outgoing edges */
            new_machine[i].edges = calloc((*cdaig)[repr].n_edges, sizeof(struct edge));

            for (j = 0; j < new_machine[i].n_edges; j++) {
                /* Copy edge data */
                new_machine[i].edges[j].beginpos = (*cdaig)[repr].edges[j].beginpos;
                new_machine[i].edges[j].length = (*cdaig)[repr].edges[j].length;
                new_machine[i].edges[j].ilength = (*cdaig)[repr].edges[j].ilength;
                new_machine[i].edges[j].is_first = (*cdaig)[repr].edges[j].is_first;

                /* Remap the destination node to the corresponding representative */
                uint32_t old_dest = (*cdaig)[repr].edges[j].dest;

                for (k = 0; k < n_new; k++) {
                    for (l = 0; l < current[k].n_elem; l++) {
                        if (current[k].elem[l] == old_dest) {
                            new_machine[i].edges[j].dest = k;
                            break; /* Redirection done, we move to the next destination node */
                        }
                    }
                }
            }
        }
        else {
            new_machine[i].edges = NULL;
        }
    }

    
    
    /* Free cdaig */
    for (i = 0; i < *v; i++) {
        free((*cdaig)[i].edges);
    }
    free(*cdaig);
    /* Update the pointer and number of nodes */
    *cdaig = new_machine;
    *v = n_new;


    /* Frees */
    for (i = 0; i < n_p; i++) {
        free(p[i].elem);
    }
    free(p);

    for (i = 0; i < n_current; i++) {
        free(current[i].elem);
    }
    free(current);
}
