/* * This file is part of MAMMULT: Metrics And Models for Multilayer Networks * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /** * * Fit a given sequence with a power-law function * * a*x^{b} * * The fit is actually performed as a linear fit on the * exponential-binned log-log distribution * * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <gsl/gsl_fit.h> /** * * Load a sequence from a file, which contains one element on each line * */ void load_sequence(char *fname, double **v, int *N){ int size; char buff[256]; FILE *f; f = fopen(fname, "r"); if (!f){ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); exit(1); } *N =0; size = 10; if (*v == NULL) *v = malloc(size * sizeof(double)); else{ *v = realloc(*v, size * sizeof(double)); } while (fgets(buff, 255, f)){ (*v)[*N] = atof(buff); *N += 1; if (*N == size){ size += 10; *v = realloc(*v, size * sizeof(double)); } } *v = realloc(*v, (*N) * sizeof(double)); fclose(f); } /** * * Load a sequence, getting the "col"-th column of the input file * */ void load_sequence_col(char *fname, double **v, int *N, int col){ int size, n; char buff[256]; char *ptr; FILE *f; f = fopen(fname, "r"); if (!f){ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); exit(1); } *N =0; size = 10; if (*v == NULL) *v = malloc(size * sizeof(double)); else{ *v = realloc(*v, size * sizeof(double)); } while (fgets(buff, 255, f)){ ptr = strtok(buff, " "); if (col > 0){ n = 0; while (n<col){ ptr = strtok(NULL, " "); n += 1; } } (*v)[*N] = atof(ptr); *N += 1; if (*N == size){ size += 10; *v = realloc(*v, size * sizeof(double)); } } *v = realloc(*v, (*N) * sizeof(double)); fclose(f); } /** * * Make the exponential binning of a distribution, with a giving * exponent alpha. The value of y[i] is the number of elements of v * whose value lies between x[i-1] and x[i]... * */ void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){ double min_v, max_v, val, last_val; int i, j, size, num_x; double last_size, new_size; min_v = max_v = v[0]; for (i=1; i<N; i ++){ if (v[i] > max_v) max_v = v[i]; else if (v[i] > 0 && v[i] < min_v) min_v = v[i]; } size = 10; if (*x == NULL){ *x = malloc(size * sizeof(double)); } else{ *x = realloc(*x, size * sizeof(double)); } val = min_v; last_size = min_v; (*x)[0] = min_v; num_x = 1; while(val < max_v){ new_size = last_size * alpha; val = last_size + new_size; last_size = new_size; last_val = val; (*x)[num_x] = val; num_x += 1; if (num_x == size){ size += 10; *x = realloc(*x, size * sizeof(double)); } } if (*y == NULL){ *y = malloc(num_x * sizeof(double)); } else{ *y = realloc(*y, num_x * sizeof(double)); } for (i=0; i < num_x; i++){ (*y)[i] = 0; } for(i=0; i <N; i ++){ j = 0; while(v[i] > (*x)[j]){ j ++; } (*y)[j] += 1; } *num = num_x; } /** * * Make the exponential binning of a distribution, with a giving * exponent alpha. The value of y[i] is the average of the values in * the vector "w" whose corresponding v lies between x[i-1] and x[i]... * */ void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){ double min_v, max_v, val, last_val; int i, j, size, num_x; double last_size, new_size; int *cnt; min_v = max_v = v[0]; for (i=1; i<N; i ++){ if (v[i] > max_v) max_v = v[i]; else if (v[i] > 0 && v[i] < min_v) min_v = v[i]; } size = 10; if (*x == NULL){ *x = malloc(size * sizeof(double)); } else{ *x = realloc(*x, size * sizeof(double)); } val = min_v; last_size = min_v; (*x)[0] = min_v; num_x = 1; while(val < max_v){ new_size = last_size * alpha; val = last_size + new_size; last_size = new_size; last_val = val; (*x)[num_x] = val; num_x += 1; if (num_x == size){ size += 10; *x = realloc(*x, size * sizeof(double)); } } cnt = malloc(num_x * sizeof(int)); if (*y == NULL){ *y = malloc(num_x * sizeof(double)); } else{ *y = realloc(*y, num_x * sizeof(double)); } for (i=0; i < num_x; i++){ (*y)[i] = 0; cnt[i] = 0; } for(i=0; i <N; i ++){ j = 0; while(j < num_x && v[i] > (*x)[j]){ j ++; } if(j == num_x){ printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n"); exit(37); } (*y)[j] += w[i]; cnt[j] += 1; } *num = num_x; for(i=0; i< num_x; i++){ if (cnt[i] > 0){ (*y)[i] = (*y)[i] / cnt[i]; } } free(cnt); } /** * * Print a distribution on stdout * */ void dump_distr(double *x, double *y, int N){ int i; for(i=0; i<N; i ++){ printf("%g %g\n", x[i], y[i]); } } /** * Compact a distribution, leaving only the pairs (x_i, y_i) for which * y_i > 0 * */ void compact_distr(double *x, double *y, int *num){ int i, j; i = j = 0; while(j < *num){ while(j < *num && y[j] == 0){ j ++; } if (j==*num){ break; } x[i] = x[j]; y[i] = y[j]; i ++; j ++; } *num = i; } /** * * Apply the function "f" on all the elemnts of a vector, in-place * */ void map_vec(double *v, int N, double (*f)(double)){ int i; for (i=0; i<N; i ++){ v[i] = f(v[i]); } } /** * * Normalize a distribution, dividing each y[i] for the width of the * corresponding bin (i.e., x[i] - x[i-1]) * */ void normalize_distr(double *x, double *y, int num){ int i; for(i=1; i<num; i ++){ y[i] /= (x[i] - x[i-1]); } } /** * * take two vectors (k and q) and a pairing, and compute the best * power-law fit "a*k^{mu}" of qnn(k) * */ void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a, double *corr){ static int *num; static double *q_pair, *x, *y; int i; int fit_num; double c0, c1, cov00, cov01, cov11, sqsum; if (q_pair == NULL){ q_pair = malloc(N * sizeof(double)); } for(i=0; i<N; i++){ q_pair[i] = q[pairing[i]]; } exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num); //normalize_distr(x,y, fit_num); compact_distr(x, y, &fit_num); map_vec(x, fit_num, log); map_vec(y, fit_num, log); gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11); //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11)); *a = c0; *mu = c1; *corr = cov01/sqrt(cov00 * cov11); }