| Top |
Computation and printing of numerous descriptive statistics along with some hypothesis tests, for example regarding the normality of a data series.
int gretl_minmax (int t1,int t2,const double *x,double *min,double *max);
Puts the minimum and maximum values of the series x, from obs t1 to obs t2, into the variables min and max.
double gretl_sum (int t1,int t2,const double *x);
Returns: the sum of the series x from obs t1 to obs t2, skipping any missing values, or NADBL in case there are no valid observations.
double gretl_mean (int t1,int t2,const double *x);
Returns: the arithmetic mean of the series x from obs t1 to obs t2, skipping any missing values, or NADBL in case there are no valid observations.
double gretl_restricted_mean (int t1,int t2,const double *x,const double *y,GretlOp yop,double yval);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
y |
criterion series. |
|
yop |
criterion operator. |
|
yval |
criterion value. |
the arithmetic mean of the series x
in the
range t1
to t2
(inclusive), but including only
observations where the criterion variable y
bears the
relationship yop
to the value yval
-- or NADBL in case
there are no observations that satisfy the restriction.
double gretl_quantile (int t1,int t2,const double *x,double p,gretlopt opt,int *err);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
p |
probability. |
|
opt |
may include OPT_Q to hush warning when sample is too small. |
|
err |
location to receive error code. |
the p
quantile of the series x
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
int gretl_array_quantiles (double *a,int n,double *p,int k);
Computes k
quantiles (given by the elements of p
) for the
first n elements of the array a
, which is re-ordered in
the process. On successful exit, p
contains the quantiles.
double gretl_array_quantile (double *a,int n,double p);
the p
quantile of the first n
elements in a
,
which is re-ordered in the process, or NADBL on
failure.
double gretl_median (int t1,int t2,const double *x);
the median value of the series x
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
double gretl_sst (int t1,int t2,const double *x);
the sum of squared deviations from the mean for
the series x
from obs t1
to obs t2
, skipping any missing
values, or NADBL on failure.
double gretl_variance (int t1,int t2,const double *x);
the variance of the series x
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
double gretl_restricted_variance (int t1,int t2,const double *x,const double *y,GretlOp yop,double yval);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
y |
criterion series. |
|
yop |
criterion operator. |
|
yval |
criterion value. |
the variance of the series x
from obs
t1
to obs t2
, skipping any missing values and
observations where the series y
does not bear the
relationship yop
to the value yval
, or NADBL on
failure.
double gretl_stddev (int t1,int t2,const double *x);
the standard deviation of the series x
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
double gretl_restricted_stddev (int t1,int t2,const double *x,const double *y,GretlOp yop,double yval);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
y |
criterion series. |
|
yop |
criterion operator. |
|
yval |
criterion value. |
the standard deviation of the series x
from obs
t1
to obs t2
, skipping any missing values and observations
where the series y
does not bear the relationship yop
to
the value yval
, or NADBL on failure.
double gretl_long_run_variance (int t1,int t2,const double *x,int m);
the long-run variance of the series x
from obs
t1
to obs t2
, using Bartlett kernel weights, or NADBL
on failure (which includes encountering missing values).
double gretl_covar (int t1,int t2,const double *x,const double *y,int *missing);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
y |
data series. |
|
missing |
location to receive information on the number
of missing observations that were skipped, or NULL. |
the covariance of the series x
and y
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
double gretl_corr (int t1,int t2,const double *x,const double *y,int *missing);
t1 |
starting observation. |
|
t2 |
ending observation. |
|
x |
data series. |
|
y |
data series. |
|
missing |
location to receive information on the number
of missing observations that were skipped, or NULL. |
the correlation coefficient for the series x
and y
from obs t1
to obs t2
, skipping any missing values, or NADBL
on failure.
double gretl_corr_rsq (int t1,int t2,const double *x,const double *y);
the square of the correlation coefficient for the series
x
and y
from obs t1
to obs t2
, skipping any missing values,
or NADBL on failure. Used as alternative value for R^2 in a
regression without an intercept.
int gretl_moments (int t1,int t2,const double *x,const double *wts,double *xbar,double *sd,double *skew,double *kurt,int k);
Calculates sample moments for series x
from obs t1
to obs
t2
.
double * gretl_sorted_series (int v,const DATASET *dset,gretlopt opt,int *n,int *err);
v |
ID number of input series. |
|
dset |
dataset struct. |
|
opt |
may include OPT_M to flag an error in case missing values are found. |
|
n |
on input, the minimum acceptable number of non-missing observations; on output, the number of valid observations. |
|
err |
location to receive error code. |
an array containing the valid values of the
input series over the sample range given in dset
,
sorted from smallest to largest, or NULL on error.
An error is flagged if the number of valid observations
is less than that given in n
on input, or if OPT_M
is given and the input contains missing values.
void
free_freq (FreqDist *freq);
Frees all resources associated with freq
, and the
pointer itself.
int freq_setup (int v,const DATASET *dset,int *pn,double *pxmax,double *pxmin,int *nbins,double *binwidth);
FreqDist * get_freq (int varno,const DATASET *dset,double fmin,double fwid,int nbins,int params,gretlopt opt,int *err);
Calculates the frequency distribution for the specified variable.
varno |
ID number of variable to process. |
|
dset |
dataset struct. |
|
fmin |
lower limit of left-most bin (or NADBL for automatic). |
|
fwid |
bin width (or NADBL for automatic). |
|
nbins |
number of bins to use (or 0 for automatic). |
|
params |
degrees of freedom loss (generally = 1 unless we're dealing with the residual from a regression). |
|
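opt |
if includes OPT_Z, set up for comparison with the normal distribution; if includes OPT_O, compare with the gamma distribution; if includes OPT_S, results are not printed; if includes OPT_D, treat the variable as discrete; OPT_X indicates that the function is called as part of a cross-tabulation; if includes OPT_G, add the arrays needed for a plot even when not printing. |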
|
err |
location to receive error code. |
FreqDist * get_discrete_freq (int v,const DATASET *dset,gretlopt opt,int *err);
Xtab * single_crosstab (const int *list,const DATASET *dset,gretlopt opt,PRN *prn,int *err);
void
free_xtab (Xtab *tab);
Frees all resources associated with tab
, and the
pointer itself.
int model_error_dist (const MODEL *pmod,DATASET *dset,gretlopt opt,PRN *prn);
int corrgram (int varno,int order,int nparam,DATASET *dset,gretlopt opt,PRN *prn);
Computes the autocorrelation function and plots the correlogram for
the variable specified by varno
.
varno |
ID number of variable to process. |
|
order |
integer order for autocorrelation function. |
|
nparam |
number of estimated parameters (e.g. for the case of ARMA), used to correct the degrees of freedom for Q test. |
|
dset |
dataset struct. |
|
opt |
if includes OPT_R, variable in question is a model residual generated "on the fly"; OPT_U can be used to specify a plot option. |
|
prn |
gretl printing struct. |
int xcorrgram (const int *list,int order,DATASET *dset,gretlopt opt,PRN *prn);
Computes the cross-correlation function and plots the cross-correlogram for the specified variables.
int periodogram (int varno,int width,const DATASET *dset,gretlopt opt,PRN *prn);
Computes and displays the periodogram for the series specified
by varno
.
int residual_periodogram (const double *x,int width,const DATASET *dset,gretlopt opt,PRN *prn);
Computes and displays the periodogram for x
, which is presumed to
be a model residual series.
gretl_matrix * periodogram_matrix (const double *x,int t1,int t2,int width,int *err);
Implements the userspace gretl pergm function, which can be used on either a series from the dataset or a gretl vector.
int fractint (int varno,int order,const DATASET *dset,gretlopt opt,PRN *prn);
Computes and prints a test for fractional integration of the
series specified by varno
. By default the test uses the
Local Whittle Estimator but if opt
includes OPT_G then
the Geweke and Porter-Hudak test is done instead, or if
OPT_A then both tests are shown. If OPT_Q is given the
test results are not printed, just recorded (with
preference given to the LWE in case of OPT_A).
Summary * get_summary (const int *list,const DATASET *dset,gretlopt opt,PRN *prn,int *err);
Calculates descriptive summary statistics for the specified variables.
Summary * get_summary_weighted (const int *list,const DATASET *dset,int var,gretlopt opt,PRN *prn,int *err);
Calculates descriptive summary statistics for the specified variables, weighting the observations by the series identified by var. The weighting series must be of full length (dset->n).
Summary * get_summary_restricted (const int *list,const DATASET *dset,const double *rv,gretlopt opt,PRN *prn,int *err);
Calculates descriptive summary statistics for the specified variables,
with the observations restricted to those for which rv
has a non-zero
(and non-missing) value. The series rv
must be of full length (dset->n).
int list_summary (const int *list,int wgtvar,const DATASET *dset,gretlopt opt,PRN *prn);
Prints descriptive statistics for the listed variables.
list |
list of series to process. |
|
dset |
dataset struct. |
|
opt |
may include OPT_S for the "simple" version of the summary. |
|
prn |
gretl printing struct. |
void print_summary (const Summary *summ,const DATASET *dset,PRN *prn);
Prints the summary statistics for a given variable.
void print_summary_single (const Summary *s,int digits,int places,const DATASET *dset,PRN *prn);
void
free_summary (Summary *summ);
Frees all resources associated with summ
, and
the pointer itself.
VMatrix * corrlist (int ci,int *list,const DATASET *dset,gretlopt opt,int *err);
Computes pairwise correlation coefficients for the variables
specified in list
, skipping any constants. If the option
flags contain OPT_N, a uniform sample is ensured: only those
observations for which all the listed variables have valid
values are used. If OPT_C is included, we actually calculate
covariances rather than correlations.
void
free_vmatrix (VMatrix *vmat);
Frees all resources associated with vmat
, and
the pointer itself.
int gretl_corrmx (int *list,const DATASET *dset,gretlopt opt,PRN *prn);
Computes and prints the correlation matrix for the specified list of variables.
int means_test (const int *list,const DATASET *dset,gretlopt opt,PRN *prn);
Carries out test of the null hypothesis that the means of two variables are equal.
int vars_test (const int *list,const DATASET *dset,PRN *prn);
Carries out test of the null hypothesis that the variances of two variables are equal.
void print_corrmat (VMatrix *corr,const DATASET *dset,PRN *prn);
Prints a gretl correlation matrix to prn
.
double doornik_chisq (double skew,double xkurt,int n);
Calculates the Chi-square test for normality as set out by Doornik and Hansen, "An Omnibus Test for Normality", 1994. This is a modified version of the test proposed by Bowman and Shenton (Biometrika, 1975).
int multivariate_normality_test (const gretl_matrix *E,const gretl_matrix *Sigma,gretlopt opt,PRN *prn);
int mahalanobis_distance (const int *list,DATASET *dset,gretlopt opt,PRN *prn);
MahalDist * get_mahal_distances (const int *list,DATASET *dset,gretlopt opt,PRN *prn,int *err);
double gretl_gini (int t1,int t2,const double *x);
the Gini coefficient for the series x
from obs
t1
to obs t2
, skipping any missing values, or NADBL
on failure.
int gini (int varno,DATASET *dset,gretlopt opt,PRN *prn);
Graphs the Lorenz curve for the variable varno and prints the Gini coefficient.
int shapiro_wilk (const double *x,int t1,int t2,double *W,double *pval);
Computes the Shapiro-Wilk W statistic as a test for
normality of the data x
, and also the p-value for
the test. These are written into the pointer
arguments W
and pval
.
int gretl_normality_test (int varno,const DATASET *dset,gretlopt opt,PRN *prn);
Performs, and prints the results of, the specified test(s) for normality for the variable specified by varno.
gretl_matrix * gretl_normtest_matrix (const double *y,int t1,int t2,gretlopt opt,int *err);
gretl_matrix * acf_matrix (const double *x,int order,const DATASET *dset,int n,int *err);
Computes the autocorrelation function for series x
with
maximum lag order
.
gretl_matrix * xcf_vec (const double *x,const double *y,int p,const DATASET *dset,int n,int *err);
Computes the cross-correlation function for series x with series y up to maximum lag p.
double ljung_box (int m,int t1,int t2,const double *y,int *err);
m |
maximum lag. |
|
t1 |
starting observation. |
|
t2 |
ending observation. |
|
y |
data series. |
|
err |
location to receive error code. |
the Ljung-Box statistic for lag order m
for
the series y
over the sample t1
to t2
, or NADBL
on failure.
/*
 * Summary: container for descriptive summary statistics.
 *
 * Pointers to this struct are returned by get_summary(),
 * get_summary_weighted() and get_summary_restricted(), consumed by
 * print_summary() / print_summary_single(), and released with
 * free_summary().
 *
 * NOTE(review): the code that fills this struct is not visible here.
 * The per-field notes below are inferred from the field names alone
 * and should be confirmed against the implementation.
 */
typedef struct {
gretlopt opt; /* option flags -- presumably those given when the summary was built */
int n; /* presumably the number of observations -- TODO confirm */
int weight_var; /* presumably the ID of the weighting series (cf. get_summary_weighted) */
int *misscount; /* presumably a per-series count of missing observations */
int *list; /* presumably the list of series covered by the summary */
double *stats; /* NOTE(review): possibly backing storage for the arrays below -- confirm */
double *mean; /* presumably per-series arithmetic mean */
double *median; /* presumably per-series median */
double *sd; /* presumably per-series standard deviation */
double *skew; /* presumably per-series skewness */
double *xkurt; /* presumably per-series excess kurtosis */
double *low; /* presumably per-series minimum */
double *high; /* presumably per-series maximum */
double *cv; /* presumably per-series coefficient of variation */
double *perc05; /* presumably per-series 5th percentile */
double *perc95; /* presumably per-series 95th percentile */
double *iqr; /* presumably per-series interquartile range */
double sw; /* NOTE(review): meaning not evident here (possibly a "within" std. deviation) -- confirm */
double sb; /* NOTE(review): meaning not evident here (possibly a "between" std. deviation) -- confirm */
} Summary;
/*
 * FreqDist: frequency distribution for a single variable.
 *
 * Pointers to this struct are returned by get_freq() and
 * get_discrete_freq(), and released with free_freq().
 */
typedef struct {
char varname[VNAMELEN]; /* name of the variable, for ID purposes */
int discrete; /* 1 if variable contains integers */
int dist; /* code for theoretical distribution */
int numbins; /* number of bins or intervals */
double xbar, sdx; /* mean and std dev of variable */
double *midpt; /* array of midpoints of intervals */
double *endpt; /* array of endpoints of intervals */
int *f; /* frequencies in the intervals */
double test; /* either Chi-squared statistic for testing
for a Gaussian distribution, or z statistic
for testing for Gamma dist. */
int n; /* presumably the number of observations used -- TODO confirm */
int t1, t2; /* presumably the starting and ending observations of the sample -- TODO confirm */
} FreqDist;