Re: SAS/IML & Circumgrids

Chris Evans (sgju101@sghms.ac.uk)
Mon, 13 Mar 1995 13:08:29 +0000

> Chris:
> I would be interested in your codes for using SAS/IML(?) to
> analyse grids and the circumgrid package you referred to ...
> though I am unsure re ftp.
>
For anyone who wants it in its current state of development, SAS/IML
code follows. Circumgrids is more tricky. I copied a more recent
version over my old one sometime last year and someone who wrote to
me about it recently said his/her (her? sorry- replying in haste)
copy takes the data then falls over. I played briefly with my recent
copy and found much the same so I'm loathe to post that until I hear
more from others about the versions in circulation and their
stability!

If anyone does use the SAS/IML code I'd appreciate an acknowledgement
and any feedback going. Good luck.

Chris

options nocenter;
options macrogen symbolgen;
/*********** set title **********/
%let gridtitl = %str(BS);
title &gridtitl;

proc iml worksize=64;
/******** set maximum for loadings on plot ****/
%let top = 8;
%let btop = %eval(&top + 1);
%let step = %eval(2*&btop);
/******** set rating system used **************/
top_rat = 6;
btm_rat = 1;
/******** provide the grid to analyse *********/
/***** omit constructs with no variance *******/
mat =
{
3 3 4 4 6 4 3 4 6 6,
3 3 3 3 6 4 6 4 4 3,
3 3 6 3 4 6 4 6 6 4,
4 4 3 6 6 4 3 3 6 6,
6 6 3 4 6 6 3 4 6 3,
3 3 4 4 6 4 4 3 6 6,
3 6 3 6 6 3 3 4 3 3,
1 6 1 1 6 6 1 3 3 1,
4 4 3 4 3 4 4 3 4 3
};
/******* provide the construct labels ********/
cons_lab =
{
"Domineering"
"Sexually attractive"
"Easily controlled"
"Rejecting"
"Loving"
"Neglecting"
"Sexually intimidating"
"Protective"
"Understanding"
};
/****** provide the element labels **********/
elem_lab =
{
"U to moth"
"Moth to U"
"U to fath"
"Fath to U"
"U to part"
"Part to U"
"U to vict"
"Vict to U"
"U to ther"
"Ther to U"
};

mid_rat = (top_rat + btm_rat)/2;
range = top_rat - btm_rat;
g = range/2;
n_cons = nrow(mat);
n_elem = ncol(mat);
* number of principal components;
n_comp = n_elem - 1;
* reset if fewer constructs;
if n_cons < n_elem then n_comp=n_cons;

/*** generate labels for components ***/
comp_n = j(1,n_comp,1);
do i = 1 to n_comp;
comp_n[i] = i;
end;
comp_lab = char(comp_n,2,0);
comp_lab = concat("PC",comp_lab);

print "The following are largely self-explanatory";
print " N_COMP is the theoretical maximum no. of";
print " principal components which is the lesser";
print " of (N_ELEM - 1) or N_CONS";

print n_cons n_elem n_comp;
*****************************************************;
* O.K. now for some construct and element parameters ;
******************************************************;
av_cons = mat[,:];
min_cons = mat[,><];
max_cons = mat[,<>];
devmat = mat;
* subtract the construct means from the grid to get deviations;
do i = 1 to ncol(mat);
devmat[,i] = mat[,i] - av_cons;
end;
* get sum of squares for entire deviation matrix;
totvar = devmat[##];
* and for each construct;
var_cons = devmat[,##];
per_cons = 100*var_cons/totvar;
* get vector of difference between means and mid-point ratings;
bia_cons = av_cons-mid_rat;
******************************************************;
* Patrick's bias measure is RMS of those differences *;
* divided by half the possible range of ratings *;
******************************************************;
bias = sqrt(bia_cons[##]/n_cons);
bias = bias/abs(g);
******************************************************;
* his variability measure is the RMS of the *;
* deviations from the construct means (mean divisor *;
* the n_cons*(nelem-1) d.o.f.) divided by the *;
* full range between the rating extremes *;
******************************************************;
vari = sqrt(totvar / (n_cons*(n_elem - 1)) ) / abs(g);
print "AV_CONS is the mean rating on the construct";
print "VAR_CONS is the total deviation from the construct mean";
print "PER_CONS is the percentage of the total variance in the";
print " the grid accounted for on that construct";
print "MAX_CONS is the highest rating on the construct and";
print "MIN_CONS is the lowest rating on the construct";

print av_cons var_cons per_cons max_cons min_cons
[ROWNAME = CONS_LAB FORMAT = 6.2];

print "TOTVAR is the total sum of squares of the deviations";
print " from construct means";
print "BIAS is Patrick's measure: the mean squared difference";
print " between construct means and the mid-rating divided by";
print " the absolute half-range (honest!)";
print " I think this parametrises the tendency for the ratings";
print " on constructs to have means away from the mid-point of";
print " the rating scale used.";

print "VARI is Patrick's 'variability': the square root of";
print " the ratio of the total squared variation from construct means";
print " to the product of N_CONS*(N_ELEM - 1), the total square rooted";
print " value then divided by the absolute half-range.";
print " I think this parametrises the variation from construct";
print " means observed against that possible given the rating";
print " scale used.";

print totvar bias vari;

******************************************************;
* now get element and construct cross-products *;
******************************************************;
xprod = devmat` * devmat;
cxprod = devmat * devmat`;
* and convert the latter to a correlation matrix *;
sds = diag(1/sqrt(vecdiag(cxprod)));
ccorr = sds*cxprod*sds;
******************************************************;
******************************************************;
* this is my attempt at intensity which doesn't match*;
* that produced by INGRIDA *;
******************************************************;
******************************************************;
intens = ccorr[##];
intens = 100*intens/(n_cons*n_cons);
print "Beware, intensity doesn't match that in INGRIDA! " intens;
print " My version is 100 times the sum of squared construct";
print " intercorrelations divided by the square of N_CONS";
print " It may be that they excluded the leading diagonal (which";
print " would result in a slightly lower value). Or it may be";
print " that they used Spearman correlations or perhaps absolute";
print " correlations. *** I need to check this ***";

print "Following is full matrix of construct intercorrelations";
print " using Pearson correlation coefficients";

print ccorr
[ROWNAME = CONS_LAB COLNAME = CONS_LAB FORMAT = 6.2];

******************************************************;
* trap the correlation values of 1 on the diagonal *;
* before they cause complaints when extracting *;
* arccos angular distances *;
******************************************************;
do i=1 to n_cons;
ccorr[i,i]=0;
end;
mult = 90/arsin(1);
cang = mult*arcos(ccorr);
print "The following re-expresses those correlations as angular";
print " distances, i.e. r=0 -> ang=90 or 270; r=1 -> ang=0;";
print " r=-1 -> ang=180";

print cang
[ROWNAME = CONS_LAB COLNAME = CONS_LAB FORMAT = 6.1];

******************************************************;
* calculate the element parameters *;
******************************************************;
av_elem = devmat[+,]`;
mean_e = mat[:,]`;
var_elem = devmat[##,]`;
per_elem = 100*var_elem/totvar;
print "AV_ELEM is the sum of deviations from the construct means";
print " for the element in question (N.B. NOT the mean rating of";
print " of the element on all constructs). This follows INGRID";
print "MEAN_E _IS_ the mean element rating. This is not provided";
print " in INGRID and can be pretty meaningless but is included";
print " here partly to underline the distinction!";
print "VAR_ELEM is the sum of squared deviations from construct";
print " means for the element";
print "PER_ELEM expresses the latter as a percentage of the total";

print av_elem mean_e var_elem per_elem
[ROWNAME = ELEM_LAB FORMAT = 6.2];

******************************************************;
* get matrix of squared Euclidean inter-element *;
* distances *;
******************************************************;
sqeuc = xprod;
do i_1 = 1 to ncol(xprod);
do i_2 = 1 to ncol(xprod);
if (i_1 = i_2) then sqeuc[i_1,i_2] = 0;
else sqeuc[i_1,i_2] = xprod[i_1,i_1] + xprod[i_2,i_2] - 2*xprod[i_1,i_2];
end;
end;
* take element square roots to get Euclidean dists. *;
euc = sqeuc##.5;
print "The following is the matrix of Euclidean inter-element";
print " distances. A Euclidean distance is the square root";
print " of the sum of squared differences between the element";
print " ratings on all the constructs";
* and work out the expected Euclidean distance *;

print euc
[ROWNAME = ELEM_LAB COLNAME = ELEM_LAB FORMAT = 5.1];

s = trace(xprod);
print "The following is Patrick's 'expected distance'";
print " This is the square root of: twice the trace of the";
print " element deviation cross-product matrix divided by";
print " one less than the number of elements.";
expdist = (2*s/(ncol(xprod)-1))##.5;
print expdist;

* and get the Slater distances *;
print "The following is Patrick's interelement distances";
print " It's the Euclidean distances divided by the";
print " 'expected' distances. This serves to correct the";
print " values to take into account the number of elements";
print " and (I think) their deviations from the construct";
print " means, i.e. to correct somewhat for the rating scale";
print " and how it is being used by the person (I think!)";

slatdist = euc/expdist;

print slatdist
[ROWNAME = ELEM_LAB COLNAME = ELEM_LAB FORMAT = 5.2];

* get matrices ready for the eigenvalues and vects. *;
******************************************************;
******************************************************;
* these will contain some negative roots if n_cons *;
* is less than n_elem-1 *;
******************************************************;
******************************************************;
eigval = xprod[,1];
eigvec = xprod;
call eigen(eigval,eigvec,xprod);
perc_val = eigval/totvar;
print "The following gives the eigenvalues for each component";
print " and, usually more comprehensible, the percentage of ";
print " the total variation in the grid each contains.";
print " The rescaling is trivial as the eigenvalues just sum";
print " to the total variation in the grid.";

print eigval perc_val
[ROWNAME = COMP_LAB FORMAT = 9.4];

******************************************************;
* element vectors and construct loadings come out *;
* directly *;
******************************************************;
eigvec = eigvec[,1:n_comp];
c_load = devmat*eigvec;
******************************************************;
* create matrices for the other loadings, vectors & *;
* the residuals *;
******************************************************;
e_load = eigvec;
e_resi = eigvec;
c_vect = c_load;
c_resi = c_load;
do i = 1 to n_comp;
****** deal with any negative eigenvalues ******;
if (eigval[i] < 0) then eigval[i]=0;
* convert element vectors to loadings *;
* and construct loadings to vectors *;
mult = sqrt(eigval[i]);
c_mult = mult##(-1);
e_load[,i] = e_load[,i]#mult;
c_vect[,i] = c_load[,i]#c_mult;
* and work out the residuals by cumulative *;
* subtraction from element and construct sosq *;
var_elem = var_elem - e_load[,i]##2;
e_resi[,i] = var_elem;
var_cons = var_cons - c_load[,i]##2;
c_resi[,i] = var_cons;
end;
print "The following is the element vectors. These are";
print " standardised i.e. don't reflect the variance";
print " in the component hence I think the loadings,";
print " which are in the next matrix printed, are more";
print " useful a reflection of the Principal Component";
print " Analysis.";

print eigvec
[ROWNAME = ELEM_LAB COLNAME = COMP_LAB FORMAT = 8.4];

print e_load
[ROWNAME = ELEM_LAB COLNAME = COMP_LAB FORMAT = 8.4];

print "The following is th element residuals, i.e. the ";
print " amount of the squared deviations from the construct";
print " means left to be accounted for after extraction";
print " of the components up that that PC";

print e_resi
[ROWNAME = ELEM_LAB COLNAME = COMP_LAB FORMAT = 8.4];

print "The next three matrices give the construct vectors,";
print " loadings and residuals, as for the elements.";

print c_vect
[ROWNAME = CONS_LAB COLNAME = COMP_LAB FORMAT = 8.4];

print c_load
[ROWNAME = CONS_LAB COLNAME = COMP_LAB FORMAT = 8.4];

print c_resi
[ROWNAME = CONS_LAB COLNAME = COMP_LAB FORMAT = 8.4];

res_mat = devmat;
* now work out the actual residual grids for each *;
* of the PCs *;
/*
print res_mat;
do i = 1 to n_comp;
do j = 1 to n_cons;
do k = 1 to n_elem;
res_mat[j,k] = res_mat[j,k] - c_vect[j,i]*e_load[k,i];
end;
end;
print i res_mat [FORMAT = 8.4];
end;
*/
elemnts = e_load[,1:2];
e_num = elemnts[,1];
do i = 1 to n_elem;
e_num[i] = i;
end;
elemnts = e_num || elemnts;

constrs = c_load[,1:2];
c_num = constrs[,1];
do i = 1 to n_cons;
c_num[i] = i;
end;
constrs = c_num || constrs;

print "The following numbering is used in the grid plot";
print elemnts [ROWNAME = ELEM_LAB];
print constrs [ROWNAME = CONS_LAB];

create elemnts from elemnts [colname={'e_num' 'pc1' 'pc2'}];
append from elemnts;
close elemnts;
tmpe = elemnts;
tmpe = abs(tmpe);
create constrs from constrs [colname={'c_num' 'pc1' 'pc2'}];
append from constrs;
close constrs;

if (elemnts[<>] > &top) then do;
print "&top value exceeded in element loadings";
abort;
end;
if (constrs[<>] > &top) then do;
print "&top value exceeded in construct loadings";
abort;
end;

/* next bit uses the SAS supplied ANNOMAC macros */
%annomac;

data e_anno;
set elemnts;
format function $8.;
format text $4.;
format color $8.;
format style $8.;
format size 8.;
format hsys $1.;
format x 8.;
format xsys $1.;
format y 8.;
format ysys $1.;
format position $1.;
format angle line rotate 8.;
retain position '1';
size = 2;
retain size;
retain xsys ysys '2';
retain hsys '3';
retain style 'SWISS';
retain color 'BLUE';
retain function 'LABEL';
text = put(e_num,? $2.);
x = pc1;
y = pc2;
drop e_num pc1 pc2;
run;

data c_anno;
set constrs;
format function $8.;
format text $4.;
format color $8.;
format style $8.;
format size 8.;
format hsys $1.;
format x 8.;
format xsys $1.;
format y 8.;
format ysys $1.;
format position $1.;
format angle line rotate 8.;

retain position '1';
size = 2;
retain size;
retain xsys ysys '2';
retain hsys '3';
retain style 'SWISS';
retain color 'RED';
retain function 'LABEL';
text = put(c_num,? $2.);
x = pc1;
y = pc2;
drop c_num pc1 pc2;
run;

data axes;
format function $8.;
format text $4.;
format color $8.;
format style $8.;
format size 8.;
format hsys $1.;
format x 8.;
format xsys $1.;
format y 8.;
format ysys $1.;
format position $1.;
format angle line rotate 8.;
retain position '1';
retain xsys ysys '2';
retain hsys '3';
%label(&btop,0,'PC1',BLACK,0,0,2,SWISS,1);
%label(0,&btop,'PC2',BLACK,0,0,2,SWISS,1);
run;

data c_lines;
set constrs;
format function $8.;
format text $4.;
format color $8.;
format style $8.;
format size 8.;
format hsys $1.;
format x 8.;
format xsys $1.;
format y 8.;
format ysys $1.;
format position $1.;
format angle line rotate 8.;

retain hsys '2';
retain xsys ysys '2';
retain size;
%line(0,0,pc1,pc2,RED,1,.2);
drop c_num z pc1 pc2;
run;

data the_rest;
format function $8.;
format text $4.;
format color $8.;
format style $8.;
format size 8.;
format hsys $1.;
format x 8.;
format xsys $1.;
format y 8.;
format ysys $1.;
format position $1.;
format angle line rotate 8.;

retain hsys '2';
retain xsys ysys '2';
%line(0,-&top,0,&top,BLACK,1,.2);
%line(-&top,0,&top,0,BLACK,1,.2);
%circle(0,0,&top,BLACK);
run;

proc append base=the_rest data=e_anno;
run;
proc append base=the_rest data=c_anno;
run;
proc append base=the_rest data=c_lines;
run;
proc append base=the_rest data=axes;
run;

/*
proc print data=the_rest;
title 'after appends';
run;
proc contents data = the_rest;
run;
*/

goptions reset=global;
goptions device = win
gunit = pct
cback = white
htitle = 4
htext = 2
ftext = swiss
ctext = black
hsize = 6.5 in
vsize = 6.5 in;

axis1 order = (-&btop to &btop by &step)
length = 4 in
major = none
color = white
label = (justify = l 'PC1')
minor = none
offset = (0);
axis2 order = (-&btop to &btop by &step)
length = 4 in
major = none
color = white
minor = none
offset = (0);

symbol value = x h=2 co=black cv=black;

proc gplot data=elemnts;
title &gridtitl;
footnote "Circle radius &top, construct & element loadings plotted";
plot pc2*pc1 /
haxis=axis1
vaxis=axis2
anno=the_rest;
run;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%