PROGRAM demo ! Program to demonstrate the use of module LSQ for unconstrained ! linear least-squares regression. ! The data on fuel consumption are from: ! Sanford Weisberg `Applied Linear Regression', 2nd edition, 1985, ! pages 35-36. Publisher: Wiley ! The program is designed so that users can easily edit it for their ! problems. If you are likely to have more than 500 cases then increase ! the value of maxcases below. Similarly, if you may have more than ! 30 predictor variables, change maxvar below. ! The subscript 0 is used to relate to the constant in the model. ! Module LSQ treats the constant in the model, if there is one, just as ! any other variable. The numbering of all arrays starts from 1 in LSQ. USE lsq ! Must be the first declaration in the program IMPLICIT NONE INTEGER, PARAMETER :: maxcases = 500, maxvar = 30, & max_cdim = maxvar*(maxvar+1)/2, lout = 6 INTEGER :: case, nvar, iostatus, nreq, ifault, i, list(maxvar), j, & in, i1, i2 REAL (dp) :: xx(0:maxvar), yy, wt, one = 1.D0, beta(0:maxvar), & var, covmat(max_cdim), sterr(0:maxvar), hii, & cormat(max_cdim), ycorr(maxvar) REAL (KIND(0.E0)) :: x(maxcases, maxvar), y(maxcases), t(0:maxvar), tmin, & r2, resid(maxcases), std_resid(maxcases), std_err_pred, & fitted, stdev_res CHARACTER (LEN=80) :: heading, output CHARACTER (LEN= 2) :: state(50) CHARACTER (LEN= 8) :: vname(0:maxvar), y_name LOGICAL :: fit_const, lindep(0:maxvar) ! Interfaces INTERFACE SUBROUTINE separate_text(heading, vname, nvar) IMPLICIT NONE CHARACTER (LEN = *), INTENT(IN OUT) :: heading CHARACTER (LEN = *), DIMENSION(:), INTENT(OUT) :: vname INTEGER, INTENT(OUT) :: nvar END SUBROUTINE separate_text END INTERFACE INTERFACE SUBROUTINE printc(in, cormat, dimc, ycorr, vname, yname, iopt, lout, ier) USE lsq IMPLICIT NONE INTEGER, INTENT(IN) :: in, dimc, iopt, lout INTEGER, INTENT(OUT) :: ier REAL (dp), DIMENSION(:), INTENT(IN) :: cormat, ycorr CHARACTER (LEN=*), DIMENSION(0:), INTENT(IN) :: vname CHARACTER (LEN=*), INTENT(IN) :: yname END SUBROUTINE printc END INTERFACE WRITE(*, *)'The data on fuel consumption are from:' WRITE(*, *)'Sanford Weisberg "Applied Linear Regression", 2nd edition, 1985,' WRITE(*, *)'pages 35-36. Publisher: Wiley ISBN: 0-471-87957-6' WRITE(*, *) OPEN(8, file='fuelcons.dat', status='old') ! The first line of this file contains the names of the variables. READ(8, '(a)') heading vname(0) = 'Constant' ! Read the heading to get the variable names & the number of variables CALL separate_text(heading, vname, nvar) nvar = nvar - 2 ! nvar is the number of variables. ! 1 is subtracted as the first field in this ! file contains the abbreviated state name, ! & 1 is subtracted for the Y-variable. vname(1:nvar) = vname(2:nvar+1) y_name = vname(nvar+2) WRITE(*, *)'No. of variables =', nvar WRITE(*, *)'Predictor variables are:' WRITE(*, '(" ", 9a9)') vname(1:nvar) WRITE(*, *)'Dependent variable is: ', y_name fit_const = .true. ! Change to .false. if fitting a model without ! a constant. CALL startup(nvar, fit_const) ! Initializes the QR-factorization ! Read in the data, one line at a time, and progressively update the ! QR-factorization. wt = one case = 1 DO READ(8, *, IOSTAT=iostatus) state(case), x(case, 1:nvar), y(case) IF (iostatus > 0) CYCLE ! Error in data IF (iostatus < 0) EXIT ! End of file xx(0) = one ! A one is inserted as the first ! variable if a constant is being fitted. xx(1:nvar) = x(case, 1:nvar) ! New variables and transformed variables ! will often be generated here. yy = y(case) CALL includ(wt, xx, yy) case = case + 1 END DO WRITE(*, *)'No. of observations =', nobs CALL sing(lindep, ifault) ! Checks for singularities IF (ifault == 0) THEN WRITE(*, *)'QR-factorization is not singular' ELSE DO i = 1, nvar IF (lindep(i)) THEN WRITE(*, *) vname(i), ' is exactly linearly related to earlier variables' END IF END DO ! i = 1, nvar END IF ! (ifault == 0) WRITE(*, *) ! Show correlations (IN = 1 for `usual' correlations) in = 1 CALL partial_corr(in, cormat, max_cdim, ycorr, ifault) CALL printc(in, cormat, max_cdim, ycorr, vname, y_name, 1, lout, ifault) WRITE(*, *) WRITE(*, *)'Press ENTER to continue' READ(*, *) ! Weisberg only uses variables TAX, INC, ROAD and DLIC. These are ! currently in positions 2, 4, 5 & 7. ! We could have set NVAR = 4 and copied only these variables from X to ! XX, but we will show how regressions can be performed for a subset ! of the variables in the QR-factorization. Routine REORDR will be used ! to re-order the variables. list(1:4) = (/ 2, 4, 5, 7/) CALL reordr(list, 4, 2, ifault) ! Re-order so that the first 4 variables ! appear in positions 2,3,4 & 5. ! N.B. Though variables # 2, 4, 5 & 7 ! will occupy positions 2-5, they ! may be in any order. ! The constant remains in position 1. WRITE(*, 910) (vname(vorder(1:ncol))) 910 FORMAT(' Current order of variables:'/' ', 8a9/) ! Calculate regression coefficients of Y against the first variable, which ! was just a constant = 1.0, and the next 4 predictors. CALL tolset() ! Calculate tolerances before calling ! subroutine regcf. nreq = 5 ! i.e. Const, TAX, INC, ROAD, DLIC CALL regcf(beta, nreq, ifault) CALL ss() ! Calculate residual sums of squares ! Calculate covariance matrix of the regression coefficients & their ! standard errors. var = rss(nreq) / (nobs - nreq) CALL cov(nreq, var, covmat, max_cdim, sterr, ifault) ! Calculate t-values t(0:nreq-1) = beta(0:nreq-1) / sterr(0:nreq-1) ! Output regression table, residual sums of squares, and R-squared. WRITE(*, *) WRITE(*, *)'Variable Regn.coeff. Std.error t-value Res.sum of sq.' DO i = 0, nreq-1 WRITE(*, 900) vname(vorder(i+1)), beta(i), sterr(i), t(i), rss(i+1) 900 FORMAT(' ', a8, ' ', g12.4, ' ', g11.4, ' ', f7.2, ' ', g14.6) END DO WRITE(*, *) ! Output correlations of the parameter estimates WRITE(*, *) 'Covariances of parameter estimates' i2 = nreq DO i = 1, nreq-1 i1 = i2 + 1 i2 = i2 + nreq - i WRITE(output, '(" ", a8)') vname(vorder(i+1)) WRITE(output(10*i:), '(7f10.3)') covmat(i1:i2) WRITE(*, '(a)') output END DO WRITE(*, *) ! Now delete the variable with the smallest t-value by moving it ! to position 5 and then repeating the calculations for the constant ! and the next 3 variables. j = 1 tmin = ABS(t(1)) DO i = 2, nreq-1 IF (ABS(t(i)) < tmin) THEN j = i tmin = ABS(t(i)) END IF END DO j = j + 1 ! Add 1 as the t-array started at subscript 0 WRITE(*, *)'Removing variable in position', j CALL vmove(j, nreq, ifault) nreq = nreq - 1 CALL regcf(beta, nreq, ifault) CALL ss() CALL cov(nreq, var, covmat, max_cdim, sterr, ifault) t(0:nreq-1) = beta(0:nreq-1) / sterr(0:nreq-1) ! Output regression table, residual sums of squares, and R-squared. WRITE(*, *) WRITE(*, *)'Variable Regn.coeff. Std.error t-value Res.sum of sq.' DO i = 0, nreq-1 WRITE(*, 900) vname(vorder(i+1)), beta(i), sterr(i), t(i), rss(i+1) END DO WRITE(*, *) var = rss(nreq)/(nobs - nreq) stdev_res = SQRT( var ) r2 = one - rss(nreq) / rss(1) ! RSS(1) is the rss if only the constant ! is fitted; RSS(nreq) is the rss after ! fitting the requested NREQ variables. WRITE(*, '(" R^2 =", f8.4, " Std. devn. of residuals =", g12.4/)') & r2, stdev_res WRITE(*, *)'N.B. Some statistical packages wrongly call the standard deviation' WRITE(*, *)' of the residuals the standard error of prediction' WRITE(*, *) ! Calculate residuals, hii, standardized residuals & standard errors of ! prediction. WRITE(*, *)'Press ENTER to continue' READ(*, *) WRITE(*, *)'State Actual Fitted Residual Std.resid. SE(prediction)' DO i = 1, nobs xx(0) = one DO j = 1, nreq-1 xx(j) = x(i, vorder(j+1)) ! N.B. Regression coefficient j is for ! the variable vorder(j+1) END DO fitted = DOT_PRODUCT( beta(0:nreq-1), xx(0:nreq-1) ) resid(i) = y(i) - fitted CALL hdiag(xx, nreq, hii, ifault) std_resid(i) = resid(i) / SQRT(var*(one - hii)) ! The sqrt was omitted from the initial version of this demo in line below. std_err_pred = sqrt( varprd(xx, nreq) ) WRITE(*, 920) state(i), y(i), fitted, resid(i), std_resid(i), std_err_pred 920 FORMAT(' ', a2, ' ', 3f10.1, f9.2, ' ', f10.0) IF (i .EQ. 24) THEN WRITE(*, *)'Press ENTER to continue' READ(*, *) WRITE(*, *)'State Actual Fitted Residual Std.resid. SE(prediction)' END IF END DO ! i = 1, nobs STOP END PROGRAM demo SUBROUTINE separate_text(text, name, number) ! Takes the character string in `text' and separates it into `names'. ! This version only allows spaces as delimiters. IMPLICIT NONE CHARACTER (LEN = *), INTENT(IN OUT) :: text CHARACTER (LEN = *), DIMENSION(:), INTENT(OUT) :: name INTEGER, INTENT(OUT) :: number ! Local variables INTEGER :: length, len_name, pos1, pos2, nchars length = LEN_TRIM(text) number = 0 IF (length == 0) RETURN len_name = LEN(name(1)) IF (len_name < 1) RETURN pos1 = 1 DO DO ! Remove any leading blanks IF (text(pos1:pos1) == ' ') THEN text(pos1:) = text(pos1+1:) length = length - 1 CYCLE ELSE EXIT END IF END DO pos2 = INDEX(text(pos1:length), ' ') ! Find position of next blank IF (pos2 > 0) THEN pos2 = pos2 + pos1 - 1 ELSE ! Last name, no blank found pos2 = length + 1 END IF number = number + 1 nchars = MIN(len_name, pos2-pos1) name(number) = text(pos1:pos1+nchars-1) pos1 = pos2 + 1 ! Move past the blank IF (pos1 > length) RETURN END DO RETURN END SUBROUTINE separate_text SUBROUTINE printc(in, cormat, dimc, ycorr, vname, yname, iopt, lout, ier) ! Print (partial) correlations calculated using partial_corr to unit lout. ! If IOPT = 0, print correlations with the Y-variable only. USE lsq IMPLICIT NONE INTEGER, INTENT(IN) :: in, dimc, iopt, lout INTEGER, INTENT(OUT) :: ier REAL (dp), DIMENSION(:), INTENT(IN) :: cormat, ycorr CHARACTER (LEN=*), DIMENSION(0:), INTENT(IN) :: vname CHARACTER (LEN=*), INTENT(IN) :: yname ! Local variables. INTEGER :: nrows, j1, j2, i1, i2, row, upos, tpos, last CHARACTER (LEN=74) :: text CHARACTER (LEN= 9) :: char1 = ' 1.0 ' ! Check validity of arguments ier = 0 IF (in .GE. ncol) ier = 1 IF (ncol .LE. 1) ier = ier + 2 nrows = ncol - in IF (dimc .LE. nrows*(nrows-1)/2) ier = ier + 4 IF (ier .NE. 0) RETURN ! If iopt.NE.0 output heading IF (iopt .EQ. 0) GO TO 30 WRITE(lout, 900) 900 FORMAT(/' ', 'Correlation matrix') j1 = in + 1 DO j2 = MIN(j1+6, ncol) i1 = j1 - in i2 = j2 - in WRITE(lout, 910) vname(vorder(j1:j2)) 910 FORMAT(' ', 7(a8, ' ')) ! Print correlations for rows 1 to i2, columns i1 to i2. DO row = 1, i2 text = ' ' // vname(vorder(row+in)) IF (i1 .GT. row) THEN upos = (row-1) * (nrows+nrows-row) /2 + (i1-row) last = upos + i2 - i1 WRITE(text(12:74), '(7(F8.5, " "))') cormat(upos:last) ELSE upos = (row-1) * (nrows+nrows-row) /2 + 1 tpos = 12 + 9*(row-i1) text(tpos:tpos+8) = char1 last = upos + i2 - row - 1 IF (row .LT. i2) WRITE(text(tpos+9:74), '(6(F8.5, " "))') cormat(upos:last) END IF WRITE(lout, '(a)') text END DO ! row = 1, i2 ! Move onto the next block of columns. j1 = j2 + 1 IF (j1 .GT. ncol) EXIT END DO ! Correlations with the Y-variable. 30 WRITE(lout, 920) yname 920 FORMAT(/' Correlations with the dependent variable: ', a) j1 = in + 1 DO j2 = MIN(j1+7, ncol) WRITE(lout, 930) vname(vorder(j1:j2)) 930 FORMAT(/' ', 8(A8, ' ')) WRITE(lout, 940) ycorr(j1:j2) 940 FORMAT(' ', 8(F8.5, ' ')) j1 = j2 + 1 IF (j1 .GT. ncol) EXIT END DO RETURN END SUBROUTINE printc