MODULE kmeans IMPLICIT NONE ! Define BIG to be a very large positive number REAL, PARAMETER, PRIVATE :: zero = 0.0, one = 1.0, big = HUGE(zero) PUBLIC :: kmns PRIVATE :: optra, qtran CONTAINS SUBROUTINE kmns(a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, & itran, live, iter, wss, ifault) ! Code converted using TO_F90 by Alan Miller ! Date: 2000-11-01 Time: 10:00:51 ! ALGORITHM AS 136 APPL. STATIST. (1979) VOL.28, NO.1 ! Divide M points in N-dimensional space into K clusters so that ! the within cluster sum of squares is minimized. REAL, INTENT(IN) :: a(:,:) ! a(m,n) INTEGER, INTENT(IN) :: m INTEGER, INTENT(IN) :: n REAL, INTENT(IN OUT) :: c(:,:) ! c(k,n) INTEGER, INTENT(IN) :: k INTEGER, INTENT(OUT) :: ic1(:) INTEGER, INTENT(OUT) :: ic2(:) INTEGER, INTENT(OUT) :: nc(:) REAL, INTENT(OUT) :: an1(:) REAL, INTENT(OUT) :: an2(:) INTEGER, INTENT(OUT) :: ncp(:) REAL, INTENT(IN OUT) :: d(:) INTEGER, INTENT(OUT) :: itran(:) INTEGER, INTENT(IN OUT) :: live(:) INTEGER, INTENT(IN) :: iter REAL, INTENT(OUT) :: wss(:) INTEGER, INTENT(OUT) :: ifault ! Local variables INTEGER :: i, ii, ij, il, indx, j, l REAL :: aa, da, db, dc, dt(2), temp ifault = 3 IF (k <= 1 .OR. k >= m) RETURN ! For each point I, find its two closest centres, IC1(I) and ! IC2(I). Assign it to IC1(I). DO i = 1, m ic1(i) = 1 ic2(i) = 2 DO il = 1, 2 dt(il) = zero DO j = 1, n da = a(i,j) - c(il,j) dt(il) = dt(il) + da*da END DO END DO IF (dt(1) > dt(2)) THEN ic1(i) = 2 ic2(i) = 1 temp = dt(1) dt(1) = dt(2) dt(2) = temp END IF loop50: DO l = 3, k db = zero DO j = 1, n dc = a(i,j) - c(l,j) db = db + dc*dc IF (db >= dt(2)) CYCLE loop50 END DO IF (db >= dt(1)) THEN dt(2) = db ic2(i) = l ELSE dt(2) = dt(1) ic2(i) = ic1(i) dt(1) = db ic1(i) = l END IF END DO loop50 END DO ! Update cluster centres to be the average of points contained ! within them. DO l = 1, k nc(l) = 0 c(l,1:n) = zero END DO DO i = 1, m l = ic1(i) nc(l) = nc(l) + 1 c(l,1:n) = c(l,1:n) + a(i,1:n) END DO ! Check to see if there is any empty cluster at this stage DO l = 1, k IF (nc(l) == 0) THEN ifault = 1 RETURN END IF aa = nc(l) c(l,1:n) = c(l,1:n) / aa ! Initialize AN1, AN2, ITRAN & NCP ! AN1(L) = NC(L) / (NC(L) - 1) ! AN2(L) = NC(L) / (NC(L) + 1) ! ITRAN(L) = 1 if cluster L is updated in the quick-transfer stage, ! = 0 otherwise ! In the optimal-transfer stage, NCP(L) stores the step at which ! cluster L is last updated. ! In the quick-transfer stage, NCP(L) stores the step at which ! cluster L is last updated plus M. an2(l) = aa / (aa + one) an1(l) = big IF (aa > one) an1(l) = aa / (aa - one) itran(l) = 1 ncp(l) = -1 END DO indx = 0 DO ij = 1, iter ! In this stage, there is only one pass through the data. Each ! point is re-allocated, if necessary, to the cluster that will ! induce the maximum reduction in within-cluster sum of squares. CALL optra(a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, itran, live, indx) ! Stop if no transfer took place in the last M optimal transfer steps. IF (indx == m) GO TO 150 ! Each point is tested in turn to see if it should be re-allocated ! to the cluster to which it is most likely to be transferred, ! IC2(I), from its present cluster, IC1(I). Loop through the ! data until no further change is to take place. CALL qtran(a, m, n, c, ic1, ic2, nc, an1, an2, ncp, d, itran, indx) ! If there are only two clusters, there is no need to re-enter the ! optimal transfer stage. IF (k == 2) GO TO 150 ! NCP has to be set to 0 before entering OPTRA. ncp(1:k) = 0 END DO ! Since the specified number of iterations has been exceeded, set ! IFAULT = 2. This may indicate unforeseen looping. ifault = 2 ! Compute within-cluster sum of squares for each cluster. 150 DO l = 1, k wss(l) = zero c(l,1:n) = zero END DO DO i = 1, m ii = ic1(i) c(ii,1:n) = c(ii,1:n) + a(i,1:n) END DO DO j = 1, n c(1:k,j) = c(1:k,j) / REAL(nc(l)) DO i = 1, m ii = ic1(i) da = a(i,j) - c(ii,j) wss(ii) = wss(ii) + da*da END DO END DO RETURN END SUBROUTINE kmns SUBROUTINE optra(a, m, n, c, k, ic1, ic2, nc, an1, an2, ncp, d, & itran, live, indx) ! ALGORITHM AS 136.1 APPL. STATIST. (1979) VOL.28, NO.1 ! This is the optimal transfer stage. ! Each point is re-allocated, if necessary, to the cluster that will ! induce a maximum reduction in the within-cluster sum of squares. REAL, INTENT(IN) :: a(:,:) ! a(m,n) INTEGER, INTENT(IN) :: m INTEGER, INTENT(IN) :: n REAL, INTENT(IN OUT) :: c(:,:) ! c(k,n) INTEGER, INTENT(IN) :: k INTEGER, INTENT(IN OUT) :: ic1(:) INTEGER, INTENT(IN OUT) :: ic2(:) INTEGER, INTENT(IN OUT) :: nc(:) REAL, INTENT(IN OUT) :: an1(:) REAL, INTENT(IN OUT) :: an2(:) INTEGER, INTENT(IN OUT) :: ncp(:) REAL, INTENT(OUT) :: d(:) INTEGER, INTENT(IN OUT) :: itran(:) INTEGER, INTENT(OUT) :: live(:) INTEGER, INTENT(OUT) :: indx ! Local variables INTEGER :: i, j, l, l1, l2, ll REAL :: al1, al2, alt, alw, da, db, dc, dd, de, df, rr, r2 ! If cluster L is updated in the last quick-transfer stage, it ! belongs to the live set throughout this stage. Otherwise, at ! each step, it is not in the live set if it has not been updated ! in the last M optimal transfer steps. DO l = 1, k IF (itran(l) == 1) live(l) = m + 1 END DO DO i = 1, m indx = indx + 1 l1 = ic1(i) l2 = ic2(i) ll = l2 ! If point I is the only member of cluster L1, no transfer. IF (nc(l1) == 1) THEN IF (indx == m) RETURN CYCLE END IF ! If L1 has not yet been updated in this stage, no need to re-compute D(I). IF (ncp(l1) == 0) GO TO 30 de = zero DO j = 1, n df = a(i,j) - c(l1,j) de = de + df*df END DO d(i) = de * an1(l1) ! Find the cluster with minimum R2. 30 da = zero DO j = 1, n db = a(i,j) - c(l2,j) da = da + db*db END DO r2 = da * an2(l2) loop60: DO l = 1, k ! If I >= LIVE(L1), then L1 is not in the live set. If this is true, we ! only need to consider clusters that are in the live set for possible ! transfer of point I. Otherwise, we need to consider all possible clusters. IF (i >= live(l1) .AND. i >= live(l) .OR. l == l1 .OR. & l == ll) CYCLE loop60 rr = r2 / an2(l) dc = zero DO j = 1, n dd = a(i,j) - c(l,j) dc = dc + dd*dd IF (dc >= rr) CYCLE loop60 END DO r2 = dc * an2(l) l2 = l END DO loop60 IF (r2 >= d(i)) THEN ! If no transfer is necessary, L2 is the new IC2(I). ic2(i) = l2 IF (indx == m) RETURN ! Update cluster centres, LIVE, NCP, AN1 & AN2 for clusters L1 and ! L2, and update IC1(I) & IC2(I). ELSE indx = 0 live(l1) = m + i live(l2) = m + i ncp(l1) = i ncp(l2) = i al1 = nc(l1) alw = al1 - one al2 = nc(l2) alt = al2 + one DO j = 1, n c(l1,j) = (c(l1,j) * al1 - a(i,j)) / alw c(l2,j) = (c(l2,j) * al2 + a(i,j)) / alt END DO nc(l1) = nc(l1) - 1 nc(l2) = nc(l2) + 1 an2(l1) = alw / al1 an1(l1) = big IF (alw > one) an1(l1) = alw / (alw - one) an1(l2) = alt / al2 an2(l2) = alt / (alt + one) ic1(i) = l2 ic2(i) = l1 END IF END DO DO l = 1, k ! ITRAN(L) = 0 before entering QTRAN. Also, LIVE(L) has to be ! decreased by M before re-entering OPTRA. itran(l) = 0 live(l) = live(l) - m END DO RETURN END SUBROUTINE optra SUBROUTINE qtran(a, m, n, c, ic1, ic2, nc, an1, an2, ncp, d, itran, indx) ! N.B. Argument K has been removed. ! ALGORITHM AS 136.2 APPL. STATIST. (1979) VOL.28, NO.1 ! This is the quick transfer stage. ! IC1(I) is the cluster which point I belongs to. ! IC2(I) is the cluster which point I is most likely to be ! transferred to. ! For each point I, IC1(I) & IC2(I) are switched, if necessary, to ! reduce within-cluster sum of squares. The cluster centres are ! updated after each step. REAL, INTENT(IN) :: a(:,:) ! a(m,n) INTEGER, INTENT(IN) :: m INTEGER, INTENT(IN) :: n REAL, INTENT(IN OUT) :: c(:,:) ! c(k,n) INTEGER, INTENT(IN OUT) :: ic1(:) INTEGER, INTENT(IN OUT) :: ic2(:) INTEGER, INTENT(IN OUT) :: nc(:) REAL, INTENT(IN OUT) :: an1(:) REAL, INTENT(IN OUT) :: an2(:) INTEGER, INTENT(IN OUT) :: ncp(:) REAL, INTENT(OUT) :: d(:) INTEGER, INTENT(OUT) :: itran(:) INTEGER, INTENT(OUT) :: indx ! Local variables INTEGER :: i, icoun, istep, j, l1, l2 REAL :: al1, al2, alt, alw, da, db, dd, de, r2 ! In the optimal transfer stage, NCP(L) indicates the step at which ! cluster L is last updated. In the quick transfer stage, NCP(L) ! is equal to the step at which cluster L is last updated plus M. icoun = 0 istep = 0 10 DO i = 1, m icoun = icoun + 1 istep = istep + 1 l1 = ic1(i) l2 = ic2(i) ! If point I is the only member of cluster L1, no transfer. IF (nc(l1) == 1) GO TO 60 ! If ISTEP > NCP(L1), no need to re-compute distance from point I to ! cluster L1. Note that if cluster L1 is last updated exactly M steps ! ago, we still need to compute the distance from point I to cluster L1. IF (istep > ncp(l1)) GO TO 30 da = zero DO j = 1, n db = a(i,j) - c(l1,j) da = da + db*db END DO d(i) = da * an1(l1) ! If ISTEP >= both NCP(L1) & NCP(L2) there will be no transfer of ! point I at this step. 30 IF (istep >= ncp(l1) .AND. istep >= ncp(l2)) GO TO 60 r2 = d(i) / an2(l2) dd = zero DO j = 1, n de = a(i,j) - c(l2,j) dd = dd + de*de IF (dd >= r2) GO TO 60 END DO ! Update cluster centres, NCP, NC, ITRAN, AN1 & AN2 for clusters ! L1 & L2. Also update IC1(I) & IC2(I). Note that if any ! updating occurs in this stage, INDX is set back to 0. icoun = 0 indx = 0 itran(l1) = 1 itran(l2) = 1 ncp(l1) = istep + m ncp(l2) = istep + m al1 = nc(l1) alw = al1 - one al2 = nc(l2) alt = al2 + one DO j = 1, n c(l1,j) = (c(l1,j) * al1 - a(i,j)) / alw c(l2,j) = (c(l2,j) * al2 + a(i,j)) / alt END DO nc(l1) = nc(l1) - 1 nc(l2) = nc(l2) + 1 an2(l1) = alw / al1 an1(l1) = big IF (alw > one) an1(l1) = alw / (alw - one) an1(l2) = alt / al2 an2(l2) = alt / (alt + one) ic1(i) = l2 ic2(i) = l1 ! If no re-allocation took place in the last M steps, return. 60 IF (icoun == m) RETURN END DO GO TO 10 END SUBROUTINE qtran END MODULE kmeans