/*
 * soundex -- read a list of words and compute their soundex codes
 *
 * Input is a list of words, one per line; output is a list of
 * soundex codes, also one per line.  The algorithm is for American
 * English.  It only handles ASCII.
 *
 * A final word that is not terminated by a newline is still padded
 * and written as a complete output line.
 */

#include <assert.h>
#include <ctype.h>
#include <stdio.h>

/* Character classes stored in code[] besides '1'..'6' and '\n'. */
enum {
    SX_VOWEL      =  0, /* A, E, I, O, U, Y: not printed, but breaks a run of duplicates */
    SX_NOT_LETTER = -1, /* anything that is neither a letter nor '\n' */
    SX_SILENT     = -2  /* H, W: skipped without affecting duplicate detection */
};

/*
 * Scanner states.  The values in between (2, 3, 4) mean "waiting for the
 * 2nd, 3rd or 4th character of the code"; the state is advanced with ++.
 */
enum {
    ST_FIRST = 1, /* waiting for the first letter of a line */
    ST_DONE  = 5  /* four characters written; skip to end of line */
};

/*
 * Classification of every unsigned char value:
 *
 *   B, F, P, V             -> '1'
 *   C, G, J, K, Q, S, X, Z -> '2'
 *   D, T                   -> '3'
 *   L                      -> '4'
 *   M, N                   -> '5'
 *   R                      -> '6'
 *   A, E, I, O, U, Y       -> 0
 *   H, W                   -> -2
 *   \n                     -> '\n'
 *   others                 -> -1
 *
 * Upper- and lower-case letters map to the same class.  One row below
 * holds sixteen consecutive character values.
 */
static const int code[256] = {
    /* 0x00 (index 10 is '\n') */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '\n', -1, -1, -1, -1, -1,
    /* 0x10 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x40: @ A B C D E F G H I J K L M N O */
    -1, 0, '1', '2', '3', 0, '1', '2', -2, 0, '2', '2', '4', '5', '5', 0,
    /* 0x50: P Q R S T U V W X Y Z and five punctuation characters */
    '1', '2', '6', '2', '3', 0, '1', -2, '2', 0, '2', -1, -1, -1, -1, -1,
    /* 0x60: ` a b c d e f g h i j k l m n o */
    -1, 0, '1', '2', '3', 0, '1', '2', -2, 0, '2', '2', '4', '5', '5', 0,
    /* 0x70: p q r s t u v w x y z and five non-letters */
    '1', '2', '6', '2', '3', 0, '1', -2, '2', 0, '2', -1, -1, -1, -1, -1,
    /* 0x80 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Finish the code currently being written: pad it with '0' until four
 * characters have been produced, then end the output line.
 *
 * state is the scanner state at the moment the word ended (2..5); it
 * tells how many characters are still missing.
 */
static void end_code(int state)
{
    for (; state < ST_DONE; state++) {
        putchar('0');
    }
    putchar('\n');
}

/*
 * Filter stdin to stdout, turning each input line into its soundex code.
 * Command-line arguments are not used.  Always returns 0.
 */
int main(int argc, char *argv[])
{
    int c;
    int last = SX_VOWEL;  /* class of the previous significant character */
    int state = ST_FIRST;

    (void)argc;
    (void)argv;

    while ((c = getchar()) != EOF) {
        /* getchar() yields an unsigned char value here, which is a valid table index */
        int vc = code[c];

        if (state == ST_FIRST) {
            if (vc == '\n') {
                /* Empty line: echo it */
                putchar('\n');
            } else if (vc != SX_NOT_LETTER) {
                /* First letter is printed as-is (upper-cased); its class is
                 * remembered so an immediately following duplicate is dropped */
                putchar(toupper(c));
                last = vc;
                state = 2;
            }
            /* Non-letters before the first letter are skipped */
        } else if (state < ST_DONE) {
            if (vc == '\n') {
                /* Short word */
                end_code(state);
                state = ST_FIRST;
            } else if (vc == SX_SILENT) {
                /* 'H' or 'W': ignored completely, 'last' is kept */
            } else if (vc == SX_VOWEL || vc == SX_NOT_LETTER) {
                /* Not printed, but the next consonant is no longer a duplicate */
                last = SX_VOWEL;
            } else if (vc != last) {
                /* Consonant that does not repeat the previous class */
                assert(vc > 0 && vc != '\n');
                putchar(vc);
                last = vc;
                state++;
            }
        } else {
            /* Code is complete: discard the rest of the line */
            assert(state == ST_DONE);
            if (vc == '\n') {
                putchar('\n');
                state = ST_FIRST;
            }
        }
    }

    /* Input ended in the middle of a word (no trailing newline):
     * still emit a full, newline-terminated code for it */
    if (state != ST_FIRST) {
        end_code(state);
    }

    return 0;
}