Skip to content

Commit

Permalink
Adding the statcan & lein phonetic algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
Yomguithereal committed Jul 3, 2016
1 parent c93e41d commit 21becb8
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
## 0.7.0

* Adding the `features/extraction/vectorizer` namespace.
* Adding the `phonetics/lein` namespace.
* Adding the `phonetics/statcan` namespace.

## 0.6.0

Expand Down
61 changes: 61 additions & 0 deletions src/phonetics/lein.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* Talisman phonetics/lein
* ========================
*
* The Lein name coding procedure.
*
* [Reference]:
* http://naldc.nal.usda.gov/download/27833/PDF
*/
import deburr from 'lodash/deburr';
import {squeeze, translation} from '../helpers';

/**
* Constants.
*/
// Letters removed from the name (first character excluded): the vowels
// plus Y, W and H. The `g` flag makes `String#replace` remove every match.
const DROPPED = /[AEIOUYWH]/g;

// Lookup used to turn the remaining letters into digits.
// NOTE(review): presumably `translation` maps each letter of the first string
// to the character at the same index in the second one, i.e.
// D,T => 1 / M,N => 2 / L,R => 3 / B,F,P,V => 4 / C,J,K,G,Q,S,X,Z => 5.
// Confirm against the helper's implementation.
const TRANSLATION = translation('DTMNLRBFPVCJKGQSXZ', '112233444455555555');

/**
* Helpers.
*/
/**
 * Normalizes a code to exactly four characters: shorter codes are
 * right-padded with zeros while longer ones are truncated.
 *
 * @param  {string} code - The code to normalize.
 * @return {string}      - The four characters long code.
 */
function pad(code) {
  const padded = `${code}0000`;

  return padded.substring(0, 4);
}

/**
 * Function taking a single name and computing its lein code.
 *
 * The name is deburred, upper-cased, stripped of every character that is
 * neither an A-Z letter nor whitespace, and trimmed. Its first character is
 * kept verbatim while the remainder is filtered, squeezed and translated to
 * digits. The result is finally padded with zeros and cut to four characters.
 *
 * @param  {string} name - The name to process.
 * @return {string}      - The lein code, or an empty string when the name
 *                         contains no usable letter.
 *
 * @throws {Error} The function expects the name to be a string.
 */
export default function lein(name) {
  if (typeof name !== 'string')
    throw new Error('talisman/phonetics/lein: the given name is not a string.');

  // Trimming so that a leading blank can never be taken as the "first letter"
  let code = deburr(name)
    .toUpperCase()
    .replace(/[^A-Z\s]/g, '')
    .trim();

  // Nothing to encode: without this guard `code[0]` would be `undefined` and
  // the function would return the bogus code "unde"
  if (!code)
    return '';

  // 1-- Keeping the first letter
  const first = code[0];
  code = code.slice(1);

  // 2-- Dropping vowels and Y, W & H
  code = code.replace(DROPPED, '');

  // 3-- Dropping consecutive duplicates and truncating to 4 characters
  // NOTE(review): inner blanks are not removed by any step here and can thus
  // end up in the code -- confirm against the reference whether they should
  // be dropped.
  code = squeeze(code).slice(0, 4);

  // 4-- Translations (characters without a translation are kept as is)
  const backup = code;
  code = '';

  for (let i = 0, l = backup.length; i < l; i++)
    code += TRANSLATION[backup[i]] || backup[i];

  return pad(first + code);
}
50 changes: 50 additions & 0 deletions src/phonetics/statcan.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
* Talisman phonetics/statcan
* ===========================
*
* The Statistics Canada name coding technique.
*
* [Reference]:
* http://naldc.nal.usda.gov/download/27833/PDF
*/
import deburr from 'lodash/deburr';
import {squeeze} from '../helpers';

/**
* Constants.
*/
// Letters removed from the name (first character excluded): the vowels
// plus Y. The `g` flag makes `String#replace` remove every match.
const DROPPED = /[AEIOUY]/g;

/**
 * Function taking a single name and computing its statcan code.
 *
 * The name is deburred, upper-cased, stripped of every character that is
 * neither an A-Z letter nor whitespace, and trimmed. Its first character is
 * kept verbatim while the remainder loses its vowels & Y, its consecutive
 * duplicates and its blanks. The code is at most four characters long.
 *
 * @param  {string} name - The name to process.
 * @return {string}      - The statcan code, or an empty string when the name
 *                         contains no usable letter.
 *
 * @throws {Error} The function expects the name to be a string.
 */
export default function statcan(name) {

  if (typeof name !== 'string')
    throw new Error('talisman/phonetics/statcan: the given name is not a string.');

  // Trimming so that a leading blank can never be taken as the "first letter"
  let code = deburr(name)
    .toUpperCase()
    .replace(/[^A-Z\s]/g, '')
    .trim();

  // Nothing to encode: without this guard `code[0]` would be `undefined` and
  // the function would return the bogus code "unde"
  if (!code)
    return '';

  // 1-- Keeping the first letter
  const first = code[0];
  code = code.slice(1);

  // 2-- Dropping vowels and Y
  code = code.replace(DROPPED, '');

  // 3-- Dropping consecutive duplicates
  code = squeeze(code);

  // 4-- Dropping blanks
  code = code.replace(/\s/g, '');

  // 5-- Limiting code size to 4
  return (first + code).slice(0, 4);
}
2 changes: 2 additions & 0 deletions test/endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ describe('phonetics', function() {
require('./phonetics/caverphone.js');
require('./phonetics/daitch-mokotoff.js');
require('./phonetics/double-metaphone.js');
require('./phonetics/lein.js');
require('./phonetics/metaphone.js');
require('./phonetics/mra.js');
require('./phonetics/nysiis.js');
require('./phonetics/soundex.js');
require('./phonetics/statcan.js');

describe('french', function() {
require('./phonetics/french/phonetic.js');
Expand Down
65 changes: 65 additions & 0 deletions test/phonetics/lein.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Talisman phonetics/lein tests
* ==============================
*
*/
import assert from 'assert';
import lein from '../../src/phonetics/lein';

describe('lein', function() {

  it('should throw if the given word is not a string.', function() {
    const callWithArray = () => lein([]);

    assert.throws(callWithArray, /string/);
  });

  it('should compute the lein code correctly.', function() {

    // Expected code => names sharing it, listed in the order they are checked.
    const expectations = [
      ['G320', ['Guillaume']],
      ['D450', [
        'Dabbs',
        'Daves',
        'Davies',
        'Davis',
        'Debaca',
        'Debose',
        'Debus',
        'Defazio',
        'Defigh',
        'Deveaux',
        'Devese',
        'Devies',
        'Devos',
        'Dipiazza',
        'Divish',
        'Dobak',
        'Dobbs',
        'Dobis',
        'Dobish',
        'Dobosh',
        'Doepke',
        'Dopps',
        'Doubek',
        'Doviak',
        'Dubbs',
        'Dubke',
        'Dubois',
        'Duboise',
        'Dubose',
        'Dubs',
        'Dubukey',
        'Dubus',
        'Dufek',
        'Duffek',
        'Dupas',
        'Dupois',
        'Dupuis'
      ]],
      ['A332', ['Arlène']],
      ['L125', ['Lüdenscheidt']]
    ];

    for (const [code, words] of expectations) {
      for (const word of words)
        assert.strictEqual(lein(word), code, `${word} => ${code}`);
    }
  });
});
36 changes: 36 additions & 0 deletions test/phonetics/statcan.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Talisman phonetics/statcan tests
* =================================
*
*/
import assert from 'assert';
import statcan from '../../src/phonetics/statcan';

describe('statcan', function() {

  it('should throw if the given word is not a string.', function() {
    const callWithArray = () => statcan([]);

    assert.throws(callWithArray, /string/);
  });

  it('should compute the statcan code correctly.', function() {

    // Expected code => names sharing it, listed in the order they are checked.
    const expectations = [
      ['GLM', ['Guillaume']],
      ['DVS', ['Daves', 'Davies', 'Davis', 'Devese', 'Devies', 'Devos']],
      ['DV', ['Dove']],
      ['DVSH', ['Divish']],
      ['ARLN', ['Arlène']],
      ['LDNS', ['Lüdenscheidt']]
    ];

    for (const [code, words] of expectations) {
      for (const word of words)
        assert.strictEqual(statcan(word), code, `${word} => ${code}`);
    }
  });
});

0 comments on commit 21becb8

Please sign in to comment.