name : Soundex.php
<?php  namespace Spondonit\Arabic\Arabic;
/**
 * ----------------------------------------------------------------------
 *
 * Copyright (c) 2006-2013 Khaled Al-Sham'aa.
 *
 * http://www.ar-php.org
 *
 * PHP Version 5
 *
 * ----------------------------------------------------------------------
 *
 * LICENSE
 *
 * This program is open source product; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>.
 *
 * ----------------------------------------------------------------------
 *
 * Class Name: Arabic Soundex
 *
 * Filename:   Soundex.php
 *
 * Original    Author(s): Khaled Al-Sham'aa <khaled@ar-php.org>
 *
 * Purpose:    Arabic soundex algorithm takes Arabic word as an input
 *             and produces a character string which identifies a set words
 *             that are (roughly) phonetically alike.
 *              
 * ----------------------------------------------------------------------
 *  
 * Arabic Soundex
 *
 * PHP class for Arabic soundex algorithm takes Arabic word as an input and
 * produces a character string which identifies a set words of those are
 * (roughly) phonetically alike.
 * 
 * Terms that are often misspelled can be a problem for database designers. Names, 
 * for example, are variable length, can have strange spellings, and they are not 
 * unique. Words can be misspelled or have multiple spellings, especially across 
 * different cultures or national sources.
 * 
 * To solve this problem, we need phonetic algorithms which can find similar 
 * sounding terms and names. Just such a family of algorithms exists and is called 
 * SoundExes, after the first patented version.
 * 
 * A Soundex search algorithm takes a word, such as a person's name, as input and 
 * produces a character string which identifies a set of words that are (roughly) 
 * phonetically alike. It is very handy for searching large databases when the user 
 * has incomplete data.
 * 
 * The original Soundex algorithm was patented by Margaret O'Dell and Robert 
 * C. Russell in 1918. The method is based on the six phonetic classifications of 
 * human speech sounds (bilabial, labiodental, dental, alveolar, velar, and 
 * glottal), which in turn are based on where you put your lips and tongue to make 
 * the sounds.
 * 
 * Soundex function that is available in PHP, but it has been limited to English and 
 * other Latin-based languages. This function described in PHP manual as the 
 * following: Soundex keys have the property that words pronounced similarly produce 
 * the same soundex key, and can thus be used to simplify searches in databases 
 * where you know the pronunciation but not the spelling. This soundex function 
 * returns string of 4 characters long, starting with a letter.
 * 
 * We develop this class as an Arabic counterpart to English Soundex, it handle an 
 * Arabic input string formatted in UTF-8 character set to return Soundex key 
 * equivalent to normal soundex function in PHP even for English and other 
 * Latin-based languages because the original algorithm focus on phonetically 
 * characters alike not the meaning of the word itself.
 * 
 * Example:
 * <code>
 *   include('./I18N/Arabic.php');
 *   $obj = new I18N_Arabic('Soundex');
 *     
 *   $soundex = $obj->soundex($name);
 * </code>   
 *    
 * @category  I18N 
 * @package   I18N_Arabic
 * @author    Khaled Al-Sham'aa <khaled@ar-php.org>
 * @copyright 2006-2013 Khaled Al-Sham'aa
 *    
 * @license   LGPL <http://www.gnu.org/licenses/lgpl.txt>
 * @link      http://www.ar-php.org 
 */

// New in PHP V5.3: Namespaces
// namespace I18N\Arabic;
// 
// $obj = new I18N\Arabic\Soundex();
// 
// use I18N\Arabic;
// $obj = new Arabic\Soundex();
//
// use I18N\Arabic\Soundex as Soundex;
// $obj = new Soundex();

/**
 * This PHP class implement Arabic soundex algorithm
 *  
 * @category  I18N 
 * @package   I18N_Arabic
 * @author    Khaled Al-Sham'aa <khaled@ar-php.org>
 * @copyright 2006-2013 Khaled Al-Sham'aa
 *    
 * @license   LGPL <http://www.gnu.org/licenses/lgpl.txt>
 * @link      http://www.ar-php.org 
 */ 
class I18N_Arabic_Soundex
{
    private $_asoundexCode    = array();
    private $_aphonixCode     = array();
    private $_transliteration = array();
    private $_map             = array();
    
    private $_len  = 4;
    private $_lang = 'en';
    private $_code = 'soundex';

    /**
     * Loads initialize values
     *
     * @ignore
     */         
    public function __construct()
    {
        $xml = simplexml_load_file(dirname(__FILE__).'/data/ArSoundex.xml');
        
        foreach ($xml->asoundexCode->item as $item) {
            $index = $item['id'];
            $value = (string) $item;
            $this->_asoundexCode["$value"] = $index;
        } 

        foreach ($xml->aphonixCode->item as $item) {
            $index = $item['id'];
            $value = (string) $item;
            $this->_aphonixCode["$value"] = $index;
        } 
        
        foreach ($xml->transliteration->item as $item) {
            $index = $item['id'];
            $this->_transliteration["$index"] = (string)$item;
        } 

        $this->_map = $this->_asoundexCode;
    }
    
    /**
     * Set the length of soundex key (default value is 4)
     *      
     * @param integer $integer Soundex key length
     *      
     * @return object $this to build a fluent interface
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function setLen($integer)
    {
        $this->_len = (int)$integer;
        
        return $this;
    }
    
    /**
     * Set the language of the soundex key (default value is "en")
     *      
     * @param string $str Soundex key language [ar|en]
     *      
     * @return object $this to build a fluent interface
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function setLang($str)
    {
        $str = strtolower($str);
        
        if ($str == 'ar' || $str == 'en') {
            $this->_lang = $str;
        }
        
        return $this;
    }
    
    /**
     * Set the mapping code of the soundex key (default value is "soundex")
     *      
     * @param string $str Soundex key mapping code [soundex|phonix]
     *      
     * @return object $this to build a fluent interface
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function setCode($str)
    {
        $str = strtolower($str);
        
        if ($str == 'soundex' || $str == 'phonix') {
            $this->_code = $str;
            if ($str == 'phonix') {
                $this->_map = $this->_aphonixCode;
            } else {
                $this->_map = $this->_asoundexCode;
            }
        }
        
        return $this;
    }
    
    /**
     * Get the soundex key length used now
     *      
     * @return integer return current setting for soundex key length
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function getLen()
    {
        return $this->_len;
    }
    
    /**
     * Get the soundex key language used now
     *      
     * @return string return current setting for soundex key language
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function getLang()
    {
        return $this->_lang;
    }
    
    /**
     * Get the soundex key calculation method used now
     *      
     * @return string return current setting for soundex key calculation method
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function getCode()
    {
        return $this->_code;
    }
    
    /**
     * Methode to get soundex/phonix numric code for given word
     *      
     * @param string $word The word that we want to encode it
     *      
     * @return string The calculated soundex/phonix numeric code
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function mapCode($word)
    {
        $encodedWord = '';
        
        $max   = mb_strlen($word, 'UTF-8');
        
        for ($i=0; $i < $max; $i++) {
            $char = mb_substr($word, $i, 1, 'UTF-8');
            if (isset($this->_map["$char"])) {
                $encodedWord .= $this->_map["$char"];
            } else {
                $encodedWord .= '0';
            }
        }
        
        return $encodedWord;
    }
    
    /**
     * Remove any characters replicates
     *      
     * @param string $word Arabic word you want to check if it is feminine
     *      
     * @return string Same word without any duplicate chracters
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    protected function trimRep($word)
    {
        $lastChar  = null;
        $cleanWord = null;
        $max       = mb_strlen($word, 'UTF-8');
        
        for ($i = 0; $i < $max; $i++) {
            $char = mb_substr($word, $i, 1, 'UTF-8');
            if ($char != $lastChar) {
                $cleanWord .= $char;
            }
            $lastChar = $char;
        }
        
        return $cleanWord;
    }
    
    /**
     * Arabic soundex algorithm takes Arabic word as an input and produces a 
     * character string which identifies a set words that are (roughly) 
     * phonetically alike.
     *      
     * @param string $word Arabic word you want to calculate its soundex
     *                    
     * @return string Soundex value for a given Arabic word
     * @author Khaled Al-Sham'aa <khaled@ar-php.org>
     */
    public function soundex($word)
    {
        $soundex = mb_substr($word, 0, 1, 'UTF-8');
        $rest    = mb_substr($word, 1, mb_strlen($word, 'UTF-8'), 'UTF-8');
        
        if ($this->_lang == 'en') {
            $soundex = $this->_transliteration[$soundex];
        }
        
        $encodedRest      = $this->mapCode($rest);
        $cleanEncodedRest = $this->trimRep($encodedRest);
        
        $soundex .= $cleanEncodedRest;
        
        $soundex = str_replace('0', '', $soundex);
        
        $totalLen = mb_strlen($soundex, 'UTF-8');
        if ($totalLen > $this->_len) {
            $soundex = mb_substr($soundex, 0, $this->_len, 'UTF-8');
        } else {
            $soundex .= str_repeat('0', $this->_len - $totalLen);
        }
        
        return $soundex;
    }
}

© 2025 UnknownSec
afwwrfwafr45458465
Password