Automatically assigned DDC number: 006454

Manually assigned DDC number: 00635

Title: Statistical Identification of Language

Author: Ted Dunning

Subject: Ted Dunning Statistical Identification of Language

Description: A statistically based program has been written which learns to distinguish between languages. The amount of training text that such a program needs is surprisingly small, and the amount of text needed to make an identification is also quite small. The program incorporates no linguistic presuppositions other than the assumption that text can be encoded as a string of bytes. Such a program can be used to determine which language small bits of text are in. It also shows a potential for what might be called 'statistical philology' in that it may be applied directly to phonetic transcriptions to help elucidate family trees among language dialects. A variant of this program has been shown to be useful as a quality control in biochemistry. In this application, genetic sequences are assumed to be expressions in a language peculiar to the organism from which the sequence is taken. Thus language identification becomes species identification. Introduction Given the following 20 character strin...

Contributor: The Pennsylvania State University CiteSeer Archives

Publisher: unknown

Date: 1995-12-22

Pubyear: 1994

Format: ps

Identifier: http://citeseer.ist.psu.edu/140384.html

Source: http://www.comp.lancs.ac.uk/computing/users/paul/ucrel/papers/lingdet.ps

Language: en

Rights: unrestricted

Graph

<?xml   version="1.0"   encoding="UTF-8"?>

<references_metadata>

      <rec   ID="SELF"   Type="SELF"   CiteSeer_Book="SELF"   CiteSeer_Volume="SELF"   Title="Statistical   Identification   of   Language">

            <identifier   Org="ISBN:0262133601"   Paper_ID="SELF"   Extracted="0262133601"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:038725482X"   Paper_ID="SELF"   Extracted="038725482X"   DDC="006.454"   Normalized_DDC="006454"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:0818678984"   Paper_ID="SELF"   Extracted="0818678984"   DDC="006.4/2"   Normalized_DDC="00642"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:1402012160"   Paper_ID="SELF"   Extracted="1402012160"   DDC="025.04"   Normalized_DDC="02504"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:1581133200"   Paper_ID="SELF"   Extracted="1581133200"   />

            <identifier   Org="ISBN:3486581724"   Paper_ID="SELF"   Extracted="3486581724"   DDC="025.04"   Normalized_DDC="02504"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540234985"   Paper_ID="SELF"   Extracted="3540234985"   DDC="006.35"   Normalized_DDC="00635"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540321403"   Paper_ID="SELF"   Extracted="3540321403"   />

            <identifier   Org="ISBN:3540419330"   Paper_ID="SELF"   Extracted="3540419330"   DDC="005.74"   Normalized_DDC="00574"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540487123"   Paper_ID="SELF"   Extracted="3540487123"   DDC="658.4'038011   22   22"   Normalized_DDC="6584038011"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540652590"   Paper_ID="SELF"   Extracted="3540652590"   DDC="418/.02/0285"   Normalized_DDC="418020285"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540675892"   Paper_ID="SELF"   Extracted="3540675892"   DDC="001/.01/2"   Normalized_DDC="001012"   Normalized_Weight="0.09090909090909091"   />

            <identifier   Org="ISBN:3540688218"   Paper_ID="SELF"   Extracted="3540688218"   />

            <identifier   Org="ISBN:3540747818"   Paper_ID="SELF"   Extracted="3540747818"   />

            <identifier   Org="ISBN:3540851097"   Paper_ID="SELF"   Extracted="3540851097"   />

            <identifier   Org="ISBN:3642003818"   Paper_ID="SELF"   Extracted="3642003818"   />

            <identifier   Org="ISBN:9042009438"   Paper_ID="SELF"   Extracted="9042009438"   DDC="410.285"   Normalized_DDC="410285"   Normalized_Weight="0.09090909090909091"   />

      </rec>

</references_metadata>

www.000webhost.com