Automatically assigned DDC number: 006312

Manually assigned DDC number: 006312

Title: Recognizing Text Genres with Simple Metrics Using Discriminant Analysis

Author:

Author:

Subject: Jussi Karlgren, Douglass Cutting Recognizing Text Genres with Simple Metrics Using Discriminant Analysis

Description: A simple method for categorizing texts into pre-determined text genre categories using the statistical standard technique of discriminant analysis is demonstrated with application to the Brown corpus. Discriminant analysis makes it possible to use a large number of parameters that may be specific for a certain corpus or information stream, and combine them into a small number of functions, with the parameters weighted on the basis of how useful they are for discriminating text genres. An application to information retrieval is discussed. Text Types There are different types of text. Texts "about" the same thing may be in differing genres, of different types, and of varying quality. Texts vary along several parameters, all relevant for the general information retrieval problem of matching reader needs and texts. Given this variation, in a text retrieval context the problems are (i) identifying genres, and (ii) choosing criteria to cluster texts of the same genre, with predictable precision an...

Contributor: The Pennsylvania State University CiteSeer Archives

Publisher: unknown

Date: 1994-12-09

Pubyear: 1994

Format: ps

Identifier: http://citeseer.ist.psu.edu/140411.html

Source: http://www.sics.se/~jussi/Papers/1994_Coling_Kyoto_l/cmplglixcol.ps

Language: en

Rights: unrestricted

Graph

<?xml   version="1.0"   encoding="UTF-8"?>

<references_metadata>

      <rec   ID="SELF"   Type="SELF"   CiteSeer_Book="SELF"   CiteSeer_Volume="SELF"   Title="Recognizing   Text   Genres   with   Simple   Metrics   Using   Discriminant   Analysis">

            <identifier   Org="ISBN:0120121603"   Paper_ID="SELF"   Extracted="0120121603"   />

            <identifier   Org="ISBN:0199292345"   Paper_ID="SELF"   Extracted="0199292345"   DDC="413.028"   Normalized_DDC="413028"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:0792356853"   Paper_ID="SELF"   Extracted="0792356853"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:0819453463"   Paper_ID="SELF"   Extracted="0819453463"   />

            <identifier   Org="ISBN:1402040261"   Paper_ID="SELF"   Extracted="1402040261"   DDC="025.5240285"   Normalized_DDC="0255240285"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:157735236X"   Paper_ID="SELF"   Extracted="157735236X"   />

            <identifier   Org="ISBN:1581135610"   Paper_ID="SELF"   Extracted="1581135610"   />

            <identifier   Org="ISBN:1586036475"   Paper_ID="SELF"   Extracted="1586036475"   DDC="610.28"   Normalized_DDC="61028"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:1845441583"   Paper_ID="SELF"   Extracted="1845441583"   />

            <identifier   Org="ISBN:184628175X"   Paper_ID="SELF"   Extracted="184628175X"   DDC="006.312"   Normalized_DDC="006312"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:2874630829"   Paper_ID="SELF"   Extracted="2874630829"   />

            <identifier   Org="ISBN:3540231668"   Paper_ID="SELF"   Extracted="3540231668"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:3540244751"   Paper_ID="SELF"   Extracted="3540244751"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:3540298460"   Paper_ID="SELF"   Extracted="3540298460"   DDC="004.678"   Normalized_DDC="004678"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:3540313133"   Paper_ID="SELF"   Extracted="3540313133"   />

            <identifier   Org="ISBN:3540340459"   Paper_ID="SELF"   Extracted="3540340459"   DDC="469.0285"   Normalized_DDC="4690285"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:3540406352"   Paper_ID="SELF"   Extracted="3540406352"   DDC="006.4/54"   Normalized_DDC="006454"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:354070938X"   Paper_ID="SELF"   Extracted="354070938X"   DDC="005.52"   Normalized_DDC="00552"   Normalized_Weight="0.08333333333333333"   />

            <identifier   Org="ISBN:9042024283"   Paper_ID="SELF"   Extracted="9042024283"   DDC="418"   Normalized_DDC="418"   Normalized_Weight="0.08333333333333333"   />

      </rec>

</references_metadata>

www.000webhost.com