Current File : /home/mmdealscpanel/yummmdeals.com/I18N.zip
PK�V�Z�]	���Langinfo.pmnu�[���package I18N::Langinfo;

use 5.006;
use strict;
use warnings;
use Carp;

require Exporter;
require XSLoader;

our @ISA = qw(Exporter);

our @EXPORT = qw(langinfo);

our @EXPORT_OK = qw(
	ABDAY_1
	ABDAY_2
	ABDAY_3
	ABDAY_4
	ABDAY_5
	ABDAY_6
	ABDAY_7
	ABMON_1
	ABMON_10
	ABMON_11
	ABMON_12
	ABMON_2
	ABMON_3
	ABMON_4
	ABMON_5
	ABMON_6
	ABMON_7
	ABMON_8
	ABMON_9
	ALT_DIGITS
	AM_STR
	CODESET
	CRNCYSTR
	DAY_1
	DAY_2
	DAY_3
	DAY_4
	DAY_5
	DAY_6
	DAY_7
	D_FMT
	D_T_FMT
	ERA
	ERA_D_FMT
	ERA_D_T_FMT
	ERA_T_FMT
	MON_1
	MON_10
	MON_11
	MON_12
	MON_2
	MON_3
	MON_4
	MON_5
	MON_6
	MON_7
	MON_8
	MON_9
	NOEXPR
	NOSTR
	PM_STR
	RADIXCHAR
	THOUSEP
	T_FMT
	T_FMT_AMPM
	YESEXPR
	YESSTR
);

our $VERSION = '0.13';

XSLoader::load();

1;
__END__

=head1 NAME

I18N::Langinfo - query locale information

=head1 SYNOPSIS

  use I18N::Langinfo;

=head1 DESCRIPTION

The langinfo() function queries various locale information that can be
used to localize output and user interfaces.  The langinfo() requires
one numeric argument that identifies the locale constant to query:
if no argument is supplied, C<$_> is used.  The numeric constants
appropriate to be used as arguments are exportable from I18N::Langinfo.

The following example will import the langinfo() function itself and
three constants to be used as arguments to langinfo(): a constant for
the abbreviated first day of the week (the numbering starts from
Sunday = 1) and two more constants for the affirmative and negative
answers for a yes/no question in the current locale.

    use I18N::Langinfo qw(langinfo ABDAY_1 YESSTR NOSTR);

    my ($abday_1, $yesstr, $nostr) =
        map { langinfo($_) } (ABDAY_1, YESSTR, NOSTR);

    print "$abday_1? [$yesstr/$nostr] ";

In other words, in the "C" (or English) locale the above will probably
print something like:

    Sun? [yes/no] 

but under a French locale

    dim? [oui/non] 

The usually available constants are

    ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
    ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
    DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
    MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
    MON_7 MON_8 MON_9 MON_10 MON_11 MON_12

for abbreviated and full length days of the week and months of the year,

    D_T_FMT D_FMT T_FMT

for the date-time, date, and time formats used by the strftime() function
(see L<POSIX>)

    AM_STR PM_STR T_FMT_AMPM

for the locales for which it makes sense to have ante meridiem and post
meridiem time formats,

    CODESET CRNCYSTR RADIXCHAR

for the character code set being used (such as "ISO8859-1", "cp850",
"koi8-r", "sjis", "utf8", etc.), for the currency string, for the
radix character used between the integer and the fractional part
of decimal numbers (yes, this is redundant with POSIX::localeconv())

    YESSTR YESEXPR NOSTR NOEXPR

for the affirmative and negative responses and expressions, and

    ERA ERA_D_FMT ERA_D_T_FMT ERA_T_FMT

for the Japanese Emperor eras (naturally only defined under Japanese locales).

See your L<langinfo(3)> for more information about the available
constants.  (Often this means having to look directly at the
F<langinfo.h> C header file.)

Note that unfortunately none of the above constants are guaranteed
to be available on a particular platform.  To be on the safe side
you can wrap the import in an eval like this:

    eval {
        require I18N::Langinfo;
        I18N::Langinfo->import(qw(langinfo CODESET));
        $codeset = langinfo(CODESET()); # note the ()
    };
    if ($@) { ... failed ... }

=head2 EXPORT

By default only the C<langinfo()> function is exported.

=head1 SEE ALSO

L<perllocale>, L<POSIX/localeconv>, L<POSIX/setlocale>, L<nl_langinfo(3)>.

The langinfo() is just a wrapper for the C nl_langinfo() interface.

=head1 AUTHOR

Jarkko Hietaniemi, E<lt>jhi@hut.fiE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2001 by Jarkko Hietaniemi

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut
PK�6�Z<��c�p�pLangTags/List.pmnu�[���
require 5;
package I18N::LangTags::List;
#  Time-stamp: "2004-10-06 23:26:21 ADT"
use strict;
use vars qw(%Name %Is_Disrec $Debug $VERSION);
$VERSION = '0.39';
# POD at the end.

#----------------------------------------------------------------------
{
# read the table out of our own POD!
  my $seeking = 1;
  my $count = 0;
  my($disrec,$tag,$name);
  my $last_name = '';
  while(<I18N::LangTags::List::DATA>) {
    if($seeking) {
      $seeking = 0 if m/=for woohah/;
    } elsif( ($disrec, $tag, $name) =
          m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
    ) {
      $name =~ s/\s*[;\.]*\s*$//g;
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      $last_name = $Name{$tag} = $name;
      $Is_Disrec{$tag} = 1 if $disrec;
    } elsif (m/[Ff]ormerly \"([-a-z0-9]+)\"/) {
      $Name{$1} = "$last_name (old tag)" if $last_name;
      $Is_Disrec{$1} = 1;
    }
  }
  die "No tags read??" unless $count;
}
#----------------------------------------------------------------------

sub name {
  my $tag = lc($_[0] || return);
  $tag =~ s/^\s+//s;
  $tag =~ s/\s+$//s;
  
  my $alt;
  if($tag =~ m/^x-(.+)/) {
    $alt = "i-$1";
  } elsif($tag =~ m/^i-(.+)/) {
    $alt = "x-$1";
  } else {
    $alt = '';
  }
  
  my $subform = '';
  my $name = '';
  print "Input: {$tag}\n" if $Debug;
  while(length $tag) {
    last if $name = $Name{$tag};
    last if $name = $Name{$alt};
    if($tag =~ s/(-[a-z0-9]+)$//s) {
      print "Shaving off: $1 leaving $tag\n" if $Debug;
      $subform = "$1$subform";
       # and loop around again
       
      $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n";
    } else {
      # we're trying to pull a subform off a primary tag. TILT!
      print "Aborting on: {$name}{$subform}\n" if $Debug;
      last;
    }
  }
  print "Output: {$name}{$subform}\n" if $Debug;
  
  return unless $name;   # Failure
  return $name unless $subform;   # Exact match
  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return "$name (Subform \"$subform\")";
}

#--------------------------------------------------------------------------

sub is_decent {
  my $tag = lc($_[0] || return 0);
  #require I18N::LangTags;

  return 0 unless
    $tag =~ 
    /^(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag  
      )*
    $/xs;

  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers, 
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  return 0 unless @supers;
  shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s;
  return 0 unless @supers;

  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};
     # so that decent subforms of indecent tags are decent
  }
  return 2 if $Name{$tag}; # not only is it decent, it's known!
  return 1;
}

#--------------------------------------------------------------------------
1;

__DATA__

=head1 NAME

I18N::LangTags::List -- tags and names for human languages

=head1 SYNOPSIS

  use I18N::LangTags::List;
  print "Parlez-vous... ", join(', ',
      I18N::LangTags::List::name('elx') || 'unknown_language',
      I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
      I18N::LangTags::List::name('en') || 'unknown_language',
      I18N::LangTags::List::name('en-CA') || 'unknown_language',
    ), "?\n";

prints:

  Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?

=head1 DESCRIPTION

This module provides a function 
C<I18N::LangTags::List::name( I<langtag> ) > that takes
a language tag (see L<I18N::LangTags|I18N::LangTags>)
and returns the best attempt at an English name for it, or
undef if it can't make sense of the tag.

The function I18N::LangTags::List::name(...) is not exported.

This module also provides a function
C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff
the language tag is syntactically valid and is for general use (like
"fr" or "fr-ca", below).  That is, it returns false for tags that are
syntactically invalid and for tags, like "aus", that are listed in
brackets below.  This function is not exported.

The map of tags-to-names that it uses is accessible as
%I18N::LangTags::List::Name, and it's the same as the list
that follows in this documentation, which should be useful
to you even if you don't use this module.

=head1 ABOUT LANGUAGE TAGS

Internet language tags, as defined in RFC 3066, are a formalism
for denoting human languages.  The two-letter ISO 639-1 language
codes are well known (as "en" for English), as are their forms
when qualified by a country code ("en-US").  Less well-known are the
arbitrary-length non-ISO codes (like "i-mingo"), and the 
recently (in 2001) introduced three-letter ISO-639-2 codes.

Remember these important facts:

=over

=item *

Language tags are not locale IDs.  A locale ID is written with a "_"
instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
I<means> something different than a language tag.  A language tag
denotes a language.  A locale ID denotes a language I<as used in>
a particular place, in combination with non-linguistic
location-specific information such as what currency is used
there.  Locales I<also> often denote character set information,
as in "en_US.ISO8859-1".

=item *

Language tags are not for computer languages.

=item *

"Dialect" is not a useful term, since there is no objective
criterion for establishing when two language-forms are
dialects of eachother, or are separate languages.

=item *

Language tags are not case-sensitive.  en-US, en-us, En-Us, etc.,
are all the same tag, and denote the same language.

=item *

Not every language tag really refers to a single language.  Some
language tags refer to conditions: i-default (system-message text
in English plus maybe other languages), und (undetermined
language).  Others (notably lots of the three-letter codes) are
bibliographic tags that classify whole groups of languages, as
with cus "Cushitic (Other)" (i.e., a
language that has been classed as Cushtic, but which has no more
specific code) or the even less linguistically coherent
sai for "South American Indian (Other)".  Though useful in
bibliography, B<SUCH TAGS ARE NOT
FOR GENERAL USE>.  For further guidance, email me.

=item *

Language tags are not country codes.  In fact, they are often
distinct codes, as with language tag ja for Japanese, and
ISO 3166 country code C<.jp> for Japan.

=back

=head1 LIST OF LANGUAGES

The first part of each item is the language tag, between
{...}.  It
is followed by an English name for the language or language-group.
Language tags that I judge to be not for general use, are bracketed.

This list is in alphabetical order by English name of the language.

=for reminder
 The name in the =item line MUST NOT have E<...>'s in it!!

=for woohah START

=over

=item {ab} : Abkhazian

eq Abkhaz

=item {ace} : Achinese

=item {ach} : Acoli

=item {ada} : Adangme

=item {ady} : Adyghe

eq Adygei

=item {aa} : Afar

=item {afh} : Afrihili

(Artificial)

=item {af} : Afrikaans

=item [{afa} : Afro-Asiatic (Other)]

=item {ak} : Akan

(Formerly "aka".)

=item {akk} : Akkadian

(Historical)

=item {sq} : Albanian

=item {ale} : Aleut

=item [{alg} : Algonquian languages]

NOT Algonquin!

=item [{tut} : Altaic (Other)]

=item {am} : Amharic

NOT Aramaic!

=item {i-ami} : Ami

eq Amis.  eq 'Amis.  eq Pangca.

=item [{apa} : Apache languages]

=item {ar} : Arabic

Many forms are mutually un-intelligible in spoken media.
Notable forms:
{ar-ae} UAE Arabic;
{ar-bh} Bahrain Arabic;
{ar-dz} Algerian Arabic;
{ar-eg} Egyptian Arabic;
{ar-iq} Iraqi Arabic;
{ar-jo} Jordanian Arabic;
{ar-kw} Kuwait Arabic;
{ar-lb} Lebanese Arabic;
{ar-ly} Libyan Arabic;
{ar-ma} Moroccan Arabic;
{ar-om} Omani Arabic;
{ar-qa} Qatari Arabic;
{ar-sa} Sauda Arabic;
{ar-sy} Syrian Arabic;
{ar-tn} Tunisian Arabic;
{ar-ye} Yemen Arabic.

=item {arc} : Aramaic

NOT Amharic!  NOT Samaritan Aramaic!

=item {arp} : Arapaho

=item {arn} : Araucanian

=item {arw} : Arawak

=item {hy} : Armenian

=item {an} : Aragonese

=item [{art} : Artificial (Other)]

=item {ast} : Asturian

eq Bable.

=item {as} : Assamese

=item [{ath} : Athapascan languages]

eq Athabaskan.  eq Athapaskan.  eq Athabascan.

=item [{aus} : Australian languages]

=item [{map} : Austronesian (Other)]

=item {av} : Avaric

(Formerly "ava".)

=item {ae} : Avestan

eq Zend

=item {awa} : Awadhi

=item {ay} : Aymara

=item {az} : Azerbaijani

eq Azeri

Notable forms:
{az-Arab} Azerbaijani in Arabic script;
{az-Cyrl} Azerbaijani in Cyrillic script;
{az-Latn} Azerbaijani in Latin script.

=item {ban} : Balinese

=item [{bat} : Baltic (Other)]

=item {bal} : Baluchi

=item {bm} : Bambara

(Formerly "bam".)

=item [{bai} : Bamileke languages]

=item {bad} : Banda

=item [{bnt} : Bantu (Other)]

=item {bas} : Basa

=item {ba} : Bashkir

=item {eu} : Basque

=item {btk} : Batak (Indonesia)

=item {bej} : Beja

=item {be} : Belarusian

eq Belarussian.  eq Byelarussian.
eq Belorussian.  eq Byelorussian.
eq White Russian.  eq White Ruthenian.
NOT Ruthenian!

=item {bem} : Bemba

=item {bn} : Bengali

eq Bangla.

=item [{ber} : Berber (Other)]

=item {bho} : Bhojpuri

=item {bh} : Bihari

=item {bik} : Bikol

=item {bin} : Bini

=item {bi} : Bislama

eq Bichelamar.

=item {bs} : Bosnian

=item {bra} : Braj

=item {br} : Breton

=item {bug} : Buginese

=item {bg} : Bulgarian

=item {i-bnn} : Bunun

=item {bua} : Buriat

=item {my} : Burmese

=item {cad} : Caddo

=item {car} : Carib

=item {ca} : Catalan

eq CatalE<aacute>n.  eq Catalonian.

=item [{cau} : Caucasian (Other)]

=item {ceb} : Cebuano

=item [{cel} : Celtic (Other)]

Notable forms:
{cel-gaulish} Gaulish (Historical)

=item [{cai} : Central American Indian (Other)]

=item {chg} : Chagatai

(Historical?)

=item [{cmc} : Chamic languages]

=item {ch} : Chamorro

=item {ce} : Chechen

=item {chr} : Cherokee

eq Tsalagi

=item {chy} : Cheyenne

=item {chb} : Chibcha

(Historical)  NOT Chibchan (which is a language family).

=item {ny} : Chichewa

eq Nyanja.  eq Chinyanja.

=item {zh} : Chinese

Many forms are mutually un-intelligible in spoken media.
Notable forms:
{zh-Hans} Chinese, in simplified script;
{zh-Hant} Chinese, in traditional script;
{zh-tw} Taiwan Chinese;
{zh-cn} PRC Chinese;
{zh-sg} Singapore Chinese;
{zh-mo} Macau Chinese;
{zh-hk} Hong Kong Chinese;
{zh-guoyu} Mandarin [Putonghua/Guoyu];
{zh-hakka} Hakka [formerly "i-hakka"];
{zh-min} Hokkien;
{zh-min-nan} Southern Hokkien;
{zh-wuu} Shanghaiese;
{zh-xiang} Hunanese;
{zh-gan} Gan;
{zh-yue} Cantonese.

=for etc
{i-hakka} Hakka (old tag)

=item {chn} : Chinook Jargon

eq Chinook Wawa.

=item {chp} : Chipewyan

=item {cho} : Choctaw

=item {cu} : Church Slavic

eq Old Church Slavonic.

=item {chk} : Chuukese

eq Trukese.  eq Chuuk.  eq Truk.  eq Ruk.

=item {cv} : Chuvash

=item {cop} : Coptic

=item {kw} : Cornish

=item {co} : Corsican

eq Corse.

=item {cr} : Cree

NOT Creek!  (Formerly "cre".)

=item {mus} : Creek

NOT Cree!

=item [{cpe} : English-based Creoles and pidgins (Other)]

=item [{cpf} : French-based Creoles and pidgins (Other)]

=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]

=item [{crp} : Creoles and pidgins (Other)]

=item {hr} : Croatian

eq Croat.

=item [{cus} : Cushitic (Other)]

=item {cs} : Czech

=item {dak} : Dakota

eq Nakota.  eq Latoka.

=item {da} : Danish

=item {dar} : Dargwa

=item {day} : Dayak

=item {i-default} : Default (Fallthru) Language

Defined in RFC 2277, this is for tagging text
(which must include English text, and might/should include text
in other appropriate languages) that is emitted in a context
where language-negotiation wasn't possible -- in SMTP mail failure
messages, for example.

=item {del} : Delaware

=item {din} : Dinka

=item {dv} : Divehi

eq Maldivian.  (Formerly "div".)

=item {doi} : Dogri

NOT Dogrib!

=item {dgr} : Dogrib

NOT Dogri!

=item [{dra} : Dravidian (Other)]

=item {dua} : Duala

=item {nl} : Dutch

eq Netherlander.  Notable forms:
{nl-nl} Netherlands Dutch;
{nl-be} Belgian Dutch.

=item {dum} : Middle Dutch (ca.1050-1350)

(Historical)

=item {dyu} : Dyula

=item {dz} : Dzongkha

=item {efi} : Efik

=item {egy} : Ancient Egyptian

(Historical)

=item {eka} : Ekajuk

=item {elx} : Elamite

(Historical)

=item {en} : English

Notable forms:
{en-au} Australian English;
{en-bz} Belize English;
{en-ca} Canadian English;
{en-gb} UK English;
{en-ie} Irish English;
{en-jm} Jamaican English;
{en-nz} New Zealand English;
{en-ph} Philippine English;
{en-tt} Trinidad English;
{en-us} US English;
{en-za} South African English;
{en-zw} Zimbabwe English.

=item {enm} : Old English (1100-1500)

(Historical)

=item {ang} : Old English (ca.450-1100)

eq Anglo-Saxon.  (Historical)

=item {i-enochian} : Enochian (Artificial)

=item {myv} : Erzya

=item {eo} : Esperanto

(Artificial)

=item {et} : Estonian

=item {ee} : Ewe

(Formerly "ewe".)

=item {ewo} : Ewondo

=item {fan} : Fang

=item {fat} : Fanti

=item {fo} : Faroese

=item {fj} : Fijian

=item {fi} : Finnish

=item [{fiu} : Finno-Ugrian (Other)]

eq Finno-Ugric.  NOT Ugaritic!

=item {fon} : Fon

=item {fr} : French

Notable forms:
{fr-fr} France French;
{fr-be} Belgian French;
{fr-ca} Canadian French;
{fr-ch} Swiss French;
{fr-lu} Luxembourg French;
{fr-mc} Monaco French.

=item {frm} : Middle French (ca.1400-1600)

(Historical)

=item {fro} : Old French (842-ca.1400)

(Historical)

=item {fy} : Frisian

=item {fur} : Friulian

=item {ff} : Fulah

(Formerly "ful".)

=item {gaa} : Ga

=item {gd} : Scots Gaelic

NOT Scots!

=item {gl} : Gallegan

eq Galician

=item {lg} : Ganda

(Formerly "lug".)

=item {gay} : Gayo

=item {gba} : Gbaya

=item {gez} : Geez

eq Ge'ez

=item {ka} : Georgian

=item {de} : German

Notable forms:
{de-at} Austrian German;
{de-be} Belgian German;
{de-ch} Swiss German;
{de-de} Germany German;
{de-li} Liechtenstein German;
{de-lu} Luxembourg German.

=item {gmh} : Middle High German (ca.1050-1500)

(Historical)

=item {goh} : Old High German (ca.750-1050)

(Historical)

=item [{gem} : Germanic (Other)]

=item {gil} : Gilbertese

=item {gon} : Gondi

=item {gor} : Gorontalo

=item {got} : Gothic

(Historical)

=item {grb} : Grebo

=item {grc} : Ancient Greek

(Historical)  (Until 15th century or so.)

=item {el} : Modern Greek

(Since 15th century or so.)

=item {gn} : Guarani

GuaranE<iacute>

=item {gu} : Gujarati

=item {gwi} : Gwich'in

eq Gwichin

=item {hai} : Haida

=item {ht} : Haitian

eq Haitian Creole

=item {ha} : Hausa

=item {haw} : Hawaiian

Hawai'ian

=item {he} : Hebrew

(Formerly "iw".)

=for etc
{iw} Hebrew (old tag)

=item {hz} : Herero

=item {hil} : Hiligaynon

=item {him} : Himachali

=item {hi} : Hindi

=item {ho} : Hiri Motu

=item {hit} : Hittite

(Historical)

=item {hmn} : Hmong

=item {hu} : Hungarian

=item {hup} : Hupa

=item {iba} : Iban

=item {is} : Icelandic

=item {io} : Ido

(Artificial)

=item {ig} : Igbo

(Formerly "ibo".)

=item {ijo} : Ijo

=item {ilo} : Iloko

=item [{inc} : Indic (Other)]

=item [{ine} : Indo-European (Other)]

=item {id} : Indonesian

(Formerly "in".)

=for etc
{in} Indonesian (old tag)

=item {inh} : Ingush

=item {ia} : Interlingua (International Auxiliary Language Association)

(Artificial)  NOT Interlingue!

=item {ie} : Interlingue

(Artificial)  NOT Interlingua!

=item {iu} : Inuktitut

A subform of "Eskimo".

=item {ik} : Inupiaq

A subform of "Eskimo".

=item [{ira} : Iranian (Other)]

=item {ga} : Irish

=item {mga} : Middle Irish (900-1200)

(Historical)

=item {sga} : Old Irish (to 900)

(Historical)

=item [{iro} : Iroquoian languages]

=item {it} : Italian

Notable forms:
{it-it} Italy Italian;
{it-ch} Swiss Italian.

=item {ja} : Japanese

(NOT "jp"!)

=item {jv} : Javanese

(Formerly "jw" because of a typo.)

=item {jrb} : Judeo-Arabic

=item {jpr} : Judeo-Persian

=item {kbd} : Kabardian

=item {kab} : Kabyle

=item {kac} : Kachin

=item {kl} : Kalaallisut

eq Greenlandic "Eskimo"

=item {xal} : Kalmyk

=item {kam} : Kamba

=item {kn} : Kannada

eq Kanarese.  NOT Canadian!

=item {kr} : Kanuri

(Formerly "kau".)

=item {krc} : Karachay-Balkar

=item {kaa} : Kara-Kalpak

=item {kar} : Karen

=item {ks} : Kashmiri

=item {csb} : Kashubian

eq Kashub

=item {kaw} : Kawi

=item {kk} : Kazakh

=item {kha} : Khasi

=item {km} : Khmer

eq Cambodian.  eq Kampuchean.

=item [{khi} : Khoisan (Other)]

=item {kho} : Khotanese

=item {ki} : Kikuyu

eq Gikuyu.

=item {kmb} : Kimbundu

=item {rw} : Kinyarwanda

=item {ky} : Kirghiz

=item {i-klingon} : Klingon

=item {kv} : Komi

=item {kg} : Kongo

(Formerly "kon".)

=item {kok} : Konkani

=item {ko} : Korean

=item {kos} : Kosraean

=item {kpe} : Kpelle

=item {kro} : Kru

=item {kj} : Kuanyama

=item {kum} : Kumyk

=item {ku} : Kurdish

=item {kru} : Kurukh

=item {kut} : Kutenai

=item {lad} : Ladino

eq Judeo-Spanish.  NOT Ladin (a minority language in Italy).

=item {lah} : Lahnda

NOT Lamba!

=item {lam} : Lamba

NOT Lahnda!

=item {lo} : Lao

eq Laotian.

=item {la} : Latin

(Historical)  NOT Ladin!  NOT Ladino!

=item {lv} : Latvian

eq Lettish.

=item {lb} : Letzeburgesch

eq Luxemburgian, eq Luxemburger.  (Formerly "i-lux".)

=for etc
{i-lux} Letzeburgesch (old tag)

=item {lez} : Lezghian

=item {li} : Limburgish

eq Limburger, eq Limburgan.  NOT Letzeburgesch!

=item {ln} : Lingala

=item {lt} : Lithuanian

=item {nds} : Low German

eq Low Saxon.  eq Low German.  eq Low Saxon.

=item {art-lojban} : Lojban (Artificial)

=item {loz} : Lozi

=item {lu} : Luba-Katanga

(Formerly "lub".)

=item {lua} : Luba-Lulua

=item {lui} : Luiseno

eq LuiseE<ntilde>o.

=item {lun} : Lunda

=item {luo} : Luo (Kenya and Tanzania)

=item {lus} : Lushai

=item {mk} : Macedonian

eq the modern Slavic language spoken in what was Yugoslavia.
NOT the form of Greek spoken in Greek Macedonia!

=item {mad} : Madurese

=item {mag} : Magahi

=item {mai} : Maithili

=item {mak} : Makasar

=item {mg} : Malagasy

=item {ms} : Malay

NOT Malayalam!

=item {ml} : Malayalam

NOT Malay!

=item {mt} : Maltese

=item {mnc} : Manchu

=item {mdr} : Mandar

NOT Mandarin!

=item {man} : Mandingo

=item {mni} : Manipuri

eq Meithei.

=item [{mno} : Manobo languages]

=item {gv} : Manx

=item {mi} : Maori

NOT Mari!

=item {mr} : Marathi

=item {chm} : Mari

NOT Maori!

=item {mh} : Marshall

eq Marshallese.

=item {mwr} : Marwari

=item {mas} : Masai

=item [{myn} : Mayan languages]

=item {men} : Mende

=item {mic} : Micmac

=item {min} : Minangkabau

=item {i-mingo} : Mingo

eq the Irquoian language West Virginia Seneca.  NOT New York Seneca!

=item [{mis} : Miscellaneous languages]

Don't use this.

=item {moh} : Mohawk

=item {mdf} : Moksha

=item {mo} : Moldavian

eq Moldovan.

=item [{mkh} : Mon-Khmer (Other)]

=item {lol} : Mongo

=item {mn} : Mongolian

eq Mongol.

=item {mos} : Mossi

=item [{mul} : Multiple languages]

Not for normal use.

=item [{mun} : Munda languages]

=item {nah} : Nahuatl

=item {nap} : Neapolitan

=item {na} : Nauru

=item {nv} : Navajo

eq Navaho.  (Formerly "i-navajo".)

=for etc
{i-navajo} Navajo (old tag)

=item {nd} : North Ndebele

=item {nr} : South Ndebele

=item {ng} : Ndonga

=item {ne} : Nepali

eq Nepalese.  Notable forms:
{ne-np} Nepal Nepali;
{ne-in} India Nepali.

=item {new} : Newari

=item {nia} : Nias

=item [{nic} : Niger-Kordofanian (Other)]

=item [{ssa} : Nilo-Saharan (Other)]

=item {niu} : Niuean

=item {nog} : Nogai

=item {non} : Old Norse

(Historical)

=item [{nai} : North American Indian]

Do not use this.

=item {no} : Norwegian

Note the two following forms:

=item {nb} : Norwegian Bokmal

eq BokmE<aring>l, (A form of Norwegian.)  (Formerly "no-bok".)

=for etc
{no-bok} Norwegian Bokmal (old tag)

=item {nn} : Norwegian Nynorsk

(A form of Norwegian.)  (Formerly "no-nyn".)

=for etc
{no-nyn} Norwegian Nynorsk (old tag)

=item [{nub} : Nubian languages]

=item {nym} : Nyamwezi

=item {nyn} : Nyankole

=item {nyo} : Nyoro

=item {nzi} : Nzima

=item {oc} : Occitan (post 1500)

eq ProvenE<ccedil>al, eq Provencal

=item {oj} : Ojibwa

eq Ojibwe.  (Formerly "oji".)

=item {or} : Oriya

=item {om} : Oromo

=item {osa} : Osage

=item {os} : Ossetian; Ossetic

=item [{oto} : Otomian languages]

Group of languages collectively called "OtomE<iacute>".

=item {pal} : Pahlavi

eq Pahlevi

=item {i-pwn} : Paiwan

eq Pariwan

=item {pau} : Palauan

=item {pi} : Pali

(Historical?)

=item {pam} : Pampanga

=item {pag} : Pangasinan

=item {pa} : Panjabi

eq Punjabi

=item {pap} : Papiamento

eq Papiamentu.

=item [{paa} : Papuan (Other)]

=item {fa} : Persian

eq Farsi.  eq Iranian.

=item {peo} : Old Persian (ca.600-400 B.C.)

=item [{phi} : Philippine (Other)]

=item {phn} : Phoenician

(Historical)

=item {pon} : Pohnpeian

NOT Pompeiian!

=item {pl} : Polish

=item {pt} : Portuguese

eq Portugese.  Notable forms:
{pt-pt} Portugal Portuguese;
{pt-br} Brazilian Portuguese.

=item [{pra} : Prakrit languages]

=item {pro} : Old Provencal (to 1500)

eq Old ProvenE<ccedil>al.  (Historical.)

=item {ps} : Pushto

eq Pashto.  eq Pushtu.

=item {qu} : Quechua

eq Quecha.

=item {rm} : Raeto-Romance

eq Romansh.

=item {raj} : Rajasthani

=item {rap} : Rapanui

=item {rar} : Rarotongan

=item [{qaa - qtz} : Reserved for local use.]

=item [{roa} : Romance (Other)]

NOT Romanian!  NOT Romany!  NOT Romansh!

=item {ro} : Romanian

eq Rumanian.  NOT Romany!

=item {rom} : Romany

eq Rom.  NOT Romanian!

=item {rn} : Rundi

=item {ru} : Russian

NOT White Russian!  NOT Rusyn!

=item [{sal} : Salishan languages]

Large language group.

=item {sam} : Samaritan Aramaic

NOT Aramaic!

=item {se} : Northern Sami

eq Lappish.  eq Lapp.  eq (Northern) Saami.

=item {sma} : Southern Sami

=item {smn} : Inari Sami

=item {smj} : Lule Sami

=item {sms} : Skolt Sami

=item [{smi} : Sami languages (Other)]

=item {sm} : Samoan

=item {sad} : Sandawe

=item {sg} : Sango

=item {sa} : Sanskrit

(Historical)

=item {sat} : Santali

=item {sc} : Sardinian

eq Sard.

=item {sas} : Sasak

=item {sco} : Scots

NOT Scots Gaelic!

=item {sel} : Selkup

=item [{sem} : Semitic (Other)]

=item {sr} : Serbian

eq Serb.  NOT Sorbian.

Notable forms:
{sr-Cyrl} : Serbian in Cyrillic script;
{sr-Latn} : Serbian in Latin script.

=item {srr} : Serer

=item {shn} : Shan

=item {sn} : Shona

=item {sid} : Sidamo

=item {sgn-...} : Sign Languages

Always use with a subtag.  Notable forms:
{sgn-gb} British Sign Language (BSL);
{sgn-ie} Irish Sign Language (ESL);
{sgn-ni} Nicaraguan Sign Language (ISN);
{sgn-us} American Sign Language (ASL).

(And so on with other country codes as the subtag.)

=item {bla} : Siksika

eq Blackfoot.  eq Pikanii.

=item {sd} : Sindhi

=item {si} : Sinhalese

eq Sinhala.

=item [{sit} : Sino-Tibetan (Other)]

=item [{sio} : Siouan languages]

=item {den} : Slave (Athapascan)

("Slavey" is a subform.)

=item [{sla} : Slavic (Other)]

=item {sk} : Slovak

eq Slovakian.

=item {sl} : Slovenian

eq Slovene.

=item {sog} : Sogdian

=item {so} : Somali

=item {son} : Songhai

=item {snk} : Soninke

=item {wen} : Sorbian languages

eq Wendish.  eq Sorb.  eq Lusatian.  eq Wend.  NOT Venda!  NOT Serbian!

=item {nso} : Northern Sotho

=item {st} : Southern Sotho

eq Sutu.  eq Sesotho.

=item [{sai} : South American Indian (Other)]

=item {es} : Spanish

Notable forms:
{es-ar} Argentine Spanish;
{es-bo} Bolivian Spanish;
{es-cl} Chilean Spanish;
{es-co} Colombian Spanish;
{es-do} Dominican Spanish;
{es-ec} Ecuadorian Spanish;
{es-es} Spain Spanish;
{es-gt} Guatemalan Spanish;
{es-hn} Honduran Spanish;
{es-mx} Mexican Spanish;
{es-pa} Panamanian Spanish;
{es-pe} Peruvian Spanish;
{es-pr} Puerto Rican Spanish;
{es-py} Paraguay Spanish;
{es-sv} Salvadoran Spanish;
{es-us} US Spanish;
{es-uy} Uruguayan Spanish;
{es-ve} Venezuelan Spanish.

=item {suk} : Sukuma

=item {sux} : Sumerian

(Historical)

=item {su} : Sundanese

=item {sus} : Susu

=item {sw} : Swahili

eq Kiswahili

=item {ss} : Swati

=item {sv} : Swedish

Notable forms:
{sv-se} Sweden Swedish;
{sv-fi} Finland Swedish.

=item {syr} : Syriac

=item {tl} : Tagalog

=item {ty} : Tahitian

=item [{tai} : Tai (Other)]

NOT Thai!

=item {tg} : Tajik

=item {tmh} : Tamashek

=item {ta} : Tamil

=item {i-tao} : Tao

eq Yami.

=item {tt} : Tatar

=item {i-tay} : Tayal

eq Atayal.  eq Atayan.

=item {te} : Telugu

=item {ter} : Tereno

=item {tet} : Tetum

=item {th} : Thai

NOT Tai!

=item {bo} : Tibetan

=item {tig} : Tigre

=item {ti} : Tigrinya

=item {tem} : Timne

eq Themne.  eq Timene.

=item {tiv} : Tiv

=item {tli} : Tlingit

=item {tpi} : Tok Pisin

=item {tkl} : Tokelau

=item {tog} : Tonga (Nyasa)

NOT Tsonga!

=item {to} : Tonga (Tonga Islands)

(Pronounced "Tong-a", not "Tong-ga")

NOT Tsonga!

=item {tsi} : Tsimshian

eq Sm'algyax

=item {ts} : Tsonga

NOT Tonga!

=item {i-tsu} : Tsou

=item {tn} : Tswana

Same as Setswana.

=item {tum} : Tumbuka

=item [{tup} : Tupi languages]

=item {tr} : Turkish

(Typically in Roman script)

=item {ota} : Ottoman Turkish (1500-1928)

(Typically in Arabic script)  (Historical)

=item {crh} : Crimean Turkish

eq Crimean Tatar

=item {tk} : Turkmen

eq Turkmeni.

=item {tvl} : Tuvalu

=item {tyv} : Tuvinian

eq Tuvan.  eq Tuvin.

=item {tw} : Twi

=item {udm} : Udmurt

=item {uga} : Ugaritic

NOT Ugric!

=item {ug} : Uighur

=item {uk} : Ukrainian

=item {umb} : Umbundu

=item {und} : Undetermined

Not a tag for normal use.

=item {ur} : Urdu

=item {uz} : Uzbek

eq E<Ouml>zbek

Notable forms:
{uz-Cyrl} Uzbek in Cyrillic script;
{uz-Latn} Uzbek in Latin script.

=item {vai} : Vai

=item {ve} : Venda

NOT Wendish!  NOT Wend!  NOT Avestan!  (Formerly "ven".)

=item {vi} : Vietnamese

eq Viet.

=item {vo} : Volapuk

eq VolapE<uuml>k.  (Artificial)

=item {vot} : Votic

eq Votian.  eq Vod.

=item [{wak} : Wakashan languages]

=item {wa} : Walloon

=item {wal} : Walamo

eq Wolaytta.

=item {war} : Waray

Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
not the smaller Philippine language Waray Sorsogon, nor the extinct
Australian language Waray.

=item {was} : Washo

eq Washoe

=item {cy} : Welsh

=item {wo} : Wolof

=item {x-...} : Unregistered (Semi-Private Use)

"x-" is a prefix for language tags that are not registered with ISO
or IANA.  Example, x-double-dutch

=item {xh} : Xhosa

=item {sah} : Yakut

=item {yao} : Yao

(The Yao in Malawi?)

=item {yap} : Yapese

eq Yap

=item {ii} : Sichuan Yi

=item {yi} : Yiddish

Formerly "ji".  Usually in Hebrew script.

Notable forms:
{yi-latn} Yiddish in Latin script

=item {yo} : Yoruba

=item [{ypk} : Yupik languages]

Several "Eskimo" languages.

=item {znd} : Zande

=item [{zap} : Zapotec]

(A group of languages.)

=item {zen} : Zenaga

NOT Zend.

=item {za} : Zhuang

=item {zu} : Zulu

=item {zun} : Zuni

eq ZuE<ntilde>i

=back

=for woohah END

=head1 SEE ALSO

L<I18N::LangTags|I18N::LangTags> and its "See Also" section.

=head1 COPYRIGHT AND DISCLAIMER

Copyright (c) 2001+ Sean M. Burke. All rights reserved.

You can redistribute and/or
modify this document under the same terms as Perl itself.

This document is provided in the hope that it will be
useful, but without any warranty;
without even the implied warranty of accuracy, authoritativeness,
completeness, merchantability, or fitness for a particular purpose.

Email any corrections or questions to me.

=head1 AUTHOR

Sean M. Burke, sburkeE<64>cpan.org

=cut


# To generate a list of just the two and three-letter codes:

#!/usr/local/bin/perl -w

require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
 # Sean M. Burke, sburke@cpan.org
 # This program is for generating the language_codes.txt file
use strict;
use LWP::Simple;
use HTML::TreeBuilder 3.10;
my $root = HTML::TreeBuilder->new();
my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
$root->parse(get($url) || die "Can't get $url");
$root->eof();

my @codes;

foreach my $tr ($root->find_by_tag_name('tr')) {
  my @f = map $_->as_text(), $tr->content_list();
  #print map("<$_> ", @f), "\n";
  next unless @f == 5;
  pop @f; # nix the French name
  next if $f[-1] eq 'Language Name (English)'; # it's a header line
  my $xx = splice(@f, 2,1); # pull out the two-letter code
  $f[-1] =~ s/^\s+//;
  $f[-1] =~ s/\s+$//;
  if($xx =~ m/[a-zA-Z]/) {   # there's a two-letter code for it
    push   @codes, [ lc($f[-1]),   "$xx\t$f[-1]\n" ];
  } else { # print the three-letter codes.
    if($f[0] eq $f[1]) {
      push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
    } else { # shouldn't happen
      push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; 
    }
  }
}

print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
print "[ based on $url\n at ", scalar(localtime), "]\n",
  "[Note: doesn't include IANA-registered codes.]\n";
exit;
__END__

PK�6�Z��W���LangTags/Detect.pmnu�[���
# Time-stamp: "2004-06-20 21:47:55 ADT"

require 5;
package I18N::LangTags::Detect;
use strict;

use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
             $USE_LITERALS $MATCH_SUPERS_TIGHTLY);

BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
 # define the constant 'DEBUG' at compile-time

$VERSION = "1.06";
@ISA = ();
use I18N::LangTags qw(alternate_language_tags locale2language_tag);

sub _uniq { my %seen; return grep(!($seen{$_}++), @_); }
sub _normalize {
  my(@languages) =
    map lc($_),
    grep $_,
    map {; $_, alternate_language_tags($_) } @_;
  return _uniq(@languages) if wantarray;
  return $languages[0];
}

#---------------------------------------------------------------------------
# The extent of our functional interface:

sub detect () { return __PACKAGE__->ambient_langprefs; }

#===========================================================================

sub ambient_langprefs { # always returns things untainted
  my $base_class = $_[0];
  
  return $base_class->http_accept_langs
   if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI
       # it's off in its own routine because it's complicated

  # Not running as a CGI: try to puzzle out from the environment
  my @languages;

  foreach my $envname (qw( LANGUAGE LC_ALL LC_MESSAGES LANG )) {
    next unless $ENV{$envname};
    DEBUG and print "Noting \$$envname: $ENV{$envname}\n";
    push @languages,
      map locale2language_tag($_),
        # if it's a lg tag, fine, pass thru (untainted)
        # if it's a locale ID, try converting to a lg tag (untainted),
        # otherwise nix it.

      split m/[,:]/,
      $ENV{$envname}
    ;
    last; # first one wins
  }
  
  if($ENV{'IGNORE_WIN32_LOCALE'}) {
    # no-op
  } elsif(&_try_use('Win32::Locale')) {
    # If we have that module installed...
    push @languages, Win32::Locale::get_language() || ''
     if defined &Win32::Locale::get_language;
  }
  return _normalize @languages;
}

#---------------------------------------------------------------------------

sub http_accept_langs {
  # Deal with HTTP "Accept-Language:" stuff.  Hassle.
  # This code is more lenient than RFC 3282, which you must read.
  # Hm.  Should I just move this into I18N::LangTags at some point?
  no integer;

  my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
  # (always ends up untainting)

  return() unless defined $in and length $in;

  $in =~ s/\([^\)]*\)//g; # nix just about any comment
  
  if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
    # Very common case: just one language tag
    return _normalize $1;
  } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
    # Common case these days: just "foo, bar, baz"
    return _normalize( $in =~ m/([a-zA-Z][-a-zA-Z]+)/g );
  }

  # Else it's complicated...

  $in =~ s/\s+//g;  # Yes, we can just do without the WS!
  my @in = $in =~ m/([^,]+)/g;
  my %pref;
  
  my $q;
  foreach my $tag (@in) {
    next unless $tag =~
     m/^([a-zA-Z][-a-zA-Z]+)
        (?:
         ;q=
         (
          \d*   # a bit too broad of a RE, but so what.
          (?:
            \.\d+
          )?
         )
        )?
       $
      /sx
    ;
    $q = (defined $2 and length $2) ? $2 : 1;
    #print "$1 with q=$q\n";
    push @{ $pref{$q} }, lc $1;
  }

  return _normalize(
    # Read off %pref, in descending key order...
    map @{$pref{$_}},
    sort {$b <=> $a}
    keys %pref
  );
}

#===========================================================================

my %tried = ();
  # memoization of whether we've used this module, or found it unusable.

sub _try_use {   # Basically a wrapper around "require Modulename"
  # "Many men have tried..."  "They tried and failed?"  "They tried and died."
  return $tried{$_[0]} if exists $tried{$_[0]};  # memoization

  my $module = $_[0];   # ASSUME sane module name!
  { no strict 'refs';
    no warnings 'once';
    return($tried{$module} = 1)
     if %{$module . "::Lexicon"} or @{$module . "::ISA"};
    # weird case: we never use'd it, but there it is!
  }

  print " About to use $module ...\n" if DEBUG;
  {
    local $SIG{'__DIE__'};
    local @INC = @INC;
    pop @INC if $INC[-1] eq '.';
    eval "require $module"; # used to be "use $module", but no point in that.
  }
  if($@) {
    print "Error using $module \: $@\n" if DEBUG > 1;
    return $tried{$module} = 0;
  } else {
    print " OK, $module is used\n" if DEBUG;
    return $tried{$module} = 1;
  }
}

#---------------------------------------------------------------------------
1;
__END__


=head1 NAME

I18N::LangTags::Detect - detect the user's language preferences

=head1 SYNOPSIS

  use I18N::LangTags::Detect;
  my @user_wants = I18N::LangTags::Detect::detect();

=head1 DESCRIPTION

It is a common problem to want to detect what language(s) the user would
prefer output in.

=head1 FUNCTIONS

This module defines one public function,
C<I18N::LangTags::Detect::detect()>.  This function is not exported
(nor is even exportable), and it takes no parameters.

In scalar context, the function returns the most preferred language
tag (or undef if no preference was seen).

In list context (which is usually what you want),
the function returns a
(possibly empty) list of language tags representing (best first) what
languages the user apparently would accept output in.  You will
probably want to pass the output of this through
C<I18N::LangTags::implicate_supers_tightly(...)>
or
C<I18N::LangTags::implicate_supers(...)>, like so:

  my @languages =
    I18N::LangTags::implicate_supers_tightly(
      I18N::LangTags::Detect::detect()
    );


=head1 ENVIRONMENT

This module looks for several environment variables, including
REQUEST_METHOD, HTTP_ACCEPT_LANGUAGE,
LANGUAGE, LC_ALL, LC_MESSAGES, and LANG.

It will also use the L<Win32::Locale> module, if it's installed.


=head1 SEE ALSO

L<I18N::LangTags>, L<Win32::Locale>, L<Locale::Maketext>.

(This module's core code started out as a routine in Locale::Maketext;
but I moved it here once I realized it was more generally useful.)


=head1 COPYRIGHT

Copyright (c) 1998-2004 Sean M. Burke. All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

The programs and documentation in this dist are distributed in
the hope that they will be useful, but without any warranty; without
even the implied warranty of merchantability or fitness for a
particular purpose.


=head1 AUTHOR

Sean M. Burke C<sburke@cpan.org>

=cut

# a tip: Put a bit of chopped up pickled ginger in your salad. It's tasty!
PK�6�Z
�C�m�mLangTags.pmnu�[���
# Time-stamp: "2004-10-06 23:26:33 ADT"
# Sean M. Burke <sburke@cpan.org>

require 5.000;
package I18N::LangTags;
use strict;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(is_language_tag same_language_tag
                extract_language_tags super_languages
                similarity_language_tag is_dialect_of
                locale2language_tag alternate_language_tags
                encode_language_tag panic_languages
                implicate_supers
                implicate_supers_strictly
               );
%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);

$VERSION = "0.42";

sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function


=head1 NAME

I18N::LangTags - functions for dealing with RFC3066-style language tags

=head1 SYNOPSIS

  use I18N::LangTags();

...or specify whichever of those functions you want to import, like so:

  use I18N::LangTags qw(implicate_supers similarity_language_tag);

All the exportable functions are listed below -- you're free to import
only some, or none at all.  By default, none are imported.  If you
say:

    use I18N::LangTags qw(:ALL)

...then all are exported.  (This saves you from having to use
something less obvious like C<use I18N::LangTags qw(/./)>.)

If you don't import any of these functions, assume a C<&I18N::LangTags::>
in front of all the function names in the following examples.

=head1 DESCRIPTION

Language tags are a formalism, described in RFC 3066 (obsoleting
1766), for declaring what language form (language and possibly
dialect) a given chunk of information is in.

This library provides functions for common tasks involving language
tags as they are needed in a variety of protocols and applications.

Please see the "See Also" references for a thorough explanation
of how to correctly use language tags.

=over

=cut

###########################################################################

=item * the function is_language_tag($lang1)

Returns true iff $lang1 is a formally valid language tag.

   is_language_tag("fr")            is TRUE
   is_language_tag("x-jicarilla")   is FALSE
       (Subtags can be 8 chars long at most -- 'jicarilla' is 9)

   is_language_tag("sgn-US")    is TRUE
       (That's American Sign Language)

   is_language_tag("i-Klikitat")    is TRUE
       (True without regard to the fact noone has actually
        registered Klikitat -- it's a formally valid tag)

   is_language_tag("fr-patois")     is TRUE
       (Formally valid -- altho descriptively weak!)

   is_language_tag("Spanish")       is FALSE
   is_language_tag("french-patois") is FALSE
       (No good -- first subtag has to match
        /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066)

   is_language_tag("x-borg-prot2532") is TRUE
       (Yes, subtags can contain digits, as of RFC3066)

=cut

sub is_language_tag {

  ## Changes in the language tagging standards may have to be reflected here.

  my($tag) = lc($_[0]);

  return 0 if $tag eq "i" or $tag eq "x";
  # Bad degenerate cases that the following
  #  regexp would erroneously let pass

  return $tag =~ 
    /^(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag  
      )*
    $/xs ? 1 : 0;
}

###########################################################################

=item * the function extract_language_tags($whatever)

Returns a list of whatever looks like formally valid language tags
in $whatever.  Not very smart, so don't get too creative with
what you want to feed it.

  extract_language_tags("fr, fr-ca, i-mingo")
    returns:   ('fr', 'fr-ca', 'i-mingo')

  extract_language_tags("It's like this: I'm in fr -- French!")
    returns:   ('It', 'in', 'fr')
  (So don't just feed it any old thing.)

The output is untainted.  If you don't know what tainting is,
don't worry about it.

=cut

sub extract_language_tags {

  ## Changes in the language tagging standards may have to be reflected here.

  my($text) =
    $_[0] =~ m/(.+)/  # to make for an untainted result
    ? $1 : ''
  ;
  
  return grep(!m/^[ixIX]$/s, # 'i' and 'x' aren't good tags
    $text =~ 
    m/
      \b
      (?:  # First subtag
         [iIxX] | [a-zA-Z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-zA-Z0-9]{1,8}  # subtag  
      )*
      \b
    /xsg
  );
}

###########################################################################

=item * the function same_language_tag($lang1, $lang2)

Returns true iff $lang1 and $lang2 are acceptable variant tags
representing the same language-form.

   same_language_tag('x-kadara', 'i-kadara')  is TRUE
      (The x/i- alternation doesn't matter)
   same_language_tag('X-KADARA', 'i-kadara')  is TRUE
      (...and neither does case)
   same_language_tag('en',       'en-US')     is FALSE
      (all-English is not the SAME as US English)
   same_language_tag('x-kadara', 'x-kadar')   is FALSE
      (these are totally unrelated tags)
   same_language_tag('no-bok',    'nb')       is TRUE
      (no-bok is a legacy tag for nb (Norwegian Bokmal))

C<same_language_tag> works by just seeing whether
C<encode_language_tag($lang1)> is the same as
C<encode_language_tag($lang2)>.

(Yes, I know this function is named a bit oddly.  Call it historic
reasons.)

=cut

sub same_language_tag {
  my $el1 = &encode_language_tag($_[0]);
  return 0 unless defined $el1;
   # this avoids the problem of
   # encode_language_tag($lang1) eq and encode_language_tag($lang2)
   # being true if $lang1 and $lang2 are both undef

  return $el1 eq &encode_language_tag($_[1]) ? 1 : 0;
}

###########################################################################

=item * the function similarity_language_tag($lang1, $lang2)

Returns an integer representing the degree of similarity between
tags $lang1 and $lang2 (the order of which does not matter), where
similarity is the number of common elements on the left,
without regard to case and to x/i- alternation.

   similarity_language_tag('fr', 'fr-ca')           is 1
      (one element in common)
   similarity_language_tag('fr-ca', 'fr-FR')        is 1
      (one element in common)

   similarity_language_tag('fr-CA-joual',
                           'fr-CA-PEI')             is 2
   similarity_language_tag('fr-CA-joual', 'fr-CA')  is 2
      (two elements in common)

   similarity_language_tag('x-kadara', 'i-kadara')  is 1
      (x/i- doesn't matter)

   similarity_language_tag('en',       'x-kadar')   is 0
   similarity_language_tag('x-kadara', 'x-kadar')   is 0
      (unrelated tags -- no similarity)

   similarity_language_tag('i-cree-syllabic',
                           'i-cherokee-syllabic')   is 0
      (no B<leftmost> elements in common!)

=cut

sub similarity_language_tag {
  my $lang1 = &encode_language_tag($_[0]);
  my $lang2 = &encode_language_tag($_[1]);
   # And encode_language_tag takes care of the whole
   #  no-nyn==nn, i-hakka==zh-hakka, etc, things
   
  # NB: (i-sil-...)?  (i-sgn-...)?

  return undef if !defined($lang1) and !defined($lang2);
  return 0 if !defined($lang1) or !defined($lang2);

  my @l1_subtags = split('-', $lang1);
  my @l2_subtags = split('-', $lang2);
  my $similarity = 0;

  while(@l1_subtags and @l2_subtags) {
    if(shift(@l1_subtags) eq shift(@l2_subtags)) {
      ++$similarity;
    } else {
      last;
    } 
  }
  return $similarity;
}

###########################################################################

=item * the function is_dialect_of($lang1, $lang2)

Returns true iff language tag $lang1 represents a subform of
language tag $lang2.

B<Get the order right!  It doesn't work the other way around!>

   is_dialect_of('en-US', 'en')            is TRUE
     (American English IS a dialect of all-English)

   is_dialect_of('fr-CA-joual', 'fr-CA')   is TRUE
   is_dialect_of('fr-CA-joual', 'fr')      is TRUE
     (Joual is a dialect of (a dialect of) French)

   is_dialect_of('en', 'en-US')            is FALSE
     (all-English is a NOT dialect of American English)

   is_dialect_of('fr', 'en-CA')            is FALSE

   is_dialect_of('en',    'en'   )         is TRUE
   is_dialect_of('en-US', 'en-US')         is TRUE
     (B<Note:> these are degenerate cases)

   is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
     (the x/i thing doesn't matter, nor does case)

   is_dialect_of('nn', 'no')               is TRUE
     (because 'nn' (New Norse) is aliased to 'no-nyn',
      as a special legacy case, and 'no-nyn' is a
      subform of 'no' (Norwegian))

=cut

sub is_dialect_of {

  my $lang1 = &encode_language_tag($_[0]);
  my $lang2 = &encode_language_tag($_[1]);

  return undef if !defined($lang1) and !defined($lang2);
  return 0 if !defined($lang1) or !defined($lang2);

  return 1 if $lang1 eq $lang2;
  return 0 if length($lang1) < length($lang2);

  $lang1 .= '-';
  $lang2 .= '-';
  return
    (substr($lang1, 0, length($lang2)) eq $lang2) ? 1 : 0;
}

###########################################################################

=item * the function super_languages($lang1)

Returns a list of language tags that are superordinate tags to $lang1
-- it gets this by removing subtags from the end of $lang1 until
nothing (or just "i" or "x") is left.

   super_languages("fr-CA-joual")  is  ("fr-CA", "fr")

   super_languages("en-AU")  is  ("en")

   super_languages("en")  is  empty-list, ()

   super_languages("i-cherokee")  is  empty-list, ()
    ...not ("i"), which would be illegal as well as pointless.

If $lang1 is not a valid language tag, returns empty-list in
a list context, undef in a scalar context.

A notable and rather unavoidable problem with this method:
"x-mingo-tom" has an "x" because the whole tag isn't an
IANA-registered tag -- but super_languages('x-mingo-tom') is
('x-mingo') -- which isn't really right, since 'i-mingo' is
registered.  But this module has no way of knowing that.  (But note
that same_language_tag('x-mingo', 'i-mingo') is TRUE.)

More importantly, you assume I<at your peril> that superordinates of
$lang1 are mutually intelligible with $lang1.  Consider this
carefully.

=cut 

sub super_languages {
  my $lang1 = $_[0];
  return() unless defined($lang1) && &is_language_tag($lang1);

  # a hack for those annoying new (2001) tags:
  $lang1 =~ s/^nb\b/no-bok/i; # yes, backwards
  $lang1 =~ s/^nn\b/no-nyn/i; # yes, backwards
  $lang1 =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way
   # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark

  my @l1_subtags = split('-', $lang1);

  ## Changes in the language tagging standards may have to be reflected here.

  # NB: (i-sil-...)?

  my @supers = ();
  foreach my $bit (@l1_subtags) {
    push @supers, 
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }
  pop @supers if @supers;
  shift @supers if @supers && $supers[0] =~ m<^[iIxX]$>s;
  return reverse @supers;
}

###########################################################################

=item * the function locale2language_tag($locale_identifier)

This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1")
and maps it to a language tag.  If it's not mappable (as with,
notably, "C" and "POSIX"), this returns empty-list in a list context,
or undef in a scalar context.

   locale2language_tag("en") is "en"

   locale2language_tag("en_US") is "en-US"

   locale2language_tag("en_US.ISO8859-1") is "en-US"

   locale2language_tag("C") is undef or ()

   locale2language_tag("POSIX") is undef or ()

   locale2language_tag("POSIX") is undef or ()

I'm not totally sure that locale names map satisfactorily to language
tags.  Think REAL hard about how you use this.  YOU HAVE BEEN WARNED.

The output is untainted.  If you don't know what tainting is,
don't worry about it.

=cut 

sub locale2language_tag {
  my $lang =
    $_[0] =~ m/(.+)/  # to make for an untainted result
    ? $1 : ''
  ;

  return $lang if &is_language_tag($lang); # like "en"

  $lang =~ tr<_><->;  # "en_US" -> en-US
  $lang =~ s<(?:[\.\@][-_a-zA-Z0-9]+)+$><>s;  # "en_US.ISO8859-1" -> en-US
   # it_IT.utf8@euro => it-IT

  return $lang if &is_language_tag($lang);

  return;
}

###########################################################################

=item * the function encode_language_tag($lang1)

This function, if given a language tag, returns an encoding of it such
that:

* tags representing different languages never get the same encoding.

* tags representing the same language always get the same encoding.

* an encoding of a formally valid language tag always is a string
value that is defined, has length, and is true if considered as a
boolean.

Note that the encoding itself is B<not> a formally valid language tag.
Note also that you cannot, currently, go from an encoding back to a
language tag that it's an encoding of.

Note also that you B<must> consider the encoded value as atomic; i.e.,
you should not consider it as anything but an opaque, unanalysable
string value.  (The internals of the encoding method may change in
future versions, as the language tagging standard changes over time.)

C<encode_language_tag> returns undef if given anything other than a
formally valid language tag.

The reason C<encode_language_tag> exists is because different language
tags may represent the same language; this is normally treatable with
C<same_language_tag>, but consider this situation:

You have a data file that expresses greetings in different languages.
Its format is "[language tag]=[how to say 'Hello']", like:

          en-US=Hiho
          fr=Bonjour
          i-mingo=Hau'

And suppose you write a program that reads that file and then runs as
a daemon, answering client requests that specify a language tag and
then expect the string that says how to greet in that language.  So an
interaction looks like:

          greeting-client asks:    fr
          greeting-server answers: Bonjour

So far so good.  But suppose the way you're implementing this is:

          my %greetings;
          die unless open(IN, "<", "in.dat");
          while(<IN>) {
            chomp;
            next unless /^([^=]+)=(.+)/s;
            my($lang, $expr) = ($1, $2);
            $greetings{$lang} = $expr;
          }
          close(IN);

at which point %greetings has the contents:

          "en-US"   => "Hiho"
          "fr"      => "Bonjour"
          "i-mingo" => "Hau'"

And suppose then that you answer client requests for language $wanted
by just looking up $greetings{$wanted}.

If the client asks for "fr", that will look up successfully in
%greetings, to the value "Bonjour".  And if the client asks for
"i-mingo", that will look up successfully in %greetings, to the value
"Hau'".

But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the
lookup in %greetings fails.  That's the Wrong Thing.

You could instead do lookups on $wanted with:

          use I18N::LangTags qw(same_language_tag);
          my $response = '';
          foreach my $l2 (keys %greetings) {
            if(same_language_tag($wanted, $l2)) {
              $response = $greetings{$l2};
              last;
            }
          }

But that's rather inefficient.  A better way to do it is to start your
program with:

          use I18N::LangTags qw(encode_language_tag);
          my %greetings;
          die unless open(IN, "<", "in.dat");
          while(<IN>) {
            chomp;
            next unless /^([^=]+)=(.+)/s;
            my($lang, $expr) = ($1, $2);
            $greetings{
                        encode_language_tag($lang)
                      } = $expr;
          }
          close(IN);

and then just answer client requests for language $wanted by just
looking up

          $greetings{encode_language_tag($wanted)}

And that does the Right Thing.

=cut

sub encode_language_tag {
  # Only similarity_language_tag() is allowed to analyse encodings!

  ## Changes in the language tagging standards may have to be reflected here.

  my($tag) = $_[0] || return undef;
  return undef unless &is_language_tag($tag);

  # For the moment, these legacy variances are few enough that
  #  we can just handle them here with regexps.
  $tag =~ s/^iw\b/he/i; # Hebrew
  $tag =~ s/^in\b/id/i; # Indonesian
  $tag =~ s/^cre\b/cr/i; # Cree
  $tag =~ s/^jw\b/jv/i; # Javanese
  $tag =~ s/^[ix]-lux\b/lb/i;  # Luxemburger
  $tag =~ s/^[ix]-navajo\b/nv/i;  # Navajo
  $tag =~ s/^ji\b/yi/i;  # Yiddish
  # SMB 2003 -- Hm.  There's a bunch of new XXX->YY variances now,
  #  but maybe they're all so obscure I can ignore them.   "Obscure"
  #  meaning either that the language is obscure, and/or that the
  #  XXX form was extant so briefly that it's unlikely it was ever
  #  used.  I hope.
  #
  # These go FROM the simplex to complex form, to get
  #  similarity-comparison right.  And that's okay, since
  #  similarity_language_tag is the only thing that
  #  analyzes our output.
  $tag =~ s/^[ix]-hakka\b/zh-hakka/i;  # Hakka
  $tag =~ s/^nb\b/no-bok/i;  # BACKWARDS for Bokmal
  $tag =~ s/^nn\b/no-nyn/i;  # BACKWARDS for Nynorsk

  $tag =~ s/^[xiXI]-//s;
   # Just lop off any leading "x/i-"

  return "~" . uc($tag);
}

#--------------------------------------------------------------------------

=item * the function alternate_language_tags($lang1)

This function, if given a language tag, returns all language tags that
are alternate forms of this language tag.  (I.e., tags which refer to
the same language.)  This is meant to handle legacy tags caused by
the minor changes in language tag standards over the years; and
the x-/i- alternation is also dealt with.

Note that this function does I<not> try to equate new (and never-used,
and unusable)
ISO639-2 three-letter tags to old (and still in use) ISO639-1
two-letter equivalents -- like "ara" -> "ar" -- because
"ara" has I<never> been in use as an Internet language tag,
and RFC 3066 stipulates that it never should be, since a shorter
tag ("ar") exists.

Examples:

  alternate_language_tags('no-bok')       is ('nb')
  alternate_language_tags('nb')           is ('no-bok')
  alternate_language_tags('he')           is ('iw')
  alternate_language_tags('iw')           is ('he')
  alternate_language_tags('i-hakka')      is ('zh-hakka', 'x-hakka')
  alternate_language_tags('zh-hakka')     is ('i-hakka', 'x-hakka')
  alternate_language_tags('en')           is ()
  alternate_language_tags('x-mingo-tom')  is ('i-mingo-tom')
  alternate_language_tags('x-klikitat')   is ('i-klikitat')
  alternate_language_tags('i-klikitat')   is ('x-klikitat')

This function returns empty-list if given anything other than a formally
valid language tag.

=cut

my %alt = qw( i x   x i   I X   X I );
sub alternate_language_tags {
  my $tag = $_[0];
  return() unless &is_language_tag($tag);

  my @em; # push 'em real goood!

  # For the moment, these legacy variances are few enough that
  #  we can just handle them here with regexps.
  
  if(     $tag =~ m/^[ix]-hakka\b(.*)/i) {push @em, "zh-hakka$1";
  } elsif($tag =~ m/^zh-hakka\b(.*)/i) {  push @em, "x-hakka$1", "i-hakka$1";

  } elsif($tag =~ m/^he\b(.*)/i) { push @em, "iw$1";
  } elsif($tag =~ m/^iw\b(.*)/i) { push @em, "he$1";

  } elsif($tag =~ m/^in\b(.*)/i) { push @em, "id$1";
  } elsif($tag =~ m/^id\b(.*)/i) { push @em, "in$1";

  } elsif($tag =~ m/^[ix]-lux\b(.*)/i) { push @em, "lb$1";
  } elsif($tag =~ m/^lb\b(.*)/i) {       push @em, "i-lux$1", "x-lux$1";

  } elsif($tag =~ m/^[ix]-navajo\b(.*)/i) { push @em, "nv$1";
  } elsif($tag =~ m/^nv\b(.*)/i) {          push @em, "i-navajo$1", "x-navajo$1";

  } elsif($tag =~ m/^yi\b(.*)/i) { push @em, "ji$1";
  } elsif($tag =~ m/^ji\b(.*)/i) { push @em, "yi$1";

  } elsif($tag =~ m/^nb\b(.*)/i) {     push @em, "no-bok$1";
  } elsif($tag =~ m/^no-bok\b(.*)/i) { push @em, "nb$1";
  
  } elsif($tag =~ m/^nn\b(.*)/i) {     push @em, "no-nyn$1";
  } elsif($tag =~ m/^no-nyn\b(.*)/i) { push @em, "nn$1";
  }

  push @em, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;
  return @em;
}

###########################################################################

{
  # Init %Panic...
  
  my @panic = (  # MUST all be lowercase!
   # Only large ("national") languages make it in this list.
   #  If you, as a user, are so bizarre that the /only/ language
   #  you claim to accept is Galician, then no, we won't do you
   #  the favor of providing Catalan as a panic-fallback for
   #  you.  Because if I start trying to add "little languages" in
   #  here, I'll just go crazy.

   # Scandinavian lgs.  All based on opinion and hearsay.
   'sv' => [qw(nb no da nn)],
   'da' => [qw(nb no sv nn)], # I guess
   [qw(no nn nb)], [qw(no nn nb sv da)],
   'is' => [qw(da sv no nb nn)],
   'fo' => [qw(da is no nb nn sv)], # I guess
   
   # I think this is about the extent of tolerable intelligibility
   #  among large modern Romance languages.
   'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
   'ca' => [qw(es pt it fr)],
   'es' => [qw(ca it fr pt)],
   'it' => [qw(es fr ca pt)],
   'fr' => [qw(es it ca pt)],
   
   # Also assume that speakers of the main Indian languages prefer
   #  to read/hear Hindi over English
   [qw(
     as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
   )] => 'hi',
    # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
    # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
    # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
   'hi' => [qw(bn pa as or)],
   # I welcome finer data for the other Indian languages.
   #  E.g., what should Oriya's list be, besides just Hindi?
   
   # And the panic languages for English is, of course, nil!

   # My guesses at Slavic intelligibility:
   ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukranian
   ([qw(sr hr bs)]) x 2,  # Serbian, Croatian, Bosnian
   'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak

   'ms' => 'id', 'id' => 'ms', # Malay + Indonesian

   'et' => 'fi', 'fi' => 'et', # Estonian + Finnish

   #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai

  );
  my($k,$v);
  while(@panic) {
    ($k,$v) = splice(@panic,0,2);
    foreach my $k (ref($k) ? @$k : $k) {
      foreach my $v (ref($v) ? @$v : $v) {
        push @{$Panic{$k} ||= []}, $v unless $k eq $v;
      }
    }
  }
}

=item * the function @langs = panic_languages(@accept_languages)

This function takes a list of 0 or more language
tags that constitute a given user's Accept-Language list, and
returns a list of tags for I<other> (non-super)
languages that are probably acceptable to the user, to be
used I<if all else fails>.

For example, if a user accepts only 'ca' (Catalan) and
'es' (Spanish), and the documents/interfaces you have
available are just in German, Italian, and Chinese, then
the user will most likely want the Italian one (and not
the Chinese or German one!), instead of getting
nothing.  So C<panic_languages('ca', 'es')> returns
a list containing 'it' (Italian).

English ('en') is I<always> in the return list, but
whether it's at the very end or not depends
on the input languages.  This function works by consulting
an internal table that stipulates what common
languages are "close" to each other.

A useful construct you might consider using is:

  @fallbacks = super_languages(@accept_languages);
  push @fallbacks, panic_languages(
    @accept_languages, @fallbacks,
  );

=cut

sub panic_languages {
  # When in panic or in doubt, run in circles, scream, and shout!
  my(@out, %seen);
  foreach my $t (@_) {
    next unless $t;
    next if $seen{$t}++; # so we don't return it or hit it again
    # push @out, super_languages($t); # nah, keep that separate
    push @out, @{ $Panic{lc $t} || next };
  }
  return grep !$seen{$_}++,  @out, 'en';
}

#---------------------------------------------------------------------------
#---------------------------------------------------------------------------

=item * the function implicate_supers( ...languages... )

This takes a list of strings (which are presumed to be language-tags;
strings that aren't, are ignored); and after each one, this function
inserts super-ordinate forms that don't already appear in the list.
The original list, plus these insertions, is returned.

In other words, it takes this:

  pt-br de-DE en-US fr pt-br-janeiro

and returns this:

  pt-br pt de-DE de en-US en fr pt-br-janeiro

This function is most useful in the idiom

  implicate_supers( I18N::LangTags::Detect::detect() );

(See L<I18N::LangTags::Detect>.)


=item * the function implicate_supers_strictly( ...languages... )

This works like C<implicate_supers> except that the implicated
forms are added to the end of the return list.

In other words, implicate_supers_strictly takes a list of strings
(which are presumed to be language-tags; strings that aren't, are
ignored) and after the whole given list, it inserts the super-ordinate forms 
of all given tags, minus any tags that already appear in the input list.

In other words, it takes this:

  pt-br de-DE en-US fr pt-br-janeiro

and returns this:

  pt-br de-DE en-US fr pt-br-janeiro pt de en

The reason this function has "_strictly" in its name is that when
you're processing an Accept-Language list according to the RFCs, if
you interpret the RFCs quite strictly, then you would use
implicate_supers_strictly, but for normal use (i.e., common-sense use,
as far as I'm concerned) you'd use implicate_supers.

=cut

sub implicate_supers {
  my @languages = grep is_language_tag($_), @_;
  my %seen_encoded;
  foreach my $lang (@languages) {
    $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1
  }

  my(@output_languages);
  foreach my $lang (@languages) {
    push @output_languages, $lang;
    foreach my $s ( I18N::LangTags::super_languages($lang) ) {
      # Note that super_languages returns the longest first.
      last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) };
      push @output_languages, $s;
    }
  }
  return uniq( @output_languages );

}

sub implicate_supers_strictly {
  my @tags = grep is_language_tag($_), @_;
  return uniq( @_,   map super_languages($_), @_ );
}



###########################################################################
1;
__END__

=back

=head1 ABOUT LOWERCASING

I've considered making all the above functions that output language
tags return all those tags strictly in lowercase.  Having all your
language tags in lowercase does make some things easier.  But you
might as well just lowercase as you like, or call
C<encode_language_tag($lang1)> where appropriate.

=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS

In some future version of I18N::LangTags, I plan to include support
for RFC2482-style language tags -- which are basically just normal
language tags with their ASCII characters shifted into Plane 14.

=head1 SEE ALSO

* L<I18N::LangTags::List|I18N::LangTags::List>

* RFC 3066, C<http://www.ietf.org/rfc/rfc3066.txt>, "Tags for the
Identification of Languages".  (Obsoletes RFC 1766)

* RFC 2277, C<http://www.ietf.org/rfc/rfc2277.txt>, "IETF Policy on
Character Sets and Languages".

* RFC 2231, C<http://www.ietf.org/rfc/rfc2231.txt>, "MIME Parameter
Value and Encoded Word Extensions: Character Sets, Languages, and
Continuations".

* RFC 2482, C<http://www.ietf.org/rfc/rfc2482.txt>,
"Language Tagging in Unicode Plain Text".

* Locale::Codes, in
C<http://www.perl.com/CPAN/modules/by-module/Locale/>

* ISO 639-2, "Codes for the representation of names of languages",
including two-letter and three-letter codes,
C<http://www.loc.gov/standards/iso639-2/php/code_list.php>

* The IANA list of registered languages (hopefully up-to-date),
C<http://www.iana.org/assignments/language-tags>

=head1 COPYRIGHT

Copyright (c) 1998+ Sean M. Burke. All rights reserved.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

The programs and documentation in this dist are distributed in
the hope that they will be useful, but without any warranty; without
even the implied warranty of merchantability or fitness for a
particular purpose.

=head1 AUTHOR

Sean M. Burke C<sburke@cpan.org>

=cut

PK�6�Z���
Collate.pmnu�[���package I18N::Collate;

use strict;
our $VERSION = '1.02';

=head1 NAME

I18N::Collate - compare 8-bit scalar data according to the current locale

=head1 SYNOPSIS

    use I18N::Collate;
    setlocale(LC_COLLATE, 'locale-of-your-choice');
    $s1 = I18N::Collate->new("scalar_data_1");
    $s2 = I18N::Collate->new("scalar_data_2");

=head1 DESCRIPTION

  ***

  WARNING: starting from the Perl version 5.003_06
  the I18N::Collate interface for comparing 8-bit scalar data
  according to the current locale

	HAS BEEN DEPRECATED

  That is, please do not use it anymore for any new applications
  and please migrate the old applications away from it because its
  functionality was integrated into the Perl core language in the
  release 5.003_06.

  See the perllocale manual page for further information.

  ***

This module provides you with objects that will collate
according to your national character set, provided that the
POSIX setlocale() function is supported on your system.

You can compare $s1 and $s2 above with

    $s1 le $s2

to extract the data itself, you'll need a dereference: $$s1

This module uses POSIX::setlocale(). The basic collation conversion is
done by strxfrm() which terminates at NUL characters being a decent C
routine.  collate_xfrm() handles embedded NUL characters gracefully.

The available locales depend on your operating system; try whether
C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
C<ls /usr/lib/locale>.  Not all the locales that your vendor supports
are necessarily installed: please consult your operating system's
documentation and possibly your local system administration.  The
locale names are probably something like C<xx_XX.(ISO)?8859-N> or
C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
European character set.

=cut

# I18N::Collate.pm
#
# Author:	Jarkko Hietaniemi <F<jhi@iki.fi>>
#		Helsinki University of Technology, Finland
#
# Acks:		Guy Decoux <F<decoux@moulon.inra.fr>> understood
#		overloading magic much deeper than I and told
#		how to cut the size of this code by more than half.
#		(my first version did overload all of lt gt eq le ge cmp)
#
# Purpose:      compare 8-bit scalar data according to the current locale
#
# Requirements:	Perl5 POSIX::setlocale() and POSIX::strxfrm()
#
# Exports:	setlocale 1)
#		collate_xfrm 2)
#
# Overloads:	cmp # 3)
#
# Usage:	use I18N::Collate;
#	        setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
#		$s1 = I18N::Collate->("scalar_data_1");
#		$s2 = I18N::Collate->("scalar_data_2");
#
#		now you can compare $s1 and $s2: $s1 le $s2
#		to extract the data itself, you need to deref: $$s1
#
# Notes:
#		1) this uses POSIX::setlocale
#		2) the basic collation conversion is done by strxfrm() which
#		   terminates at NUL characters being a decent C routine.
#		   collate_xfrm handles embedded NUL characters gracefully.
#		3) due to cmp and overload magic, lt le eq ge gt work also
#		4) the available locales depend on your operating system;
#		   try whether "locale -a" shows them or man pages for
#		   "locale" or "nlsinfo" work or the more direct
#		   approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
#		   Not all the locales that your vendor supports
#		   are necessarily installed: please consult your
#		   operating system's documentation.
#		   The locale names are probably something like
#		   'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
#		   for example 'fr_CH.ISO8859-1' is the Swiss (CH)
#		   variant of French (fr), ISO Latin (8859) 1 (-1)
#		   which is the Western European character set.
#
# Updated:	19961005
#
# ---

use POSIX qw(strxfrm LC_COLLATE);
use warnings::register;

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
our @EXPORT_OK = qw();

use overload qw(
fallback	1
cmp		collate_cmp
);

our($LOCALE, $C);

our $please_use_I18N_Collate_even_if_deprecated = 0;
sub new {
  my $new = $_[1];

  if (warnings::enabled() && $] >= 5.003_06) {
    unless ($please_use_I18N_Collate_even_if_deprecated) {
      warnings::warn <<___EOD___;
***

  WARNING: starting from the Perl version 5.003_06
  the I18N::Collate interface for comparing 8-bit scalar data
  according to the current locale

	HAS BEEN DEPRECATED

  That is, please do not use it anymore for any new applications
  and please migrate the old applications away from it because its
  functionality was integrated into the Perl core language in the
  release 5.003_06.

  See the perllocale manual page for further information.

***
___EOD___
      $please_use_I18N_Collate_even_if_deprecated++;
    }
  }

  bless \$new;
}

sub setlocale {
 my ($category, $locale) = @_[0,1];

 POSIX::setlocale($category, $locale) if (defined $category);
 # the current $LOCALE
 $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
}

sub C {
  my $s = ${$_[0]};

  $C->{$LOCALE}->{$s} = collate_xfrm($s)
    unless (defined $C->{$LOCALE}->{$s}); # cache when met

  $C->{$LOCALE}->{$s};
}

sub collate_xfrm {
  my $s = $_[0];
  my $x = '';

  for (split(/(\000+)/, $s)) {
    $x .= (/^\000/) ? $_ : strxfrm("$_\000");
  }

  $x;
}

sub collate_cmp {
  &C($_[0]) cmp &C($_[1]);
}

# init $LOCALE

&I18N::Collate::setlocale();

1; # keep require happy
PK�($[�g]�>>Langinfo/Langinfo.sonuȯ��ELF>P@7@8	@�� p'p' p' �� -- - 888$$ppp  S�tdppp  P�td88844Q�tdR�tdp'p' p' ��GNU�;��:�KOT\�B��gN9V��@ BE���|�qX�acm�e t�I�� ����T�%8v���, �F"h
0  0 0 ���__gmon_start___ITM_deregisterTMCloneTable_ITM_registerTMCloneTable__cxa_finalizelibpthread.so.0__errno_locationPerl_sv_2mortalPerl_sv_2iv_flagsnl_langinfoPerl_newSVpvPerl_croak_xs_usagePerl_newSVpvn_flagsPerl_newSVpvf_nocontextPerl_croak_svboot_I18N__LanginfoPerl_xs_handshakePerl_newXS_deffilePerl_newXS_flagsPL_thr_keypthread_getspecificPerl_get_hvPerl_newCONSTSUBPerl_newSVivPerl_hv_common_key_lenPerl_mro_method_changed_inPerl_xs_boot_epilogPerl_sv_upgradePerl_croaklibperl.so.5.26libc.so.6_edata__bss_start_endGLIBC_2.2.5 ui	%Uui	%p' x' ��' �' �' ��' ��' ��' �( �( �0( �H( �`( �x( ��( ��( �( 
�( �( ) % ) -8) 5P) =h) E�) M�) X�) ��) ��) ��) �* �(* �@* �X* gp* q�* _�* c�* m�* y�* �+ �+ �0+ H+ `+ x+ �+ '�+ /�+ 7�+ ?�+ G, � , �8, �P, }h, ��, ��, ��, ��, ��, ��/ �/ �/ 	�/ �/ (/ 0/ 8/ @/ H/ P/ X/ 
`/ h/ p/ 
x/ �/ �/ �/ �/ �/ �/ �/ �/ �/ �/ �/ ��H��H�� H��t��H����5� �%� ��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q������h
��A������h��1������h��!������h
��������h��������h������h�������h��������h�������h�������h�������h��������%- D���%% D���% D���% D���%
 D���% D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� DH�=� H�� H9�tH�n H��t	�����H�=y H�5r H)�H��H��H��?H�H�tH�E H��t��fD�����=5 u+UH�=" H��tH�=� �I����d����
 ]������w������ATUSH�GxH��H�H�P�H�WxH�WHc�hH��H)�H��H������Hc�H�4�L�$��F% =u?H��x ��yE���H��8�H���C���H�SH��LcL�#[]A\����>����ǅ�x����1�H��H�����H���H��H�5o�r���f���SH��H��H�CxH�H�P�H�SxHcH�CH��H)�H��H����tH�5?�1����NH�H��H�vH�P�� ������H���H�=�H��1��J$H�R0���H��H���e���H��H���j���f.���AWL��H��1�AVH�
�H��AUI�����
ATUSH�� H���0���H����H�5�L��D$����E1�L��L��H�
lH����H�5����H�� �8���H�5��H��H�������I���#L��L��L��H������H��H�;��H�sH������H��L�3�KjE1�A�L��L��H��I������ZYH����H�H�A��u��Ѝr���v��Ɓ���
�y�����tE��H��L�y�AA�OH�;�t���L��H���J����t$H��L��[]A\A]A^A_�@���H�κH��H�L$����H�L$�A�L��H�5�H���?�����H��H���code0.13v5.26.0Langinfo.cI18N::Langinfo::AUTOLOAD_I18N::Langinfo::langinfoI18N::Langinfo::CODESETABDAY_1ABDAY_2ABDAY_3ABDAY_4ABDAY_5ABDAY_6ABDAY_7ABMON_1ABMON_10ABMON_11ABMON_12ABMON_2ABMON_3ABMON_4ABMON_5ABMON_6ABMON_7ABMON_8ABMON_9ALT_DIGITSAM_STRERAERA_D_FMTERA_D_T_FMTERA_T_FMTNOEXPRNOSTRPM_STRT_FMT_AMPMYESEXPRYESSTRCRNCYSTRTHOUSEPRADIXCHAR%-p is not a valid I18N::Langinfo macro at %s line %lu
Couldn't add key '%s' to %%I18N::Langinfo::;4H���P����x���������H����zRx�$�pFJw�?:*3$"D8���`,\@����F�A�A ��
ABH����E�T�h����F�N�P �J(�A0�A8�KP�XH`[XAPk
8D0A(B BBBEGNU���' �����������
%-5=EM
/X&���	�
���
g)q(_,c	.m0y	1��#�$%'/7 ?!G"���'}*�
+�����	U�X
4p' x' ���o`��
1/ H0	���o���o����o�o����o<- ������� 0@P`p�������GA$3a1XAGA$3p11131GA*GA$annobin gcc 8.5.0 20210514GA$plugin name: gcc-annobinGA$running gcc 8.5.0 20210514GA*GA*GA!
GA*FORTIFYGA+GLIBCXX_ASSERTIONSGA*GOW*�GA*cf_protectionGA+omit_frame_pointerGA+stack_clashGA!stack_realign
GA*FORTIFY�GA+GLIBCXX_ASSERTIONSLanginfo.so-5.26.3-423.el8_10.x86_64.debug�R�7zXZ�ִF!t/��-]?�E�h=��ڊ�2N�a6�Dg1u�@��W�*>n���q��r�@H�`Z�q���ʾ�+�s#��2Q���P2���ު����,r���yS�^B��3.Ǆ\t6_K��s�K���o	7�]KZ��!:=!��?r��RWԫ����"���n�>���ІwU�����{�%�3��7R��.eD�T`T���Rz�I�i�Ȋ�z��q]�6�.{r%
4_�YHs�\�m+��1!ȫ���e*�{���S=���w��믏�!���0�x+�����ɰ��C<��3��j�Rr��VrR��9t �5v�k"��)�˦v�v��Hw�����n:.V>�]
�,�`�����ghwq,���i{�oV�����S���Ֆ�<�S�	��i#��l�/�=U�T�����x�h�j��oa�J�F[>t�|D��i57`��z�u����nӏ"���9��Sc=����y��Vτ���BgN�V
�ؐe����HYx��o�����j1�گ|/��}m�#Q�`C����1�k$<�\L�d3
�m�O�_���'c��Be�cW���;C���]���|6�����i^I�W���h�Q
�@�k�؜�/~Ts8��e�0�KjJ�k��|}�{QA�n>����'�z1�Û���Up�=�}�^�,�zԲ�����j�������_�R���R���}��Ʒ}��q�-�aG=/x�%T��Fk��Q�9�)���.�p�^�����3���������O9IA�ޮJK\T��dT���!v�1�֢j��W�������A��-�"��g�YZ.shstrtab.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.note.gnu.property.init_array.fini_array.data.rel.ro.dynamic.got.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdata88$���o``4(���0��18���o��>E���o��@T00^BHHhXXc��pn��`wPP�}44
�2HH��884�pp��pp �p' p'�x' x'��' �'� �- -�/ /��0 0�0`0HH20x2p�5"PK�V�Z�]	���Langinfo.pmnu�[���PK�6�Z<��c�p�pLangTags/List.pmnu�[���PK�6�Z��W����LangTags/Detect.pmnu�[���PK�6�Z
�C�m�m�LangTags.pmnu�[���PK�6�Z���
�Collate.pmnu�[���PK�($[�g]�>>(Langinfo/Langinfo.sonuȯ��PK�|\