lib/Encode/Simple.pm - metacpan.org

package Encode::Simple;

use strict;
use warnings;
use Carp ();
use Encode ();
use Exporter 'import';

our $VERSION = '1.002';

our @EXPORT = qw(encode encode_utf8 decode decode_utf8);
our @EXPORT_OK = qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax);
our %EXPORT_TAGS = (
  all => [@EXPORT, @EXPORT_OK],
  strict => [qw(encode encode_utf8 decode decode_utf8)],
  lax => [qw(encode_lax encode_utf8_lax decode_lax decode_utf8_lax)],
  utf8 => [qw(encode_utf8 encode_utf8_lax decode_utf8 decode_utf8_lax)],
);

use constant HAS_UNICODE_UTF8 => do { local $@; !!eval { require Unicode::UTF8; 1 } };
use constant MASK_STRICT => Encode::FB_CROAK | Encode::LEAVE_SRC;
use constant MASK_LAX => Encode::FB_DEFAULT | Encode::LEAVE_SRC;

my %ENCODINGS;

sub encode {
  my ($encoding, $input) = @_;
  return undef unless defined $input;
  my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
  my ($output, $error);
  { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
    unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub encode_lax {
  my ($encoding, $input) = @_;
  return undef unless defined $input;
  my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
  my ($output, $error);
  { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
    unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub encode_utf8 {
  my ($input) = @_;
  return undef unless defined $input;
  my ($output, $error);
  if (HAS_UNICODE_UTF8) {
    local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
    unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
  } else {
    my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
    local $@;
    unless (eval { $output = $obj->encode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub encode_utf8_lax {
  my ($input) = @_;
  return undef unless defined $input;
  my ($output, $error);
  if (HAS_UNICODE_UTF8) {
    local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
    unless (eval { $output = Unicode::UTF8::encode_utf8($input); 1 }) { $error = $@ || 'Error' }
  } else {
    my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
    local $@;
    unless (eval { $output = $obj->encode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub decode {
  my ($encoding, $input) = @_;
  return undef unless defined $input;
  my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
  my ($output, $error);
  { local $@; use warnings FATAL => 'utf8'; # Encode::Unicode throws warnings in this category
    unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub decode_lax {
  my ($encoding, $input) = @_;
  return undef unless defined $input;
  my $obj = $ENCODINGS{$encoding} || _find_encoding($encoding);
  my ($output, $error);
  { local $@; no warnings 'utf8'; # Encode::Unicode throws warnings in this category
    unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub decode_utf8 {
  my ($input) = @_;
  return undef unless defined $input;
  my ($output, $error);
  if (HAS_UNICODE_UTF8) {
    local $@; use warnings FATAL => 'utf8'; # Unicode::UTF8 throws warnings in this category
    unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
  } else {
    my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
    local $@;
    unless (eval { $output = $obj->decode("$input", MASK_STRICT); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub decode_utf8_lax {
  my ($input) = @_;
  return undef unless defined $input;
  my ($output, $error);
  if (HAS_UNICODE_UTF8) {
    local $@; no warnings 'utf8'; # Unicode::UTF8 throws warnings in this category
    unless (eval { $output = Unicode::UTF8::decode_utf8($input); 1 }) { $error = $@ || 'Error' }
  } else {
    my $obj = $ENCODINGS{'UTF-8'} || _find_encoding('UTF-8');
    local $@;
    unless (eval { $output = $obj->decode("$input", MASK_LAX); 1 }) { $error = $@ || 'Error' }
  }
  _rethrow($error) if defined $error;
  return $output;
}

sub _find_encoding {
  my ($encoding) = @_;
  Carp::croak('Encoding name should not be undef') unless defined $encoding;
  my $obj = Encode::find_encoding($encoding);
  Carp::croak("Unknown encoding '$encoding'") unless defined $obj;
  return $ENCODINGS{$encoding} = $obj;
}

sub _rethrow {
  my ($error) = @_;
  die $error if ref $error or $error =~ m/\n(?!\z)/;
  $error =~ s/ at .+? line [0-9]+\.\n\z//;
  Carp::croak($error);
}

1;

=head1 NAME

Encode::Simple - Encode and decode text, simply

=head1 SYNOPSIS

  use Encode::Simple qw(encode encode_lax encode_utf8 decode decode_lax decode_utf8);
  my $bytes = encode 'Shift_JIS', $characters;
  my $bytes = encode_lax 'ASCII', $characters;
  my $bytes = encode_utf8 $characters;
  my $characters = decode 'cp1252', $bytes;
  my $characters = decode_lax 'UTF-8', $bytes;
  my $characters = decode_utf8 $bytes;

=head1 DESCRIPTION

This module is a simple wrapper around L<Encode> that presents L</"encode"> and
L</"decode"> functions with straightforward behavior and error handling. See
L<Encode::Supported> for a list of supported encodings.

=head1 FUNCTIONS

All functions are exported by name, as well as via the tags C<:all>,
C<:strict>, C<:lax>, and C<:utf8>. By default, L</"encode">, L</"encode_utf8">,
L</"decode">, and L</"decode_utf8"> are exported as in L<Encode>.

=head2 encode

  my $bytes = encode $encoding, $characters;

Encodes the input string of characters into a byte string using C<$encoding>.
Throws an exception if the input string contains characters that are not valid
or possible to represent in C<$encoding>.

=head2 encode_lax

  my $bytes = encode_lax $encoding, $characters;

Encodes the input string of characters into a byte string using C<$encoding>,
encoding any invalid characters as a substitution character (the substitution
character used depends on the encoding). Note that some encoders do not respect
this option and may throw an exception anyway, this notably includes
L<Encode::Unicode> (but not UTF-8).

=head2 encode_utf8

  my $bytes = encode_utf8 $characters;

I<Since version 1.000.>

Encodes the input string of characters into a UTF-8 byte string. Throws an
exception if the input string contains characters that are not valid or
possible to represent in UTF-8.

This function will use the more consistent and efficient
L<Unicode::UTF8/"encode_utf8"> if installed, and is otherwise equivalent to
L</"encode"> with an encoding of C<UTF-8>. It is B<not> equivalent to
L<Encode/"encode_utf8">, which should be avoided.

=head2 encode_utf8_lax

  my $bytes = encode_utf8_lax $characters;

I<Since version 1.000.>

Encodes the input string of characters into a UTF-8 byte string, encoding any
invalid characters as the Unicode replacement character C<U+FFFD>, represented
in UTF-8 as the three bytes C<0xEFBFBD>.

This function will use the more consistent and efficient
L<Unicode::UTF8/"encode_utf8"> if installed, and is otherwise equivalent to
L</"encode_lax"> with an encoding of C<UTF-8>. It is B<not> equivalent to
L<Encode/"encode_utf8">, which should be avoided.

=head2 decode

  my $characters = decode $encoding, $bytes;

Decodes the input byte string into a string of characters using C<$encoding>.
Throws an exception if the input bytes are not valid for C<$encoding>.

=head2 decode_lax

  my $characters = decode_lax $encoding, $bytes;

Decodes the input byte string into a string of characters using C<$encoding>,
decoding any malformed bytes to the Unicode replacement character (U+FFFD).
Note that some encoders do not respect this option and may throw an exception
anyway, this notably includes L<Encode::Unicode> (but not UTF-8).

=head2 decode_utf8

  my $characters = decode_utf8 $bytes;

I<Since version 1.000.>

Decodes the input UTF-8 byte string into a string of characters. Throws an
exception if the input bytes are not valid for UTF-8.

This function will use the more consistent and efficient
L<Unicode::UTF8/"decode_utf8"> if installed, and is otherwise equivalent to
L</"decode"> with an encoding of C<UTF-8>. It is B<not> equivalent to
L<Encode/"decode_utf8">, which should be avoided.

=head2 decode_utf8_lax

  my $characters = decode_utf8_lax $bytes;

I<Since version 1.000.>

Decodes the input UTF-8 byte string into a string of characters, decoding any
malformed bytes to the Unicode replacement character C<U+FFFD>.

This function will use the more consistent and efficient
L<Unicode::UTF8/"decode_utf8"> if installed, and is otherwise equivalent to
L</"decode_lax"> with an encoding of C<UTF-8>. It is B<not> equivalent to
L<Encode/"decode_utf8">, which should be avoided.

=head1 BUGS

Report any issues on the public bugtracker.

=head1 AUTHOR

Dan Book <[email protected]>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2018 by Dan Book.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)

=head1 SEE ALSO

L<Unicode::UTF8>
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)