use strict;
use warnings;
package Data::SPath;
BEGIN {
$Data::SPath::VERSION = '0.0004';
}
#ABSTRACT: lookup on nested data with simple path notation
use 5.010_000;
use feature qw(switch);
use Carp qw/croak/;
use Scalar::Util qw/reftype blessed/;
use Text::Balanced qw/
extract_delimited
extract_bracketed
extract_multiple
/;
use Sub::Exporter -setup => {
exports => [ spath => \&_build_spath ]
};
my @Error_Handlers = qw(
method_miss
key_miss
index_miss
key_on_non_hash
args_on_non_method
);
sub _build_spath {
my ( $class, $name, $args ) = @_;
return sub {
my ( $data, $path, $opts ) = @_;
for ( @Error_Handlers ) {
unless ( exists $opts->{ $_ } ) {
if ( exists $args->{ $_ } ) {
$opts->{ $_ } = $args->{ $_ };
}
else {
$opts->{ $_ } = \&{ "_$_" };
}
}
no warnings 'uninitialized';
unless ( ref( $opts->{ $_ } ) eq 'CODE' ) {
croak "$_ must be set to a code reference";
}
}
return _spath( $data, $path, $opts );
};
}
# taken from Data::DPath
sub _unescape {
my ( $str ) = @_;
return unless defined $str;
$str =~ s/(?<!\\)\\(["'])/$1/g; # '"$
$str =~ s/\\{2}/\\/g;
return $str;
}
# Modified from Data::DPath. Added /s modifier to allow new lines in keys (why
# not?)
# this originally only supported double quote
sub _unquote {
my ($str) = @_;
$str =~ s/^(['"])(.*)\1$/$2/sg;
return $str;
}
sub _quoted { shift =~ m,^/["'], }
sub _method_miss {
my ( $method_name, $current, $depth ) = @_;
my $reftype = reftype( $current );
croak "tried to call nonexistent method '"
. $method_name
. "' on object with type $reftype at spath path element "
. $depth;
}
sub _key_miss {
my ( $key, $current, $depth ) = @_;
croak "tried to access nonexistent key '"
. $key
. "' in hash at spath path element "
. $depth;
}
sub _index_miss {
my ( $index, $current, $depth ) = @_;
croak "tried to access nonexistent index '"
. $index
. "' in array at spath path element "
. $depth;
}
sub _key_on_non_hash {
my ( $key, $current, $depth ) = @_;
my $reftype = reftype( $current ) || '(non reference)';
croak "tried to access key '"
. $key
. "' on a non-hash type "
. $reftype
. " at spath path element "
. $depth;
}
sub _args_on_non_method {
my ( $key, $current, $args, $depth ) = @_;
my $reftype = reftype( $current ) || '(non reference)';
croak "tried to pass arguments '"
. $args
. "' to a non-method '"
. $key
. "' of type "
. $reftype
. "at spath path element "
. $depth;
}
sub _tokenize {
my ( $path ) = @_;
my $remaining_path = $path;
my $extracted;
my @tokens;
while ( $remaining_path ) {
my ( $prefix, $args );
my $key;
if ( _quoted( $remaining_path ) ) {
( $key, $remaining_path ) = extract_delimited( $remaining_path, q|'"|, '/' );
( $args, $remaining_path ) = extract_bracketed( $remaining_path, q|('")| );
$key = _unescape _unquote $key;
}
else {
# must extract arguments first to keep extract_delimited from getting
# quoted structures with / in them
if ( $remaining_path =~ m,^/[^/]+\(, ) {
( $extracted, $remaining_path, $prefix ) = extract_bracketed( $remaining_path, q|('")|, '[^(]*' );
if ( defined $prefix or defined $remaining_path ) {
no warnings 'uninitialized';
$remaining_path = $prefix . $remaining_path;
$args = $extracted;
}
else {
$remaining_path = $extracted;
}
}
( $extracted, $remaining_path ) = extract_delimited( $remaining_path, '/' );
if ( not $extracted ) {
( $extracted, $remaining_path ) = ( $remaining_path, undef );
}
else {
$remaining_path = ( chop $extracted ) . $remaining_path;
}
( $key ) = $extracted =~ m,^/(.*),gs;
$key = _unescape $key;
}
push @tokens, [ $key, $args ];
}
return \@tokens;
}
sub _tokenize_args {
my $args = shift;
( $args ) = $args =~ /^\((.*)\)$/;
return map { _unescape( $_ =~ /^['"]/ ? _unquote( $_ ) : $_ ) }
extract_multiple( $args, [
# quoted structures
sub { extract_delimited( $_[0], q|'"| ) },
# handle unquoted bare words
qr/\s*(\w+)/s,
qr/\s*([^,]+)(.*)/s
], undef, 1 );
}
sub _spath {
my ( $data, $path, $opts ) = @_;
my $current = $data;
my $depth = 0;
my $wantlist = wantarray;
my $tokens = _tokenize( $path );
for my $token ( @{ $tokens } ) {
$depth++;
my ( $key, $args ) = @{ $token };
if ( blessed $current ) {
my @args;
@args = _tokenize_args( $args )
if defined $args;
return $opts->{method_miss}->( $key, $current, $depth )
unless my $method = $current->can( $key );
if ( $wantlist ) {
my @current = $current->$method( @args );
$current = @current > 1 ? \@current : $current[0];
}
else {
$current = $current->$method( @args );
}
}
else {
return $opts->{args_on_non_method}->( $key, $current, $args, $depth )
if defined $args;
given ( ref $current ) {
when( 'HASH' ) {
return $opts->{key_miss}->( $key, $current, $depth )
unless exists $current->{ $key };
$current = $current->{ $key };
}
when ( 'ARRAY' ) {
return $opts->{key_on_non_hash}->( $key, $current, $depth )
unless $key =~ /^\d+$/;
return $opts->{index_miss}->( $key, $current, $depth )
if $#{ $current } < $key;
$current = $current->[ $key ];
}
default {
return $opts->{key_on_non_hash}->( $key, $current, $depth );
}
}
}
}
return $current;
}
1;
__END__
=pod
=head1 NAME
Data::SPath - lookup on nested data with simple path notation
=head1 VERSION
version 0.0004
=head1 SYNOPSIS
use Data::SPath
spath => {
# sets up default error handling
method_miss => \&_method_miss,
key_miss => \&_key_miss,
index_miss => \&_index_miss,
key_on_non_hash => \&_key_on_non_hash,
args_on_non_method => \&_args_on_non_method
};
my $data = {
foo => [ qw/foobly fooble/ ],
bar => [ { bat => "boo" }, { bat => "bar" } ]
"foo bar" => 1,
"foo\"bar" => { "foo/bar" => 20 }
obj => SomeClass->new,
};
my $match;
# returns foobly
$match = spath $data, "/foo/0";
# returns boo
$match = spath $data, "/bar/0/bat";
# returns { bat => "bar" }
$match = spath $data, "/bar/1";
# returns 1
$match = spath $data, q{/"foo bar"};
# returns 20
$match = spath $data, q{/"foo\\"bar/"foo/bar"};
# returns the call to method passing arguments
$match = spath $data, q{/obj/method( "arg1", 'arg2', bareword )};
=head1 DESCRIPTION
This module implements very simple path lookups on nested data structures. At
the time of this writing there are two modules that implement path matching.
They are L<Data::Path> and L<Data::DPath>. Both of these modules have more
complicated matching similar to C<XPath>. This module does not support
B<matching>, only lookups. So one call will alway return a single match. Also,
when this module encounters a C<blessed> reference, instead of access the references
internal data structure (like L<Data::DPath>) a method call is made on the object
by the name of the key. See L</SYNOPSIS>.
=head1 FUNCTIONS
=head2 C<spath( $data, $path, $opts )>
C<spath> takes the data to perform lookup on as the first argument. The second
argument should be a string with a path specification in it. The third optional
argument, if specified, should be a hash reference of options. Currently the
only supported options are error handlers. See L</"ERROR HANDLING">. C<spath>
returns the lookup if it is found, calls croak() otherwise with the error. This
behavior can be changed by setting error handlers. If the error handler
returns, that value is returned.
=over 4
=item *
data
Data can be any type of data, although it makes little sense to pass in
something other than a hash reference, an array reference or an object.
=item *
path
Path should start with a slash and be a slash separated list of keys to lookup.
Each level of key is one level deeper in the data.
=over 4
=item *
hash
When the current level in the data is a hash reference, the key is looked up in
the hash, and the current level is set to the return of the lookup on the hash.
=item *
array
When the current level is an array reference, the key should be an index into
the array, the current level is then set to the return of the lookup on the
array reference.
=item *
object
If the current level is an object, the key is treated as the name of a method
to call on the object. The method is called in list context if C<spath> was
called in list context, otherwise scalar context. If the method returns more
than one item, the current level is set to an array reference of the return,
otherwise the current level is set to the return of the method call. It is
possible to pass in arguments to object methods. Arguments are expected to be a
comma separated list of either quoted structures or barewords which must match
C<\w+>. See L</SYNOPSIS> for examples.
=back
Quotes are allowed on each level. You only need quotes if you have spaces
or C</> in your keys. For example:
my $data = { "foo bar" => 1, "foo/bar" => 1 };
spath $data, q{/"foo bar"};
spath $data, q{/"foo/bar"};
You can also use C<\> to escape quotes:
spath $data, q{/"foo\"bar"}; # embedded quotes
=item *
opts
The only options currently accepted are error handlers. See L</"ERROR
HANDLING">.
=back
=head1 EXPORTS
Nothing is exported by default. You can request C<spath> be exported to you
namespace. This module uses L<Sub::Exporter> for exporting.
=head1 ERROR HANDLING
Data::SPath defaults to calling Carp::croak() when any kind of error occurs.
You can change any of the error handlers by passing in a third argument to
C<spath>:
spath $data, "/path", {
method_miss => \&_method_miss,
key_miss => \&_key_miss,
index_miss => \&_index_miss,
key_on_non_hash => \&_key_on_non_hash,
args_on_non_method => \&_args_on_non_method
};
Or you can setup default error handlers at compile time by passing them into
your call to C<import()>:
use Data::SPath
spath => {
method_miss => \&_method_miss,
key_miss => \&_key_miss,
index_miss => \&_index_miss,
key_on_non_hash => \&_key_on_non_hash,
args_on_non_method => \&_args_on_non_method
};
The default error handlers look like this:
sub _method_miss {
my ( $method_name, $current, $depth ) = @_;
my $reftype = reftype( $current );
croak "tried to call nonexistent method '"
. $method_name
. "' on object with type $reftype at spath path element "
. $depth;
}
sub _key_miss {
my ( $key, $current, $depth ) = @_;
croak "tried to access nonexistent key '"
. $key
. "' in hash at spath path element "
. $depth;
}
sub _index_miss {
my ( $index, $current, $depth ) = @_;
croak "tried to access nonexistent index '"
. $index
. "' in array at spath path element "
. $depth;
}
sub _key_on_non_hash {
my ( $key, $current, $depth ) = @_;
my $reftype = reftype( $current ) || '(non reference)';
croak "tried to access key '"
. $key
. "' on a non-hash type $reftype at spath path element "
. $depth;
}
sub _args_on_non_method {
my ( $key, $current, $args, $depth ) = @_;
my $reftype = reftype( $current ) || '(non reference)';
croak "tried to pass arguments '"
. $args
. "' to a non-method '"
. $key
. "' of type "
. $reftype
. "at spath path element "
. $depth;
}
If you return from an error handler, that value is returned from C<spath>.
=head1 AUTHOR
Scott Beck <[email protected]>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2010 by Scott Beck <[email protected]>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut