# dataset.pm
# vim:ts=4 sw=4
# ----------------------------------------------------------------------------------------------------
# Name : ETL::Pequel3::Type::DataSet.pm
# Created : 7 September 2006
# Author : Mario Gaffiero (gaffie)
#
# Copyright 1999-2007 Mario Gaffiero.
#
# This file is part of Pequel(TM).
#
# Pequel is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# Pequel is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pequel; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# ----------------------------------------------------------------------------------------------------
# Modification History
# When Version Who What
# 16/05/2007 0.94 DB More efficient split in Input/Ascii;
# ----------------------------------------------------------------------------------------------------
package ETL::Pequel3::Type::DataSet;
require 5.005_62;
use strict;
use warnings;
use ETL::Pequel3::Type::DataMember;
# ----------------------------------------------------------------------------------------------------
{
    # Base class shared by all dataset types in this file. It owns a datasource
    # object (built through a datasource factory) and forwards most code_*
    # generation requests to that datasource.
    package ETL::Pequel3::Type::DataSet::Abstract;
    use base qw(Class::STL::Element); # --> ETL::Pequel3::Type::Code; -- contains code_ functions
    # Class members; they are used throughout this file as combined
    # getter/setter methods ($self->name() / $self->name($value)).
    use Class::STL::ClassMembers
        qw(
            err
            catalogue
            properties
            configuration
            name
            dataset_spec
            datasource
            count_vname
            arr_vname
            numflds_vname
            default_datasource_spec
            datasource_type
            datasource_factory_type
            datasource_factory
            use_list
            pequel_ref
            description
        ),
        # User-settable members: 'shared' accepts 0 or 1; 'verify' accepts 0, skip or abort.
        ETL::Pequel3::Type::DataMember::User->new(name => 'shared', default => 0, validate => '^(0|1)$'),
        ETL::Pequel3::Type::DataMember::User->new(name => 'verify', default => 0, validate => '^(0|skip|abort)$');
    use Class::STL::ClassMembers::Constructor;
    use ETL::Pequel3::CodeStyler;
    use ETL::Pequel3::Catalogue;

    # Post-construction initialisation (presumably invoked by the generated
    # constructor with the constructor arguments -- TODO confirm against
    # Class::STL::ClassMembers::Constructor).
    #
    # Creates the error object and catalogue, takes the configuration from
    # pequel_ref, instantiates the datasource factory named by
    # datasource_factory_type and asks it for the datasource. The remaining
    # arguments in @_ are passed through to the factory unchanged.
    # Dies if no factory class is configured.
    # Returns $self.
    #
    # NOTE(review): ETL::Pequel3::Error is not loaded in this file; it is
    # assumed to be loaded elsewhere.
    sub new_extra
    {
        my $self = shift;
        $self->err(ETL::Pequel3::Error->new());
        $self->catalogue(ETL::Pequel3::Catalogue->new());
        $self->properties($self->catalogue()->properties());
        $self->configuration($self->pequel_ref()->config());

        # Call the constructor on the class name directly instead of going
        # through a string eval: a missing or misspelt factory class now fails
        # here with a meaningful message rather than being swallowed and
        # surfacing later as a method call on an undefined value.
        my $factory_class = $self->datasource_factory_type();
        unless (defined($factory_class) && length($factory_class)) {
            require Carp;
            Carp::croak("@{[ ref($self) ]}: no datasource_factory_type defined");
        }
        $self->datasource_factory($factory_class->new());

        $self->datasource($self->datasource_factory()->factory(
                @_,
                pequel_ref => $self->pequel_ref(),
                dataset => $self,
                shared => $self->shared(),
                datasource_type => $self->datasource_type()
            )
        );

        # Derive the generated-code variable names from the datasource's vname;
        # 'UNDEF' stands in when the datasource has no (true) vname.
        my $vname = $self->datasource()->vname();
        $self->arr_vname($vname) if (defined($vname));
        $self->count_vname("@{[ $vname || 'UNDEF' ]}_COUNT");
        $self->numflds_vname("@{[ $vname || 'UNDEF' ]}_NUMFLDS");
        $self->use_list(stl::list());
        return $self;
    }

    # Parse hook; does nothing at this level. Returns the invocant.
    sub parse
    {
        my $self = shift;
        #< $self->configuration($self->pequel_ref()->config());
        return $self;
    }

    # Forwards all arguments to the datasource's _map_output and returns its result.
    sub map_output
    {
        my $self = shift;
        return $self->datasource()->_map_output(@_);
    }

    # The code_* methods below take an optional code-styler program object $c
    # (a new one is created when omitted or false) and delegate to the
    # datasource method of the same name prefixed with '_', returning whatever
    # that method returns.
    # NOTE(review): code_init and code_open fall back to
    # ETL::Pequel3::CodeStyler::Program whereas the others fall back to
    # ETL::Pequel3::CodeStyler::Program::Perl -- confirm this is intentional.
    sub code_init
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program->new();
        return $self->datasource()->_code_init($c);
    }

    sub code_open
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program->new();
        return $self->datasource()->_code_open($c);
    }

    sub code_close
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        return $self->datasource()->_code_close($c);
    }

    sub code_lock
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        return $self->datasource()->_code_lock($c);
    }

    sub code_unlock
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        return $self->datasource()->_code_unlock($c);
    }

    sub code_prepare
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        return $self->datasource()->_code_prepare($c);
    }

    # Emits nothing at this level; returns the (possibly newly created) program object.
    sub code_reset
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        return $c;
    }
}
# ----------------------------------------------------------------------------------------------------
{
    # Base class for input datasets. Supplies the input-side defaults
    # (datasource spec 'stdin', local input datasource factory) and the code
    # generation that is common to the input dataset types.
    package ETL::Pequel3::Type::DataSet::Input::Abstract;
    use base qw(ETL::Pequel3::Type::DataSet::Abstract);
    use Class::STL::ClassMembers qw( input_fields ),
        Class::STL::ClassMembers::DataMember->new(name => 'default_datasource_spec', default => 'stdin'),
        Class::STL::ClassMembers::DataMember->new(name => 'datasource_factory_type', default => 'ETL::Pequel3::Type::DataSource::Input::Factory::Local'),
        ETL::Pequel3::Type::DataMember::User->new(name => 'rmctrlm', default => 0),
        ETL::Pequel3::Type::DataMember::User->new(name => 'discard_header', default => 0),
        ETL::Pequel3::Type::DataMember::User->new(name => 'chomp', default => 0);
    use Class::STL::ClassMembers::Constructor;

    # Emits the declaration of the record array, two descriptive comment lines
    # (dataset spec and class name) and the record counter initialised to 0,
    # then hands over to the datasource's _code_init and returns its result.
    sub code_init
    {
        my ($self, $c) = @_;
        $c ||= ETL::Pequel3::CodeStyler::Program->new();

        my $array_name = $self->arr_vname();
        $c->code('my @' . $array_name . ';');

        my $spec = $self->dataset_spec();
        $spec = 'UNDEF' unless defined($spec);
        $c->over();
        $c->code('# ' . $spec);
        $c->code('# ' . ref($self));
        $c->back();

        $c->code('my $' . $self->count_vname() . '=0;');
        return $self->datasource()->_code_init($c);
    }

    # Decode hook for subclasses (arguments: program object, field count).
    # Emits nothing here; returns the program object.
    sub code_decode
    {
        my ($self, $c) = @_;
        return $c || ETL::Pequel3::CodeStyler::Program::Perl->new();
    }

    # Read hook for subclasses (arguments: program object, then a list of
    # ETL::Pequel3::Type::Field::Abstract objects). Emits nothing here;
    # returns the program object.
    sub code_read
    {
        my ($self, $c) = @_;
        return $c || ETL::Pequel3::CodeStyler::Program::Perl->new();
    }

    # Emits the post-read clean-up: when 'chomp' is set, a chomp of the last
    # element of the record array guarded by a defined() test; when 'rmctrlm'
    # is set, a bare "chop;".
    # NOTE(review): the emitted "chop;" acts on $_ in the generated program,
    # not on the record array -- confirm that is what is wanted.
    sub code_after_read #--> code_decode #TODO: not here -- not applicable to DBI?
    {
        my ($self, $c) = @_;
        $c ||= ETL::Pequel3::CodeStyler::Program::Perl->new();

        if ($self->chomp()) {
            my $array_name = $self->arr_vname();
            my $last_element = '$' . $array_name . '[$#' . $array_name . ']';
            $c->code('chomp(' . $last_element . ')');
            $c->over();
            $c->code('if (defined(' . $last_element . '));');
            $c->back();
        }

        if ($self->rmctrlm()) {
            $c->code('chop;');
        }
        return $c;
    }

    # Emits an undef() of the record array; returns the program object.
    sub code_reset
    {
        my ($self, $c) = @_;
        $c ||= ETL::Pequel3::CodeStyler::Program->new();
        my $array_name = $self->arr_vname();
        $c->code('undef(@' . $array_name . ');');
        return $c;
    }
}
# ----------------------------------------------------------------------------------------------------
{
    # Base class for output datasets. Supplies the output-side defaults
    # (datasource spec 'stdout', local output datasource factory) and the code
    # generation that is common to the output dataset types.
    package ETL::Pequel3::Type::DataSet::Output::Abstract;
    use base qw(ETL::Pequel3::Type::DataSet::Abstract);
    use Class::STL::ClassMembers qw( output_fields ),
        Class::STL::ClassMembers::DataMember->new(name => 'default_datasource_spec', default => 'stdout'),
        Class::STL::ClassMembers::DataMember->new(name => 'datasource_factory_type', default => 'ETL::Pequel3::Type::DataSource::Output::Factory::Local'),
        ETL::Pequel3::Type::DataMember::User->new(name => 'print_header', default => 0);
    use Class::STL::ClassMembers::Constructor;

    # Emits the declaration of the output record variable -- a hash when the
    # configuration's hash() setting is true, otherwise an array -- followed by
    # two descriptive comment lines (dataset spec and class name), then hands
    # over to the datasource's _code_init and returns its result.
    sub code_init
    {
        my ($self, $c) = @_;
        $c ||= ETL::Pequel3::CodeStyler::Program->new();

        my $sigil = $self->configuration()->hash() ? '%' : '@';
        $c->code('my ' . $sigil . $self->arr_vname() . ';');

        my $spec = $self->dataset_spec();
        $spec = 'UNDEF' unless defined($spec);
        $c->over();
        $c->code('# ' . $spec);
        $c->code('# ' . ref($self));
        $c->back();

        return $self->datasource()->_code_init($c);
    }

    # Write hook for subclasses (arguments: program object, then a list of
    # ETL::Pequel3::Type::Field::Abstract objects). Emits nothing here;
    # returns the program object.
    sub code_write
    {
        my ($self, $c) = @_;
        return $c || ETL::Pequel3::CodeStyler::Program::Perl->new();
    }

    # Header-write hook for subclasses (same arguments as code_write).
    # Emits nothing here; returns the program object.
    sub code_write_header
    {
        my ($self, $c) = @_;
        return $c || ETL::Pequel3::CodeStyler::Program::Perl->new();
    }

    # Emits an undef() of the output record variable, using the same
    # hash-or-array choice as code_init; returns the program object.
    sub code_reset
    {
        my ($self, $c) = @_;
        $c ||= ETL::Pequel3::CodeStyler::Program->new();
        my $sigil = $self->configuration()->hash() ? '%' : '@';
        $c->code('undef(' . $sigil . $self->arr_vname() . ');');
        return $c;
    }
}
# ----------------------------------------------------------------------------------------------------
{
    # Input dataset for ascii delimited text. Records are read one line at a
    # time from the datasource's file handle and split on 'field_delimiter'
    # (default '|').
    package ETL::Pequel3::Type::DataSet::Input::AsciiDelimited;
    use base qw(ETL::Pequel3::Type::DataSet::Input::Abstract);
    use Class::STL::ClassMembers
        Class::STL::ClassMembers::DataMember->new(name => 'description', default => 'This dataset consists of I<ascii delimited text> with the delimiter as specified by the F<field_delimiter> option.'),
        Class::STL::ClassMembers::DataMember->new(name => 'name', default => 'ascii'),
        ETL::Pequel3::Type::DataMember::User->new(name => 'chomp', default => 1),
        ETL::Pequel3::Type::DataMember::User->new(name => 'rmctrlm', default => 0),
        ETL::Pequel3::Type::DataMember::User->new(name => 'field_delimiter', default => '|');
    use Class::STL::ClassMembers::Constructor;

    # Emits the datasource's open code and, when 'discard_header' is set, a
    # read of one line into a throw-away variable, wrapped in the
    # lock / unlock code. Returns the program object.
    sub code_open
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program->new();
        $self->datasource()->_code_open($c);
        if ($self->discard_header()) {
            $self->code_lock($c);
            $c->code("my \$_discard_@{[ $self->datasource()->vname() ]} = <@{[ $self->datasource()->fdname() ]}>;");
            $self->code_unlock($c);
        }
        return $c;
    }

    # Emits the split() expression that breaks $_ into fields.
    # Arguments: program object (optional), then a field count which is
    # accepted for interface compatibility but not used here.
    # Returns the program object.
    sub code_decode
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        my $fdelim = $self->field_delimiter();
        ## if we pass in a single character as a delimiter, it seems to be about 7%
        ## faster if we do not enclose it inside of [] in the regular expression
        ##
        ## I tried implementing split as an Inline::C function and it did not perform
        ## as well as I had hoped
        ##
        ## - David Bartle
        ## This patch will use a different regular expression syntax if the delimiter
        ## is only a single character (which is most of the time). On my system at least,
        ## it showed about a 7% performance increase which is only really noticeable on
        ## very huge input sources. I don't know if your benchmarks would show a marked
        ## improvement but I thought I would send the patch along none-the-less.
        if (length($fdelim) == 1 || (length($fdelim) == 2 && $fdelim =~ /^\\/)) {
            # The delimiter is embedded in a double-quoted string in the
            # generated code, so a bare double quote must be backslash-escaped
            # or the generated string literal terminates early. A quote that
            # already follows a backslash is left alone.
            (my $fdelim_copy = $fdelim) =~ s/(?<!\\)"/\\"/g;
            $c->code("split(\"\\Q$fdelim_copy\\E\", \$_, -1)");
        }
        else {
            # NOTE(review): a longer delimiter is emitted as a character class,
            # i.e. the generated split breaks on any one of its characters
            # rather than on the whole sequence -- confirm this is intended.
            $c->code("split(\"[$fdelim]\", \$_, -1)");
        }
        return $c;
    }

    # Emits a single expression that reads the next line into $_ and, when a
    # line was read, assigns the decoded fields to the record array.
    # Arguments: program object (optional), then a list of
    # ETL::Pequel3::Type::Field::Abstract objects whose count is passed on to
    # code_decode. Returns the program object.
    sub code_read
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        my @fields = @_; # ETL::Pequel3::Type::Field::Abstract object list;
        my $numflds = scalar(@fields);
        $c->newline_off();
        #TODO: this may not work with some input pipe type (eg sqlplus)
        $c->code("defined(\$_=<@{[ $self->datasource()->fdname() ]}>) && (\@@{[ $self->arr_vname() ]} = ");
        $self->code_decode($c, $numflds);
        $c->code(")");
        return $c;
    }
}
# ----------------------------------------------------------------------------------------------------
{
    # Output dataset for ascii delimited text. Records are printed to the
    # datasource's file handle with 'field_delimiter' (default '|') as the
    # output field separator.
    package ETL::Pequel3::Type::DataSet::Output::AsciiDelimited;
    use base qw(ETL::Pequel3::Type::DataSet::Output::Abstract);
    use Class::STL::ClassMembers
        Class::STL::ClassMembers::DataMember->new(name => 'description', default => 'This dataset consists of I<ascii delimited text> with the delimiter as specified by the F<field_delimiter> option.'),
        Class::STL::ClassMembers::DataMember->new(name => 'name', default => 'ascii'),
        ETL::Pequel3::Type::DataMember::User->new(name => 'field_delimiter', default => '|');
    use Class::STL::ClassMembers::Constructor;

    # Emits a local() assignment of the field delimiter to $, (the print field
    # separator), then hands over to the datasource's _code_prepare and
    # returns its result.
    # NOTE(review): the delimiter is embedded unescaped in a double-quoted
    # string in the generated code, so a delimiter containing a double quote
    # would break it -- verify against callers.
    sub code_prepare
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        $c->code("local(\$,)=\"@{[ $self->field_delimiter() ]}\";");
        return $self->datasource()->_code_prepare($c);
    }

    # Emits a block that prints the single-quoted field names as a header line.
    # Arguments: program object (optional), then a list of
    # ETL::Pequel3::Type::Field::Abstract objects. Returns the program object.
    sub code_write_header
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        my @fields = @_; # ETL::Pequel3::Type::Field::Abstract object list;
        $c->open_block();
        $self->code_prepare($c);
        $c->code("print @{[ $self->datasource()->fdname() ]}");
        $c->over();
        # One print-list entry per field; a plain loop, as only the side
        # effect on $c is wanted.
        for my $field (@fields) {
            $c->code("'@{[ $field->name() ]}',");
        }
        $c->code(";");
        $c->back();
        $c->close_block();
        return $c;
    }

    # Emits a block that prints one record: for every field, the expression
    # returned by its pequel_type's getvar().
    # Arguments: program object (optional), then a list of
    # ETL::Pequel3::Type::Field::Abstract objects. Returns the program object.
    sub code_write
    {
        my $self = shift;
        my $c = shift || ETL::Pequel3::CodeStyler::Program::Perl->new();
        my @fields = @_; # ETL::Pequel3::Type::Field::Abstract object list;
        # need field pointers so we can determine type, eg for CSV to quote string fields
        $c->open_block();
        $self->code_prepare($c);
        $c->code("print @{[ $self->datasource()->fdname() ]}");
        $c->over();
        for my $field (@fields) {
            $c->code("@{[ $field->pequel_type()->getvar($field) ]},");
        }
        #< map($c->code("@{[ $_->getvar() ]},"), @fields);
        $c->code(";");
        $c->back();
        $c->close_block();
        return $c;
    }
}
# ----------------------------------------------------------------------------------------------------
{
package ETL::Pequel3::Type::DataSet::Output::Xml;
use base qw(ETL::Pequel3::Type::DataSet::Output::Abstract);
use Class::STL::ClassMembers
Class::STL::ClassMembers::DataMember->new(name => 'description', default => 'This dataset type will output the data in F<XML> format.'),
Class::STL::ClassMembers::DataMember->new(name => 'name', default => 'xml'),
ETL::Pequel3::Type::DataMember::User->new(name => 'print_header', default => 0),
ETL::Pequel3::Type::DataMember::User->new(name => 'field_delimiter', default => '\n'),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -