# File: dedup.pm
# vim:ts=4 sw=4
# ----------------------------------------------------------------------------------------------------
# Name : ETL::Pequel3::Type::Section::Dedup.pm
# Created : 22 June 2006
# Author : Mario Gaffiero (gaffie)
#
# Copyright 1999-2007 Mario Gaffiero.
#
# This file is part of Pequel(TM).
#
# Pequel is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# Pequel is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pequel; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# ----------------------------------------------------------------------------------------------------
# Modification History
# When Version Who What
# ----------------------------------------------------------------------------------------------------
package ETL::Pequel3::Type::Section::Dedup;
require 5.005_62;
use strict;
use warnings;
use ETL::Pequel3::Type::Section;
# ----------------------------------------------------------------------------------------------------
{
package ETL::Pequel3::Type::Section::Dedup::Abstract;
use base qw(ETL::Pequel3::Type::Section::Abstract);
# Common base class for the dedup section types defined in this file.
# Declares the 'fields' member and the default values shared by the
# concrete sections.
use Class::STL::ClassMembers
    qw( fields ),
    Class::STL::ClassMembers::DataMember->new(
        name    => 'target_mem_name',
        default => 'section_name',
    ),
    Class::STL::ClassMembers::DataMember->new(
        name    => 'item_type',
        default => 'ETL::Pequel3::Type::Section::Dedup::Item',
    ),
    Class::STL::ClassMembers::DataMember->new(
        name    => 'element_type',
        default => __PACKAGE__,
    );
use Class::STL::ClassMembers::Constructor;

# Constructor hook. When a pequel_ref is available, 'fields' is initialised
# with a new object created through the catalogue's
# fieldsets()->group_by_fields()->new(). Always returns $self.
sub new_extra
{
    my $self = shift;
    if (defined($self->pequel_ref()))
    {
        $self->fields(
            $self->pequel_ref()->catalogue()->fieldsets()->group_by_fields()->new(
                pequel_ref => $self->pequel_ref()
            )
        );
    }
    return $self;
}

# NOTE: dedup on input is only relevant when the input is sorted (regardless of hash);
# the dedup_on_input/dedup_on_output fields must contain all the sort-fields.
# TODO: dedup on output is not relevant when hash or group by is used.
# Use a warning when the input sort is not defined (to allow for pre-sorted input data).
#
# Checks the names of the dedup fields against the fields of the sorter
# attached to the input dataset's datasource:
#   - no sorter, or a sorter with no fields: warning 10257 is raised;
#   - otherwise every sort field must appear at the same position in the
#     dedup field list, else error 10258 is raised.
sub prepare
{
    my $self = shift;
    my @dedup_names = map { $_->name() } $self->fields()->to_array();
    my $sorter = $self->pequel_ref()->input()->input_dataset()->datasource()->sorter();

    if (defined($sorter) && $sorter->fields()->size() != 0)
    {
        my @sort_names = map { $_->name() } $sorter->fields()->to_array();
        # NOTE(review): the same message is raised once for every mismatching
        # position -- confirm whether user_error() aborts on the first call.
        for my $pos (0 .. $#sort_names)
        {
            next if ($pos <= $#dedup_names && $sort_names[$pos] eq $dedup_names[$pos]);
            $self->err()->user_error(10258,
                "@{[ $self->pequel_ref()->pequel_name() ]}: @{[ $self->section_name() ]}"
                . " [@{[ join(', ', @dedup_names) ]}]"
                . " does not match sort on-input [@{[ join(', ', @sort_names) ]}];");
        }
    }
    else
    {
        $self->err()->user_warn(10257,
            "@{[ $self->pequel_ref()->pequel_name() ]}: @{[ $self->section_name() ]}"
            . " assuming implicit sort on-input:@{[ join(', ', @dedup_names) ]};");
    }
}
package ETL::Pequel3::Type::Section::Dedup::Item;
use base qw(ETL::Pequel3::Type::Section::Item::Abstract);
use Class::STL::ClassMembers;
use Class::STL::ClassMembers::Constructor;

# Constructor hook for a single dedup item. Appends the attributes an item
# carries: a FieldName property (marked required) and a PequelType property.
# Returns $self, matching the other new_extra() hooks in this file (the
# original fell through and returned whatever push_back() returned).
sub new_extra
{
    my $self = shift;
    $self->attributes()->push_back(
        ETL::Pequel3::Type::Properties::FieldName->new(required => 1),
        ETL::Pequel3::Type::Properties::PequelType->new(),
    );
    return $self;
}
}
# ----------------------------------------------------------------------------------------------------
{
package ETL::Pequel3::Type::Section::Dedup;
use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
# Generic 'dedup' section; select() forwards to the section named
# '<section_name>_on_<on>'.
use Class::STL::ClassMembers
    Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup'),
    Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => __PACKAGE__),
    Class::STL::ClassMembers::DataMember->new(name => 'description', default => 'This section is used to specify a record I<de-duplication> action. Please refer to the specific F<dedup_on_input> and F<dedup_on_output> section.');
use Class::STL::ClassMembers::Constructor;

# Constructor hook: appends an 'On' property to the section attributes.
# Returns $self.
sub new_extra
{
    my $self = shift;
    $self->attributes()->push_back(
        ETL::Pequel3::Type::Properties::On->new(),
    );
    return $self;
}

# Forwards a select() call to the section '<section_name>_on_<on>', where
# <on> is the lower-cased value of the 'on' named parameter.
#
# Parameters: a key/value list that must include 'on'; the complete list is
#             passed through unchanged to the target section's select().
# Returns:    whatever the target section's select() returns.
# Errors:     user error 10211 when 'on' is absent or undefined. The check
#             uses defined() rather than exists() so that 'on => undef' is
#             reported here instead of reaching lc() as an undefined value.
sub select
{
    my $self = shift;
    my %p = @_;
    # NOTE(review): if user_error() returns instead of aborting, the lookup
    # below still runs with an undefined 'on' -- confirm user_error() semantics.
    $self->err()->user_error(10211,
        "Section '@{[ $self->section_name() ]}' select() function requires 'on' parameter!")
        unless (defined($p{on}));
    return $self->pequel_ref()->section("@{[ $self->section_name() ]}_on_@{[ lc($p{on}) ]}")->select(@_);
}
}
# ----------------------------------------------------------------------------------------------------
{
package ETL::Pequel3::Type::Section::DedupOnInput;
use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
# The 'dedup_on_input' section.
use Class::STL::ClassMembers
    Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup_on_input'),
    Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => __PACKAGE__),
    Class::STL::ClassMembers::DataMember->new(name => 'description', default => "This section is used to specify an input record I<de-duplication> action. Only one of a group of two or more consecutive records that have the same value for the fields specified in the I<items> will be processed -- the other I<duplicate> records will be discarded. The F<pequel_type> item attribute may be specified as I<numeric> so as to perform a numeric comparison.\n\n\nThe de-duplication action will be performed after the F<copy_record> and F<divert_record> actions.");
use Class::STL::ClassMembers::Constructor;

# Calls map_input() on this section's fields, passing the section items and
# the input fields of the pequel input, then delegates to the inherited
# prepare() with the remaining arguments.
sub prepare
{
    my $self = shift;
    my $dedup_fields = $self->fields();
    $dedup_fields->map_input(
        $self->items(),
        $self->pequel_ref()->input()->input_fields()
    );
    return $self->SUPER::prepare(@_);
}
}
# ----------------------------------------------------------------------------------------------------
{
package ETL::Pequel3::Type::Section::DedupOnOutput;
use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
# The 'dedup_on_output' section.
use Class::STL::ClassMembers
    Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup_on_output'),
    Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => __PACKAGE__),
    Class::STL::ClassMembers::DataMember->new(name => 'description', default => "This section is used to specify an output record I<de-duplication> action. Only one of a group of two or more consecutive output records that have the same value for the fields specified in the I<items> will be processed -- the other I<duplicate> records will be discarded. The F<pequel_type> item attribute may be specified as I<numeric> so as to perform a numeric comparison.\n\n\nThe de-duplication action will be performed after the F<output_filter> action and before the F<field_process_on_output> action.");
use Class::STL::ClassMembers::Constructor;

# Calls map_output() on this section's fields, passing the section items and
# the output fields of the pequel output, then delegates to the inherited
# prepare() with the remaining arguments.
sub prepare
{
    my $self = shift;
    my $dedup_fields = $self->fields();
    $dedup_fields->map_output(
        $self->items(),
        $self->pequel_ref()->output()->output_fields()
    );
    return $self->SUPER::prepare(@_);
}
}
# ----------------------------------------------------------------------------------------------------
1;
# End of dedup.pm