⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dedup.pm

📁 普通的ETL工具
💻 PM
字号:
# vim:ts=4 sw=4
# ----------------------------------------------------------------------------------------------------
#  Name		: ETL::Pequel3::Type::Section::Dedup.pm
#  Created	: 22 June 2006
#  Author	: Mario Gaffiero (gaffie)
#
# Copyright 1999-2007 Mario Gaffiero.
# 
# This file is part of Pequel(TM).
# 
# Pequel is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
# 
# Pequel is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Pequel; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# ----------------------------------------------------------------------------------------------------
# Modification History
# When          Version     Who     What
# ----------------------------------------------------------------------------------------------------
package ETL::Pequel3::Type::Section::Dedup;
require 5.005_62;
use strict;
use warnings;
use ETL::Pequel3::Type::Section;
# ----------------------------------------------------------------------------------------------------
{
	package ETL::Pequel3::Type::Section::Dedup::Abstract;
	use base qw(ETL::Pequel3::Type::Section::Abstract);
	use Class::STL::ClassMembers qw( fields ),
		Class::STL::ClassMembers::DataMember->new(name => 'target_mem_name', default => 'section_name'),
		Class::STL::ClassMembers::DataMember->new(name => 'item_type', default => 'ETL::Pequel3::Type::Section::Dedup::Item'),
		Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => "@{[ __PACKAGE__ ]}");
	use Class::STL::ClassMembers::Constructor;
	# Extra construction step for dedup sections.
	# When a pequel_ref is available, a new instance is created from the
	# catalogue's group_by_fields fieldset (passing the same pequel_ref) and
	# stored in the 'fields' member. Without a pequel_ref nothing is set up.
	# Returns the object itself.
	sub new_extra
	{
		my ($self) = @_;
		my $pequel = $self->pequel_ref();
		return $self unless (defined($pequel));
		my $prototype = $pequel->catalogue()->fieldsets()->group_by_fields();
		$self->fields($prototype->new(pequel_ref => $pequel));
		return $self;
	}
	# NOTE: dedup on input is only relevant when the input is sorted (regardless of hash);
	# the dedup_in/output_fields must contain all the sort-fields;
	# TODO: dedup on output is not relevant when hash or group by is used;
	# emit a warning when input-sort is not defined (to allow for pre-sorted input data);
	# Check the dedup field list against the sort order declared on the input.
	#
	# - If the input datasource has no sorter, or the sorter has no fields,
	#   user warning 10257 is issued: the input is assumed to be already
	#   sorted on the dedup fields.
	# - Otherwise every sort field must equal the dedup field at the same
	#   position. On the first position that differs (or has no dedup field)
	#   user error 10258 is reported. The message lists both complete field
	#   lists, so it is reported once only rather than once per offending
	#   position.
	sub prepare
	{
		my $self = shift;
		my @df = map($_->name(), $self->fields()->to_array());
		# Look the sorter up once; it is needed for every check below.
		my $sorter = $self->pequel_ref()->input()->input_dataset()->datasource()->sorter();
		if (!defined($sorter) || $sorter->fields()->size() == 0)
		{
			$self->err()->user_warn(10257, "@{[ $self->pequel_ref()->pequel_name()
				]}: @{[ $self->section_name() ]} assuming implicit sort on-input:@{[ join(', ', @df) ]};");
		}
		else
		{
			my @sf = map($_->name(), $sorter->fields()->to_array());
			foreach my $i (0..$#sf)
			{
				if ($i > $#df || $sf[$i] ne $df[$i])
				{
					$self->err()->user_error(10258, "@{[ $self->pequel_ref()->pequel_name()
						]}: @{[ $self->section_name() ]} [@{[ join(', ', @df)
						]}] does not match sort on-input [@{[ join(', ', @sf) ]}];");
					# One report is enough: later positions would repeat the same text.
					last;
				}
			}
		}
	}
	package ETL::Pequel3::Type::Section::Dedup::Item;
	use base qw(ETL::Pequel3::Type::Section::Item::Abstract);
	use Class::STL::ClassMembers;
	use Class::STL::ClassMembers::Constructor;
	# Extra construction step for a dedup item: appends the two attributes an
	# item carries to its attributes container -- a FieldName property created
	# with required => 1, and a PequelType property.
	# Returns the object itself, like the other new_extra() subs in this file
	# (previously the result of push_back() leaked out as the return value).
	sub new_extra
	{
		my $self = shift;
		$self->attributes()->push_back(
			ETL::Pequel3::Type::Properties::FieldName->new(required => 1),
			ETL::Pequel3::Type::Properties::PequelType->new(),
		);
		return $self;
	}
}
# ----------------------------------------------------------------------------------------------------
{
	package ETL::Pequel3::Type::Section::Dedup;
	use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
	use Class::STL::ClassMembers
		Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup'),
		Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => "@{[ __PACKAGE__ ]}"),
		Class::STL::ClassMembers::DataMember->new(name => 'description', default => 'This section is used to specify a record I<de-duplication> action. Please refer to the specific F<dedup_on_input> and F<dedup_on_output> section.');
	use Class::STL::ClassMembers::Constructor;
	# Extra construction step for the generic 'dedup' section: adds a single
	# On property to the section's attributes container.
	# Returns the object itself.
	sub new_extra
	{
		my ($self) = @_;
		my $attributes = $self->attributes();
		$attributes->push_back(ETL::Pequel3::Type::Properties::On->new());
		return $self;
	}
	# Forward a select() request to the concrete dedup section named by the
	# 'on' parameter.
	#
	# Parameters (name => value pairs):
	#   on - required; it is lower-cased and appended to "<section_name>_on_"
	#        to form the name of the section looked up through pequel_ref().
	# The complete argument list, 'on' included, is passed unchanged to the
	# target section's select().
	#
	# Returns whatever the target section's select() returns. When 'on' is
	# missing or undefined, user error 10211 is reported; if the error handler
	# returns control, nothing is returned instead of looking up a section
	# with an incomplete name.
	sub select
	{
		my $self = shift;
		my %p = @_;
		unless (defined($p{on}))
		{
			$self->err()->user_error(10211,
				"Section '@{[ $self->section_name() ]}' select() function requires 'on' parameter!");
			return;
		}
		my $target_name = "@{[ $self->section_name() ]}_on_@{[ lc($p{on}) ]}";
		return $self->pequel_ref()->section($target_name)->select(@_);
	}
}
# ----------------------------------------------------------------------------------------------------
{
	package ETL::Pequel3::Type::Section::DedupOnInput;
	use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
	use Class::STL::ClassMembers
		Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup_on_input'),
		Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => "@{[ __PACKAGE__ ]}"),
		Class::STL::ClassMembers::DataMember->new(name => 'description', default => "This section is used to specify an input record I<de-duplication> action. Only one of a group of two or more consecutive records that have the same value for the fields specified in the I<items> will be processed -- the other I<duplicate> records will be discarded. The F<pequel_type> item attribute may be specified as I<numeric> so as to perform a numeric comparison.\n\n\nThe de-duplication action will be performed after the F<copy_record> and F<divert_record> actions.");
	use Class::STL::ClassMembers::Constructor;
	# Prepare the dedup-on-input section: pass this section's items and the
	# job's input field list to map_input() on the 'fields' member, then run
	# the parent class's prepare() with the caller's arguments and return its
	# result.
	sub prepare
	{
		my ($self, @args) = @_;
		my $dedup_fields = $self->fields();
		$dedup_fields->map_input($self->items(), $self->pequel_ref()->input()->input_fields());
		return $self->SUPER::prepare(@args);
	}
}
# ----------------------------------------------------------------------------------------------------
{
	package ETL::Pequel3::Type::Section::DedupOnOutput;
	use base qw(ETL::Pequel3::Type::Section::Dedup::Abstract);
	use Class::STL::ClassMembers
		Class::STL::ClassMembers::DataMember->new(name => 'section_name', default => 'dedup_on_output'),
		Class::STL::ClassMembers::DataMember->new(name => 'element_type', default => "@{[ __PACKAGE__ ]}"),
		Class::STL::ClassMembers::DataMember->new(name => 'description', default => "This section is used to specify an output record I<de-duplication> action. Only one of a group of two or more consecutive output records that have the same value for the fields specified in the I<items> will be processed -- the other I<duplicate> records will be discarded. The F<pequel_type> item attribute may be specified as I<numeric> so as to perform a numeric comparison.\n\n\nThe de-duplication action will be performed after the F<output_filter> action and before the F<field_process_on_output> action.");
	use Class::STL::ClassMembers::Constructor;
	# Prepare the dedup-on-output section: pass this section's items and the
	# job's output field list to map_output() on the 'fields' member, then run
	# the parent class's prepare() with the caller's arguments and return its
	# result.
	sub prepare
	{
		my ($self, @args) = @_;
		my $dedup_fields = $self->fields();
		$dedup_fields->map_output($self->items(), $self->pequel_ref()->output()->output_fields());
		return $self->SUPER::prepare(@args);
	}
}
# ----------------------------------------------------------------------------------------------------
1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -