#!/usr/bin/perl -s
use re::engine::RE2;
use utf8;
use MeCab;
binmode(STDOUT, ":utf8");
binmode(STDIN, ":utf8");

if ($help==1 or $h==1){
	print '
This is help for ML-Ask, or eMotive eLement and Expression Analysis system, ver. 4.0-4.3

ML-Ask is a keyword-based language-dependent system for automatic affect annotation on utterances in Japanese.

To use on standard input, launch in command line as: "perl mlask.pl"
To use on files, launch in command line as: "perl mlask.pl input_file.txt > output_file.txt"
Using -h or -help option will diplay this help message and exit the program.

The system was developed by Michal Ptaszynski (ptaszynski@ieee.org), Pawel Dybala, Rafal Rzepka and Kenji Araki. 

The ML-Ask system is described in detail in papers below. When using ML-Ask please add reference to either of these papers (or both if you like):

Michal Ptaszynski, Pawel Dybala, Rafal Rzepka and Kenji Araki, "Affecting Corpora: Experiments with Automatic Affect Annotation System - A Case Study of the 2channel Forum -", In Proceedings of The Conference of the Pacific Association for Computational Linguistics (PACLING-09), September 1-4, 2009, Hokkaido University, Sapporo, Japan, pp. 223-228.

Michal Ptaszynski, Pawel Dybala, Wenhan Shi, Rafal Rzepka and Kenji Araki, "A System for Affect Analysis of Utterances in Japanese Supported with Web Mining", Journal of Japan Society for Fuzzy Theory and Intelligent Informatics, Vol. 21, No. 2 (April), pp. 30-49 (194-213), 2009.

Please report any comments and bugs to: ptaszynski@ieee.org

';
	exit;
}

# to use on standard input, launch in command line as: "perl mlask.pl"
# to use on files launch in command line as: "perl mlask.pl input_file.txt > output_file.txt"

#	EMOTEMES:
#	interjections_uncoded.txt, annotated as INT
#	exclamation_uncoded.txt, annotated as EXC
#	interkon_uncoded.txt
#	interkonprzed_uncoded.txt
#	vulgar_uncoded.txt, annotated as VUL
#	endearments_uncoded.txt, annotated as END
#	gitaigo_uncoded.txt, annotated as GIT
#	emotikony_uncoded.txt, annotated as EMO

@emotemy = qw(interjections exclamation vulgar endearments emotikony gitaigo);

# "interkon" and "interkonprzed" are also in emotems, but we do not use them because we would have to add a special procedure for processing of end of sentence (not input! one input could have more than one sentence). now it is done by a mecab-trick (for most cases).

foreach my $nazwa (@emotemy) {
	utf8::decode($_);
	open(FILE_2, "emotemes/$nazwa".'_uncoded.txt') or die "Cannot open!";
	@$nazwa = <FILE_2>;
		foreach (@$nazwa) {
			utf8::decode($_);
			$_ =~ s/ //g
		}
	close FILE_2;
	next;
}

# after the above we have 6 tables (arrays) with the names of emoteme classes; moreover, the table elements are also references to each emotem item in each emotem class.

#	EMOTIONS: 
#	aware_uncoded.txt, annotated as AWA
#	haji_uncoded.txt, annotated as HAJ
#	ikari_uncoded.txt, annotated as IKA
#	iya_uncoded.txt, annotated as IYA
#	kowa_uncoded.txt, annotated as KOW
#	odoroki_uncoded.txt, annotated as ODO
#	suki_uncoded.txt, annotated as SUK
#	takaburi_uncoded.txt, annotated as TAK
#	yasu_uncoded.txt, annotated as YAS
#	yorokobi_uncoded.txt, annotated as YOR
#	
#	2D valence scale mapping: 
#	positive is annotated as POS
#	negative is annotated as NEG
#	activeted(active) is annotated as ACT
#	deactivated(passive) is annotated as PAS

# the same as the above but for emotions.

@emotions = qw(aware haji ikari iya kowa odoroki suki takaburi yasu yorokobi);

foreach my $nazwa_plain (@emotions) {
	utf8::decode($_);
	open(FILE_2, "emotions/$nazwa_plain".'_uncoded.txt') or die "Cannot open!";
	@$nazwa_plain = <FILE_2>;
		foreach (@$nazwa_plain) {
			utf8::decode($_);
		}
	close FILE_2;
	next;
}

# a hash used in CVS procedure ('KEY' , 'VALUE')
%hash_cvs = (
	'suki' => {'iya'},
	'ikari' => {'yasu'},
	'kowa' => {'yasu'},
	'yasu' => {'ikari','takaburi','odoroki','haji','kowa'},
	'iya' => {'yorokobi','suki'},
	'aware' => {'suki','yorokobi','takaburi','odoroki','haji'},
	'takaburi' => {'yasu','aware'},
	'odoroki' => {'yasu','aware'},
	'haji' => {'yasu','aware'},
	'yorokobi' => {'iya'} );

#this is used in CVS pattern: ($cvs_type1)(.*?)(感情表現)(.*?)ない 
$cvs_type1="あまり|そんなに|ぜったい|まったく|すこしも|いまひと|いまひとつも|ちょっとも|ちっとも|いまいち|まさか|そんな|ぜんぜん|そもそも|すら|とても|余り|絶対|ゼッタイ|全く|マッタク|少しも|今ひとつ|今一つ|今一|今いち|全然";

#this is used in CVS pattern: (感情表現)(.*?)($cvs_type2)
$cvs_type2="あまりない|そんなにない|ぜったいない|まったくない|すこしもない|いまひとつない|いまひとつもない|ちょっともない|ちっともない|いまいちない|ぜんぜんない|そもそもない|といえない|とはいえない|と思わない|とは思わない|と思えない|とは思えない|とすら思えない|てはいけない|ちゃいけない|じゃいけない|てはだめ|ちゃだめ|じゃだめ|てはいかん|てはあかん|ちゃいかん|じゃいかん|じゃあかん|ちゃあかん|なくていい|なくてOK|なくてＯＫ|なくて大丈夫|なくて問題ない|なくて結構|なくてもいい|なくてもOK|なくても大丈夫|なくても問題ない|なくても結構|く思わない|く思えない|もんか|ものか|わけではない|わけじゃない|わけない|わけがない|わけはない|わけもない|わけか|わけにはいかない|わけにはいくまい|わけにもいかない|余りない|ゼッタイない|絶対ない|全くない|少しもない|今一つない|今ひとつない|今一つもない|今ひとつもない|今いちない|今一ない|全然ない|と言えない|とは言えない|ては行けない|ちゃ行けない|じゃ行けない|ては行かん|ちゃ行かん|じゃ行かん|じゃあかん|ちゃあかん|なくて良い|なくてOK|なくてＯＫ|なくても良い|訳ではない|訳じゃない|訳ない|訳がない|訳はない|訳もない|訳か|訳にはいかない|訳には行かない|訳にはいくまい|訳にも行かない";

#here the analysis starts. to use on standard input comment lines #1-3 and uncomment line #4

while (<>) {			#1
utf8::decode($_);		#2
$input = $_;			#3
#{$input = <STDIN>;	#4
$input =~ s/\!/！/g;
$input =~ s/\?/？/g;
chomp $input;
my $input_mecab = $input;
print $input;

#mecab trick.

my $mecab = MeCab::Tagger->new();
my $node = $mecab->parseToNode($input_mecab);
for( ; $node; $node = $node->{next} ) {
	next unless defined $node->{surface};
	my $midasi = $node->{surface};
	my( $hinsi, $kijutsu, $genkei ) = (split( /,/, $node->{feature} ))[0,1,6];
	push (@input_lemmas, $genkei);
	$hinsi_kijutsu = $kijutsu.$hinsi; 
	if ($hinsi_kijutsu =~ /感動詞|フィラー|終助詞/g) {
    	push (@found_interjections, $midasi);
    } elsif ($midasi =~ /(てえ|てぇ|ねえ|ねぇ)/g) {
    	push (@found_interjections, $midasi)
    } else {
    	push (@input_lemma_no_emo, $midasi);
    }
}

foreach (@found_interjections) {
	utf8::decode($_);
}

$input_lemma = join ('', @input_lemmas);
$input_lemma =~ s/\*//g;
$input_lemma_no_emo = join ('', @input_lemma_no_emo);

#looking for emotemes.

foreach $emotem_class (@emotemy) {   # @emotemy is a table with emoteme class names, $emotem_class is a name of each class.
	@emotem_items = @$emotem_class;  # @emotem_items is a table with all elements for each class.
	chomp @emotem_items;

	foreach $emotem_item (@emotem_items) { # $emotem_item is a separate element from a certain emoteme class.
		$emotem_itemQE = "\Q$emotem_item\E";
		while ($input_lemma_no_emo =~ /$emotem_itemQE/g) {
			if ($emotem_class =~ /emotikony/) {
				push (@found, '('.$emotem_item.')');
			} else {
				push (@found, $emotem_item);
			}
			$input_lemma_no_emo =~ s/$emotem_itemQE//;
		}
	}

	if ($emotem_class =~ /interjections/) {
		push (@found, @found_interjections);
	}

	if (@found > 0) {
		my $emoteme_3  = substr uc $emotem_class, 0, 3;
		
		push (@output_emotemy, '|'."$emoteme_3".':'."@found");
		$scalar_found = scalar (@found);
		push (@emotive_value, $scalar_found);
	}

	@found = ();
}

my $total = 0; 
($total+=$_) for @emotive_value; # $total is a sum of all emotemes (or: emotive value); a max is set to 5, but perhaps there is a better way to calculate emotive value?
if ($total>5){$total=5;}

if ($total==0){
	print '|non-emotive';
} else {
	print '|emotive|emo_val='.$total."@output_emotemy";

}

@emotive_value = ();
@output_emotemy = ();

if ($total>0){ #here Nakamura's dictionary kicks in.
	
	print '||emotions:';

	foreach $emotion_class (@emotions) {
		@emotion_items = @$emotion_class;  
		chomp @emotion_items;

		foreach $emotion_item (@emotion_items) {
			$emotion_itemQE = "\Q$emotion_item\E";
			$input_lemma_cvs = $input_lemma;
			
			while ($input_lemma =~ /$emotion_itemQE/g) {
				if ($input_lemma_cvs =~ /(($cvs_type1)(.*?)($emotion_itemQE)(.*?)(あるますん|ない))|(($emotion_itemQE)(.*?)($cvs_type2))|($emotion_itemQE(.*?)(あるますん|ない))/g) { #here CVS procedure kicks in.
					foreach (%{$hash_cvs{$emotion_class}}) {
						$new_emotion_class=$_;
						push (@{$found_hash{$new_emotion_class}}, "$emotion_item＊CVS");
					}
					$input_lemma_cvs =~ s/$emotion_item//;
				} else {
					push (@{$found_hash{$emotion_class}}, "$emotion_item");
					$input_lemma_cvs =~ s/$emotion_item//;
				}
			}
		}
	}
	
	foreach $key (keys %found_hash) {
		if ($key =~ /aware|haji|ikari|iya|kowa|odoroki|suki|takaburi|yasu|yorokobi/g){
			my $key_3  = substr uc $key, 0, 3;
			push (@output_emotions, '|'.$key_3.':'."@{$found_hash{$key}}");	
			push (@how_many, $key);
		}
	}
	$scalar_emo = scalar @how_many;
	print '('.$scalar_emo.')';
	print @output_emotions;
	
	if (@how_many>0){
	my $how_many_valence = my $how_many_activation = join (",", @how_many); 
	$how_many_valence =~ s/yasu|yorokobi|suki/P/g;
	$how_many_valence =~ s/iya|aware|ikari|kowa/N/g;
	$how_many_valence =~ s/takaburi|odoroki|haji/NorP/g;
	$cnt_valence_P = $how_many_valence =~ tr/P/P/;
	$cnt_valence_N = $how_many_valence =~ tr/N/N/;
	
	print '||2D|';
	
		if ($cnt_valence_N == $cnt_valence_P) {
			print 'POS_or_NEG';
		} else {
			%hash_valence = (
			$cnt_valence_P => 'POS',
			$cnt_valence_N => 'NEG');
			@valence_array = ($cnt_valence_P, $cnt_valence_N);
			@sorted_valence = (sort { $b <=> $a } @valence_array);
			push (@output_valence, $hash_valence{$sorted_valence[0]});
			
			if (($cnt_valence_N == 0) or ($cnt_valence_P == 0)) {
				print @output_valence;
			} else {
				unshift (@output_valence, 'mostly_');
				print @output_valence;
			}
		@output_valence=();
		}
	
		$how_many_activation =~ s/takaburi|odoroki|haji|ikari|kowa/A/g;
		$how_many_activation =~ s/yasu|aware/D/g;
		$how_many_activation =~ s/iya|yorokobi|suki/DorA/g;
		$cnt_activation_A = $how_many_activation =~ tr/A/A/;
		$cnt_activation_D = $how_many_activation =~ tr/D/D/;
		
		print '|';
	
		if ($cnt_activation_A == $cnt_activation_D) {
			print 'ACT_or_PAS';
		} else {
			%hash_activation = (
			$cnt_activation_A => 'ACT',
			$cnt_activation_D => 'PAS');
			@activation_array = ($cnt_activation_D, $cnt_activation_A);
			@sorted_activation = (sort { $b <=> $a } @activation_array);
			push (@output_activation, $hash_activation{$sorted_activation[0]});
			
			if (($cnt_activation_A == 0) or ($cnt_activation_D == 0)) {
				print @output_activation;
			} else {
				unshift (@output_activation, 'mostly_');
				print @output_activation;
			}
		@output_activation=();
		}
	}
	
	@how_many = ();
	%found_hash=();
	@found=();
	@found_cvs=();
}

print "\n";

@output_emotions = ();
@input_lemmas = ();
@input_lemma_no_emo = ();
@found_interjections = ();
}