# UEA stemmer V1.03
# DJS 26Mar2004
# v.1.03 deals with tagged text 26mar2004
# v.1.02 correct on over 90% of words altered from test set of DJS_text 1-179
#
#  Copyright University of East Anglia 2005
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.



####################### stems a line or sentence and counts terms #######################
# Splits $sentence on single spaces, stems each term with stem_word() and
# rebuilds the sentence in the tag style it arrived in, updating the
# frequency hashes supplied by the caller along the way.
#
# Parameters:
#   $sentence       text to stem; terms are separated by single spaces
#   $tagstyle       "Brill" (word/POS), "Lingua" (<pos>word</pos>) or "none" (plain text)
#   $docid          document identifier, used as the prefix of the per-document keys
#   $c_wordfreqref  hash ref; key "$docid\t$word" (stemmed word) is incremented
#   $c_posfreqref   hash ref; key "$docid\t$pos" is incremented
#   $c_termfreqref  hash ref; key "$docid\t$term" is incremented
#   $c_changedref   hash ref; key "$origword\t$word\t$ruleno" is incremented
#                   whenever stemming altered the word
#   $d_termfreqref  hash ref; key $term is incremented
#
# Returns ($wordcount, $stemmed_text).  $wordcount counts the terms that could
# be parsed in the requested tag style; terms that cannot be parsed (for
# example bare punctuation) are skipped.  Returns 0 as soon as a term is met
# while $tagstyle is not one of the recognised styles.
sub stem_line{
    use strict;
    my ($sentence, $tagstyle, $docid, $c_wordfreqref, $c_posfreqref, $c_termfreqref, $c_changedref, $d_termfreqref) = @_;
    my $wordcount    = 0;
    my $stemmed_text = "";

    print "stem_line: sentence input = $sentence ($tagstyle)\n";
    my @terms = split / /, $sentence;           # terms include punctuation, possessives, etc.

    foreach my $term (@terms){
        my ($word, $pos, $origword, $ruleno) = ("", "", "", 0);

        if ($tagstyle eq "Brill"){              ## word/xxx
            # Never read $1/$2 after a failed match: they would hold values
            # left over from some earlier, unrelated match.
            next unless $term =~ /(.*)\/(\w+)/;
            ($word, $pos) = ($1, $2);
            if (lc($pos) ne "nnp"){             # lower case all but proper nouns
                $word = lc($word);
            }
            $origword = $word;                  # keep a copy to detect changes
            ($word, $ruleno) = stem_word($word);
            $stemmed_text .= $word.'/'.$pos.' ';
        } elsif ($tagstyle eq "Lingua"){        ## <xxx>word</xxx>
            next unless $term =~ /\<(\w+)\>(.*)\</;
            ($pos, $word) = ($1, $2);
            if (lc($pos) ne "nnp"){             # lower case all but proper nouns
                $word = lc($word);
            }
            $origword = $word;
            ($word, $ruleno) = stem_word($word);
            $term = '<'.$pos.'>'.$word.'</'.$pos.'>';   # rebuild term with a proper closing tag
            $stemmed_text .= "$term ";
        } elsif ($tagstyle eq "none"){          ## plain text
            next unless $term =~ /(\w['\w-]*)/;
            $word = $1;
            $origword = $word;
            print "2 ($term) $word\n";          # debug trace kept from the original
            ($word, $ruleno) = stem_word($word);
            $stemmed_text .= "$word ";
        } else {
            print "unrecognised tagstyle: $tagstyle\n";
            return(0);
        }
        $wordcount++;

                                                ## now deal with results
        if ($origword ne $word){                # a change was made
            $c_changedref->{"$origword\t$word\t$ruleno"}++;
        }

        if ($word =~ /\w/){
            $c_wordfreqref->{"$docid\t$word"}++;
            $c_posfreqref->{"$docid\t$pos"}++;
            $c_termfreqref->{"$docid\t$term"}++;
            $d_termfreqref->{$term}++;
        }
    }
    return($wordcount, $stemmed_text);
}



####################### stems a single word - 138 rules#######################
# Stems one word.  A first stage filters out words that must not be touched
# (frequent problem words, over-long strings, numbers, hyphenated words,
# identifiers, acronyms, capitalised words) and strips possessives and
# contractions; anything that survives is handed to suffix_remove().
#
# Parameter: $word - the word to stem.
# Returns:   ($stem, $ruleno) - the stemmed word and the number of the rule
#            that decided the outcome (90-95 for the first-stage rules,
#            otherwise whatever suffix_remove() reports).
sub stem_word{
    # DJS 02Mar2004, V0.X MCJ Feb2004
    my $word = shift(@_);
    my $ruleno = 0;
    my $origword = $word;
    my $maxwordlength = length("deoxyribonucleicacid"); # or some other suitable value, e.g antidisestablishmentarianism

    print "stem_word:input $origword\n";

                                                    ## preliminaries
    if ($word =~ /^(is|as|this|has|was|during)$/){  # frequent problem word (1.01 added)
        return($word, 90);
    }
    if (length($word) > $maxwordlength){            # too long to be a proper word 95
        return($word, 95);
    }
    if ($word =~ /'/){                              ## apostrophe(s) - remove and continue 94
        $word =~ s/'s$//;                           # remove possessive singular
        $word =~ s/'$//;                            # remove possessive plural
        $word =~ s/n't/not/;                        # expand contraction n't
        $word =~ s/'ve/have/;                       # expand contraction 've
        $word =~ s/'re/are/;                        # expand contraction 're
        $word =~ s/'m/am/;                          # expand contraction I'm
        $ruleno = 94;                               # stays in force unless a suffix rule fires
    }

                                                    ## 90-93 detect numbers, NNP, acronym, program variable, ...
    # Order matters: the first matching test decides the outcome.
    return($word, 90.3) if $word =~ /\d+/ && $word !~ /[a-zA-Z]/;  # digits and no ASCII letter 90.3
    return($word, 90.2) if $word =~ /\w+-\w+/;                     # hyphenated 90.2
    return($word, 90.1) if $word =~ /-/;                           # has a hyphen 90.1
    return($word, 90)   if $word =~ /_|\d/;                        # has underscore or digit 90
    if ($word =~ /^\p{IsUpper}+s$/){                               # all uppercase with terminal s 91.1
        $word =~ s/s$//;
        # Report 91.1 as documented; the plain all-uppercase rule below owns 91.
        return($word, 91.1);
    }
    return($word, 91) if $word =~ /^\p{IsUpper}+$/;                # all uppercase 91
    return($word, 92) if $word =~ /\p{IsUpper}.*\p{IsUpper}/;      # several uppercase chars 92
    # Capitalised words without punctuation are assumed to be proper nouns.
    return($word, 93) if $word =~ /^\p{IsUpper}{1}/;               # capitalised 93

    ($word, $ruleno) = suffix_remove($word, $ruleno);
    return($word, $ruleno);
}
 
 
####################### suffix_remove 139 rules ########################
# Table-driven suffix stripper.  The rules are tried in order and the first
# whose suffix matches (case-insensitively, at the end of the word) wins.
#
# Parameters:
#   $word   - the word to stem
#   $ruleno - rule number to report when no suffix rule applies
# Returns ($stem, $ruleno).
#
# Rule numbers 32, 34, 35, 49, 53, 55, 57 and 61 are absent from the table:
# in the original rule order each was shadowed by an earlier rule (22, 6, 6,
# 28, 12.1, 41, 48 and 22.1 respectively) and could never fire.
# NOTE(review): rules 34/35 (-fulness/-ousness) look intended to strip -ness
# but have always been masked by rule 6 (-ss); enabling them would change the
# stemmer's output, so that is left as a separate decision.
{
    use strict;

    my @suffix_rules;       # compiled rules, built on first use

    # Returns a reference to the ordered list of compiled rules, each being
    # [ match regex, strip regex or undef, replacement, rule number, action ].
    sub _uea_suffix_rules {
        return \@suffix_rules if @suffix_rules;

        my @spec = (
            # suffix           strip      add   rule   action
            [ 'aceous',        'aceous',  '',   1    ],
            [ 'ces',           's',       '',   2    ],
            [ 'cs',            '',        '',   3    ],
            [ 'sis',           '',        '',   4    ],
            [ 'tis',           '',        '',   5    ],
            [ 'ss',            '',        '',   6    ],
            [ 'eed',           '',        '',   7    ],
            [ 'ued',           'd',       '',   8    ],
            [ 'ues',           's',       '',   9    ],
            [ 'ees',           's',       '',   10   ],
            [ 'iases',         'es',      '',   11.4 ],
            [ 'uses',          's',       '',   11.3 ],   # change 1.01: more take e than not
            [ 'sses',          'es',      '',   11.2 ],
            [ 'eses',          'es',      'is', 11.1 ],
            [ 'ses',           's',       '',   11   ],
            [ 'tled',          'd',       '',   12.5 ],
            [ 'pled',          'd',       '',   12.4 ],
            [ 'bled',          'd',       '',   12.3 ],
            [ 'eled',          'ed',      '',   12.2 ],
            [ 'lled',          'ed',      '',   12.1 ],
            [ 'led',           'ed',      '',   12   ],
            [ 'ened',          'ed',      '',   13.7 ],
            [ 'ained',         'ed',      '',   13.6 ],
            [ 'erned',         'ed',      '',   13.5 ],
            [ 'rned',          'ed',      '',   13.4 ],
            [ 'nned',          'ned',     '',   13.3 ],
            [ 'oned',          'ed',      '',   13.2 ],
            [ 'gned',          'ed',      '',   13.1 ],
            [ 'ned',           'd',       '',   13   ],
            [ 'ifted',         'ed',      '',   14   ],
            [ 'ected',         'ed',      '',   15   ],
            [ 'vided',         'd',       '',   16   ],
            [ 'ved',           'd',       '',   17   ],
            [ 'ced',           'd',       '',   18   ],
            [ 'erred',         'red',     '',   19   ],
            [ 'urred',         'red',     '',   20.5 ],
            [ 'lored',         'ed',      '',   20.4 ],
            [ 'eared',         'ed',      '',   20.3 ],
            [ 'tored',         'ed',      'e',  20.2 ],
            [ 'ered',          'ed',      '',   20.1 ],
            [ 'red',           'd',       '',   20   ],
            [ 'tted',          'ted',     '',   21   ],
            [ 'noted',         'd',       '',   22.4 ],
            [ 'leted',         'd',       '',   22.3 ],
            [ 'uted',          'd',       '',   22.2 ],
            [ 'ated',          'd',       '',   22.1 ],
            [ 'ted',           'ed',      '',   22   ],
            [ 'anges',         's',       '',   23   ],
            [ 'aining',        'ing',     '',   24   ],
            [ 'acting',        'ing',     '',   25   ],
            [ 'tting',         'ting',    '',   26   ],
            [ 'viding',        'ing',     'e',  27   ],
            [ 'ssed',          'ed',      '',   28   ],
            [ 'sed',           'd',       '',   29   ],
            [ 'titudes',       's',       '',   30   ],
            [ 'umed',          'd',       '',   31   ],
            [ 'uming',         'ing',     'e',  33   ],
            [ 'r[aeiou]bed',   'd',       '',   36.1 ],   # 1.01 added
            [ 'bed',           'ed',      '',   36   ],   # 1.01 changed
            [ 'ssing',         'ing',     '',   37   ],
            [ 'ulting',        'ing',     '',   38   ],
            [ 'ving',          'ing',     'e',  39   ],
            [ 'eading',        'ing',     '',   40.7 ],
            [ 'oading',        'ing',     '',   40.6 ],
            [ 'eding',         'ing',     '',   40.5 ],
            [ 'dding',         'ding',    '',   40.4 ],
            [ 'lding',         'ing',     '',   40.3 ],
            [ 'rding',         'ing',     '',   40.2 ],
            [ 'nding',         'ing',     '',   40.1 ],
            [ 'ding',          'ing',     'e',  40   ],
            [ 'lling',         'ling',    '',   41   ],
            [ 'ealing',        'ing',     '',   42.4 ],
            [ 'oling',         'ing',     '',   42.3 ],
            [ 'ailing',        'ing',     '',   42.2 ],
            [ 'eling',         'ing',     '',   42.1 ],
            [ 'ling',          'ing',     'e',  42   ],
            [ 'nged',          'd',       '',   43.2 ],
            [ 'gged',          'ged',     '',   43.1 ],
            [ 'ged',           'd',       '',   43   ],
            [ 'mming',         'ming',    '',   44.3 ],
            [ 'rming',         'ing',     '',   44.2 ],
            [ 'lming',         'ing',     '',   44.1 ],
            [ 'ming',          'ing',     'e',  44   ],
            [ 'nging',         'ing',     '',   45.2 ],
            [ 'gging',         'ging',    '',   45.1 ],
            [ 'ging',          'ing',     'e',  45   ],
            [ 'aning',         'ing',     '',   46.6 ],
            [ 'ening',         'ing',     '',   46.5 ],
            [ 'gning',         'ing',     '',   46.4 ],
            [ 'nning',         'ning',    '',   46.3 ],
            [ 'oning',         'ing',     '',   46.2 ],
            [ 'rning',         'ing',     '',   46.1 ],
            [ 'ning',          'ing',     'e',  46   ],
            [ 'sting',         'ing',     '',   47   ],
            [ 'eting',         'ing',     '',   48.4 ],
            [ 'pting',         'ing',     '',   48.3 ],
            [ 'nting',         'ing',     '',   48.2 ],
            [ 'cting',         'ing',     '',   48.1 ],
            [ 'ting',          'ing',     'e',  48   ],
            [ 'les',           's',       '',   50   ],
            [ 'tes',           's',       '',   51   ],
            [ 'zed',           'd',       '',   52   ],
            [ 'iring',         'ing',     'e',  54.4 ],
            [ 'uring',         'ing',     'e',  54.3 ],
            [ 'ncing',         'ing',     'e',  54.2 ],
            [ 'zing',          'ing',     'e',  54.1 ],
            [ 'sing',          'ing',     'e',  54   ],
            [ 'ied',           'ied',     'y',  56   ],
            [ 'thing',         '',        '',   58.1 ],   # 1.01 added
            [ '(\w)(\w)ing',   'ing',     '',   58,   'undouble' ],
            [ 'ies',           'ies',     'y',  59   ],
            [ 'lves',          'ves',     'f',  60.1 ],
            [ 'ves',           's',       '',   60   ],   # changed from s/ves$/f/
            [ 'aped',          'd',       '',   61.3 ],
            [ 'uded',          'd',       '',   61.2 ],
            [ 'oded',          'd',       '',   61.1 ],
            [ '(\w)(\w)ed',    'ed',      '',   62,   'undouble' ],
            [ 'pes',           's',       '',   63.8 ],   # 1.01 added
            [ 'mes',           's',       '',   63.7 ],   # 1.01 added
            [ 'ones',          's',       '',   63.6 ],
            [ 'izes',          's',       '',   63.5 ],
            [ 'ures',          's',       '',   63.4 ],
            [ 'ines',          's',       '',   63.3 ],
            [ 'ides',          's',       '',   63.2 ],
            [ 'ges',           's',       '',   63.1 ],
            [ 'es',            'es',      '',   63   ],
            [ 'is',            'is',      'e',  64   ],
            [ 'ous',           '',        '',   65   ],
            [ 'ums',           '',        '',   66   ],
            [ 'us',            '',        '',   67   ],
            [ 's',             's',       '',   68,   'recurse' ],
        );

        foreach my $row (@spec) {
            my ($suffix, $strip, $add, $rule_id, $action) = @$row;
            push @suffix_rules, [
                qr/$suffix$/i,
                ($strip eq '' ? undef : qr/$strip$/i),  # undef: suffix recognised, word left alone
                $add,
                $rule_id,
                (defined $action ? $action : ''),
            ];
        }
        return \@suffix_rules;
    }

    sub suffix_remove {
        my $word     = shift(@_);
        my $ruleno   = shift(@_);
        my $origword = $word;

        RULE:
        foreach my $rule (@{ _uea_suffix_rules() }) {
            my ($match_re, $strip_re, $add, $rule_id, $action) = @$rule;
            next RULE unless $word =~ $match_re;

            # The two letters in front of a generic -ing/-ed ending; they must
            # be captured before the word is altered.
            my ($first, $second) = ($1, $2);

            $word =~ s/$strip_re/$add/ if defined $strip_re;

            if ($action eq 'undouble') {
                # e.g. "stopped" -> "stopp" -> "stop"
                $word =~ s/$first$//i if $first eq $second;
                $ruleno = $rule_id;
            }
            elsif ($action eq 'recurse') {
                # A bare -s was stripped; the remainder may still carry a
                # suffix, so run it through the whole stemmer again.
                ($word, $ruleno) = stem_word($word);
                $ruleno = $rule_id if $ruleno == 0;
            }
            else {
                $ruleno = $rule_id;
            }
            last RULE;
        }

        if ($word ne $origword) {
            print "$origword to $word ($ruleno)\n";
        }
        return ($word, $ruleno);
    }
}



####################### stems a single word - 68 rules#######################
# Earlier, 68-rule version of the stemmer, expressed as an ordered rule table.
# The first rule whose suffix matches (case-insensitively, at the end of the
# word) is applied and the search stops.
#
# Parameter: $word - the word to stem.
# Returns:   ($stem, 0) - this version does not report rule numbers.
# Side effects: a changed word is logged to the OUTFILE handle; rule 58 also
# writes a trace line to standard output.
{
    my @rules68;            # compiled rules, built on first use

    # Returns a reference to the ordered list of compiled rules, each being
    # [ match regex, strip regex or undef, replacement, action ].
    sub _uea_rules68 {
        return \@rules68 if @rules68;

        my @spec = (
            # suffix          strip      add    action        rule
            [ 'aceous',       'aceous',  ''   ],            # 1
            [ 'ces',          's',       ''   ],            # 2
            [ 'cs',           '',        ''   ],            # 3
            [ 'sis',          '',        ''   ],            # 4
            [ 'tis',          '',        ''   ],            # 5
            [ 'ss',           '',        ''   ],            # 6
            [ 'eed',          '',        ''   ],            # 7
            [ 'ued',          'd',       ''   ],            # 8
            [ 'ues',          's',       ''   ],            # 9
            [ 'ees',          's',       ''   ],            # 10
            [ 'ses',          's',       ''   ],            # 11
            [ 'led',          'd',       ''   ],            # 12
            [ 'ned',          'd',       ''   ],            # 13
            [ 'ifted',        'ed',      ''   ],            # 14
            [ 'ected',        'ed',      ''   ],            # 15
            [ 'vided',        'd',       ''   ],            # 16
            [ 'ved',          'd',       ''   ],            # 17
            [ 'ced',          'd',       ''   ],            # 18
            [ 'erred',        'red',     ''   ],            # 19
            [ 'red',          'd',       ''   ],            # 20
            [ 'tted',         'ted',     ''   ],            # 21
            [ 'ted',          'd',       ''   ],            # 22
            [ 'anges',        's',       ''   ],            # 23
            [ 'aining',       'ing',     ''   ],            # 24
            [ 'acting',       'ing',     ''   ],            # 25
            [ 'tting',        'ting',    ''   ],            # 26
            [ 'viding',       'ing',     'e'  ],            # 27
            [ 'ssed',         'ed',      ''   ],            # 28
            [ 'sed',          'd',       ''   ],            # 29
            [ 'titudes',      's',       ''   ],            # 30
            [ 'umed',         'd',       ''   ],            # 31
            [ 'ulted',        'ed',      ''   ],            # 32
            [ 'uming',        'ing',     'e'  ],            # 33
            [ 'fulness',      'ness',    ''   ],            # 34
            [ 'ousness',      'ness',    'e'  ],            # 35
            [ 'bed',          'd',       ''   ],            # 36
            [ 'ssing',        'ing',     ''   ],            # 37
            [ 'ulting',       'ing',     ''   ],            # 38
            [ 'ving',         'ing',     'e'  ],            # 39
            [ 'ding',         'ing',     'e'  ],            # 40
            [ 'lling',        'ing',     ''   ],            # 41
            [ 'ling',         'ing',     'e'  ],            # 42
            [ 'ged',          'd',       ''   ],            # 43
            [ 'ming',         'ing',     'e'  ],            # 44
            [ 'ging',         'ing',     'e'  ],            # 45
            [ 'ning',         'ing',     'e'  ],            # 46
            [ 'sting',        'ing',     ''   ],            # 47
            [ 'ting',         'ing',     'e'  ],            # 48
            [ 'ssed',         'ed',      ''   ],            # 49
            [ 'les',          's',       ''   ],            # 50
            [ 'tes',          's',       ''   ],            # 51
            [ 'zed',          'd',       ''   ],            # 52
            [ 'lled',         'ed',      ''   ],            # 53
            [ 'sing',         'ing',     'e'  ],            # 54
            [ 'lling',        'ing',     ''   ],            # 55
            [ 'ied',          'ied',     'y'  ],            # 56
            [ 'ating',        'ing',     'e'  ],            # 57
            [ '(\w)(\w)ing',  'ing',     '',   'undouble_trace' ],  # 58
            [ 'ies',          'ies',     'y'  ],            # 59
            [ 'ves',          'ves',     'f'  ],            # 60
            [ 'ated',         'd',       ''   ],            # 61
            [ '(\w)(\w)ed',   'ed',      '',   'undouble' ],        # 62
            [ 'es',           'es',      ''   ],            # 63
            [ 'is',           'is',      'e'  ],            # 64
            [ 'ous',          '',        ''   ],            # 65
            [ 'ums',          '',        ''   ],            # 66
            [ 'us',           '',        ''   ],            # 67
            [ 's',            's',       ''   ],            # 68
        );

        foreach my $row (@spec) {
            my ($suffix, $strip, $add, $action) = @$row;
            push @rules68, [
                qr/$suffix$/i,
                ($strip eq '' ? undef : qr/$strip$/i),  # undef: suffix recognised, word left alone
                $add,
                (defined $action ? $action : ''),
            ];
        }
        return \@rules68;
    }

    sub stem_word68 {
        my $word     = shift(@_);
        my $ruleno   = 0;
        my $origword = $word;

        RULE:
        foreach my $rule (@{ _uea_rules68() }) {
            my ($match_re, $strip_re, $add, $action) = @$rule;
            next RULE unless $word =~ $match_re;

            # Letters in front of a generic -ing/-ed ending, taken before the
            # word is modified.
            my ($first, $second) = ($1, $2);

            if ($action eq 'undouble_trace') {
                print "a1=$first    a2=$second\n";
            }

            $word =~ s/$strip_re/$add/ if defined $strip_re;

            if ($action ne '' && $first eq $second) {
                $word =~ s/$first$//i;          # drop the doubled consonant
            }
            last RULE;
        }

        #print OUTFILE "$word\n";
        if ($word ne $origword) {
            print OUTFILE "$origword to $word\n";
        }
        return ($word, $ruleno);
    }
}


1;

