#!/usr/bin/perl -w
#use strict;
#use String::Approx 'amatch';
#use Text::LevenshteinXS qw(distance);
use XML::Parser;
use Term::ANSIColor qw(:constants);
use IO::Socket::INET;
use IO::Socket ":all";
use Storable;
use YAML::XS qw/LoadFile/;
use List::MoreUtils qw/ uniq /;
# Switch on Storable's interwork_56_64bit compatibility flag (see the Storable
# documentation for its exact effect). A plain 1 is used instead of the bareword
# `true`, which Perl only accepted as the string "true" because strict is off.
$Storable::interwork_56_64bit = 1;
# use Devel::Size 'total_size'; 
  

##########################################################################################

#######################################################################################
#### read 
# my %MAT =();
# $file = shift;
# open(FF, $file);
# my $count=0;
# while(<FF>){
# 	$count++;
# 	chomp;
# 	@el = split(/ /);
# 	my $la = shift @el;
# 	print "$count 1 $la\n"; 
# 	foreach $c (@el){
# 		if ($c=~/(\d+):(\d+)/) {$col = $1+1; $val = $2; print "$count $col $val\n";}
# 	}
# 	# getc();
# }
# close(FF);
# 
# die;

# Build %ngrams (n-gram => class number) from cheng_unigrams.txt.
#
# A line matching "seful skip ngrams for class <N>" switches the current class
# to N (the leading letter is left out of the pattern, presumably so that both
# "Useful" and "useful" match). Every other line is split on single spaces and
# each token, reduced to its word characters, is recorded under the current
# class. Tokens seen before any header get class -1; a token that appears again
# later overwrites its earlier class.
my %ngrams = ();
my $class  = -1;
my $count  = 0;    # tokens stored for the current class; only useful for capping while debugging
my $ngram_file = "cheng_unigrams.txt";
# Fail loudly: with an unreadable file the table would silently stay empty.
open(my $ngram_fh, '<', $ngram_file) or die "Cannot open $ngram_file: $!\n";
while (my $line = <$ngram_fh>) {
	chomp $line;    # chomp the copy that is actually used, so the echo below is not double-spaced
	print STDERR "$line\n";
	if ($line =~ m/seful skip ngrams for class (\d+)/) { $class = $1; $count = 0; next; }

	foreach my $g (split(/ /, $line)) {
		$g =~ s/\W//g;       # keep word characters only
		next if $g eq '';    # repeated spaces or punctuation-only tokens leave nothing to store
		$count++;            #if ($count>20) {last;}
		$ngrams{$g} = $class;
	}
}
close($ngram_fh);


# Map feature numbers to n-gram classes.
#
# Each line of feature_names.txt that contains "<number> <name>" is looked up
# in %ngrams; when <name> is a known n-gram, "<number + 1>  <class>" is printed
# to STDOUT. Lines that do not match, or whose name is unknown, print nothing.
my $feature_file = "feature_names.txt";
# Fail loudly: with an unreadable file the script would print nothing and exit normally.
open(my $feature_fh, '<', $feature_file) or die "Cannot open $feature_file: $!\n";
while (my $line = <$feature_fh>) {
	chomp $line;
	if ($line =~ m/(\d+)\s+(.+)/) {
		my ($fn, $fgram) = ($1, $2);
		# %ngrams keys hold word characters only, so a trailing blank or a
		# carriage return from a CRLF file could never match; drop it first.
		$fgram =~ s/\s+\z//;
		next unless exists $ngrams{$fgram};
		my $feature_number = $fn + 1;    # output numbers are one higher than those in the file
		print "$feature_number  $ngrams{$fgram}\n";
	}
}
close($feature_fh);

##############################################################################################
##############################################################################################

