#! /bin/sh
#
# Self-test for the `chains` program.
#
# The script writes a handful of fixture files into the current directory,
# runs `chains` with different option combinations, collects the last line
# of each run in one file and the expected lines in another, and finally
# compares the two files with diff.

echo "Making datafiles for testing..."

# Fixture: the input text. It consists of six sections, each introduced by a
# line starting with "rec N". Three tokens have digits embedded in them
# (mighty123, 123consists, mat123ters) -- presumably to exercise the
# alpha-numeric handling; confirm against the chains documentation.
#
# `rm -f` replaces the non-POSIX `>& /dev/null` redirection, which is a
# syntax error in strict /bin/sh implementations. The quoted delimiter keeps
# the here-document literal (no parameter or command expansion).
rm -f test_file_1
cat > test_file_1 <<'EOF1'
# this is a testfile for automatic testing of the PTU's.

rec 1

The first record and a mighty123 record it is.
rec 2
record two 123consists of three sentences.
This is the second one
and this the third one.

rec 3
note that this testfile has no tags.

rec 4
note that a certain word has not occurred for two lines now
This state of affairs will continue, because we want to check chaining.
Chaining is the act of creating chains of words that are repeated within a certain distance of each other.

rec 5

This state of affairs will continue for some more lines
But I am getting tired of it soon.
Of course I can simplify mat123ters by setting the chain-length to two or three, using the options of the program.

rec 6 have you already figured out which word has not occurred again after the first few lines?

It was the word "one".

EOF1

# Fixture: first index file. After a comment line and two blank-ish lines,
# every entry is a right-aligned number, a tab and a lower-case word
# (presumably occurrence counts for test_file_1 -- not verified here).
# Per the embedded comment, words starting with "a" are left out on purpose.
#
# `rm -f` replaces the non-POSIX `>& /dev/null` redirection; the quoted
# delimiter keeps the here-document literal.
rm -f test_index_1
cat > test_index_1 <<'EOF2'
#comments... All a-words are omitted for testing purposes
	

      1	because
      1	but
      1	by
      1	can
      2	certain
      1	chain
      2	chaining
      1	chains
      1	check
      1	consists
      2	continue
      1	course
      1	creating
      1	distance
      1	each
      1	few
      1	figured
      2	first
      3	for
      1	getting
      3	has
      1	have
      2	i
      4	is
      3	it
      1	length
      3	lines
      1	mat
      1	mighty
      1	more
      1	no
      2	not
      2	note
      1	now
      2	occurred
     10	of
      3	one
      1	options
      1	or
      1	other
      1	out
      1	program
      1	ptu
      6	rec
      3	record
      1	repeated
      1	s
      1	second
      1	sentences
      1	setting
      1	simplify
      1	some
      1	soon
      2	state
      1	tags
      1	ters
      2	testfile
      1	testing
      3	that
     10	the
      1	third
      6	this
      2	three
      1	tired
      2	to
      3	two
      1	using
      1	want
      1	was
      1	we
      1	which
      2	will
      1	within
      3	word
      1	words
      1	you

EOF2

# Fixture: word list handed to chains via the -q/-Q and -o/-O options
# further down. One word per line, after a comment line and a blank line.
#
# `rm -f` replaces the non-POSIX `>& /dev/null` redirection; the quoted
# delimiter keeps the here-document literal.
rm -f test_stopwords
cat > test_stopwords <<'EOF3'
#stopwords

and
has
are
already
the

EOF3

# Fixture: second index file, in a three-column "N N word" layout. The
# embedded Dutch comment reads "test for tf.idf formats". The first column
# presumably is the record number and the second a weight or count --
# confirm against the chains documentation. Blank lines inside the data are
# part of the fixture.
#
# `rm -f` replaces the non-POSIX `>& /dev/null` redirection; the quoted
# delimiter keeps the here-document literal.
rm -f test_index_2

cat > test_index_2 <<'EOF4'
# test voor tf.idf formats


1 1 rec
1 1 the
1 1 first
1 2 record
1 1 mighty
2 1 rec
2 1 record
2 1 two
2 1 consists
2 2 of
2 2 three
2 1 sentences
2 3 this
2 1 is
2 1 the
2 1 second

2 2 and
3 1 rec
3 1 note
3 1 that
3 1 this
3 1 testfile
3 1 has
3 1 no
3 2 tags
4 1 rec
4 1 note
4 1 that
4 1 certain
4 1 word
4 1 has
4 1 not
4 1 occurred
4 1 for
4 1 two
4 1 lines
4 1 now
4 1 this
4 1 state
4 1 of
4 1 affairs
4 1 will
4 1 continue
4 1 because
4 1 we
4 1 want
4 1 to
4 1 check
4 2 chaining

5 1 rec
5 1 this
5 1 state
5 1 of
5 1 affairs
5 1 will
5 1 continue
5 1 for
5 1 some
5 1 more

6 2 one


EOF4


# Fixture: a variant of the three-column index, ordered by word. The
# embedded Dutch comment says that every 5 has been changed to
# 'test_file_1', i.e. the first column carries a file name instead of the
# number 5. The "# test voor tf.idf formats" line in the middle of the data
# is part of the fixture and must stay where it is.
#
# `rm -f` replaces the non-POSIX `>& /dev/null` redirection; the quoted
# delimiter keeps the here-document literal.
rm -f test_index_2_sort

cat > test_index_2_sort <<'EOF5'
#alle 5's in 'test_file_1' veranderd...


test_file_1 1 affairs
2 2 and
4 1 because
4 1 certain
4 2 chaining
4 1 check
2 1 consists
4 1 continue
test_file_1 1 continue
1 1 first
4 1 for
test_file_1 1 for
3 1 has
4 1 has
2 1 is
4 1 lines
1 1 mighty
test_file_1 1 more
3 1 no
4 1 not
3 1 note
4 1 note
4 1 now
4 1 occurred
2 2 of
4 1 of
test_file_1 1 of
6 2 one
1 1 rec
2 1 rec
3 1 rec
4 1 rec
test_file_1 1 rec
1 2 record
2 1 record
2 1 second
2 1 sentences
test_file_1 1 some
4 1 state
test_file_1 1 state
3 2 tags
3 1 testfile
3 1 that
4 1 that
1 1 the
2 1 the
2 3 this
3 1 this
4 1 this
test_file_1 1 this
2 2 three
4 1 to
2 1 two
4 1 two
# test voor tf.idf formats
4 1 want
4 1 we
4 1 will
test_file_1 1 will
4 1 word

EOF5

#======================================================================

# Two files are built up side by side during the tests:
#   test_results   the expected last lines (written by this script)
#   test_results1  the last lines actually produced
# Start with both of them absent. `rm -f` replaces the non-POSIX
# `>& /dev/null` redirection.
rm -f test_results test_results1
echo
echo "Testing the program chains."
echo "First we test basic chaining and record separators (options c, r and R)"

# Only the last line of each run is compared. `tail -n 1` is the POSIX
# spelling of the obsolescent `tail -1`.
chains -c3 -r rec -R RECSEP test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  18     40" >> test_results

# RECSEP is the file named with -R above; its last line is checked as well.
echo "Look at RECSEP:"
tail -n 1 RECSEP >> test_results1
printf '%s\n' " 6      17" >> test_results

# --------------------------------

echo "Now we check wordlength and alpha-numerics (options L and a)"

# Actual last line goes to test_results1, the expected one to test_results.
# `tail -n 1` is the POSIX spelling of the obsolescent `tail -1`.
chains -a -L4 test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  18     38" >> test_results


# --------------------------------------------

echo "Now we check artificial lines and saving of same (options s and S)"

# `tail -n 1` is the POSIX spelling of the obsolescent `tail -1`.
chains -s10 -S naam test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  15     52" >> test_results

# "naam" is the file named with -S above; its last line is checked as well.
tail -n 1 naam >> test_results1
printf '%s\n' "15 the word one" >> test_results
# --------------------------------------------

echo "Now we check the stopword-file (options q and Q)"

# The same word list is passed once with -q and once with -Q; each run has
# its own expected last line. `tail -n 1` is the POSIX spelling of the
# obsolescent `tail -1`.
chains -s10 -q test_stopwords test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  15     49" >> test_results

chains -s10 -Q test_stopwords test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  15      3" >> test_results

# --------------------------------------------

echo "Now we check the stopsentence-file (options o and O)"

# The word list is reused here, once with -o and once with -O.
# `tail -n 1` is the POSIX spelling of the obsolescent `tail -1`.
chains -s10 -o test_stopwords test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  15     30" >> test_results

chains -s10 -O test_stopwords test_file_1 | tail -n 1 >> test_results1
printf '%s\n' "  15     26" >> test_results

# --------------------------------------------

echo "Now we check the index-file 1 and weight (options t and indexfile)"

# The index file is given as a second operand after the text file.
# `tail -n 1` is the POSIX spelling of the obsolescent `tail -1`.
chains -t2 test_file_1 test_index_1 | tail -n 1 >> test_results1
printf '%s\n' "  18     12" >> test_results

# --------------------------------------------

echo "Now we check the index-file 2 and weight (options t and indexfile)"

# First run: the three-column index together with -t2.
# `tail -n 1` is the POSIX spelling of the obsolescent `tail -1`.
chains -t2 test_file_1 test_index_2 | tail -n 1 >> test_results1
printf '%s\n' "  18     63" >> test_results

# Second run: the word-ordered variant of that index, combined with the
# -c and -r options.
chains -c4 -r rec test_file_1 test_index_2_sort | tail -n 1 >> test_results1
printf '%s\n' "  18      9" >> test_results
echo
echo "if all tests compared satisfactorily, this is the last line."
echo

# Any output below the message above is a mismatch between the expected
# lines (test_results) and the produced ones (test_results1). -w makes diff
# ignore whitespace, so only the numbers and words have to agree.
diff -w test_results test_results1

# Remove everything the script created. "naam" (written through the -S
# option of chains) used to be left behind; -f keeps the cleanup quiet when
# an earlier step failed before creating one of the files.
rm -f test_results test_results1 RECSEP naam \
  test_file_1 test_index_1 test_index_2 test_index_2_sort test_stopwords














