import nltk
from nltk.corpus import conll2000
# Chunk grammar for nltk.RegexpParser: one NP stage with two chunk rules.
# Every rule has to sit on a single line; a rule split across lines (as the
# first one previously was: "{" followed by "?*}") is rejected by the parser.
# NOTE(review): the first rule had lost its tag patterns. "<DT>?<JJ>*<NN>"
# (optional determiner, any number of adjectives, a noun) is the
# reconstruction that fits the leftover "?" and "*" quantifiers -- confirm
# against the original exercise text.
# The second rule is the exercise placeholder ("add your own patterns");
# replace it with real tag patterns when tuning the grammar.
grammar = r"""
NP: {<DT>?<JJ>*<NN>}
{<...>?<...>} # Legg til egne mønstre
"""
cp = nltk.RegexpParser(grammar)

# Gold-standard NP chunks from the CoNLL-2000 corpus.
training_chunks = conll2000.chunked_sents("train.txt", chunk_types=["NP"])
test_chunks = conll2000.chunked_sents("test.txt", chunk_types=["NP"])

# Print and draw a few example sentences. Each draw() opens a new window that
# must be closed before the program continues.
for sentence_index in (100, 102, 103):
    example = training_chunks[sentence_index]
    print(example, "\n")
    example.draw()

print("Evaluering på treningskorpuset:\n", cp.accuracy(training_chunks))
# Remember: we tune the grammar on the training corpus, not the test corpus!
# Only enable the test-corpus evaluation once the grammar is final.
# print("\nEvaluering på testkorpuset:\n", cp.accuracy(test_chunks))