"""Rule-based NP chunking exercise on the CoNLL-2000 corpus.

Builds an NLTK ``RegexpParser`` from a hand-written chunk grammar, prints and
draws a few gold-standard training sentences, and reports the parser's score
on the training split.
"""

import nltk
from nltk.corpus import conll2000

# Chunk grammar: every {...} entry is a tag pattern over part-of-speech tags.
# NOTE(review): the tag names of the first rule were lost when this file was
# extracted (only "{?*}" survived, which RegexpParser cannot use). It is
# restored here as the baseline NP rule from the NLTK book -- confirm against
# the original hand-out.
# NOTE(review): the second rule looks like the exercise's placeholder for
# your own patterns, but it is a live rule as written ("." is a wildcard
# inside a tag pattern) -- replace it rather than leaving it in place.
grammar = r"""
NP: {<DT>?<JJ>*<NN>}
    {<...>?<...>}  # Legg til egne mønstre
"""

cp = nltk.RegexpParser(grammar)

# Gold-standard sentences, restricted to NP chunks only.
training_chunks = conll2000.chunked_sents("train.txt", chunk_types=["NP"])
test_chunks = conll2000.chunked_sents("test.txt", chunk_types=["NP"])

# Example sentences are printed and drawn (each drawing opens a new window).
EXAMPLE_SENTENCE_INDICES = (100, 102, 103)
for index in EXAMPLE_SENTENCE_INDICES:
    sentence = training_chunks[index]
    print(sentence, "\n")
    # The window must be closed before the program continues.
    sentence.draw()

print("Evaluering på treningskorpuset:\n", cp.accuracy(training_chunks))

# Remember that we tune the grammar on the training corpus, not the test
# corpus! Enable the line below only for the final evaluation.
# print("\nEvaluering på testkorpuset:\n", cp.accuracy(test_chunks))