"""Evaluate a hand-written regular-expression POS tagger on the Brown corpus.

The script builds an ``nltk.RegexpTagger`` from the ``patterns`` list below,
tags the first sentence of the Brown "adventure" category, prints it next to
the gold-standard tagging, and reports the tagger's accuracy over the whole
category. Finally it prints the Brown tagset reference.
"""

import nltk
from nltk.corpus import brown

# The script reads the Brown corpus and the tagset help data, so make sure
# both resources are installed before they are accessed.
nltk.download('brown')
nltk.download('tagsets_json')

# Add patterns here. Order matters: the tagger assigns the tag of the first
# pattern that matches a token, so specific rules go before general ones and
# the catch-all default stays last.
patterns = [
    (r"^Håkon$", "NP"),  # Håkon
    (r"^Dan$", "NP"),  # Dan
    (r"^Morgan$", "NP"),  # Morgan
    (r"^Ann$", "NP"),  # Ann
    (r".*ing$", "VBG"),  # gerunds
    (r".*ed$", "VBD"),  # simple past
    (r".*es$", "VBZ"),  # 3rd singular present
    (r".*ould$", "MD"),  # modals
    (r".*\'s$", "NN$"),  # possessive nouns
    (r".*s$", "NNS"),  # plural nouns
    (r"^-?[0-9]+(\.[0-9]+)?$", "CD"),  # cardinal numbers
    (r".*", "NN"),  # nouns (default)
]

regexp_tagger = nltk.RegexpTagger(patterns)

# Gold-standard (word, tag) sentences and the same sentences as plain tokens.
brown_taggede_setninger = brown.tagged_sents(categories="adventure")
brown_utaggede_setninger = brown.sents(categories="adventure")

# Show the first sentence: raw tokens, gold tagging, then our tagging.
print(brown_utaggede_setninger[0])
print(brown_taggede_setninger[0])
print(regexp_tagger.tag(brown_utaggede_setninger[0]))

print("Nøyaktighet, adventure:", regexp_tagger.accuracy(brown_taggede_setninger))

# This prints a lot of information about the various tags!
nltk.help.brown_tagset()