from IRstruct import *

# Small bilingual (English / German) fixture data built from the Document and
# Properties classes provided by IRstruct.

# --- English -----------------------------------------------------------------

# One single-word Document per English term.
terms_en = [
    Document([('lang', 'en'), ('content', word)])
    for word in [
        "big", "cabbage", "car", "drive", "julia", "ketchup",
        "like", "peace", "peter", "vegetable", "vegetarian", "war",
    ]
]

# English sentences; position k is paired with doc_de[k] when `docs` is built.
doc_en = [
    Document([('lang', 'en'), ('content', sentence)])
    for sentence in [
        "peter drives a big car",
        "julia likes peter",
        "julia is a vegetarian",
        "vegetarians like vegetables",
        "cabbage is a vegetable",
        "big vegetarians who like cabbage do not drive cars",
        "war is peace",
        "ketchup is a vegetable",
    ]
]

# Inflected English word -> base form.
stems_en = {
    "drives": "drive",
    "cars": "car",
    "vegetarians": "vegetarian",
    "vegetables": "vegetable",
    "likes": "like",
}

# --- German ------------------------------------------------------------------

# German sentences, in the same order as doc_en.
doc_de = [
    Document([('lang', 'de'), ('content', sentence)])
    for sentence in [
        "peter faehrt ein grosses auto",
        "julia mag peter",
        "julia ist vegetarierin",
        "vegetarier moegen gemuese",
        "kohl ist gemuese",
        "grosse vegetarier die gemuese moegen fahren keine autos",
        "krieg ist frieden",
        "ketchup ist gemuese",
    ]
]

# One single-word Document per German term.
terms_de = [
    Document([('lang', 'de'), ('content', word)])
    for word in [
        "gross", "kohl", "auto", "fahren", "julia", "ketchup",
        "moegen", "frieden", "peter", "gemuese", "vegetarier", "krieg",
    ]
]

# Inflected German word -> base form.
# NOTE(review): "autos" occurs in doc_de but has no entry here, whereas
# stems_en maps "cars" -> "car" -- confirm whether the omission is intended.
stems_de = {
    "faehrt": "fahren",
    "grosses": "gross",
    "mag": "moegen",
    "vegetarierin": "vegetarier",
    "grosse": "gross",
}

# --- Combined structures -----------------------------------------------------

# One Document per sentence pair, holding [english, german] under the 'docs' key.
docs = [
    Document(Properties([('docs', [en_doc, de_doc])]))
    for en_doc, de_doc in zip(doc_en, doc_de)
]

# Stem lookup keyed by Properties: (inflected content, lang) -> (stem content, lang).
# English entries are inserted before German ones.
stemprops = {}
for lang_code, stem_table in (('en', stems_en), ('de', stems_de)):
    for inflected, stem in stem_table.items():
        key = Properties([('content', inflected), ('lang', lang_code)])
        stemprops[key] = Properties([('content', stem), ('lang', lang_code)])