boostcampaitech4recsys1 / level1_bookratingprediction_recsys-level1-recsys-02

level1_bookratingprediction_recsys-level1-recsys-02 created by GitHub Classroom
5 stars 6 forks source link

데이터 전처리 후 FM/FFM 결과 #15

Closed jwj51720 closed 1 year ago

jwj51720 commented 1 year ago

10월 31일

jwj51720 commented 1 year ago

10월 31일 추가 실험

FM - 평가가 n개인 rating까지 삭제

DL 모델을 사용하면?

시도해볼 점

41ow1ives commented 1 year ago

# Maps a high-level book category name to the lowercase keyword fragments
# that identify it. Fragments are intentionally truncated (e.g. 'ficti',
# 'histo') so one fragment can cover several spellings/inflections.
#
# NOTE(review): entry order is preserved from the original paste. The original
# comments ask for the broadest categories to be placed first, which suggests
# the consumer applies entries in order and lets later matches override
# earlier ones -- confirm against the code that iterates this dict before
# reordering anything.
groupings = {
    # Too broad, so it is kept at the very top.
    'Fiction': ['fiction', 'ficti'],
    'Literature & Poem': ['liter', 'poem', 'poetry', 'novel', 'sonnet'],
    # 'science' and 'logy' match too widely, so this is kept near the top.
    'Science & Math': ['science', 'math', 'logy', 'chemis', 'physics', 'electron'],
    # Somewhat large group.
    'Parenting & Relationships': [
        'baby', 'babies', 'parent', 'family', 'friend', 'tionship',
        'brother', 'sister', 'families', 'friendship', 'mother', 'father',
    ],
    # 'psy...' could be split into finer categories.
    'Medical Books': ['medi', 'psycho'],
    'Adventure': ['adventu'],
    'Animal & Nature': [
        'animal', 'ecolo', 'plant', 'nature', 'cat', 'dog', 'pets', 'bird',
        'bear', 'horse', 'frog', 'duck', 'rabbit', 'dolphin', 'mice', 'deer',
        'panda', 'kangaroo',
        'lizard',      # was misspelled 'lizzard'
        'gorilla',
        'chimpanzee',  # was misspelled 'chimpangee'
        'bat', 'insect',
    ],
    # 'art' overlaps with far too many other words.
    'Arts & Photography': ['art', 'photo', 'drawing', 'picture'],
    'Authors': ['authors'],
    'Biographies & Memoirs': ['biog', 'memo'],
    'Business & Money': ['busi', 'money', 'econo', 'finance'],
    'Calendars': ['calen'],
    "Children's Books": ['child', 'baby'],
    # 'christi' (not 'christ') because of Christmas, which has its own group.
    'Christian Books & Bibles': ['christi', 'bible'],
    'Christmas': ['christma'],
    'Comics & Graphic Novels': ['comics', 'graphic novel'],
    'Computers & Technology': ['computer', 'techno', 'archi'],
    'Cookbooks, Food & Wine': ['cook', 'wine', 'food'],
    'Countries & Cities': [
        'united states', 'russia', 'france', 'africa', 'china', 'japan',
        'egypt', 'germany', 'ireland', 'california',
        'berlin',  # was misspelled 'berline'
        'london', 'new york', 'canada', 'chile', 'italy', 'europe',
        'australia', 'great britain', 'arizona', 'chicago', 'netherlands',
        'calif', 'mexico', 'colombia', 'greece', 'florida', 'algeria',
        'new zealand', 'austria', 'denmark', 'washington', 'india',
        'england', 'brazil',
    ],
    'Crafts, Hobbies & Home': ['crafts', 'hobb', 'home', 'house', 'garden'],
    'Crime & Murder': ['crime', 'murder', 'criminal', 'homicide', 'mafia', 'gang', 'drug'],
    'Critic': ['critic'],
    'Education & Teaching': ['educa', 'teach'],
    'Drama': ['drama'],
    'Design': ['design'],
    'Engineering & Transportation': ['engine', 'transp'],
    'Encyclopedia & Dictionary': ['encyclo', 'dictiona', 'vocabulary'],
    'Essay': ['essay'],
    'Health, Fitness & Dieting': ['health', 'fitness', 'diet'],
    'History': ['histo', 'war'],
    'Humor & Entertainment': ['humor', 'entertai', 'comed', 'game', 'comic'],
    'Law': ['law'],
    'Language': ['language'],
    'LGBTQ+ Books': ['lesbian', 'gay', 'bisex'],
    'Mystery, Thriller & Suspense': ['myste', 'thril', 'suspen', 'horror', 'occult'],
    'Music & Dance': ['music', 'dance', 'instrument', 'ballet', 'classic'],
    'Movie': [
        'motion pictur', 'actor', 'actres', 'acting', 'cinema', 'theater',
        'director', 'television',
    ],
    'Politics': ['politic', 'president'],
    'Philosophy': ['philoso'],
    'Reference': ['reference'],
    'Religion & Spirituality': ['religi', 'buddh', 'spirit', 'god', 'prayer', 'belief', 'doubt'],
    'Romance': ['romance'],
    'Science Fiction & Fantasy': [
        'imagin', 'science fiction', 'fantasy', 'fairy', 'fairies', 'vampire',
        'epidemic', 'ghost', 'alien', 'supernatural', 'magic', 'dragons',
        'elves', 'angel', 'devil',
    ],
    'Short story': ['short'],
    'Social Science': ['social', 'ethic', 'communism', 'capitalism', 'generation', 'culture'],
    # Searching for 'self' returned only self-help related categories.
    'Self-Help': ['self'],
    'Study': ['test', 'school', 'examina', 'study aids', 'college'],
    'Sports & Outdoors': [
        'exerc', 'sport', 'outdoor', 'baseball', 'soccer', 'hockey',
        'cricket', 'basketball', 'footbal',
    ],
    # The word 'nonfiction' only shows up in juvenile-related categories.
    'Teen & Young Adult': ['teen', 'adol', 'juven'],
    'Travel': ['travel'],
    'Women': ['women'],
}