Read about nonnegative matrix factorization

ariansajina commented 3 years ago

% Lee and Seung (Nature, 1999): NMF algorithm that learns parts of faces and semantic features of text.
@article{Lee1999,
  author   = {Lee, Daniel D. and Seung, H. Sebastian},
  title    = {Learning the parts of objects by non-negative matrix factorization},
  journal  = {Nature},
  year     = {1999},
  month    = oct,
  day      = {01},
  volume   = {401},
  number   = {6755},
  pages    = {788--791},
  abstract = {Is perception of the whole based on perception of its parts? There is psychological and physiological evidence for parts-based representations in the brain, and certain computational theories of object recognition rely on such representations. But little is known about how brains or computers might learn the parts of objects. Here we demonstrate an algorithm for non-negative matrix factorization that is able to learn parts of faces and semantic features of text. This is in contrast to other methods, such as principal components analysis and vector quantization, that learn holistic, not parts-based, representations. Non-negative matrix factorization is distinguished from the other methods by its use of non-negativity constraints. These constraints lead to a parts-based representation because they allow only additive, not subtractive, combinations. When non-negative matrix factorization is implemented as a neural network, parts-based representations emerge by virtue of two properties: the firing rates of neurons are never negative and synaptic strengths do not change sign.},
  issn     = {1476-4687},
  doi      = {10.1038/44565},
  url      = {https://doi.org/10.1038/44565},
}

ariansajina commented 3 years ago

Also see: https://en.wikipedia.org/wiki/Non-negative_matrix_factorization#Text_mining

ariansajina commented 3 years ago

https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html

ariansajina commented 3 years ago

% Xu, Liu and Gong (SIGIR 2003): document clustering via NMF of the term-document matrix.
@inproceedings{10.1145/860435.860485,
  author    = {Xu, Wei and Liu, Xin and Gong, Yihong},
  title     = {Document Clustering Based on Non-Negative Matrix Factorization},
  booktitle = {Proceedings of the 26th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  series    = {SIGIR '03},
  year      = {2003},
  pages     = {267--273},
  numpages  = {7},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Toronto, Canada},
  isbn      = {1581136463},
  doi       = {10.1145/860435.860485},
  url       = {https://doi.org/10.1145/860435.860485},
  keywords  = {non-negative matrix factorization, document clustering},
  abstract  = {In this paper, we propose a novel document clustering method based on the non-negative factorization of the term-document matrix of the given document corpus. In the latent semantic space derived by the non-negative matrix factorization (NMF), each axis captures the base topic of a particular document cluster, and each document is represented as an additive combination of the base topics. The cluster membership of each document can be easily determined by finding the base topic (the axis) with which the document has the largest projection value. Our experimental evaluations show that the proposed document clustering method surpasses the latent semantic indexing and the spectral clustering methods not only in the easy and reliable derivation of document clustering results, but also in document clustering accuracies.},
}

ariansajina / master-thesis

Read about nonnegative matrix factorization #10