% Cichosz, "A case study in text mining of discussion forum posts" -- record
% for an English-language item published online by Uniwersytet Zielonogorski.
%
% Maintenance notes:
%   * The source record declared the abstract field three times (one per
%     paragraph). Field names must be unique within an entry, so the three
%     paragraphs are merged, in their original order, into a single field.
%   * The original publisher value was "Zielona Gora: Uniwersytet
%     Zielonogorski" (city and publisher fused); it is split into address
%     and publisher so the style decides the presentation.
%   * The citation key contains a non-ASCII letter. It is kept unchanged so
%     existing citations keep resolving; classic 8-bit BibTeX toolchains may
%     have trouble with it -- TODO consider an ASCII key (or a biblatex ids
%     alias) once all citing documents can be updated.
%   * NOTE(review): the record carries no year, url or doi -- add them when
%     known. The type value is Polish for "article"; if journal, volume and
%     page data become available, a more specific entry type may fit better.
@misc{Cichosz_Paweł_A,
  author       = {Cichosz, Paweł},
  title        = {A case study in text mining of discussion forum posts: Classification with bag of words and global vectors},
  howpublished = {online},
  publisher    = {Uniwersytet Zielonogórski},
  address      = {Zielona Góra},
  type         = {artykuł},
  language     = {eng},
  keywords     = {text mining, discussion forums, text representation, document classification, word embedding},
  abstract     = {Despite the rapid growth of other types of social media, Internet discussion forums remain a highly popular communication channel and a useful source of text data for analyzing user interests and sentiments. Being suited to richer, deeper, and longer discussions than microblogging services, they particularly well reflect topics of long-term, persisting involvement and areas of specialized knowledge or experience. Discovering and characterizing such topics and areas by text mining algorithms is therefore an interesting and useful research direction.
                  This work presents a case study in which selected classification algorithms are applied to posts from a Polish discussion forum devoted to psychoactive substances received from home-grown plants, such as hashish or marijuana. The utility of two different vector text representations is examined: the simple bag of words representation and the more refined embedded global vectors one.
                  While the former is found to work well for the multinomial naive Bayes algorithm, the latter turns out more useful for other classification algorithms: logistic regression, SVMs, and random forests. The obtained results suggest that post-classification can be applied for measuring publication intensity of particular topics and, in the case of forums related to psychoactive substances, for monitoring the risk of drug-related crime.},
}