@comment{Review note for scholars20415: the booktitle field (required for inproceedings entries) is absent from the exported record and was deliberately not guessed; add the proceedings title once confirmed from the DOI landing page.}

@inproceedings{scholars20415,
  author    = {Isawasan, Pradeep and Asmawi, Muhammad Akmal Hakim Ahmad and Ong, Song Quan and Ooi, Boonyaik Yaik and Savita, K. S.},
  title     = {Comprehensive Analysis of Beauty Community Discourse on {TikTok} Through {GPT} Embeddings and {BERTopic} Modeling},
  year      = {2025},
  pages     = {444--448},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  doi       = {10.1109/AiDAS67696.2025.11213658},
  isbn      = {9798331586034},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105023663181&doi=10.1109\%2FAiDAS67696.2025.11213658&partnerID=40&md5=29a5999867ca6a8f374e658707f42355},
  keywords  = {Embeddings; Beauty care; Beauty discourse; Bertopic; Comprehensive analysis; GPT; Noisy signals; Personal care; Tiktok; Topic Modeling; Semantics},
  note      = {Cited by: 0},
  abstract  = {Short-form comments on TikTok's Beauty \& Personal Care videos are rich yet noisy signals of consumer sentiment. This study deploys a transformer-based pipeline that couples GPT-derived sentence embeddings with BERTopic to surface salient discussion themes. We scraped 3,912 videos from the 20 highest-revenue beauty influencers and retrieved 34,597 engagement-ranked comments posted between January and August 2024. After a GPT-powered normalisation step that expands slang, translates Malay abbreviations, and converts emoji to text, comments were embedded with OpenAI's text-embedding-3-small model, reduced via UMAP, clustered using HDBSCAN, and distilled into topics with BERTopic. Topic quality was evaluated with four coherence metrics ($c_{v}$, $u_{\mathrm{mass}}$, $c_{\mathrm{uci}}$, $c_{\mathrm{npmi}}$) to ensure semantic consistency. The model revealed two coherent, non-overlapping themes: product-usage experiences (e.g., frequency of use, perceived results) and product-feature commentary (e.g., scent, packaging, variants). A $c_{v}$ score of 0.620 indicates strong interpretability despite the brevity and informality of TikTok discourse. These findings show that embedding-based topic modeling can unearth actionable insights: influencers should foreground authentic usage outcomes, while brands can boost engagement by highlighting sensory cues in visual storytelling. The study demonstrates the viability of LLM-enhanced analytics for short social media text and provides a replicable framework for future TikTok commerce research, noting limitations and opportunities for multi-category, cross-regional, and multimodal extensions. {\copyright} 2025 IEEE.},
}