{"id":3852,"date":"2024-10-09T17:11:55","date_gmt":"2024-10-09T17:11:55","guid":{"rendered":"https:\/\/www.canbekcan.com\/en\/?page_id=3852"},"modified":"2025-10-13T04:13:33","modified_gmt":"2025-10-13T04:13:33","slug":"social-media-data-mining-class-notes","status":"publish","type":"page","link":"https:\/\/canbekcan.com\/en\/courses\/social-media-data-mining\/social-media-data-mining-class-notes\/","title":{"rendered":"Social Media Data Mining &#8211; Class Notes"},"content":{"rendered":"<div class=\"wpb-content-wrapper\"><p>[vc_section css=&#8221;.vc_custom_1599166519090{padding: 20px !important;background-color: #474747 !important;}&#8221;][vc_row css=&#8221;.vc_custom_1599166540648{margin: 20px !important;padding: 20px !important;}&#8221;][vc_column][vc_custom_heading source=&#8221;post_title&#8221; font_container=&#8221;tag:h1|text_align:left|color:%23ffffff&#8221;][vc_custom_heading text=&#8221;Eastern Mediterranean University Faculty of Communication and Media Studies Department of New Media and Journalism&#8221; font_container=&#8221;tag:h3|text_align:left|color:%23ffffff&#8221; use_theme_fonts=&#8221;yes&#8221; css=&#8221;&#8221;][\/vc_column][\/vc_row][\/vc_section][vc_section css=&#8221;.vc_custom_1629930119858{margin: 0px !important;padding: 0px !important;background-color: #cccccc !important;}&#8221;][vc_row css=&#8221;.vc_custom_1629930133749{padding-right: 20px !important;padding-left: 20px !important;}&#8221;][vc_column width=&#8221;1\/4&#8243;][\/vc_column][vc_column width=&#8221;1\/2&#8243;][vc_custom_heading text=&#8221;Class Notes&#8221; font_container=&#8221;tag:h4|text_align:center&#8221; google_fonts=&#8221;font_family:Noto%20Serif%3Aregular%2Citalic%2C700%2C700italic|font_style:400%20regular%3A400%3Anormal&#8221; css=&#8221;&#8221;][\/vc_column][vc_column width=&#8221;1\/4&#8243;][\/vc_column][\/vc_row][\/vc_section][vc_section css=&#8221;.vc_custom_1728493681543{padding-right: 40px !important;padding-left: 40px 
!important;}&#8221;][vc_row][vc_column][vc_column_text css=&#8221;&#8221;]&#8230;[\/vc_column_text][vc_column_text css=&#8221;&#8221;]<a href=\"http:\/\/data.un.org\/Default.aspx\" target=\"_blank\" rel=\"noopener\">UN DATA<\/a><\/p>\n<p><a href=\"https:\/\/unstats.un.org\/\" target=\"_blank\" rel=\"noopener\">UNSTATS<\/a><\/p>\n<p><a href=\"https:\/\/www.dropbox.com\/scl\/fo\/g9jdox8o2e4d6v84dq7eq\/AAhBRtL5lZ2C2SPK7VlCb1U?rlkey=qe9gbuibdyhk12kjuijuexp4l&amp;dl=0\">DropBox Files<\/a>[\/vc_column_text][\/vc_column][\/vc_row][\/vc_section][vc_section css=&#8221;.vc_custom_1728493681543{padding-right: 40px !important;padding-left: 40px !important;}&#8221;][vc_row][vc_column]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Upgrade Outdated Packages<\/h4>\n[vc_column_text css=&#8221;&#8221;]<code class=\"hljs language-bash\" data-highlighted=\"yes\">MAC<\/code><\/p>\n<pre class=\"lang-bash s-code-block\"><code class=\"hljs language-bash\" data-highlighted=\"yes\">pip freeze --<span class=\"hljs-built_in\">local<\/span> | grep -v <span class=\"hljs-string\">'^\\-e'<\/span> | <span class=\"hljs-built_in\">cut<\/span> -d = -f 1  | xargs -n1 pip install -U\npip install --upgrade pip<\/code><\/pre>\n<p><code class=\"hljs language-bash\" data-highlighted=\"yes\">Windows<\/code><code class=\"hljs language-bash\" data-highlighted=\"yes\"><\/code><\/p>\n<pre class=\"lang-bash s-code-block\"><code class=\"hljs language-bash\" data-highlighted=\"yes\">python.exe -m pip freeze --<span class=\"hljs-built_in\">local<\/span> | grep -v <span class=\"hljs-string\">'^\\-e'<\/span> | <span class=\"hljs-built_in\">cut<\/span> -d = -f 1  | xargs -n1 pip install -U\n<\/code>python.exe -m pip install --upgrade pip<\/pre>\n<div>\n<pre>\u00a0 source_folder = 'C:\/Users\/LAB\/Desktop\/NMEJ205\/PDFtoTXT\/pdf' \u00a0# PDF dosyalar\u0131n\u0131n bulundu\u011fu klas\u00f6r\n\n\u00a0 \u00a0 target_folder = 'C:\/Users\/LAB\/Desktop\/NMEJ205\/PDFtoTXT\/txt' \u00a0# TXT ve CSV 
dosyalar\u0131n\u0131n kaydedilece\u011fi klas\u00f6r<\/pre>\n<\/div>\n<p>[\/vc_column_text][vc_row_inner][vc_column_inner width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">LinkedIn Scraper<\/h4>\n[vc_column_text css=&#8221;&#8221;]<a href=\"https:\/\/github.com\/scrapfly\/scrapfly-scrapers\/tree\/main\/linkedin-scraper\" target=\"_blank\" rel=\"noopener\">https:\/\/github.com\/scrapfly\/scrapfly-scrapers\/tree\/main\/linkedin-scraper<\/a>[\/vc_column_text][\/vc_column_inner][vc_column_inner width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Web Scraper<\/h4>\n[vc_column_text css=&#8221;&#8221;]<a href=\"https:\/\/scrapfly.io\/blog\/web-scraping-with-python\/\" target=\"_blank\" rel=\"noopener\">https:\/\/scrapfly.io\/blog\/web-scraping-with-python\/<\/a>[\/vc_column_text][\/vc_column_inner][vc_column_inner width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Instagram Scraper<\/h4>\n[vc_column_text css=&#8221;&#8221;]<a href=\"https:\/\/github.com\/facebookarchive\/python-instagram\">https:\/\/github.com\/facebookarchive\/python-instagram<\/a>[\/vc_column_text][\/vc_column_inner][\/vc_row_inner][vc_row_inner][vc_column_inner width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Google Map Scraper<\/h4>\n[vc_column_text css=&#8221;&#8221;]<a href=\"https:\/\/github.com\/r7avi\/Google-Maps-Data-Scrapper\">https:\/\/github.com\/r7avi\/Google-Maps-Data-Scrapper<\/a>[\/vc_column_text][\/vc_column_inner][vc_column_inner width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Google Search Result Scraper<\/h4>\n[vc_column_text css=&#8221;&#8221;]<a href=\"https:\/\/github.com\/canbekcan\/GoogleSearch\">https:\/\/github.com\/canbekcan\/GoogleSearch<\/a>[\/vc_column_text][vc_column_text css=&#8221;&#8221;]<\/p>\n<pre>pip install google-api-python-client<\/pre>\n<p>[\/vc_column_text][\/vc_column_inner][vc_column_inner 
width=&#8221;1\/3&#8243;]<h4 class=\"fac-title text-left\" style=\"color:#4b4b4b\">...<\/h4>\n[vc_column_text css=&#8221;&#8221;]&#8230;[\/vc_column_text][\/vc_column_inner][\/vc_row_inner][vc_column_text css=&#8221;&#8221;]<\/p>\n<pre>python -m venv myenv<\/pre>\n<p>[\/vc_column_text]<h2 class=\"fac-title text-left\" style=\"color:#4b4b4b\">Google Search Result Scraper<\/h2>\n[vc_column_text css=&#8221;&#8221;]<\/p>\n<div>\n<pre>import httpx\nfrom parsel import Selector\nimport time\nimport csv\n\n# Headers to mimic browser-like behavior\nheaders = {\n    \"User-Agent\": \"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/96.0.4664.110 Safari\/537.36\",\n    \"Accept\": \"text\/html,application\/xhtml+xml,application\/xml;q=0.9,image\/webp,image\/apng,*\/*;q=0.8\",\n}\n\n# Prompt user for the search query\nsearch_query = input(\"Enter your search query: \")\n# Encode the search query for the URL\nencoded_search_query = search_query.replace(' ', '+')\n\n# Define the base URL for the search query (1)\nbase_url = f\"https:\/\/www.google.com\/search?q={encoded_search_query}&amp;start=\"\n\n# Define the base URL for the exact search query (2)\n# base_url = f\"https:\/\/www.google.com\/search?as_q=&amp;as_epq={encoded_search_query}&amp;start=\"\n\n# Establish a persistent session\nsession = httpx.Client(headers=headers)\n\n# Number of pages to scrape\nnum_pages = 5\n\n# Prompt user for the CSV filename\n# csv_filename = \"search.csv\"\ncsv_filename = input(\"Enter the filename for the CSV (without extension): \") + \".csv\"\n\n# Open the CSV file for writing\nwith open(csv_filename, mode='w', newline='', encoding='utf-8') as file:\n    writer = csv.writer(file)\n    # Write the header row\n    writer.writerow([\"Title\", \"Link\", \"Snippet\"])\n\n    # Iterate through each page\n    for page in range(num_pages):\n        # Construct the URL for the current page by updating the 'start' parameter\n        start = page * 10  # 
Google uses increments of 10 for the next page\n        url = base_url + str(start)\n\n        # Fetch the URL\n        response = session.get(url)\n\n        # Check if the request was successful\n        if response.status_code != 200:\n            print(f\"Failed to retrieve page {page + 1}. Status code: {response.status_code}\")\n            continue\n\n        # Parse the HTML content\n        html = response.text\n        selector = Selector(text=html)\n\n        print(f\"Page {page + 1}:\")\n        print('--------------------------')\n\n        # Loop through each search result and extract the title, link, and snippet\n        for result in selector.css('div.g'):\n            # Extract title\n            title = result.css('h3::text').get()\n            # Extract the link\n            link = result.css('a::attr(href)').get()\n            # Extract the snippet text\n            snippet = result.css('div.VwiC3b::text, span.aCOpRe::text').get()\n\n            # Check if both title and link exist\n            if title and link:\n                title = title.strip()\n                link = link\n                snippet = snippet.strip() if snippet else \"None\"\n\n                # Write the result to the CSV file\n                writer.writerow([title, link, snippet])\n\n                # Print the result to the console (optional)\n                print(f\"Title: {title}\")\n                print(f\"Link: {link}\")\n                print(f\"Snippet: {snippet}\")\n                print('--------------------------')\n\n        # Wait for a while between requests to avoid triggering anti-scraping mechanisms\n        time.sleep(2)\n\n# Close the session after scraping\nsession.close()\n\nprint(f\"Results for '{search_query}' have been saved to {csv_filename}\")<\/pre>\n<\/div>\n<p>[\/vc_column_text][\/vc_column][\/vc_row][\/vc_section][vc_row content_placement=&#8221;middle&#8221; css=&#8221;.vc_custom_1678097619074{margin-top: 60px !important;padding-right: 60px 
!important;padding-left: 60px !important;}&#8221;][vc_column][vc_btn title=&#8221;Social media Data Mining&#8221; style=&#8221;flat&#8221; shape=&#8221;square&#8221; color=&#8221;mulled-wine&#8221; i_type=&#8221;typicons&#8221; i_icon_typicons=&#8221;typcn typcn-arrow-left-thick&#8221; css=&#8221;&#8221; add_icon=&#8221;true&#8221; link=&#8221;url:https%3A%2F%2Fwww.canbekcan.com%2Fen%2Fcourses%2Fsocial-media-data-mining%2F|title:Social%20Media%20Data%20Mining&#8221;][\/vc_column][\/vc_row]<\/p>\n<\/div>","protected":false},"excerpt":{"rendered":"<p>[vc_section css=&#8221;.vc_custom_1599166519090{padding: 20px !important;background-color: #474747 !important;}&#8221;][vc_row css=&#8221;.vc_custom_1599166540648{margin: 20px !important;padding: 20px !important;}&#8221;][vc_column][vc_custom_heading source=&#8221;post_title&#8221; font_container=&#8221;tag:h1|text_align:left|color:%23ffffff&#8221;][vc_custom_heading text=&#8221;Eastern Mediterranean University Faculty of Communication and [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3814,"parent":3824,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"page-pagebuilder.php","meta":{"footnotes":""},"class_list":["post-3852","page","type-page","status-publish","has-post-thumbnail","hentry"],"_links":{"self":[{"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/pages\/3852","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/comments?post=3852"}],"version-history":[{"count":1,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/pages\/3852\/revisions"}],"predecessor-version":[{"id":3926,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/pages\/3852\/revisions\/3
926"}],"up":[{"embeddable":true,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/pages\/3824"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/media\/3814"}],"wp:attachment":[{"href":"https:\/\/canbekcan.com\/en\/wp-json\/wp\/v2\/media?parent=3852"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}