|
@@ -0,0 +1,402 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
+ "import pandas as pd"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "'/home/kakashi/intern-tracker/src/analysis'"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import os\n",
|
|
|
+ "os.getcwd()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>title</th>\n",
|
|
|
+ " <th>company</th>\n",
|
|
|
+ " <th>salary</th>\n",
|
|
|
+ " <th>location</th>\n",
|
|
|
+ " <th>link</th>\n",
|
|
|
+ " <th>date</th>\n",
|
|
|
+ " <th>query</th>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>Python Developer</td>\n",
|
|
|
+ " <td>Infosys</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>Pune, Maharashtra</td>\n",
|
|
|
+ " <td>https://in.indeed.com/rc/clk?jk=b0a156d0bd60b7...</td>\n",
|
|
|
+ " <td>Posted 2 days ago</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>Junior Python Developer</td>\n",
|
|
|
+ " <td>1E9 Advisors</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>Aundh, Pune, Maharashtra</td>\n",
|
|
|
+ " <td>https://in.indeed.com/rc/clk?jk=6227a113217cc2...</td>\n",
|
|
|
+ " <td>Posted 24 days ago</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>Entry-Level Software Developer</td>\n",
|
|
|
+ " <td>Tantransh Solutions</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>Bajaj Nagar, Nagpur, Maharashtra</td>\n",
|
|
|
+ " <td>https://in.indeed.com/rc/clk?jk=43540174e00001...</td>\n",
|
|
|
+ " <td>Posted 13 days ago</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>Python Developer</td>\n",
|
|
|
+ " <td>QuantGrade</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>Remote in Noida, Uttar Pradesh</td>\n",
|
|
|
+ " <td>https://in.indeed.com/rc/clk?jk=055ccbf93d79b7...</td>\n",
|
|
|
+ " <td>Posted 7 days ago</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>Python (Programming Language)-Application Deve...</td>\n",
|
|
|
+ " <td>Accenture</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>Bengaluru, Karnataka</td>\n",
|
|
|
+ " <td>https://in.indeed.com/rc/clk?jk=62317f94ed4532...</td>\n",
|
|
|
+ " <td>Today</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " title company \\\n",
|
|
|
+ "0 Python Developer Infosys \n",
|
|
|
+ "1 Junior Python Developer 1E9 Advisors \n",
|
|
|
+ "2 Entry-Level Software Developer Tantransh Solutions \n",
|
|
|
+ "3 Python Developer QuantGrade \n",
|
|
|
+ "4 Python (Programming Language)-Application Deve... Accenture \n",
|
|
|
+ "\n",
|
|
|
+ " salary location \\\n",
|
|
|
+ "0 NaN Pune, Maharashtra \n",
|
|
|
+ "1 NaN Aundh, Pune, Maharashtra \n",
|
|
|
+ "2 NaN Bajaj Nagar, Nagpur, Maharashtra \n",
|
|
|
+ "3 NaN Remote in Noida, Uttar Pradesh \n",
|
|
|
+ "4 NaN Bengaluru, Karnataka \n",
|
|
|
+ "\n",
|
|
|
+ " link date \\\n",
|
|
|
+ "0 https://in.indeed.com/rc/clk?jk=b0a156d0bd60b7... Posted 2 days ago \n",
|
|
|
+ "1 https://in.indeed.com/rc/clk?jk=6227a113217cc2... Posted 24 days ago \n",
|
|
|
+ "2 https://in.indeed.com/rc/clk?jk=43540174e00001... Posted 13 days ago \n",
|
|
|
+ "3 https://in.indeed.com/rc/clk?jk=055ccbf93d79b7... Posted 7 days ago \n",
|
|
|
+ "4 https://in.indeed.com/rc/clk?jk=62317f94ed4532... Today \n",
|
|
|
+ "\n",
|
|
|
+ " query source \n",
|
|
|
+ "0 python developer indeed \n",
|
|
|
+ "1 python developer indeed \n",
|
|
|
+ "2 python developer indeed \n",
|
|
|
+ "3 python developer indeed \n",
|
|
|
+ "4 python developer indeed "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# Relative to the kernel's working directory (src/analysis, per os.getcwd() above)\n",
+ "# instead of an absolute home-directory path, so the load works on other checkouts.\n",
+ "data = pd.read_csv('../../data/cleaned/indeed/2024_03_15.csv')\n",
+ "data.head()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 6,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
+ "RangeIndex: 225 entries, 0 to 224\n",
|
|
|
+ "Data columns (total 8 columns):\n",
|
|
|
+ " # Column Non-Null Count Dtype \n",
|
|
|
+ "--- ------ -------------- ----- \n",
|
|
|
+ " 0 title 218 non-null object\n",
|
|
|
+ " 1 company 225 non-null object\n",
|
|
|
+ " 2 salary 31 non-null object\n",
|
|
|
+ " 3 location 225 non-null object\n",
|
|
|
+ " 4 link 225 non-null object\n",
|
|
|
+ " 5 date 225 non-null object\n",
|
|
|
+ " 6 query 225 non-null object\n",
|
|
|
+ " 7 source 225 non-null object\n",
|
|
|
+ "dtypes: object(8)\n",
|
|
|
+ "memory usage: 14.2+ KB\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "data.info()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "(225, 8)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "data.shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>count</th>\n",
|
|
|
+ " <th>unique</th>\n",
|
|
|
+ " <th>top</th>\n",
|
|
|
+ " <th>freq</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>title</th>\n",
|
|
|
+ " <td>218</td>\n",
|
|
|
+ " <td>117</td>\n",
|
|
|
+ " <td>Python Developer</td>\n",
|
|
|
+ " <td>37</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>company</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>154</td>\n",
|
|
|
+ " <td>Oracle</td>\n",
|
|
|
+ " <td>11</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>salary</th>\n",
|
|
|
+ " <td>31</td>\n",
|
|
|
+ " <td>29</td>\n",
|
|
|
+ " <td>₹15,000 - ₹70,000 a month</td>\n",
|
|
|
+ " <td>2</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>location</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>52</td>\n",
|
|
|
+ " <td>Remote</td>\n",
|
|
|
+ " <td>44</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>link</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>219</td>\n",
|
|
|
+ " <td>https://in.indeed.comnan</td>\n",
|
|
|
+ " <td>7</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>date</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>31</td>\n",
|
|
|
+ " <td>Posted 30+ days ago</td>\n",
|
|
|
+ " <td>52</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>query</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>python developer</td>\n",
|
|
|
+ " <td>75</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>indeed</td>\n",
|
|
|
+ " <td>225</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " count unique top freq\n",
|
|
|
+ "title 218 117 Python Developer 37\n",
|
|
|
+ "company 225 154 Oracle 11\n",
|
|
|
+ "salary 31 29 ₹15,000 - ₹70,000 a month 2\n",
|
|
|
+ "location 225 52 Remote 44\n",
|
|
|
+ "link 225 219 https://in.indeed.comnan 7\n",
|
|
|
+ "date 225 31 Posted 30+ days ago 52\n",
|
|
|
+ "query 225 3 python developer 75\n",
|
|
|
+ "source 225 1 indeed 225"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "data.describe().T"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "[nan '₹10,00,000 - ₹12,00,000 a year' 'Up to ₹50,000 a month'\n",
|
|
|
+ " '₹15,000 - ₹70,000 a month' '₹20,000 - ₹30,000 a month'\n",
|
|
|
+ " 'Up to ₹5,00,000 a year' '₹15,000 - ₹25,000 a month'\n",
|
|
|
+ " '₹41,40,000 - ₹62,10,000 a year' '₹8,00,000 - ₹18,00,000 a year'\n",
|
|
|
+ " 'Up to ₹60,000 a month' '₹30,000 - ₹45,000 a month'\n",
|
|
|
+ " '₹25,000 - ₹80,000 a month' '₹1,44,000 - ₹3,60,000 a year'\n",
|
|
|
+ " '₹40,000 - ₹60,000 a month' 'From ₹90,000 a month'\n",
|
|
|
+ " '₹90,000 - ₹1,00,000 a month' '₹10,00,000 - ₹26,00,000 a year'\n",
|
|
|
+ " '₹80,000 - ₹1,00,000 a month' '₹40,000 a month'\n",
|
|
|
+ " '₹30,00,000 - ₹35,00,000 a year' '₹4,00,000 - ₹5,00,000 a year'\n",
|
|
|
+ " '₹40,000 - ₹45,000 a month' '₹4,00,000 - ₹8,00,000 a year'\n",
|
|
|
+ " '₹90,000 - ₹1,60,000 a month' '₹10,00,000 - ₹25,00,000 a year'\n",
|
|
|
+ " '₹8,00,000 - ₹12,00,000 a year' '₹15,000 - ₹30,000 a month'\n",
|
|
|
+ " '₹35,000 - ₹65,000 a month' '₹30,000 - ₹50,000 a month'\n",
|
|
|
+ " '₹11,547.68 - ₹52,691.43 a month']\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "print(data.salary.unique())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def process_salary(sample):\n",
+ "    \"\"\"Convert one row's free-text salary into a monthly (low, high) rupee range.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    sample : mapping or pandas.Series\n",
+ "        Row with a 'salary' entry, e.g. '₹15,000 - ₹70,000 a month',\n",
+ "        'Up to ₹5,00,000 a year', 'From ₹90,000 a month', or a missing value.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    tuple of float\n",
+ "        (low, high) per month; yearly figures are divided by 12. The side that\n",
+ "        is not stated ('Up to' has no low, 'From' has no high) is NaN, and a\n",
+ "        missing or unparseable salary gives (NaN, NaN).\n",
+ "    \"\"\"\n",
+ "    salary = sample['salary']\n",
+ "    # Missing salaries are float NaN (or None), not the string 'NaN', so they\n",
+ "    # must be caught before any str method is called on the value.\n",
+ "    if pd.isna(salary):\n",
+ "        return (np.nan, np.nan)\n",
+ "    salary = str(salary).strip()\n",
+ "\n",
+ "    # Pull out the numeric tokens; commas are digit-group separators.\n",
+ "    amounts = []\n",
+ "    for token in salary.split():\n",
+ "        digits = token.lstrip('₹').replace(',', '')\n",
+ "        if digits.replace('.', '', 1).isdigit():\n",
+ "            amounts.append(float(digits))\n",
+ "    if not amounts:\n",
+ "        return (np.nan, np.nan)\n",
+ "\n",
+ "    if salary.startswith('Up to'):\n",
+ "        low, high = np.nan, amounts[0]\n",
+ "    elif salary.startswith('From'):\n",
+ "        low, high = amounts[0], np.nan\n",
+ "    else:\n",
+ "        # A single figure ('₹40,000 a month') is both the low and the high.\n",
+ "        low, high = amounts[0], amounts[-1]\n",
+ "\n",
+ "    if salary.endswith('year'):\n",
+ "        low, high = low / 12, high / 12\n",
+ "    return (low, high)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "production",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.16"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|