Transporto įmonės duomenų apžvalga
import pandas as pd
import mysql.connector
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
mydb = mysql.connector.connect(
host="127.0.0.1",
port="3306",
user="root",
password="****",
)
cursor = mydb.cursor()
cursor.execute('USE crm_local')
pd.merge??
accounts = pd.read_sql('select id, accounts.billing_address_country as client_country from accounts', con=mydb)
accounts_cstm = pd.read_sql('select id_c, grupe_c from accounts_cstm', con=mydb)
accounts.rename(columns={'id': 'id_c'}, inplace=True)
acc = pd.merge(accounts, accounts_cstm, on=['id_c'], suffixes=('_s', '_cstm'), how = 'inner')
accounts_crm = pd.read_sql('select accounts_crm_paslaugos_1accounts_ida, accounts_crm_paslaugos_1crm_paslaugos_idb from accounts_crm', con=mydb)
accounts_crm.rename(columns={'accounts_crm_paslaugos_1accounts_ida': 'id_c'}, inplace=True)
accounts_crm.rename(columns={'accounts_crm_paslaugos_1crm_paslaugos_idb': 'id'}, inplace=True)
all_acc = pd.merge(acc, accounts_crm, on=['id_c'], suffixes=('_s', '_cstm'), how = 'inner')
all_acc.shape
(3028180, 4)
paslaugos_cstm = pd.read_sql('select id_c, price_c from paslaugos_cstm', con=mydb)
paslaugos_cstm.rename(columns={'id_c': 'id'}, inplace=True)
paslaugos = pd.read_sql('select id, data, code_nr as service_qnt, service, created_by from paslaugos', con=mydb)
pasl = pd.merge(paslaugos, paslaugos_cstm, on=['id'], suffixes=('_s', '_cstm'), how = 'inner')
all_acc_pasl = pd.merge(all_acc, pasl, on=['id'], suffixes=('_s', '_cstm'), how = 'inner')
all_acc_pasl.shape
(2933858, 9)
users = pd.read_sql('select id, user_name from users', con=mydb)
users.rename(columns={'id': 'created_by'}, inplace=True)
employees = pd.read_sql('select Employee, Country from employees', con=mydb)
employees.rename(columns={'Employee': 'user_name'}, inplace=True)
user_emp = pd.merge(employees, users, on=['user_name'], suffixes=('_s', '_cstm'), how = 'inner')
all_table = pd.merge(all_acc_pasl, user_emp, on=['created_by'], suffixes=('_s', '_cstm'), how = 'inner')
del all_table['user_name']
del all_table['id_c']
del all_table['created_by']
all_table.rename(columns={'Country': 'employee_country'}, inplace=True)
all_table.rename(columns={'data': 'date'}, inplace=True)
all_table.rename(columns={'price_c': 'unit_price'}, inplace=True)
all_table.shape
(2547149, 8)
#total_price
all_table['total_price'] = all_table['service_qnt'] * all_table['unit_price']
del all_table['service_qnt']
del all_table['unit_price']
#susitvarkom Sales salis
all_table['grupe_c'].replace({'5': 2,'4': 2, '9': 2, '11': 2, '7':3, '12':3, '13':4, '8':4, '6':3, '1':1,'blank':0,None:0,'':0, '10':3}, inplace=True)
all_table['sales_country'] = all_table['grupe_c']
all_table['sales_country'].replace({2: 'Canada', 3: 'JAV', 4: 'Mexico', 1:'Panama'}, inplace=True)
# read CSV, paslaugu grupes
service = pd.read_csv('C:\\Users\\37062\\Downloads\\ServiceManagement.csv')
all_table = pd.merge(all_table, service, on=['service'], suffixes=('_s', '_cstm'), how = 'inner')
del all_table['service']
all_table['service_qnts'] = 1
all_table
client_country | grupe_c | id | date | employee_country | total_price | sales_country | service_group | service_qnts | |
---|---|---|---|---|---|---|---|---|---|
0 | None | 0 | 6b2a4491-a1dd-8a3e-c672-529473f59022 | 2013-11-25 | Other | 156.00 | 0 | Motociklai | 1 |
1 | 0 | 6b2a4491-a1dd-8a3e-c672-529473f59022 | 2013-11-25 | Other | 156.00 | 0 | Motociklai | 1 | |
2 | None | 0 | 3011d857-a116-a9a6-f054-52b847738711 | 2013-12-22 | Other | 32.00 | 0 | Motociklai | 1 |
3 | None | 0 | 4e3dea3e-e98e-be6c-a118-52b84749ace6 | 2013-12-22 | Other | 150.15 | 0 | Motociklai | 1 |
4 | None | 0 | 65f7c95e-c313-b3d9-2386-529479ee78f2 | 2013-11-29 | Other | 162.00 | 0 | Motociklai | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2543404 | Poland | 2 | 143caea2-896b-d66c-adba-610ea14f8ebe | 2021-08-07 | Canada | 60.00 | Canada | Jachtos | 1 |
2543405 | Poland | 2 | f0a5b64b-b846-507a-73f7-61c076479a3c | 2021-12-20 | Canada | 15.00 | Canada | Jachtos | 1 |
2543406 | Poland | 2 | f0e5b116-fcf3-facd-9a7e-616df6be6cfa | 2021-10-19 | Canada | 1.20 | Canada | Jachtos | 1 |
2543407 | Poland | 2 | e5b09ec9-536b-8f8f-77b5-61aa4c157a66 | 2021-12-03 | Canada | 2.00 | Canada | Jachtos | 1 |
2543408 | Poland | 2 | 1c84eaac-7ebf-ff43-b4b3-60c8609d9ebb | 2021-06-15 | Canada | 16.00 | Canada | Atsargines dalys | 1 |
2543409 rows × 9 columns
all_table.shape
(2543409, 9)
all_table.describe()
grupe_c | total_price | service_qnts | |
---|---|---|---|
count | 2.543409e+06 | 2.543409e+06 | 2543409.0 |
mean | 2.898573e+00 | 2.824321e+01 | 1.0 |
std | 5.933416e-01 | 9.428225e+03 | 0.0 |
min | 0.000000e+00 | -1.121900e+03 | 1.0 |
25% | 3.000000e+00 | 6.000000e+00 | 1.0 |
50% | 3.000000e+00 | 1.000000e+01 | 1.0 |
75% | 3.000000e+00 | 1.700000e+01 | 1.0 |
max | 4.000000e+00 | 1.090909e+07 | 1.0 |
round(all_table['total_price'].mean(), 2)
28.24
all_table['total_price'].median()
10.0
df = all_table[all_table['total_price'] <50]
df = df[df['total_price'] >0]
df.shape
(2401229, 9)
round(df['total_price'].mean(), 2)
12.1
df['total_price'].median()
10.0
sns.displot(df['total_price'], bins=30, kde=True)
<seaborn.axisgrid.FacetGrid at 0x228e16e3bb0>
round(100-df['service_qnts'].count()/all_table['service_qnts'].count()*100, 2)
5.59
# df.to_csv('df.csv')
sns.boxplot(x='sales_country',y='total_price', data=df)
<AxesSubplot:xlabel='sales_country', ylabel='total_price'>
sns.boxplot(x='service_group',y='total_price', data=df)
plt.xticks(rotation=90)
(array([0, 1, 2, 3, 4, 5, 6, 7]), [Text(0, 0, 'Motociklai'), Text(1, 0, 'Automobiliai'), Text(2, 0, 'Jachtos'), Text(3, 0, 'Traktoriai'), Text(4, 0, 'Atsargines dalys'), Text(5, 0, 'Dviraciai'), Text(6, 0, 'Tarpiniai mokejimai'), Text(7, 0, 'Lektuvai')])
df['week_day'] = pd.to_datetime(df['date']).dt.dayofweek+1
sns.displot(df['week_day'])
df.iloc[:2]
client_country | grupe_c | id | date | employee_country | total_price | sales_country | service_group | service_qnts | week_day | |
---|---|---|---|---|---|---|---|---|---|---|
2 | None | 0 | 3011d857-a116-a9a6-f054-52b847738711 | 2013-12-22 | Other | 32.0 | 0 | Motociklai | 1 | 7 |
30 | None | 0 | 9f192f6e-5fdc-9d34-d8c1-52e90b655cd7 | 2014-01-28 | Other | 10.0 | 0 | Motociklai | 1 | 2 |
pd.DataFrame(round(df.groupby('week_day')['total_price'].mean().sort_values(ascending=False),2))
total_price | |
---|---|
week_day | |
1 | 12.42 |
2 | 12.31 |
3 | 12.23 |
4 | 12.14 |
7 | 12.06 |
5 | 12.03 |
6 | 11.72 |
sns.scatterplot(data=df, x='total_price', y='grupe_c')
<AxesSubplot:xlabel='total_price', ylabel='grupe_c'>
x = input('Iveskite SMS teksta - ')
y = input('Ar siusti sms? (yes/no) - ')
if y == 'yes':
import requests
url = "http://api.******.**/send?"
payload = "sender=Sender&text="+x+"&number=37012345678&user-key=****&user=user&api-key=*****"
headers = {
'Content-Type': "application/x-www-form-urlencoded",
'Cache-Control': "no-cache",
}
response = requests.request("POST", url, data=payload, headers=headers)
y = 'SMS zinute issiusta 😉'
else:
y = 'SMS zinute neisiusta ☹'
y
Iveskite SMS teksta - 45 Ar siusti sms? (yes/no) - yes
'SMS zinute issiusta 😉'