import numpy as np
import pandas as pd
import sqlite3 as sql
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from gensim import corpora
import gensim


# Load the data from three tables of sqlite file
conn = sql.connect('/Users/vytautas/Downloads/mental_health.sqlite')

query = "SELECT * FROM Survey;"
survey_df = pd.read_sql_query(query, conn)

query = "SELECT * FROM Question;"
question_df = pd.read_sql_query(query, conn)

query = "SELECT * FROM Answer;"
answer_df = pd.read_sql_query(query, conn)


# Now we can display first few rows of each dataframe
for df, name in zip([survey_df, question_df, answer_df], ['Survey', 'Question', 'Answer']):
    print(f'\nFirst few rows of {name} DataFrame:\n', df.head())

First few rows of Survey DataFrame:
    SurveyID                    Description
0      2014  mental health survey for 2014
1      2016  mental health survey for 2016
2      2017  mental health survey for 2017
3      2018  mental health survey for 2018
4      2019  mental health survey for 2019

First few rows of Question DataFrame:
                                         questiontext  questionid
0                                  What is your age?           1
1                               What is your gender?           2
2                       What country do you live in?           3
3  If you live in the United States, which state ...           4
4                             Are you self-employed?           5

First few rows of Answer DataFrame:
   AnswerText  SurveyID  UserID  QuestionID
0         37      2014       1           1
1         44      2014       2           1
2         32      2014       3           1
3         31      2014       4           1
4         31      2014       5           1


for df, name in zip([survey_df, question_df, answer_df], ['Survey', 'Question', 'Answer']):
    print(f'\nInformation of {name} DataFrame:\n')
    df.info()
    print(f'\nDescriptive statistics of {name} DataFrame:\n', df.describe(include='all'))

Information of Survey DataFrame:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   SurveyID     5 non-null      int64 
 1   Description  5 non-null      object
dtypes: int64(1), object(1)
memory usage: 208.0+ bytes

Descriptive statistics of Survey DataFrame:
            SurveyID                    Description
count      5.000000                              5
unique          NaN                              5
top             NaN  mental health survey for 2016
freq            NaN                              1
mean    2016.800000                            NaN
std        1.923538                            NaN
min     2014.000000                            NaN
25%     2016.000000                            NaN
50%     2017.000000                            NaN
75%     2018.000000                            NaN
max     2019.000000                            NaN

Information of Question DataFrame:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105 entries, 0 to 104
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   questiontext  105 non-null    object
 1   questionid    105 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.8+ KB

Descriptive statistics of Question DataFrame:
                                              questiontext  questionid
count                                                 105  105.000000
unique                                                105         NaN
top     Have your previous employers provided mental h...         NaN
freq                                                    1         NaN
mean                                                  NaN   61.790476
std                                                   NaN   35.589458
min                                                   NaN    1.000000
25%                                                   NaN   27.000000
50%                                                   NaN   66.000000
75%                                                   NaN   92.000000
max                                                   NaN  118.000000

Information of Answer DataFrame:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 236898 entries, 0 to 236897
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   AnswerText  236898 non-null  object
 1   SurveyID    236898 non-null  int64 
 2   UserID      236898 non-null  int64 
 3   QuestionID  236898 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 7.2+ MB

Descriptive statistics of Answer DataFrame:
        AnswerText       SurveyID         UserID     QuestionID
count      236898  236898.000000  236898.000000  236898.000000
unique       4215            NaN            NaN            NaN
top            -1            NaN            NaN            NaN
freq        56598            NaN            NaN            NaN
mean          NaN    2016.572297    2514.516619      48.655898
std           NaN       1.421134    1099.462747      36.126215
min           NaN    2014.000000       1.000000       1.000000
25%           NaN    2016.000000    1691.000000      15.000000
50%           NaN    2016.000000    2652.000000      48.000000
75%           NaN    2017.000000    3439.000000      80.000000
max           NaN    2019.000000    4218.000000     118.000000


# Furthermore we double check for duplicate values and missing values
for df, name in zip([survey_df, question_df, answer_df], ['Survey', 'Question', 'Answer']):
    print(f'\nMissing values in {name} DataFrame:\n', df.isnull().sum())
    print(f'\nDuplicate rows in {name} DataFrame:', df.duplicated().sum())

Missing values in Survey DataFrame:
 SurveyID       0
Description    0
dtype: int64

Duplicate rows in Survey DataFrame: 0

Missing values in Question DataFrame:
 questiontext    0
questionid      0
dtype: int64

Duplicate rows in Question DataFrame: 0

Missing values in Answer DataFrame:
 AnswerText    0
SurveyID      0
UserID        0
QuestionID    0
dtype: int64

Duplicate rows in Answer DataFrame: 0


# Quick look on the available questions 
questions = question_df['questiontext']

for i, question in enumerate(questions, 1):
    print(f"{i}. {question}")

1. What is your age?
2. What is your gender?
3. What country do you live in?
4. If you live in the United States, which state or territory do you live in?
5. Are you self-employed?
6. Do you have a family history of mental illness?
7. Have you ever sought treatment for a mental health disorder from a mental health professional?
8. How many employees does your company or organization have?
9. Is your employer primarily a tech company/organization?
10. Does your employer provide mental health benefits as part of healthcare coverage?
11. Is your anonymity protected if you choose to take advantage of mental health or substance abuse treatment resources provided by your employer?
12. Would you bring up a mental health issue with a potential employer in an interview?
13. Is your primary role within your company related to tech/IT?
14. Do you know the options for mental health care available under your employer-provided health coverage?
15. Has your employer ever formally discussed mental health (for example, as part of a wellness campaign or other official communication)?
16. Does your employer offer resources to learn more about mental health disorders and options for seeking help?
17. If a mental health issue prompted you to request a medical leave from work, how easy or difficult would it be to ask for that leave?
18. Would you feel comfortable discussing a mental health issue with your coworkers?
19. Would you feel comfortable discussing a mental health issue with your direct supervisor(s)?
20. Do you have medical coverage (private insurance or state-provided) that includes treatment of mental health disorders?
21. Do you know local or online resources to seek help for a mental health issue?
22. Do you have previous employers?
23. Have your previous employers provided mental health benefits?
24. Were you aware of the options for mental health care provided by your previous employers?
25. Did your previous employers ever formally discuss mental health (as part of a wellness campaign or other official communication)?
26. Did your previous employers provide resources to learn more about mental health disorders and how to seek help?
27. Was your anonymity protected if you chose to take advantage of mental health or substance abuse treatment resources with previous employers?
28. Would you have been willing to discuss your mental health with your direct supervisor(s)?
29. Would you be willing to bring up a physical health issue with a potential employer in an interview?
30. How willing would you be to share with friends and family that you have a mental illness?
31. Have your observations of how another individual who discussed a mental health disorder made you less likely to reveal a mental health issue yourself in your current workplace?
32. Have you had a mental health disorder in the past?
33. Do you currently have a mental health disorder?
34. Have you ever been diagnosed with a mental health disorder?
35. If you have a mental health disorder, how often do you feel that it interferes with your work when being treated effectively?
36. If you have a mental health disorder, how often do you feel that it interferes with your work when not being treated effectively (i.e., when you are experiencing symptoms)?
37. What country do you work in?
38. What US state or territory do you work in?
39. If you have been diagnosed or treated for a mental health disorder, do you ever reveal this to clients or business contacts?
40. If you have been diagnosed or treated for a mental health disorder, do you ever reveal this to coworkers or employees?
41. Do you believe your productivity is ever affected by a mental health issue?
42. If yes, what percentage of your work time (time performing primary or secondary job functions) is affected by a mental health issue?
43. Have you observed or experienced an unsupportive or badly handled response to a mental health issue in your current or previous workplace?
44. Would you feel more comfortable talking to your coworkers about your physical health or your mental health?
45. Have you ever discussed your mental health with your employer?
46. Describe the conversation you had with your employer about your mental health, including their reactions and what actions were taken to address your mental health issue/questions.
47. Have you ever discussed your mental health with coworkers?
48. Describe the conversation with coworkers you had about your mental health including their reactions.
49. Have you ever had a coworker discuss their or another coworker's mental health with you?
50. Describe the conversation your coworker had with you about their mental health (please do not use names).
51. Overall, how much importance does your employer place on physical health?
52. Overall, how much importance does your employer place on mental health?
53. If you have revealed a mental health disorder to a client or business contact, how has this affected you or the relationship?
54. If you have revealed a mental health disorder to a coworker or employee, how has this impacted you or the relationship?
55. Was your employer primarily a tech company/organization?
56. Would you have felt more comfortable talking to your previous employer about your physical health or your mental health?
57. Did you ever discuss your mental health with your previous employer?
58. Describe the conversation you had with your previous employer about your mental health, including their reactions and actions taken to address your mental health issue/questions.
59. Did you ever discuss your mental health with a previous coworker(s)?
60. Describe the conversation you had with your previous coworkers about your mental health including their reactions.
61. Did you ever have a previous coworker discuss their or another coworker's mental health with you?
62. Describe the conversation your coworker had with you about their mental health (please do not use names)..1
63. Overall, how much importance did your previous employer place on physical health?
64. Overall, how much importance did your previous employer place on mental health?
65. Are you openly identified at work as a person with a mental health issue?
66. Has being identified as a person with a mental health issue affected your career?
67. How has it affected your career?
68. If they knew you suffered from a mental health disorder, how do you think that your team members/co-workers would react?
69. Describe the circumstances of the badly handled or unsupportive response.
70. Have you observed or experienced supportive or well handled response to a mental health issue in your current or previous workplace?
71. Describe the circumstances of the supportive or well handled response.
72. Overall, how well do you think the tech industry supports employees with mental health issues?
73. Briefly describe what you think the industry as a whole and/or employers could do to improve mental health support for employees.
74. If there is anything else you would like to tell us that has not been covered by the survey questions, please use this space to do so.
75. Would you be willing to talk to one of us more extensively about your experiences with mental health issues in the tech industry? (Note that all interview responses would be used _anonymously_ and only with your permission.)
76. What is your race?
77. Do you think that discussing a physical health issue with your employer would have negative consequences?
78. Do you feel that your employer takes mental health as seriously as physical health?
79. If you have a mental health condition, do you feel that it interferes with your work?
80. Do you work remotely (outside of an office) at least 50% of the time?
81. Do you know the options for mental health care your employer provides?
82. Has your employer ever discussed mental health as part of an employee wellness program?
83. Does your employer provide resources to learn more about mental health issues and how to seek help?
84. How easy is it for you to take medical leave for a mental health condition?
85. Do you think that discussing a mental health issue with your employer would have negative consequences?
86. Would you be willing to discuss a mental health issue with your coworkers?
87. Would you be willing to discuss a mental health issue with your direct supervisor(s)?
88. Would you bring up a physical health issue with a potential employer in an interview?
89. Have you heard of or observed negative consequences for coworkers with mental health conditions in your workplace?
90. Any additional notes or comments
91. Do you think that discussing a mental health disorder with your employer would have negative consequences?
92. Have you heard of or observed negative consequences for co-workers who have been open about mental health issues in your workplace?
93. If you have revealed a mental health issue to a client or business contact, do you believe this has impacted you negatively?
94. If you have revealed a mental health issue to a coworker or employee, do you believe this has impacted you negatively?
95. Do you think that discussing a mental health disorder with previous employers would have negative consequences?
96. Do you think that discussing a physical health issue with previous employers would have negative consequences?
97. Would you have been willing to discuss a mental health issue with your previous co-workers?
98. Did you feel that your previous employers took mental health as seriously as physical health?
99. Did you hear of or observe negative consequences for co-workers with mental health issues in your previous workplaces?
100. Do you feel that being identified as a person with a mental health issue would hurt your career?
101. Do you think that team members/co-workers would view you more negatively if they knew you suffered from a mental health issue?
102. If yes, what condition(s) have you been diagnosed with?
103. If maybe, what condition(s) do you believe you have?
104. Which of the following best describes your work position?
105. Do you work remotely?


# Query for variations of the term "mental health"
mental_health_variations_query = """
SELECT SurveyID, COUNT(DISTINCT UserID) as Count 
FROM Answer 
WHERE (AnswerText LIKE '%mental health%' OR AnswerText LIKE '%mental well-being%' OR AnswerText LIKE '%psychological health%') 
  AND AnswerText != '-1'
GROUP BY SurveyID;
"""
mental_health_variations_df = pd.read_sql_query(mental_health_variations_query, conn)

# Query for related topics
related_topics_query = """
SELECT SurveyID, COUNT(DISTINCT UserID) as Count 
FROM Answer 
WHERE (AnswerText LIKE '%stress%' OR AnswerText LIKE '%wellness%' OR AnswerText LIKE '%burnout%') 
  AND AnswerText != '1'
GROUP BY SurveyID;
"""
related_topics_df = pd.read_sql_query(related_topics_query, conn)

# Merging related queries
df = pd.merge(mental_health_variations_df, related_topics_df, on='SurveyID', suffixes=('_mental_health', '_related_topics'))

# Total participants
total_participants_query = """
SELECT SurveyID, COUNT(DISTINCT UserID) as TotalParticipants
FROM Answer 
WHERE AnswerText != '1'
GROUP BY SurveyID;
"""
total_participants_df = pd.read_sql_query(total_participants_query, conn)

# After merging, calculating the normalized count for both mental health and related topics
df = pd.merge(df, total_participants_df, on='SurveyID')

df['NormalizedCount_MentalHealth'] = df['Count_mental_health'] / df['TotalParticipants'] * 100
df['NormalizedCount_RelatedTopics'] = df['Count_related_topics'] / df['TotalParticipants'] * 100

fig = go.Figure()

fig.add_trace(go.Scatter(x=df['SurveyID'], y=df['NormalizedCount_MentalHealth'], name='Mental Health'))
fig.add_trace(go.Scatter(x=df['SurveyID'], y=df['NormalizedCount_RelatedTopics'], name='Related Topics'))

fig.update_layout(
    title='Normalized Worker Perspectives Over Time',
    xaxis_title='Survey Year', 
    yaxis_title='Normalized Count (%)',
    title_x = 0.5)

fig.show()


query = """
SELECT SurveyID, AnswerText as Age
FROM Answer
WHERE QuestionID = 1 and AnswerText != '-1'
ORDER BY SurveyID;
"""
df_age = pd.read_sql_query(query, conn)

df_age['Age'] = df_age['Age'].astype(float)

df_age_avg = df_age.groupby('SurveyID').mean()

fig = px.line(df_age_avg, y='Age', title='Average Age Over Time')
fig.update_layout(
    xaxis_title='Survey Year', 
    yaxis_title='Average Age',
    title_x=0.5)

fig.show()


query = """
SELECT SurveyID, AnswerText as Gender
FROM Answer
WHERE QuestionID = 2
ORDER BY SurveyID;
"""

# Grouping based on biologically accepted genders and else

answer_df = pd.read_sql_query(query, conn)

answer_df['Gender'] = answer_df['Gender'].str.lower()

answer_df['Gender'] = answer_df['Gender'].apply(lambda x: x if x in ['male', 'female'] else 'other')

answer_df_grouped = answer_df.groupby(['SurveyID', 'Gender']).size().reset_index(name='Count')

# Getting percentages for the visualizations
total_count = answer_df_grouped.groupby('SurveyID')['Count'].transform('sum')
answer_df_grouped['Percentage'] = (answer_df_grouped['Count'] / total_count) * 100

fig = go.Figure()

for gender in answer_df_grouped['Gender'].unique():
    df_subset = answer_df_grouped[answer_df_grouped['Gender'] == gender]
    fig.add_trace(go.Bar(
        x=df_subset['SurveyID'],
        y=df_subset['Percentage'],
        name=gender,
        text=df_subset['Percentage'].round(1).astype(str) + '%',
        textposition='auto',
    ))

fig.update_layout(
    barmode='group', 
    xaxis_title='Year', 
    yaxis_title='Percentage',
    title='Gender Distribution Over Time',
    title_x=0.5,
    legend_title_text='Gender')

fig.show()


query = """
SELECT SurveyID, AnswerText as Size, COUNT(*) as Occurrences
FROM Answer
WHERE QuestionID = 8 and AnswerText != '-1'
GROUP BY SurveyID, AnswerText
ORDER BY SurveyID, AnswerText
"""
df_size = pd.read_sql_query(query, conn)

# Calculate total occurrences for each survey year
total_occurrences = df_size.groupby('SurveyID')['Occurrences'].transform('sum')

# Calculate percentages for each survey year
df_size['Percentage'] = (df_size['Occurrences'] / total_occurrences) * 100

fig = px.bar(df_size, x='SurveyID', y='Percentage', color='Size',
             labels={'Percentage': '% of Respondents', 'SurveyID': 'Survey Year'},
             title='Company Size Distribution Over Time',
             barmode='group',
             text=df_size['Percentage'].round(1).astype(str) + '%')

fig.update_layout(
    title_x=0.5,
    legend_title_text='Company Size'
)

fig.show()


# Company size
company_size_query = """
SELECT S.SurveyID, A1.AnswerText as CompanySize
FROM Survey as S
JOIN Answer as A1 
ON S.SurveyID = A1.SurveyID AND A1.QuestionID = 8
WHERE A1.AnswerText != '-1'
ORDER BY S.SurveyID;
"""
df_company_size = pd.read_sql_query(company_size_query, conn)

# Attitudes Towards Mental Health
attitudes_query = """
SELECT S.SurveyID, COUNT(DISTINCT A2.UserID) as AttitudesCount
FROM Survey as S
JOIN Answer as A2 
ON S.SurveyID = A2.SurveyID
WHERE (A2.AnswerText LIKE '%mental health%' OR A2.AnswerText LIKE '%mental well-being%' OR A2.AnswerText LIKE '%psychological health%') 
  AND A2.AnswerText != '-1'
GROUP BY S.SurveyID;
"""
df_attitudes = pd.read_sql_query(attitudes_query, conn)

df_merged = pd.merge(df_company_size, df_attitudes, on='SurveyID')

group_stats = df_merged.groupby('CompanySize')['AttitudesCount'].mean().sort_values()

fig = px.box(df_merged, x='CompanySize', y='AttitudesCount', category_orders={"CompanySize": group_stats.index.tolist()})

fig.update_layout(
    title='Box Plot of AttitudesCount Across Company Sizes',
    xaxis_title='Company Size',
    yaxis_title='Attitudes Count',
    title_x=0.5
)

fig.show()


query = """
SELECT
    SurveyID,
    CASE
        WHEN AnswerText IN ("Don't know", "I don't know", "No", "Not eligible for coverage / NA") THEN 'No'
        ELSE AnswerText
    END AS GroupedAnswer,
    COUNT(*) as ResponseCount
FROM Answer
WHERE QuestionID = 10 AND AnswerText != '-1'
GROUP BY SurveyID, GroupedAnswer
ORDER BY SurveyID, GroupedAnswer;
"""
df_resources = pd.read_sql_query(query, conn)

# Percentage calculation
total_counts = df_resources.groupby('SurveyID')['ResponseCount'].transform('sum')
df_resources['Percentage'] = (df_resources['ResponseCount'] / total_counts) * 100

# Grouped bar chart
fig = px.bar(df_resources, x='SurveyID', y='ResponseCount', color='GroupedAnswer',
             labels={'ResponseCount': 'Response Count', 'SurveyID': 'Survey Year'},
             title='Availability of mental resources',
             barmode='group', 
             text=df_resources['Percentage'].round(1).astype(str) + '%'
             ) 

fig.update_layout(
    legend_title_text='Answer',
    title_x=0.5
)

fig.show()


query = """
SELECT
    SurveyID,
    CASE
        WHEN AnswerText IN ("No", "Not eligible for coverage / NA") THEN 'No'
        WHEN AnswerText IN ("Don't know", "I don't know") THEN 'Maybe'
        ELSE AnswerText
    END AS GroupedAnswer,
    COUNT(*) as ResponseCount
FROM Answer
WHERE QuestionID = 10 AND AnswerText != '-1'
GROUP BY SurveyID, GroupedAnswer
ORDER BY SurveyID, GroupedAnswer;
"""
df_resources1 = pd.read_sql_query(query, conn)

# Percentage calculation
total_counts1 = df_resources1.groupby('SurveyID')['ResponseCount'].transform('sum')
df_resources1['Percentage'] = (df_resources1['ResponseCount'] / total_counts1) * 100

fig = px.bar(df_resources1, x='SurveyID', y='ResponseCount', color='GroupedAnswer',
             labels={'ResponseCount': 'Response Count', 'SurveyID': 'Survey Year'},
             title='Availability of mental resources with no assumptions',
             barmode='group', 
             text=df_resources1['Percentage'].round(1).astype(str) + '%'
             ) 

fig.update_layout(
    legend_title_text='Answer',
    title_x=0.5
)

fig.show()


query = """
SELECT
    SurveyID,
    CASE
        WHEN LOWER(AnswerText) = "don't know" OR LOWER(AnswerText) = 'possibly' THEN 'Maybe'
        ELSE AnswerText
    END as MentalHealthDisorder,
    COUNT(*) as DisorderCount
FROM 
    Answer
WHERE 
    QuestionID IN (32, 33, 34)
    AND AnswerText != '-1'
GROUP BY
    SurveyID, 
    MentalHealthDisorder
ORDER BY
    SurveyID,
    MentalHealthDisorder;

"""
df_disorders = pd.read_sql_query(query, conn)

# Percentage calculation
total_counts = df_disorders.groupby('SurveyID')['DisorderCount'].transform('sum')
df_disorders['Percentage'] = (df_disorders['DisorderCount'] / total_counts) * 100

fig = px.bar(df_disorders, 
             x='SurveyID', 
             y='DisorderCount', 
             color='MentalHealthDisorder',
             labels={'DisorderCount': 'Reported Disorders Count', 'SurveyID': 'Survey Year'},
             title='Variations in Reported Mental Health Disorders Across Survey Years',
             barmode='group',
             text=df_disorders['Percentage'].round(1).astype(str) + '%')

fig.update_layout(
    legend_title_text='Answer',
    title_x = 0.5
)
fig.show()


# Merge the dataframes
merged_df = pd.merge(df_disorders, df_resources, on='SurveyID', suffixes=('_disorders', '_resources'))

merged_df['Percentage_disorders'] = (merged_df['DisorderCount'] / merged_df.groupby('SurveyID')['DisorderCount'].transform('sum')) * 100
merged_df['Percentage_resources'] = (merged_df['ResponseCount'] / merged_df.groupby('SurveyID')['ResponseCount'].transform('sum')) * 100

correlation_variables = merged_df[['Percentage_disorders', 'Percentage_resources']]

correlation_matrix = correlation_variables.corr()
print(correlation_matrix)

plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=.5)
plt.title('Correlation Matrix: Mental Health Disorders vs. Availability of Resources')
plt.show()

                      Percentage_disorders  Percentage_resources
Percentage_disorders          1.000000e+00         -2.025954e-17
Percentage_resources         -2.025954e-17          1.000000e+00


stop = set(stopwords.words('english'))
lemma = WordNetLemmatizer()
texts = question_df['questiontext'].apply(lambda x: [lemma.lemmatize(word) for word in x.lower().split() if word not in stop])

dictionary = corpora.Dictionary(texts)
doc_term_matrix = [dictionary.doc2bow(text) for text in texts]


lda = gensim.models.ldamodel.LdaModel(doc_term_matrix, num_topics=5, id2word = dictionary, passes=50)

topics = lda.print_topics()
for topic in topics:
    print(topic[0])
    word_weight_pairs = topic[1].split(' + ')
    for pair in word_weight_pairs[:5]:
        print(pair)
    print('\n')

0
0.104*"health"
0.103*"mental"
0.037*"ever"
0.029*"employer"
0.025*"previous"


1
0.059*"mental"
0.054*"health"
0.052*"would"
0.040*"issue"
0.034*"willing"


2
0.073*"health"
0.066*"would"
0.054*"mental"
0.053*"employer"
0.041*"think"


3
0.078*"mental"
0.067*"health"
0.046*"describe"
0.034*"previous"
0.031*"conversation"


4
0.040*"health?"
0.040*"employer"
0.040*"overall,"
0.040*"much"
0.040*"place"

Exploring Mental Health in the Tech Industry¶

Project Introduction¶

Research Objectives¶

Analyze the Structure of Survey Questions¶

Understand Worker Perspectives on Mental Health¶

Explore the Frequency of Mental Health Disorders among Tech Professionals¶

Assess Survey Comprehensiveness¶

Hypotheses¶

EDA Questions¶

Data Loading and Inspection:¶

Survey DataFrame¶

Question DataFrame¶

Answer DataFrame¶

Analyze the Structure of Survey Questions¶

Understand Worker Perspectives on Mental Health¶

Summary of Worker Perspectives on Mental Health analysis¶

Explore the Frequency of Mental Health Disorders among Tech Professionals¶

Summary of the Frequency of Mental Health Disorders among Tech Professionals¶

Assess Survey Comprehensiveness¶

Project Summary¶

Research Objectives¶

Analyzing the Structure of Survey Questions¶

Understanding Worker Perspectives on Mental Health¶

Exploring the Frequency of Mental Health Disorders among Tech Professionals¶

Assessing Survey Comprehensiveness¶

Hypotheses¶

EDA Questions¶

Findings¶

Worker Perspectives on Mental Health¶

Frequency of Mental Health Disorders¶

Survey Comprehensiveness¶

Future Improvements¶