def _answers_per_row(frame, column: str) -> list:
    """Return, for every row of *frame*, the answer values of its survey.

    For each row, all rows sharing that row's ``survey_id`` are selected,
    their *column* entries are exploded into individual dicts, and each dict
    is reduced to ``list(d.values())``.

    The result has exactly one entry per row of *frame* (so rows with the
    same ``survey_id`` get equal, but independent, lists).

    NOTE: an empty list in *column* explodes to NaN, which has no
    ``.values()`` and raises ``AttributeError`` -- same as the original code.
    """
    # Group once per column instead of re-filtering the whole frame for
    # every single row; sort=False keeps the original row order in groups.
    exploded_by_survey = {
        survey_id: answers.explode()
        for survey_id, answers in frame.groupby('survey_id', sort=False)[column]
    }

    per_row = []
    for survey_id in frame['survey_id']:
        # Missing (NaN) ids are dropped by groupby; they never matched any
        # row under `==` either, so they map to an empty list.
        items = exploded_by_survey.get(survey_id)
        if items is None:
            per_row.append([])
        else:
            # Build fresh lists so rows never share mutable state.
            per_row.append([list(item.values()) for item in items])
    return per_row


marriage_related = _answers_per_row(df, 'marriage_related')

# can't really cast this to an array as it would be useless:
# the number of surveys collected for each survey id is different,
# so that would lead to shape inconsistencies.
# so we leave it as a list

# doing the same for the rest:
work_related = _answers_per_row(df, 'work_related')
education_related = _answers_per_row(df, 'education_related')
money_related = _answers_per_row(df, 'money_related')
health_related = _answers_per_row(df, 'health_related')
age_related = _answers_per_row(df, 'age_related')

# sanity check: every list has exactly one entry per row of df
# (this compares lengths only; it does not verify ordering)
len(marriage_related) == len(work_related) == len(education_related) == len(money_related) == len(health_related) == len(age_related) == len(df)
Hosted on Deepnote