Skip to content

Commit

Permalink
Merge pull request #11 from scipy-conference/feat/make-runnable
Browse files Browse the repository at this point in the history
feat: Make notebooks runnable on previous data
  • Loading branch information
guenp authored Apr 17, 2024
2 parents 77cee6a + 03ff4ca commit 6647435
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ venv/
*.ipynb
# pixi environments
.pixi

data/
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,37 @@ The checks and tests are all run using Github actions on every pull request and

This repository is set up for Python 3.11. To customize that, change the `VARIANT` argument in `.devcontainer/devcontainer.json`, change the config options in `.precommit-config.yaml` and change the version number in `.github/workflows/python.yaml`.

## Assign Reviewers

First download the following files from Pretalx into the `data/` directory:

* `scipy_reviewers.csv` # people who signed up as reviewers
* `sessions.csv` # all proposals exported from pretalx
* `speakers.csv` # all speakers exported from pretalx
* `pretalx_reviewers.csv` # all reviewers copy-pasted from pretalx
* `scipy_coi_export.csv` # all responses to the coi form
* `coi_authors.csv` # copy-pasted values of author names from the COI form
* `tracks.csv` # manually entered track IDs

Then run the notebooks as Python files in the following order with `pixi`:

```
$ pixi run pre-processing
$ pixi run assignments
```

or run the notebooks manually as Jupyter notebooks, either by launching a JupyterLab instance

```
$ pixi run jupyter lab
```

or just getting a shell

```
$ pixi shell
```

## Development instructions

## With devcontainer
Expand Down
9 changes: 5 additions & 4 deletions assign_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
####################
# Imports
import json
from pathlib import Path

import numpy as np
from scipy.optimize import Bounds, LinearConstraint, milp
Expand Down Expand Up @@ -120,7 +121,7 @@ def solve_milp(
############################
## FORMAT AND OUTPUT DATA ##
############################
def format_and_output_result(df_reviewers, df_submissions, solution, post_fix=""):
def format_and_output_result(df_reviewers, df_submissions, solution, post_fix="", output_dir=Path.cwd() / "output"):
reviewers = df_reviewers.to_dict("records")
submissions = df_submissions.to_dict("records")

Expand All @@ -140,20 +141,20 @@ def format_and_output_result(df_reviewers, df_submissions, solution, post_fix=""
if DEBUG:
result = {reviewer["reviewer_id"]: sorted(reviewer["is_tutorial"]) for reviewer in reviewers}

with open(f"output/review-assignments-debug{post_fix}.json", "w") as fp:
with open(output_dir / f"review-assignments-debug{post_fix}.json", "w") as fp:
fp.write(json.dumps(result, indent=4))

result = {reviewer["reviewer_id"]: reviewer["assigned_submission_ids"] for reviewer in reviewers}

with open(f"output/review-assignments{post_fix}.json", "w") as fp:
with open(output_dir / f"review-assignments{post_fix}.json", "w") as fp:
fp.write(json.dumps(result, indent=4))

for submission, assignments in zip(submissions, solution.T):
submission["assigned_reviewer_ids"] = df_reviewers.reviewer_id[assignments].values.tolist()

result = {submission["submission_id"]: submission["assigned_reviewer_ids"] for submission in submissions}

with open(f"output/submission-assignments{post_fix}.json", "w") as fp:
with open(output_dir / f"submission-assignments{post_fix}.json", "w") as fp:
fp.write(json.dumps(result, indent=4))

return reviewers, submissions
Empty file added data/.gitkeep
Empty file.
31 changes: 17 additions & 14 deletions notebooks/pre-processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,34 @@
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.12.1
# version: 3.12.2
# ---

# %%
from pathlib import Path

import duckdb
from IPython import display

# %%
data_dir = Path.cwd() / ".." / "data"

# Raw data to import
raw_files = dict(
scipy_reviewers="../data/scipy_reviewers.csv", # people who signed up as reviewers
pretalx_sessions="../data/sessions.csv", # all proposal exported from pretalx
pretalx_speakers="../data/speakers.csv", # all speakers exported from pretalx
pretalx_reviewers="../data/pretalx_reviewers.csv", # all reviewers copy-pasted from pretalx
coi_reviewers="../data/scipy_coi_export.csv", # all responses to the coi form
coi_authors="../data/coi_authors.csv", # copy pasted values of author names from coi form
tracks="../data/tracks.csv", # manually entered track IDs
scipy_reviewers=data_dir / "scipy_reviewers.csv", # people who signed up as reviewers
pretalx_sessions=data_dir / "sessions.csv", # all proposal exported from pretalx
pretalx_speakers=data_dir / "speakers.csv", # all speakers exported from pretalx
pretalx_reviewers=data_dir / "pretalx_reviewers.csv", # all reviewers copy-pasted from pretalx
coi_reviewers=data_dir / "scipy_coi_export.csv", # all responses to the coi form
coi_authors=data_dir / "coi_authors.csv", # copy pasted values of author names from coi form
tracks=data_dir / "tracks.csv", # manually entered track IDs
)

# Output
database_file = "../data/assign_reviews.db"
database_file = data_dir / "assign_reviews.db"

# %%
con = duckdb.connect(database_file)
con = duckdb.connect(str(database_file))


# %%
Expand Down Expand Up @@ -141,7 +146,7 @@ def create_and_show_table(file_name, table_name, show=True):
# Reviewers who signed up for pretalx but did not fill in COI

# %%
con = duckdb.connect(database_file)
con = duckdb.connect(str(database_file))

# %%
df = con.sql(
Expand Down Expand Up @@ -359,7 +364,7 @@ def create_and_show_table(file_name, table_name, show=True):
con.sql("table reviewers_with_tracks").df()

# %%
con.sql("select email as reviewer_id, list(track_id) as tracks from reviewers_with_tracks group by email")
con.sql("select email as reviewer_id, list(track_ids) as tracks from reviewers_with_tracks group by email")

# %% [markdown]
# # Final tables for script
Expand Down Expand Up @@ -411,5 +416,3 @@ def create_and_show_table(file_name, table_name, show=True):

# %%
con.close()

# %%
29 changes: 17 additions & 12 deletions notebooks/run-assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.12.1
# version: 3.12.2
# ---

# %%
####################
## ASSIGN REVIEWS ##
####################
# Imports
import json
import sys
from pathlib import Path

import duckdb
import pandas as pd
Expand All @@ -40,16 +40,18 @@
# # Start script

# %%
# mkdir output
data_dir = Path().cwd() / ".." / "data"
output_dir = Path().cwd() / ".." / "output"
output_dir.mkdir(exist_ok=True)

# %%
ASSIGN_TUTORIALS_TO_ANYONE = False
TUTORIAL_COEFF = 0.8

DEBUG = True

database_file = "../data/assign_reviews.db"
con = duckdb.connect(database_file)
database_file = data_dir / "assign_reviews.db"
con = duckdb.connect(str(database_file))
df_submissions = con.sql("table submissions_to_assign").df()
df_reviewers = con.sql("table reviewers_to_assign").df()

Expand Down Expand Up @@ -82,7 +84,9 @@
TUTORIAL_COEFF,
ASSIGN_TUTORIALS_TO_ANYONE,
)
reviewers, submissions = format_and_output_result(df_reviewers, df_submissions_tutorials, solution, post_fix="00")
reviewers, submissions = format_and_output_result(
df_reviewers, df_submissions_tutorials, solution, post_fix="00", output_dir=output_dir
)

# %%
df = pd.DataFrame(reviewers)
Expand Down Expand Up @@ -141,7 +145,7 @@
)
if solution is not None:
reviewers, submissions = format_and_output_result(
df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix="01"
df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix="01", output_dir=output_dir
)

# %%
Expand Down Expand Up @@ -217,7 +221,7 @@

if solution is not None:
reviewers, submissions = format_and_output_result(
df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix="02"
df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix="02", output_dir=output_dir
)

# %%
Expand Down Expand Up @@ -314,17 +318,18 @@
# ## Final export

# %%
database_file = "../data/assign_reviews.db"
con = duckdb.connect(database_file)
database_file = data_dir / "assign_reviews.db"
con = duckdb.connect(str(database_file))

# %%
reviewer_assignments_final = {
item["reviewer_id"]: item["assigned_submission_ids"]
item["reviewer_id"]: item["assigned_submission_ids"].tolist()
for item in con.sql("table reviewer_assignments_02")
.df()[["reviewer_id", "assigned_submission_ids"]]
.to_dict("records")
}
with open("output/reviewer-assignments.json", "w") as fp:

with open(output_dir / "reviewer-assignments.json", "w") as fp:
fp.write(json.dumps(reviewer_assignments_final, indent=4))

# %%
Expand Down
2 changes: 2 additions & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ channels = ["conda-forge"]
platforms = ["linux-64", "osx-64", "osx-arm64"]

[tasks]
pre-processing = "cd notebooks && python pre-processing.py"
assignments = "cd notebooks && python run-assignments.py"

[dependencies]
python = "3.12.*"
Expand Down

0 comments on commit 6647435

Please sign in to comment.