Skip to content

Commit

Permalink
Merge pull request #2838 from fedspendingtransparency/qat
Browse files Browse the repository at this point in the history
Sprint 118 Production Deploy
  • Loading branch information
tony-sappe authored Nov 2, 2020
2 parents c5464fa + 5f707e9 commit 50c1c15
Show file tree
Hide file tree
Showing 41 changed files with 2,846 additions and 1,394 deletions.
2 changes: 2 additions & 0 deletions usaspending_api/common/helpers/generic_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
OVERLAY_VIEWS,
DEPENDENCY_FILEPATH,
MATERIALIZED_VIEWS,
CHUNKED_MATERIALIZED_VIEWS,
MATVIEW_GENERATOR_FILE,
DEFAULT_MATIVEW_DIR,
)
Expand All @@ -22,6 +23,7 @@

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Paths, under DEFAULT_MATIVEW_DIR, of the SQL file named by the "sql_filename"
# entry of every regular materialized view definition.
TEMP_SQL_FILES = [DEFAULT_MATIVEW_DIR / val["sql_filename"] for val in MATERIALIZED_VIEWS.values()]
# Extend the list with the SQL file of every chunked materialized view definition.
# NOTE(review): these paths are also built under DEFAULT_MATIVEW_DIR — confirm that is
# the intended location for the chunked views' SQL files.
TEMP_SQL_FILES += [DEFAULT_MATIVEW_DIR / val["sql_filename"] for val in CHUNKED_MATERIALIZED_VIEWS.values()]


def read_text_file(filepath):
Expand Down
47 changes: 45 additions & 2 deletions usaspending_api/common/management/commands/matview_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@
import psycopg2
import subprocess

from django.core.management import call_command
from django.core.management.base import BaseCommand
from pathlib import Path

from usaspending_api.common.data_connectors.async_sql_query import async_run_creates
from usaspending_api.common.helpers.timing_helpers import ConsoleTimer as Timer
from usaspending_api.common.matview_manager import (
CHUNKED_MATERIALIZED_VIEWS,
DEFAULT_MATIVEW_DIR,
DEFAULT_CHUNKED_MATIVEW_DIR,
DEPENDENCY_FILEPATH,
DROP_OLD_MATVIEWS,
MATERIALIZED_VIEWS,
MATVIEW_GENERATOR_FILE,
CHUNKED_MATVIEW_GENERATOR_FILE,
OVERLAY_VIEWS,
)
from usaspending_api.common.helpers.sql_helpers import get_database_dsn_string
Expand All @@ -27,12 +31,15 @@ class Command(BaseCommand):

def faux_init(self, args):
    """Copy the parsed command-line options onto the command instance.

    Args:
        args: Mapping of parsed options. Must provide the keys "only",
            "temp_dir", "temp_chunked_dir", "leave_sql", "leave_old",
            "dependencies" and "chunk_count".

    Raises:
        KeyError: If ``args["only"]`` is set to a name that is not a key of
            MATERIALIZED_VIEWS.
    """
    self.matviews = MATERIALIZED_VIEWS
    self.chunked_matviews = CHUNKED_MATERIALIZED_VIEWS

    only = args["only"]
    if only:
        # Restrict the run to the single requested matview. The chunked
        # matviews must be dropped as well; otherwise "--only" would still
        # generate and create every chunk and trigger the chunk-combine step.
        self.matviews = {only: MATERIALIZED_VIEWS[only]}
        self.chunked_matviews = {}

    self.matview_dir = args["temp_dir"]
    self.matview_chunked_dir = args["temp_chunked_dir"]
    self.no_cleanup = args["leave_sql"]
    # "--leave-old" is the opt-out, so the stored flag is its negation.
    self.remove_matviews = not args["leave_old"]
    self.run_dependencies = args["dependencies"]
    self.chunk_count = args["chunk_count"]

def add_arguments(self, parser):
parser.add_argument("--only", choices=list(MATERIALIZED_VIEWS.keys()))
Expand All @@ -52,9 +59,18 @@ def add_arguments(self, parser):
help="Choose a non-default directory to store materialized view SQL files.",
default=DEFAULT_MATIVEW_DIR,
)
parser.add_argument(
"--temp-chunked-dir",
type=Path,
help="Choose a non-default directory to store materialized view SQL files.",
default=DEFAULT_CHUNKED_MATIVEW_DIR,
)
parser.add_argument(
"--dependencies", action="store_true", help="Run the SQL dependencies before the materialized view SQL."
)
parser.add_argument(
"--chunk-count", default=10, help="Number of chunks to split chunked matviews into", type=int
)

def handle(self, *args, **options):
"""Overloaded Command Entrypoint"""
Expand All @@ -74,25 +90,52 @@ def generate_matview_sql(self):
recursive_delete(self.matview_dir)
self.matview_dir.mkdir()

if self.matview_chunked_dir.exists():
logger.warning("Clearing dir {}".format(self.matview_chunked_dir))
recursive_delete(self.matview_chunked_dir)
self.matview_chunked_dir.mkdir()

# IF using this for operations, DO NOT LEAVE hardcoded `python3` in the command
exec_str = "python3 {} --quiet --dest={}/ --batch_indexes=3".format(MATVIEW_GENERATOR_FILE, self.matview_dir)
# Create main list of Matview SQL files
exec_str = f"python3 {MATVIEW_GENERATOR_FILE} --quiet --dest={self.matview_dir}/ --batch_indexes=3"
subprocess.call(exec_str, shell=True)

# Create SQL files for Chunked Universal Transaction Matviews
for matview, config in self.chunked_matviews.items():
exec_str = f"python3 {CHUNKED_MATVIEW_GENERATOR_FILE} --quiet --file {config['json_filepath']} --chunk-count {self.chunk_count}"
subprocess.call(exec_str, shell=True)

def cleanup(self):
    """Remove the temporary SQL directories used by this run.

    Deletes the regular matview SQL directory and, when it exists, the
    chunked matview SQL directory, so no generated SQL files are left behind.
    """
    recursive_delete(self.matview_dir)
    # The chunked directory is created alongside the regular one when the SQL
    # files are generated; guard on existence so cleanup stays safe if it was
    # never created.
    if self.matview_chunked_dir.exists():
        recursive_delete(self.matview_chunked_dir)

def create_views(self):
loop = asyncio.new_event_loop()
tasks = []

# Create Matviews
for matview, config in self.matviews.items():
logger.info("Creating Future for {}".format(matview))
logger.info(f"Creating Future for matview {matview}")
sql = (self.matview_dir / config["sql_filename"]).read_text()
tasks.append(asyncio.ensure_future(async_run_creates(sql, wrapper=Timer(matview)), loop=loop))

# Create Chunked Matviews
for matview, config in self.chunked_matviews.items():
for current_chunk in range(self.chunk_count):
chunked_matview = f"{matview}_{current_chunk}"
logger.info(f"Creating Future for chunked matview {chunked_matview}")
sql = (self.matview_chunked_dir / f"{chunked_matview}.sql").read_text()
tasks.append(asyncio.ensure_future(async_run_creates(sql, wrapper=Timer(chunked_matview)), loop=loop))

loop.run_until_complete(asyncio.gather(*tasks))
loop.close()

if "universal_transaction_matview" in self.chunked_matviews:
logger.info("Inserting data from universal_transaction_matview chunks into single table.")
call_command(
"combine_universal_transaction_matview_chunks", chunk_count=self.chunk_count, index_concurrency=20,
)

for view in OVERLAY_VIEWS:
run_sql(view.read_text(), "Creating Views")

Expand Down
10 changes: 9 additions & 1 deletion usaspending_api/common/matview_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
import usaspending_api.search.models as mv

DEFAULT_MATIVEW_DIR = settings.REPO_DIR.parent / "matviews"
DEFAULT_CHUNKED_MATIVEW_DIR = settings.REPO_DIR.parent / "chunked_matviews"
DEPENDENCY_FILEPATH = settings.APP_DIR / "database_scripts" / "matviews" / "functions_and_enums.sql"
JSON_DIR = settings.APP_DIR / "database_scripts" / "matview_sql_generator"
JSON_DIR = settings.APP_DIR / "database_scripts" / "matview_generator"
MATVIEW_GENERATOR_FILE = settings.APP_DIR / "database_scripts" / "matview_generator" / "matview_sql_generator.py"
CHUNKED_MATVIEW_GENERATOR_FILE = (
settings.APP_DIR / "database_scripts" / "matview_generator" / "chunked_matview_sql_generator.py"
)
OVERLAY_VIEWS = [
settings.APP_DIR / "database_scripts" / "matviews" / "vw_award_search.sql",
settings.APP_DIR / "database_scripts" / "matviews" / "vw_es_award_search.sql",
Expand Down Expand Up @@ -113,6 +117,10 @@
"sql_filename": "tas_autocomplete_matview.sql",
},
),
]
)
CHUNKED_MATERIALIZED_VIEWS = OrderedDict(
[
(
"universal_transaction_matview",
{
Expand Down
8 changes: 7 additions & 1 deletion usaspending_api/conftest_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ def __init__(self, index_type):
self.client = Elasticsearch([settings.ES_HOSTNAME], timeout=settings.ES_TIMEOUT)
self.template = retrieve_index_template("{}_template".format(self.index_type[:-1]))
self.mappings = json.loads(self.template)["mappings"]
self.etl_config = {
"index_name": self.index_name,
"query_alias_prefix": self.alias_prefix,
"verbose": False,
"write_alias": self.index_name + "-alias",
}

def delete_index(self):
self.client.indices.delete(self.index_name, ignore_unavailable=True)
Expand All @@ -46,7 +52,7 @@ def update_index(self, **options):
"""
self.delete_index()
self.client.indices.create(index=self.index_name, body=self.template)
create_aliases(self.client, self.index_name, self.index_type, True)
create_aliases(self.client, self.etl_config)
self._add_contents(**options)

def _add_contents(self, **options):
Expand Down
44 changes: 22 additions & 22 deletions usaspending_api/data/dabs_submission_window_schedule.csv
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
id,period_start_date,period_end_date,submission_start_date,certification_due_date,submission_due_date,submission_reveal_date,submission_fiscal_year,submission_fiscal_quarter,submission_fiscal_month,is_quarter
2017031,2016-10-01 00:00:00Z,2016-12-31 00:00:00Z,2017-01-19 00:00:00Z,2017-02-19 00:00:00Z,2017-02-19 00:00:00Z,2017-02-20 00:00:00Z,2017,1,3,True
2017061,2017-01-01 00:00:00Z,2017-03-31 00:00:00Z,2017-04-19 00:00:00Z,2017-05-19 00:00:00Z,2017-05-19 00:00:00Z,2017-05-20 00:00:00Z,2017,2,6,True
2017091,2017-04-01 00:00:00Z,2017-06-30 00:00:00Z,2017-07-19 00:00:00Z,2017-08-14 00:00:00Z,2017-08-14 00:00:00Z,2017-08-15 00:00:00Z,2017,3,9,True
2017121,2017-07-01 00:00:00Z,2017-09-30 00:00:00Z,2017-10-06 00:00:00Z,2017-11-30 00:00:00Z,2017-11-30 00:00:00Z,2017-12-01 00:00:00Z,2017,4,12,True
2018031,2017-10-01 00:00:00Z,2017-12-31 00:00:00Z,2018-01-19 00:00:00Z,2018-02-14 00:00:00Z,2018-02-14 00:00:00Z,2018-02-15 00:00:00Z,2018,1,3,True
2018061,2018-01-01 00:00:00Z,2018-03-31 00:00:00Z,2018-04-19 00:00:00Z,2018-05-15 00:00:00Z,2018-05-15 00:00:00Z,2018-05-16 00:00:00Z,2018,2,6,True
2018091,2018-04-01 00:00:00Z,2018-06-30 00:00:00Z,2018-07-19 00:00:00Z,2018-08-14 00:00:00Z,2018-08-14 00:00:00Z,2018-08-15 00:00:00Z,2018,3,9,True
2018121,2018-07-01 00:00:00Z,2018-09-30 00:00:00Z,2018-10-19 00:00:00Z,2018-11-14 00:00:00Z,2018-11-14 00:00:00Z,2018-11-15 00:00:00Z,2018,4,12,True
2019031,2018-10-01 00:00:00Z,2018-12-31 00:00:00Z,2019-02-21 00:00:00Z,2019-03-20 00:00:00Z,2019-03-20 00:00:00Z,2019-03-21 00:00:00Z,2019,1,3,True
2019061,2019-01-01 00:00:00Z,2019-03-31 00:00:00Z,2019-04-19 00:00:00Z,2019-05-15 00:00:00Z,2019-05-15 00:00:00Z,2019-05-16 00:00:00Z,2019,2,6,True
2019091,2019-04-01 00:00:00Z,2019-06-30 00:00:00Z,2019-07-19 00:00:00Z,2019-08-14 00:00:00Z,2019-08-14 00:00:00Z,2019-08-15 00:00:00Z,2019,3,9,True
2019121,2019-07-01 00:00:00Z,2019-09-30 00:00:00Z,2019-10-18 00:00:00Z,2019-11-14 00:00:00Z,2019-11-14 00:00:00Z,2019-11-15 00:00:00Z,2019,4,12,True
2020031,2019-10-01 00:00:00Z,2019-12-31 00:00:00Z,2020-01-17 00:00:00Z,2020-02-14 00:00:00Z,2020-02-14 00:00:00Z,2020-02-15 00:00:00Z,2020,1,3,True
2020061,2020-01-01 00:00:00Z,2020-03-31 00:00:00Z,2020-04-17 00:00:00Z,2020-05-15 00:00:00Z,2020-05-15 00:00:00Z,2020-05-16 00:00:00Z,2020,2,6,True
2020091,2020-04-01 00:00:00Z,2020-06-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-31 00:00:00Z,2020,3,9,True
2020121,2020-07-01 00:00:00Z,2020-09-30 00:00:00Z,2020-10-19 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020-11-17 00:00:00Z,2020,4,12,True
2020070,2020-04-01 00:00:00Z,2020-04-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-31 00:00:00Z,2020,3,7,False
2020080,2020-05-01 00:00:00Z,2020-05-31 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-31 00:00:00Z,2020,3,8,False
2020090,2020-06-01 00:00:00Z,2020-06-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-31 00:00:00Z,2020,3,9,False
2020100,2020-07-01 00:00:00Z,2020-07-31 00:00:00Z,2020-08-19 00:00:00Z,2020-11-16 00:00:00Z,2020-08-28 00:00:00Z,2020-08-29 00:00:00Z,2020,4,10,False
2020110,2020-08-01 00:00:00Z,2020-08-31 00:00:00Z,2020-09-18 00:00:00Z,2020-11-16 00:00:00Z,2020-09-29 00:00:00Z,2020-09-30 00:00:00Z,2020,4,11,False
2020120,2020-09-01 00:00:00Z,2020-09-30 00:00:00Z,2020-10-19 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020-11-17 00:00:00Z,2020,4,12,False
2017031,2016-10-01 00:00:00Z,2016-12-31 00:00:00Z,2017-01-19 00:00:00Z,2017-02-19 00:00:00Z,2017-02-19 00:00:00Z,2017-02-19 00:00:00Z,2017,1,3,True
2017061,2017-01-01 00:00:00Z,2017-03-31 00:00:00Z,2017-04-19 00:00:00Z,2017-05-19 00:00:00Z,2017-05-19 00:00:00Z,2017-05-19 00:00:00Z,2017,2,6,True
2017091,2017-04-01 00:00:00Z,2017-06-30 00:00:00Z,2017-07-19 00:00:00Z,2017-08-14 00:00:00Z,2017-08-14 00:00:00Z,2017-08-14 00:00:00Z,2017,3,9,True
2017121,2017-07-01 00:00:00Z,2017-09-30 00:00:00Z,2017-10-06 00:00:00Z,2017-11-30 00:00:00Z,2017-11-30 00:00:00Z,2017-11-30 00:00:00Z,2017,4,12,True
2018031,2017-10-01 00:00:00Z,2017-12-31 00:00:00Z,2018-01-19 00:00:00Z,2018-02-14 00:00:00Z,2018-02-14 00:00:00Z,2018-02-14 00:00:00Z,2018,1,3,True
2018061,2018-01-01 00:00:00Z,2018-03-31 00:00:00Z,2018-04-19 00:00:00Z,2018-05-15 00:00:00Z,2018-05-15 00:00:00Z,2018-05-15 00:00:00Z,2018,2,6,True
2018091,2018-04-01 00:00:00Z,2018-06-30 00:00:00Z,2018-07-19 00:00:00Z,2018-08-14 00:00:00Z,2018-08-14 00:00:00Z,2018-08-14 00:00:00Z,2018,3,9,True
2018121,2018-07-01 00:00:00Z,2018-09-30 00:00:00Z,2018-10-19 00:00:00Z,2018-11-14 00:00:00Z,2018-11-14 00:00:00Z,2018-11-14 00:00:00Z,2018,4,12,True
2019031,2018-10-01 00:00:00Z,2018-12-31 00:00:00Z,2019-02-21 00:00:00Z,2019-03-20 00:00:00Z,2019-03-20 00:00:00Z,2019-03-20 00:00:00Z,2019,1,3,True
2019061,2019-01-01 00:00:00Z,2019-03-31 00:00:00Z,2019-04-19 00:00:00Z,2019-05-15 00:00:00Z,2019-05-15 00:00:00Z,2019-05-15 00:00:00Z,2019,2,6,True
2019091,2019-04-01 00:00:00Z,2019-06-30 00:00:00Z,2019-07-19 00:00:00Z,2019-08-14 00:00:00Z,2019-08-14 00:00:00Z,2019-08-14 00:00:00Z,2019,3,9,True
2019121,2019-07-01 00:00:00Z,2019-09-30 00:00:00Z,2019-10-18 00:00:00Z,2019-11-14 00:00:00Z,2019-11-14 00:00:00Z,2019-11-14 00:00:00Z,2019,4,12,True
2020031,2019-10-01 00:00:00Z,2019-12-31 00:00:00Z,2020-01-17 00:00:00Z,2020-02-14 00:00:00Z,2020-02-14 00:00:00Z,2020-02-14 00:00:00Z,2020,1,3,True
2020061,2020-01-01 00:00:00Z,2020-03-31 00:00:00Z,2020-04-17 00:00:00Z,2020-05-15 00:00:00Z,2020-05-15 00:00:00Z,2020-05-15 00:00:00Z,2020,2,6,True
2020091,2020-04-01 00:00:00Z,2020-06-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-30 00:00:00Z,2020,3,9,True
2020121,2020-07-01 00:00:00Z,2020-09-30 00:00:00Z,2020-10-19 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020,4,12,True
2020070,2020-04-01 00:00:00Z,2020-04-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-30 00:00:00Z,2020,3,7,False
2020080,2020-05-01 00:00:00Z,2020-05-31 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-30 00:00:00Z,2020,3,8,False
2020090,2020-06-01 00:00:00Z,2020-06-30 00:00:00Z,2020-07-17 00:00:00Z,2020-08-14 00:00:00Z,2020-07-30 00:00:00Z,2020-07-30 00:00:00Z,2020,3,9,False
2020100,2020-07-01 00:00:00Z,2020-07-31 00:00:00Z,2020-08-19 00:00:00Z,2020-11-16 00:00:00Z,2020-08-28 00:00:00Z,2020-08-28 00:00:00Z,2020,4,10,False
2020110,2020-08-01 00:00:00Z,2020-08-31 00:00:00Z,2020-09-18 00:00:00Z,2020-11-16 00:00:00Z,2020-09-29 00:00:00Z,2020-09-29 00:00:00Z,2020,4,11,False
2020120,2020-09-01 00:00:00Z,2020-09-30 00:00:00Z,2020-10-19 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020-11-16 00:00:00Z,2020,4,12,False
153 changes: 153 additions & 0 deletions usaspending_api/data/multiprocessing_worker_names.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
{
"subjects": [
"Agent",
"Ant",
"Archer",
"Armadillo",
"Assassin",
"Bandit",
"Beetle",
"Boss",
"Brain",
"Captain",
"Champion",
"Commando",
"Conjuror",
"Crusher",
"Dart",
"Defender",
"Dragon",
"Electron",
"Enchanter",
"Eye",
"Falcon",
"Fox",
"Gargoyle",
"Genius",
"Golem",
"Guard",
"Guardian",
"Hammer",
"Heart",
"Hunter",
"Jackal",
"Juggernaut",
"Karma",
"Knight",
"Magician",
"Mamba",
"Mantis",
"Martian",
"Mastermind",
"Mecha",
"Minion",
"Monarch",
"Mongoose",
"Moth",
"Nightmare",
"Neutron",
"Omen",
"Phoenix",
"Protector",
"Proton",
"Puma",
"Ranger",
"Robot",
"Rocket",
"Saber",
"Scythe",
"Seer",
"Sentinel",
"Shadow",
"Shepherd",
"Slayer",
"Smasher",
"Spectacle",
"Spectre",
"Spirit",
"Spy",
"Storm",
"Titan",
"Trident",
"UFO",
"Vector",
"Warrior",
"Watcher",
"Wing",
"Wizard",
"Wolf",
"Wonder"
],
"attributes": [
"Amber",
"Artificial",
"Atomic",
"Bionic",
"Black",
"Blue",
"Capped",
"Captain",
"Colossal",
"Commander",
"Crazy",
"Curious",
"Dark",
"Doctor",
"Eager",
"Earth",
"Ethereal",
"Fabulous",
"Fallen",
"Fancy",
"Fantastic",
"Fearless",
"Fiery",
"Flying",
"Gentle",
"Giant",
"Glorious",
"Green",
"Grey",
"Heavy",
"Humble",
"Ice",
"Infamous",
"Intelligent",
"Invisible",
"Jade",
"Kind",
"Mega",
"Mighty",
"Mysterious",
"Nefarious",
"Night",
"Nocturnal",
"Orange",
"Prime",
"Professor",
"Purple",
"Quick",
"Red",
"Ruby",
"Sassy",
"Suave",
"Scarlet",
"Sensitive",
"Silver",
"Smooth",
"Sneaky",
"Speedy",
"Super",
"Supreme",
"The",
"Thunder",
"Ultra",
"Unarmed",
"Universal",
"White",
"Wild",
"Winged",
"Wonder",
"Yellow"
]
}
Loading

0 comments on commit 50c1c15

Please sign in to comment.