From 75155c15ebc7135687475202a24997955bbcda1e Mon Sep 17 00:00:00 2001 From: NicholasConnors Date: Sun, 17 Nov 2019 00:44:25 -0500 Subject: [PATCH] Merge branch 'css-styling' of C:\Users\nicho\OneDrive\Documents\Coding\Repo\arxiv-net with conflicts. --- .idea/arxiv-net.iml | 2 +- .idea/misc.xml | 2 +- arxiv_net/dashboard/assets/css.css | 98 ++++- arxiv_net/dashboard/pages/arxiv_dash.py | 500 +++++++++++++++++++----- 4 files changed, 508 insertions(+), 94 deletions(-) diff --git a/.idea/arxiv-net.iml b/.idea/arxiv-net.iml index 6711606..f9be485 100644 --- a/.idea/arxiv-net.iml +++ b/.idea/arxiv-net.iml @@ -2,7 +2,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index a2e120d..c65c3c9 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/arxiv_net/dashboard/assets/css.css b/arxiv_net/dashboard/assets/css.css index 2a7f65c..cc4b7cc 100644 --- a/arxiv_net/dashboard/assets/css.css +++ b/arxiv_net/dashboard/assets/css.css @@ -13,7 +13,13 @@ input { } #button-div { - padding: 23px 0px 0px 0px ! important; + padding: 0px ! important; + width: 100%; +} + +#button { + float: right; + margin-right: 8px; } .links { @@ -24,7 +30,7 @@ input { text-align: right; } -#title-div { +.row #title-div.two.columns.title { white-space: nowrap; } @@ -48,10 +54,74 @@ h2 { } +#h1 { + padding-left:8px; +} + +#uname-box { + margin-left: 8px; +} + +.container { + width: 100% ! important; + padding: 0px ! important; + margin: 0px ! important; + max-width: none ! important; +} + +#checklist { + text-align: right; +} + + #checklist input[type="checkbox"] { + float: right; + margin-top: 6px; + } + #feed { } +#feed-1, #feed-2 { + width: 33%; + border-right: 1px solid black; + border-bottom: none ! important; + height: 100%; + display: inline-block; +} + +#feed-2 { + background-color: #f4f4f4; +} + +#filters { + +} + +#filters div { + +} + +#filters div label { + text-align: left ! important; + float: left; +} + +#filters #title-div, #author-div, #date-div, #topic-div { + display: inline-block; + width: 100%; +} + +#filters #topic-div, #date-div { + padding-top: 8px ! important; +} + +.feed-input { + float: right; + margin-right: 8px; + width:70%; +} + #user-name { color: white; float: left; @@ -63,14 +133,25 @@ h2 { padding-right: 8px; } +#login-button { + margin-left: 2px; + margin-bottom: 1px; + height: 36px; +} + .logo { height: 0px; } +#header { + width: 100%; +} + .header { background-color: darkred; align-items: center; display: block; + width: 100%; } .custom_button { @@ -85,6 +166,19 @@ h2 { float: right; } +.paper-placeholder { + margin: 0px ! important; + padding: 0px ! important; +} + +.paper-placeholder:hover { + background-color: lightblue; +} + +.selected-paper-div { + background-color: pink; +} + .page { width: 100%; font-family: Avenir; diff --git a/arxiv_net/dashboard/pages/arxiv_dash.py b/arxiv_net/dashboard/pages/arxiv_dash.py index bdc6f59..cc291eb 100644 --- a/arxiv_net/dashboard/pages/arxiv_dash.py +++ b/arxiv_net/dashboard/pages/arxiv_dash.py @@ -1,24 +1,41 @@ import json import pickle from collections import defaultdict -from typing import Dict, Set +from typing import Dict, Set, List, Optional +from tqdm import tqdm +from pathlib import Path +import pandas as pd +import dash import dash_core_components as dcc import dash_html_components as html from dash.dependencies import Input, Output, State from dash.exceptions import PreventUpdate -from tqdm import tqdm +import plotly.graph_objs as go + from arxiv_net.dashboard.assets.style import * from arxiv_net.dashboard.server import app +from arxiv_net.textsearch.whoosh import get_index, search_index from arxiv_net.users import USER_DIR from arxiv_net.utilities import Config -from arxiv_net.textsearch.whoosh import get_index, search_index +from arxiv_net.dashboard import DASH_DIR ################################################################################ # DATA LOADING ################################################################################ DB = pickle.load(open(Config.ss_db_path, 'rb')) +DB_ARXIV = pickle.load(open(Config.db_path, 'rb')) +SIMILARITIES = pickle.load(open(Config.sim_path, 'rb')) + +embed_db_path = Path(Config.bert_abstract_embed_db_path) +embeds_tsne_csv_path = embed_db_path.with_name(embed_db_path.name.replace(".p", "_tsne.csv")) +TSNE_CSV = pd.read_csv(embeds_tsne_csv_path, dtype=str) +TSNE_CSV["Topic"] = [DB_ARXIV[i]["arxiv_primary_category"]["term"] if i in DB_ARXIV else None for i in TSNE_CSV['Unnamed: 0']] +TSNE_CSV["CitationVelocity"] = [DB[i].citationVelocity if i in DB else None for i in TSNE_CSV['Unnamed: 0']] +TSNE_CSV["Year"] = [int(DB_ARXIV[i]["published"][:4]) if i in DB_ARXIV else None for i in TSNE_CSV['Unnamed: 0']] +TSNE_CSV["Title"] = [DB[i].title if i in DB else None for i in TSNE_CSV['Unnamed: 0']] + # TODO: add auto-completion (https://community.plot.ly/t/auto-complete-text-suggestion-option-in-textfield/8940) # Indexing db, should be done asynchronously while fetching from SS @@ -40,88 +57,136 @@ TOPICS[topic.topic].add(paper_id) TITLES[paper.title].add(paper_id) + +class PaperFeed: + """" A tracker for displayed / selected papers + """ + + def __init__(self, + collection: List[PaperID], + selected: Optional[PaperID] = None, + display_size: int = 10, + ): + self.collection = collection + self.display_size = display_size + self.selected = selected + self.current_page = 0 + self.total_pages = len(self.collection) // display_size + 1 + + @property + def displayed(self): + return self.collection[self.display_size * self.current_page: + self.display_size * self.current_page + self.display_size] + + def __call__(self, *args, **kwargs): + return self.displayed + + def reset(self): + self.collection = list() + self.selected = None + self.current_page = 0 + + def pg_up(self): + self.current_page += 1 + + def pg_down(self): + self.current_page -= 1 + + +class Dashboard: + """ Encapsulates all methods related to the dash. + """ + + def __init__(self, current_user: str = 'default', feed: PaperFeed = None): + self.current_user = current_user + self.feed = feed or PaperFeed(collection=[]) + + +DASH = Dashboard() + + ################################################################################ # HTML DIVS ################################################################################ +# Configure static layout date_filter = html.Div( id='date-div', children=[ - html.Label('Published:', - style={'textAlign': 'center'}), + html.Label('Published:'), dcc.Dropdown( id='date', + className='feed-input', options=[{'label': c, 'value': c} for c in LOOKBACKS], value=LOOKBACKS[0] ) ], - style={'display': 'block'}, - className='two columns', + # style={'display': 'block'}, + # className='two columns', ) topics_filter = html.Div( id='topic-div', children=[ - html.Label('Topic:', - style={'textAlign': 'center'}), + html.Label('Topic:'), dcc.Dropdown( id='topic', + className='feed-input', options=[{'label': c, 'value': c} for c in TOPICS], value='Any' ) - ], - style={'display': 'block'}, - className='two columns', + ] ) title_filter = html.Div( id='title-div', children=[ - html.Label('Title:', - style={'textAlign': 'center'}), + html.Label('Title:'), dcc.Input( id='title', + className='feed-input', placeholder='Attention Is All You Need', type='text', - value='Any', - style={'width' : '100%', - 'textAlign': 'center'} + value='Any' ) - ], - style={'display': 'block'}, - className='two columns', + ] ) author_filter = html.Div( id='author-div', children=[ - html.Label('Author:', - style={'textAlign': 'center'}), + html.Label('Author:'), dcc.Input( id='author', + className='feed-input', placeholder='Richard Sutton', type='text', - value='Any', - style={'width' : '100%', - 'textAlign': 'center'} + value='Any' ) - ], - style={'display': 'block'}, - className='two columns', + ] ) search_button = html.Div( - html.Div( - id='button-div', - children=[ - html.Button('Search', id='button'), - ], - className='one column custom_button', - ) + id='button-div', + children=[ + html.Button('Search', id='button'), + ], + className='one column custom_button', ) +tsne_plot = html.Div( + className="six columns", + children=[ + dcc.Graph(id="graph-3d-plot-tsne", style={"height": "98vh"}) + ], +), + +################################################################################ +# LAYOUT +################################################################################ + layout = html.Div([ html.Div( children=[ @@ -176,7 +241,7 @@ ), html.Div( - id='static-components', + id='feed-1', children=[ html.Div( id='filters', @@ -188,17 +253,47 @@ search_button ] ), + html.Hr(), + html.Div( + id='feed-div', + children=[ + dcc.Loading( + id='display-feed', + type='cube', + children=[ + html.Ul( + children=[ + html.Li(id=f'paper-placeholder-{i}', className='paper-placeholder') + for i in + range(DASH.feed.display_size - 1) + ], + style={'list-style-type': 'none'} + ) + + ] + ) + ] + ) ], - className='row' + className='six columns' ), - + html.Div( - id='dynamic-components', + id='feed-2', children=[ + dcc.Checklist( + id='checklist', + options=[ + {'label': 'Similar', 'value': 'similar'}, + {'label': 'References', 'value': 'references'}, + {'label': 'Citations', 'value': 'citations'} + ], + value=['Citations'] + ), + html.Hr(), html.Div( - id='feed-div', + id='feed2-div', children=[ - dcc.Loading(id='display-feed', type='cube') ], style={ 'textAlign' : 'center', @@ -206,18 +301,197 @@ }, ), ], - className='row', + # className='six columns' ), ], className='page', ) ]) +################################################################################ +# STATIC MARKDOWN +################################################################################ +discover_intro_md = (DASH_DIR / "assets/discover_intro.md").read_text() ################################################################################ -# CALLBACKS +# COMPONENT FACTORIES +################################################################################ +def Card(children, **kwargs): + return html.Section(children, className="card-style") + +def NamedRangeSlider(name, short, min, max, step, val, marks=None): + if marks: + step = None + else: + marks = {i: i for i in range(min, max + 1, step)} + + return html.Div( + style={"margin": "25px 5px 30px 0px"}, + children=[ + f"{name}:", + html.Div( + style={"margin-left": "5px"}, + children=[ + dcc.RangeSlider( + id=f"slider-{short}", + min=min, + max=max, + marks=marks, + step=step, + value=val, + ) + ], + ), + ], + ) + + +################################################################################ +# LAYOUT FACTORIES +################################################################################ +def create_discover_layout(app): + return html.Div( + className="row", + style={"max-width": "100%", "font-size": "1.5rem", "padding": "0px 0px"}, + children=[ + # Demo Description + html.Div( + className="row background", + id="discover-explanation", + style={"padding": "50px 45px"}, + children=[ + html.Div( + id="discover-description-text", + children=dcc.Markdown(discover_intro_md) + ), + # TODO: what is this + html.Div( + html.Button(id="learn-more-button", children=["Learn More"]) + ), + ], + ), + # Body + html.Div( + className="row background", + style={"padding": "10px"}, + children=[ + html.Div( + className="three columns", + children=[ + Card( + [ + dcc.Dropdown( + id="discover-categories", + searchable=False, + clearable=False, + options=[ + { + "label": "Machine Learning", + "value": "cs.LG", + }, + { + "label": "Computer Vision", + "value": "cs.CV", + }, + { + "label": "Computational Ling.", + "value": "cs.CL", + }, + ], + placeholder="Select a machine learning field", + value="cs.LG", + ), + NamedRangeSlider( + name="Year", + short="discover-year", + min=1995, + max=2020, + step=None, + val=(2019, 2020), + marks={ + i: str(i) for i in range(1995, 2020, 5) + }, + ), + ] + ) + ], + ), + html.Div( + className="six columns", + children=[ + dcc.Graph(id="graph-3d-plot-tsne", style={"height": "98vh"}) + ], + ), + ], + ), + ], + ) + + ################################################################################ +# HELPER METHODS +################################################################################ +def _soft_match_title(user_title: str) -> Set[PaperID]: + search_results = set() + if user_title == 'Any': + for papers in TITLES.values(): + search_results |= papers + return search_results + search_results = set(search_index(user_title, "abstract", index)) + return search_results + + +def _soft_match_author(user_author: str) -> Set[PaperID]: + # TODO: Adjust for casing + matched = set() + for author, papers in AUTHORS.items(): + if user_author == 'Any' or user_author in author: + matched |= papers + return matched + + +def _soft_match_topic(user_topic: str) -> Set[PaperID]: + # TODO: Adjust for casing + matched = set() + for topic, papers in TOPICS.items(): + if user_topic == 'Any' or user_topic in topic: + matched |= papers + return matched + + +def exploration_feed(username: str, + author: str, + title: str, + topic: str, + date: str + ): + matched_titles = _soft_match_title(title) + matched_authors = _soft_match_author(author) + matched_topics = _soft_match_topic(topic) + print(author, title, topic) + # print(f'Matched authors: {matched_authors}') + # print(f'Matched titles: {matched_titles}') + # print(f'Matched topics: {matched_topics}') + possible_papers = list(matched_authors & matched_topics & matched_titles) + DASH.feed = PaperFeed(collection=possible_papers) + + li = list() + for i, paper_id in enumerate(DASH.feed.displayed): + paper = DB[paper_id] + li.append( + [ + html.H5([html.A(paper.title, href=paper.url)]), + html.H6([', '.join([author.name for author in paper.authors]), + ' -- ', paper.year, ' -- ', paper.venue]), + html.Hr(), + ] + ) + return li + +################################################################################ +# CALLBACKS +################################################################################ @app.callback( Output('filters', 'children'), [Input('feed', 'value')] @@ -228,12 +502,17 @@ def display_filters(feed: str): return [topics_filter, author_filter, title_filter, date_filter, search_button] elif feed == 'Recommended': return [date_filter, search_button] + elif feed == 'Discover': + return create_discover_layout(app) else: return [] - @app.callback( - Output('display-feed', 'children'), + [ + # Output('display-feed', 'children'), + Output(f'paper-placeholder-{i}', 'children') + for i in range(DASH.feed.display_size - 1) + ], [ Input('button', 'n_clicks'), ], @@ -260,6 +539,8 @@ def display_feed( ff = dict() for f in filters: filter_name = f['props']['id'].split('-')[0] + if filter_name == 'button': + continue filter_value = f['props']['children'][1]['props']['value'] ff[filter_name] = filter_value @@ -278,69 +559,108 @@ def display_feed( raise ValueError(f'Unknown feed {feed}') -# The following 3 callbacks should probably be handled with elastic search -def _soft_match_title(user_title: str) -> Set[PaperID]: - search_results = set(search_index(user_title, "abstract", index)) - return search_results - - -def _soft_match_author(user_author: str) -> Set[PaperID]: - # TODO: Adjust for casing - matched = set() - for author, papers in AUTHORS.items(): - if user_author == 'Any' or user_author in author: - matched |= papers - return matched - - -def _soft_match_topic(user_topic: str) -> Set[PaperID]: - # TODO: Adjust for casing - matched = set() - for topic, papers in TOPICS.items(): - if user_topic == 'Any' or user_topic in topic: - matched |= papers - return matched +@app.callback( + [Output(f'paper-placeholder-{i}', 'className') for i in + range(DASH.feed.display_size - 1)], + [Input('feed2-div', 'children')], +) +def highlight_selected_paper(*args): + classnames = ['paper-placeholder' for paper in range(DASH.feed.display_size - 1)] + classnames[DASH.feed.selected] = 'selected-paper-div' + print(classnames) + return classnames -def exploration_feed(username: str, - author: str, - title: str, - topic: str, - date: str - ) -> html.Ul: - matched_titles = _soft_match_title(title) - matched_authors = _soft_match_author(author) - matched_topics = _soft_match_topic(topic) - - print(author, title, topic) - - # print(f'Matched authors: {matched_authors}') - # print(f'Matched titles: {matched_titles}') - # print(f'Matched topics: {matched_topics}') - - possible_papers = matched_authors & matched_topics & matched_titles - print(len(possible_papers)) - possible_papers = list(possible_papers)[:10] +@app.callback( + Output('feed2-div', 'children'), + [Input(f'paper-placeholder-{i}', 'n_clicks') for i in + range(DASH.feed.display_size - 1)], + [State('checklist', 'value')] +) +def feed2(*args): + """ Dynamically create callbacks for each paper? """ + print(dash.callback_context.triggered) + checklist = args[-1] + idx = int( + dash.callback_context.triggered[0]['prop_id'].split('.')[0].split('-')[ + -1]) + DASH.feed.selected = idx + print("Selected: ", DASH.feed.selected) + paper = DB[DASH.feed.displayed[idx]] + print(f'PAPER SELECTED: {paper.title}') li = list() - for paper in possible_papers: - paper = DB[paper] + + to_display = list() + for category in checklist: + if category == 'similar': + pass + elif category == 'citations': + to_display += paper.citations + elif category == 'references': + to_display += paper.references + + for p in tqdm(to_display): + if p.arxivId is None or p.arxivId not in DB: + continue + paper = DB[p.arxivId] + print(f'FOUND CITATION: {p.arxivId}') li.append(html.Li( children=[ html.H5([html.A(paper.title, href=paper.url)]), html.H6([', '.join([author.name for author in paper.authors]), ' -- ', paper.year, ' -- ', paper.venue]), + html.Button('More like this', id=f'more-{paper.doi}'), + html.Button('Less like this', id=f'less-{paper.doi}'), html.Hr(), ], style={'list-style-type': 'none'} )) - return html.Ul(children=li) +@app.callback( + Output("graph-3d-plot-tsne", "figure"), + [ + Input("discover-categories", "value"), + Input("slider-discover-year", "value"), + ], +) +def display_3d_scatter_plot( + category, + year_range, +): + start, end = year_range + tsne_df = TSNE_CSV.loc[(TSNE_CSV["Topic"] == category) & + (TSNE_CSV["Year"] <= end) & + (TSNE_CSV["Year"] >= start)].sort_values("CitationVelocity") + + axes = dict(title="", showgrid=True, zeroline=False, + showticklabels=False) + layout = go.Layout( + margin=dict(l=0, r=0, b=0, t=0), + scene=dict(xaxis=axes, yaxis=axes, zaxis=axes), + ) + + scatter = go.Scatter3d( + name=str(tsne_df.index), + x=tsne_df["x"], + y=tsne_df["y"], + z=tsne_df["z"], + text=tsne_df["Title"], + textposition="middle center", + showlegend=False, + mode="markers", + marker=dict(size=3, color="#3266c1", symbol="circle"), + ) + + figure = go.Figure(data=[scatter], layout=layout) + return figure + + def recommendation_feed(username: str, date: str) -> html.Ul: """ Generates a list of recommended paper based on user's preference. - + """ # TODO: dump preferences in SQL instead of flat files