Here is a program that takes in a sentence with a mask for a missing word and finally outputs a heat-map .png graphic for every attention head (all 144 of them, lol). It would be nice to be able to see the final results through bertviz. I have had a play around with some of the notebooks and tried various builds, but I can't seem to get them to do what the notebooks so brilliantly show. The issue seems to be around these lines:

inputs = tokenizer(text, return_tensors="tf")
mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)

I can't seem to integrate this into one of the notebooks. Do you have an example notebook that demonstrates using and predicting a [MASK] token?
This is the code I have:
import sys
import tensorflow as tf
from PIL import Image, ImageDraw, ImageFont
from transformers import AutoTokenizer, AutoModel, TFBertForMaskedLM
from bertviz import head_view
# Pre-trained masked language model
MODEL = "bert-base-uncased"

# Number of predictions to generate
K = 3

# Constants for generating attention diagrams
FONT = ImageFont.truetype("assets/fonts/OpenSans-Regular.ttf", 28)
GRID_SIZE = 40
PIXELS_PER_WORD = 200
def main():
    # text = input("Text: ")
    text = "Then I picked up a [MASK] from the table."

    # Tokenize input
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    inputs = tokenizer(text, return_tensors="tf")
    mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)
    if mask_token_index is None:
        sys.exit(f"Input must include mask token {tokenizer.mask_token}.")

    # Use model to process input
    model = TFBertForMaskedLM.from_pretrained(MODEL)
    result = model(**inputs, output_attentions=True)

    # Generate predictions
    mask_token_logits = result.logits[0, mask_token_index]
    top_tokens = tf.math.top_k(mask_token_logits, K).indices.numpy()
    for token in top_tokens:
        print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))

    # Visualize attentions
    visualize_attentions(inputs.tokens(), result.attentions)
def get_mask_token_index(mask_token_id, inputs):
    """
    Return the index of the token with the specified mask_token_id, or None
    if not present in the inputs.
    """
    input_ids = inputs["input_ids"][0].numpy()
    for i, token_id in enumerate(input_ids):
        if token_id == mask_token_id:
            return i
    return None
def get_color_for_attention_score(attention_score):
    """
    Return a tuple of three integers representing a shade of gray for the
    given attention_score. Each value should be in the range [0, 255].
    """
    gray_value = int(attention_score * 255)
    return (gray_value, gray_value, gray_value)
def visualize_attentions(tokens, attentions):
    """
    Produce a graphical representation of self-attention scores for all
    layers and heads.

    For each attention layer, one diagram is generated per attention head in
    the layer. Each diagram includes the list of `tokens` in the sentence,
    and each filename includes both the layer number and the head number
    (both counted from 1).
    """
    num_layers = len(attentions)
    num_heads = len(attentions[0][0])
    for layer_number in range(1, num_layers + 1):
        for head_number in range(1, num_heads + 1):
            generate_diagram(
                layer_number,
                head_number,
                tokens,
                attentions[layer_number - 1][0][head_number - 1]
            )
def generate_diagram(layer_number, head_number, tokens, attention_weights):
    """
    Generate a diagram representing the self-attention scores for a single
    attention head. The diagram shows one row and one column for each of the
    tokens, and cells are shaded based on attention_weights, with lighter
    cells corresponding to higher attention scores.

    The diagram is saved with a filename that includes both the
    `layer_number` and the `head_number`.
    """
    # Create new image
    image_size = GRID_SIZE * len(tokens) + PIXELS_PER_WORD
    img = Image.new("RGBA", (image_size, image_size), "black")
    draw = ImageDraw.Draw(img)

    # Draw each token onto the image
    for i, token in enumerate(tokens):
        # Draw token columns
        token_image = Image.new("RGBA", (image_size, image_size), (0, 0, 0, 0))
        token_draw = ImageDraw.Draw(token_image)
        token_draw.text(
            (image_size - PIXELS_PER_WORD, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )
        token_image = token_image.rotate(90)
        img.paste(token_image, mask=token_image)

        # Draw token rows
        _, _, width, _ = draw.textbbox((0, 0), token, font=FONT)
        draw.text(
            (PIXELS_PER_WORD - width, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )

    # Draw each cell of the attention grid
    for i in range(len(tokens)):
        y = PIXELS_PER_WORD + i * GRID_SIZE
        for j in range(len(tokens)):
            x = PIXELS_PER_WORD + j * GRID_SIZE
            color = get_color_for_attention_score(attention_weights[i][j])
            draw.rectangle((x, y, x + GRID_SIZE, y + GRID_SIZE), fill=color)

    # Save image
    img.save(f"Attention_Layer{layer_number}_Head{head_number}.png")
if __name__ == "__main__":
    main()
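For what it's worth, this is roughly the sort of thing I have been attempting in a notebook cell to hand the attentions over to head_view. I'm not at all sure the conversion is right; I'm assuming head_view wants a list of PyTorch tensors of shape (batch, heads, seq_len, seq_len) plus the token strings:

import torch

# Same pipeline as main(), but keeping the pieces I think bertviz needs
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = TFBertForMaskedLM.from_pretrained(MODEL)

inputs = tokenizer("Then I picked up a [MASK] from the table.", return_tensors="tf")
result = model(**inputs, output_attentions=True)

# result.attentions is a tuple of TF tensors, one per layer, each shaped
# (batch, heads, seq_len, seq_len); converting each one to a torch tensor
# since I assume that is what head_view expects
attention = [torch.from_numpy(layer.numpy()) for layer in result.attentions]
tokens = inputs.tokens()

head_view(attention, tokens)  # run inside a Jupyter cell so the visualization renders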
Any help or pointers would be gratefully received... :)