You will train it on a text corpus that you're interested in, and then
show samples from the model.
| + | |||
| + | This lab is slightly different than previous labs in that we give you a large portion of the code, and you will just be filling in pieces of classes and functions. If you get suck, please get help from the TA's or your classmates. | ||
You should turn in your Jupyter notebook

  * 40% Correct implementation of the sequence to sequence class
  * 20% Correct implementation of training and sampling
  * 5% Correct implementation of GRU cell
  * 20% Training and sampling on a novel text dataset (must be your choice)
  * 15% Good coding style, readable output
----
**Part 0: Readings, data loading, and high-level training**
There is a tutorial here that will help you build out scaffolding code and get an understanding of using sequences in PyTorch.

[[http://colah.github.io/posts/2015-08-Understanding-LSTMs/|Understanding LSTM Networks]]
| + | |||
| + | <code bash> | ||
| + | ! wget -O ./text_files.tar.gz 'https://piazza.com/redirect/s3?bucket=uploads&prefix=attach%2Fjlifkda6h0x5bk%2Fhzosotq4zil49m%2Fjn13x09arfeb%2Ftext_files.tar.gz' | ||
| + | ! tar -xzf text_files.tar.gz | ||
| + | ! pip install unidecode | ||
| + | ! pip install torch | ||
| + | </code> | ||
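
Before building training pairs you will need the corpus in memory as one big string, along with the set of characters it can contain. Here is a minimal sketch of one way to do that; the file name ''lotr.txt'' and the variable names (''file'', ''all_characters'', ''n_characters'') are assumptions, so adapt them to whatever your notebook actually uses:

<code python>
import string
import unidecode

# The character vocabulary: all printable ASCII characters.
all_characters = string.printable
n_characters = len(all_characters)

# Read one of the downloaded text files (or your own corpus) into a single
# string, stripping it down to plain ASCII with unidecode.
file = unidecode.unidecode(open('./text_files/lotr.txt').read())
file_len = len(file)
print('file_len =', file_len)
</code>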
| + | |||
| + | |||
| <code python> | <code python> | ||
| Line 102: | Line 114: | ||
| <code python> | <code python> | ||
import torch
from torch.autograd import Variable

# Turn string into list of longs
def char_tensor(string):
    ...

def random_training_set():
    ...
    target = char_tensor(chunk[1:])
    return inp, target
</code>
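
For reference, here is a minimal sketch of how these two helpers could be completed, assuming the corpus string ''file'' and vocabulary ''all_characters'' from the loading sketch above; ''chunk_len'' and ''random_chunk'' are assumed names and values, so use whatever your notebook defines:

<code python>
import random

chunk_len = 200  # length of each training chunk (an assumed value)

def random_chunk():
    # Grab a random chunk_len-character slice of the corpus (hypothetical helper).
    start_index = random.randint(0, len(file) - chunk_len - 1)
    return file[start_index:start_index + chunk_len]

def char_tensor(string):
    # One possible implementation: map each character to its vocabulary index.
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return Variable(tensor)

def random_training_set():
    # Input and target are the same chunk offset by one character,
    # so the model learns to predict the next character.
    chunk = random_chunk()
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target
</code>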
| + | |||
| + | <code python> | ||
| + | import time | ||
| + | n_epochs = 2000 | ||
| + | print_every = 100 | ||
| + | plot_every = 10 | ||
| + | hidden_size = 100 | ||
| + | n_layers = 1 | ||
| + | lr = 0.005 | ||
| + | |||
| + | decoder = RNN(n_characters, hidden_size, n_characters, n_layers) | ||
| + | decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr) | ||
| + | criterion = nn.CrossEntropyLoss() | ||
| + | |||
| + | start = time.time() | ||
| + | all_losses = [] | ||
| + | loss_avg = 0 | ||
| + | |||
| + | for epoch in range(1, n_epochs + 1): | ||
| + | loss_ = train(*random_training_set()) | ||
| + | loss_avg += loss_ | ||
| + | |||
| + | if epoch % print_every == 0: | ||
| + | print('[%s (%d %d%%) %.4f]' % (time.time() - start, epoch, epoch / n_epochs * 100, loss_)) | ||
| + | print(evaluate('Wh', 100), '\n') | ||
| + | |||
| + | if epoch % plot_every == 0: | ||
| + | all_losses.append(loss_avg / plot_every) | ||
| + | loss_avg = 0 | ||
| </code> | </code> | ||
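
Since ''all_losses'' records an averaged loss every ''plot_every'' epochs, a quick plot of the training curve is easy to add. This is just a convenience sketch using matplotlib, not something the lab requires:

<code python>
import matplotlib.pyplot as plt

plt.figure()
plt.plot(all_losses)              # one point per plot_every epochs
plt.xlabel('plot interval')
plt.ylabel('average training loss')
plt.show()
</code>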

<code python>
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        ...
        # decode output

    def forward(self, input_char, hidden):
        # by reviewing the documentation, construct a forward function that properly uses the output
        # of the GRU
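        # One possible shape for this method (a sketch, not the required
        # solution), assuming __init__ defines self.embedding (nn.Embedding),
        # self.gru (nn.GRU), and self.out (nn.Linear):
        #
        #   embedded = self.embedding(input_char).view(1, 1, -1)
        #   output, hidden = self.gru(embedded, hidden)
        #   output = self.out(output.view(1, -1))
        #   return output, hidden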
</code>

<code python>
def train(inp, target):
    # your code here
    ## /
    loss = 0
    for c in range(chunk_len):
        output, hidden = # run the forward pass of your rnn with proper input
        loss += criterion(output, target[c].unsqueeze(0))

    ## calculate backwards loss and step the optimizer (globally)
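    # One way the rest of this function might look (a sketch; decoder and
    # decoder_optimizer are the globals created in the training cell above,
    # and zeroing the gradients belongs up where "your code here" is):
    #
    #   loss.backward()
    #   decoder_optimizer.step()
    #   return loss.item() / chunk_len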
</code>

<code python>
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    ## initialize hidden variable, initialize other useful variables
    # your code here
    ## /

    prime_input = char_tensor(prime_str)

    # Use priming string to "build up" hidden state
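    # A sketch of how the rest of evaluate() might proceed (one possible
    # approach, assuming hidden was initialized above and decoder is the
    # trained model):
    #
    #   for p in range(len(prime_str) - 1):
    #       _, hidden = decoder(prime_input[p], hidden)
    #   inp = prime_input[-1]
    #
    #   predicted = prime_str
    #   for p in range(predict_len):
    #       output, hidden = decoder(inp, hidden)
    #       # scale logits by temperature and sample the next character
    #       output_dist = output.data.view(-1).div(temperature).exp()
    #       top_i = int(torch.multinomial(output_dist, 1)[0])
    #       predicted += all_characters[top_i]
    #       inp = char_tensor(all_characters[top_i])
    #   return predicted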