
Commit 6d8973a

Merge pull request #5 from r9y9/master
Reverse PR
2 parents 32cab90 + df8695c

File tree

9 files changed: +33 −26 lines


.travis.yml

Lines changed: 8 additions & 1 deletion
@@ -21,13 +21,20 @@ before_install:
   # Useful for debugging any issues with conda
   - conda config --add channels pypi
   - conda info -a
-  - deps='pip numpy scipy cython nose pytorch'
+  - deps='pip numpy scipy cython nose pytorch flake8'
   - conda create -q -n test-environment "python=$TRAVIS_PYTHON_VERSION" $deps -c pytorch
   - source activate test-environment

 install:
   - pip install -e ".[test]"
   - python -c "import nltk; nltk.download('cmudict')"

+before_script:
+  # stop the build if there are Python syntax errors or undefined names
+  - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+  # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+  - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
 script:
   - nosetests -v -w tests/ -a '!local_only'
+
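Note on the new `before_script` stage: the first flake8 pass gates the build on genuine breakage only (E901/E999 are syntax errors; F821–F823 are undefined or misused names), while the second pass runs with `--exit-zero`, so broader style and complexity findings are reported without failing CI.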

README.md

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@

 PyTorch implementation of convolutional networks-based text-to-speech synthesis models:

-1. [arXiv:1710.07654](https://arxiv.org/abs/1710.07654): Deep Voice 3: 2000-Speaker Neural Text-to-Speech.
+1. [arXiv:1710.07654](https://arxiv.org/abs/1710.07654): Deep Voice 3: Scaling Text-to-Speech with Convolutional Sequence Learning.
 2. [arXiv:1710.08969](https://arxiv.org/abs/1710.08969): Efficiently Trainable Text-to-Speech System Based on Deep Convolutional Networks with Guided Attention.

 Audio samples are available at https://r9y9.github.io/deepvoice3_pytorch/.

deepvoice3_pytorch/conv.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def incremental_forward(self, input):
             self.input_buffer[:, :-1, :] = self.input_buffer[:, 1:, :].clone()
             # append next input
             self.input_buffer[:, -1, :] = input[:, -1, :]
-            input = torch.autograd.Variable(self.input_buffer, volatile=True)
+            input = torch.Tensor(self.input_buffer)
         if dilation > 1:
             input = input[:, 0::dilation, :].contiguous()
         output = F.linear(input.view(bsz, -1), weight, self.bias)
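This hunk is part of the PyTorch 0.4 migration: `Variable` was merged into `Tensor`, and the `volatile=True` flag was removed in favor of the `torch.no_grad()` context. A minimal sketch of the replacement idiom (illustrative shapes only, not this repository's buffer handling):

```python
import torch
import torch.nn.functional as F

buf = torch.zeros(1, 4, 8)            # stand-in for an incremental input buffer
with torch.no_grad():                 # replaces volatile=True from PyTorch < 0.4
    out = F.linear(buf.view(1, -1), torch.randn(2, 32))
print(out.shape)                      # torch.Size([1, 2])
```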

deepvoice3_pytorch/modules.py

Lines changed: 6 additions & 10 deletions
@@ -48,22 +48,18 @@ def forward(self, x, w=1.0):

         if isscaler or w.size(0) == 1:
             weight = sinusoidal_encode(self.weight, w)
-            return self._backend.Embedding.apply(
-                x, weight,
-                padding_idx, self.max_norm, self.norm_type,
-                self.scale_grad_by_freq, self.sparse
-            )
+            return F.embedding(
+                x, weight, padding_idx, self.max_norm,
+                self.norm_type, self.scale_grad_by_freq, self.sparse)
         else:
             # TODO: cannot simply apply for batch
             # better to implement efficient function
             pe = []
             for batch_idx, we in enumerate(w):
                 weight = sinusoidal_encode(self.weight, we)
-                pe.append(self._backend.Embedding.apply(
-                    x[batch_idx], weight,
-                    padding_idx, self.max_norm, self.norm_type,
-                    self.scale_grad_by_freq, self.sparse
-                ))
+                pe.append(F.embedding(
+                    x[batch_idx], weight, padding_idx, self.max_norm,
+                    self.norm_type, self.scale_grad_by_freq, self.sparse))
             pe = torch.stack(pe)
             return pe
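`self._backend.Embedding.apply` relied on a private backend hook that was dropped in later PyTorch releases; `torch.nn.functional.embedding` is the public replacement and takes the same lookup arguments. A minimal sketch, with a toy weight matrix standing in for the sinusoidal table:

```python
import torch
import torch.nn.functional as F

weight = torch.randn(10, 4)      # 10-entry table, embedding dim 4
ids = torch.tensor([1, 3, 5])    # indices to look up
vecs = F.embedding(ids, weight)  # selects rows of `weight`
print(vecs.shape)                # torch.Size([3, 4])
```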

docs/content/index.md

Lines changed: 1 addition & 1 deletion
@@ -392,5 +392,5 @@ Your browser does not support the audio element.

 ## References

-- [Wei Ping, Kainan Peng, Andrew Gibiansky, et al, "Deep Voice 3: 2000-Speaker Neural Text-to-Speech", arXiv:1710.07654, Oct. 2017.](https://arxiv.org/abs/1710.07654)
+- [Wei Ping, Kainan Peng, Andrew Gibiansky, et al, "Deep Voice 3: Scaling Text-to-Speech with Convolutional Sequence Learning", arXiv:1710.07654, Oct. 2017.](https://arxiv.org/abs/1710.07654)
 - [Hideyuki Tachibana, Katsuya Uenoyama, Shunsuke Aihara, "Efficiently Trainable Text-to-Speech System Based on Deep Convolutional Networks with Guided Attention". arXiv:1710.08969, Oct 2017.](https://arxiv.org/abs/1710.08969)

nikl_m.py

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@
 import re
 from hparams import hparams

+from hparams import hparams
+

 def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
     '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

nikl_s.py

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@
 import audio
 import re

+from hparams import hparams
+

 def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
     '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

tests/test_deepvoice3.py

Lines changed: 1 addition & 1 deletion
@@ -255,7 +255,7 @@ def test_incremental_forward():
         max_input_len = np.max(input_lengths) + 10  # manuall padding
         seqs = np.array([_pad(x, max_input_len) for x in seqs], dtype=np.int)
         input_lengths = torch.LongTensor(input_lengths)
-        input_lengths = input_lengths.cuda() if use_cuda else input_lenghts
+        input_lengths = input_lengths.cuda() if use_cuda else input_lengths
     else:
         input_lengths = None
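This is a typo correction: the fallback branch referenced the misspelled name `input_lenghts`, so any CPU-only run (`use_cuda` false) would have raised a `NameError`.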

train.py

Lines changed: 11 additions & 11 deletions
@@ -728,30 +728,30 @@ def train(model, data_loader, optimizer, writer,
         # Update
         loss.backward()
         if clip_thresh > 0:
-            grad_norm = torch.nn.utils.clip_grad_norm(
+            grad_norm = torch.nn.utils.clip_grad_norm_(
                 model.get_trainable_parameters(), clip_thresh)
         optimizer.step()

         # Logs
-        writer.add_scalar("loss", float(loss.data[0]), global_step)
+        writer.add_scalar("loss", float(loss.item()), global_step)
         if train_seq2seq:
-            writer.add_scalar("done_loss", float(done_loss.data[0]), global_step)
-            writer.add_scalar("mel loss", float(mel_loss.data[0]), global_step)
-            writer.add_scalar("mel_l1_loss", float(mel_l1_loss.data[0]), global_step)
-            writer.add_scalar("mel_binary_div_loss", float(mel_binary_div.data[0]), global_step)
+            writer.add_scalar("done_loss", float(done_loss.item()), global_step)
+            writer.add_scalar("mel loss", float(mel_loss.item()), global_step)
+            writer.add_scalar("mel_l1_loss", float(mel_l1_loss.item()), global_step)
+            writer.add_scalar("mel_binary_div_loss", float(mel_binary_div.item()), global_step)
         if train_postnet:
-            writer.add_scalar("linear_loss", float(linear_loss.data[0]), global_step)
-            writer.add_scalar("linear_l1_loss", float(linear_l1_loss.data[0]), global_step)
+            writer.add_scalar("linear_loss", float(linear_loss.item()), global_step)
+            writer.add_scalar("linear_l1_loss", float(linear_l1_loss.item()), global_step)
             writer.add_scalar("linear_binary_div_loss", float(
-                linear_binary_div.data[0]), global_step)
+                linear_binary_div.item()), global_step)
         if train_seq2seq and hparams.use_guided_attention:
-            writer.add_scalar("attn_loss", float(attn_loss.data[0]), global_step)
+            writer.add_scalar("attn_loss", float(attn_loss.item()), global_step)
         if clip_thresh > 0:
             writer.add_scalar("gradient norm", grad_norm, global_step)
         writer.add_scalar("learning rate", current_lr, global_step)

         global_step += 1
-        running_loss += loss.data[0]
+        running_loss += loss.item()

     averaged_loss = running_loss / (len(data_loader))
     writer.add_scalar("loss (per epoch)", averaged_loss, global_epoch)
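These hunks apply two more PyTorch 0.4 idioms: a scalar loss is now a 0-dimensional tensor, so `loss.data[0]` becomes `loss.item()`, and the in-place gradient clipper was renamed with a trailing underscore (`clip_grad_norm_`). A minimal sketch with a toy linear model standing in for this repository's TTS model:

```python
import torch

model = torch.nn.Linear(4, 1)                  # toy stand-in for the real model
loss = model(torch.randn(8, 4)).pow(2).mean()  # 0-dim tensor, not a 1-element vector
loss.backward()
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
print(loss.item(), float(grad_norm))           # .item() extracts the Python scalar
```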
