Refactor: Format Code with Ruff and Update Deprecated G2PW Link (#2255)
* ruff check --fix
* ruff format --line-length 120 --target-version py39
* Change the link for G2PW Model
* update pytorch version and colab
This commit is contained in:
@@ -7,6 +7,7 @@ from module import commons
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class LayerNorm(nn.Module):
|
||||
def __init__(self, channels, eps=1e-5):
|
||||
super().__init__()
|
||||
@@ -43,7 +44,7 @@ class Encoder(nn.Module):
|
||||
p_dropout=0.0,
|
||||
window_size=4,
|
||||
isflow=True,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__()
|
||||
self.hidden_channels = hidden_channels
|
||||
@@ -65,13 +66,9 @@ class Encoder(nn.Module):
|
||||
if self.gin_channels != 0:
|
||||
self.spk_emb_linear = nn.Linear(self.gin_channels, self.hidden_channels)
|
||||
# vits2 says 3rd block, so idx is 2 by default
|
||||
self.cond_layer_idx = (
|
||||
kwargs["cond_layer_idx"] if "cond_layer_idx" in kwargs else 2
|
||||
)
|
||||
self.cond_layer_idx = kwargs["cond_layer_idx"] if "cond_layer_idx" in kwargs else 2
|
||||
logging.debug(self.gin_channels, self.cond_layer_idx)
|
||||
assert (
|
||||
self.cond_layer_idx < self.n_layers
|
||||
), "cond_layer_idx should be less than n_layers"
|
||||
assert self.cond_layer_idx < self.n_layers, "cond_layer_idx should be less than n_layers"
|
||||
self.drop = nn.Dropout(p_dropout)
|
||||
self.attn_layers = nn.ModuleList()
|
||||
self.norm_layers_1 = nn.ModuleList()
|
||||
@@ -117,11 +114,13 @@ class Encoder(nn.Module):
|
||||
# x = self.norm_layers_2[i](x + y)
|
||||
# x = x * x_mask
|
||||
# return x
|
||||
|
||||
|
||||
def forward(self, x, x_mask):
|
||||
attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
|
||||
x = x * x_mask
|
||||
for attn_layers,norm_layers_1,ffn_layers,norm_layers_2 in zip(self.attn_layers,self.norm_layers_1,self.ffn_layers,self.norm_layers_2):
|
||||
for attn_layers, norm_layers_1, ffn_layers, norm_layers_2 in zip(
|
||||
self.attn_layers, self.norm_layers_1, self.ffn_layers, self.norm_layers_2
|
||||
):
|
||||
y = attn_layers(x, x, attn_mask)
|
||||
y = self.drop(y)
|
||||
x = norm_layers_1(x + y)
|
||||
@@ -170,14 +169,8 @@ class MultiHeadAttention(nn.Module):
|
||||
if window_size is not None:
|
||||
n_heads_rel = 1 if heads_share else n_heads
|
||||
rel_stddev = self.k_channels**-0.5
|
||||
self.emb_rel_k = nn.Parameter(
|
||||
torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
|
||||
* rel_stddev
|
||||
)
|
||||
self.emb_rel_v = nn.Parameter(
|
||||
torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels)
|
||||
* rel_stddev
|
||||
)
|
||||
self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
|
||||
self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
|
||||
|
||||
nn.init.xavier_uniform_(self.conv_q.weight)
|
||||
nn.init.xavier_uniform_(self.conv_k.weight)
|
||||
@@ -187,7 +180,7 @@ class MultiHeadAttention(nn.Module):
|
||||
self.conv_k.weight.copy_(self.conv_q.weight)
|
||||
self.conv_k.bias.copy_(self.conv_q.bias)
|
||||
|
||||
def forward(self, x, c, attn_mask:Optional[torch.Tensor]=None):
|
||||
def forward(self, x, c, attn_mask: Optional[torch.Tensor] = None):
|
||||
q = self.conv_q(x)
|
||||
k = self.conv_k(c)
|
||||
v = self.conv_v(c)
|
||||
@@ -198,7 +191,7 @@ class MultiHeadAttention(nn.Module):
|
||||
x = self.conv_o(x)
|
||||
return x
|
||||
|
||||
def attention(self, query, key, value, mask:Optional[torch.Tensor]=None):
|
||||
def attention(self, query, key, value, mask: Optional[torch.Tensor] = None):
|
||||
# reshape [b, d, t] -> [b, n_h, t, d_k]
|
||||
b, d, t_s, _ = (*key.size(), query.size(2))
|
||||
query = query.view(b, self.n_heads, self.k_channels, -1).transpose(2, 3)
|
||||
@@ -223,8 +216,8 @@ class MultiHeadAttention(nn.Module):
|
||||
relative_weights = self._absolute_position_to_relative_position(p_attn)
|
||||
value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
|
||||
output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
|
||||
|
||||
output = (output.transpose(2, 3).contiguous().view(b, d, -1))
|
||||
|
||||
output = output.transpose(2, 3).contiguous().view(b, d, -1)
|
||||
return output, p_attn
|
||||
|
||||
def _matmul_with_relative_values(self, x, y):
|
||||
@@ -248,19 +241,17 @@ class MultiHeadAttention(nn.Module):
|
||||
def _get_relative_embeddings(self, relative_embeddings, length):
|
||||
max_relative_position = 2 * self.window_size + 1
|
||||
# Pad first before slice to avoid using cond ops.
|
||||
pad_l = torch.zeros((1), dtype = torch.int64) + length - (self.window_size + 1)
|
||||
pad_s = torch.zeros((1), dtype = torch.int64) + (self.window_size + 1) - length
|
||||
pad_length = torch.max(pad_l, other=torch.zeros((1), dtype = torch.int64))
|
||||
slice_start_position = torch.max(pad_s, other=torch.zeros((1), dtype = torch.int64))
|
||||
pad_l = torch.zeros((1), dtype=torch.int64) + length - (self.window_size + 1)
|
||||
pad_s = torch.zeros((1), dtype=torch.int64) + (self.window_size + 1) - length
|
||||
pad_length = torch.max(pad_l, other=torch.zeros((1), dtype=torch.int64))
|
||||
slice_start_position = torch.max(pad_s, other=torch.zeros((1), dtype=torch.int64))
|
||||
|
||||
slice_end_position = slice_start_position + 2 * length - 1
|
||||
padded_relative_embeddings = F.pad(
|
||||
relative_embeddings,
|
||||
commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
|
||||
)
|
||||
used_relative_embeddings = padded_relative_embeddings[
|
||||
:, slice_start_position:slice_end_position
|
||||
]
|
||||
used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position]
|
||||
return used_relative_embeddings
|
||||
|
||||
def _relative_position_to_absolute_position(self, x):
|
||||
@@ -274,14 +265,10 @@ class MultiHeadAttention(nn.Module):
|
||||
|
||||
# Concat extra elements so to add up to shape (len+1, 2*len-1).
|
||||
x_flat = x.view([batch, heads, length * 2 * length])
|
||||
x_flat = F.pad(
|
||||
x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
|
||||
)
|
||||
x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]]))
|
||||
|
||||
# Reshape and slice out the padded elements.
|
||||
x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[
|
||||
:, :, :length, length - 1 :
|
||||
]
|
||||
x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1 :]
|
||||
return x_final
|
||||
|
||||
def _absolute_position_to_relative_position(self, x):
|
||||
@@ -291,9 +278,7 @@ class MultiHeadAttention(nn.Module):
|
||||
"""
|
||||
batch, heads, length, _ = x.size()
|
||||
# padd along column
|
||||
x = F.pad(
|
||||
x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
|
||||
)
|
||||
x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]]))
|
||||
x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
|
||||
# add 0's in the beginning that will skew the elements after reshape
|
||||
x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
|
||||
@@ -351,7 +336,7 @@ class FFN(nn.Module):
|
||||
x = self.drop(x)
|
||||
x = self.conv_2(self.padding(x * x_mask))
|
||||
return x * x_mask
|
||||
|
||||
|
||||
def padding(self, x):
|
||||
return self._same_padding(x)
|
||||
|
||||
@@ -395,12 +380,6 @@ class MRTE(nn.Module):
|
||||
|
||||
ssl_enc = self.c_pre(ssl_enc * ssl_mask)
|
||||
text_enc = self.text_pre(text * text_mask)
|
||||
x = (
|
||||
self.cross_attention(
|
||||
ssl_enc * ssl_mask, text_enc * text_mask, attn_mask
|
||||
)
|
||||
+ ssl_enc
|
||||
+ ge
|
||||
)
|
||||
x = self.cross_attention(ssl_enc * ssl_mask, text_enc * text_mask, attn_mask) + ssl_enc + ge
|
||||
x = self.c_post(x * ssl_mask)
|
||||
return x
|
||||
|
||||
Reference in New Issue
Block a user