diff --git a/nlp/language_model/bert/pytorch/README.md b/nlp/language_model/bert/pytorch/README.md
index 050689b3fb7aef8dda3ef65c6cfc1bd7c04efa0c..bec27fdfdf000bd62b4d2428ef7b63426e0c6419 100644
--- a/nlp/language_model/bert/pytorch/README.md
+++ b/nlp/language_model/bert/pytorch/README.md
@@ -25,7 +25,11 @@ Reference: [training_results_v1.0](https://github.com/mlcommons/training_results
 ### Install Dependencies
 
 ```shell
-bash init.sh
+apt install -y git numactl
+pip install h5py
+pip install psutil
+pip install mlperf-logging
+pip install boto3
 ```
 
 ## Model Training
diff --git a/nlp/language_model/bert/pytorch/optim/distributed_fused_lamb.py b/nlp/language_model/bert/pytorch/optim/distributed_fused_lamb.py
index 150d9e3d0e7035b70dd284953ac239bbb7696b67..0b72aeb76a139da15e8627ab3f6a77ec30093b30 100644
--- a/nlp/language_model/bert/pytorch/optim/distributed_fused_lamb.py
+++ b/nlp/language_model/bert/pytorch/optim/distributed_fused_lamb.py
@@ -43,7 +43,7 @@ def _pipeline_block_reductions_patched(self, block_id):
         rs_stream.wait_stream(torch.cuda.current_stream())
         rs_stream.wait_stream(self._l2_grad_norm_st)
         with torch.cuda.stream(rs_stream):
-            works[chunk_id] = torch.distributed.reduce_scatter(self._fp16_g_chunks[block_id][chunk_id],self._flat_grads_shards[block_id][chunk_id],group=self._rs_pg[glob_chunk_id%self._num_rs_pg],async_op=True,no_copy=True)
+            works[chunk_id] = torch.distributed.reduce_scatter(self._fp16_g_chunks[block_id][chunk_id],self._flat_grads_shards[block_id][chunk_id],group=self._rs_pg[glob_chunk_id%self._num_rs_pg],async_op=True)
 
     # Reduction across nodes for each rank
     if self._num_groups > 1:
@@ -118,6 +118,6 @@ def _pipeline_step_patched(self):
                 self._contrib_weight_decay,
                 global_grad_norm,
                 self._use_nvlamb)
 
-        torch.distributed.all_gather(self._new_params_mega_shards, self._fp16_p, group=self._ag_pg[0], no_copy=True)
+        torch.distributed.all_gather(self._new_params_mega_shards, self._fp16_p, group=self._ag_pg[0])
 
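
The optimizer change drops the `no_copy=True` keyword, which stock `torch.distributed.reduce_scatter` and `torch.distributed.all_gather` do not accept (it is an extension found in patched/apex-style builds), so passing it on a standard PyTorch install raises a `TypeError`. Below is a minimal sketch, not the repo's code, of the standard call signatures the patch falls back to; it assumes an NCCL process group with one GPU per rank (e.g. launched via `torchrun --nproc_per_node=<N>`), and the tensor names are made up for illustration.

```python
# Sketch of stock torch.distributed reduce_scatter / all_gather usage.
# Assumes NCCL backend and one CUDA device per rank.
import torch
import torch.distributed as dist

def main():
    dist.init_process_group(backend="nccl")
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    torch.cuda.set_device(rank)

    # reduce_scatter(output, input_list, ...): each rank contributes a list of
    # world_size shards and receives the element-wise sum of the shards at its
    # own index. Stock PyTorch has no `no_copy` keyword here.
    shards = [torch.full((4,), float(rank), device="cuda") for _ in range(world_size)]
    out = torch.empty(4, device="cuda")
    work = dist.reduce_scatter(out, shards, async_op=True)  # returns a Work handle
    work.wait()

    # all_gather(tensor_list, tensor, ...): every rank receives every rank's tensor.
    gathered = [torch.empty(4, device="cuda") for _ in range(world_size)]
    dist.all_gather(gathered, out)

    dist.destroy_process_group()

if __name__ == "__main__":
    main()
```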