Changeset 14886


Ignore:
Timestamp:
04/24/17 16:26:43 (4 years ago)
Author:
mkommend
Message:

#2778: Refactored and corrected shuffling in DataPreprocessing.

Location:
trunk/sources/HeuristicLab.DataPreprocessing/3.4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/HeuristicLab.DataPreprocessing-3.4.csproj

    r13514 r14886  
    167167      <Private>False</Private>
    168168    </ProjectReference>
     169    <ProjectReference Include="..\..\HeuristicLab.Random\3.3\HeuristicLab.Random-3.3.csproj">
     170      <Project>{F4539FB6-4708-40C9-BE64-0A1390AEA197}</Project>
     171      <Name>HeuristicLab.Random-3.3</Name>
     172      <Private>False</Private>
     173    </ProjectReference>
    169174  </ItemGroup>
    170175  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Logic/ManipulationLogic.cs

    r14185 r14886  
    2424using System.Linq;
    2525using HeuristicLab.Data;
     26using HeuristicLab.Random;
    2627
    2728namespace HeuristicLab.DataPreprocessing {
     
    8182    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
    8283      preprocessingData.InTransaction(() => {
    83         Random r = new Random();
     84        System.Random r = new System.Random();
    8485
    8586        foreach (var column in cells) {
     
    226227
    227228    public void Shuffle(bool shuffleRangesSeparately) {
    228       Random random = new Random();
    229       var ranges = new[] { preprocessingData.TestPartition, preprocessingData.TrainingPartition };
     229      var random = new FastRandom();
     230
    230231      if (shuffleRangesSeparately) {
     232        var ranges = new[] { preprocessingData.TestPartition, preprocessingData.TrainingPartition };
    231233        preprocessingData.InTransaction(() => {
    232234          // process all given ranges - e.g. TrainingPartition, TestPartition
    233235          foreach (IntRange range in ranges) {
    234             List<Tuple<int, int>> shuffledIndices = new List<Tuple<int, int>>();
    235 
    236             // generate random indices used for shuffling each column
    237             for (int i = range.End - 1; i >= range.Start; --i) {
    238               int rand = random.Next(range.Start, i);
    239               shuffledIndices.Add(new Tuple<int, int>(i, rand));
    240             }
    241 
    242             ShuffleToIndices(shuffledIndices);
     236            var indices = Enumerable.Range(0, preprocessingData.Rows).ToArray();
     237            var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray();
     238            for (int i = range.Start, j = 0; i < range.End; i++, j++)
     239              indices[i] = shuffledIndices[j];
     240
     241            ReOrderToIndices(indices);
    243242          }
    244243        });
     244
    245245      } else {
    246246        preprocessingData.InTransaction(() => {
    247           var indices = ranges.SelectMany(x => Enumerable.Range(x.Start, x.Size)).ToList();
    248           var shuffledIndices = indices.OrderBy(x => random.Next());
    249           ShuffleToIndices(indices.Zip(shuffledIndices, (i, j) => new Tuple<int, int>(i, j)).ToList());
     247          var indices = Enumerable.Range(0, preprocessingData.Rows);
     248          var shuffledIndices = indices.Shuffle(random).ToArray();
     249          ReOrderToIndices(shuffledIndices);
    250250        });
    251251      }
    252252    }
    253253
    254     public void ReOrderToIndices(IEnumerable<int> indices) {
    255       List<Tuple<int, int>> indicesTuple = new List<Tuple<int, int>>();
    256 
    257       for (int i = 0; i < indices.Count(); ++i) {
    258         indicesTuple.Add(new Tuple<int, int>(i, indices.ElementAt(i)));
    259       }
    260 
    261       ReOrderToIndices(indicesTuple);
    262     }
    263 
    264     public void ReOrderToIndices(IList<System.Tuple<int, int>> indices) {
     254    public void ReOrderToIndices(int[] indices) {
    265255      preprocessingData.InTransaction(() => {
    266256        for (int i = 0; i < preprocessingData.Columns; ++i) {
    267257          if (preprocessingData.VariableHasType<double>(i)) {
    268             reOrderToIndices<double>(i, indices);
     258            ReOrderToIndices<double>(i, indices);
    269259          } else if (preprocessingData.VariableHasType<string>(i)) {
    270             reOrderToIndices<string>(i, indices);
     260            ReOrderToIndices<string>(i, indices);
    271261          } else if (preprocessingData.VariableHasType<DateTime>(i)) {
    272             reOrderToIndices<DateTime>(i, indices);
    273           }
    274         }
    275       });
    276     }
    277 
    278     public void ShuffleToIndices(IList<System.Tuple<int, int>> indices) {
    279       preprocessingData.InTransaction(() => {
    280         for (int i = 0; i < preprocessingData.Columns; ++i) {
    281           if (preprocessingData.VariableHasType<double>(i)) {
    282             ShuffleToIndices<double>(i, indices);
    283           } else if (preprocessingData.VariableHasType<string>(i)) {
    284             ShuffleToIndices<string>(i, indices);
    285           } else if (preprocessingData.VariableHasType<DateTime>(i)) {
    286             ShuffleToIndices<DateTime>(i, indices);
    287           }
    288         }
    289       });
    290     }
    291 
    292     private void reOrderToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    293 
     262            ReOrderToIndices<DateTime>(i, indices);
     263          }
     264        }
     265      });
     266    }
     267
     268    private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
    294269      List<T> originalData = new List<T>(preprocessingData.GetValues<T>(columnIndex));
    295 
    296       // process all columns equally
    297       foreach (Tuple<int, int> index in indices) {
    298         int originalIndex = index.Item1;
    299         int replaceIndex = index.Item2;
     270      if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
     271
     272      for (int i = 0; i < indices.Length; i++) {
     273        int originalIndex = i;
     274        int replaceIndex = indices[i];
    300275
    301276        T replaceValue = originalData.ElementAt<T>(replaceIndex);
    302277        preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
    303       }
    304     }
    305 
    306     private void ShuffleToIndices<T>(int columnIndex, IList<Tuple<int, int>> indices) {
    307       // process all columns equally
    308       foreach (Tuple<int, int> index in indices) {
    309         int originalIndex = index.Item1;
    310         int replaceIndex = index.Item2;
    311 
    312         T tmp = preprocessingData.GetCell<T>(columnIndex, originalIndex);
    313         T replaceValue = preprocessingData.GetCell<T>(columnIndex, replaceIndex);
    314 
    315         preprocessingData.SetCell<T>(columnIndex, originalIndex, replaceValue);
    316         preprocessingData.SetCell<T>(columnIndex, replaceIndex, tmp);
    317278      }
    318279    }
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Plugin.cs.frame

    r14195 r14886  
    3434  [PluginDependency("HeuristicLab.Collections", "3.3")]
    3535  [PluginDependency("HeuristicLab.Data","3.3")]
    36   [PluginDependency("HeuristicLab.Optimization", "3.3")]
     36  [PluginDependency("HeuristicLab.Optimization", "3.3")] 
    3737  [PluginDependency("HeuristicLab.Persistence", "3.3")]
    3838  [PluginDependency("HeuristicLab.Problems.DataAnalysis","3.4")]
     39  [PluginDependency("HeuristicLab.Random", "3.3")]
    3940  public class HeuristicLabDataPreprocessingPlugin : PluginBase {
    4041  }
Note: See TracChangeset for help on using the changeset viewer.