Changeset 3946


Ignore:
Timestamp:
06/24/10 15:35:56 (9 years ago)
Author:
epitzer
Message:

Revise CompactNumberArray2StringSerializer: split text into several smaller strings to mitigate memory problems for very large arrays (#1138)

Location:
trunk/sources/HeuristicLab.Persistence/3.3
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Persistence/3.3/Default/CompositeSerializers/CompactNumberArray2StringSerializer.cs

    r3937 r3946  
    3434
    3535  [StorableClass]
    36   internal sealed class CompactNumberArray2StringSerializer : ICompositeSerializer {
     36  public sealed class CompactNumberArray2StringSerializer : ICompositeSerializer {
     37
     38    public const int SPLIT_THRESHOLD = 1024 * 1024;
    3739
    3840    public int Priority {
     
    6062      int[] lengths = new int[a.Rank];
    6163      int[] lowerBounds = new int[a.Rank];
    62       StringBuilder sb = new StringBuilder(a.Rank * 3);
     64      StringBuilder sb = new StringBuilder(a.Rank * 6);
    6365      sb.Append(a.Rank).Append(';');
    64       int capacity = 1;
    6566      for (int i = 0; i < a.Rank; i++) {
    6667        sb.Append(a.GetLength(i)).Append(';');
    6768        lengths[i] = a.GetLength(i);
    68         capacity *= lengths[i];
    6969      }
    70       sb.EnsureCapacity(capacity * 3);
    7170      for (int i = 0; i < a.Rank; i++) {
    7271        sb.Append(a.GetLowerBound(i)).Append(';');
    7372        lowerBounds[i] = a.GetLowerBound(i);
    7473      }
     74      yield return new Tag(sb.ToString());
     75      int nElements = 1;
     76      for (int i = 0; i < a.Rank; i++) {
     77        lowerBounds[i] = a.GetLowerBound(i);
     78        lengths[i] = a.GetLength(i);
     79        nElements *= lengths[i];
     80      }
     81      sb = new StringBuilder(Math.Min(nElements * 3, SPLIT_THRESHOLD));
    7582      int[] positions = (int[])lowerBounds.Clone();
    7683      while (positions[a.Rank - 1] < lengths[a.Rank - 1] + lowerBounds[a.Rank - 1]) {
    7784        sb.Append(numberConverter.Format(a.GetValue(positions))).Append(';');
     85        if (sb.Length > SPLIT_THRESHOLD && sb.Length > sb.Capacity - 18) {
     86          yield return new Tag(sb.ToString());
     87          sb = new StringBuilder(Math.Min(nElements * 3, SPLIT_THRESHOLD));
     88        }
    7889        positions[0] += 1;
    7990        for (int i = 0; i < a.Rank - 1; i++) {
     
    8697        }
    8798      }
    88       yield return new Tag("compact array", sb.ToString());
     99      if (sb.Length > 0)
     100        yield return new Tag(sb.ToString());
    89101    }
    90102
     103    private static Tag[] emptyTag = new Tag[0];
    91104    public IEnumerable<Tag> Decompose(object obj) {
    92       return new Tag[] { };
     105      return emptyTag;
    93106    }
    94107
     
    99112        var valueIter = ((string)tagIter.Current.Value).GetSplitEnumerator(';');
    100113        valueIter.MoveNext();
    101         int rank = int.Parse((string)valueIter.Current);
     114        int rank = int.Parse(valueIter.Current);
    102115        int[] lengths = new int[rank];
    103116        int[] lowerBounds = new int[rank];
    104117        for (int i = 0; i < rank; i++) {
    105118          valueIter.MoveNext();
    106           lengths[i] = int.Parse((string)valueIter.Current);
     119          lengths[i] = int.Parse(valueIter.Current);
    107120        }
    108121        for (int i = 0; i < rank; i++) {
    109122          valueIter.MoveNext();
    110           lowerBounds[i] = int.Parse((string)valueIter.Current);
     123          lowerBounds[i] = int.Parse(valueIter.Current);
    111124        }
    112125        Type elementType = type.GetElementType();
    113126        Array a = Array.CreateInstance(elementType, lengths, lowerBounds);
     127        if (a == null) throw new PersistenceException("invalid instance data type, expected array");
    114128        int[] positions = (int[])lowerBounds.Clone();
    115         while (valueIter.MoveNext()) {
    116           a.SetValue(
    117             numberConverter.Parse((string)valueIter.Current, elementType),
    118             positions);
    119           positions[0] += 1;
    120           for (int i = 0; i < rank - 1; i++) {
    121             if (positions[i] >= lengths[i] + lowerBounds[i]) {
    122               positions[i + 1] += 1;
    123               positions[i] = lowerBounds[i];
    124             } else {
    125               break;
     129        while (tagIter.MoveNext()) {
     130          valueIter = ((string)tagIter.Current.Value).GetSplitEnumerator(';');
     131          while (valueIter.MoveNext()) {
     132            a.SetValue(numberConverter.Parse(valueIter.Current, elementType), positions);
     133            positions[0] += 1;
     134            for (int i = 0; i < a.Rank - 1; i++) {
     135              if (positions[i] >= lengths[i] + lowerBounds[i]) {
     136                positions[i + 1] += 1;
     137                positions[i] = lowerBounds[i];
     138              } else {
     139                break;
     140              }
    126141            }
    127142          }
     
    129144        return a;
    130145      } catch (InvalidOperationException e) {
    131         throw new PersistenceException("Insufficient data to deserialize compact array", e);
     146        throw new PersistenceException("Insuffictient data to deserialize compact array", e);
    132147      } catch (InvalidCastException e) {
    133148        throw new PersistenceException("Invalid element data during compact array deserialization", e);
     
    136151
    137152    public void Populate(object instance, IEnumerable<Tag> tags, Type type) {
    138       // Nothing to do: Compact arrays are already populated during instance creation.
     153      // Nothing to do. Arrays are populated during instance creation;
    139154    }
    140155
  • trunk/sources/HeuristicLab.Persistence/3.3/Tests/UseCases.cs

    r3937 r3946  
    10751075    }
    10761076
     1077    [TestMethod]
     1078    public void TestCompactNumberArraySerializer() {
     1079      Random r = new Random();
     1080      double[] a = new double[CompactNumberArray2StringSerializer.SPLIT_THRESHOLD * 2 + 1];
     1081      for (int i = 0; i < a.Length; i++)
     1082        a[i] = r.Next(10);
     1083      var config = ConfigurationService.Instance.GetDefaultConfig(new XmlFormat());
     1084      config = new Configuration(config.Format,
     1085        config.PrimitiveSerializers.Where(s => s.SourceType != typeof(double[])),
     1086        config.CompositeSerializers);
     1087      XmlGenerator.Serialize(a, tempFile, config);
     1088      double[] newA = XmlParser.Deserialize<double[]>(tempFile);
     1089      Assert.AreEqual(a.Length, newA.Length);
     1090      for (int i = 0; i < a.Rank; i++) {
     1091        Assert.AreEqual(a.GetLength(i), newA.GetLength(i));
     1092        Assert.AreEqual(a.GetLowerBound(i), newA.GetLowerBound(i));
     1093      }
     1094      for (int i = 0; i < a.Length; i++) {
     1095        Assert.AreEqual(a[i], newA[i]);
     1096      }
     1097    }
    10771098
    10781099    [ClassInitialize]
Note: See TracChangeset for help on using the changeset viewer.