@@ -494,37 +494,144 @@ public TFTensor (long [] data) : base (SetupTensor (TFDataType.Int64, data, size
494494 /// <param name="data">Data.</param>
495495 public TFTensor ( Complex [ ] data ) : base ( SetupTensor ( TFDataType . Complex128 , data , size : 16 ) ) { } // NOTE(review): size: 16 is presumably the per-element byte size of Complex128 (two 64-bit floats) - confirm against SetupTensor.
496496
497- /// <summary>
498- /// Creates a single-dimension tensor from a byte buffer. This is different than creating a tensor from a byte array that produces a tensor with as many elements as the byte array.
/// <summary>
/// Creates a TF_STRING tensor, allocated with no dimensions, whose single string value is the content of
/// <paramref name="buffer"/>. This differs from building a tensor from a byte array, which produces a
/// tensor with as many elements as the byte array.
/// </summary>
/// <param name="buffer">Raw bytes of the string. May be empty, must not be null.</param>
/// <returns>The new tensor, or null when the native encoder reports a non-Ok status.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="buffer"/> is null.</exception>
public unsafe static TFTensor CreateString (byte [] buffer)
{
	if (buffer == null)
		throw new ArgumentNullException (nameof (buffer));
	//
	// TF_STRING tensors are encoded with a table of 8-byte offsets followed by
	// TF_StringEncode-encoded bytes.
	//
	var size = TFString.TF_StringEncodedSize ((UIntPtr) buffer.Length);
	IntPtr handle = TF_AllocateTensor (TFDataType.String, IntPtr.Zero, 0, (UIntPtr) ((ulong) size + 8));

	// Wrap the native allocation right away so the failure path below can release it.
	var tensor = new TFTensor (handle);

	// The only string starts at offset 0 of the encoded area, so the single table entry is 0.
	IntPtr dst = TF_TensorData (handle);
	Marshal.WriteInt64 (dst, 0);

	// Taking &buffer [0] throws on an empty array. Pin a one-byte stand-in instead so the
	// encoder still receives a valid pointer together with a length of 0.
	byte [] pinned = buffer.Length == 0 ? new byte [1] : buffer;
	using (var status = new TFStatus ()) {
		fixed (byte* src = pinned) {
			TFString.TF_StringEncode (src, (UIntPtr) buffer.Length, (byte*) (dst + 8), size, status.handle);
			if (status.StatusCode != TFCode.Ok) {
				// Release the tensor instead of leaking it when encoding fails.
				tensor.Dispose ();
				return null;
			}
		}
	}
	return tensor;
}
525+
526+ /// <summary>
527+ /// Converts a single-dimension tensor into a byte buffer. The byte array can be further decoded into strings using appropriate encoding scheme e.g. "UTF8"
499528 /// </summary>
// NOTE(review): these lines carry the '-' diff marker, i.e. this is the pre-change text of
// CreateString (byte [] buffer). It casts the destination to sbyte* when calling TF_StringEncode.
500- public unsafe static TFTensor CreateString ( byte [ ] buffer )
501- {
502- if ( buffer == null )
503- throw new ArgumentNullException ( nameof ( buffer ) ) ;
504- //
505- // TF_STRING tensors are encoded with a table of 8-byte offsets followed by
506- // TF_StringEncode-encoded bytes. The allocation below is the encoded size plus one 8-byte entry.
507- //
508- var size = TFString . TF_StringEncodedSize ( ( UIntPtr ) buffer . Length ) ;
509- IntPtr handle = TF_AllocateTensor ( TFDataType . String , IntPtr . Zero , 0 , ( UIntPtr ) ( ( ulong ) size + 8 ) ) ;
510-
511- // Write 0 into the single 8-byte offset entry at the start of the tensor data
512- IntPtr dst = TF_TensorData ( handle ) ;
513- Marshal . WriteInt64 ( dst , 0 ) ;
514- var status = TFStatus . TF_NewStatus ( ) ;
// NOTE(review): indexing buffer [ 0 ] throws for an empty array.
515- fixed ( byte * src = & buffer [ 0 ] )
516- {
// The encoded bytes are written right after the 8-byte offset entry.
517- TFString . TF_StringEncode ( src , ( UIntPtr ) buffer . Length , ( sbyte * ) ( dst + 8 ) , size , status ) ;
518- var ok = TFStatus . TF_GetCode ( status ) == TFCode . Ok ;
519- TFStatus . TF_DeleteStatus ( status ) ;
// NOTE(review): returning null here does not release the tensor allocated above.
520- if ( ! ok )
521- return null ;
522- }
523- return new TFTensor ( handle ) ;
524- }
525-
526- // Convenience function to factor out the setup of a new tensor from an array
527- static IntPtr SetupTensor ( TFDataType dt , long [ ] dims , Array data , int size )
/// <param name="tensor">Tensor whose data holds one 8-byte offset entry followed by one encoded string.</param>
/// <returns>
/// The decoded bytes, or null when the tensor data is too small to hold the offset entry, the native
/// decoder reports a non-Ok status, or the decoded span would extend past the tensor data.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="tensor"/> is null.</exception>
public static unsafe byte [] DecodeString (TFTensor tensor)
{
	if (tensor == null)
		throw new ArgumentNullException (nameof (tensor));
	//
	// TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes.
	// [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes]
	//
	const ulong offsetTableSize = 8;
	ulong byteSize = (ulong) tensor.TensorByteSize;

	// Subtracting 8 from a smaller unsigned size would wrap around and hand the decoder a huge length.
	if (byteSize < offsetTableSize)
		return null;
	ulong available = byteSize - offsetTableSize;

	var src = TF_TensorData (tensor.handle);
	using (var status = new TFStatus ()) {
		IntPtr dst = IntPtr.Zero;
		UIntPtr dstLen = UIntPtr.Zero;
		var read = TFString.TF_StringDecode ((byte*) (src + 8), (UIntPtr) available, (byte**) &dst, &dstLen, status.handle);
		if (status.StatusCode != TFCode.Ok)
			return null;

		// Defensive: never copy a span that claims to reach beyond the tensor's own data.
		if ((ulong) read > available)
			return null;

		var buffer = new byte [(int) dstLen];
		Marshal.Copy (dst, buffer, 0, buffer.Length);
		return buffer;
	}
}
551+
/// <summary>
/// Creates a TF_STRING tensor of the given shape from an array of byte buffers.
/// The bytes for string[i] are taken from buffer[i][:].
/// </summary>
/// <param name="buffer">One byte array per string. Entries may be empty; neither the array nor its entries may be null.</param>
/// <param name="shape">Shape whose dimensions are passed to the native tensor allocation.</param>
/// <returns>The new tensor, or null when the native encoder reports a non-Ok status.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="buffer"/>, one of its entries, or <paramref name="shape"/> is null.
/// </exception>
public static unsafe TFTensor CreateString (byte [] [] buffer, TFShape shape)
{
	if (buffer == null)
		throw new ArgumentNullException (nameof (buffer));
	if (shape == null)
		throw new ArgumentNullException (nameof (shape));
	//
	// TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes.
	// [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes]
	//
	// Sizes are accumulated as 64-bit values so large inputs cannot overflow an int.
	ulong encodedSize = 0;
	foreach (var b in buffer) {
		if (b == null)
			throw new ArgumentNullException (nameof (buffer), "buffer must not contain null entries");
		encodedSize += (ulong) TFString.TF_StringEncodedSize ((UIntPtr) b.Length);
	}
	ulong tableSize = (ulong) buffer.Length * 8;
	IntPtr handle = TF_AllocateTensor (TFDataType.String, shape.dims, shape.dims.Length, (UIntPtr) (tableSize + encodedSize));

	// Wrap the native allocation right away so the failure path below can release it.
	var tensor = new TFTensor (handle);

	// entry walks the offset table; dst walks the encoded area that follows the table.
	IntPtr entry = TF_TensorData (handle);
	IntPtr dst = new IntPtr (entry.ToInt64 () + (long) tableSize);
	ulong offset = 0;

	// Taking &buffer [i] [0] throws on an empty entry. A one-byte stand-in gives the encoder
	// a valid pointer while the length passed to it stays 0.
	byte [] emptyStandIn = new byte [1];

	// One status object serves every iteration because the loop stops at the first failure.
	using (var status = new TFStatus ()) {
		for (int i = 0; i < buffer.Length; i++) {
			// Each table entry records where string i starts, relative to the start of the encoded area.
			Marshal.WriteInt64 (entry, (long) offset);

			byte [] pinned = buffer [i].Length == 0 ? emptyStandIn : buffer [i];
			fixed (byte* src = pinned) {
				var written = TFString.TF_StringEncode (src, (UIntPtr) buffer [i].Length, (byte*) dst, (UIntPtr) (encodedSize - offset), status.handle);
				if (status.StatusCode != TFCode.Ok) {
					// Release the tensor instead of leaking it when encoding fails.
					tensor.Dispose ();
					return null;
				}
				ulong step = (ulong) written;
				entry += 8;
				dst = new IntPtr (dst.ToInt64 () + (long) step);
				offset += step;
			}
		}
	}
	return tensor;
}
595+
/// <summary>
/// Converts a multi-dimension TF_STRING tensor into an array of byte buffers, one per element.
/// Each byte array can be further decoded into a string using an appropriate encoding scheme, e.g. "UTF8".
/// </summary>
/// <param name="tensor">Tensor whose data holds one 8-byte offset entry per element followed by the encoded strings.</param>
/// <returns>
/// One byte array per element (the element count is the product of the tensor's dimensions), or null when
/// the tensor data cannot hold the offset table, the native decoder reports a non-Ok status, or a decoded
/// span would extend past the tensor data.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="tensor"/> is null.</exception>
public static unsafe byte [] [] DecodeMultiDimensionString (TFTensor tensor)
{
	if (tensor == null)
		throw new ArgumentNullException (nameof (tensor));
	//
	// TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes.
	// [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes]
	//
	long count = 1;
	foreach (var dim in tensor.Shape)
		count *= dim;
	if (count < 0)
		return null;

	// The data must at least contain the offset table; otherwise skipping it would leave the buffer.
	ulong byteSize = (ulong) tensor.TensorByteSize;
	ulong tableSize = (ulong) count * 8;
	if (byteSize < tableSize)
		return null;

	var buffer = new byte [count] [];

	// The strings are read sequentially from the area right after the offset table.
	IntPtr src = new IntPtr (TF_TensorData (tensor.handle).ToInt64 () + (long) tableSize);
	ulong remaining = byteSize - tableSize;

	// One status object serves every iteration because the loop stops at the first failure.
	using (var status = new TFStatus ()) {
		for (int i = 0; i < buffer.Length; i++) {
			IntPtr dst = IntPtr.Zero;
			UIntPtr dstLen = UIntPtr.Zero;
			var read = TFString.TF_StringDecode ((byte*) src, (UIntPtr) remaining, (byte**) &dst, &dstLen, status.handle);
			if (status.StatusCode != TFCode.Ok)
				return null;

			// Defensive: never copy a span that claims to reach beyond the tensor's own data.
			ulong consumed = (ulong) read;
			if (consumed > remaining)
				return null;

			buffer [i] = new byte [(int) dstLen];
			Marshal.Copy (dst, buffer [i], 0, buffer [i].Length);
			src = new IntPtr (src.ToInt64 () + (long) consumed);
			remaining -= consumed;
		}
	}
	return buffer;
}
632+
// Whole-array overload: forwards to the ranged SetupTensor with start 0 and count data.Length.
static IntPtr SetupTensor (TFDataType dt, long [] dims, Array data, int size)
	=> SetupTensor (dt, dims, data, start: 0, count: data.Length, size: size);
0 commit comments