'***************************************************************************************************
'             VB sample for the usage of TopoART (classes TopoART and Fast_TopoART) and            *
'             Hypersphere TopoART (class Hypersphere_TopoART)                                      *
'***************************************************************************************************
'                           Created by Marko Tscherepanow, 24 October 2019                         *
'***************************************************************************************************

' Compile and run from the console: dotnet run --project TopoART_sample3.vbproj

Imports System
Imports System.Globalization
Imports System.IO
Imports System.Reflection
Imports System.Text.RegularExpressions
Imports LibTopoART

Namespace LibTopoART_Samples

	''' <summary>
	''' Clustering sample using very noisy synthetic two-dimensional data. [VB]
	''' <para>Train TopoART or Hypersphere TopoART with a two-dimensional dataset similar to the one used in "Marko 
	''' Tscherepanow and Sören Riechers (2012). An Incremental On-line Classifier for Imbalanced, Incomplete, and Noisy 
	''' Data. In Proceedings of the European Conference on Artificial Intelligence (ECAI), Workshop on Active and 
	''' Incremental Learning (AIL), pp. 18-23. Montpellier, France." The dataset applied here comprises 1,000,000 samples 
	''' equally allotted to six clusters (each containing one sixth of the samples). Additionally, 1,000,000 uniformly
	''' distributed random samples are added. Finally, all samples are randomly shuffled.</para>
	''' <para>Due to the randomness involved, the results differ between different runs of this program. However, they  
	''' are qualitatively comparable: The first module creates a coarse clustering of the data while the second module 
	''' refines it to the six clusters of the undisturbed portion of the dataset.</para>
	''' <para>These results demonstrate the ability of TopoART to cope with a large amount of noise and to produce 
	''' stable results independent of the sample order.</para>
	''' <para>The resulting neural network can be visualised using the R script <c>ShowTopoARTResults.R</c> or the 
	''' R script <c>ShowHypersphereTopoARTResults.R</c>, respectively. Both R scripts are provided in the 
	''' subfolder <c>R</c>.</para>
	''' </summary>
	Module TopoART_sample3

		''' <summary>
		''' Identifies the network implementation that <c>Main</c> instantiates.
		''' </summary>
		Private Enum Selector
			USE_TopoART
			USE_Fast_TopoART
			USE_Hypersphere_TopoART
		End Enum

		' Choose TopoART implementation (see enum Selector above)
		Const Sel As Selector = Selector.USE_Fast_TopoART

		' Dataset (1,000,000 cluster samples without noise samples)
		Const Dataset As String				=	"../../../../../data/AIL12-like_dataset_large_and_noise_free.txt"
		' Destination directory for trained networks
		Const NetworkPath As String			=	"../../../../../results/networks/"

		' Number of samples in the dataset
		Const DatasetSampleNumber As Integer	=	1000000

		' Number of noise samples to be added
		Const NoiseSampleNumber As Integer		=	1000000

		' Total number of training samples (dataset samples followed by noise samples)
		Const SampleNumber As Integer			=	DatasetSampleNumber + NoiseSampleNumber
		' Number of components per input vector
		Const InputDimension As Integer			=	2

		' Common network parameters
		Const ModuleNumber As Integer	=	2
		Const rho_a As Decimal			=	0.92D
		Const beta_sbm As Decimal		=	0.65D
		Const tau As Integer			=	2000

		' Interval (in samples) at which a progress dot is written during training
		Private Const ProgressInterval As Integer = 40000

		''' <summary>
		''' Program entry point: builds the training set (dataset samples plus uniform noise, shuffled), 
		''' trains the network chosen by <c>Sel</c>, computes the cluster IDs, reports the elapsed time and 
		''' saves the network in text and binary form below <c>NetworkPath</c>.
		''' </summary>
		''' <param name="args">Command-line arguments (not used).</param>
		''' <exception cref="InvalidDataException">The dataset file is shorter than 
		''' <c>DatasetSampleNumber</c> lines or a line has too few columns.</exception>
		Sub Main(args As String())
			Dim ta As ITopoART = Nothing
			Dim filePrefix As String = Nothing
			Dim fileSuffix As String = Nothing
			' VB array declarations take the UPPER BOUND, not the length, hence the "- 1"
			Dim inputs(SampleNumber - 1)() As Decimal
			Dim rng As New Random

			' Set working directory to assembly directory (all paths above are relative to it)
			Directory.SetCurrentDirectory(Path.GetDirectoryName(New Uri(Assembly.GetEntryAssembly().Location).LocalPath))

			Console.WriteLine("Prepare training data")

			LoadDatasetSamples(inputs)
			AddNoiseSamples(inputs, rng)
			ShuffleSamples(inputs, rng)

			' Make sure the output directory exists BEFORE the long training run, so that a missing or
			' unwritable directory does not discard the trained network at the very end
			Directory.CreateDirectory(NetworkPath)

			' Start time measuring (Stopwatch is monotonic, unlike differences of DateTime.Now)
			Dim watch As System.Diagnostics.Stopwatch = System.Diagnostics.Stopwatch.StartNew()

			' Initialise a TopoART network with appropriate parameter values
			Select Case Sel
				Case Selector.USE_TopoART
					ta = New TopoART(InputDimension, ModuleNumber, rho_a)
					filePrefix	=	"TopoART"
					fileSuffix	=	"ta"
				Case Selector.USE_Fast_TopoART
					ta = New Fast_TopoART(InputDimension, ModuleNumber, rho_a)
					filePrefix	=	"Fast_TopoART"
					fileSuffix	=	"fta"
				Case Selector.USE_Hypersphere_TopoART
					ta = New Hypersphere_TopoART(InputDimension, ModuleNumber, rho_a)
					filePrefix	=	"Hypersphere_TopoART"
					fileSuffix	=	"hta"
				Case Else
					' Fail here instead of with a null dereference further down
					Throw New InvalidOperationException("Unsupported TopoART implementation selected: " & Sel.ToString())
			End Select

			' Set remaining common parameters
			ta.Beta_sbm	=	beta_sbm
			ta.Tau		=	tau
			ta.Phis		=	New Long() { 28, 15 }

			' Train network, writing one progress dot every ProgressInterval samples
			For i As Integer = 0 To SampleNumber - 1
				ta.Learn(inputs(i))
				If (i Mod ProgressInterval) = 0 Then
					Console.Write(".")
				End If
			Next
			Console.WriteLine("")

			' Determine clusters
			ta.ComputeClusterIDs()

			' Stop time measuring
			watch.Stop()

			' Output the required time
			Console.WriteLine("Time for computation: " & watch.Elapsed.ToString())

			' Save network in human-readable form
			ta.SaveText(NetworkPath + filePrefix + "_AIL12-like_dataset_large_and_noisy.txt")

			' Save network in binary form
			ta.Save(NetworkPath + filePrefix + "_AIL12-like_dataset_large_and_noisy." + fileSuffix)
		End Sub

		''' <summary>
		''' Reads the first <c>DatasetSampleNumber</c> lines of the file <c>Dataset</c> and stores one
		''' <c>InputDimension</c>-element vector per line in <paramref name="inputs"/>(0 .. DatasetSampleNumber - 1).
		''' </summary>
		''' <param name="inputs">Destination array; must hold at least <c>DatasetSampleNumber</c> entries.</param>
		''' <exception cref="InvalidDataException">The file ends early or a line has too few columns.</exception>
		Private Sub LoadDatasetSamples(inputs As Decimal()())
			' Created once instead of re-resolving the pattern for each of the 1,000,000 lines
			Dim whitespace As New Regex("\s+")

			Using datasetFile As New StreamReader(File.OpenRead(Dataset))
				For i As Integer = 0 To DatasetSampleNumber - 1
					Dim line As String = datasetFile.ReadLine()
					If line Is Nothing Then
						Throw New InvalidDataException("Dataset '" & Dataset & "' ended after " & i & " lines; expected " & DatasetSampleNumber & ".")
					End If

					Dim numbers() As String = whitespace.Split(line)
					If numbers.Length <= InputDimension Then
						Throw New InvalidDataException("Dataset '" & Dataset & "', line " & (i + 1) & ": expected at least " & (InputDimension + 1) & " whitespace-separated fields.")
					End If

					' Field 0 is skipped, exactly as before. NOTE(review): presumably an empty field caused by
					' leading whitespace, or a label column - confirm against the dataset format.
					Dim sample(InputDimension - 1) As Decimal
					For d As Integer = 0 To InputDimension - 1
						sample(d) = Decimal.Parse(numbers(d + 1), NumberStyles.Float, CultureInfo.InvariantCulture)
					Next
					inputs(i) = sample
				Next
			End Using
		End Sub

		''' <summary>
		''' Fills <paramref name="inputs"/>(DatasetSampleNumber .. SampleNumber - 1) with vectors whose 
		''' components are drawn from <c>Random.NextDouble()</c>, i.e. from the range [0, 1).
		''' </summary>
		''' <param name="inputs">Destination array; must hold at least <c>SampleNumber</c> entries.</param>
		''' <param name="rng">Random number generator to draw from.</param>
		Private Sub AddNoiseSamples(inputs As Decimal()(), rng As Random)
			For i As Integer = 0 To NoiseSampleNumber - 1
				Dim sample(InputDimension - 1) As Decimal
				For d As Integer = 0 To InputDimension - 1
					' Explicit conversion: Double -> Decimal is a narrowing conversion in VB
					sample(d) = CDec(rng.NextDouble())
				Next
				inputs(DatasetSampleNumber + i) = sample
			Next
		End Sub

		''' <summary>
		''' Shuffles <paramref name="inputs"/> in place using the Fisher-Yates algorithm, which yields every
		''' permutation with equal probability.
		''' </summary>
		''' <param name="inputs">Array to be permuted.</param>
		''' <param name="rng">Random number generator to draw from.</param>
		Private Sub ShuffleSamples(inputs As Decimal()(), rng As Random)
			For i As Integer = inputs.Length - 1 To 1 Step -1
				' Next(i + 1) returns an integer in 0 .. i, so position i may also stay where it is
				Dim j As Integer = rng.Next(i + 1)
				Dim swapped() As Decimal = inputs(i)
				inputs(i) = inputs(j)
				inputs(j) = swapped
			Next
		End Sub

	End Module

End Namespace
