' VARIABLE NAME PREFIXES:
' dt = Data Table
' str = String
' int = Integer
' sb = string builder

' THIS FUNCTION RETURNS: a DataTable containing every N-Gram (contiguous run of words) found in a string of words

Public Function CreateNGramsFromStringOfWords(ByVal StringOfWords As String) As DataTable

    ' PURPOSE: adds one row to the output table for every contiguous run of
    ' words (N-Gram) found in StringOfWords.
    '
    ' INPUT:
    ' StringOfWords = the text to break into words. Every "." and "?" is removed
    '                 and the result is trimmed before it is handed to Tokenizer.
    '
    ' OUTPUT TABLE COLUMNS ARE ...
    ' NGram OutputNbrWords StartWordNbr EndWordNbr Input InputNbrWords
    '
    ' NGram          = the words of the N-Gram joined with a space
    ' OutputNbrWords = number of words in the N-Gram
    ' StartWordNbr   = zero-based index of the first word of the N-Gram
    ' EndWordNbr     = zero-based index of the last word of the N-Gram
    ' Input          = the cleaned input string (punctuation removed, trimmed)
    ' InputNbrWords  = number of words returned by Tokenizer
    '
    ' ROW ORDER: start words are visited first to last; for each start word the
    ' longest N-Gram is written first and the single-word N-Gram last.
    '
    ' When the input is Nothing, is empty once cleaned, or Tokenizer returns no
    ' words, the table is returned with its columns defined and zero rows.

    Dim dtNGrams As New DataTable ' This is the return value for this Function

    ' BUILD OUTPUT TABLE COLUMNS
    ' Done first so that every return path hands back the same schema.
    dtNGrams.Columns.Add("NGram")
    dtNGrams.Columns.Add("OutputNbrWords", GetType(Integer))
    dtNGrams.Columns.Add("StartWordNbr", GetType(Integer))
    dtNGrams.Columns.Add("EndWordNbr", GetType(Integer))
    dtNGrams.Columns.Add("Input")
    dtNGrams.Columns.Add("InputNbrWords", GetType(Integer))

    If StringOfWords Is Nothing Then
        Return dtNGrams
    End If

    ' ENSURE THAT THESE ARE CLEANED OF PUNCTUATION
    ' String.Replace is used instead of the VB Replace function because the
    ' latter is documented to return Nothing for a zero-length expression,
    ' which would make the following Trim fail.
    StringOfWords = StringOfWords.Replace(".", "").Replace("?", "").Trim()

    If StringOfWords.Length = 0 Then
        Return dtNGrams
    End If

    Dim strArrayOfWords() As String = Tokenizer(StringOfWords)

    If strArrayOfWords Is Nothing OrElse strArrayOfWords.Length = 0 Then
        Return dtNGrams
    End If

    Dim intInputNbrWords As Integer = strArrayOfWords.Length
    Dim intLastWordIdx As Integer = intInputNbrWords - 1

    ' ** BEGIN LOOPS **
    ' Outer loop drops words from the front; inner loop drops words from the back.
    For intStartWordNbr As Integer = 0 To intLastWordIdx
        For intEndWordNbr As Integer = intLastWordIdx To intStartWordNbr Step -1

            Dim intOutputNbrWords As Integer = intEndWordNbr - intStartWordNbr + 1

            ' Trim keeps the result free of leading/trailing blanks even if
            ' the first or last word of the run is itself blank.
            Dim strNGram As String = String.Join(" ", strArrayOfWords, intStartWordNbr, intOutputNbrWords).Trim()

            dtNGrams.Rows.Add(strNGram, intOutputNbrWords, intStartWordNbr, intEndWordNbr, StringOfWords, intInputNbrWords)

        Next
    Next

    Return dtNGrams
End Function

Zoe The Robot. Why generate NGrams?

Zoe The Robot. Actual code used to generate NGrams