コード例 #1
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// nextTag moves the reader forward to the next start element.
//
// One Read is always performed. If it returns 1, reading continues until the
// current node is a start element, NodeType reports -1, or Read returns
// something other than 1. The result of the last Read call is returned.
func nextTag(reader *xmltextreader.XmlTextReaderPtr) int {
	status := reader.Read()
	if status != 1 {
		return status
	}

	kind := reader.NodeType()
	for status == 1 && kind != -1 && kind != xmltextreader.XML_START_ELEMENT {
		status = reader.Read()
		kind = reader.NodeType()
	}
	return status
}
コード例 #2
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// getText advances the reader until it reaches a text node or an end element
// and returns the text node's value. It returns "" when an end element is
// reached first or when reading stops before a text node is found.
func getText(reader *xmltextreader.XmlTextReaderPtr) string {
	res := reader.Read()
	eventType := reader.NodeType()

	// Keep going only while Read reports 1 (a node was read). Checking for
	// "!= -1" alone would spin forever once Read starts returning 0 at the
	// end of the input (libxml2 convention: 1 = node, 0 = no more nodes,
	// -1 = error).
	for res == 1 && eventType != xmltextreader.XML_END_ELEMENT && eventType != xmltextreader.XML_TEXT_NODE {
		res = reader.Read()
		eventType = reader.NodeType()
	}

	if eventType != xmltextreader.XML_TEXT_NODE {
		return ""
	}
	return reader.Value()
}
コード例 #3
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// getParaToTagEnd consumes nodes until the end element named tag and sends the
// words of every text node met on the way to c via sendWords.
//
// Each text node gets a base weight of 1.0 plus a bonus for the inline style
// that currently encloses it (strong: +0.2, otherwise emphasis or link: +0.1;
// bonuses do not add up), multiplied by weightMult. When fb2Pointer is empty
// the nodes are still consumed but nothing is sent, which lets callers use the
// function to skip an element.
func getParaToTagEnd(reader *xmltextreader.XmlTextReaderPtr, tag string, fb2Pointer string, weightMult float32, c chan *Word) {
	// Bit flags recording which inline style elements are currently open.
	const (
		strong byte = 1 << iota
		emphasis
		link
	)

	var styles byte

	res := reader.Read()
	eventType := reader.NodeType()

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input,
	// e.g. when the expected end element never shows up.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == tag) {
		switch eventType {
		case xmltextreader.XML_TEXT_NODE:
			var weight float32 = 1.0
			switch {
			case styles&strong != 0:
				weight += 0.2
			case styles&emphasis != 0, styles&link != 0:
				weight += 0.1
			}
			if len(fb2Pointer) > 0 {
				sendWords(reader.Value(), fb2Pointer, weight*weightMult, c)
			}
		case xmltextreader.XML_START_ELEMENT:
			// An inline style element opens: remember it.
			switch reader.Name() {
			case emphasis_tag:
				styles |= emphasis
			case strong_tag:
				styles |= strong
			case link_tag:
				styles |= link
			}
		case xmltextreader.XML_END_ELEMENT:
			// An inline style element closes: forget it.
			switch reader.Name() {
			case emphasis_tag:
				styles &^= emphasis
			case strong_tag:
				styles &^= strong
			case link_tag:
				styles &^= link
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
}
コード例 #4
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// getParagraphsToTagEnd consumes nodes until the end element named tag and
// indexes every paragraph element found on the way with weight multiplier 1.5.
//
// Start elements seen by this loop are counted from 1; a paragraph's pointer
// is currentPointer + "/" + that count.
func getParagraphsToTagEnd(reader *xmltextreader.XmlTextReaderPtr, tag string, currentPointer string, c chan *Word) {
	res := reader.Read()
	eventType := reader.NodeType()
	currentElementNum := 0

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == tag) {
		if eventType == xmltextreader.XML_START_ELEMENT {
			currentElementNum++
			if reader.Name() == paragraph_tag {
				getParaToTagEnd(reader, paragraph_tag, currentPointer+"/"+strconv.Itoa(currentElementNum), 1.5, c)
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
}
コード例 #5
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// processDescription moves to the next start element, which must be the
// description element, and walks it up to its end element, handing every
// title-info / src-title-info child to processTitleInfo.
//
// It returns -1 when the next start element is not the description element
// and 1 otherwise. Start elements seen by this loop are counted from 1 and
// the count is appended to currentPointer to build each child's pointer.
func processDescription(reader *xmltextreader.XmlTextReaderPtr, currentPointer string, c chan *Word) int {
	// nextTag's status is not inspected; the name check decides whether to go on.
	nextTag(reader)
	if reader.Name() != description_tag {
		return -1
	}

	res := reader.Read()
	eventType := reader.NodeType()
	currentElementNum := 0

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == description_tag) {
		if eventType == xmltextreader.XML_START_ELEMENT {
			currentElementNum++
			name := reader.Name()
			if name == title_info_tag || name == src_title_info_tag {
				processTitleInfo(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
	return 1
}
コード例 #6
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// processPersonInfo walks a person element (named tag) up to its end element
// and sends the text of its name parts to c via sendWords: first, last and
// middle name with weight 1.3, nickname with weight 1.1.
//
// Start elements seen by this loop are counted from 1; a name part's pointer
// is currentPointer + "/" + that count.
func processPersonInfo(reader *xmltextreader.XmlTextReaderPtr, tag string, currentPointer string, c chan *Word) {
	res := reader.Read()
	eventType := reader.NodeType()
	currentElementNum := 0

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == tag) {
		if eventType == xmltextreader.XML_START_ELEMENT {
			currentElementNum++
			switch reader.Name() {
			case first_name_tag, last_name_tag, middle_name_tag:
				sendWords(getText(reader), currentPointer+"/"+strconv.Itoa(currentElementNum), 1.3, c)
			case nick_name_tag:
				sendWords(getText(reader), currentPointer+"/"+strconv.Itoa(currentElementNum), 1.1, c)
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
}
コード例 #7
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// processTitleInfo walks a title-info style element (named tag) up to its end
// element and indexes the parts it knows about:
//
//   - author: delegated to processPersonInfo
//   - book title: text sent with weight 2.0
//   - annotation: paragraphs indexed by getParagraphsToTagEnd
//   - translator: text sent with weight 1.1
//
// Start elements seen by this loop are counted from 1; a child's pointer is
// currentPointer + "/" + that count.
func processTitleInfo(reader *xmltextreader.XmlTextReaderPtr, tag string, currentPointer string, c chan *Word) {
	res := reader.Read()
	eventType := reader.NodeType()
	currentElementNum := 0

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == tag) {
		if eventType == xmltextreader.XML_START_ELEMENT {
			currentElementNum++
			switch name := reader.Name(); name {
			case author_tag:
				processPersonInfo(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
			case book_title_tag:
				sendWords(getText(reader), currentPointer+"/"+strconv.Itoa(currentElementNum), 2.0, c)
			case annotation_tag:
				getParagraphsToTagEnd(reader, annotation_tag, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
			case translator_tag:
				// NOTE(review): getText returns only the first text node it
				// meets; if translator has nested name elements like author,
				// only the first of them is indexed here - confirm intent.
				sendWords(getText(reader), currentPointer+"/"+strconv.Itoa(currentElementNum), 1.1, c)
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
}
コード例 #8
0
ファイル: fb2parser.go プロジェクト: dkovalkov/FB2-indexer
// processBody walks a container element (named tag) up to its end element and
// indexes its content, recursing into nested containers.
//
// Handling per child start element:
//   - paragraph, text-author, subtitle, verse: text indexed by getParaToTagEnd
//     with weight multiplier 1
//   - body: recursed into only when HasAttributes succeeds and reports none
//   - section: consumed without indexing when it has attributes, otherwise
//     recursed into
//   - title, epigraph: recursed into
//
// Start elements seen by this loop are counted and the count is appended to
// currentPointer to build each child's pointer. The count starts at 0, except
// when currentPointer is "/1", where it starts at 1 (presumably because a
// sibling handled elsewhere already occupies index 1 - confirm with caller).
func processBody(reader *xmltextreader.XmlTextReaderPtr, tag string, currentPointer string, c chan *Word) {
	currentElementNum := 0
	res := reader.Read()
	eventType := reader.NodeType()
	if currentPointer == "/1" {
		currentElementNum = 1
	}

	// Loop only while Read reports 1 (a node was read). Testing "!= -1" would
	// never terminate once Read starts returning 0 at the end of the input.
	for res == 1 && !(eventType == xmltextreader.XML_END_ELEMENT && reader.Name() == tag) {
		if eventType == xmltextreader.XML_START_ELEMENT {
			currentElementNum++

			switch name := reader.Name(); name {
			case paragraph_tag, text_author_tag, subtitle_tag, verse_tag:
				getParaToTagEnd(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), 1, c)
			case body_tag:
				hasAttr, err := reader.HasAttributes()
				if err == nil && !hasAttr {
					processBody(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
				}
			case section_tag:
				hasAttr, err := reader.HasAttributes()
				if err == nil && hasAttr {
					// Skip section with id attr (notes definitions): an empty
					// pointer makes getParaToTagEnd consume it without sending.
					getParaToTagEnd(reader, name, "", 1, c)
				} else {
					processBody(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
				}
			case title_tag, epigraph_tag:
				processBody(reader, name, currentPointer+"/"+strconv.Itoa(currentElementNum), c)
			}
		}
		res = reader.Read()
		eventType = reader.NodeType()
	}
}